diff --git a/1_fetch.R b/1_fetch.R
index 4f34b95e..998d700e 100644
--- a/1_fetch.R
+++ b/1_fetch.R
@@ -9,6 +9,21 @@ source("1_fetch/src/fetch_nhdv2_attributes_from_sb.R")
p1_targets_list <- list(
+ # Track and read in the state boundaries sf object. This file was
+ # created by running `states <- get_state_shp()` followed by
+ # `saveRDS(states, "1_fetch/in/state_boundaries.rds)`. get_state_shp
+ # is from 3_visualize/src/map_sites.R. It depends on the USAboundaries
+ # package, which is currently archived on CRAN.
+ tar_target(
+ p1_state_boundaries_rds,
+ "1_fetch/in/state_boundaries.rds",
+ format = "file"
+ ),
+ tar_target(
+ p1_state_boundaries_sf,
+ readRDS(p1_state_boundaries_rds)
+ ),
+
# download WQP data product from science base for discrete samples
tar_target(
p1_wqp_data_file,
diff --git a/1_fetch/in/state_boundaries.rds b/1_fetch/in/state_boundaries.rds
new file mode 100644
index 00000000..56bdd03d
Binary files /dev/null and b/1_fetch/in/state_boundaries.rds differ
diff --git a/3_visualize.R b/3_visualize.R
index 21ffdb7a..19e95f89 100644
--- a/3_visualize.R
+++ b/3_visualize.R
@@ -77,6 +77,7 @@ p3_targets_list <- list(
p3_site_map_png,
map_sites(flowlines = p1_nhd_reaches_sf,
matched_sites = p2a_site_splits,
+ states_shp = p1_state_boundaries_sf,
out_file = "3_visualize/out/do_site_map.png")
),
diff --git a/3_visualize/src/map_sites.R b/3_visualize/src/map_sites.R
index 461834f1..b60dae93 100644
--- a/3_visualize/src/map_sites.R
+++ b/3_visualize/src/map_sites.R
@@ -9,6 +9,8 @@
#' @param matched_sites sf object containing site locations and the flowline
#' reach identifier ("COMID") that the site has been matched to. Must contain
#' columns "COMID" and "geometry".
+#' @param states_shp sf object containing the U.S. state boundaries to
+#' include in the watershed plot.
#' @param out_file character string indicating the name of the saved file,
#' including file path and extension.
#' @param huc6_select vector of character string(s) indicating the HUC6 basins that
@@ -37,6 +39,7 @@
#'
map_sites <- function(flowlines,
matched_sites,
+ states_shp,
out_file,
huc6_select = "020402",
basin_bbox = c(xmin = -76.39556, ymin = 39.5, xmax = -74.37121, ymax = 40.89106),
@@ -109,7 +112,7 @@ map_sites <- function(flowlines,
ggspatial::annotation_scale(bar_cols = c("gray70","white"))
# create inset map
- inset_map <- map_drb_watershed(matched_sites)
+ inset_map <- map_drb_watershed(matched_sites, states_shp = states_shp)
# grab legend
legend <- cowplot::get_legend(sites_map)
@@ -145,10 +148,6 @@ map_sites <- function(flowlines,
#' @param huc8 vector of character string(s) indicating the HUC8 basins that
#' make up the watershed of interest. By default, the HUC8 basins that make up
#' the Delaware River Basin will be used.
-#' @param states vector of character string(s) indicating which states should
-#' be included in the inset map, using two-letter postal code abbreviations for
-#' each state. By default, the states surrounding the Delaware River Basin will
-#' be downloaded, including "NY", "PA", "NJ", "DE", and "MD".
#' @param epsg_out integer indicating the coordinate reference system that
#' should be used when creating the inset map. Defaults to EPSG 3857, pseudo-
#' mercator: https://epsg.io/3857
@@ -157,17 +156,14 @@ map_sites <- function(flowlines,
#' Returns an inset map as a ggplot object.
#'
map_drb_watershed <- function(sites,
+ states_shp,
huc8 = c("02040101","02040102","02040103",
"02040104","02040105","02040106",
"02040201","02040202","02040203",
"02040204","02040205","02040206",
"02040207"),
- states = c("NY","PA","NJ","DE","MD"),
epsg_out = 3857){
- # Download shapefiles for states that encompass the watershed of interest
- states_shp <- USAboundaries::us_states(resolution = "high", states = states)
-
# Download HUC8 boundaries associated with watershed boundary
boundary <- nhdplusTools::get_huc8(id = huc8) %>%
sf::st_union()
@@ -191,6 +187,21 @@ map_drb_watershed <- function(sites,
}
+#' @title Download state shapefiles
+#'
+#' @param states vector of character string(s) indicating which states should
+#' be included in the inset map, using two-letter postal code abbreviations for
+#' each state. By default, the states surrounding the Delaware River Basin will
+#' be downloaded, including "NY", "PA", "NJ", "DE", and "MD".
+#'
+get_state_shp <- function(states = c("NY","PA","NJ","DE","MD")){
+
+ # Download shapefiles for states that encompass the watershed of interest
+ states_shp <- USAboundaries::us_states(resolution = "high", states = states)
+
+}
+
+
#' @title Create leaflet map of site locations
#' @description
diff --git a/3_visualize/src/plotting_helpers.R b/3_visualize/src/plotting_helpers.R
new file mode 100644
index 00000000..7d1ce35d
--- /dev/null
+++ b/3_visualize/src/plotting_helpers.R
@@ -0,0 +1,402 @@
+### This script contains helper functions for plotting model predictive performance
+
+
+#' @title Plot model performance
+#'
+#' @description
+#' Function to plot the model performance by partition.
+#'
+#' @param metrics_df data frame containing the predictive performance metrics.
+#' Must contain columns "variable", "partition", and "model_id".
+#' @param performance_metric character string indicating which performance metric
+#' to plot from `metrics_df`.
+#' @param fileout character string indicating name of saved png file, including
+#' file path and extension.
+#' @param partition_select character string or character vector indicating which
+#' model partitions should be included in the plot. Options are "val", "train", and "val_times".
+#' @param partition_colors character string or character vector containing the hex
+#' color codes that are used to represent the model partitions.
+#' @param y_range numeric vector of length 2, indicating the desired range of the y-axis. If
+#' none is provided, we will estimate the best y-axis range. Defaults to NULL.
+#' @param y_label character string containing the y axis label. If none is provided,
+#' we will do our best to impute one. Defaults to NULL.
+#' @param plot_title logical; should the plot include a title indicating the model_id?
+#' Defaults to TRUE.
+#' @param panel_grids logical; should we plot the background grid lines? Defaults to TRUE.
+#' @param box_size numeric; width of border lines around box plots
+#' @param facet_text_size integer
+#' @param axis_text_size integer
+#' @param axis_title_size integer
+#' @param legend_text_size integer
+#' @param plot_height_in numeric; height of saved plot in inches
+#' @param plot_width_in numeric; width of saved plot in inches
+#'
+#' @returns
+#' Saves a png file containing a boxplot with one panel for each target variable.
+#'
+plot_metrics_by_partition <- function(metrics_df,
+ performance_metric,
+ fileout,
+ partition_select = c("val","train","val_times"),
+ partition_colors = c("#bdd7e7","#6baed6","#2171b5"),
+ y_range = NULL,
+ y_label = NULL,
+ plot_title = TRUE,
+ panel_grids = TRUE,
+ box_size = 0.25,
+ facet_text_size = 13,
+ axis_text_size = 12,
+ axis_title_size = 13,
+ legend_text_size = 12,
+ plot_height_in = 4,
+ plot_width_in = 6.4){
+
+ # subset the metrics data frame and rename the column containing
+ # the requested performance metric to the more generic "metric"
+ metrics_df_subset <- metrics_df %>%
+ filter(partition %in% partition_select) %>%
+ select('model_id', 'partition', 'variable', 'rep_id',c(!!performance_metric)) %>%
+ rename(metric := !!performance_metric) %>%
+ mutate(variable_renamed = recode(variable,
+ "do_max" = "DO-max",
+ "do_min" = "DO-min",
+ "do_mean" = "DO-mean")) %>%
+ mutate(variable_renamed = factor(variable_renamed,
+ levels = c("DO-mean","DO-min","DO-max")))
+
+ # define the y-axis label
+ if(is.null(y_label)){
+ if(grepl("rmse", performance_metric)){
+ y_label <- expression(RMSE~(mg~O[2]~L^-1))
+ }
+ if(grepl("nse", performance_metric)){
+ y_label <- expression(NSE)
+ }
+ if(grepl("bias", performance_metric)){
+ y_label <- expression(Mean~bias~(mg~O[2]~L^-1))
+ }
+ if(grepl("kge", performance_metric)){
+ y_label <- expression(KGE)
+ }
+ }
+
+ # define the y-axis range
+ if(is.null(y_range)){
+ y_range <- c((0.85*min(metrics_df_subset$metric)),
+ (1.15*max(metrics_df_subset$metric)))
+ }
+
+ # make the plot
+ p <- metrics_df_subset %>%
+ ggplot() +
+ geom_boxplot(aes(x= partition, y = metric, fill = partition), size = box_size) +
+ facet_wrap(~variable_renamed) +
+ scale_fill_manual(values = partition_colors, name = "") +
+ labs(x = "", y = y_label)+
+ coord_cartesian(ylim = c(y_range[1], y_range[2])) +
+ theme_bw() +
+ theme(strip.background = element_blank(),
+ strip.text = element_text(size = facet_text_size),
+ axis.title = element_text(size = axis_title_size),
+ axis.text = element_text(size = axis_text_size),
+ axis.title.y = element_text(margin = margin(r = 10)),
+ axis.title.x = element_text(margin = margin(t = 12)),
+ legend.text = element_text(size = legend_text_size),
+ legend.key.size = unit(0.65, "cm"),
+ legend.key.height = unit(0.75, 'cm'))
+
+ if(!panel_grids){
+ p <- p + theme(panel.grid.major = element_blank(),
+ panel.grid.minor = element_blank())
+ }
+ if(plot_title){
+ p <- p + ggtitle(unique(metrics_df_subset$model_id))
+ }
+
+ # save the plot
+ ggsave(filename = fileout, plot = p,
+ width = plot_width_in, height = plot_height_in, units = c("in"),
+ dpi = 300)
+
+ return(fileout)
+}
+
+
+
+#' @title Read reach-level feather files
+#'
+#' @description
+#' Function to read in feather file containing model predictions and performance
+#' metrics for individual site locations.
+#'
+#' @param path file path of the feather file
+#' @param preds_file file name with .feather file extension
+#'
+#' @returns
+#' Returns a data frame containing the reach-level predictive performance metrics.
+#'
+read_preds_feather <- function(path, preds_file = "val_preds.feather"){
+
+ rep <- substr(path, nchar(path), nchar(path))
+ file_name <- paste0(path,"/",preds_file)
+
+ preds <- arrow::read_feather(file_name) %>%
+ mutate(rep_id = rep)
+
+ return(preds)
+}
+
+
+
+#' @title Plot timeseries of model predictions with observations
+#'
+#' @description
+#' Function to plot a timeseries of observations with predictions from up to
+#' two different models, "base" for baseline LSTM, and/or "metab_dense" for
+#' the LSTM with a metab dense layer. All replicates are plotted as separate
+#' lines.
+#'
+#' @param preds_data data frame containing the model predictions and observations.
+#' Must include columns "site_no", "rep_id", "date", and `variable_obs`
+#' @param site_no character string indicating which site to plot
+#' @param variable_obs character string indicating which target variable to plot.
+#' Options include "do_mean", "do_max", and "do_min"
+#' @param plot_preds character vector indicating which model predictions to plot.
+#' Options include "base" and/or "metab_dense". Defaults to c("base", "metab_dense").
+#' @param preds_colors character string or character vector containing the hex
+#' color codes that are used to represent the different models.
+#' @param date_breaks character string indicating the date breaks to use. Defaults
+#' to "2_months". See ??scale_x_date for more information.
+#' @param date_label_format character string indicating the format to use for the
+#' date breaks. Defaults to "%b %Y". See ??scale_x_date for more information.
+#' @param line_width numeric; controls the width of the lines for each model rep
+#' @param line_alpha numeric; controls the transparency of the lines for each model rep
+#'
+#' @returns
+#' Returns a ggplot object
+#'
+plot_preds_timeseries <- function(preds_data,
+ site_no,
+ variable_obs = "do_mean",
+ plot_preds = c("base", "metab_dense"),
+ preds_colors = c("#5ab4ac", "#fc8d62"),
+ date_breaks = "2 months",
+ date_label_format = "%b %Y",
+ line_width = 0.25,
+ line_alpha = 0.5){
+
+ # get site name
+ site_info <- dataRetrieval::readNWISsite(siteNumbers = c(site_no))
+ site_name <- site_info$station_nm
+
+ # define variable names that correspond w/ preferences in `plot_preds`
+ variable_pred_base <- paste0(variable_obs, "_base")
+ variable_pred_metab <- paste0(variable_obs, "_metab_dense")
+ cols_select <- c("date", "site_id", "rep_id", variable_obs, variable_pred_base, variable_pred_metab)
+
+ # define y-axis labels that correspond w/ selected `variable_obs`
+ if(variable_obs == "do_mean"){
+ y_axis_label <- expression(Daily~mean~DO~(mg~O[2]~L^-1))
+ }
+ if(variable_obs == "do_min"){
+ y_axis_label <- expression(Daily~max~DO~(mg~O[2]~L^-1))
+ }
+ if(variable_obs == "do_max"){
+ y_axis_label <- expression(Daily~max~DO~(mg~O[2]~L^-1))
+ }
+
+ # format data frame
+ preds_data_subset <- preds_data %>%
+ filter(site_id %in% c(site_no)) %>%
+ select(all_of(cols_select)) %>%
+ rename(var_obs := !!variable_obs,
+ var_mod_base := !!variable_pred_base,
+ var_mod_metab := !!variable_pred_metab) %>%
+ mutate(rep_id = as.factor(rep_id))
+
+ # make base figure by plotting observed DO data
+ p <- preds_data_subset %>%
+ ggplot() +
+ geom_point(aes(x= date, y = var_obs, color = "observed"), size = 0.9)
+
+ # now add on model predictions, depending on which model ids are requested in `plot_preds`
+ if(length(plot_preds) == 1 && plot_preds == "base"){
+ color_values <- c('gray50',preds_colors[1],'white')
+ labels <- paste("",
+ c('observed','DL-predicted','PGDL-predicted'),
+ "")
+ p1 <- p +
+ geom_line(aes(x=date, y = var_mod_base, group = rep_id, color = "predicted"),size = line_width, alpha = line_alpha) +
+ geom_line(aes(x=date, y = var_mod_metab, group = rep_id, color = "zpredicted"),size = line_width, alpha = 0)
+ }
+
+ if(length(plot_preds) == 1 && plot_preds == "metab_dense"){
+ color_values <- c('gray50',preds_colors[1],'white')
+ labels <- paste("",
+ c('observed','PGDL-predicted','DL-predicted'),
+ "")
+ p1 <- p +
+ geom_line(aes(x=date, y = var_mod_base, group = rep_id, color = "zpredicted"), size = line_width, alpha = 0) +
+ geom_line(aes(x=date, y = var_mod_metab, group = rep_id, color = "predicted"), size = line_width, alpha = line_alpha)
+ }
+
+ if("metab_dense" %in% plot_preds & "base" %in% plot_preds){
+ color_values <- c('gray50',preds_colors[1],preds_colors[2])
+ labels <- paste("",
+ c('observed','DL-predicted','PGDL-predicted'),
+ "")
+ p1 <- p +
+ geom_line(aes(x = date, y = var_mod_base, group = rep_id, color = "predicted"), size = line_width, alpha = line_alpha) +
+ geom_line(aes(x = date, y = var_mod_metab, group = rep_id, color = "zpredicted"), size = line_width, alpha = line_alpha)
+ }
+
+ # finalize plot formatting
+ p2 <- p1 +
+ scale_color_manual(name = '',
+ labels = labels,
+ values = color_values,
+ guide = guide_legend(override.aes = list(
+ linetype = c("blank", "solid",'solid'),
+ shape = c(16, NA,NA),
+ size = c(2.5,1,1)))) +
+ scale_x_date(date_breaks = date_breaks,
+ date_labels = date_label_format) +
+ ggtitle(label = site_name, subtitle = paste0("USGS ", site_no)) +
+ labs(x = "", y = y_axis_label) +
+ theme_classic() +
+ theme(axis.title = element_text(size = 13),
+ axis.text = element_text(size = 12),
+ legend.text = ggtext::element_markdown(size = 13),
+ legend.key.size = unit(0.65, "cm"),
+ legend.key.height = unit(0.75, 'cm'),
+ axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)))
+
+ return(p2)
+}
+
+
+
+#' @title Plot scatterplot of model predictions with observations
+#'
+#' @description
+#' Function to plot a scatterplot of observations with predictions from up to
+#' two different models, "base" for baseline LSTM, and/or "metab_dense" for the
+#' LSTM with a metab dense layer. Note that all replicates are included in this plot.
+#'
+#' @param preds_data data frame containing the model predictions and observations.
+#' Must include columns "site_no", "rep_id", "date", and `variable_obs`
+#' @param site_no character string indicating which site to plot
+#' @param variable_obs character string indicating which target variable to plot.
+#' Options include "do_mean", "do_max", and "do_min"
+#' @param plot_preds character vector indicating which model predictions to plot.
+#' Options include "base" and/or "metab_dense". Defaults to c("base", "metab_dense").
+#' @param preds_colors character string or character vector containing the hex
+#' color codes that are used to represent the different models.
+#' @param breaks_x sequence of length 3 that represents the lower limit for the x-axis,
+#' the upper limit for the x-axis, and the step between major axis breaks.
+#' @param breaks_y sequence of length 3 that represents the lower limit for the y-axis,
+#' the upper limit for the y-axis, and the step between major axis breaks.
+#' @param point_size numeric; controls the size of the points
+#' @param point_alpha numeric; controls the transparency of the points
+#' @param plot_legend logical; should a legend be included in the plot?
+#'
+#' @returns
+#' Returns a ggplot object
+#'
+plot_preds_scatter <- function(preds_data,
+ site_no,
+ variable_obs = "do_mean",
+ plot_preds = c("base", "metab_dense"),
+ preds_colors = c("#5ab4ac", "#fc8d62"),
+ breaks_x = seq(6,18,3),
+ breaks_y = seq(6,18,3),
+ point_size = 1,
+ point_alpha = 0.5,
+ plot_legend = FALSE){
+
+ # define variable names that correspond w/ preferences in `plot_preds`
+ variable_pred_base <- paste0(variable_obs, "_base")
+ variable_pred_metab <- paste0(variable_obs, "_metab_dense")
+ cols_select <- c("date", "site_id", "rep_id", variable_obs, variable_pred_base, variable_pred_metab)
+
+ # define y-axis labels that correspond w/ selected `variable_obs`
+ if(variable_obs == "do_mean"){
+ y_axis_label <- expression(Predicted~mean~DO~(mg~O[2]~L^-1))
+ x_axis_label <- expression(Observed~mean~DO~(mg~O[2]~L^-1))
+ }
+ if(variable_obs == "do_min"){
+ y_axis_label <- expression(Predicted~max~DO~(mg~O[2]~L^-1))
+ x_axis_label <- expression(Observed~max~DO~(mg~O[2]~L^-1))
+ }
+ if(variable_obs == "do_max"){
+ y_axis_label <- expression(Predicted~max~DO~(mg~O[2]~L^-1))
+ x_axis_label <- expression(Observed~max~DO~(mg~O[2]~L^-1))
+ }
+
+ # format data frame
+ preds_data_subset <- preds_data %>%
+ filter(site_id %in% c(site_no)) %>%
+ select(all_of(cols_select)) %>%
+ rename(var_obs := !!variable_obs,
+ var_mod_base := !!variable_pred_base,
+ var_mod_metab := !!variable_pred_metab) %>%
+ mutate(rep_id = as.factor(rep_id))
+
+ # make base figure
+ p <- preds_data_subset %>%
+ ggplot()
+
+ # now add on model predictions, depending on which model ids are requested in `plot_preds`
+ if(length(plot_preds) == 1 && plot_preds == "base"){
+ p1 <- p +
+ geom_point(aes(x = var_obs, y = var_mod_base, group = rep_id, color = "predicted"),size = point_size, alpha = point_alpha) +
+ scale_color_manual(name = '',
+ labels = c("DL-predicted"),
+ values = preds_colors[1])
+ }
+ if(length(plot_preds) == 1 && plot_preds == "metab_dense"){
+ p1 <- p +
+ geom_point(aes(x = var_obs, y = var_mod_metab, color = "zpredicted"),size = point_size, alpha = point_alpha) +
+ scale_color_manual(name = '',
+ labels = c("PGDL-predicted"),
+ values = preds_colors[2])
+ }
+ if("metab_dense" %in% plot_preds & "base" %in% plot_preds){
+ p1 <- p +
+ geom_point(aes(x = var_obs, y = var_mod_base, group = rep_id, color = "predicted"), size = point_size, alpha = point_alpha) +
+ geom_point(aes(x = var_obs, y = var_mod_metab, group = rep_id, color = "zpredicted"), size = point_size, alpha = point_alpha) +
+ scale_color_manual(name = '',
+ labels = c("DL-predicted","PGDL-predicted"),
+ values = preds_colors)
+ }
+
+ # finalize plot formatting
+ p2 <- p1 +
+ coord_cartesian(xlim=c(min(breaks_x), max(breaks_x)),
+ ylim = c(min(breaks_y),max(breaks_y))) +
+ scale_x_continuous(breaks = breaks_x) +
+ scale_y_continuous(breaks = breaks_y) +
+ labs(x = x_axis_label, y = y_axis_label) +
+ geom_abline(slope = 1, intercept = 0, lty = 2, size = 0.75) +
+ theme_classic() +
+ theme(axis.title = element_text(size = 13),
+ axis.text = element_text(size = 12),
+ axis.title.x = element_text(margin = margin(t = 10)),
+ axis.title.y = element_text(margin = margin(r = 10)),
+ legend.text = element_text(size = 12))
+
+ if(!plot_legend){
+ p2 <- p2 + theme(legend.position = "none")
+ }
+
+ return(p2)
+}
+
+
+
diff --git a/3_visualize/src/plotting_script_delete_later.R b/3_visualize/src/plotting_script_delete_later.R
new file mode 100644
index 00000000..9257b8a6
--- /dev/null
+++ b/3_visualize/src/plotting_script_delete_later.R
@@ -0,0 +1,96 @@
+# pseudo targets pipeline for creating plots of model predictive performance
+
+source("3_visualize/src/plotting_helpers.R")
+library(tidyverse)
+library(targets)
+library(patchwork)
+tar_load(p2a_metrics_files)
+tar_load(p2a_do_and_metab)
+
+# read in and bind the overall_metrics files from each model id
+p3_overall_metrics <- lapply(p2a_metrics_files, function(x){
+ dat <- readr::read_csv(x, show_col_types = FALSE) %>%
+ mutate(model_id = str_replace_all(x, '2a_model/out/models/|/exp_overall_metrics.csv', ''))
+ }) %>%
+ bind_rows()
+
+# plot overall model performance by train/test partition
+# targets pipeline will save plots for all model ids
+p3_overall_metrics_png <- plot_metrics_by_partition(metrics_df = filter(p3_overall_metrics, model_id == "0_baseline_LSTM"),
+ performance_metric = "rmse",
+ fileout = "3_visualize/out/baseline_rmse.png",
+ partition_select = c("train", "val"),
+ partition_colors = c("#a6cee3","#1f78b4"))
+
+# save directories containing model reps
+p3_preds_dir_0_baseline_LSTM <- list.files("2a_model/out/models/0_baseline_LSTM/nstates_10/nep_100", full.names = TRUE)
+p3_preds_dir_2_multitask_dense <- list.files("2a_model/out/models/2_multitask_dense/nstates_10/nep_100", full.names = TRUE)
+
+# read in feather files from model rep sub-directories and create a data frame w/ DO predictions from 0_baseline_LSTM
+p3_preds_0_baseline_LSTM <- p3_preds_dir_0_baseline_LSTM %>%
+ purrr::map_dfr(read_preds_feather, preds_file = "val_preds.feather", .id = "rep_file") %>%
+ mutate(model_id = str_replace_all(rep_file, '2a_model/out/models/|/exp_overall_metrics.csv', '')) %>%
+ rename(do_min_base = do_min, do_max_base = do_max, do_mean_base = do_mean)
+
+# read in feather files from model rep sub-directories and create a data frame w/ DO predictions from 2_multitask_dense
+p3_preds_2_multitask_dense <- p3_preds_dir_2_multitask_dense %>%
+ purrr::map_dfr(read_preds_feather, preds_file = "val_preds.feather", .id = "rep_file") %>%
+ mutate(model_id = str_replace_all(rep_file, '2a_model/out/models/|/exp_overall_metrics.csv', '')) %>%
+ rename(do_min_metab_dense = do_min, do_max_metab_dense = do_max, do_mean_metab_dense = do_mean)
+
+# bind together predictions from both models, note that the preds columns have different names
+# for each model id
+p3_preds_all_models <- p3_preds_0_baseline_LSTM %>%
+ select(-c(model_id, rep_file)) %>%
+ left_join(y = select(p3_preds_2_multitask_dense, -model_id, -rep_file),
+ by = c("site_id", "date", "rep_id")) %>%
+ select(-c(GPP, ER, K600, depth, temp.water))
+
+# bind together model predictions with observations
+p3_preds_all_models_w_obs <- p3_preds_all_models %>%
+ mutate(date = as.Date(date)) %>%
+ left_join(y = p2a_do_and_metab, by = c("site_id","date"))
+
+# create a timeseries plot that shows the model predictions vs observations
+p3_preds_timeseries <- plot_preds_timeseries(preds_data = filter(p3_preds_all_models_w_obs,
+ date > "2011-10-01",
+ date < "2016-10-02"),
+ site_no = "01481500",
+ variable_obs = "do_mean",
+ plot_preds = c("base", "metab_dense"),
+ preds_colors = c("#1b9e77", "#d95f02"),
+ date_breaks = "1 year",
+ date_label_format = "%Y",
+ line_alpha = 0.5)
+
+# create a scatterplot that shows the model predictions vs observations.
+# note that the scatterplot includes all model reps!
+p3_preds_scatterplot <- plot_preds_scatter(preds_data = filter(p3_preds_all_models_w_obs,
+ date > "2011-10-01",
+ date < "2016-10-02"),
+ site_no = "01481500",
+ variable_obs = "do_mean",
+ plot_preds = c("base", "metab_dense"),
+ preds_colors = c("#1b9e77", "#d95f02"),
+ breaks_x = seq(4,16,3),
+ breaks_y = seq(4,16,3),
+ point_size = 0.7,
+ point_alpha = 0.3,
+ plot_legend = FALSE)
+
+# combine the timeseries and the scatterplot in one plot
+p3_preds_plot_combined <- p3_preds_timeseries +
+ p3_preds_scatterplot +
+ plot_layout(ncol = 2, widths = c(2, 1))
+
+# save the combined plot showing model predictions vs. observations
+ggsave(filename = "3_visualize/out/model_preds_vs_obs.png",
+ plot = p3_preds_plot_combined,
+ width = 12, height = 4.5, units = c("in"),
+ dpi = 300)
+
+
+
+
+
+
diff --git a/_targets.R b/_targets.R
index ded954ab..4148ff50 100644
--- a/_targets.R
+++ b/_targets.R
@@ -3,9 +3,9 @@ library(targets)
options(tidyverse.quiet = TRUE)
tar_option_set(packages = c("tidyverse", "lubridate", "rmarkdown", "knitr",
"dataRetrieval", "nhdplusTools", "sbtools",
- "leaflet", "sf", "USAboundaries", "cowplot",
- "ggspatial", "patchwork", "streamMetabolizer",
- "reticulate", "yaml"))
+ "leaflet", "sf", "cowplot", "ggspatial",
+ "patchwork", "streamMetabolizer", "reticulate",
+ "yaml"))
source("1_fetch.R")
source("2_process.R")