diff --git a/1_fetch.R b/1_fetch.R
index 4f34b95e..998d700e 100644
--- a/1_fetch.R
+++ b/1_fetch.R
@@ -9,6 +9,21 @@ source("1_fetch/src/fetch_nhdv2_attributes_from_sb.R")
 
 p1_targets_list <- list(
   
+  # Track and read in the state boundaries sf object. This file was 
+  # created by running `states <- get_state_shp()` followed by 
+  # `saveRDS(states, "1_fetch/in/state_boundaries.rds)`. get_state_shp
+  # is from 3_visualize/src/map_sites.R. It depends on the USAboundaries
+  # package, which is currently archived on CRAN.
+  tar_target(
+    p1_state_boundaries_rds,
+    "1_fetch/in/state_boundaries.rds",
+    format = "file"
+  ),
+  tar_target(
+    p1_state_boundaries_sf,
+    readRDS(p1_state_boundaries_rds)
+  ),
+  
   # download WQP data product from science base for discrete samples
   tar_target(
     p1_wqp_data_file,
diff --git a/1_fetch/in/state_boundaries.rds b/1_fetch/in/state_boundaries.rds
new file mode 100644
index 00000000..56bdd03d
Binary files /dev/null and b/1_fetch/in/state_boundaries.rds differ
diff --git a/3_visualize.R b/3_visualize.R
index 21ffdb7a..19e95f89 100644
--- a/3_visualize.R
+++ b/3_visualize.R
@@ -77,6 +77,7 @@ p3_targets_list <- list(
     p3_site_map_png,
     map_sites(flowlines = p1_nhd_reaches_sf,
               matched_sites = p2a_site_splits,
+              states_shp = p1_state_boundaries_sf,
               out_file = "3_visualize/out/do_site_map.png")
   ),
   
diff --git a/3_visualize/src/map_sites.R b/3_visualize/src/map_sites.R
index 461834f1..b60dae93 100644
--- a/3_visualize/src/map_sites.R
+++ b/3_visualize/src/map_sites.R
@@ -9,6 +9,8 @@
 #' @param matched_sites sf object containing site locations and the flowline
 #' reach identifier ("COMID") that the site has been matched to. Must contain
 #' columns "COMID" and "geometry".
+#' @param states_shp sf object containing the U.S. state boundaries to
+#' include in the watershed plot.
 #' @param out_file character string indicating the name of the saved file, 
 #' including file path and extension.
 #' @param huc6_select vector of character string(s) indicating the HUC6 basins that
@@ -37,6 +39,7 @@
 #' 
 map_sites <- function(flowlines, 
                       matched_sites, 
+                      states_shp,
                       out_file,
                       huc6_select = "020402", 
                       basin_bbox = c(xmin = -76.39556, ymin = 39.5, xmax = -74.37121, ymax = 40.89106),
@@ -109,7 +112,7 @@ map_sites <- function(flowlines,
     ggspatial::annotation_scale(bar_cols = c("gray70","white"))
   
   # create inset map
-  inset_map <- map_drb_watershed(matched_sites)
+  inset_map <- map_drb_watershed(matched_sites, states_shp = states_shp)
   
   # grab legend 
   legend <- cowplot::get_legend(sites_map)
@@ -145,10 +148,6 @@ map_sites <- function(flowlines,
 #' @param huc8 vector of character string(s) indicating the HUC8 basins that
 #' make up the watershed of interest. By default, the HUC8 basins that make up
 #' the Delaware River Basin will be used.
-#' @param states vector of character string(s) indicating which states should
-#' be included in the inset map, using two-letter postal code abbreviations for 
-#' each state. By default, the states surrounding the Delaware River Basin will
-#' be downloaded, including "NY", "PA", "NJ", "DE", and "MD".
 #' @param epsg_out integer indicating the coordinate reference system that 
 #' should be used when creating the inset map. Defaults to EPSG 3857, pseudo-
 #' mercator: https://epsg.io/3857
@@ -157,17 +156,14 @@ map_sites <- function(flowlines,
 #' Returns an inset map as a ggplot object.
 #'
 map_drb_watershed <- function(sites,
+                              states_shp,
                               huc8 = c("02040101","02040102","02040103",
                                        "02040104","02040105","02040106",
                                        "02040201","02040202","02040203",
                                        "02040204","02040205","02040206",
                                        "02040207"),
-                              states = c("NY","PA","NJ","DE","MD"),
                               epsg_out = 3857){
   
-  # Download shapefiles for states that encompass the watershed of interest
-  states_shp <- USAboundaries::us_states(resolution = "high", states = states)
-  
   # Download HUC8 boundaries associated with watershed boundary
   boundary <- nhdplusTools::get_huc8(id = huc8) %>%
     sf::st_union()
@@ -191,6 +187,21 @@ map_drb_watershed <- function(sites,
 }
 
 
+#' @title Download state shapefiles
+#' 
+#' @param states vector of character string(s) indicating which states should
+#' be included in the inset map, using two-letter postal code abbreviations for 
+#' each state. By default, the states surrounding the Delaware River Basin will
+#' be downloaded, including "NY", "PA", "NJ", "DE", and "MD".
+#' 
+get_state_shp <- function(states = c("NY","PA","NJ","DE","MD")){
+  
+  # Download shapefiles for states that encompass the watershed of interest
+  states_shp <- USAboundaries::us_states(resolution = "high", states = states)
+  
+}
+
+
 #' @title Create leaflet map of site locations
 
 #' @description 
diff --git a/3_visualize/src/plotting_helpers.R b/3_visualize/src/plotting_helpers.R
new file mode 100644
index 00000000..7d1ce35d
--- /dev/null
+++ b/3_visualize/src/plotting_helpers.R
@@ -0,0 +1,402 @@
+### This script contains helper functions for plotting model predictive performance
+
+
+#' @title Plot model performance
+#' 
+#' @description 
+#' Function to plot the model performance by partition. 
+#' 
+#' @param metrics_df data frame containing the predictive performance metrics. 
+#' Must contain columns "variable", "partition", and "model_id".
+#' @param performance_metric character string indicating which performance metric 
+#' to plot from `metrics_df`.
+#' @param fileout character string indicating name of saved png file, including 
+#' file path and extension.  
+#' @param partition_select character string or character vector indicating which 
+#' model partitions should be included in the plot. Options are "val", "train", and "val_times".
+#' @param partition_colors character string or character vector containing the hex
+#' color codes that are used to represent the model partitions.
+#' @param y_range numeric vector of length 2, indicating the desired range of the y-axis. If
+#' none is provided, we will estimate the best y-axis range. Defaults to NULL.
+#' @param y_label character string containing the y axis label. If none is provided, 
+#' we will do our best to impute one. Defaults to NULL.
+#' @param plot_title logical; should the plot include a title indicating the model_id? 
+#' Defaults to TRUE.
+#' @param panel_grids logical; should we plot the background grid lines? Defaults to TRUE.
+#' @param box_size numeric; width of border lines around box plots
+#' @param facet_text_size integer
+#' @param axis_text_size integer
+#' @param axis_title_size integer
+#' @param legend_text_size integer
+#' @param plot_height_in numeric; height of saved plot in inches
+#' @param plot_width_in numeric; width of saved plot in inches
+#' 
+#' @returns 
+#' Saves a png file containing a boxplot with one panel for each target variable. 
+#'
+plot_metrics_by_partition <- function(metrics_df, 
+                                      performance_metric,
+                                      fileout,
+                                      partition_select = c("val","train","val_times"),
+                                      partition_colors = c("#bdd7e7","#6baed6","#2171b5"),
+                                      y_range = NULL,
+                                      y_label = NULL,
+                                      plot_title = TRUE,
+                                      panel_grids = TRUE,
+                                      box_size = 0.25,
+                                      facet_text_size = 13,
+                                      axis_text_size = 12,
+                                      axis_title_size = 13,
+                                      legend_text_size = 12,
+                                      plot_height_in = 4,
+                                      plot_width_in = 6.4){
+  
+  # subset the metrics data frame and rename the column containing
+  # the requested performance metric to the more generic "metric"
+  metrics_df_subset <- metrics_df %>%
+    filter(partition %in% partition_select) %>%
+    select('model_id', 'partition', 'variable', 'rep_id',c(!!performance_metric)) %>%
+    rename(metric := !!performance_metric) %>%
+    mutate(variable_renamed = recode(variable,
+                                     "do_max" = "DO-max", 
+                                     "do_min" = "DO-min", 
+                                     "do_mean" = "DO-mean")) %>%
+    mutate(variable_renamed = factor(variable_renamed,
+                                     levels = c("DO-mean","DO-min","DO-max")))
+  
+  # define the y-axis label
+  if(is.null(y_label)){
+    if(grepl("rmse", performance_metric)){
+      y_label <- expression(RMSE~(mg~O[2]~L^-1))
+    }
+    if(grepl("nse", performance_metric)){
+      y_label <- expression(NSE)
+    }
+    if(grepl("bias", performance_metric)){
+      y_label <- expression(Mean~bias~(mg~O[2]~L^-1))
+    }
+    if(grepl("kge", performance_metric)){
+      y_label <- expression(KGE)
+    }
+  }
+  
+  # define the y-axis range
+  if(is.null(y_range)){
+    y_range <- c((0.85*min(metrics_df_subset$metric)),
+                 (1.15*max(metrics_df_subset$metric)))
+  }
+  
+  # make the plot
+  p <- metrics_df_subset %>%
+    ggplot() + 
+    geom_boxplot(aes(x= partition, y = metric, fill = partition), size = box_size) +
+    facet_wrap(~variable_renamed) + 
+    scale_fill_manual(values = partition_colors, name = "") +
+    labs(x = "", y = y_label)+
+    coord_cartesian(ylim = c(y_range[1], y_range[2])) +
+    theme_bw() + 
+    theme(strip.background = element_blank(),
+          strip.text = element_text(size = facet_text_size),
+          axis.title = element_text(size = axis_title_size),
+          axis.text = element_text(size = axis_text_size),
+          axis.title.y = element_text(margin = margin(r = 10)),
+          axis.title.x = element_text(margin = margin(t = 12)),
+          legend.text = element_text(size = legend_text_size),
+          legend.key.size = unit(0.65, "cm"),
+          legend.key.height = unit(0.75, 'cm'))
+  
+  if(!panel_grids){
+    p <- p + theme(panel.grid.major = element_blank(),
+                   panel.grid.minor = element_blank())
+  }
+  if(plot_title){
+    p <- p + ggtitle(unique(metrics_df_subset$model_id))
+  }
+  
+  # save the plot
+  ggsave(filename = fileout, plot = p, 
+         width = plot_width_in, height = plot_height_in, units = c("in"), 
+         dpi = 300)
+  
+  return(fileout)
+}
+
+
+
+#' @title Read reach-level feather files
+#' 
+#' @description
+#' Function to read in feather file containing model predictions and performance
+#' metrics for individual site locations.
+#' 
+#' @param path file path of the feather file
+#' @param preds_file file name with .feather file extension
+#' 
+#' @returns 
+#' Returns a data frame containing the reach-level predictive performance metrics.
+#' 
+read_preds_feather <- function(path, preds_file = "val_preds.feather"){
+  
+  rep <- substr(path, nchar(path), nchar(path))
+  file_name <- paste0(path,"/",preds_file)
+  
+  preds <- arrow::read_feather(file_name) %>%
+    mutate(rep_id = rep)
+  
+  return(preds)
+}
+
+
+
+#' @title Plot timeseries of model predictions with observations
+#' 
+#' @description 
+#' Function to plot a timeseries of observations with predictions from up to
+#' two different models, "base" for baseline LSTM, and/or "metab_dense" for
+#' the LSTM with a metab dense layer. All replicates are plotted as separate
+#' lines.
+#' 
+#' @param preds_data data frame containing the model predictions and observations.
+#' Must include columns "site_no", "rep_id", "date", and `variable_obs`
+#' @param site_no character string indicating which site to plot
+#' @param variable_obs character string indicating which target variable to plot.
+#' Options include "do_mean", "do_max", and "do_min"
+#' @param plot_preds character vector indicating which model predictions to plot. 
+#' Options include "base" and/or "metab_dense". Defaults to c("base", "metab_dense").
+#' @param preds_colors character string or character vector containing the hex
+#' color codes that are used to represent the different models.
+#' @param date_breaks character string indicating the date breaks to use. Defaults
+#' to "2_months". See ??scale_x_date for more information.
+#' @param date_label_format character string indicating the format to use for the 
+#' date breaks. Defaults to "%b %Y". See ??scale_x_date for more information.
+#' @param line_width numeric; controls the width of the lines for each model rep
+#' @param line_alpha numeric; controls the transparency of the lines for each model rep
+#' 
+#' @returns 
+#' Returns a ggplot object
+#' 
+plot_preds_timeseries <- function(preds_data, 
+                                  site_no, 
+                                  variable_obs = "do_mean",
+                                  plot_preds = c("base", "metab_dense"),
+                                  preds_colors = c("#5ab4ac", "#fc8d62"),
+                                  date_breaks = "2 months", 
+                                  date_label_format = "%b %Y",
+                                  line_width = 0.25,
+                                  line_alpha = 0.5){
+  
+  # get site name
+  site_info <- dataRetrieval::readNWISsite(siteNumbers = c(site_no))
+  site_name <- site_info$station_nm
+  
+  # define variable names that correspond w/ preferences in `plot_preds`
+  variable_pred_base <- paste0(variable_obs, "_base")
+  variable_pred_metab <- paste0(variable_obs, "_metab_dense")
+  cols_select <- c("date", "site_id", "rep_id", variable_obs, variable_pred_base, variable_pred_metab)
+  
+  # define y-axis labels that correspond w/ selected `variable_obs`
+  if(variable_obs == "do_mean"){
+    y_axis_label <- expression(Daily~mean~DO~(mg~O[2]~L^-1))
+  }
+  if(variable_obs == "do_min"){
+    y_axis_label <- expression(Daily~max~DO~(mg~O[2]~L^-1))
+  }
+  if(variable_obs == "do_max"){
+    y_axis_label <- expression(Daily~max~DO~(mg~O[2]~L^-1))
+  }
+  
+  # format data frame
+  preds_data_subset <- preds_data %>% 
+    filter(site_id %in% c(site_no)) %>%
+    select(all_of(cols_select)) %>%
+    rename(var_obs := !!variable_obs,
+           var_mod_base := !!variable_pred_base,
+           var_mod_metab := !!variable_pred_metab) %>%
+    mutate(rep_id = as.factor(rep_id))
+
+  # make base figure by plotting observed DO data
+  p <- preds_data_subset %>%
+    ggplot() + 
+    geom_point(aes(x= date, y = var_obs, color = "observed"), size = 0.9) 
+    
+  # now add on model predictions, depending on which model ids are requested in `plot_preds`
+  if(length(plot_preds) == 1 && plot_preds == "base"){
+    color_values <- c('gray50',preds_colors[1],'white')
+    labels <- paste("<span style='color:",
+                    c('black','black','white'),
+                    "'>",
+                    c('observed','DL-predicted','PGDL-predicted'),
+                    "</span>")
+    p1 <- p + 
+      geom_line(aes(x=date, y = var_mod_base, group = rep_id, color = "predicted"),size = line_width, alpha = line_alpha) +
+      geom_line(aes(x=date, y = var_mod_metab, group = rep_id, color = "zpredicted"),size = line_width, alpha = 0) 
+  }
+  
+  if(length(plot_preds) == 1 && plot_preds == "metab_dense"){
+    color_values <- c('gray50',preds_colors[1],'white')
+    labels <- paste("<span style='color:",
+                    c('black','black','white'),
+                    "'>",
+                    c('observed','PGDL-predicted','DL-predicted'),
+                    "</span>")
+    p1 <- p + 
+      geom_line(aes(x=date, y = var_mod_base, group = rep_id, color = "zpredicted"), size = line_width, alpha = 0) +
+      geom_line(aes(x=date, y = var_mod_metab, group = rep_id, color = "predicted"), size = line_width, alpha = line_alpha)
+  }
+  
+  if("metab_dense" %in% plot_preds & "base" %in% plot_preds){
+    color_values <- c('gray50',preds_colors[1],preds_colors[2])
+    labels <- paste("<span style='color:",
+                    c('black','black','black'),
+                    "'>",
+                    c('observed','DL-predicted','PGDL-predicted'),
+                    "</span>")
+    p1 <- p + 
+      geom_line(aes(x = date, y = var_mod_base, group = rep_id, color = "predicted"), size = line_width, alpha = line_alpha) +
+      geom_line(aes(x = date, y = var_mod_metab, group = rep_id, color = "zpredicted"), size = line_width, alpha = line_alpha)
+  }
+  
+  # finalize plot formatting 
+  p2 <- p1 + 
+    scale_color_manual(name = '',
+                       labels = labels,
+                       values = color_values,
+                       guide = guide_legend(override.aes = list(
+                         linetype = c("blank", "solid",'solid'),
+                         shape = c(16, NA,NA),
+                         size = c(2.5,1,1)))) +
+    scale_x_date(date_breaks = date_breaks, 
+                 date_labels = date_label_format) +
+    ggtitle(label = site_name, subtitle = paste0("USGS ", site_no)) + 
+    labs(x = "", y = y_axis_label) +
+    theme_classic() + 
+    theme(axis.title = element_text(size = 13),
+          axis.text = element_text(size = 12),
+          legend.text = ggtext::element_markdown(size = 13),
+          legend.key.size = unit(0.65, "cm"),
+          legend.key.height = unit(0.75, 'cm'),
+          axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)))
+  
+  return(p2)
+}
+
+
+
+#' @title Plot scatterplot of model predictions with observations
+#' 
+#' @description 
+#' Function to plot a scatterplot of observations with predictions from up to
+#' two different models, "base" for baseline LSTM, and/or "metab_dense" for the
+#' LSTM with a metab dense layer. Note that all replicates are included in this plot.
+#' 
+#' @param preds_data data frame containing the model predictions and observations.
+#' Must include columns "site_no", "rep_id", "date", and `variable_obs`
+#' @param site_no character string indicating which site to plot
+#' @param variable_obs character string indicating which target variable to plot.
+#' Options include "do_mean", "do_max", and "do_min"
+#' @param plot_preds character vector indicating which model predictions to plot. 
+#' Options include "base" and/or "metab_dense". Defaults to c("base", "metab_dense").
+#' @param preds_colors character string or character vector containing the hex
+#' color codes that are used to represent the different models.
+#' @param breaks_x sequence of length 3 that represents the lower limit for the x-axis,
+#' the upper limit for the x-axis, and the step between major axis breaks.
+#' @param breaks_y sequence of length 3 that represents the lower limit for the y-axis,
+#' the upper limit for the y-axis, and the step between major axis breaks.
+#' @param point_size numeric; controls the size of the points
+#' @param point_alpha numeric; controls the transparency of the points
+#' @param plot_legend logical; should a legend be included in the plot?
+#' 
+#' @returns 
+#' Returns a ggplot object
+#' 
+plot_preds_scatter <- function(preds_data, 
+                               site_no, 
+                               variable_obs = "do_mean",
+                               plot_preds = c("base", "metab_dense"),
+                               preds_colors = c("#5ab4ac", "#fc8d62"),
+                               breaks_x = seq(6,18,3), 
+                               breaks_y = seq(6,18,3),
+                               point_size = 1,
+                               point_alpha = 0.5,
+                               plot_legend = FALSE){
+  
+  # define variable names that correspond w/ preferences in `plot_preds`
+  variable_pred_base <- paste0(variable_obs, "_base")
+  variable_pred_metab <- paste0(variable_obs, "_metab_dense")
+  cols_select <- c("date", "site_id", "rep_id", variable_obs, variable_pred_base, variable_pred_metab)
+  
+  # define y-axis labels that correspond w/ selected `variable_obs`
+  if(variable_obs == "do_mean"){
+    y_axis_label <- expression(Predicted~mean~DO~(mg~O[2]~L^-1))
+    x_axis_label <- expression(Observed~mean~DO~(mg~O[2]~L^-1))
+  }
+  if(variable_obs == "do_min"){
+    y_axis_label <- expression(Predicted~max~DO~(mg~O[2]~L^-1))
+    x_axis_label <- expression(Observed~max~DO~(mg~O[2]~L^-1))
+  }
+  if(variable_obs == "do_max"){
+    y_axis_label <- expression(Predicted~max~DO~(mg~O[2]~L^-1))
+    x_axis_label <- expression(Observed~max~DO~(mg~O[2]~L^-1))
+  }
+  
+  # format data frame
+  preds_data_subset <- preds_data %>% 
+    filter(site_id %in% c(site_no)) %>%
+    select(all_of(cols_select)) %>%
+    rename(var_obs := !!variable_obs,
+           var_mod_base := !!variable_pred_base,
+           var_mod_metab := !!variable_pred_metab) %>%
+    mutate(rep_id = as.factor(rep_id))
+  
+  # make base figure
+  p <- preds_data_subset %>%
+    ggplot() 
+  
+  # now add on model predictions, depending on which model ids are requested in `plot_preds`
+  if(length(plot_preds) == 1 && plot_preds == "base"){
+    p1 <- p + 
+      geom_point(aes(x = var_obs, y = var_mod_base, group = rep_id, color = "predicted"),size = point_size, alpha = point_alpha) +
+      scale_color_manual(name = '',
+                         labels = c("DL-predicted"),
+                         values = preds_colors[1])
+  }
+  if(length(plot_preds) == 1 && plot_preds == "metab_dense"){
+    p1 <- p +
+      geom_point(aes(x = var_obs, y = var_mod_metab, color = "zpredicted"),size = point_size, alpha = point_alpha) +
+      scale_color_manual(name = '',
+                         labels = c("PGDL-predicted"),
+                         values = preds_colors[2])
+  }
+  if("metab_dense" %in% plot_preds & "base" %in% plot_preds){
+    p1 <- p +
+      geom_point(aes(x = var_obs, y = var_mod_base, group = rep_id, color = "predicted"), size = point_size, alpha = point_alpha) + 
+      geom_point(aes(x = var_obs, y = var_mod_metab, group = rep_id, color = "zpredicted"), size = point_size, alpha = point_alpha) +
+      scale_color_manual(name = '',
+                         labels = c("DL-predicted","PGDL-predicted"),
+                         values = preds_colors)
+  }
+  
+  # finalize plot formatting 
+  p2 <- p1 + 
+    coord_cartesian(xlim=c(min(breaks_x), max(breaks_x)), 
+                    ylim = c(min(breaks_y),max(breaks_y))) +
+    scale_x_continuous(breaks = breaks_x) +
+    scale_y_continuous(breaks = breaks_y) +
+    labs(x = x_axis_label, y = y_axis_label) +
+    geom_abline(slope = 1, intercept = 0, lty = 2, size = 0.75) +
+    theme_classic() + 
+    theme(axis.title = element_text(size = 13),
+          axis.text = element_text(size = 12),
+          axis.title.x = element_text(margin = margin(t = 10)),
+          axis.title.y = element_text(margin = margin(r = 10)),
+          legend.text = element_text(size = 12))
+  
+  if(!plot_legend){
+    p2 <- p2 + theme(legend.position = "none")
+  }
+  
+  return(p2)
+}
+
+
+
diff --git a/3_visualize/src/plotting_script_delete_later.R b/3_visualize/src/plotting_script_delete_later.R
new file mode 100644
index 00000000..9257b8a6
--- /dev/null
+++ b/3_visualize/src/plotting_script_delete_later.R
@@ -0,0 +1,96 @@
+# pseudo targets pipeline for creating plots of model predictive performance
+
+source("3_visualize/src/plotting_helpers.R")
+library(tidyverse)
+library(targets)
+library(patchwork)
+tar_load(p2a_metrics_files)
+tar_load(p2a_do_and_metab)
+
+# read in and bind the overall_metrics files from each model id
+p3_overall_metrics <- lapply(p2a_metrics_files, function(x){
+  dat <- readr::read_csv(x, show_col_types = FALSE) %>%
+    mutate(model_id = str_replace_all(x, '2a_model/out/models/|/exp_overall_metrics.csv', ''))
+  }) %>%
+  bind_rows()
+
+# plot overall model performance by train/test partition
+# targets pipeline will save plots for all model ids
+p3_overall_metrics_png <- plot_metrics_by_partition(metrics_df = filter(p3_overall_metrics, model_id == "0_baseline_LSTM"),
+                                                    performance_metric = "rmse", 
+                                                    fileout = "3_visualize/out/baseline_rmse.png",
+                                                    partition_select = c("train", "val"),
+                                                    partition_colors = c("#a6cee3","#1f78b4"))
+
+# save directories containing model reps
+p3_preds_dir_0_baseline_LSTM <- list.files("2a_model/out/models/0_baseline_LSTM/nstates_10/nep_100", full.names = TRUE) 
+p3_preds_dir_2_multitask_dense <- list.files("2a_model/out/models/2_multitask_dense/nstates_10/nep_100", full.names = TRUE) 
+
+# read in feather files from model rep sub-directories and create a data frame w/ DO predictions from 0_baseline_LSTM
+p3_preds_0_baseline_LSTM <- p3_preds_dir_0_baseline_LSTM %>%
+  purrr::map_dfr(read_preds_feather, preds_file = "val_preds.feather", .id = "rep_file") %>%
+  mutate(model_id = str_replace_all(rep_file, '2a_model/out/models/|/exp_overall_metrics.csv', '')) %>%
+  rename(do_min_base = do_min, do_max_base = do_max, do_mean_base = do_mean)
+
+# read in feather files from model rep sub-directories and create a data frame w/ DO predictions from 2_multitask_dense
+p3_preds_2_multitask_dense <- p3_preds_dir_2_multitask_dense %>%
+  purrr::map_dfr(read_preds_feather, preds_file = "val_preds.feather", .id = "rep_file") %>%
+  mutate(model_id = str_replace_all(rep_file, '2a_model/out/models/|/exp_overall_metrics.csv', '')) %>%
+  rename(do_min_metab_dense = do_min, do_max_metab_dense = do_max, do_mean_metab_dense = do_mean)
+
+# bind together predictions from both models, note that the preds columns have different names 
+# for each model id
+p3_preds_all_models <- p3_preds_0_baseline_LSTM %>%
+  select(-c(model_id, rep_file)) %>%
+  left_join(y = select(p3_preds_2_multitask_dense, -model_id, -rep_file), 
+            by = c("site_id", "date", "rep_id")) %>%
+  select(-c(GPP, ER, K600, depth, temp.water))
+
+# bind together model predictions with observations
+p3_preds_all_models_w_obs <- p3_preds_all_models %>%
+  mutate(date = as.Date(date)) %>%
+  left_join(y = p2a_do_and_metab, by = c("site_id","date"))
+
+# create a timeseries plot that shows the model predictions vs observations
+p3_preds_timeseries <- plot_preds_timeseries(preds_data = filter(p3_preds_all_models_w_obs, 
+                                                                 date > "2011-10-01", 
+                                                                 date < "2016-10-02"),
+                                             site_no = "01481500",
+                                             variable_obs = "do_mean",
+                                             plot_preds = c("base", "metab_dense"),
+                                             preds_colors = c("#1b9e77", "#d95f02"),
+                                             date_breaks = "1 year", 
+                                             date_label_format = "%Y",
+                                             line_alpha = 0.5)
+
+# create a scatterplot that shows the model predictions vs observations.
+# note that the scatterplot includes all model reps!
+p3_preds_scatterplot <- plot_preds_scatter(preds_data = filter(p3_preds_all_models_w_obs, 
+                                                               date > "2011-10-01", 
+                                                               date < "2016-10-02"), 
+                                           site_no = "01481500", 
+                                           variable_obs = "do_mean",
+                                           plot_preds = c("base", "metab_dense"),
+                                           preds_colors = c("#1b9e77", "#d95f02"),
+                                           breaks_x = seq(4,16,3), 
+                                           breaks_y = seq(4,16,3),
+                                           point_size = 0.7,
+                                           point_alpha = 0.3, 
+                                           plot_legend = FALSE)
+
+# combine the timeseries and the scatterplot in one plot
+p3_preds_plot_combined <- p3_preds_timeseries + 
+  p3_preds_scatterplot + 
+  plot_layout(ncol = 2, widths = c(2, 1))
+
+# save the combined plot showing model predictions vs. observations
+ggsave(filename = "3_visualize/out/model_preds_vs_obs.png", 
+       plot = p3_preds_plot_combined, 
+       width = 12, height = 4.5, units = c("in"), 
+       dpi = 300)
+  
+  
+  
+
+
+
diff --git a/_targets.R b/_targets.R
index ded954ab..4148ff50 100644
--- a/_targets.R
+++ b/_targets.R
@@ -3,9 +3,9 @@ library(targets)
 options(tidyverse.quiet = TRUE)
 tar_option_set(packages = c("tidyverse", "lubridate", "rmarkdown", "knitr",
                             "dataRetrieval", "nhdplusTools", "sbtools",
-                            "leaflet", "sf", "USAboundaries", "cowplot",
-                            "ggspatial", "patchwork", "streamMetabolizer", 
-                            "reticulate", "yaml"))
+                            "leaflet", "sf", "cowplot", "ggspatial", 
+                            "patchwork", "streamMetabolizer",  "reticulate",
+                            "yaml"))
 
 source("1_fetch.R")
 source("2_process.R")