From 08afdbcf075073e6983bcffce95e61b842d87df9 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 3 Feb 2022 19:13:24 -0700 Subject: [PATCH] Bring in more figure changes from print version --- 01-software-modeling.Rmd | 4 ++-- 03-base-r.Rmd | 8 +++++--- 08-feature-engineering.Rmd | 4 ++-- 10-resampling.Rmd | 2 +- 11-comparing-models.Rmd | 14 ++++++------- 12-tuning-parameters.Rmd | 24 +++++++++++----------- 13-grid-search.Rmd | 22 ++++++++++---------- 14-iterative-search.Rmd | 6 +++--- 15-workflow-sets.Rmd | 2 +- 16-dimensionality-reduction.Rmd | 10 +++++---- 17-encoding-categorical-data.Rmd | 2 +- 18-explaining-models-and-predictions.Rmd | 2 +- 19-when-should-you-trust-predictions.Rmd | 26 ++++++++++++------------ 20-ensemble-models.Rmd | 1 + 21-inferential-analysis.Rmd | 10 ++++----- 15 files changed, 71 insertions(+), 66 deletions(-) diff --git a/01-software-modeling.Rmd b/01-software-modeling.Rmd index d2b09286..9914e072 100644 --- a/01-software-modeling.Rmd +++ b/01-software-modeling.Rmd @@ -89,7 +89,7 @@ plm_plot <- ames_plot <- ggplot(ames, aes(x = Latitude, y = Sale_Price)) + geom_point(alpha = .2) + - geom_smooth(se = FALSE, method = stats::loess, method.args = list(span = .3), col = "red") + + geom_smooth(se = FALSE, method = stats::loess, method.args = list(span = .3), color = "red") + scale_y_log10() + ylab("House Sale Price ($US)") + ggtitle("(b) Using a model-based smoother to discover trends.") @@ -200,7 +200,7 @@ This iterative process is especially true for modeling. Figure \@ref(fig:softwar * **Feature engineering:** The understanding gained from EDA results in the creation of specific model terms that make it easier to accurately model the observed data. This can include complex methodologies (e.g., PCA) or simpler features (using the ratio of two predictors). Chapter \@ref(recipes) focuses entirely on this important step. -* **Model tuning and selection (circles with blue and yellow segments):** A variety of models are generated and their performance is compared. Some models require _parameter tuning_ where some structural parameters are required to be specified or optimized. The colored segments within the circles signify the repeated data splitting used during resampling (see Chapter \@ref(resampling)). +* **Model tuning and selection (circles with alternating segments):** A variety of models are generated and their performance is compared. Some models require _parameter tuning_ where some structural parameters are required to be specified or optimized. The colored segments within the circles signify the repeated data splitting used during resampling (see Chapter \@ref(resampling)). * **Model evaluation:** During this phase of model development, we assess the model's performance metrics, examine residual plots, and conduct other EDA-like analyses to understand how well the models work. In some cases, formal between-model comparisons (Chapter \@ref(compare)) help you to understand whether any differences in models are within the experimental noise. diff --git a/03-base-r.Rmd b/03-base-r.Rmd index 2a526b4d..37b4edeb 100644 --- a/03-base-r.Rmd +++ b/03-base-r.Rmd @@ -23,11 +23,13 @@ names(crickets) # Plot the temperature on the x-axis, the chirp rate on the y-axis. 
The plot # elements will be colored differently for each species: -ggplot(crickets, aes(x = temp, y = rate, col = species)) + +ggplot(crickets, + aes(x = temp, y = rate, color = species, pch = species, lty = species)) + # Plot points for each data point and color by species - geom_point() + + geom_point(size = 2) + # Show a simple linear model fit created separately for each species: - geom_smooth(method = lm, se = FALSE) + + geom_smooth(method = lm, se = FALSE, alpha = 0.5) + + scale_color_brewer(palette = "Paired") + labs(x = "Temperature (C)", y = "Chirp Rate (per minute)") ``` diff --git a/08-feature-engineering.Rmd b/08-feature-engineering.Rmd index 02907ed8..c757606c 100644 --- a/08-feature-engineering.Rmd +++ b/08-feature-engineering.Rmd @@ -255,7 +255,7 @@ After exploring the Ames training set, we might find that the regression slopes ggplot(ames_train, aes(x = Gr_Liv_Area, y = 10^Sale_Price)) + geom_point(alpha = .2) + facet_wrap(~ Bldg_Type) + - geom_smooth(method = lm, formula = y ~ x, se = FALSE, col = "red") + + geom_smooth(method = lm, formula = y ~ x, se = FALSE, color = "lightblue") + scale_x_log10() + scale_y_log10() + labs(x = "Gross Living Area", y = "Sale Price (USD)") @@ -330,7 +330,7 @@ plot_smoother <- function(deg_free) { geom_smooth( method = lm, formula = y ~ ns(x, df = deg_free), - col = "red", + color = "lightblue", se = FALSE ) + labs(title = paste(deg_free, "Spline Terms"), diff --git a/10-resampling.Rmd b/10-resampling.Rmd index e8066379..e8b59a52 100644 --- a/10-resampling.Rmd +++ b/10-resampling.Rmd @@ -468,7 +468,7 @@ Since this analysis used 10-fold cross-validation, there is one unique predictio assess_res %>% ggplot(aes(x = Sale_Price, y = .pred)) + geom_point(alpha = .15) + - geom_abline(col = "red") + + geom_abline(color = "red") + coord_obs_pred() + ylab("Predicted") ``` diff --git a/11-comparing-models.Rmd b/11-comparing-models.Rmd index 9d6bd4f8..6d97ade1 100644 --- a/11-comparing-models.Rmd +++ b/11-comparing-models.Rmd @@ -136,7 +136,7 @@ These high correlations indicate that, across models, there are large within-res ```{r compare-rsq-plot, eval=FALSE} rsq_indiv_estimates %>% mutate(wflow_id = reorder(wflow_id, .estimate)) %>% - ggplot(aes(x = wflow_id, y = .estimate, group = id, col = id)) + + ggplot(aes(x = wflow_id, y = .estimate, group = id, color = id)) + geom_line(alpha = .5, lwd = 1.25) + theme(legend.position = "none") ``` @@ -151,7 +151,7 @@ y_lab <- expression(R^2 ~ statistics) rsq_indiv_estimates %>% mutate(wflow_id = reorder(wflow_id, .estimate)) %>% - ggplot(aes(x = wflow_id, y = .estimate, group = id, col = id)) + + ggplot(aes(x = wflow_id, y = .estimate, group = id, color = id)) + geom_line(alpha = .5, lwd = 1.25) + theme(legend.position = "none") + labs(x = NULL, y = y_lab) @@ -350,7 +350,7 @@ The four posterior distributions are visualized in Figure \@ref(fig:four-posteri model_post %>% mutate(model = forcats::fct_inorder(model)) %>% ggplot(aes(x = posterior)) + - geom_histogram(bins = 50, col = "white", fill = "blue", alpha = 0.4) + + geom_histogram(bins = 50, color = "white", fill = "blue", alpha = 0.4) + facet_wrap(~ model, ncol = 1) ``` @@ -364,8 +364,8 @@ x_lab <- expression(Posterior ~ "for" ~ mean ~ R^2) model_post %>% mutate(model = forcats::fct_inorder(model)) %>% ggplot(aes(x = posterior)) + - geom_histogram(bins = 50, col = "white", fill = "blue", alpha = 0.4) + - facet_wrap(~ model, ncol = 1) + + geom_histogram(bins = 50, color = "white", fill = "blue", alpha = 0.4) + + facet_wrap(~ model, ncol = 1) + labs(x = 
x_lab) ``` @@ -398,7 +398,7 @@ rqs_diff %>% as_tibble() %>% ggplot(aes(x = difference)) + geom_vline(xintercept = 0, lty = 2) + - geom_histogram(bins = 50, col = "white", fill = "red", alpha = 0.4) + geom_histogram(bins = 50, color = "white", fill = "red", alpha = 0.4) ``` ```{r posterior-difference} @@ -419,7 +419,7 @@ rqs_diff %>% as_tibble() %>% ggplot(aes(x = difference)) + geom_vline(xintercept = 0, lty = 2) + - geom_histogram(bins = 50, col = "white", fill = "red", alpha = 0.4) + + geom_histogram(bins = 50, color = "white", fill = "red", alpha = 0.4) + labs(x = x_lab) ``` diff --git a/12-tuning-parameters.Rmd b/12-tuning-parameters.Rmd index 20073d52..095c7214 100644 --- a/12-tuning-parameters.Rmd +++ b/12-tuning-parameters.Rmd @@ -112,10 +112,10 @@ To demonstrate, consider the classification data shown in Figure \@ref(fig:two-c #| echo = FALSE, #| fig.cap = "An example two-class classification data set with two predictors.", #| fig.alt = "An example two-class classification data set with two predictors. The two predictors have a moderate correlation and there is some locations of separation between the classes." -ggplot(training_set, aes(x = A, y = B, col = Class)) + - geom_point(alpha = .5) + +ggplot(training_set, aes(x = A, y = B, color = Class, pch = Class)) + + geom_point(alpha = 0.7) + coord_equal() + - labs(x = "Predictor A", y = "Predictor B", col = NULL) + + labs(x = "Predictor A", y = "Predictor B", color = NULL, pch = NULL) + scale_color_manual(values = c("#CC6677", "#88CCEE")) ``` @@ -266,9 +266,9 @@ link_grids <- link_grids %>% ggplot(aes(x = A, y = B)) + - geom_point(data = testing_set, aes(col = Class, pch = Class), - alpha = .5, show.legend = FALSE) + - geom_contour(aes( z = .pred_Class1, lty = link), breaks = 0.5, col = "black") + + geom_point(data = testing_set, aes(color = Class, pch = Class), + alpha = 0.7, show.legend = FALSE) + + geom_contour(aes( z = .pred_Class1, lty = link), breaks = 0.5, color = "black") + scale_color_manual(values = c("#CC6677", "#88CCEE")) + coord_equal() + labs(x = "Predictor A", y = "Predictor B") @@ -352,9 +352,9 @@ te_plot <- label = ifelse(label == " 1 units", " 1 unit", label) ) %>% ggplot(aes(x = A, y = B)) + - geom_point(data = testing_set, aes(col = Class, pch = Class), - alpha = .5, show.legend = FALSE) + - geom_contour(aes( z = .pred_Class1), breaks = 0.5, col = "black") + + geom_point(data = testing_set, aes(color = Class, pch = Class), + alpha = 0.5, show.legend = FALSE) + + geom_contour(aes( z = .pred_Class1), breaks = 0.5, color = "black") + scale_color_manual(values = c("#CC6677", "#88CCEE")) + facet_wrap(~ label, nrow = 1) + coord_equal() + @@ -374,9 +374,9 @@ tr_plot <- label = ifelse(label == " 1 units", " 1 unit", label) ) %>% ggplot(aes(x = A, y = B)) + - geom_point(data = training_set, aes(col = Class, pch = Class), - alpha = .5, show.legend = FALSE) + - geom_contour(aes( z = .pred_Class1), breaks = 0.5, col = "black") + + geom_point(data = training_set, aes(color = Class, pch = Class), + alpha = 0.5, show.legend = FALSE) + + geom_contour(aes( z = .pred_Class1), breaks = 0.5, color = "black") + scale_color_manual(values = c("#CC6677", "#88CCEE")) + facet_wrap(~ label, nrow = 1) + coord_equal() + diff --git a/13-grid-search.Rmd b/13-grid-search.Rmd index 48a00d25..f3bc1ce8 100644 --- a/13-grid-search.Rmd +++ b/13-grid-search.Rmd @@ -501,7 +501,7 @@ load("extras/parallel_times/resamples_times.RData") resamples_times %>% dplyr::rename(operation = label) %>% ggplot(aes(y = id_alt, x = duration, fill = operation)) + - 
geom_bar(stat = "identity", col = "black") + + geom_bar(stat = "identity", color = "black") + labs(y = NULL, x = "Elapsed Time") + scale_fill_brewer(palette = "Paired") + theme(legend.position = "top") @@ -615,7 +615,7 @@ start_stop_dat %>% ymax = id_stop, fill = operation ), - col = "black" + color = "black" ) + facet_wrap(~ pid, nrow = 2) + labs(y = NULL, x = "Elapsed Time") + @@ -653,8 +653,8 @@ First, let's consider the raw execution times in Figure \@ref(fig:parallel-times #| fig.alt = "Execution times for model tuning versus the number of workers using different delegation schemes. The diagonal black line indicates a linear speedup where the addition of a new worker process has maximal effect. The 'everything' scheme shows that the benefits decrease after three or four workers, especially when there is expensive preprocessing. The 'resamples' scheme has almost linear speedups across all tasks." load("extras/parallel_times/xgb_times.RData") -ggplot(times, aes(x = num_cores, y = elapsed, col = parallel_over, shape = parallel_over)) + - geom_point() + +ggplot(times, aes(x = num_cores, y = elapsed, color = parallel_over, shape = parallel_over)) + + geom_point(size = 2) + geom_line() + facet_wrap(~ preprocessing) + labs(x = "Number of Workers", y = "Execution Time (s)") + @@ -684,9 +684,9 @@ We can also view these data in terms of speed-ups in Figure \@ref(fig:parallel-s #| fig.cap = "Speed-ups for model tuning versus the number of workers using different delegation schemes.", #| fig.alt = "Speed-ups for model tuning versus the number of workers using different delegation schemes." -ggplot(times, aes(x = num_cores, y = speed_up, col = parallel_over, shape = parallel_over)) + +ggplot(times, aes(x = num_cores, y = speed_up, color = parallel_over, shape = parallel_over)) + geom_abline(lty = 1) + - geom_point() + + geom_point(size = 2) + geom_line() + facet_wrap(~ preprocessing) + coord_obs_pred() + @@ -778,9 +778,9 @@ iter_three <- race_details %>% dplyr::filter(iter == 3) iter_three %>% ggplot(aes(x = -estimate, y = .config)) + - geom_vline(xintercept = 0, lty = 2, col = "green") + - geom_point(size = 2, aes(col = decision)) + - geom_errorbarh(aes(xmin = -estimate, xmax = -upper, col = decision), height = .3, show.legend = FALSE) + + geom_vline(xintercept = 0, lty = 2, color = "green") + + geom_point(size = 2, aes(color = decision)) + + geom_errorbarh(aes(xmin = -estimate, xmax = -upper, color = decision), height = .3, show.legend = FALSE) + labs(x = "Loss of ROC AUC", y = NULL) + scale_colour_manual(values = race_cols) ``` @@ -801,8 +801,8 @@ race_ci_plots <- function(x, iters = max(x$iter)) { p <- x %>% dplyr::filter(iter == i) %>% - ggplot(aes(x = -estimate, y = .config, col = decision)) + - geom_vline(xintercept = 0, col = "green", lty = 2) + + ggplot(aes(x = -estimate, y = .config, color = decision)) + + geom_vline(xintercept = 0, color = "green", lty = 2) + geom_point(size = 2) + labs(title = ttl, y = "", x = "Loss of ROC AUC") + scale_color_manual(values = c(best = "blue", retain = "black", discard = "grey"), diff --git a/14-iterative-search.Rmd b/14-iterative-search.Rmd index 938659c9..a86fee74 100644 --- a/14-iterative-search.Rmd +++ b/14-iterative-search.Rmd @@ -241,7 +241,7 @@ To demonstrate, let's look at a toy example with a single parameter that has val y_lab <- expression(Estimated ~ R^2) ggplot(grid, aes(x = x, y = y)) + - geom_line(col = "red", alpha = .5, lwd = 1.25) + + geom_line(color = "red", alpha = .5, lwd = 1.25) + labs(y = y_lab, x = "Tuning Parameter") + 
geom_point(data = current_iter) ``` @@ -317,7 +317,7 @@ small_pred %>% group_by(value) %>% do(get_density(.)) %>% ungroup() %>% - ggplot(aes(x = x, y = density, col = `Parameter Value`, lty = `Parameter Value`)) + + ggplot(aes(x = x, y = density, color = `Parameter Value`, lty = `Parameter Value`)) + geom_line() + geom_vline(xintercept = max(current_iter$y), lty = 3) + labs(x = x_lab) + @@ -736,7 +736,7 @@ The process starts with initial values of `penalty = 0.025` and `mixture = 0.050 #| fig.alt = "An illustration of how simulated annealing determines what is the local neighborhood for two numeric tuning parameters. The clouds of points show possible next values where one would be selected at random. The candidate points are small circular clouds surrounding the current best point." ggplot(neighbors_values, aes(x = penalty, y = mixture)) + - geom_point(alpha = .3, size = 3/4, aes(col = factor(Iteration)), show.legend = FALSE) + + geom_point(alpha = .3, size = 3/4, aes(color = factor(Iteration)), show.legend = FALSE) + scale_x_continuous(trans = "log10", limits = pen_rng) + scale_y_continuous(limits = mix_rng) + geom_point(data = best_values) + diff --git a/15-workflow-sets.Rmd b/15-workflow-sets.Rmd index 270f8d9f..8e0830a6 100644 --- a/15-workflow-sets.Rmd +++ b/15-workflow-sets.Rmd @@ -483,7 +483,7 @@ collect_metrics(boosting_test_results) boosting_test_results %>% collect_predictions() %>% ggplot(aes(x = compressive_strength, y = .pred)) + - geom_abline(col = "green", lty = 2) + + geom_abline(color = "gray50", lty = 2) + geom_point(alpha = 0.5) + coord_obs_pred() + labs(x = "observed", y = "predicted") diff --git a/16-dimensionality-reduction.Rmd b/16-dimensionality-reduction.Rmd index 3a7afdec..e290f5dd 100644 --- a/16-dimensionality-reduction.Rmd +++ b/16-dimensionality-reduction.Rmd @@ -231,13 +231,13 @@ library(patchwork) p1 <- bean_validation %>% ggplot(aes(x = area)) + - geom_histogram(bins = 30, col = "white", fill = "blue", alpha = 1/3) + + geom_histogram(bins = 30, color = "white", fill = "blue", alpha = 1/3) + ggtitle("Original validation set data") p2 <- bean_val_processed %>% ggplot(aes(x = area)) + - geom_histogram(bins = 30, col = "white", fill = "red", alpha = 1/3) + + geom_histogram(bins = 30, color = "white", fill = "red", alpha = 1/3) + ggtitle("Processed validation set data") p1 + p2 @@ -278,7 +278,7 @@ plot_validation_results <- function(recipe, dat = assessment(bean_val$splits[[1] # Process the data (the validation set by default) bake(new_data = dat) %>% # Create the scatterplot matrix - ggplot(aes(x = .panel_x, y = .panel_y, col = class, fill = class)) + + ggplot(aes(x = .panel_x, y = .panel_y, color = class, fill = class)) + geom_point(alpha = 0.4, size = 0.5) + geom_autodensity(alpha = .3) + facet_matrix(vars(-class), layer.diag = 2) + @@ -319,6 +319,7 @@ bean_rec_trained %>% step_pca(all_numeric_predictors(), num_comp = 4) %>% prep() %>% plot_top_loadings(component_number <= 4, n = 5) + + scale_fill_brewer(palette = "Paired") + ggtitle("Principal Component Analysis") ``` @@ -357,6 +358,7 @@ bean_rec_trained %>% step_pls(all_numeric_predictors(), outcome = "class", num_comp = 4) %>% prep() %>% plot_top_loadings(component_number <= 4, n = 5, type = "pls") + + scale_fill_brewer(palette = "Paired") + ggtitle("Partial Least Squares") ``` @@ -530,7 +532,7 @@ Figure \@ref(fig:dimensionality-rankings) illustrates this ranking. #| fig.alt = "Area under the ROC curve from the validation set. 
The three best model configurations use PLS together with regularized discriminant analysis, a multi-layer perceptron, and a naive Bayes model." rankings %>% - ggplot(aes(x = rank, y = mean, pch = method, col = model)) + + ggplot(aes(x = rank, y = mean, pch = method, color = model)) + geom_point(cex = 3.5) + theme(legend.position = "right") + labs(y = "ROC AUC") + diff --git a/17-encoding-categorical-data.Rmd b/17-encoding-categorical-data.Rmd index f3460c25..0be8e515 100644 --- a/17-encoding-categorical-data.Rmd +++ b/17-encoding-categorical-data.Rmd @@ -198,7 +198,7 @@ glm_estimates %>% mutate(level = as.character(Neighborhood)) ) %>% ggplot(aes(`no pooling`, pooling, size = sqrt(n))) + - geom_abline(color = "red", lty = 2) + + geom_abline(color = "gray50", lty = 2) + geom_point(alpha = 0.7) + coord_fixed() ``` diff --git a/18-explaining-models-and-predictions.Rmd b/18-explaining-models-and-predictions.Rmd index f38a4f1c..b7333d6e 100644 --- a/18-explaining-models-and-predictions.Rmd +++ b/18-explaining-models-and-predictions.Rmd @@ -57,7 +57,7 @@ bind_rows( augment(rf_fit, ames_train) %>% mutate(model = "random forest") ) %>% ggplot(aes(Sale_Price, .pred, color = model)) + - geom_abline(col = "gray50", lty = 2) + + geom_abline(color = "gray50", lty = 2) + geom_point(alpha = 0.3, show.legend = FALSE) + facet_wrap(vars(model)) + scale_color_brewer(palette = "Paired") + diff --git a/19-when-should-you-trust-predictions.Rmd b/19-when-should-you-trust-predictions.Rmd index 03bfdf41..40622888 100644 --- a/19-when-should-you-trust-predictions.Rmd +++ b/19-when-should-you-trust-predictions.Rmd @@ -97,8 +97,8 @@ grid_pred %>% mutate(`Probability of Class 1` = .pred_class_1) %>% ggplot(aes(x = x, y = y)) + geom_raster(aes(fill = `Probability of Class 1`)) + - geom_point(data = testing_set, aes(shape = class, col = class), alpha = .75, size = 2) + - geom_contour(aes(z = .pred_class_1), breaks = .5, col = "black", lty = 2) + + geom_point(data = testing_set, aes(shape = class, color = class), alpha = .75, size = 2) + + geom_contour(aes(z = .pred_class_1), breaks = .5, color = "black", lty = 2) + coord_equal() + labs(x = "Predictor x", y = "Predictor y") + scale_fill_gradient2(low = "#FDB863", mid = "white", high = "#B2ABD2", midpoint = .5) + @@ -164,7 +164,7 @@ eq_zone_results <- function(buffer) { # Evaluate a sequence of buffers and plot the results. 
map_dfr(seq(0, .1, length.out = 40), eq_zone_results) %>% pivot_longer(c(-buffer), names_to = "statistic", values_to = "value") %>% - ggplot(aes(x = buffer, y = value, col = statistic)) + + ggplot(aes(x = buffer, y = value, lty = statistic)) + geom_step(size = 1.2, alpha = 0.8) + labs(y = NULL) ``` @@ -294,7 +294,7 @@ add_day <- function(x) { res_test %>% mutate(day = add_day(.)) %>% ggplot(aes(x = date)) + - geom_point(aes(y = ridership, col = day, pch = day), size = 3) + + geom_point(aes(y = ridership, color = day, pch = day), size = 3) + geom_line(aes(y = .pred), alpha = .75) + geom_ribbon(aes(ymin = .pred_lower, ymax = .pred_upper), fill = "blue", alpha = .1) + scale_color_brewer(palette = "Set2") + @@ -336,7 +336,7 @@ Look at this terrible model performance visually in Figure \@ref(fig:chicago-202 res_2020 %>% mutate(day = add_day(.)) %>% ggplot(aes(x = date)) + - geom_point(aes(y = ridership, col = day, pch = day), size = 3) + + geom_point(aes(y = ridership, color = day, pch = day), size = 3) + geom_line(aes(y = .pred), alpha = .75) + geom_ribbon(aes(ymin = .pred_lower, ymax = .pred_upper), fill = "blue", alpha = .1) + scale_shape_manual(values = 15:22) + @@ -430,7 +430,7 @@ pca_dist <- dist_hist <- training_pca %>% ggplot(aes(x = distance)) + - geom_histogram(bins = 30, col = "white") + + geom_histogram(bins = 30, color = "white") + labs(x = "Distance to Training Set Center", title = "(d) Reference Distribution") + theme(plot.title = element_text(size = 9)) @@ -469,16 +469,16 @@ test_pca_dist <- geom_segment( data = testing_pca, aes(x = PC1_mean, y = PC2_mean, xend = PC1, yend = PC2), - col = "lightblue", + color = "lightblue", lty = 2 ) + geom_segment( data = new_pca, aes(x = PC1_mean, y = PC2_mean, xend = PC1, yend = PC2), - col = "red" + color = "red" ) + - geom_point(data = testing_pca, aes(x = PC1, y = PC2), col = "lightblue", size = 2, pch = 17) + - geom_point(data = new_pca, aes(x = PC1, y = PC2), size = 2, col = "red") + + geom_point(data = testing_pca, aes(x = PC1, y = PC2), color = "lightblue", size = 2, pch = 17) + + geom_point(data = new_pca, aes(x = PC1, y = PC2), size = 2, color = "red") + coord_obs_pred() + labs(x = "Component 1", y = "Component 2", title = "Distances to Training Set Center") + theme_bw() + @@ -487,9 +487,9 @@ test_pca_dist <- test_dist_hist <- training_pca %>% ggplot(aes(x = distance)) + - geom_histogram(bins = 30, col = "white", alpha = .5) + - geom_vline(xintercept = testing_pca$distance, col = "lightblue", lty = 2) + - geom_vline(xintercept = new_pca$distance, col = "red") + + geom_histogram(bins = 30, color = "white", alpha = .5) + + geom_vline(xintercept = testing_pca$distance, color = "lightblue", lty = 2) + + geom_vline(xintercept = new_pca$distance, color = "red") + xlab("Distance to Training Set Center") test_pca_dist + test_dist_hist diff --git a/20-ensemble-models.Rmd b/20-ensemble-models.Rmd index 9fbaa5d0..a685fab6 100644 --- a/20-ensemble-models.Rmd +++ b/20-ensemble-models.Rmd @@ -181,6 +181,7 @@ The regularized linear regression meta-learning model contained `r num_coefs` bl ```{r ensembles-blending-weights, eval = FALSE} autoplot(ens, "weights") + geom_text(aes(x = weight + 0.01, label = model), hjust = 0) + + theme(legend.position = "none") + lims(x = c(-0.01, 0.75)) ``` diff --git a/21-inferential-analysis.Rmd b/21-inferential-analysis.Rmd index bfc73a6c..f1a06610 100644 --- a/21-inferential-analysis.Rmd +++ b/21-inferential-analysis.Rmd @@ -44,7 +44,7 @@ tidymodels_prefer() data("bioChemists", package = "pscl") ggplot(bioChemists, 
aes(x = art)) + - geom_histogram(binwidth = 1, col = "white") + + geom_histogram(binwidth = 1, color = "white") + labs(x = "Number of articles within 3y of graduation") ``` @@ -253,9 +253,9 @@ glm_boot %>% mutate(method = "parametric") %>% select(term, method, .estimate = estimate, .lower = conf.low, .upper = conf.high) )%>% - ggplot(aes(x = .estimate, y = term, col = method)) + + ggplot(aes(x = .estimate, y = term, color = method, pch = method)) + geom_vline(xintercept = 0, lty = 3) + - geom_point(position = position_dodge(width = 1 / 2), cex = .9) + + geom_point(size = 2, position = position_dodge(width = 1 / 2)) + geom_errorbar(aes(xmin = .lower, xmax = .upper), width = 1 / 4, position = position_dodge(width = 1 / 2)) + @@ -399,9 +399,9 @@ It's a good idea to visualize the bootstrap distributions of the coefficients, a bootstrap_models %>% unnest(zero_coefs) %>% ggplot(aes(x = estimate)) + - geom_histogram(bins = 25, col = "white") + + geom_histogram(bins = 25, color = "white") + facet_wrap(~ term, scales = "free_x") + - geom_vline(xintercept = 0, col = "green") + geom_vline(xintercept = 0, lty = 2, color = "gray70") ``` ```{r zip-bootstrap, ref.label = "inference-zip-bootstrap"}
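Most of the figure changes above apply one plotting idiom: spell out the `color` argument instead of the `col` shorthand, map `pch` (shape) and `lty` (linetype) to the same grouping as `color` so groups stay distinguishable in grayscale print, and draw the colors from a ColorBrewer palette. Below is a minimal, self-contained sketch of that idiom; it uses the built-in `iris` data rather than any of the book's data sets, so the variable names are stand-ins.

```r
library(ggplot2)

# Redundant aesthetics (color + shape + linetype) keep the three groups
# readable even when the figure is reproduced in grayscale.
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length,
                 color = Species, pch = Species, lty = Species)) +
  geom_point(size = 2, alpha = 0.7) +
  # One linear fit per species, styled by the per-group linetype mapping:
  geom_smooth(method = lm, se = FALSE) +
  scale_color_brewer(palette = "Paired") +
  labs(x = "Sepal Length (cm)", y = "Petal Length (cm)",
       color = NULL, pch = NULL, lty = NULL)
```

Giving the color, shape, and linetype legends the same title (here `NULL`) lets ggplot2 merge them into a single legend, so the redundant mappings do not clutter the plot.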
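The grid-search timing figures compare two ways `tune_grid()` can hand work to parallel workers. To reproduce that comparison, the scheme is chosen through the `parallel_over` option of `control_grid()`. The sketch below only shows the wiring under stated assumptions: a hypothetical four-worker pool, plus a workflow `wflow` and a resampling object `folds` that are assumed to already exist.

```r
library(doParallel)
library(tune)

# Register a hypothetical pool of four workers.
cl <- makePSOCKcluster(4)
registerDoParallel(cl)

# "resamples": one task per resample, so preprocessing is computed once per fold.
# "everything": one task per resample/parameter combination, which exposes more
# parallelism but repeats preprocessing, so it loses ground when that step is costly.
ctrl <- control_grid(parallel_over = "everything")

res <- tune_grid(wflow, resamples = folds, grid = 25, control = ctrl)

stopCluster(cl)
```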
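Several of the retouched figures (workflow sets, encodings, model explainers) are observed-versus-predicted plots whose reference line now uses the muted `color = "gray50", lty = 2` instead of a bright color. The pattern itself is short; this sketch substitutes simulated numbers for the chapters' `collect_predictions()` output, so the `observed`/`predicted` columns are illustrative only.

```r
library(ggplot2)
library(tune)  # coord_obs_pred() keeps both axes on the same scale and range

# Simulated stand-in for a tibble of out-of-sample predictions.
set.seed(123)
preds <- data.frame(observed = rnorm(100, mean = 30, sd = 5))
preds$predicted <- preds$observed + rnorm(100, sd = 2)

ggplot(preds, aes(x = observed, y = predicted)) +
  geom_abline(color = "gray50", lty = 2) +  # muted 45-degree reference line
  geom_point(alpha = 0.5) +
  coord_obs_pred() +
  labs(x = "observed", y = "predicted")
```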