diff --git a/NEWS.md b/NEWS.md index 2b7a252179..6a9d993fe8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # ggplot2 (development version) +* `geom_boxplot()`/`stat_boxplot()` gain a `quantile.type` parameter (default `7`) + to control the percentile definition used for hinges and median; set `quantile.type = 2` + to match SAS's default `PCTLDEF = 5`, enabling parity with SAS boxplots out-of-the-box. + (@munoztd0, #6819) * `make_constructor()` no longer captures `rlang::list2()` at build time. * The `arrow` and `arrow.fill` arguments are now available in `geom_linerange()` and `geom_pointrange()` layers (@teunbrand, #6481). diff --git a/R/stat-boxplot.R b/R/stat-boxplot.R index cfc137d4c3..5f199511f5 100644 --- a/R/stat-boxplot.R +++ b/R/stat-boxplot.R @@ -53,7 +53,7 @@ StatBoxplot <- ggproto("StatBoxplot", Stat, extra_params = c("na.rm", "orientation"), - compute_group = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5, flipped_aes = FALSE) { + compute_group = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5, flipped_aes = FALSE, quantile.type = 7) { data <- flip_data(data, flipped_aes) qs <- c(0, 0.25, 0.5, 0.75, 1) @@ -61,7 +61,8 @@ StatBoxplot <- ggproto("StatBoxplot", Stat, mod <- quantreg::rq(y ~ 1, weights = weight, data = data, tau = qs) stats <- as.numeric(stats::coef(mod)) } else { - stats <- as.numeric(stats::quantile(data$y, qs)) + # Follow base R default (type = 7) unless overridden by user + stats <- as.numeric(stats::quantile(data$y, qs, type = quantile.type)) } names(stats) <- c("ymin", "lower", "middle", "upper", "ymax") iqr <- diff(stats[c(2, 4)]) @@ -99,6 +100,8 @@ StatBoxplot <- ggproto("StatBoxplot", Stat, #' @rdname geom_boxplot #' @param coef Length of the whiskers as multiple of IQR. Defaults to 1.5. +#' @param quantile.type An integer between 1 and 9 setting the quantile algorithm +#' per [`stats::quantile(type)`][stats::quantile]. 
Defaults to `7`. #' @inheritParams shared_layer_parameters #' @export #' @eval rd_computed_vars( diff --git a/man/geom_boxplot.Rd b/man/geom_boxplot.Rd index 5f9962f6a0..f747628213 100644 --- a/man/geom_boxplot.Rd +++ b/man/geom_boxplot.Rd @@ -53,6 +53,7 @@ stat_boxplot( ..., orientation = NA, coef = 1.5, + quantile.type = 7, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE @@ -180,6 +181,9 @@ overriding these connections, see how the \link[=layer_stats]{stat} and \link[=layer_geoms]{geom} arguments work.} \item{coef}{Length of the whiskers as multiple of IQR. Defaults to 1.5.} + +\item{quantile.type}{An integer between 1 and 9 setting the quantile algorithm +per \code{\link[stats:quantile]{stats::quantile(type)}}. Defaults to \code{7}.} } \description{ The boxplot compactly displays the distribution of a continuous variable. diff --git a/tests/testthat/test-geom-boxplot.R b/tests/testthat/test-geom-boxplot.R index 82c45c3fb7..21fafc40ee 100644 --- a/tests/testthat/test-geom-boxplot.R +++ b/tests/testthat/test-geom-boxplot.R @@ -109,3 +109,32 @@ test_that("boxplot draws correctly", { ) ) }) + +test_that("quantile.type changes hinges for small samples (unweighted)", { + df <- data_frame(x = 1, y = c(1, 2, 3, 4)) + + p_default <- ggplot(df, aes(x, y)) + stat_boxplot() + d_default <- get_layer_data(p_default) + + p_t2 <- ggplot(df, aes(x, y)) + stat_boxplot(quantile.type = 2) + d_t2 <- get_layer_data(p_t2) + + # Lower/upper hinges should differ under different quantile definitions + expect_false(isTRUE(all.equal(d_default$lower, d_t2$lower))) + expect_false(isTRUE(all.equal(d_default$upper, d_t2$upper))) +}) + +test_that("quantile.type = 7 matches default behavior (backward compatible)", { + set.seed(123) + df <- data_frame(x = 1, y = rnorm(25)) + + p_default <- ggplot(df, aes(x, y)) + stat_boxplot() + d_default <- get_layer_data(p_default) + + p_t7 <- ggplot(df, aes(x, y)) + stat_boxplot(quantile.type = 7) + d_t7 <- get_layer_data(p_t7) + 
expect_equal(d_default$lower, d_t7$lower) + expect_equal(d_default$middle, d_t7$middle) + expect_equal(d_default$upper, d_t7$upper) +})