From 5785f6447f06d3e0c80269861574177ad14ed893 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 16 Oct 2025 17:39:52 -0400 Subject: [PATCH 01/11] Init --- NAMESPACE | 3 + R/add_difference_row.R | 426 +++++++++++++++++++++++++++++++++++++++ R/reexport.R | 4 + R/tbl_survfit_times.R | 7 +- man/reexports.Rd | 3 +- man/tbl_survfit_times.Rd | 56 ++++- 6 files changed, 490 insertions(+), 9 deletions(-) create mode 100644 R/add_difference_row.R diff --git a/NAMESPACE b/NAMESPACE index 124285cd..0ff9110d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(add_difference_row,tbl_survfit_times) S3method(add_overall,tbl_baseline_chg) S3method(add_overall,tbl_hierarchical_rate_and_count) S3method(add_overall,tbl_hierarchical_rate_by_grade) @@ -8,6 +9,7 @@ S3method(add_overall,tbl_survfit_quantiles) S3method(add_overall,tbl_survfit_times) export("%>%") export(add_blank_rows) +export(add_difference_row) export(add_hierarchical_count_row) export(add_overall) export(filter_hierarchical) @@ -48,6 +50,7 @@ importFrom(dplyr,matches) importFrom(dplyr,num_range) importFrom(dplyr,starts_with) importFrom(dplyr,where) +importFrom(gtsummary,add_difference_row) importFrom(gtsummary,add_overall) importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) diff --git a/R/add_difference_row.R b/R/add_difference_row.R new file mode 100644 index 00000000..a145242d --- /dev/null +++ b/R/add_difference_row.R @@ -0,0 +1,426 @@ +#' Add difference rows between groups +#' +#' @description +#' `r lifecycle::badge('experimental')`\cr +#' Adds difference to tables created by [`tbl_survfit_times()`] as additional rows. +#' This function is often useful when there are more than two groups to compare. +#' +#' Pairwise differences are calculated relative to the specified +#' `by` variable's specified reference level. +#' +#' @inheritParams gtsummary::add_difference.tbl_summary +#' @param reference (`scalar`)\cr +#' Value of the `tbl_survfit_times(by)` variable value that is the reference for +#' each of the difference calculations. For factors, use the character +#' level. +#' @param header (`string`)\cr +#' When supplied, a header row will appear above the difference statistics. +#' @param statistic ([`formula-list-selector`][gtsummary::syntax])\cr +#' Specifies summary statistics to display for each time. The default is +#' `everything() ~ c("{estimate}", "({conf.low}, {conf.high})", "{p.value}")`. +#' The statistics available to include are `"estimate"`, `"std.error"`, +#' `"statistic"`, `"conf.low"`, `"conf.high"`, `"p.value"`. +#' +#' @export +#' @return a gtsummary table +#' @name tbl_survfit_times +#' +#' @examples +#' # Example 2 - Survival Differences ----------- +#' tbl_survfit_times( +#' data = cards::ADTTE, +#' by = "TRTA", +#' times = c(30, 60), +#' label = "Day {time}" +#' ) |> +#' add_difference_row(reference = "Placebo") +NULL + +#' @rdname tbl_survfit_times +#' @export +add_difference_row.tbl_survfit_times <- function(x, + # y = "survival::Surv(time = AVAL, event = 1 - CNSR, type = 'right', origin = 0)", + reference, + times = everything(), + statistic = everything() ~ c("{estimate}", "({conf.low}, {conf.high})", "{p.value}"), + header = NULL, + conf.level = 0.95, + pvalue_fun = label_roche_pvalue(digits = 4), + estimate_fun = label_roche_number(digits = 2, scale = 100), + ...) { + # check inputs --------------------------------------------------------------- + browser() + set_cli_abort_call() + check_dots_empty(call = get_cli_abort_call()) + updated_call_list <- c(x$call_list, list(add_difference = match.call())) + check_not_missing(reference) + check_scalar(reference) + if (inherits(reference, "factor")) { + cli::cli_abort( + c("The scalar in the {.arg reference} argument cannot be a {.cls factor}.", + i = "Use the {.cls character} level instead.") + ) + } + + # checking that input x has a by var and it has two levels + if (is_empty(x$inputs$by)) { + "Cannot run {.fun add_difference_row} when {.code tbl_summary()} does not include a {.arg by} argument." |> + cli::cli_abort(call = get_cli_abort_call()) + } + + # if `pvalue_fun` not modified, check if we need to use a theme p-value + if (missing(pvalue_fun)) { + pvalue_fun <- + get_theme_element("pkgwide-fn:pvalue_fun") %||% + pvalue_fun + } + pvalue_fun <- as_function(pvalue_fun) + + cards::process_selectors( + scope_table_body(x$table_body, x$inputs$data[x$inputs$include]), + include = {{ include }} + ) + + # checking for `tbl_summary(percent = c("cell", "row"))`, which don't apply + if (!x$inputs$percent %in% "column" && + any(unlist(x$inputs$type[include]) %in% c("categorical", "dichotomous"))) { + cli::cli_warn(c( + "The {.code add_difference_row()} results for categorical variables may not + compatible with {.code tbl_summary(percent = c('cell', 'row'))}.", + i = "Use column percentages instead, {.code tbl_summary(percent = 'column')}." + )) + } + + cards::process_selectors(x$inputs$data, group = {{ group }}, adj.vars = {{ adj.vars }}) + check_scalar(group, allow_empty = TRUE) + + cards::process_formula_selectors( + scope_table_body(x$table_body, x$inputs$data[include]), + test = test, + include_env = TRUE + ) + # add the calling env to the test + test <- .add_env_to_list_elements(test, env = caller_env()) + + cards::process_formula_selectors( + scope_table_body(x$table_body, x$inputs$data[include]), + statistic = statistic + ) + cards::check_list_elements( + statistic, + predicate = \(x) is.character(x) && !is_empty(.extract_glue_elements(x)), + error_msg = + c("Each element passed in the {.arg statistic} argument must be a character vector with at least one glue element.", + "i" = "For example, {.code everything() ~ c('{{estimate}}', '{{conf.low}}, {{conf.high}}', '{{p.value}}')}"), + ) + + # calculate survival difference ---------------------------------------------------------------- + # calculate ARD for by vars + if (!is_empty(by)) { + ard_by <- cards::ard_tabulate(data, variables = all_of(by)) + } + ard_n <- cards::ard_total_n(data) + + # get the confidence level + conf.level <- + ard_surv_times |> + dplyr::filter(.data$stat_name == "conf.level") |> + dplyr::pull("stat") |> + unlist() + + # survival difference + surv_diff <- + cardx::ard_survival_survfit_diff( + x = rlang::inject(survival::survfit(!!form, data = data)), + times = times, + conf.level = conf.level + ) |> + dplyr::filter(!.data$stat_name %in% c("method", "reference_level")) |> + cards::update_ard_fmt_fun( + stat_names = c("estimate", "conf.low", "conf.high"), + fmt_fun = estimate_fun + ) + + # build gtsummary table ------------------------------------------------------ + tbl <- + dplyr::bind_rows( + surv_diff |> + # remove model-wide stats + dplyr::filter(.data$variable == "time") |> + dplyr::mutate( + variable = paste0(.data$variable, unlist(.data$variable_level)), + variable_level = NULL + ), + case_switch(!is_empty(by) ~ ard_by), + ard_n + ) |> + gtsummary::tbl_ard_summary( + by = any_of(by), + type = starts_with("time") ~ "continuous2", + statistic = starts_with("time") ~ statistic, + label = + map(times, ~ glue::glue_data(list(time = .x), label)) |> + set_names(paste0("time", times)) + ) |> + gtsummary::modify_header( + gtsummary::all_stat_cols() ~ "{level} \n(N = {n})", + label = "" + ) |> + gtsummary::modify_table_body( + ~ .x |> + dplyr::mutate( + label = dplyr::case_when( + .data$label == "Number of Subjects at Risk" ~ "Patients remaining at risk", + .data$label == "Survival Probability" ~ "Event Free Rate (%)", + .data$label == "(CI Lower Bound, CI Upper Bound)" ~ glue("{style_roche_number(conf.level, scale = 100)}% CI"), + .default = .data$label + ) + ) + ) + + # add all available test meta data to a data frame --------------------------- + df_test_meta_data <- + imap( + test, + ~ dplyr::tibble(variable = .y, fun_to_run = list(.x), test_name = attr(.x, "test_name") %||% NA_character_) + ) |> + dplyr::bind_rows() + + # add test names to `.$table_body` so it can be used in selectors ------------ + if (!"test_name" %in% names(x$table_body)) { + x$table_body <- + dplyr::left_join( + x$table_body, + df_test_meta_data[c("variable", "test_name")], + by = "variable" + ) |> + dplyr::relocate("test_name", .after = "variable") + } else { + x$table_body <- + dplyr::rows_update( + x$table_body, + df_test_meta_data[c("variable", "test_name")], + by = "variable", + unmatched = "ignore" + ) |> + dplyr::relocate("test_name", .after = "variable") + } + + # now process the `test.args` and `estimate_fun` arguments ------------------- + cards::process_formula_selectors( + scope_table_body(x$table_body, x$inputs$data[include]), + estimate_fun = estimate_fun + ) + # fill in unspecified variables + cards::fill_formula_selectors( + scope_table_body(x$table_body, x$inputs$data[include]), + estimate_fun = eval(formals(asNamespace("gtsummary")[["add_difference.tbl_summary"]])[["estimate_fun"]]) + ) + + cards::process_formula_selectors( + scope_table_body(x$table_body, x$inputs$data[include]), + test.args = test.args + ) + cards::check_list_elements( + test.args, + predicate = \(x) is.list(x) && is_named(x), + error_msg = c("Error in the argument {.arg {arg_name}} for variable {.val {variable}}.", + i = "Value must be a named list." + ) + ) + + # check reference level is appropriate + lst_by_levels <- + x$table_styling$header |> + dplyr::filter(grepl(pattern = "^stat_\\d*[1-9]\\d*$", x = .data$column)) |> + dplyr::select("column", "modify_stat_level") |> + deframe() |> + lapply(FUN = as.character) + if (!as.character(reference) %in% unlist(lst_by_levels)) { + cli::cli_abort( + "The {.arg reference} argument must be one of {.val {unlist(lst_by_levels)}}.", + call = get_cli_abort_call() + ) + } + + # prep data for tests, by adding reference level to the first position in factor + data <- x$inputs$data + data[[x$inputs$by]] <- fct_relevel(data[[x$inputs$by]], reference, after = 0L) + + # create data frame that is one line per test to be calculated + df_results <- + tidyr::expand_grid( + variable = include, + reference_level = reference, + compare_level = unlist(lst_by_levels) |> setdiff(reference) + ) |> + # merge in table_body column name and subsetted data frame + dplyr::left_join( + enframe(unlist(lst_by_levels), "column", "compare_level") |> + dplyr::filter(!.data$compare_level %in% .env$reference) |> + dplyr::mutate( + data = + map( + .data$compare_level, + \(.x, .y) { + .env$data |> + dplyr::filter(.data[[x$inputs$by]] %in% c(.x, .env$reference)) |> + dplyr::mutate("{x$inputs$by}" := factor(.data[[x$inputs$by]])) # this removes unobserved levels of a factor + } + ) + ), + by = "compare_level" + ) + + df_results$result <- + pmap( + list(df_results$variable, + df_results$data), + \(variable, data) { + .calculate_one_test( + data = data, + variable = variable, + x = x, + df_test_meta_data = df_test_meta_data, + estimate_fun = estimate_fun, + pvalue_fun = pvalue_fun, + group = group, + test.args = test.args, + adj.vars = adj.vars, + conf.level = conf.level, + apply_fmt_fun = TRUE + ) + } + ) + + # create vector of results + df_results$result_fmt <- + pmap( + list(df_results$variable, df_results$result), + \(variable, result) { + lst_results <- + result |> + dplyr::filter(map_lgl(.data$stat_fmt, Negate(is.null))) |> + cards::get_ard_statistics(.column = "stat_fmt") + + map( + statistic[[variable]], + ~ glue::glue_data(.x = lst_results, .x) + ) + } + ) + + # create label for new statistics + df_results$result_lbl <- + pmap( + list(df_results$variable, df_results$result), + \(variable, result) { + lst_results <- + result |> + dplyr::filter(map_lgl(.data$stat_fmt, Negate(is.null))) |> + cards::get_ard_statistics(.column = "stat_label") + + map( + statistic[[variable]], + ~ ifelse( + .x == "{conf.low}, {conf.high}", + glue::glue("{style_number(conf.level, scale = 100)}% CI"), # replace {conf.low}, {conf.high} with "95% CI" + glue::glue_data(.x = lst_results, .x) + ) + ) + } + ) + + # prep results to place them in table_body + df_results_wide <- + df_results |> + dplyr::left_join( + df_test_meta_data[c("variable", "test_name")], + by = "variable" + ) |> + dplyr::select("variable", "test_name", "column", "result_fmt", label = "result_lbl") |> + tidyr::unnest(cols = c("result_fmt", "label")) |> + dplyr::mutate(across(c("result_fmt", "label"), unlist)) |> + tidyr::pivot_wider( + id_cols = c("variable", "test_name", "label"), + values_from = "result_fmt", + names_from = "column" + ) |> + dplyr::mutate(row_type = "difference_row") + + # get index values where new lines are to be inserted + variable_index <- + x$table_body |> + dplyr::select("variable") |> + dplyr::mutate(row_number = dplyr::row_number()) |> + dplyr::filter( + .by = "variable", + dplyr::n() == dplyr::row_number(), + .data$variable %in% .env$include + ) |> + dplyr::slice_tail(by = "variable", n = 1L) |> + deframe() + + # add each of the rows to table_body + for (v in rev(names(variable_index))) { + x$table_body <- + x$table_body |> + dplyr::add_row( + dplyr::bind_rows( + if (!is_empty(.env$header)) { + data.frame(variable = paste0(v, "-row_difference"), row_type = "label", label = header) + }, + dplyr::filter(df_results_wide, .data$variable == .env$v) |> + dplyr::mutate(variable = paste0(.data$variable, "-row_difference")) + ), + .after = variable_index[[v]] + ) + } + + # prepping ARD to return with result ----------------------------------------- + card <- + df_results |> + dplyr::rowwise() |> + dplyr::mutate( + result_lst = + list(.data$result) |> + set_names(nm = paste(shQuote(.data$reference_level, type = "sh"), shQuote(.data$compare_level, type = "sh"), sep = " vs. ")) + ) |> + dplyr::select("variable", "result_lst") |> + tidyr::nest(result_nested = "result_lst") |> + dplyr::rowwise() |> + dplyr::mutate( + result_final = + list(.data$result_nested[[1]]) |> + set_names(nm = .data$variable) + + ) |> + dplyr::pull("result_final") + + # add info to table ---------------------------------------------------------- + x$call_list[["add_difference_row"]] <- match.call() + x$cards[["add_difference_row"]] <- card + # print warnings/errors from calculations + x$cards[["add_difference_row"]] |> + map(dplyr::bind_rows) |> + dplyr::bind_rows() |> + dplyr::filter(.data$stat_name %in% c("estimate", "std.error", "parameter", + "statistic", "conf.low", "conf.high", "p.value")) |> + cards::print_ard_conditions() + + # add final styling to table ------------------------------------------------- + x |> + .modify_indent( + columns = "label", + rows = .data$row_type == "difference_row", + indent = 4L + ) |> + .modify_missing_symbol( + columns = + x$table_styling$header |> + dplyr::filter(.data$modify_stat_level == .env$reference) |> + dplyr::pull("column"), + rows = .data$row_type == "difference_row", + symbol = "\U2014" + ) +} diff --git a/R/reexport.R b/R/reexport.R index cc3e6ca2..ea11712d 100644 --- a/R/reexport.R +++ b/R/reexport.R @@ -16,3 +16,7 @@ gtsummary::filter_hierarchical #' @export #' @importFrom gtsummary sort_hierarchical gtsummary::sort_hierarchical + +#' @export +#' @importFrom gtsummary add_difference_row +gtsummary::add_difference_row diff --git a/R/tbl_survfit_times.R b/R/tbl_survfit_times.R index 920d9cd6..3f7ba955 100644 --- a/R/tbl_survfit_times.R +++ b/R/tbl_survfit_times.R @@ -40,12 +40,13 @@ NULL #' @rdname tbl_survfit_times #' @export +#' @order 1 tbl_survfit_times <- function(data, times, y = "survival::Surv(time = AVAL, event = 1 - CNSR, type = 'right', origin = 0)", by = NULL, label = "Time {time}", - statistic = c("{n.risk}", "{estimate}%", "{conf.low}%, {conf.high}%"), + statistic = c("{n.risk}", "{estimate}", "({conf.low}, {conf.high})"), estimate_fun = label_roche_number(digits = 1, scale = 100), method.args = list(conf.int = 0.95)) { # check inputs --------------------------------------------------------------- @@ -136,8 +137,8 @@ tbl_survfit_times <- function(data, dplyr::mutate( label = dplyr::case_when( .data$label == "Number of Subjects at Risk" ~ "Patients remaining at risk", - .data$label == "Survival Probability%" ~ "Event Free Rate (%)", - .data$label == "CI Lower Bound%, CI Upper Bound%" ~ glue("{style_roche_number(conf.level, scale = 100)}% CI"), + .data$label == "Survival Probability" ~ "Event Free Rate (%)", + .data$label == "(CI Lower Bound, CI Upper Bound)" ~ glue("{style_roche_number(conf.level, scale = 100)}% CI"), .default = .data$label ) ) diff --git a/man/reexports.Rd b/man/reexports.Rd index 64d63500..5c1195f0 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -7,6 +7,7 @@ \alias{add_overall} \alias{filter_hierarchical} \alias{sort_hierarchical} +\alias{add_difference_row} \title{Objects exported from other packages} \keyword{internal} \description{ @@ -16,6 +17,6 @@ below to see their documentation. \describe{ \item{dplyr}{\code{\link[dplyr:reexports]{\%>\%}}} - \item{gtsummary}{\code{\link[gtsummary]{add_overall}}, \code{\link[gtsummary]{filter_hierarchical}}, \code{\link[gtsummary]{sort_hierarchical}}} + \item{gtsummary}{\code{\link[gtsummary]{add_difference_row}}, \code{\link[gtsummary]{add_overall}}, \code{\link[gtsummary]{filter_hierarchical}}, \code{\link[gtsummary]{sort_hierarchical}}} }} diff --git a/man/tbl_survfit_times.Rd b/man/tbl_survfit_times.Rd index f46fa41a..8d0b0a7f 100644 --- a/man/tbl_survfit_times.Rd +++ b/man/tbl_survfit_times.Rd @@ -1,9 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tbl_survfit_times.R +% Please edit documentation in R/tbl_survfit_times.R, R/add_difference_row.R \name{tbl_survfit_times} \alias{tbl_survfit_times} +\alias{add_difference_row.tbl_survfit_times} \alias{add_overall.tbl_survfit_times} -\title{Survival Times} +\title{Add difference rows between groups} \usage{ tbl_survfit_times( data, @@ -11,11 +12,23 @@ tbl_survfit_times( y = "survival::Surv(time = AVAL, event = 1 - CNSR, type = 'right', origin = 0)", by = NULL, label = "Time {time}", - statistic = c("{n.risk}", "{estimate}\%", "{conf.low}\%, {conf.high}\%"), + statistic = c("{n.risk}", "{estimate}", "({conf.low}, {conf.high})"), estimate_fun = label_roche_number(digits = 1, scale = 100), method.args = list(conf.int = 0.95) ) +\method{add_difference_row}{tbl_survfit_times}( + x, + reference, + times = everything(), + statistic = everything() ~ c("{estimate}", "({conf.low}, {conf.high})", "{p.value}"), + header = NULL, + conf.level = 0.95, + pvalue_fun = label_roche_pvalue(digits = 4), + estimate_fun = label_roche_number(digits = 2, scale = 100), + ... +) + \method{add_overall}{tbl_survfit_times}( x, last = FALSE, @@ -63,19 +76,44 @@ function.} \item{x}{(\code{tbl_survfit_times})\cr A stratified 'tbl_survfit_times' object} +\item{reference}{(\code{scalar})\cr +Value of the \code{tbl_survfit_times(by)} variable value that is the reference for +each of the difference calculations. For factors, use the character +level.} + +\item{header}{(\code{string})\cr +When supplied, a header row will appear above the difference statistics.} + +\item{conf.level}{(\code{numeric})\cr +a scalar in the interval \verb{(0, 1)} indicating the confidence level. Default is 0.95} + +\item{pvalue_fun}{(\code{function})\cr +Function to round and format p-values. Default is \code{label_style_pvalue()}. +The function must have a numeric vector input, and return a string that is +the rounded/formatted p-value (e.g. \code{pvalue_fun = label_style_pvalue(digits = 2)}).} + +\item{...}{These dots are for future extensions and must be empty.} + \item{last}{(scalar \code{logical})\cr Logical indicator to display overall column last in table. Default is \code{FALSE}, which will display overall column first.} \item{col_label}{(\code{string})\cr String indicating the column label. Default is \code{"**Overall** \nN = {style_number(N)}"}} - -\item{...}{These dots are for future extensions and must be empty.} } \value{ +a gtsummary table + a gtsummary table } \description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr +Adds difference to tables created by \code{\link[=tbl_survfit_times]{tbl_survfit_times()}} as additional rows. +This function is often useful when there are more than two groups to compare. + +Pairwise differences are calculated relative to the specified +\code{by} variable's specified reference level. + Create a gtsummary table with Kaplan-Meier estimated survival estimates and specified times. } @@ -85,6 +123,14 @@ also need to be updated. To change the label, call the \code{modify_table_body() function to directly update the underlying \code{x$table_body} data frame. } \examples{ +# Example 2 - Survival Differences ----------- +tbl_survfit_times( + data = cards::ADTTE, + by = "TRTA", + times = c(30, 60), + label = "Day {time}" +) |> + add_difference_row(reference = "Placebo") # Example 1 ---------------------------------- tbl_survfit_times( data = cards::ADTTE, From 0ca9e8f087a10f27b3fd929baabf7104f972c46d Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 16 Oct 2025 20:28:45 -0400 Subject: [PATCH 02/11] Make add_difference_row function --- R/add_difference_row.R | 427 +++++++++------------------------------ man/tbl_survfit_times.Rd | 7 +- 2 files changed, 99 insertions(+), 335 deletions(-) diff --git a/R/add_difference_row.R b/R/add_difference_row.R index a145242d..9363d99d 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -9,12 +9,10 @@ #' `by` variable's specified reference level. #' #' @inheritParams gtsummary::add_difference.tbl_summary -#' @param reference (`scalar`)\cr +#' @param reference (`string`)\cr #' Value of the `tbl_survfit_times(by)` variable value that is the reference for #' each of the difference calculations. For factors, use the character #' level. -#' @param header (`string`)\cr -#' When supplied, a header row will appear above the difference statistics. #' @param statistic ([`formula-list-selector`][gtsummary::syntax])\cr #' Specifies summary statistics to display for each time. The default is #' `everything() ~ c("{estimate}", "({conf.low}, {conf.high})", "{p.value}")`. @@ -39,97 +37,65 @@ NULL #' @rdname tbl_survfit_times #' @export add_difference_row.tbl_survfit_times <- function(x, - # y = "survival::Surv(time = AVAL, event = 1 - CNSR, type = 'right', origin = 0)", reference, - times = everything(), - statistic = everything() ~ c("{estimate}", "({conf.low}, {conf.high})", "{p.value}"), - header = NULL, + statistic = c("{estimate}", "({conf.low}, {conf.high})", "{p.value}"), conf.level = 0.95, - pvalue_fun = label_roche_pvalue(digits = 4), + pvalue_fun = label_roche_pvalue(), estimate_fun = label_roche_number(digits = 2, scale = 100), ...) { # check inputs --------------------------------------------------------------- - browser() set_cli_abort_call() check_dots_empty(call = get_cli_abort_call()) - updated_call_list <- c(x$call_list, list(add_difference = match.call())) check_not_missing(reference) - check_scalar(reference) - if (inherits(reference, "factor")) { - cli::cli_abort( - c("The scalar in the {.arg reference} argument cannot be a {.cls factor}.", - i = "Use the {.cls character} level instead.") - ) - } + check_string(reference) + check_range(conf.level, range = c(0, 1)) + check_class(statistic, "character") + check_class(pvalue_fun, "function") + check_class(estimate_fun, "function") - # checking that input x has a by var and it has two levels + # check that input `x` has a `by` var and it has 2+ levels if (is_empty(x$inputs$by)) { - "Cannot run {.fun add_difference_row} when {.code tbl_summary()} does not include a {.arg by} argument." |> + "Cannot run {.fun add_difference_row} when {.code tbl_survfit_times()} does not include a {.arg by} argument." |> cli::cli_abort(call = get_cli_abort_call()) } - # if `pvalue_fun` not modified, check if we need to use a theme p-value - if (missing(pvalue_fun)) { - pvalue_fun <- - get_theme_element("pkgwide-fn:pvalue_fun") %||% - pvalue_fun + # check reference level is appropriate + lst_by_levels <- + x$table_styling$header |> + dplyr::filter(grepl(pattern = "^stat_\\d*[1-9]\\d*$", x = .data$column)) |> + dplyr::select("column", "modify_stat_level") |> + deframe() |> + lapply(FUN = as.character) + if (!as.character(reference) %in% unlist(lst_by_levels)) { + cli::cli_abort( + "The {.arg reference} argument must be one of {.val {unlist(lst_by_levels)}}.", + call = get_cli_abort_call() + ) } - pvalue_fun <- as_function(pvalue_fun) - cards::process_selectors( - scope_table_body(x$table_body, x$inputs$data[x$inputs$include]), - include = {{ include }} - ) - - # checking for `tbl_summary(percent = c("cell", "row"))`, which don't apply - if (!x$inputs$percent %in% "column" && - any(unlist(x$inputs$type[include]) %in% c("categorical", "dichotomous"))) { - cli::cli_warn(c( - "The {.code add_difference_row()} results for categorical variables may not - compatible with {.code tbl_summary(percent = c('cell', 'row'))}.", - i = "Use column percentages instead, {.code tbl_summary(percent = 'column')}." - )) - } + func_inputs <- as.list(environment()) + by <- x$inputs$by + y <- x$inputs$y + times <- x$inputs$times + data <- x$inputs$data + form <- glue("{y} ~ {ifelse(is_empty(by), 1, cardx::bt(by))}") |> stats::as.formula() - cards::process_selectors(x$inputs$data, group = {{ group }}, adj.vars = {{ adj.vars }}) - check_scalar(group, allow_empty = TRUE) + # subset data on complete row ------------------------------------------------ + data <- data[stats::complete.cases(data[all.vars(form)]), ] - cards::process_formula_selectors( - scope_table_body(x$table_body, x$inputs$data[include]), - test = test, - include_env = TRUE - ) - # add the calling env to the test - test <- .add_env_to_list_elements(test, env = caller_env()) + # add reference level to the first position in factor + data[[by]] <- fct_relevel(data[[by]], reference, after = 0L) + ref_col <- names(lst_by_levels)[lst_by_levels == reference] - cards::process_formula_selectors( - scope_table_body(x$table_body, x$inputs$data[include]), - statistic = statistic - ) - cards::check_list_elements( - statistic, - predicate = \(x) is.character(x) && !is_empty(.extract_glue_elements(x)), - error_msg = - c("Each element passed in the {.arg statistic} argument must be a character vector with at least one glue element.", - "i" = "For example, {.code everything() ~ c('{{estimate}}', '{{conf.low}}, {{conf.high}}', '{{p.value}}')}"), - ) + # move reference column to first position + x <- x |> + gtsummary::modify_table_body( + ~ .x |> + dplyr::relocate(ref_col, .after = label) + ) # calculate survival difference ---------------------------------------------------------------- - # calculate ARD for by vars - if (!is_empty(by)) { - ard_by <- cards::ard_tabulate(data, variables = all_of(by)) - } - ard_n <- cards::ard_total_n(data) - - # get the confidence level - conf.level <- - ard_surv_times |> - dplyr::filter(.data$stat_name == "conf.level") |> - dplyr::pull("stat") |> - unlist() - - # survival difference - surv_diff <- + card <- cardx::ard_survival_survfit_diff( x = rlang::inject(survival::survfit(!!form, data = data)), times = times, @@ -139,288 +105,87 @@ add_difference_row.tbl_survfit_times <- function(x, cards::update_ard_fmt_fun( stat_names = c("estimate", "conf.low", "conf.high"), fmt_fun = estimate_fun + ) |> + cards::update_ard_fmt_fun( + stat_names = "p.value", + fmt_fun = pvalue_fun + ) |> + dplyr::mutate( + variable = paste0(.data$variable, unlist(.data$variable_level)), + variable_level = NULL + ) + + ard_surv_diff <- + cards::bind_ard( + card |> + dplyr::filter( + unlist(.data$group1_level) == unlist(card$group1_level)[1] + ) |> + dplyr::mutate( + group1_level = as.list(factor(reference, levels = levels(data[[by]]))), + stat = list(NULL) + ), + card ) # build gtsummary table ------------------------------------------------------ - tbl <- - dplyr::bind_rows( - surv_diff |> - # remove model-wide stats - dplyr::filter(.data$variable == "time") |> - dplyr::mutate( - variable = paste0(.data$variable, unlist(.data$variable_level)), - variable_level = NULL - ), - case_switch(!is_empty(by) ~ ard_by), - ard_n - ) |> + tbl_surv_diff <- + ard_surv_diff |> gtsummary::tbl_ard_summary( by = any_of(by), type = starts_with("time") ~ "continuous2", - statistic = starts_with("time") ~ statistic, - label = - map(times, ~ glue::glue_data(list(time = .x), label)) |> - set_names(paste0("time", times)) - ) |> - gtsummary::modify_header( - gtsummary::all_stat_cols() ~ "{level} \n(N = {n})", - label = "" + statistic = starts_with("time") ~ statistic ) |> + gtsummary::remove_row_type(type = "header") |> gtsummary::modify_table_body( ~ .x |> dplyr::mutate( label = dplyr::case_when( - .data$label == "Number of Subjects at Risk" ~ "Patients remaining at risk", - .data$label == "Survival Probability" ~ "Event Free Rate (%)", + .data$label == "Survival Difference" ~ "Difference in Event Free Rate", .data$label == "(CI Lower Bound, CI Upper Bound)" ~ glue("{style_roche_number(conf.level, scale = 100)}% CI"), + .data$label == "p-value" ~ "p-value (Z-test)", .default = .data$label - ) + ), + # update row_type + row_type = "difference_row", + !!ref_col := NA ) - ) - - # add all available test meta data to a data frame --------------------------- - df_test_meta_data <- - imap( - test, - ~ dplyr::tibble(variable = .y, fun_to_run = list(.x), test_name = attr(.x, "test_name") %||% NA_character_) ) |> - dplyr::bind_rows() - - # add test names to `.$table_body` so it can be used in selectors ------------ - if (!"test_name" %in% names(x$table_body)) { - x$table_body <- - dplyr::left_join( - x$table_body, - df_test_meta_data[c("variable", "test_name")], - by = "variable" - ) |> - dplyr::relocate("test_name", .after = "variable") - } else { - x$table_body <- - dplyr::rows_update( - x$table_body, - df_test_meta_data[c("variable", "test_name")], - by = "variable", - unmatched = "ignore" - ) |> - dplyr::relocate("test_name", .after = "variable") - } - - # now process the `test.args` and `estimate_fun` arguments ------------------- - cards::process_formula_selectors( - scope_table_body(x$table_body, x$inputs$data[include]), - estimate_fun = estimate_fun - ) - # fill in unspecified variables - cards::fill_formula_selectors( - scope_table_body(x$table_body, x$inputs$data[include]), - estimate_fun = eval(formals(asNamespace("gtsummary")[["add_difference.tbl_summary"]])[["estimate_fun"]]) - ) - - cards::process_formula_selectors( - scope_table_body(x$table_body, x$inputs$data[include]), - test.args = test.args - ) - cards::check_list_elements( - test.args, - predicate = \(x) is.list(x) && is_named(x), - error_msg = c("Error in the argument {.arg {arg_name}} for variable {.val {variable}}.", - i = "Value must be a named list." - ) - ) - - # check reference level is appropriate - lst_by_levels <- - x$table_styling$header |> - dplyr::filter(grepl(pattern = "^stat_\\d*[1-9]\\d*$", x = .data$column)) |> - dplyr::select("column", "modify_stat_level") |> - deframe() |> - lapply(FUN = as.character) - if (!as.character(reference) %in% unlist(lst_by_levels)) { - cli::cli_abort( - "The {.arg reference} argument must be one of {.val {unlist(lst_by_levels)}}.", - call = get_cli_abort_call() + gtsummary::modify_indent(columns = label, rows = row_type == "difference_row", indent = 8L) |> + gtsummary::modify_indent(columns = label, rows = label == "Difference in Event Free Rate", indent = 4L) |> + gtsummary::modify_missing_symbol( + columns = + x$table_styling$header |> + dplyr::filter(.data$modify_stat_level == .env$reference) |> + dplyr::pull("column"), + rows = .data$row_type == "difference_row", + symbol = "\U2014" ) - } - # prep data for tests, by adding reference level to the first position in factor - data <- x$inputs$data - data[[x$inputs$by]] <- fct_relevel(data[[x$inputs$by]], reference, after = 0L) - - # create data frame that is one line per test to be calculated - df_results <- - tidyr::expand_grid( - variable = include, - reference_level = reference, - compare_level = unlist(lst_by_levels) |> setdiff(reference) + x <- + tbl_stack( + tbls = list(x, tbl_surv_diff), + quiet = TRUE ) |> - # merge in table_body column name and subsetted data frame - dplyr::left_join( - enframe(unlist(lst_by_levels), "column", "compare_level") |> - dplyr::filter(!.data$compare_level %in% .env$reference) |> + # move survival difference sections under each section for each matching survival time + gtsummary::modify_table_body( + ~ .x |> dplyr::mutate( - data = - map( - .data$compare_level, - \(.x, .y) { - .env$data |> - dplyr::filter(.data[[x$inputs$by]] %in% c(.x, .env$reference)) |> - dplyr::mutate("{x$inputs$by}" := factor(.data[[x$inputs$by]])) # this removes unobserved levels of a factor - } - ) - ), - by = "compare_level" - ) - - df_results$result <- - pmap( - list(df_results$variable, - df_results$data), - \(variable, data) { - .calculate_one_test( - data = data, - variable = variable, - x = x, - df_test_meta_data = df_test_meta_data, - estimate_fun = estimate_fun, - pvalue_fun = pvalue_fun, - group = group, - test.args = test.args, - adj.vars = adj.vars, - conf.level = conf.level, - apply_fmt_fun = TRUE - ) - } - ) - - # create vector of results - df_results$result_fmt <- - pmap( - list(df_results$variable, df_results$result), - \(variable, result) { - lst_results <- - result |> - dplyr::filter(map_lgl(.data$stat_fmt, Negate(is.null))) |> - cards::get_ard_statistics(.column = "stat_fmt") - - map( - statistic[[variable]], - ~ glue::glue_data(.x = lst_results, .x) - ) - } - ) - - # create label for new statistics - df_results$result_lbl <- - pmap( - list(df_results$variable, df_results$result), - \(variable, result) { - lst_results <- - result |> - dplyr::filter(map_lgl(.data$stat_fmt, Negate(is.null))) |> - cards::get_ard_statistics(.column = "stat_label") - - map( - statistic[[variable]], - ~ ifelse( - .x == "{conf.low}, {conf.high}", - glue::glue("{style_number(conf.level, scale = 100)}% CI"), # replace {conf.low}, {conf.high} with "95% CI" - glue::glue_data(.x = lst_results, .x) - ) + variable = as.factor(variable), + idx_row = dplyr::row_number() + ) |> + dplyr::arrange(variable, idx_row) |> + dplyr::mutate( + variable = as.character(variable) ) - } ) - # prep results to place them in table_body - df_results_wide <- - df_results |> - dplyr::left_join( - df_test_meta_data[c("variable", "test_name")], - by = "variable" - ) |> - dplyr::select("variable", "test_name", "column", "result_fmt", label = "result_lbl") |> - tidyr::unnest(cols = c("result_fmt", "label")) |> - dplyr::mutate(across(c("result_fmt", "label"), unlist)) |> - tidyr::pivot_wider( - id_cols = c("variable", "test_name", "label"), - values_from = "result_fmt", - names_from = "column" - ) |> - dplyr::mutate(row_type = "difference_row") - - # get index values where new lines are to be inserted - variable_index <- - x$table_body |> - dplyr::select("variable") |> - dplyr::mutate(row_number = dplyr::row_number()) |> - dplyr::filter( - .by = "variable", - dplyr::n() == dplyr::row_number(), - .data$variable %in% .env$include - ) |> - dplyr::slice_tail(by = "variable", n = 1L) |> - deframe() - - # add each of the rows to table_body - for (v in rev(names(variable_index))) { - x$table_body <- - x$table_body |> - dplyr::add_row( - dplyr::bind_rows( - if (!is_empty(.env$header)) { - data.frame(variable = paste0(v, "-row_difference"), row_type = "label", label = header) - }, - dplyr::filter(df_results_wide, .data$variable == .env$v) |> - dplyr::mutate(variable = paste0(.data$variable, "-row_difference")) - ), - .after = variable_index[[v]] - ) - } - - # prepping ARD to return with result ----------------------------------------- - card <- - df_results |> - dplyr::rowwise() |> - dplyr::mutate( - result_lst = - list(.data$result) |> - set_names(nm = paste(shQuote(.data$reference_level, type = "sh"), shQuote(.data$compare_level, type = "sh"), sep = " vs. ")) - ) |> - dplyr::select("variable", "result_lst") |> - tidyr::nest(result_nested = "result_lst") |> - dplyr::rowwise() |> - dplyr::mutate( - result_final = - list(.data$result_nested[[1]]) |> - set_names(nm = .data$variable) - - ) |> - dplyr::pull("result_final") - # add info to table ---------------------------------------------------------- x$call_list[["add_difference_row"]] <- match.call() x$cards[["add_difference_row"]] <- card + # print warnings/errors from calculations - x$cards[["add_difference_row"]] |> - map(dplyr::bind_rows) |> - dplyr::bind_rows() |> - dplyr::filter(.data$stat_name %in% c("estimate", "std.error", "parameter", - "statistic", "conf.low", "conf.high", "p.value")) |> - cards::print_ard_conditions() + x$cards[["add_difference_row"]] |> cards::print_ard_conditions() - # add final styling to table ------------------------------------------------- - x |> - .modify_indent( - columns = "label", - rows = .data$row_type == "difference_row", - indent = 4L - ) |> - .modify_missing_symbol( - columns = - x$table_styling$header |> - dplyr::filter(.data$modify_stat_level == .env$reference) |> - dplyr::pull("column"), - rows = .data$row_type == "difference_row", - symbol = "\U2014" - ) + x } diff --git a/man/tbl_survfit_times.Rd b/man/tbl_survfit_times.Rd index 8d0b0a7f..7c0840fb 100644 --- a/man/tbl_survfit_times.Rd +++ b/man/tbl_survfit_times.Rd @@ -20,11 +20,10 @@ tbl_survfit_times( \method{add_difference_row}{tbl_survfit_times}( x, reference, - times = everything(), - statistic = everything() ~ c("{estimate}", "({conf.low}, {conf.high})", "{p.value}"), + statistic = c("{estimate}", "({conf.low}, {conf.high})", "{p.value}"), header = NULL, conf.level = 0.95, - pvalue_fun = label_roche_pvalue(digits = 4), + pvalue_fun = label_roche_pvalue(), estimate_fun = label_roche_number(digits = 2, scale = 100), ... ) @@ -76,7 +75,7 @@ function.} \item{x}{(\code{tbl_survfit_times})\cr A stratified 'tbl_survfit_times' object} -\item{reference}{(\code{scalar})\cr +\item{reference}{(\code{string})\cr Value of the \code{tbl_survfit_times(by)} variable value that is the reference for each of the difference calculations. For factors, use the character level.} From dd68d11d46bce7282ca4ec432ccac51f13bd47eb Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 16 Oct 2025 20:53:26 -0400 Subject: [PATCH 03/11] Update documentation --- R/add_difference_row.R | 32 ++++++++++++-------------- R/tbl_survfit_times.R | 8 +++++-- man/tbl_survfit_times.Rd | 49 +++++++++++++++++++++------------------- 3 files changed, 46 insertions(+), 43 deletions(-) diff --git a/R/add_difference_row.R b/R/add_difference_row.R index 9363d99d..08eeb9c3 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -1,9 +1,11 @@ -#' Add difference rows between groups +#' @describeIn tbl_survfit_times #' -#' @description -#' `r lifecycle::badge('experimental')`\cr -#' Adds difference to tables created by [`tbl_survfit_times()`] as additional rows. -#' This function is often useful when there are more than two groups to compare. +#' Adds difference between groups to tables created by [`tbl_survfit_times()`] as additional rows. +#' +#' Difference statistics are calculated using [cardx::ard_survival_survfit_diff()] +#' for all `tbl_survfit_times(times)` variable values, using `survfit` formula +#' `survival::survfit(y ~ by, data = data)` where `y`, `by` and `data` are +#' the inputs of the same names to the `tbl_survfit_times()` object `x`. #' #' Pairwise differences are calculated relative to the specified #' `by` variable's specified reference level. @@ -12,16 +14,14 @@ #' @param reference (`string`)\cr #' Value of the `tbl_survfit_times(by)` variable value that is the reference for #' each of the difference calculations. For factors, use the character -#' level. -#' @param statistic ([`formula-list-selector`][gtsummary::syntax])\cr -#' Specifies summary statistics to display for each time. The default is -#' `everything() ~ c("{estimate}", "({conf.low}, {conf.high})", "{p.value}")`. -#' The statistics available to include are `"estimate"`, `"std.error"`, -#' `"statistic"`, `"conf.low"`, `"conf.high"`, `"p.value"`. +#' level. The reference column will appear as the leftmost column in the table. +#' @param pvalue_fun (`function`)\cr +#' Function to round and format the `p.value` statistic. Default is [label_roche_pvalue()]. +#' The function must have a numeric vector input, and return a string that is the +#' rounded/formatted p-value (e.g. `pvalue_fun = label_style_pvalue(digits = 4)`). #' #' @export -#' @return a gtsummary table -#' @name tbl_survfit_times +#' @order 3 #' #' @examples #' # Example 2 - Survival Differences ----------- @@ -32,10 +32,6 @@ #' label = "Day {time}" #' ) |> #' add_difference_row(reference = "Placebo") -NULL - -#' @rdname tbl_survfit_times -#' @export add_difference_row.tbl_survfit_times <- function(x, reference, statistic = c("{estimate}", "({conf.low}, {conf.high})", "{p.value}"), @@ -78,7 +74,7 @@ add_difference_row.tbl_survfit_times <- function(x, y <- x$inputs$y times <- x$inputs$times data <- x$inputs$data - form <- glue("{y} ~ {ifelse(is_empty(by), 1, cardx::bt(by))}") |> stats::as.formula() + form <- glue("{y} ~ {cardx::bt(by)}") |> stats::as.formula() # subset data on complete row ------------------------------------------------ data <- data[stats::complete.cases(data[all.vars(form)]), ] diff --git a/R/tbl_survfit_times.R b/R/tbl_survfit_times.R index 3f7ba955..111d37f2 100644 --- a/R/tbl_survfit_times.R +++ b/R/tbl_survfit_times.R @@ -13,7 +13,10 @@ #' Character vector of the statistics to report. #' May use any of the following statistics: #' `c(n.risk, estimate, std.error, conf.low, conf.high)`, -#' Default is `c("{n.risk}", "{estimate}%", "{conf.low}%, {conf.high}%")` +#' Default is `c("{n.risk}", "{estimate}", "({conf.low}, {conf.high})")` +#' +#' Statistics available to include when using `add_difference_row()` are: +#' `"estimate"`, `"std.error"`, `"statistic"`, `"conf.low"`, `"conf.high"`, `"p.value"`. #' @param estimate_fun (`function`) \cr #' Function used to style/round the `c(estimate, conf.low, conf.high)` statistics. #' @param x (`tbl_survfit_times`)\cr @@ -26,6 +29,7 @@ #' #' @returns a gtsummary table #' @name tbl_survfit_times +#' @order 1 #' #' @examples #' # Example 1 ---------------------------------- @@ -40,7 +44,7 @@ NULL #' @rdname tbl_survfit_times #' @export -#' @order 1 +#' @order 2 tbl_survfit_times <- function(data, times, y = "survival::Surv(time = AVAL, event = 1 - CNSR, type = 'right', origin = 0)", diff --git a/man/tbl_survfit_times.Rd b/man/tbl_survfit_times.Rd index 7c0840fb..fb30adbd 100644 --- a/man/tbl_survfit_times.Rd +++ b/man/tbl_survfit_times.Rd @@ -4,7 +4,7 @@ \alias{tbl_survfit_times} \alias{add_difference_row.tbl_survfit_times} \alias{add_overall.tbl_survfit_times} -\title{Add difference rows between groups} +\title{Survival Times} \usage{ tbl_survfit_times( data, @@ -21,7 +21,6 @@ tbl_survfit_times( x, reference, statistic = c("{estimate}", "({conf.low}, {conf.high})", "{p.value}"), - header = NULL, conf.level = 0.95, pvalue_fun = label_roche_pvalue(), estimate_fun = label_roche_number(digits = 2, scale = 100), @@ -59,7 +58,10 @@ the glue syntax injects the time estimate into the label.} Character vector of the statistics to report. May use any of the following statistics: \code{c(n.risk, estimate, std.error, conf.low, conf.high)}, -Default is \code{c("{n.risk}", "{estimate}\%", "{conf.low}\%, {conf.high}\%")}} +Default is \code{c("{n.risk}", "{estimate}", "({conf.low}, {conf.high})")} + +Statistics available to include when using \code{add_difference_row()} are: +\code{"estimate"}, \code{"std.error"}, \code{"statistic"}, \code{"conf.low"}, \code{"conf.high"}, \code{"p.value"}.} \item{estimate_fun}{(\code{function}) \cr Function used to style/round the \code{c(estimate, conf.low, conf.high)} statistics.} @@ -78,18 +80,15 @@ A stratified 'tbl_survfit_times' object} \item{reference}{(\code{string})\cr Value of the \code{tbl_survfit_times(by)} variable value that is the reference for each of the difference calculations. For factors, use the character -level.} - -\item{header}{(\code{string})\cr -When supplied, a header row will appear above the difference statistics.} +level. The reference column will appear as the leftmost column in the table.} \item{conf.level}{(\code{numeric})\cr a scalar in the interval \verb{(0, 1)} indicating the confidence level. Default is 0.95} \item{pvalue_fun}{(\code{function})\cr -Function to round and format p-values. Default is \code{label_style_pvalue()}. -The function must have a numeric vector input, and return a string that is -the rounded/formatted p-value (e.g. \code{pvalue_fun = label_style_pvalue(digits = 2)}).} +Function to round and format the \code{p.value} statistic. Default is \code{\link[=label_roche_pvalue]{label_roche_pvalue()}}. +The function must have a numeric vector input, and return a string that is the +rounded/formatted p-value (e.g. \code{pvalue_fun = label_style_pvalue(digits = 4)}).} \item{...}{These dots are for future extensions and must be empty.} @@ -101,18 +100,9 @@ Default is \code{FALSE}, which will display overall column first.} String indicating the column label. Default is \code{"**Overall** \nN = {style_number(N)}"}} } \value{ -a gtsummary table - a gtsummary table } \description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr -Adds difference to tables created by \code{\link[=tbl_survfit_times]{tbl_survfit_times()}} as additional rows. -This function is often useful when there are more than two groups to compare. - -Pairwise differences are calculated relative to the specified -\code{by} variable's specified reference level. - Create a gtsummary table with Kaplan-Meier estimated survival estimates and specified times. } @@ -121,21 +111,34 @@ When the \code{statistic} argument is modified, the statistic labels will likely also need to be updated. To change the label, call the \code{modify_table_body()} function to directly update the underlying \code{x$table_body} data frame. } +\section{Methods (by generic)}{ +\itemize{ +\item \code{add_difference_row(tbl_survfit_times)}: Adds difference between groups to tables created by \code{\link[=tbl_survfit_times]{tbl_survfit_times()}} as additional rows. + +Difference statistics are calculated using \code{\link[cardx:ard_survival_survfit_diff]{cardx::ard_survival_survfit_diff()}} +for all \code{tbl_survfit_times(times)} variable values, using \code{survfit} formula +\code{survival::survfit(y ~ by, data = data)} where \code{y}, \code{by} and \code{data} are +the inputs of the same names to the \code{tbl_survfit_times()} object \code{x}. + +Pairwise differences are calculated relative to the specified +\code{by} variable's specified reference level. + +}} \examples{ -# Example 2 - Survival Differences ----------- +# Example 1 ---------------------------------- tbl_survfit_times( data = cards::ADTTE, by = "TRTA", times = c(30, 60), label = "Day {time}" ) |> - add_difference_row(reference = "Placebo") -# Example 1 ---------------------------------- + add_overall() +# Example 2 - Survival Differences ----------- tbl_survfit_times( data = cards::ADTTE, by = "TRTA", times = c(30, 60), label = "Day {time}" ) |> - add_overall() + add_difference_row(reference = "Placebo") } From 83d0493d36311637a34c75b03b626a514589d6a9 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 16 Oct 2025 20:59:16 -0400 Subject: [PATCH 04/11] Roxygen --- R/add_difference_row.R | 13 +++++++------ man/tbl_survfit_times.Rd | 14 ++++++++------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/R/add_difference_row.R b/R/add_difference_row.R index 08eeb9c3..2ec42146 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -1,14 +1,15 @@ #' @describeIn tbl_survfit_times #' -#' Adds difference between groups to tables created by [`tbl_survfit_times()`] as additional rows. +#' Adds survival differences between groups as additional rows to tables created by [`tbl_survfit_times()`]. #' #' Difference statistics are calculated using [cardx::ard_survival_survfit_diff()] -#' for all `tbl_survfit_times(times)` variable values, using `survfit` formula -#' `survival::survfit(y ~ by, data = data)` where `y`, `by` and `data` are -#' the inputs of the same names to the `tbl_survfit_times()` object `x`. +#' for all `tbl_survfit_times(times)` variable values, using `survfit` formula: +#' ```r +#' survival::survfit(y ~ by, data = data) +#' ``` +#' where `y`, `by` and `data` are the inputs of the same names to the `tbl_survfit_times()` object `x`. #' -#' Pairwise differences are calculated relative to the specified -#' `by` variable's specified reference level. +#' Pairwise differences are calculated relative to the specified `by` variable's specified reference level. #' #' @inheritParams gtsummary::add_difference.tbl_summary #' @param reference (`string`)\cr diff --git a/man/tbl_survfit_times.Rd b/man/tbl_survfit_times.Rd index fb30adbd..6bc907ca 100644 --- a/man/tbl_survfit_times.Rd +++ b/man/tbl_survfit_times.Rd @@ -113,15 +113,17 @@ function to directly update the underlying \code{x$table_body} data frame. } \section{Methods (by generic)}{ \itemize{ -\item \code{add_difference_row(tbl_survfit_times)}: Adds difference between groups to tables created by \code{\link[=tbl_survfit_times]{tbl_survfit_times()}} as additional rows. +\item \code{add_difference_row(tbl_survfit_times)}: Adds survival differences between groups as additional rows to tables created by \code{\link[=tbl_survfit_times]{tbl_survfit_times()}}. Difference statistics are calculated using \code{\link[cardx:ard_survival_survfit_diff]{cardx::ard_survival_survfit_diff()}} -for all \code{tbl_survfit_times(times)} variable values, using \code{survfit} formula -\code{survival::survfit(y ~ by, data = data)} where \code{y}, \code{by} and \code{data} are -the inputs of the same names to the \code{tbl_survfit_times()} object \code{x}. +for all \code{tbl_survfit_times(times)} variable values, using \code{survfit} formula: -Pairwise differences are calculated relative to the specified -\code{by} variable's specified reference level. +\if{html}{\out{
}}\preformatted{survival::survfit(y ~ by, data = data) +}\if{html}{\out{
}} + +where \code{y}, \code{by} and \code{data} are the inputs of the same names to the \code{tbl_survfit_times()} object \code{x}. + +Pairwise differences are calculated relative to the specified \code{by} variable's specified reference level. }} \examples{ From bee5b9444209c6df2836d67945d303016bb6a2ef Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 17 Oct 2025 16:30:42 -0400 Subject: [PATCH 05/11] Add tests --- R/add_difference_row.R | 39 ++++++----- man/tbl_survfit_times.Rd | 2 +- tests/testthat/_snaps/add_difference_row.md | 56 ++++++++++++++++ tests/testthat/test-add_difference_row.R | 73 +++++++++++++++++++++ 4 files changed, 153 insertions(+), 17 deletions(-) create mode 100644 tests/testthat/_snaps/add_difference_row.md create mode 100644 tests/testthat/test-add_difference_row.R diff --git a/R/add_difference_row.R b/R/add_difference_row.R index 2ec42146..35c60bb8 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -19,7 +19,7 @@ #' @param pvalue_fun (`function`)\cr #' Function to round and format the `p.value` statistic. Default is [label_roche_pvalue()]. #' The function must have a numeric vector input, and return a string that is the -#' rounded/formatted p-value (e.g. `pvalue_fun = label_style_pvalue(digits = 4)`). +#' rounded/formatted p-value (e.g. `pvalue_fun = label_style_pvalue(digits = 3)`). #' #' @export #' @order 3 @@ -50,19 +50,22 @@ add_difference_row.tbl_survfit_times <- function(x, check_class(pvalue_fun, "function") check_class(estimate_fun, "function") - # check that input `x` has a `by` var and it has 2+ levels + # check that input `x` has a `by` var with 2+ levels if (is_empty(x$inputs$by)) { - "Cannot run {.fun add_difference_row} when {.code tbl_survfit_times()} does not include a {.arg by} argument." |> - cli::cli_abort(call = get_cli_abort_call()) + cli::cli_abort( + "Cannot run {.fun add_difference_row} when {.code tbl_survfit_times()} does not include a {.arg by} argument.", + call = get_cli_abort_call() + ) } - # check reference level is appropriate lst_by_levels <- x$table_styling$header |> dplyr::filter(grepl(pattern = "^stat_\\d*[1-9]\\d*$", x = .data$column)) |> dplyr::select("column", "modify_stat_level") |> deframe() |> lapply(FUN = as.character) + + # check reference level is appropriate if (!as.character(reference) %in% unlist(lst_by_levels)) { cli::cli_abort( "The {.arg reference} argument must be one of {.val {unlist(lst_by_levels)}}.", @@ -70,6 +73,7 @@ add_difference_row.tbl_survfit_times <- function(x, ) } + # get function inputs -------------------------------------------------------- func_inputs <- as.list(environment()) by <- x$inputs$by y <- x$inputs$y @@ -77,21 +81,20 @@ add_difference_row.tbl_survfit_times <- function(x, data <- x$inputs$data form <- glue("{y} ~ {cardx::bt(by)}") |> stats::as.formula() - # subset data on complete row ------------------------------------------------ + # subset data on complete row data <- data[stats::complete.cases(data[all.vars(form)]), ] # add reference level to the first position in factor data[[by]] <- fct_relevel(data[[by]], reference, after = 0L) ref_col <- names(lst_by_levels)[lst_by_levels == reference] - # move reference column to first position + # move reference column to first position in `x` x <- x |> gtsummary::modify_table_body( - ~ .x |> - dplyr::relocate(ref_col, .after = label) + ~ .x |> dplyr::relocate(all_of(ref_col), .after = label) ) - # calculate survival difference ---------------------------------------------------------------- + # calculate survival difference ---------------------------------------------- card <- cardx::ard_survival_survfit_diff( x = rlang::inject(survival::survfit(!!form, data = data)), @@ -112,6 +115,7 @@ add_difference_row.tbl_survfit_times <- function(x, variable_level = NULL ) + # add statistics to create an empty column for the reference level ard_surv_diff <- cards::bind_ard( card |> @@ -133,10 +137,12 @@ add_difference_row.tbl_survfit_times <- function(x, type = starts_with("time") ~ "continuous2", statistic = starts_with("time") ~ statistic ) |> + # remove time labels gtsummary::remove_row_type(type = "header") |> gtsummary::modify_table_body( ~ .x |> dplyr::mutate( + # add default labels label = dplyr::case_when( .data$label == "Survival Difference" ~ "Difference in Event Free Rate", .data$label == "(CI Lower Bound, CI Upper Bound)" ~ glue("{style_roche_number(conf.level, scale = 100)}% CI"), @@ -148,8 +154,10 @@ add_difference_row.tbl_survfit_times <- function(x, !!ref_col := NA ) ) |> + # indent rows gtsummary::modify_indent(columns = label, rows = row_type == "difference_row", indent = 8L) |> gtsummary::modify_indent(columns = label, rows = label == "Difference in Event Free Rate", indent = 4L) |> + # use — symbol as placeholder in reference column gtsummary::modify_missing_symbol( columns = x$table_styling$header |> @@ -159,22 +167,21 @@ add_difference_row.tbl_survfit_times <- function(x, symbol = "\U2014" ) + # add difference rows into tbl_survfit_times table x <- tbl_stack( tbls = list(x, tbl_surv_diff), quiet = TRUE ) |> - # move survival difference sections under each section for each matching survival time + # move survival difference rows under each section for each matching survival time gtsummary::modify_table_body( ~ .x |> dplyr::mutate( - variable = as.factor(variable), + variable_f = as.factor(variable), idx_row = dplyr::row_number() ) |> - dplyr::arrange(variable, idx_row) |> - dplyr::mutate( - variable = as.character(variable) - ) + dplyr::arrange(variable_f, idx_row) |> + dplyr::select(-variable_f, -idx_row) ) # add info to table ---------------------------------------------------------- diff --git a/man/tbl_survfit_times.Rd b/man/tbl_survfit_times.Rd index 6bc907ca..a85b5847 100644 --- a/man/tbl_survfit_times.Rd +++ b/man/tbl_survfit_times.Rd @@ -88,7 +88,7 @@ a scalar in the interval \verb{(0, 1)} indicating the confidence level. Default \item{pvalue_fun}{(\code{function})\cr Function to round and format the \code{p.value} statistic. Default is \code{\link[=label_roche_pvalue]{label_roche_pvalue()}}. The function must have a numeric vector input, and return a string that is the -rounded/formatted p-value (e.g. \code{pvalue_fun = label_style_pvalue(digits = 4)}).} +rounded/formatted p-value (e.g. \code{pvalue_fun = label_style_pvalue(digits = 3)}).} \item{...}{These dots are for future extensions and must be empty.} diff --git a/tests/testthat/_snaps/add_difference_row.md b/tests/testthat/_snaps/add_difference_row.md new file mode 100644 index 00000000..606eaa90 --- /dev/null +++ b/tests/testthat/_snaps/add_difference_row.md @@ -0,0 +1,56 @@ +# add_difference_row.tbl_survfit_times() works + + Code + as.data.frame(tbl1) + Output + Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) + 1 Time 30 + 2 Patients remaining at risk 69 38 42 + 3 Event Free Rate (%) 84.4 53.0 53.4 + 4 95% CI (77.0, 92.6) (42.8, 65.7) (43.4, 65.6) + 5 Difference in Event Free Rate 31.43 31.07 + 6 95% CI (17.66, 45.20) (17.56, 44.58) + 7 p-value (Z-test) <0.0001 <0.0001 + 8 Time 60 + 9 Patients remaining at risk 59 14 20 + 10 Event Free Rate (%) 76.8 24.3 31.1 + 11 95% CI (68.2, 86.6) (15.8, 37.3) (21.9, 44.1) + 12 Difference in Event Free Rate 52.54 45.77 + 13 95% CI (38.65, 66.43) (31.57, 59.97) + 14 p-value (Z-test) <0.0001 <0.0001 + +--- + + Code + as.data.frame(tbl3) + Output + Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) + 1 Time 30 + 2 Patients remaining at risk 69 38 42 + 3 Event Free Rate (%) 84.4 53.0 53.4 + 4 95% CI (77.0, 92.6) (42.8, 65.7) (43.4, 65.6) + 5 Survival Difference (Survival Difference Standard Error) 31.4 (0.1) 31.1 (0.1) + 6 z statistic (p = p-value) 4.5 (p = <0.001) 4.5 (p = <0.001) + 7 Time 60 + 8 Patients remaining at risk 59 14 20 + 9 Event Free Rate (%) 76.8 24.3 31.1 + 10 95% CI (68.2, 86.6) (15.8, 37.3) (21.9, 44.1) + 11 Survival Difference (Survival Difference Standard Error) 52.5 (0.1) 45.8 (0.1) + 12 z statistic (p = p-value) 7.4 (p = <0.001) 6.3 (p = <0.001) + +# add_difference_row.tbl_survfit_times() error messaging works + + Code + add_difference_row(tbl_survfit_times(data = cards::ADTTE, times = c(30, 60)), "Placebo") + Condition + Error in `add_difference_row()`: + ! Cannot run `add_difference_row()` when `tbl_survfit_times()` does not include a `by` argument. + +--- + + Code + add_difference_row(tbl_survfit_times(data = cards::ADTTE, by = TRTA, times = c(30, 60)), "No Treatment") + Condition + Error in `add_difference_row()`: + ! The `reference` argument must be one of "Placebo", "Xanomeline High Dose", and "Xanomeline Low Dose". + diff --git a/tests/testthat/test-add_difference_row.R b/tests/testthat/test-add_difference_row.R new file mode 100644 index 00000000..f22df917 --- /dev/null +++ b/tests/testthat/test-add_difference_row.R @@ -0,0 +1,73 @@ +skip_if_not(is_pkg_installed(c("survival", "withr"))) + +tbl <- + tbl_survfit_times( + data = cards::ADTTE, + by = TRTA, + times = c(30, 60) + ) + +test_that("add_difference_row.tbl_survfit_times() works", { + withr::local_options(list(width = 200)) + expect_silent( + tbl1 <- tbl |> + add_difference_row(reference = "Placebo") + ) + expect_snapshot(as.data.frame(tbl1)) + + # works with different reference column + expect_silent( + tbl2 <- tbl |> + add_difference_row(reference = "Xanomeline Low Dose") + ) + expect_equal( + as.data.frame(tbl2) |> names(), + c("", "Xanomeline Low Dose \n(N = 84)", "Placebo \n(N = 86)", "Xanomeline High Dose \n(N = 84)") + ) + + # works with custom statistics/formats + expect_silent( + tbl3 <- tbl |> + add_difference_row( + reference = "Placebo", + statistic = c("{estimate} ({std.error})", "{statistic} (p = {p.value})"), + pvalue_fun = label_style_pvalue(digits = 3), + estimate_fun = label_roche_number(digits = 1, scale = 100) + ) + ) + expect_snapshot(as.data.frame(tbl3)) + + # no error if overall column is present + expect_silent( + tbl4 <- tbl |> + add_overall(last = TRUE) |> + add_difference_row(reference = "Xanomeline High Dose") + ) + expect_equal( + as.data.frame(tbl4) |> names(), + c("", "Xanomeline High Dose \n(N = 84)", "Placebo \n(N = 86)", "Xanomeline Low Dose \n(N = 84)", "All Participants \nN = 254") + ) +}) + +test_that("add_difference_row.tbl_survfit_times() error messaging works", { + withr::local_options(list(width = 200)) + + expect_snapshot( + error = TRUE, + tbl_survfit_times( + data = cards::ADTTE, + times = c(30, 60) + ) |> + add_difference_row("Placebo") + ) + + expect_snapshot( + error = TRUE, + tbl_survfit_times( + data = cards::ADTTE, + by = TRTA, + times = c(30, 60) + ) |> + add_difference_row("No Treatment") + ) +}) From 3598b5b91390a95b323ebe3bc176ea3805c621b8 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 17 Oct 2025 16:31:37 -0400 Subject: [PATCH 06/11] tbl_survfit_times - update default statistic formats --- tests/testthat/_snaps/tbl_survfit_times.md | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/testthat/_snaps/tbl_survfit_times.md b/tests/testthat/_snaps/tbl_survfit_times.md index 924e3dc2..2e8cefac 100644 --- a/tests/testthat/_snaps/tbl_survfit_times.md +++ b/tests/testthat/_snaps/tbl_survfit_times.md @@ -6,12 +6,12 @@ Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) 1 Time 30 2 Patients remaining at risk 69 38 42 - 3 Event Free Rate (%) 84.4% 53.0% 53.4% - 4 95% CI 77.0%, 92.6% 42.8%, 65.7% 43.4%, 65.6% + 3 Event Free Rate (%) 84.4 53.0 53.4 + 4 95% CI (77.0, 92.6) (42.8, 65.7) (43.4, 65.6) 5 Time 60 6 Patients remaining at risk 59 14 20 - 7 Event Free Rate (%) 76.8% 24.3% 31.1% - 8 95% CI 68.2%, 86.6% 15.8%, 37.3% 21.9%, 44.1% + 7 Event Free Rate (%) 76.8 24.3 31.1 + 8 95% CI (68.2, 86.6) (15.8, 37.3) (21.9, 44.1) --- @@ -21,12 +21,12 @@ Drug A \n(N = 98) Drug B \n(N = 102) 1 Time 12 2 Patients remaining at risk 89 88 - 3 Event Free Rate (%) 90.8% 86.3% - 4 95% CI 85.3%, 96.7% 79.8%, 93.2% + 3 Event Free Rate (%) 90.8 86.3 + 4 95% CI (85.3, 96.7) (79.8, 93.2) 5 Time 15 6 Patients remaining at risk 83 75 - 7 Event Free Rate (%) 84.7% 73.5% - 8 95% CI 77.9%, 92.1% 65.4%, 82.6% + 7 Event Free Rate (%) 84.7 73.5 + 8 95% CI (77.9, 92.1) (65.4, 82.6) --- @@ -36,12 +36,12 @@ Overall \n(N = 254) 1 Time 30 2 Patients remaining at risk 149 - 3 Event Free Rate (%) 64.1% - 4 90% CI 59.1%, 69.4% + 3 Event Free Rate (%) 64.1 + 4 90% CI (59.1, 69.4) 5 Time 60 6 Patients remaining at risk 93 - 7 Event Free Rate (%) 45.7% - 8 90% CI 40.5%, 51.5% + 7 Event Free Rate (%) 45.7 + 8 90% CI (40.5, 51.5) # tbl_survfit_times(by) messaging @@ -68,6 +68,6 @@ Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) **All Participants** \nN = 254 1 Day 30 2 Patients remaining at risk 69 38 42 149 - 3 Event Free Rate (%) 84.4% 53.0% 53.4% 64.1% - 4 95% CI 77.0%, 92.6% 42.8%, 65.7% 43.4%, 65.6% 58.2%, 70.5% + 3 Event Free Rate (%) 84.4 53.0 53.4 64.1 + 4 95% CI (77.0, 92.6) (42.8, 65.7) (43.4, 65.6) (58.2, 70.5) From 9e2ed1ca78de960d6698c2cee41d8cbdd5b82a7e Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 17 Oct 2025 16:49:40 -0400 Subject: [PATCH 07/11] Fix notes --- R/add_difference_row.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/add_difference_row.R b/R/add_difference_row.R index 35c60bb8..16bc4671 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -91,7 +91,7 @@ add_difference_row.tbl_survfit_times <- function(x, # move reference column to first position in `x` x <- x |> gtsummary::modify_table_body( - ~ .x |> dplyr::relocate(all_of(ref_col), .after = label) + ~ .x |> dplyr::relocate(all_of(ref_col), .after = "label") ) # calculate survival difference ---------------------------------------------- @@ -155,8 +155,8 @@ add_difference_row.tbl_survfit_times <- function(x, ) ) |> # indent rows - gtsummary::modify_indent(columns = label, rows = row_type == "difference_row", indent = 8L) |> - gtsummary::modify_indent(columns = label, rows = label == "Difference in Event Free Rate", indent = 4L) |> + gtsummary::modify_indent(columns = "label", rows = .data$row_type == "difference_row", indent = 8L) |> + gtsummary::modify_indent(columns = "label", rows = .data$label == "Difference in Event Free Rate", indent = 4L) |> # use — symbol as placeholder in reference column gtsummary::modify_missing_symbol( columns = @@ -169,7 +169,7 @@ add_difference_row.tbl_survfit_times <- function(x, # add difference rows into tbl_survfit_times table x <- - tbl_stack( + gtsummary::tbl_stack( tbls = list(x, tbl_surv_diff), quiet = TRUE ) |> From 6496929f3aed448a154c0f1c9d2545333df3b0a6 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 17 Oct 2025 16:56:22 -0400 Subject: [PATCH 08/11] Use `skip_if_pkg_not_installed()` --- R/import-standalone-check_pkg_installed.R | 93 +++++++++------------ tests/testthat/test-add_difference_row.R | 2 +- tests/testthat/test-tbl_roche_summary.R | 2 +- tests/testthat/test-tbl_survfit_quantiles.R | 2 +- tests/testthat/test-tbl_survfit_times.R | 2 +- 5 files changed, 42 insertions(+), 59 deletions(-) diff --git a/R/import-standalone-check_pkg_installed.R b/R/import-standalone-check_pkg_installed.R index 10575803..c97004e7 100644 --- a/R/import-standalone-check_pkg_installed.R +++ b/R/import-standalone-check_pkg_installed.R @@ -6,7 +6,7 @@ # --- # repo: insightsengineering/standalone # file: standalone-check_pkg_installed.R -# last-updated: 2025-02-03 +# last-updated: 2025-10-03 # license: https://unlicense.org # dependencies: standalone-cli_call_env.R # imports: [rlang, dplyr, tidyr] @@ -17,6 +17,9 @@ # ## Changelog # 2025-02-03 # - `get_pkg_dependencies()` was updated to use base r equivalents for `str_extract()` and `str_remove_all()`. +# +# 2025-10-03 +# - `skip_if_pkg_not_installed()` was added. # nocov start # styler: off @@ -38,6 +41,9 @@ #' #' - `get_min_version_required()` will return, if any, the minimum version of `pkg` required by `ref`. #' +#' - `skip_if_pkg_not_installed()` checks whether packages are installed (with the minimum required version) +#' and skips tests if any are not installed. +#' #' @param pkg (`character`)\cr #' vector of package names to check. #' @param call (`environment`)\cr @@ -83,18 +89,13 @@ check_pkg_installed <- function(pkg, if (!is.character(ref) && !is.null(ref)) cli::cli_abort("{.arg ref} must be a string.") # get min version data ------------------------------------------------------- - df_pkg_min_version <- - get_min_version_required(pkg = pkg, ref = ref) + df_pkg_min_version <- get_min_version_required(pkg = pkg, ref = ref) # prompt user to install package --------------------------------------------- rlang::check_installed( pkg = df_pkg_min_version$pkg, - version = df_pkg_min_version$version, - compare = df_pkg_min_version$compare, call = call - ) |> - # this can be removed after this issue is resolved https://github.com/r-lib/rlang/issues/1694 - suppressWarnings() + ) } #' @inheritParams check_pkg_installed @@ -108,14 +109,11 @@ is_pkg_installed <- function(pkg, df_pkg_min_version <- get_min_version_required(pkg = pkg, ref = ref) + # check installation TRUE/FALSE ---------------------------------------------- rlang::is_installed( - pkg = df_pkg_min_version$pkg, - version = df_pkg_min_version$version, - compare = df_pkg_min_version$compare - ) |> - # this can be removed after this issue is resolved https://github.com/r-lib/rlang/issues/1694 - suppressWarnings() + pkg = df_pkg_min_version$pkg + ) } #' @inheritParams check_pkg_installed @@ -140,47 +138,20 @@ get_pkg_dependencies <- function(pkg = utils::packageName(), lib.loc = NULL) { unclass() |> dplyr::as_tibble() |> dplyr::select( - dplyr::any_of(c( - "Package", "Version", "Imports", "Depends", - "Suggests", "Enhances", "LinkingTo" - )) - ) |> - dplyr::rename( - reference_pkg = "Package", - reference_pkg_version = "Version" + dplyr::any_of(c("Imports", "Depends", "Suggests", "Enhances", "LinkingTo")) ) |> - tidyr::pivot_longer( - -dplyr::all_of(c("reference_pkg", "reference_pkg_version")), - values_to = "pkg", - names_to = "dependency_type", - ) |> - tidyr::separate_rows("pkg", sep = ",") |> + tidyr::pivot_longer(cols = dplyr::everything(), names_to = NULL, values_to = "pkg") |> + tidyr::separate_longer_delim(dplyr::everything(), delim = ",") |> dplyr::mutate( pkg = trimws( x = gsub(x = .data$pkg, pattern = "\\s+", replacement = " "), which = "both", whitespace = "[ \t\r\n]" ) - ) |> - dplyr::filter(!is.na(.data$pkg)) |> - tidyr::separate( - .data$pkg, - into = c("pkg", "version"), - sep = " ", extra = "merge", fill = "right" - ) |> - dplyr::mutate( - compare = as.character(ifelse(regexpr("[>=<]+", .data$version) > 0, - regmatches(.data$version, regexpr("[>=<]+", .data$version)), - NA)), - version = gsub(pattern = "[\\(\\) >=<]", replacement = "", x = .data$version) ) } .empty_pkg_deps_df <- function() { - dplyr::tibble( - reference_pkg = character(0L), reference_pkg_version = character(0L), - dependency_type = character(0L), pkg = character(0L), - version = character(0L), compare = character(0L) - ) + dplyr::tibble(pkg = character(0L)) } #' @inheritParams check_pkg_installed @@ -200,17 +171,29 @@ get_min_version_required <- function(pkg, ref = utils::packageName(), lib.loc = ) } - # get the package_ref deps and subset on requested pkgs, also supplement df with pkgs - # that may not be proper deps of the reference package (these pkgs don't have min versions) - res <- - get_pkg_dependencies(ref, lib.loc = lib.loc) |> - dplyr::filter(.data$pkg %in% .env$pkg) |> - dplyr::full_join( - dplyr::tibble(pkg = pkg), - by = "pkg" - ) + # get the package_ref deps and subset on requested pkgs + res <- get_pkg_dependencies(ref, lib.loc = lib.loc) |> + dplyr::filter(grepl(paste0(paste0(.env$pkg, "(\\s|$)"), collapse = "|"), .data$pkg)) + + # supplement df with pkgs that may not be proper deps of the reference package (these pkgs don't have min versions) + pkg_add <- which(sapply(pkg, \(x) !grepl(x, paste0(res$pkg, collapse = "|")))) |> names() + res |> + dplyr::full_join(dplyr::tibble(pkg = pkg_add), by = "pkg") +} - res +skip_if_pkg_not_installed <- function(pkg, + ref = utils::packageName()) { + pkg_deps <- get_min_version_required(pkg, ref = ref) + for (p in pkg_deps$pkg) { + pkg_installed <- rlang::is_installed(p) + if (!pkg_installed) { + # skip if any required package is not installed + testthat::skip(message = paste( + "Required package", shQuote(p, type = "sh"), "is not installed" + )) + } + } + invisible() } # nocov end diff --git a/tests/testthat/test-add_difference_row.R b/tests/testthat/test-add_difference_row.R index f22df917..ebd87022 100644 --- a/tests/testthat/test-add_difference_row.R +++ b/tests/testthat/test-add_difference_row.R @@ -1,4 +1,4 @@ -skip_if_not(is_pkg_installed(c("survival", "withr"))) +skip_if_pkg_not_installed(c("survival", "withr")) tbl <- tbl_survfit_times( diff --git a/tests/testthat/test-tbl_roche_summary.R b/tests/testthat/test-tbl_roche_summary.R index 9cce553d..b2621d4f 100644 --- a/tests/testthat/test-tbl_roche_summary.R +++ b/tests/testthat/test-tbl_roche_summary.R @@ -1,4 +1,4 @@ -skip_if_not(is_pkg_installed("withr")) +skip_if_pkg_not_installed("withr") test_that("tbl_roche_summary() works", { expect_silent( diff --git a/tests/testthat/test-tbl_survfit_quantiles.R b/tests/testthat/test-tbl_survfit_quantiles.R index 51fee101..65c42d82 100644 --- a/tests/testthat/test-tbl_survfit_quantiles.R +++ b/tests/testthat/test-tbl_survfit_quantiles.R @@ -1,4 +1,4 @@ -skip_if_not(is_pkg_installed(c("survival", "withr"))) +skip_if_pkg_not_installed(c("survival", "withr")) test_that("tbl_survfit_quantiles() works", { withr::local_options(list(width = 120)) diff --git a/tests/testthat/test-tbl_survfit_times.R b/tests/testthat/test-tbl_survfit_times.R index 39509a70..1fae42dd 100644 --- a/tests/testthat/test-tbl_survfit_times.R +++ b/tests/testthat/test-tbl_survfit_times.R @@ -1,4 +1,4 @@ -skip_if_not(is_pkg_installed(c("survival", "withr"))) +skip_if_pkg_not_installed(c("survival", "withr")) test_that("tbl_survfit_times() works", { withr::local_options(list(width = 120)) From 0b782c1c133cff858c0b8029c46c99d8a026683b Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 17 Oct 2025 17:02:45 -0400 Subject: [PATCH 09/11] Styler --- R/add_difference_row.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/add_difference_row.R b/R/add_difference_row.R index 16bc4671..d1d655a6 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -161,8 +161,8 @@ add_difference_row.tbl_survfit_times <- function(x, gtsummary::modify_missing_symbol( columns = x$table_styling$header |> - dplyr::filter(.data$modify_stat_level == .env$reference) |> - dplyr::pull("column"), + dplyr::filter(.data$modify_stat_level == .env$reference) |> + dplyr::pull("column"), rows = .data$row_type == "difference_row", symbol = "\U2014" ) From 960d985fcaa4ad5173c9621fd7715fdbf1f2aba6 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 17 Oct 2025 17:19:46 -0400 Subject: [PATCH 10/11] Explicitly order times --- R/add_difference_row.R | 6 +-- tests/testthat/_snaps/add_difference_row.md | 54 ++++++++++----------- tests/testthat/test-add_difference_row.R | 2 +- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/R/add_difference_row.R b/R/add_difference_row.R index d1d655a6..2273cde1 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -144,7 +144,7 @@ add_difference_row.tbl_survfit_times <- function(x, dplyr::mutate( # add default labels label = dplyr::case_when( - .data$label == "Survival Difference" ~ "Difference in Event Free Rate", + .data$label == "Survival Difference" ~ "Difference in Event Free Rates", .data$label == "(CI Lower Bound, CI Upper Bound)" ~ glue("{style_roche_number(conf.level, scale = 100)}% CI"), .data$label == "p-value" ~ "p-value (Z-test)", .default = .data$label @@ -156,7 +156,7 @@ add_difference_row.tbl_survfit_times <- function(x, ) |> # indent rows gtsummary::modify_indent(columns = "label", rows = .data$row_type == "difference_row", indent = 8L) |> - gtsummary::modify_indent(columns = "label", rows = .data$label == "Difference in Event Free Rate", indent = 4L) |> + gtsummary::modify_indent(columns = "label", rows = .data$label == "Difference in Event Free Rates", indent = 4L) |> # use — symbol as placeholder in reference column gtsummary::modify_missing_symbol( columns = @@ -177,7 +177,7 @@ add_difference_row.tbl_survfit_times <- function(x, gtsummary::modify_table_body( ~ .x |> dplyr::mutate( - variable_f = as.factor(variable), + variable_f = factor(variable, levels = unique(.data$variable)), idx_row = dplyr::row_number() ) |> dplyr::arrange(variable_f, idx_row) |> diff --git a/tests/testthat/_snaps/add_difference_row.md b/tests/testthat/_snaps/add_difference_row.md index 606eaa90..093be92f 100644 --- a/tests/testthat/_snaps/add_difference_row.md +++ b/tests/testthat/_snaps/add_difference_row.md @@ -3,21 +3,21 @@ Code as.data.frame(tbl1) Output - Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) - 1 Time 30 - 2 Patients remaining at risk 69 38 42 - 3 Event Free Rate (%) 84.4 53.0 53.4 - 4 95% CI (77.0, 92.6) (42.8, 65.7) (43.4, 65.6) - 5 Difference in Event Free Rate 31.43 31.07 - 6 95% CI (17.66, 45.20) (17.56, 44.58) - 7 p-value (Z-test) <0.0001 <0.0001 - 8 Time 60 - 9 Patients remaining at risk 59 14 20 - 10 Event Free Rate (%) 76.8 24.3 31.1 - 11 95% CI (68.2, 86.6) (15.8, 37.3) (21.9, 44.1) - 12 Difference in Event Free Rate 52.54 45.77 - 13 95% CI (38.65, 66.43) (31.57, 59.97) - 14 p-value (Z-test) <0.0001 <0.0001 + Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) + 1 Time 60 + 2 Patients remaining at risk 59 14 20 + 3 Event Free Rate (%) 76.8 24.3 31.1 + 4 95% CI (68.2, 86.6) (15.8, 37.3) (21.9, 44.1) + 5 Difference in Event Free Rates 52.54 45.77 + 6 95% CI (38.65, 66.43) (31.57, 59.97) + 7 p-value (Z-test) <0.0001 <0.0001 + 8 Time 120 + 9 Patients remaining at risk 45 4 8 + 10 Event Free Rate (%) 64.3 9.2 14.7 + 11 95% CI (54.5, 76.0) (3.8, 22.1) (8.0, 27.1) + 12 Difference in Event Free Rates 55.16 49.68 + 13 95% CI (41.76, 68.56) (35.70, 63.65) + 14 p-value (Z-test) <0.0001 <0.0001 --- @@ -25,18 +25,18 @@ as.data.frame(tbl3) Output Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) - 1 Time 30 - 2 Patients remaining at risk 69 38 42 - 3 Event Free Rate (%) 84.4 53.0 53.4 - 4 95% CI (77.0, 92.6) (42.8, 65.7) (43.4, 65.6) - 5 Survival Difference (Survival Difference Standard Error) 31.4 (0.1) 31.1 (0.1) - 6 z statistic (p = p-value) 4.5 (p = <0.001) 4.5 (p = <0.001) - 7 Time 60 - 8 Patients remaining at risk 59 14 20 - 9 Event Free Rate (%) 76.8 24.3 31.1 - 10 95% CI (68.2, 86.6) (15.8, 37.3) (21.9, 44.1) - 11 Survival Difference (Survival Difference Standard Error) 52.5 (0.1) 45.8 (0.1) - 12 z statistic (p = p-value) 7.4 (p = <0.001) 6.3 (p = <0.001) + 1 Time 60 + 2 Patients remaining at risk 59 14 20 + 3 Event Free Rate (%) 76.8 24.3 31.1 + 4 95% CI (68.2, 86.6) (15.8, 37.3) (21.9, 44.1) + 5 Survival Difference (Survival Difference Standard Error) 52.5 (0.1) 45.8 (0.1) + 6 z statistic (p = p-value) 7.4 (p = <0.001) 6.3 (p = <0.001) + 7 Time 120 + 8 Patients remaining at risk 45 4 8 + 9 Event Free Rate (%) 64.3 9.2 14.7 + 10 95% CI (54.5, 76.0) (3.8, 22.1) (8.0, 27.1) + 11 Survival Difference (Survival Difference Standard Error) 55.2 (0.1) 49.7 (0.1) + 12 z statistic (p = p-value) 8.1 (p = <0.001) 7.0 (p = <0.001) # add_difference_row.tbl_survfit_times() error messaging works diff --git a/tests/testthat/test-add_difference_row.R b/tests/testthat/test-add_difference_row.R index ebd87022..8b28e4b4 100644 --- a/tests/testthat/test-add_difference_row.R +++ b/tests/testthat/test-add_difference_row.R @@ -4,7 +4,7 @@ tbl <- tbl_survfit_times( data = cards::ADTTE, by = TRTA, - times = c(30, 60) + times = c(60, 120) ) test_that("add_difference_row.tbl_survfit_times() works", { From a6141539876398de0c77d3636773e4958645a389 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 28 Oct 2025 21:29:44 -0400 Subject: [PATCH 11/11] Use gtsummary::add_difference_row directly --- R/add_difference_row.R | 113 +++++++++----------- tests/testthat/_snaps/add_difference_row.md | 8 +- 2 files changed, 53 insertions(+), 68 deletions(-) diff --git a/R/add_difference_row.R b/R/add_difference_row.R index 2273cde1..8df64264 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -81,9 +81,6 @@ add_difference_row.tbl_survfit_times <- function(x, data <- x$inputs$data form <- glue("{y} ~ {cardx::bt(by)}") |> stats::as.formula() - # subset data on complete row - data <- data[stats::complete.cases(data[all.vars(form)]), ] - # add reference level to the first position in factor data[[by]] <- fct_relevel(data[[by]], reference, after = 0L) ref_col <- names(lst_by_levels)[lst_by_levels == reference] @@ -95,77 +92,58 @@ add_difference_row.tbl_survfit_times <- function(x, ) # calculate survival difference ---------------------------------------------- - card <- + survfit_diff_ard_fun <- function(data, variable, ...) { cardx::ard_survival_survfit_diff( x = rlang::inject(survival::survfit(!!form, data = data)), - times = times, + times = as.numeric(variable), conf.level = conf.level ) |> - dplyr::filter(!.data$stat_name %in% c("method", "reference_level")) |> - cards::update_ard_fmt_fun( - stat_names = c("estimate", "conf.low", "conf.high"), - fmt_fun = estimate_fun - ) |> - cards::update_ard_fmt_fun( - stat_names = "p.value", - fmt_fun = pvalue_fun - ) |> - dplyr::mutate( - variable = paste0(.data$variable, unlist(.data$variable_level)), - variable_level = NULL - ) + dplyr::filter(!.data$stat_name %in% c("method", "reference_level")) + } - # add statistics to create an empty column for the reference level - ard_surv_diff <- - cards::bind_ard( - card |> - dplyr::filter( - unlist(.data$group1_level) == unlist(card$group1_level)[1] - ) |> - dplyr::mutate( - group1_level = as.list(factor(reference, levels = levels(data[[by]]))), - stat = list(NULL) - ), - card + # difference to be calculated for each time + for (t in times) data[[as.character(t)]] <- NA + + # create difference rows + tbl_surv_diff <- + data |> + # create dummy table to add difference rows to + tbl_summary(by = any_of(by), include = as.character(times), missing = "no") |> + gtsummary::add_difference_row( + reference = reference, + statistic = everything() ~ statistic, + test = everything() ~ survfit_diff_ard_fun, + conf.level = conf.level, + pvalue_fun = pvalue_fun, + estimate_fun = everything() ~ estimate_fun ) # build gtsummary table ------------------------------------------------------ tbl_surv_diff <- - ard_surv_diff |> - gtsummary::tbl_ard_summary( - by = any_of(by), - type = starts_with("time") ~ "continuous2", - statistic = starts_with("time") ~ statistic - ) |> + tbl_surv_diff |> # remove time labels - gtsummary::remove_row_type(type = "header") |> gtsummary::modify_table_body( ~ .x |> + # remove dummy table label rows + dplyr::filter(row_type != "label") |> dplyr::mutate( + # match variable names to `x` + variable = paste0("time", .data$variable), # add default labels label = dplyr::case_when( .data$label == "Survival Difference" ~ "Difference in Event Free Rates", .data$label == "(CI Lower Bound, CI Upper Bound)" ~ glue("{style_roche_number(conf.level, scale = 100)}% CI"), .data$label == "p-value" ~ "p-value (Z-test)", .default = .data$label - ), - # update row_type - row_type = "difference_row", - !!ref_col := NA + ) ) ) |> # indent rows gtsummary::modify_indent(columns = "label", rows = .data$row_type == "difference_row", indent = 8L) |> - gtsummary::modify_indent(columns = "label", rows = .data$label == "Difference in Event Free Rates", indent = 4L) |> - # use — symbol as placeholder in reference column - gtsummary::modify_missing_symbol( - columns = - x$table_styling$header |> - dplyr::filter(.data$modify_stat_level == .env$reference) |> - dplyr::pull("column"), - rows = .data$row_type == "difference_row", - symbol = "\U2014" - ) + gtsummary::modify_indent(columns = "label", rows = .data$label == "Difference in Event Free Rates", indent = 4L) + + # remove ARD for dummy table rows + tbl_surv_diff$cards$tbl_summary <- NULL # add difference rows into tbl_survfit_times table x <- @@ -175,21 +153,28 @@ add_difference_row.tbl_survfit_times <- function(x, ) |> # move survival difference rows under each section for each matching survival time gtsummary::modify_table_body( - ~ .x |> - dplyr::mutate( - variable_f = factor(variable, levels = unique(.data$variable)), - idx_row = dplyr::row_number() - ) |> - dplyr::arrange(variable_f, idx_row) |> - dplyr::select(-variable_f, -idx_row) + \(x) { + x |> + dplyr::mutate( + variable_f = factor(gsub("-row_difference", "", variable), levels = unique(x$variable)), + idx_row = dplyr::row_number() + ) |> + dplyr::arrange(variable_f, idx_row) |> + dplyr::select(-variable_f, -idx_row) + } ) # add info to table ---------------------------------------------------------- - x$call_list[["add_difference_row"]] <- match.call() - x$cards[["add_difference_row"]] <- card - - # print warnings/errors from calculations - x$cards[["add_difference_row"]] |> cards::print_ard_conditions() + x$call_list <- list( + "tbl_survfit_times" = x$tbls[[1]]$call_list, + "add_difference_row" = match.call() + ) + x$cards <- lapply(x$tbls, \(x) x$cards) |> unlist(recursive = FALSE) + x$inputs <- list( + "tbl_survfit_times" = x$tbls[[1]]$inputs, + "add_difference_row" = func_inputs + ) - x + x |> + structure(class = c("tbl_survfit_times", "gtsummary")) } diff --git a/tests/testthat/_snaps/add_difference_row.md b/tests/testthat/_snaps/add_difference_row.md index 093be92f..56dd9c93 100644 --- a/tests/testthat/_snaps/add_difference_row.md +++ b/tests/testthat/_snaps/add_difference_row.md @@ -29,14 +29,14 @@ 2 Patients remaining at risk 59 14 20 3 Event Free Rate (%) 76.8 24.3 31.1 4 95% CI (68.2, 86.6) (15.8, 37.3) (21.9, 44.1) - 5 Survival Difference (Survival Difference Standard Error) 52.5 (0.1) 45.8 (0.1) - 6 z statistic (p = p-value) 7.4 (p = <0.001) 6.3 (p = <0.001) + 5 Survival Difference (Survival Difference Standard Error) 52.5 (7.1) 45.8 (7.2) + 6 z statistic (p = p-value) 741.5 (p = <0.001) 631.8 (p = <0.001) 7 Time 120 8 Patients remaining at risk 45 4 8 9 Event Free Rate (%) 64.3 9.2 14.7 10 95% CI (54.5, 76.0) (3.8, 22.1) (8.0, 27.1) - 11 Survival Difference (Survival Difference Standard Error) 55.2 (0.1) 49.7 (0.1) - 12 z statistic (p = p-value) 8.1 (p = <0.001) 7.0 (p = <0.001) + 11 Survival Difference (Survival Difference Standard Error) 55.2 (6.8) 49.7 (7.1) + 12 z statistic (p = p-value) 806.8 (p = <0.001) 696.7 (p = <0.001) # add_difference_row.tbl_survfit_times() error messaging works