diff --git a/DESCRIPTION b/DESCRIPTION index 419fb8e6..84663fb6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -122,6 +122,7 @@ Collate: 'surv-concordance_survival.R' 'surv-roc_auc_survival.R' 'surv-roc_curve_survival.R' + 'surv-royston.R' 'template.R' 'validation.R' 'yardstick-package.R' diff --git a/NAMESPACE b/NAMESPACE index e998640c..f24d3531 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -108,6 +108,7 @@ S3method(roc_aunp,data.frame) S3method(roc_aunu,data.frame) S3method(roc_curve,data.frame) S3method(roc_curve_survival,data.frame) +S3method(royston_survival,data.frame) S3method(rpd,data.frame) S3method(rpiq,data.frame) S3method(rsq,data.frame) @@ -244,6 +245,8 @@ export(roc_aunu) export(roc_aunu_vec) export(roc_curve) export(roc_curve_survival) +export(royston_survival) +export(royston_survival_vec) export(rpd) export(rpd_vec) export(rpiq) diff --git a/R/surv-royston.R b/R/surv-royston.R new file mode 100644 index 00000000..0499866c --- /dev/null +++ b/R/surv-royston.R @@ -0,0 +1,132 @@ +#' Royston-Sauerbei D statistic +#' +#' Compute the Royston-Sauerbei D statistic +#' +#' @family linear pred survival metrics +#' @templateVar fn royston_survival +#' @template return +#' @details +#' +#' Royston and Sauerbrei proposed $R^2_D$ as a measure of explained variation +#' on the log relative hazard scale based on the authors’ D statistic. +#' D measures prognostic separation of survival curves, and is closely related +#' to the standard deviation of the prognostic index. +#' +#' Larger values of the score are associated with better model performance. +#' +#' @inheritParams brier_survival +#' +#' @param estimate The column identifier for the predicted linear predictor, +#' this should be a numeric variable. This should be an unquoted column name +#' although this argument is passed by expression and supports +#' [quasiquotation][rlang::quasiquotation] (you can unquote column names). For +#' `_vec()` functions, a numeric vector. +#' +#' @param ... Currently not used. +#' +#' @author Hannah Frick +#' +#' @references +#' +#' Royston, P., Sauerbrei, W., "A new measure of prognostic separation in +#' survival data", Statistics in Medicine, 23, 723-748, 2004. +#' +#' @examples +#' royston_survival( +#' data = lung_surv, +#' truth = surv_obj, +#' estimate = .pred_linear_pred +#' ) +#' @export +royston_survival <- function(data, ...) { + UseMethod("royston_survival") +} +royston_survival <- new_linear_pred_survival_metric( + royston_survival, + direction = "maximize" +) + +#' @rdname royston_survival +#' @export +royston_survival.data.frame <- function( + data, + truth, + estimate, + na_rm = TRUE, + case_weights = NULL, + ... +) { + linear_pred_survival_metric_summarizer( + name = "royston_survival", + fn = royston_survival_vec, + data = data, + truth = !!enquo(truth), + estimate = !!enquo(estimate), + na_rm = na_rm, + case_weights = !!enquo(case_weights) + ) +} + +#' @rdname royston_survival +#' @export +royston_survival_vec <- function( + truth, + estimate, + na_rm = TRUE, + case_weights = NULL, + ... +) { + check_linear_pred_survival_metric(truth, estimate, case_weights) + + if (na_rm) { + result <- yardstick_remove_missing(truth, estimate, case_weights) + + truth <- result$truth + estimate <- result$estimate + case_weights <- result$case_weights + } else if (yardstick_any_missing(truth, estimate, case_weights)) { + return(NA_real_) + } + + royston_survival_impl(truth, estimate, case_weights) +} + +royston_survival_impl <- function(truth, estimate, case_weights) { + if (is.null(case_weights)) { + case_weights <- rep(1, length(estimate)) + } else { + case_weights <- vec_cast(case_weights, to = double()) + } + + bns <- normal_score_blom(estimate, case_weights) + + fit <- survival::coxph(truth ~ bns, weights = case_weights) + est <- unname(stats::coef(fit)) + # the regression coefficient is sigma* in Royston & Sauerbrei 2004 + # because `normal_score_blom()` returns their u_i (not z_i) + # we thus don't need to scale the coefficient with kappa here + est^2 / (est^2 + pi^2 / 6) +} + +normal_score_blom <- function(x, case_weights) { + # includes observations with zero weights + x_0 <- tibble::tibble(.row = seq_along(x), x = x) + + rankits <- tibble::tibble( + .row = rep(seq_along(x), times = case_weights), + x = rep(x, times = case_weights), + ) |> + dplyr::mutate( + x_first = rank(.data$x, ties.method = "first"), + # does not need kappa (from Royston & Sauerbrei 2004) because it'll + # "disappear" into the baseline hazard of the Cox model in + # `royston_survival_impl()` + z = stats::qnorm((.data$x_first - 3 / 8) / (dplyr::n() + 1 / 4)) + ) |> + # average over ties + dplyr::mutate(s = mean(.data$z), .by = "x") |> + dplyr::slice(1, .by = .row) + + dplyr::left_join(x_0, rankits, by = c(".row")) |> + dplyr::pull("s") +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 2d123afa..c323e164 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -90,6 +90,10 @@ reference: contents: - concordance_survival +- title: Linear Predictor Survival Metrics + contents: + - royston_survival + - title: Curve Survival Functions contents: - roc_curve_survival diff --git a/data-raw/lung_surv.R b/data-raw/lung_surv.R index 98c957ca..1f23ffcf 100644 --- a/data-raw/lung_surv.R +++ b/data-raw/lung_surv.R @@ -11,7 +11,7 @@ options(pillar.advice = FALSE, pillar.min_title_chars = Inf) lung_data <- survival::lung |> - select(time, status, age, sex, ph.ecog) + dplyr::select(time, status, age, sex, ph.ecog) model_fit <- survival_reg() |> @@ -30,7 +30,11 @@ lung_surv <- predict(model_fit, lung_data, type = "time"), # We'll need the surv object lung_data |> transmute(surv_obj = Surv(time, status)) - ) |> - .censoring_weights_graf(model_fit, .) + ) +lung_surv <- .censoring_weights_graf(model_fit, lung_surv) +lung_surv <- lung_surv |> + bind_cols( + predict(model_fit, lung_data, type = "linear_pred") + ) usethis::use_data(lung_surv, overwrite = TRUE) diff --git a/data/lung_surv.rda b/data/lung_surv.rda index 2aa569a1..b8c40a88 100644 Binary files a/data/lung_surv.rda and b/data/lung_surv.rda differ diff --git a/man/royston_survival.Rd b/man/royston_survival.Rd new file mode 100644 index 00000000..f5b8b1ba --- /dev/null +++ b/man/royston_survival.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/surv-royston.R +\name{royston_survival} +\alias{royston_survival} +\alias{royston_survival.data.frame} +\alias{royston_survival_vec} +\title{Royston-Sauerbei D statistic} +\usage{ +royston_survival(data, ...) + +\method{royston_survival}{data.frame}(data, truth, estimate, na_rm = TRUE, case_weights = NULL, ...) + +royston_survival_vec(truth, estimate, na_rm = TRUE, case_weights = NULL, ...) +} +\arguments{ +\item{data}{A \code{data.frame} containing the columns specified by \code{truth} and +\code{...}.} + +\item{...}{Currently not used.} + +\item{truth}{The column identifier for the true survival result (that +is created using \code{\link[survival:Surv]{survival::Surv()}}.). This should be an unquoted column name +although this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column names). For +\verb{_vec()} functions, an \code{\link[survival:Surv]{survival::Surv()}} object.} + +\item{estimate}{The column identifier for the predicted linear predictor, +this should be a numeric variable. This should be an unquoted column name +although this argument is passed by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column names). For +\verb{_vec()} functions, a numeric vector.} + +\item{na_rm}{A \code{logical} value indicating whether \code{NA} +values should be stripped before the computation proceeds.} + +\item{case_weights}{The optional column identifier for case weights. +This should be an unquoted column name that evaluates to a numeric column +in \code{data}. For \verb{_vec()} functions, a numeric vector, +\code{\link[hardhat:importance_weights]{hardhat::importance_weights()}}, or \code{\link[hardhat:frequency_weights]{hardhat::frequency_weights()}}.} +} +\value{ +A \code{tibble} with columns \code{.metric}, \code{.estimator}, +and \code{.estimate} and 1 row of values. + +For grouped data frames, the number of rows returned will be the same as +the number of groups. + +For \code{royston_survival_vec()}, a single \code{numeric} value (or \code{NA}). +} +\description{ +Compute the Royston-Sauerbei D statistic +} +\details{ +Royston and Sauerbrei proposed $R^2_D$ as a measure of explained variation +on the log relative hazard scale based on the authors’ D statistic. +D measures prognostic separation of survival curves, and is closely related +to the standard deviation of the prognostic index. + +Larger values of the score are associated with better model performance. +} +\examples{ +royston_survival( + data = lung_surv, + truth = surv_obj, + estimate = .pred_linear_pred +) +} +\references{ +Royston, P., Sauerbrei, W., "A new measure of prognostic separation in +survival data", Statistics in Medicine, 23, 723-748, 2004. +} +\author{ +Hannah Frick +} +\concept{linear pred survival metrics} diff --git a/tests/testthat/test-surv-royston.R b/tests/testthat/test-surv-royston.R new file mode 100644 index 00000000..d460e37e --- /dev/null +++ b/tests/testthat/test-surv-royston.R @@ -0,0 +1,104 @@ +test_that("comparison test with survival::royston", { + lung_data <- survival::lung |> + dplyr::select(time, status, age, sex, ph.ecog) + reference_fit <- survival::coxph( + survival::Surv(time, status) ~ age + sex + ph.ecog, + data = lung_data + ) + royston_ref <- survival::royston(reference_fit) + + lung_surv <- data_lung_surv() + + res <- royston_survival( + data = lung_surv, + truth = surv_obj, + estimate = .pred_linear_pred + ) + + expect_equal( + res[[".estimate"]], + royston_ref["R.D"], + ignore_attr = TRUE, + tolerance = 1e-3 + ) +}) + +test_that("`normal_score_blom()` works with case weights", { + # weights without ties + x <- 1:10 + 0.5 + case_weights <- rep(2, 10) + nsb <- normal_score_blom(x, case_weights) + expect_length(nsb, 10) + expect_equal( + nsb[1], + mean(qnorm((1:2 - 3 / 8) / (sum(case_weights) + 0.25))) + ) + + # weights and ties + x <- c(x, x[1:5], x[1:3]) + case_weights <- c(case_weights, rep(1, 8)) + nsb <- normal_score_blom(x, case_weights) + expect_length(nsb, 18) + expect_equal( + nsb[1], + mean(qnorm((1:4 - 3 / 8) / (sum(case_weights) + 0.25))) + ) + + # weights of zero + x <- 1:10 + 0.5 + case_weights <- c(0, 0, rep(2, 8)) + nsb <- normal_score_blom(x, case_weights) + expect_length(nsb, 10) + expect_true(all(is.na(nsb[1:2]))) +}) + +test_that("case weights works with equal weights", { + lung_surv <- data_lung_surv() + lung_surv$wts <- rep(1, nrow(lung_surv)) + + res <- royston_survival( + data = lung_surv, + truth = surv_obj, + estimate = .pred_linear_pred + ) + + res_wts <- royston_survival( + data = lung_surv, + truth = surv_obj, + estimate = .pred_time, + case_weights = wts + ) + + expect_equal( + res[[".estimate"]], + res_wts[[".estimate"]] + ) +}) + +test_that("works with hardhat case weights", { + lung_surv <- data_lung_surv() + lung_surv$case_wts <- c(rep(0, 10), rep(2, nrow(lung_surv) - 10)) + + df <- lung_surv + + df$imp_wgt <- hardhat::importance_weights(lung_surv$case_wts) + df$freq_wgt <- hardhat::frequency_weights(lung_surv$case_wts) + + expect_no_error( + royston_survival( + df, + truth = surv_obj, + .pred_time, + case_weights = imp_wgt + ) + ) + + expect_no_error( + royston_survival( + df, + truth = surv_obj, + .pred_time, + case_weights = freq_wgt + ) + ) +})