tidymodels · hfrick · Dec 9, 2025 · Dec 15, 2025 · Dec 15, 2025 · Dec 15, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -122,6 +122,7 @@ Collate:
     'surv-concordance_survival.R'
     'surv-roc_auc_survival.R'
     'surv-roc_curve_survival.R'
+    'surv-royston.R'
     'template.R'
     'validation.R'
     'yardstick-package.R'
diff --git a/NAMESPACE b/NAMESPACE
@@ -108,6 +108,7 @@ S3method(roc_aunp,data.frame)
 S3method(roc_aunu,data.frame)
 S3method(roc_curve,data.frame)
 S3method(roc_curve_survival,data.frame)
+S3method(royston_survival,data.frame)
 S3method(rpd,data.frame)
 S3method(rpiq,data.frame)
 S3method(rsq,data.frame)
@@ -244,6 +245,8 @@ export(roc_aunu)
 export(roc_aunu_vec)
 export(roc_curve)
 export(roc_curve_survival)
+export(royston_survival)
+export(royston_survival_vec)
 export(rpd)
 export(rpd_vec)
 export(rpiq)

diff --git a/R/surv-royston.R b/R/surv-royston.R
@@ -0,0 +1,132 @@
+#' Royston-Sauerbei D statistic
+#'
+#' Compute the Royston-Sauerbei D statistic
+#'
+#' @family linear pred survival metrics
+#' @templateVar fn royston_survival
+#' @template return
+#' @details
+#'
+#' Royston and Sauerbrei proposed $R^2_D$ as a measure of explained variation
+#' on the log relative hazard scale based on the authors’ D statistic.
+#' D measures prognostic separation of survival curves, and is closely related
+#' to the standard deviation of the prognostic index.
+#'
+#' Larger values of the score are associated with better model performance.
+#'
+#' @inheritParams brier_survival
+#'
+#' @param estimate The column identifier for the predicted linear predictor,
+#' this should be a numeric variable. This should be an unquoted column name
+#' although this argument is passed by expression and supports
+#' [quasiquotation][rlang::quasiquotation] (you can unquote column names). For
+#' `_vec()` functions, a numeric vector.
+#'
+#' @param ... Currently not used.
+#'
+#' @author Hannah Frick
+#'
+#' @references
+#'
+#' Royston, P., Sauerbrei, W., "A new measure of prognostic separation in
+#' survival data", Statistics in Medicine, 23, 723-748, 2004.
+#'
+#' @examples
+#' royston_survival(
+#'   data = lung_surv,
+#'   truth = surv_obj,
+#'   estimate = .pred_linear_pred
+#' )
+#' @export
+royston_survival <- function(data, ...) {
+  UseMethod("royston_survival")
+}
+royston_survival <- new_linear_pred_survival_metric(
+  royston_survival,
+  direction = "maximize"
+)
+
+#' @rdname royston_survival
+#' @export
+royston_survival.data.frame <- function(
+  data,
+  truth,
+  estimate,
+  na_rm = TRUE,
+  case_weights = NULL,
+  ...
+) {
+  linear_pred_survival_metric_summarizer(
+    name = "royston_survival",
+    fn = royston_survival_vec,
+    data = data,
+    truth = !!enquo(truth),
+    estimate = !!enquo(estimate),
+    na_rm = na_rm,
+    case_weights = !!enquo(case_weights)
+  )
+}
+
+#' @rdname royston_survival
+#' @export
+royston_survival_vec <- function(
+  truth,
+  estimate,
+  na_rm = TRUE,
+  case_weights = NULL,
+  ...
+) {
+  check_linear_pred_survival_metric(truth, estimate, case_weights)
+
+  if (na_rm) {
+    result <- yardstick_remove_missing(truth, estimate, case_weights)
+
+    truth <- result$truth
+    estimate <- result$estimate
+    case_weights <- result$case_weights
+  } else if (yardstick_any_missing(truth, estimate, case_weights)) {
+    return(NA_real_)
+  }
+
+  royston_survival_impl(truth, estimate, case_weights)
+}
+
+royston_survival_impl <- function(truth, estimate, case_weights) {
+  if (is.null(case_weights)) {
+    case_weights <- rep(1, length(estimate))
+  } else {
+    case_weights <- vec_cast(case_weights, to = double())
+  }
+
+  bns <- normal_score_blom(estimate, case_weights)
+
+  fit <- survival::coxph(truth ~ bns, weights = case_weights)
+  est <- unname(stats::coef(fit))
+  # the regression coefficient is sigma* in Royston & Sauerbrei 2004
+  # because `normal_score_blom()` returns their u_i (not z_i)
+  # we thus don't need to scale the coefficient with kappa here
+  est^2 / (est^2 + pi^2 / 6)
+}
+
+normal_score_blom <- function(x, case_weights) {
+  # includes observations with zero weights
+  x_0 <- tibble::tibble(.row = seq_along(x), x = x)
+
+  rankits <- tibble::tibble(
+    .row = rep(seq_along(x), times = case_weights),
+    x = rep(x, times = case_weights),
+  ) |>
+    dplyr::mutate(
+      x_first = rank(.data$x, ties.method = "first"),
+      # does not need kappa (from Royston & Sauerbrei 2004) because it'll
+      # "disappear" into the baseline hazard of the Cox model in
+      # `royston_survival_impl()`
+      z = stats::qnorm((.data$x_first - 3 / 8) / (dplyr::n() + 1 / 4))
+    ) |>
+    # average over ties
+    dplyr::mutate(s = mean(.data$z), .by = "x") |>
+    dplyr::slice(1, .by = .row)
+
+  dplyr::left_join(x_0, rankits, by = c(".row")) |>
+    dplyr::pull("s")
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -90,6 +90,10 @@ reference:
   contents:
   - concordance_survival
 
+- title: Linear Predictor Survival Metrics
+  contents:
+  - royston_survival
+
 - title: Curve Survival Functions
   contents:
   - roc_curve_survival

diff --git a/data-raw/lung_surv.R b/data-raw/lung_surv.R
@@ -11,7 +11,7 @@ options(pillar.advice = FALSE, pillar.min_title_chars = Inf)
 
 lung_data <-
   survival::lung |>
-  select(time, status, age, sex, ph.ecog)
+  dplyr::select(time, status, age, sex, ph.ecog)
 
 model_fit <-
   survival_reg() |>
@@ -30,7 +30,11 @@ lung_surv <-
     predict(model_fit, lung_data, type = "time"),
     # We'll need the surv object
     lung_data |> transmute(surv_obj = Surv(time, status))
-  ) |>
-  .censoring_weights_graf(model_fit, .)
+  )
+lung_surv <- .censoring_weights_graf(model_fit, lung_surv)
+lung_surv <- lung_surv |>
+  bind_cols(
+    predict(model_fit, lung_data, type = "linear_pred")
+  )
 
 usethis::use_data(lung_surv, overwrite = TRUE)
diff --git a/data/lung_surv.rda b/data/lung_surv.rda
diff --git a/man/royston_survival.Rd b/man/royston_survival.Rd
diff --git a/tests/testthat/test-surv-royston.R b/tests/testthat/test-surv-royston.R
@@ -0,0 +1,104 @@
+test_that("comparison test with survival::royston", {
+  lung_data <- survival::lung |>
+    dplyr::select(time, status, age, sex, ph.ecog)
+  reference_fit <- survival::coxph(
+    survival::Surv(time, status) ~ age + sex + ph.ecog,
+    data = lung_data
+  )
+  royston_ref <- survival::royston(reference_fit)
+
+  lung_surv <- data_lung_surv()
+
+  res <- royston_survival(
+    data = lung_surv,
+    truth = surv_obj,
+    estimate = .pred_linear_pred
+  )
+
+  expect_equal(
+    res[[".estimate"]],
+    royston_ref["R.D"],
+    ignore_attr = TRUE,
+    tolerance = 1e-3
+  )
+})
+
+test_that("`normal_score_blom()` works with case weights", {
+  # weights without ties
+  x <- 1:10 + 0.5
+  case_weights <- rep(2, 10)
+  nsb <- normal_score_blom(x, case_weights)
+  expect_length(nsb, 10)
+  expect_equal(
+    nsb[1],
+    mean(qnorm((1:2 - 3 / 8) / (sum(case_weights) + 0.25)))
+  )
+
+  # weights and ties
+  x <- c(x, x[1:5], x[1:3])
+  case_weights <- c(case_weights, rep(1, 8))
+  nsb <- normal_score_blom(x, case_weights)
+  expect_length(nsb, 18)
+  expect_equal(
+    nsb[1],
+    mean(qnorm((1:4 - 3 / 8) / (sum(case_weights) + 0.25)))
+  )
+
+  # weights of zero
+  x <- 1:10 + 0.5
+  case_weights <- c(0, 0, rep(2, 8))
+  nsb <- normal_score_blom(x, case_weights)
+  expect_length(nsb, 10)
+  expect_true(all(is.na(nsb[1:2])))
+})
+
+test_that("case weights works with equal weights", {
+  lung_surv <- data_lung_surv()
+  lung_surv$wts <- rep(1, nrow(lung_surv))
+
+  res <- royston_survival(
+    data = lung_surv,
+    truth = surv_obj,
+    estimate = .pred_linear_pred
+  )
+
+  res_wts <- royston_survival(
+    data = lung_surv,
+    truth = surv_obj,
+    estimate = .pred_time,
+    case_weights = wts
+  )
+
+  expect_equal(
+    res[[".estimate"]],
+    res_wts[[".estimate"]]
+  )
+})
+
+test_that("works with hardhat case weights", {
+  lung_surv <- data_lung_surv()
+  lung_surv$case_wts <- c(rep(0, 10), rep(2, nrow(lung_surv) - 10))
+
+  df <- lung_surv
+
+  df$imp_wgt <- hardhat::importance_weights(lung_surv$case_wts)
+  df$freq_wgt <- hardhat::frequency_weights(lung_surv$case_wts)
+
+  expect_no_error(
+    royston_survival(
+      df,
+      truth = surv_obj,
+      .pred_time,
+      case_weights = imp_wgt
+    )
+  )
+
+  expect_no_error(
+    royston_survival(
+      df,
+      truth = surv_obj,
+      .pred_time,
+      case_weights = freq_wgt
+    )
+  )
+})