Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ Collate:
'surv-concordance_survival.R'
'surv-roc_auc_survival.R'
'surv-roc_curve_survival.R'
'surv-royston.R'
'template.R'
'validation.R'
'yardstick-package.R'
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ S3method(roc_aunp,data.frame)
S3method(roc_aunu,data.frame)
S3method(roc_curve,data.frame)
S3method(roc_curve_survival,data.frame)
S3method(royston_survival,data.frame)
S3method(rpd,data.frame)
S3method(rpiq,data.frame)
S3method(rsq,data.frame)
Expand Down Expand Up @@ -244,6 +245,8 @@ export(roc_aunu)
export(roc_aunu_vec)
export(roc_curve)
export(roc_curve_survival)
export(royston_survival)
export(royston_survival_vec)
export(rpd)
export(rpd_vec)
export(rpiq)
Expand Down
132 changes: 132 additions & 0 deletions R/surv-royston.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#' Royston-Sauerbei D statistic
#'
#' Compute the Royston-Sauerbei D statistic
#'
#' @family linear pred survival metrics
#' @templateVar fn royston_survival
#' @template return
#' @details
#'
#' Royston and Sauerbrei proposed $R^2_D$ as a measure of explained variation
#' on the log relative hazard scale based on the authors’ D statistic.
#' D measures prognostic separation of survival curves, and is closely related
#' to the standard deviation of the prognostic index.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how hard would it be to add the formula in the docs?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that the metric definition involves fitting a Cox model, there isn't a single equation/formula that captures it fully. If we describe the whole process, we should probably talk about how the implementation doesn't involve scaling with $\kappa$ since we are only interested in $R^2_D$ but not $D$. I've added code comments for future us or particularly invested users.

#'
#' Larger values of the score are associated with better model performance.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have a range of possible values?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the definition, I'd say it's asymptotically between 0 and 1, but I didn't come across a range in the paper. Because of this and because I didn't see it for the other survival metrics, I hadn't added a range.

#'
#' @inheritParams brier_survival
#'
#' @param estimate The column identifier for the predicted linear predictor,
#' this should be a numeric variable. This should be an unquoted column name
#' although this argument is passed by expression and supports
#' [quasiquotation][rlang::quasiquotation] (you can unquote column names). For
#' `_vec()` functions, a numeric vector.
#'
#' @param ... Currently not used.
#'
#' @author Hannah Frick
#'
#' @references
#'
#' Royston, P., Sauerbrei, W., "A new measure of prognostic separation in
#' survival data", Statistics in Medicine, 23, 723-748, 2004.
#'
#' @examples
#' royston_survival(
#' data = lung_surv,
#' truth = surv_obj,
#' estimate = .pred_linear_pred
#' )
#' @export
royston_survival <- function(data, ...) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought this was the most straightforward name but if you think it should carry a different name, let me know!

UseMethod("royston_survival")
}
royston_survival <- new_linear_pred_survival_metric(
royston_survival,
direction = "maximize"
)

#' @rdname royston_survival
#' @export
royston_survival.data.frame <- function(
data,
truth,
estimate,
na_rm = TRUE,
case_weights = NULL,
...
) {
linear_pred_survival_metric_summarizer(
name = "royston_survival",
fn = royston_survival_vec,
data = data,
truth = !!enquo(truth),
estimate = !!enquo(estimate),
na_rm = na_rm,
case_weights = !!enquo(case_weights)
)
}

#' @rdname royston_survival
#' @export
royston_survival_vec <- function(
truth,
estimate,
na_rm = TRUE,
case_weights = NULL,
...
) {
check_linear_pred_survival_metric(truth, estimate, case_weights)

if (na_rm) {
result <- yardstick_remove_missing(truth, estimate, case_weights)

truth <- result$truth
estimate <- result$estimate
case_weights <- result$case_weights
} else if (yardstick_any_missing(truth, estimate, case_weights)) {
return(NA_real_)
}

royston_survival_impl(truth, estimate, case_weights)
}

royston_survival_impl <- function(truth, estimate, case_weights) {
if (is.null(case_weights)) {
case_weights <- rep(1, length(estimate))
} else {
case_weights <- vec_cast(case_weights, to = double())
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't stop importance weights from going through this for the other metrics, so I also allowed it here

}

bns <- normal_score_blom(estimate, case_weights)

fit <- survival::coxph(truth ~ bns, weights = case_weights)
est <- unname(stats::coef(fit))
# the regression coefficient is sigma* in Royston & Sauerbrei 2004
# because `normal_score_blom()` returns their u_i (not z_i)
# we thus don't need to scale the coefficient with kappa here
est^2 / (est^2 + pi^2 / 6)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the pi^2 / 6 feels a little magic number to me, could you show where we got that from?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I originally went with the formula from Royston & Altman (2013) that Antoine linked. In Royston & Sauerbrei (2004), the definition of $R^2_D$, including the scaling factor $\sigma$, is in Appendix A2. ✨

}

normal_score_blom <- function(x, case_weights) {
# includes observations with zero weights
x_0 <- tibble::tibble(.row = seq_along(x), x = x)

rankits <- tibble::tibble(
.row = rep(seq_along(x), times = case_weights),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the nod to "weights are frequency weights"

x = rep(x, times = case_weights),
) |>
dplyr::mutate(
x_first = rank(.data$x, ties.method = "first"),
# does not need kappa (from Royston & Sauerbrei 2004) because it'll
# "disappear" into the baseline hazard of the Cox model in
# `royston_survival_impl()`
z = stats::qnorm((.data$x_first - 3 / 8) / (dplyr::n() + 1 / 4))
) |>
# average over ties
dplyr::mutate(s = mean(.data$z), .by = "x") |>
dplyr::slice(1, .by = .row)

dplyr::left_join(x_0, rankits, by = c(".row")) |>
dplyr::pull("s")
}
4 changes: 4 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ reference:
contents:
- concordance_survival

- title: Linear Predictor Survival Metrics
contents:
- royston_survival

- title: Curve Survival Functions
contents:
- roc_curve_survival
Expand Down
10 changes: 7 additions & 3 deletions data-raw/lung_surv.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ options(pillar.advice = FALSE, pillar.min_title_chars = Inf)

lung_data <-
survival::lung |>
select(time, status, age, sex, ph.ecog)
dplyr::select(time, status, age, sex, ph.ecog)

model_fit <-
survival_reg() |>
Expand All @@ -30,7 +30,11 @@ lung_surv <-
predict(model_fit, lung_data, type = "time"),
# We'll need the surv object
lung_data |> transmute(surv_obj = Surv(time, status))
) |>
.censoring_weights_graf(model_fit, .)
)
lung_surv <- .censoring_weights_graf(model_fit, lung_surv)
lung_surv <- lung_surv |>
bind_cols(
predict(model_fit, lung_data, type = "linear_pred")
)
Comment on lines +35 to +38
Copy link
Member Author

@hfrick hfrick Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding this below instead of with the other two predictions as to not break some tests that seem to rely on the order


usethis::use_data(lung_surv, overwrite = TRUE)
Binary file modified data/lung_surv.rda
Binary file not shown.
75 changes: 75 additions & 0 deletions man/royston_survival.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

104 changes: 104 additions & 0 deletions tests/testthat/test-surv-royston.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
test_that("comparison test with survival::royston", {
lung_data <- survival::lung |>
dplyr::select(time, status, age, sex, ph.ecog)
reference_fit <- survival::coxph(
survival::Surv(time, status) ~ age + sex + ph.ecog,
data = lung_data
)
royston_ref <- survival::royston(reference_fit)

lung_surv <- data_lung_surv()

res <- royston_survival(
data = lung_surv,
truth = surv_obj,
estimate = .pred_linear_pred
)

expect_equal(
res[[".estimate"]],
royston_ref["R.D"],
ignore_attr = TRUE,
tolerance = 1e-3
)
})

test_that("`normal_score_blom()` works with case weights", {
# weights without ties
x <- 1:10 + 0.5
case_weights <- rep(2, 10)
nsb <- normal_score_blom(x, case_weights)
expect_length(nsb, 10)
expect_equal(
nsb[1],
mean(qnorm((1:2 - 3 / 8) / (sum(case_weights) + 0.25)))
)

# weights and ties
x <- c(x, x[1:5], x[1:3])
case_weights <- c(case_weights, rep(1, 8))
nsb <- normal_score_blom(x, case_weights)
expect_length(nsb, 18)
expect_equal(
nsb[1],
mean(qnorm((1:4 - 3 / 8) / (sum(case_weights) + 0.25)))
)

# weights of zero
x <- 1:10 + 0.5
case_weights <- c(0, 0, rep(2, 8))
nsb <- normal_score_blom(x, case_weights)
expect_length(nsb, 10)
expect_true(all(is.na(nsb[1:2])))
})

test_that("case weights works with equal weights", {
lung_surv <- data_lung_surv()
lung_surv$wts <- rep(1, nrow(lung_surv))

res <- royston_survival(
data = lung_surv,
truth = surv_obj,
estimate = .pred_linear_pred
)

res_wts <- royston_survival(
data = lung_surv,
truth = surv_obj,
estimate = .pred_time,
case_weights = wts
)

expect_equal(
res[[".estimate"]],
res_wts[[".estimate"]]
)
})

test_that("works with hardhat case weights", {
lung_surv <- data_lung_surv()
lung_surv$case_wts <- c(rep(0, 10), rep(2, nrow(lung_surv) - 10))

df <- lung_surv

df$imp_wgt <- hardhat::importance_weights(lung_surv$case_wts)
df$freq_wgt <- hardhat::frequency_weights(lung_surv$case_wts)

expect_no_error(
royston_survival(
df,
truth = surv_obj,
.pred_time,
case_weights = imp_wgt
)
)

expect_no_error(
royston_survival(
df,
truth = surv_obj,
.pred_time,
case_weights = freq_wgt
)
)
})