diff --git a/NAMESPACE b/NAMESPACE index c59c727..4fc63ef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(generate_selectivity_curve) export(get_functional_groups) export(load_csv_environmental_data) export(load_csv_ewe) diff --git a/R/fit_selectivity_helpers.R b/R/fit_selectivity_helpers.R new file mode 100644 index 0000000..7dca7b2 --- /dev/null +++ b/R/fit_selectivity_helpers.R @@ -0,0 +1,235 @@ +#' Fit logistic selectivity parameters +#' +#' @description +#' Fit logistic curve parameters to a vector of normalized values. The logistic +#' function is defined as: S(x) = 1 / (1 + exp(-slope * (x - inflection_point))) +#' where x is the index position (1, 2, 3, ...). +#' +#' @param f_values A numeric vector of normalized values (0-1) for each +#' group, in the order they should appear on the x-axis. These values +#' represent relative selectivity, maturity, or any other quantity that +#' follows a logistic pattern. +#' +#' @return A tibble with one row per parameter, containing the standard FIMS +#' parameter format columns. +#' +#' @details +#' The function uses nonlinear least squares (nls) to fit a logistic curve to +#' the normalized values. The x-axis index is simply 1, 2, 3, ... based on +#' the position in the values vector. +#' +#' If the optimization fails, the function falls back to a simple heuristic +#' method to estimate the inflection point and slope. +#' +#' @keywords internal +fit_logistic_selectivity <- function(f_values) { + # Create index (1, 2, 3, ...) + index_values <- seq_along(f_values) + + # Define logistic function + logistic <- function(index, inflection_point, slope) { + 1 / (1 + exp(-slope * (index - inflection_point))) + } + + # Initial parameter guesses + # Inflection point: index where value is around 0.5 + midpoint_index <- which.min(abs(f_values - 0.5)) + initial_inflection <- if (length(midpoint_index) > 0) index_values[midpoint_index] else mean(index_values) + + # Slope: positive value, start with 1 + initial_slope <- 1 + + # Fit using nonlinear least squares + fit_result <- tryCatch( + { + stats::nls( + f_values ~ logistic(index_values, inflection_point, slope), + start = list(inflection_point = initial_inflection, slope = initial_slope), + control = stats::nls.control(maxiter = 100, warnOnly = TRUE) + ) + }, + error = function(e) { + # Fallback: use simple heuristic + NULL + } + ) + + if (!is.null(fit_result)) { + params <- stats::coef(fit_result) + inflection_point <- params["inflection_point"] + slope <- params["slope"] + } else { + # Fallback method: find inflection as midpoint of rise + # and estimate slope from the steepest part of the curve + inflection_point <- initial_inflection + + # Estimate slope from differences + if (length(index_values) > 1) { + diffs <- diff(f_values) / diff(index_values) + slope <- max(abs(diffs), 0.1) * 4 # Scale up since max derivative of logistic is slope/4 + } else { + slope <- 1 + } + } + + # Create output tibble in FIMS format (replace time with year/month) + tibble::tibble( + model_family = "catch_at_age", + module_name = "Selectivity", + module_type = "Logistic", + label = c("inflection_point", "slope"), + distribution_link = NA_character_, + age = NA_real_, + length = NA_real_, + year = NA_integer_, + month = NA_integer_, + value = c(inflection_point, slope), + estimation_type = "fixed_effects", + distribution_type = NA_character_, + distribution = NA_character_ + ) +} + + +#' Fit double logistic selectivity parameters +#' +#' @description +#' Fit double logistic (dome-shaped) curve parameters to a vector of normalized +#' values. The double logistic function combines ascending and descending logistic +#' curves: S(x) = 1 / (1 + exp(-slope_asc * (x - inflection_asc))) * +#' (1 - 1 / (1 + exp(-slope_desc * (x - inflection_desc)))) +#' +#' @param f_values A numeric vector of normalized values (0-1) for each +#' group, in the order they should appear on the x-axis. These values +#' represent relative selectivity, maturity, or any other quantity that +#' follows a dome-shaped pattern. +#' +#' @return A tibble with one row per parameter, containing the standard FIMS +#' parameter format columns. +#' +#' @details +#' The function attempts to fit a dome-shaped curve. If the data +#' appears monotonic (no dome shape), it will still fit the parameters but may +#' produce parameters that effectively reduce to a simple logistic. +#' +#' The fitting uses nonlinear least squares with constraints to ensure: +#' \itemize{ +#' \item Ascending inflection point comes before descending +#' \item Slopes are positive +#' \item The curve shape is biologically reasonable +#' } +#' +#' @keywords internal +fit_double_logistic_selectivity <- function(f_values) { + # Create index (1, 2, 3, ...) + index_values <- seq_along(f_values) + + # Define double logistic function + double_logistic <- function(index, inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending) { + ascending_component <- 1 / (1 + exp(-slope_ascending * (index - inflection_point_ascending))) + descending_component <- 1 - 1 / (1 + exp(-slope_descending * (index - inflection_point_descending))) + ascending_component * descending_component + } + + # Initial parameter guesses + # Find the peak of the values + peak_idx <- which.max(f_values) + peak_index <- index_values[peak_idx] + + # Ascending parameters: before peak + if (peak_idx > 1) { + ascending_f <- f_values[1:peak_idx] + ascending_indices <- index_values[1:peak_idx] + midpoint_ascending_index <- which.min(abs(ascending_f - 0.5)) + initial_inflection_point_ascending <- if (length(midpoint_ascending_index) > 0) ascending_indices[midpoint_ascending_index] else peak_index - 1 + } else { + initial_inflection_point_ascending <- peak_index - 1 + } + + # Descending parameters: after peak + if (peak_idx < length(index_values)) { + descending_f <- f_values[peak_idx:length(index_values)] + descending_indices <- index_values[peak_idx:length(index_values)] + # For descending, look for where it drops below peak/2 + midpoint_descending_index <- which.min(abs(descending_f - max(f_values) / 2)) + initial_inflection_point_descending <- if (length(midpoint_descending_index) > 0) descending_indices[midpoint_descending_index] else peak_index + 1 + } else { + initial_inflection_point_descending <- peak_index + 1 + } + + # Ensure descending inflection is after ascending + if (initial_inflection_point_descending <= initial_inflection_point_ascending) { + initial_inflection_point_descending <- initial_inflection_point_ascending + 1 + } + + initial_slope_ascending <- 1 + initial_slope_descending <- 1 + + # Fit using nonlinear least squares + fit_result <- tryCatch( + { + stats::nls( + f_values ~ double_logistic(index_values, inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending), + start = list( + inflection_point_ascending = initial_inflection_point_ascending, + slope_ascending = initial_slope_ascending, + inflection_point_descending = initial_inflection_point_descending, + slope_descending = initial_slope_descending + ), + control = stats::nls.control(maxiter = 200, warnOnly = TRUE), + algorithm = "port", + lower = c( + inflection_point_ascending = min(index_values), + slope_ascending = 0.01, + inflection_point_descending = initial_inflection_point_ascending + 0.5, + slope_descending = 0.01 + ), + upper = c( + inflection_point_ascending = max(index_values), + slope_ascending = 10, + inflection_point_descending = max(index_values) + 10, + slope_descending = 10 + ) + ) + }, + error = function(e) { + NULL + } + ) + + if (!is.null(fit_result)) { + params <- stats::coef(fit_result) + inflection_point_ascending <- params["inflection_point_ascending"] + slope_ascending <- params["slope_ascending"] + inflection_point_descending <- params["inflection_point_descending"] + slope_descending <- params["slope_descending"] + } else { + # Fallback to initial guesses + inflection_point_ascending <- initial_inflection_point_ascending + slope_ascending <- initial_slope_ascending + inflection_point_descending <- initial_inflection_point_descending + slope_descending <- initial_slope_descending + } + + # Create output tibble in FIMS format (replace time with year/month) + tibble::tibble( + model_family = "catch_at_age", + module_name = "Selectivity", + module_type = "DoubleLogistic", + label = c( + "inflection_point_asc", + "slope_asc", + "inflection_point_desc", + "slope_desc" + ), + distribution_link = NA_character_, + age = NA_real_, + length = NA_real_, + year = NA_integer_, + month = NA_integer_, + value = c(inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending), + estimation_type = "fixed_effects", + distribution_type = NA_character_, + distribution = NA_character_ + ) +} diff --git a/R/generate_selectivity_curve.R b/R/generate_selectivity_curve.R new file mode 100644 index 0000000..7dd8250 --- /dev/null +++ b/R/generate_selectivity_curve.R @@ -0,0 +1,290 @@ +# ...existing code... +## At the end of the file, add: +utils::globalVariables(c( + "functional_group", "type", "value", "year", "mean_f", "model_family", "module_name", "fleet_name", "module_type", "label", "distribution_link", "age", "length", "time", "estimation_type", "distribution_type", "distribution" +)) + +#' Generate selectivity curve parameters from ecosystem model F values +#' +#' @description +#' This function takes fishing mortality (F) values by functional group from an +#' ecosystem model and fits selectivity curve parameters based on a specified +#' functional form. The function is modular and can be extended to support +#' additional selectivity curve types. +#' +#' @param ewe_output A tibble containing ecosystem model output, typically from +#' [load_model()] with type = "ewe". Must contain columns: functional_group, +#' type, and value. The function will filter for type == "mortality". +#' @param functional_groups A character vector of functional group names in the +#' order desired for the x-axis of the selectivity curve. These should match +#' names in the functional_group column of ewe_output. +#' @param functional_form A character string specifying the selectivity curve +#' type. Currently supports "logistic" and "double_logistic". Additional +#' forms can be added by creating new helper functions. +#' @param fleet_name A character string specifying the fleet name to use in the +#' output tibble. Default is "fleet1". +#' +#' @return A tibble with the following columns: +#' \itemize{ +#' \item model_family: Always "catch_at_age" +#' \item module_name: Always "Selectivity" +#' \item fleet_name: Fleet name as specified in the input +#' \item module_type: The selectivity curve type (e.g., "Logistic", "DoubleLogistic") +#' \item label: Parameter name (e.g., "inflection_point", "slope") +#' \item distribution_link: Always NA +#' \item age: Always NA +#' \item length: Always NA +#' \item time: Always NA +#' \item value: The fitted parameter value +#' \item estimation_type: Always "fixed_effects" +#' \item distribution_type: Always NA +#' \item distribution: Always NA +#' } +#' +#' @details +#' The function extracts fishing mortality values for the specified functional +#' groups and uses a helper function specific to the chosen functional form to +#' fit selectivity parameters. The F values are normalized to be between 0 and 1 +#' before fitting, representing relative selectivity. +#' +#' New functional forms can be added by creating helper functions with the +#' naming pattern `fit_
_selectivity()` that take a vector of values and +#' return parameter estimates. To add a new selectivity form: +#' \enumerate{ +#' \item Create a helper function named `fit__selectivity()` that +#' takes a numeric vector of normalized values (0-1) and returns a tibble +#' with columns: model_family, module_name, module_type, label, +#' distribution_link, age, length, time, value, estimation_type, +#' distribution_type, and distribution. +#' \item The function will automatically be available by passing the form name +#' to the `functional_form` argument (e.g., "new_form" will call +#' `fit_new_form_selectivity()`). +#' } +#' +#' @export +#' @examples +#' \dontrun{ +#' # Load example data +#' data("ewe_nwatlantic_base", package = "ecosystemdata") +#' +#' # Generate logistic selectivity for spiny dogfish age groups +#' generate_selectivity_curve( +#' ewe_output = ewe_nwatlantic_base, +#' functional_groups = c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +"), +#' functional_form = "logistic", +#' fleet_name = "fleet1" +#' ) +#' +#' # Generate double logistic selectivity +#' generate_selectivity_curve( +#' ewe_output = ewe_nwatlantic_base, +#' functional_groups = c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +"), +#' functional_form = "double_logistic", +#' fleet_name = "fleet1" +#' ) +#' } +generate_selectivity_curve <- function(ewe_output, + functional_groups, + functional_form, + fleet_name = "fleet1") { + # Input validation + if (!inherits(ewe_output, "data.frame")) { + cli::cli_abort("{.var ewe_output} must be a data frame or tibble.") + } + + required_cols <- c("functional_group", "type", "value") + missing_cols <- setdiff(required_cols, names(ewe_output)) + if (length(missing_cols) > 0) { + cli::cli_abort(c( + "{.var ewe_output} is missing required columns:", + "x" = "Missing: {.var {missing_cols}}" + )) + } + + if (!inherits(functional_groups, "character")) { + cli::cli_abort("{.var functional_groups} must be a character vector.") + } + + if (length(functional_groups) < 2) { + cli::cli_abort(c( + "{.var functional_groups} must contain at least 2 groups.", + "i" = "Selectivity curves require multiple age/size classes." + )) + } + + if (!inherits(functional_form, "character") || length(functional_form) != 1) { + cli::cli_abort("{.var functional_form} must be a single character string.") + } + + # Check that functional groups exist in the data + missing_groups <- setdiff( + functional_groups, + unique(ewe_output[["functional_group"]]) + ) + if (length(missing_groups) > 0) { + cli::cli_abort(c( + "Some functional groups not found in {.var ewe_output}:", + "x" = "Missing: {.var {missing_groups}}" + )) + } + + # Extract mortality data for the specified functional groups + mortality_data <- ewe_output |> + dplyr::filter( + type == "mortality", + functional_group %in% functional_groups + ) + + if (nrow(mortality_data) == 0) { + cli::cli_abort(c( + "No mortality data found for the specified functional groups.", + "i" = "Check that type == 'mortality' exists in {.var ewe_output}" + )) + } + + # If year and month columns exist, fit parameters for each year-month separately + if (all(c("year", "month") %in% names(mortality_data))) { + year_months <- unique(mortality_data[, c("year", "month")]) + year_months <- year_months[order(year_months$year, year_months$month), ] + results_list <- apply(year_months, 1, function(ym) { + yr <- ym[["year"]] + mo <- ym[["month"]] + ym_data <- mortality_data |> + dplyr::filter(year == yr, month == mo) + # Calculate mean F value for each functional group for this year-month + f_values <- ym_data |> + dplyr::group_by(functional_group) |> + dplyr::summarise(mean_f = mean(value, na.rm = TRUE), .groups = "drop") |> + dplyr::right_join( + tibble::tibble(functional_group = functional_groups), + by = "functional_group" + ) |> + dplyr::pull(mean_f) + # Check for missing F values + if (any(is.na(f_values))) { + na_groups <- functional_groups[is.na(f_values)] + cli::cli_abort(c( + "Missing mortality data for some functional groups:", + "x" = "Groups with no data: {.var {na_groups}}" + )) + } + if (max(f_values) == 0) { + cli::cli_abort("All F values are zero. Cannot fit selectivity curve.") + } + normalized_f <- f_values / max(f_values) + helper_function_name <- paste0("fit_", tolower(functional_form), "_selectivity") + if (!exists(helper_function_name, mode = "function")) { + cli::cli_abort(c( + "Unsupported functional form: {.val {functional_form}}", + "i" = "Currently supported: 'logistic', 'double_logistic'", + "i" = "To add support, create a function named {.fn {helper_function_name}}" + )) + } + helper_function <- get(helper_function_name, mode = "function") + result <- helper_function(normalized_f) + result[["fleet_name"]] <- fleet_name + result[["year"]] <- as.integer(yr) + result[["month"]] <- as.integer(mo) + result + }) + result <- dplyr::bind_rows(results_list) + # Reorder columns to match expected output format + result <- result |> + dplyr::select( + model_family, module_name, fleet_name, module_type, + label, distribution_link, age, length, year, month, value, + estimation_type, distribution_type, distribution + ) + return(result) + } else if ("year" %in% names(mortality_data)) { + years <- sort(unique(mortality_data$year)) + results_list <- lapply(years, function(yr) { + year_data <- mortality_data |> + dplyr::filter(year == yr) + # Calculate mean F value for each functional group for this year + f_values <- year_data |> + dplyr::group_by(functional_group) |> + dplyr::summarise(mean_f = mean(value, na.rm = TRUE), .groups = "drop") |> + dplyr::right_join( + tibble::tibble(functional_group = functional_groups), + by = "functional_group" + ) |> + dplyr::pull(mean_f) + # Check for missing F values + if (any(is.na(f_values))) { + na_groups <- functional_groups[is.na(f_values)] + cli::cli_abort(c( + "Missing mortality data for some functional groups:", + "x" = "Groups with no data: {.var {na_groups}}" + )) + } + if (max(f_values) == 0) { + cli::cli_abort("All F values are zero. Cannot fit selectivity curve.") + } + normalized_f <- f_values / max(f_values) + helper_function_name <- paste0("fit_", tolower(functional_form), "_selectivity") + if (!exists(helper_function_name, mode = "function")) { + cli::cli_abort(c( + "Unsupported functional form: {.val {functional_form}}", + "i" = "Currently supported: 'logistic', 'double_logistic'", + "i" = "To add support, create a function named {.fn {helper_function_name}}" + )) + } + helper_function <- get(helper_function_name, mode = "function") + result <- helper_function(normalized_f) + result[["fleet_name"]] <- fleet_name + result[["year"]] <- as.integer(yr) + result[["month"]] <- NA_integer_ + result + }) + result <- dplyr::bind_rows(results_list) + # Reorder columns to match expected output format + result <- result |> + dplyr::select( + model_family, module_name, fleet_name, module_type, + label, distribution_link, age, length, year, month, value, + estimation_type, distribution_type, distribution + ) + return(result) + } else { + # No year column: fit to all data (original behavior) + f_values <- mortality_data |> + dplyr::group_by(functional_group) |> + dplyr::summarise(mean_f = mean(value, na.rm = TRUE), .groups = "drop") |> + dplyr::right_join( + tibble::tibble(functional_group = functional_groups), + by = "functional_group" + ) |> + dplyr::pull(mean_f) + if (any(is.na(f_values))) { + na_groups <- functional_groups[is.na(f_values)] + cli::cli_abort(c( + "Missing mortality data for some functional groups:", + "x" = "Groups with no data: {.var {na_groups}}" + )) + } + if (max(f_values) == 0) { + cli::cli_abort("All F values are zero. Cannot fit selectivity curve.") + } + normalized_f <- f_values / max(f_values) + helper_function_name <- paste0("fit_", tolower(functional_form), "_selectivity") + if (!exists(helper_function_name, mode = "function")) { + cli::cli_abort(c( + "Unsupported functional form: {.val {functional_form}}", + "i" = "Currently supported: 'logistic', 'double_logistic'", + "i" = "To add support, create a function named {.fn {helper_function_name}}" + )) + } + helper_function <- get(helper_function_name, mode = "function") + result <- helper_function(normalized_f) + result[["fleet_name"]] <- fleet_name + # time column remains NA + result <- result |> + dplyr::select( + model_family, module_name, fleet_name, module_type, + label, distribution_link, age, length, year, month, value, + estimation_type, distribution_type, distribution + ) + return(result) + } +} diff --git a/man/generate_selectivity_curve.Rd b/man/generate_selectivity_curve.Rd new file mode 100644 index 0000000..f4c5c01 --- /dev/null +++ b/man/generate_selectivity_curve.Rd @@ -0,0 +1,95 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_selectivity_curve.R +\name{generate_selectivity_curve} +\alias{generate_selectivity_curve} +\title{Generate selectivity curve parameters from ecosystem model F values} +\usage{ +generate_selectivity_curve( + ewe_output, + functional_groups, + functional_form, + fleet_name = "fleet1" +) +} +\arguments{ +\item{ewe_output}{A tibble containing ecosystem model output, typically from +\code{\link[=load_model]{load_model()}} with type = "ewe". Must contain columns: functional_group, +type, and value. The function will filter for type == "mortality".} + +\item{functional_groups}{A character vector of functional group names in the +order desired for the x-axis of the selectivity curve. These should match +names in the functional_group column of ewe_output.} + +\item{functional_form}{A character string specifying the selectivity curve +type. Currently supports "logistic" and "double_logistic". Additional +forms can be added by creating new helper functions.} + +\item{fleet_name}{A character string specifying the fleet name to use in the +output tibble. Default is "fleet1".} +} +\value{ +A tibble with the following columns: +\itemize{ +\item model_family: Always "catch_at_age" +\item module_name: Always "Selectivity" +\item fleet_name: Fleet name as specified in the input +\item module_type: The selectivity curve type (e.g., "Logistic", "DoubleLogistic") +\item label: Parameter name (e.g., "inflection_point", "slope") +\item distribution_link: Always NA +\item age: Always NA +\item length: Always NA +\item time: Always NA +\item value: The fitted parameter value +\item estimation_type: Always "fixed_effects" +\item distribution_type: Always NA +\item distribution: Always NA +} +} +\description{ +This function takes fishing mortality (F) values by functional group from an +ecosystem model and fits selectivity curve parameters based on a specified +functional form. The function is modular and can be extended to support +additional selectivity curve types. +} +\details{ +The function extracts fishing mortality values for the specified functional +groups and uses a helper function specific to the chosen functional form to +fit selectivity parameters. The F values are normalized to be between 0 and 1 +before fitting, representing relative selectivity. + +New functional forms can be added by creating helper functions with the +naming pattern \code{fit__selectivity()} that take a vector of values and +return parameter estimates. To add a new selectivity form: +\enumerate{ +\item Create a helper function named \code{fit__selectivity()} that +takes a numeric vector of normalized values (0-1) and returns a tibble +with columns: model_family, module_name, module_type, label, +distribution_link, age, length, time, value, estimation_type, +distribution_type, and distribution. +\item The function will automatically be available by passing the form name +to the \code{functional_form} argument (e.g., "new_form" will call +\code{fit_new_form_selectivity()}). +} +} +\examples{ +\dontrun{ +# Load example data +data("ewe_nwatlantic_base", package = "ecosystemdata") + +# Generate logistic selectivity for spiny dogfish age groups +generate_selectivity_curve( + ewe_output = ewe_nwatlantic_base, + functional_groups = c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +"), + functional_form = "logistic", + fleet_name = "fleet1" +) + +# Generate double logistic selectivity +generate_selectivity_curve( + ewe_output = ewe_nwatlantic_base, + functional_groups = c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +"), + functional_form = "double_logistic", + fleet_name = "fleet1" +) +} +} diff --git a/tests/testthat/test-fit_selectivity_helpers.R b/tests/testthat/test-fit_selectivity_helpers.R new file mode 100644 index 0000000..3e180fc --- /dev/null +++ b/tests/testthat/test-fit_selectivity_helpers.R @@ -0,0 +1,173 @@ +# Instructions ---- +#' This file follows the format generated by ecosystemdata:::use_testthat_template(). +#' Necessary tests include input and output (IO) correctness [IO +#' correctness], edge-case handling [Edge handling], and built-in errors and +#' warnings [Error handling]. See `?ecosystemdata:::use_testthat_template` for more +#' information. Every test should have a @description tag that takes up just +#' one line, which will be used in the bookdown report of {testthat} results. + +# Setup ---- +# Load or prepare any necessary data for testing + +# fit_logistic_selectivity ---- +## IO correctness ---- +test_that("fit_logistic_selectivity() recovers known parameters", { + # True parameters + true_inflection <- 3 + true_slope <- 1.5 + + # Generate selectivity values from known parameters + index_values <- 1:6 + true_selectivity <- 1 / (1 + exp(-true_slope * (index_values - true_inflection))) + + # Fit the model + result <- ecosystemdata:::fit_logistic_selectivity(true_selectivity) + + # Extract estimated parameters + estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] |> + unname() + estimated_slope <- result[["value"]][result[["label"]] == "slope"] |> + unname() + + # Check that estimates are close to true values (within 10% tolerance) + #' @description Test that the function can recover known inflection parameter from generated data. + expect_equal(estimated_inflection, true_inflection, tolerance = 0.1 * true_inflection) + #' @description Test that the function can recover known slope parameter from generated data. + expect_equal(estimated_slope, true_slope, tolerance = 0.1 * true_slope) + + #' @description Test that fit_logistic_selectivity() returns a tibble with the correct columns and parameter labels. + expect_true(tibble::is_tibble(result)) + expect_equal(nrow(result), 2) + expect_equal(result[["label"]], c("inflection_point", "slope")) + expect_equal(result[["module_type"]], rep("Logistic", 2)) + expect_true(all(is.numeric(result[["value"]]))) +}) + +test_that("fit_logistic_selectivity() handles different slopes", { + + # True parameters with steeper slope + #' @description Test parameter recovery with a different slope value. + true_inflection <- 4 + true_slope <- 3.0 + + # Generate selectivity values + index_values <- 1:8 + true_selectivity <- 1 / (1 + exp(-true_slope * (index_values - true_inflection))) + + # Fit the model + result <- ecosystemdata:::fit_logistic_selectivity(true_selectivity) + + # Extract estimated parameters + estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] |> + unname() + estimated_slope <- result[["value"]][result[["label"]] == "slope"] |> + unname() + + # Check estimates + expect_equal(estimated_inflection, true_inflection, tolerance = 0.1 * true_inflection) + expect_equal(estimated_slope, true_slope, tolerance = 0.1 * true_slope) +}) + +# fit_double_logistic_selectivity ---- +## IO correctness ---- +test_that("fit_double_logistic_selectivity() returns correct structure", { + #' @description Test that fit_double_logistic_selectivity() returns a tibble with the correct columns and parameter labels. + f_values <- c(0.1, 0.3, 0.5, 0.9, 0.6, 0.3) + result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) + + expect_true(tibble::is_tibble(result)) + expect_equal(nrow(result), 4) + expect_equal( + result[["label"]], + c("inflection_point_asc", "slope_asc", "inflection_point_desc", "slope_desc") + ) + expect_equal(result[["module_type"]], rep("DoubleLogistic", 4)) + expect_true(all(is.numeric(result[["value"]]))) +}) + +test_that("fit_double_logistic_selectivity() handles dome-shaped pattern", { + #' @description Test that the function fits a dome-shaped selectivity pattern. + f_values <- c(0.1, 0.3, 0.5, 1.0, 0.7, 0.3) + result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) + + inflection_ascending <- result[["value"]][result[["label"]] == "inflection_point_asc"] + inflection_descending <- result[["value"]][result[["label"]] == "inflection_point_desc"] + + # Ascending inflection should come before descending + expect_true(inflection_ascending < inflection_descending) + + # Slopes should be positive + expect_true(result[["value"]][result[["label"]] == "slope_asc"] > 0) + expect_true(result[["value"]][result[["label"]] == "slope_desc"] > 0) +}) + +## Validation tests ---- +test_that("fit_double_logistic_selectivity() recovers known parameters", { + #' @description Test that the function can recover known double logistic parameters from generated data. + + # True parameters + true_inflection_asc <- 2.5 + true_slope_asc <- 2.0 + true_inflection_desc <- 5.5 + true_slope_desc <- 1.5 + + # Generate selectivity values from known parameters + index_values <- 1:8 + asc_component <- 1 / (1 + exp(-true_slope_asc * (index_values - true_inflection_asc))) + desc_component <- 1 - 1 / (1 + exp(-true_slope_desc * (index_values - true_inflection_desc))) + true_selectivity <- asc_component * desc_component + + # Fit the model + result <- ecosystemdata:::fit_double_logistic_selectivity(true_selectivity) + + # Extract estimated parameters + est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] |> + unname() + est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] |> + unname() + est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] |> + unname() + est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] |> + unname() + + # Check that estimates are close to true values (within 20% tolerance for more complex model) + expect_equal(est_infl_asc, true_inflection_asc, tolerance = 0.2 * true_inflection_asc) + expect_equal(est_slope_asc, true_slope_asc, tolerance = 0.2 * true_slope_asc) + expect_equal(est_infl_desc, true_inflection_desc, tolerance = 0.2 * true_inflection_desc) + expect_equal(est_slope_desc, true_slope_desc, tolerance = 0.2 * true_slope_desc) +}) + +test_that("fit_double_logistic_selectivity() handles symmetric dome", { + #' @description Test parameter recovery with symmetric dome shape. + + # True parameters with symmetric slopes + true_inflection_asc <- 3.0 + true_slope_asc <- 1.5 + true_inflection_desc <- 7.0 + true_slope_desc <- 1.5 + + # Generate selectivity values + index_values <- 1:10 + asc_component <- 1 / (1 + exp(-true_slope_asc * (index_values - true_inflection_asc))) + desc_component <- 1 - 1 / (1 + exp(-true_slope_desc * (index_values - true_inflection_desc))) + true_selectivity <- asc_component * desc_component + + # Fit the model + result <- ecosystemdata:::fit_double_logistic_selectivity(true_selectivity) + + # Extract estimated parameters + est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] |> + unname() + est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] |> + unname() + est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] |> + unname() + est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] |> + unname() + + # Check estimates + expect_equal(est_infl_asc, true_inflection_asc, tolerance = 0.25 * true_inflection_asc) + expect_equal(est_slope_asc, true_slope_asc, tolerance = 0.25 * true_slope_asc) + expect_equal(est_infl_desc, true_inflection_desc, tolerance = 0.25 * true_inflection_desc) + expect_equal(est_slope_desc, true_slope_desc, tolerance = 0.25 * true_slope_desc) +}) diff --git a/tests/testthat/test-generate_selectivity_curve.R b/tests/testthat/test-generate_selectivity_curve.R new file mode 100644 index 0000000..039a3ca --- /dev/null +++ b/tests/testthat/test-generate_selectivity_curve.R @@ -0,0 +1,372 @@ +# Instructions ---- +#' This file follows the format generated by ecosystemdata:::use_testthat_template(). +#' Necessary tests include input and output (IO) correctness [IO +#' correctness], edge-case handling [Edge handling], and built-in errors and +#' warnings [Error handling]. See `?ecosystemdata:::use_testthat_template` for more +#' information. Every test should have a @description tag that takes up just +#' one line, which will be used in the bookdown report of {testthat} results. + +# Setup ---- +# Load or prepare any necessary data for testing +data("ewe_nwatlantic_base", package = "ecosystemdata") + +functional_groups <- c("species1 0", "species1 1", "species1 2", "species1 3", "species1 4", "species1 5") +# True parameters +true_inflection <- 3 +true_slope <- 1.5 + +# Generate selectivity values from known parameters +index_values <- 1:6 +true_selectivity <- 1 / (1 + exp(-true_slope * (index_values - true_inflection))) + +test_data <- tibble::tibble( + functional_group = rep(functional_groups, each = 12), + type = "mortality", + # Replicate each selectivity value for 12 months + value = rep(true_selectivity, each = 12), + year = rep(2000, 72), + month = rep(1:12, 6) +) + +# generate_selectivity_curve ---- +## IO correctness ---- +test_that("generate_selectivity_curve() works with logistic form", { + result <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = functional_groups, + functional_form = "logistic", + fleet_name = "fleet1" + ) + + estimated_inflection <- result |> + dplyr::filter(label == "inflection_point") |> + dplyr::pull(value) |> + unname() + estimated_slope <- result |> + dplyr::filter(label == "slope") |> + dplyr::pull(value) |> + unname() + + #' @description Test that generate_selectivity_curve() returns a tibble with correct structure for logistic selectivity. + expect_equal(estimated_inflection, rep(true_inflection, 12), tolerance = 0.1 * true_inflection) + #' @description Test that estimated slope matches true slope. + expect_equal(estimated_slope, rep(true_slope, 12), tolerance = 0.1 * true_slope) + #' @description Test that result is a tibble. + expect_true(tibble::is_tibble(result)) + #' @description Test that result has two rows. + expect_equal(nrow(result), 2 * 12) # inflection_point and slope + #' @description Test that model_family is catch_at_age. + expect_equal(result[["model_family"]], rep("catch_at_age", 2 * 12)) + #' @description Test that module_name is Selectivity. + expect_equal(result[["module_name"]], rep("Selectivity", 2 * 12)) + #' @description Test that fleet_name is fleet1. + expect_equal(result[["fleet_name"]], rep("fleet1", 2 * 12)) + #' @description Test that module_type is Logistic. + expect_equal(result[["module_type"]], rep("Logistic", 2 * 12)) + #' @description Test that label is inflection_point and slope. + expect_equal(result[["label"]], rep(c("inflection_point", "slope"), 12)) + #' @description Test that estimation_type is fixed_effects. + expect_equal(result[["estimation_type"]], rep("fixed_effects", 2 * 12)) + #' @description Test that distribution_link is NA. + expect_true(all(is.na(result[["distribution_link"]]))) + #' @description Test that value is numeric. + expect_true(all(is.numeric(result[["value"]]))) + #' @description Test that value is not NA. + expect_true(all(!is.na(result[["value"]]))) +}) + +test_that("generate_selectivity_curve() works with double_logistic form", { + result <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = functional_groups, + functional_form = "double_logistic", + fleet_name = "fleet1" + ) + + #' @description Test that result is a tibble for double logistic. + expect_true(tibble::is_tibble(result)) + #' @description Test that result has four rows for double logistic. + expect_equal(nrow(result), 4 * 12) # 4 parameters for double logistic + #' @description Test that model_family is catch_at_age for double logistic. + expect_equal(result[["model_family"]], rep("catch_at_age", 4 * 12)) + #' @description Test that module_name is Selectivity for double logistic. + expect_equal(result[["module_name"]], rep("Selectivity", 4 * 12)) + #' @description Test that fleet_name is fleet1 for double logistic. + expect_equal(result[["fleet_name"]], rep("fleet1", 4 * 12)) + #' @description Test that module_type is DoubleLogistic. + expect_equal(result[["module_type"]], rep("DoubleLogistic", 4 * 12)) + #' @description Test that label is correct for double logistic. + expect_equal( + result[["label"]], + rep(c("inflection_point_asc", "slope_asc", "inflection_point_desc", "slope_desc"), 12) + ) + #' @description Test that estimation_type is fixed_effects for double logistic. + expect_equal(result[["estimation_type"]], rep("fixed_effects", 4 * 12)) + #' @description Test that value is numeric for double logistic. + expect_true(all(is.numeric(result[["value"]]))) + #' @description Test that value is not NA for double logistic. + expect_true(all(!is.na(result[["value"]]))) +}) + +test_that("generate_selectivity_curve() works with real EWE data", { + menhaden_data <- ewe_nwatlantic_base |> + dplyr::filter(species == "menhaden" & type == "mortality") + menhaden_groups <- menhaden_data |> + dplyr::pull(functional_group) |> + unique() + + result <- generate_selectivity_curve( + ewe_output = menhaden_data, + functional_groups = menhaden_groups, + functional_form = "double_logistic", + fleet_name = "NA" + ) + + #' @description Test that result is a tibble for real EWE data. + expect_true(tibble::is_tibble(result)) + #' @description Test that result has two rows for real EWE data. + expect_equal(nrow(result), 2) + #' @description Test that fleet_name is commercial for real EWE data. + expect_equal(result[["fleet_name"]], rep("commercial", 2)) + #' @description Test that value is numeric for real EWE data. + expect_true(all(is.numeric(result[["value"]]))) +}) + +## Edge handling ---- +test_that("generate_selectivity_curve() handles case-insensitive functional forms", { + #' @description Test that functional_form parameter accepts different cases. + result1 <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "LOGISTIC", + fleet_name = "fleet1" + ) + + result2 <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "Logistic", + fleet_name = "fleet1" + ) + + expect_equal(result1[["module_type"]], result2[["module_type"]]) +}) + +test_that("generate_selectivity_curve() preserves functional_groups order", { + #' @description Test that the order of functional groups affects the fitted parameters (reversed order should give different results). + + result_forward <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "logistic", + fleet_name = "fleet1" + ) + + result_reversed <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 2", "species1 1", "species1 0"), + functional_form = "logistic", + fleet_name = "fleet1" + ) + + # The inflection points should be different because the order is reversed + expect_false( + isTRUE(all.equal( + result_forward[["value"]][1], + result_reversed[["value"]][1] + )) + ) +}) + +## Error handling ---- +test_that("generate_selectivity_curve() errors on invalid ewe_output", { + #' @description Test that the function errors when ewe_output is not a data frame. + expect_error( + generate_selectivity_curve( + ewe_output = "not a data frame", + functional_groups = c("species1 0", "species1 1"), + functional_form = "logistic" + ), + "must be a data frame" + ) +}) + +test_that("generate_selectivity_curve() errors on missing columns", { + #' @description Test that the function errors when required columns are missing. + bad_data <- test_data[, c("functional_group", "value")] + + expect_error( + generate_selectivity_curve( + ewe_output = bad_data, + functional_groups = c("species1 0", "species1 1"), + functional_form = "logistic" + ), + "missing required columns" + ) +}) + +test_that("generate_selectivity_curve() errors on invalid functional_groups", { + #' @description Test that the function errors when functional_groups is not + #' a character vector. + expect_error( + generate_selectivity_curve( + ewe_output = test_data, + functional_groups = 123, + functional_form = "logistic" + ), + "must be a character vector" + ) +}) + +test_that("generate_selectivity_curve() errors with too few functional groups", { + #' @description Test that the function errors when fewer than 2 functional + #' groups are provided. + expect_error( + generate_selectivity_curve( + ewe_output = test_data, + functional_groups = "species1 0", + functional_form = "logistic" + ), + "at least 2 groups" + ) +}) + +test_that("generate_selectivity_curve() errors on unsupported functional form", { + #' @description Test that the function errors when an unsupported functional + #' form is specified. + expect_error( + generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1"), + functional_form = "invalid_form" + ), + "Unsupported functional form" + ) +}) + +test_that("generate_selectivity_curve() errors on missing functional groups", { + #' @description Test that the function errors when specified functional groups + #' are not in the data. + expect_error( + generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("nonexistent 0", "nonexistent 1"), + functional_form = "logistic" + ), + "not found in" + ) +}) + +test_that("generate_selectivity_curve() errors on missing mortality data", { + #' @description Test that the function errors when no mortality data exists + #' for the specified groups. + biomass_data <- test_data + biomass_data[["type"]] <- "biomass" + + expect_error( + generate_selectivity_curve( + ewe_output = biomass_data, + functional_groups = c("species1 0", "species1 1"), + functional_form = "logistic" + ), + "No mortality data found" + ) +}) + +test_that("generate_selectivity_curve() errors on all-zero F values", { + #' @description Test that the function errors when all F values are zero. + zero_data <- test_data + zero_data[["value"]] <- 0 + + expect_error( + generate_selectivity_curve( + ewe_output = zero_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "logistic" + ), + "All F values are zero" + ) +}) + +## Validation tests ---- +test_that("generate_selectivity_curve() recovers parameters from known selectivity", { + #' @description Test that the full workflow can recover known parameters. + + # Create true selectivity parameters + true_inflection <- 2.0 + true_slope <- 1.5 + + # Generate F values from true selectivity + index_values <- 1:5 + true_selectivity <- 1 / (1 + exp(-true_slope * (index_values - true_inflection))) + + # Create test data with these F values + validation_data <- tibble::tibble( + functional_group = rep(paste("group", index_values), each = 12), + type = "mortality", + value = rep(true_selectivity, each = 12), + year = rep(2000, length(index_values) * 12), + month = rep(1:12, length(index_values)) + ) + + # Fit using generate_selectivity_curve + result <- generate_selectivity_curve( + ewe_output = validation_data, + functional_groups = paste("group", index_values), + functional_form = "logistic", + fleet_name = "test" + ) + + # Extract estimated parameters + estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] + estimated_slope <- result[["value"]][result[["label"]] == "slope"] + + # Check that estimates are close to true values + expect_equal(estimated_inflection, true_inflection, tolerance = 0.15 * true_inflection) + expect_equal(estimated_slope, true_slope, tolerance = 0.15 * true_slope) +}) + +test_that("generate_selectivity_curve() recovers double logistic parameters", { + #' @description Test recovery of double logistic parameters through full workflow. + + # Create true parameters + true_infl_asc <- 2.0 + true_slope_asc <- 2.0 + true_infl_desc <- 5.0 + true_slope_desc <- 1.5 + + # Generate F values from true double logistic + index_values <- 1:7 + asc_component <- 1 / (1 + exp(-true_slope_asc * (index_values - true_infl_asc))) + desc_component <- 1 - 1 / (1 + exp(-true_slope_desc * (index_values - true_infl_desc))) + true_selectivity <- asc_component * desc_component + + # Create test data + validation_data <- tibble::tibble( + functional_group = rep(paste("group", index_values), each = 12), + type = "mortality", + value = rep(true_selectivity, each = 12), + year = rep(2000, length(index_values) * 12), + month = rep(1:12, length(index_values)) + ) + + # Fit using generate_selectivity_curve + result <- generate_selectivity_curve( + ewe_output = validation_data, + functional_groups = paste("group", index_values), + functional_form = "double_logistic", + fleet_name = "test" + ) + + # Extract estimated parameters + est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] + est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] + est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] + est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] + + # Check estimates (more lenient tolerance for complex model) + expect_equal(est_infl_asc, true_infl_asc, tolerance = 0.25 * true_infl_asc) + expect_equal(est_slope_asc, true_slope_asc, tolerance = 0.25 * true_slope_asc) + expect_equal(est_infl_desc, true_infl_desc, tolerance = 0.25 * true_infl_desc) + expect_equal(est_slope_desc, true_slope_desc, tolerance = 0.25 * true_slope_desc) +})