From 13125acc32d331a47e4836da66caaa0f5e107baa Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Feb 2026 17:53:11 +0000 Subject: [PATCH 01/11] Initial plan From da30f0cd3cf1feb22a76488a2492b959e8fa07e1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Feb 2026 17:59:01 +0000 Subject: [PATCH 02/11] Add modular selectivity curve generation functions Co-authored-by: Bai-Li-NOAA <59936250+Bai-Li-NOAA@users.noreply.github.com> --- NAMESPACE | 1 + R/fit_selectivity_helpers.R | 231 +++++++++++++ R/generate_selectivity_curve.R | 182 ++++++++++ man/generate_selectivity_curve.Rd | 88 +++++ .../test-generate_selectivity_curve.R | 313 ++++++++++++++++++ 5 files changed, 815 insertions(+) create mode 100644 R/fit_selectivity_helpers.R create mode 100644 R/generate_selectivity_curve.R create mode 100644 man/generate_selectivity_curve.Rd create mode 100644 tests/testthat/test-generate_selectivity_curve.R diff --git a/NAMESPACE b/NAMESPACE index c59c727..4fc63ef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(generate_selectivity_curve) export(get_functional_groups) export(load_csv_environmental_data) export(load_csv_ewe) diff --git a/R/fit_selectivity_helpers.R b/R/fit_selectivity_helpers.R new file mode 100644 index 0000000..c2a5030 --- /dev/null +++ b/R/fit_selectivity_helpers.R @@ -0,0 +1,231 @@ +#' Fit logistic selectivity parameters +#' +#' @description +#' Helper function to fit logistic selectivity curve parameters to a vector of +#' relative F values. The logistic function is defined as: +#' S(a) = 1 / (1 + exp(-slope * (a - inflection_point))) +#' where a is the age/group index. +#' +#' @param f_values A numeric vector of normalized F values (0-1) for each +#' functional group, in the order they should appear on the x-axis. +#' +#' @return A tibble with one row per parameter, containing the standard FIMS +#' parameter format columns. +#' +#' @details +#' The function uses nonlinear least squares (nls) to fit a logistic curve to +#' the normalized F values. The age/group index is simply 1, 2, 3, ... based on +#' the position in the f_values vector. +#' +#' If the optimization fails, the function falls back to a simple linear +#' approximation to estimate the inflection point and slope. +#' +#' @keywords internal +#' @author Kelli F. Johnson +fit_logistic_selectivity <- function(f_values) { + # Create age index (1, 2, 3, ...) + ages <- seq_along(f_values) + + # Define logistic function + logistic <- function(age, inflection_point, slope) { + 1 / (1 + exp(-slope * (age - inflection_point))) + } + + # Initial parameter guesses + # Inflection point: age where selectivity is around 0.5 + mid_idx <- which.min(abs(f_values - 0.5)) + init_inflection <- if (length(mid_idx) > 0) ages[mid_idx] else mean(ages) + + # Slope: positive value, start with 1 + init_slope <- 1 + + # Fit using nonlinear least squares + fit_result <- tryCatch( + { + stats::nls( + f_values ~ logistic(ages, inflection_point, slope), + start = list(inflection_point = init_inflection, slope = init_slope), + control = stats::nls.control(maxiter = 100, warnOnly = TRUE) + ) + }, + error = function(e) { + # Fallback: use simple heuristic + NULL + } + ) + + if (!is.null(fit_result)) { + params <- stats::coef(fit_result) + inflection_point <- params["inflection_point"] + slope <- params["slope"] + } else { + # Fallback method: find inflection as midpoint of rise + # and estimate slope from the steepest part of the curve + inflection_point <- init_inflection + + # Estimate slope from differences + if (length(ages) > 1) { + diffs <- diff(f_values) / diff(ages) + slope <- max(abs(diffs), 0.1) * 4 # Scale up since logistic slope is 1/4 of max derivative + } else { + slope <- 1 + } + } + + # Create output tibble in FIMS format + tibble::tibble( + model_family = "catch_at_age", + module_name = "Selectivity", + module_type = "Logistic", + label = c("inflection_point", "slope"), + distribution_link = NA_character_, + age = NA_real_, + length = NA_real_, + time = NA_real_, + value = c(inflection_point, slope), + estimation_type = "fixed_effects", + distribution_type = NA_character_, + distribution = NA_character_ + ) +} + + +#' Fit double logistic selectivity parameters +#' +#' @description +#' Helper function to fit double logistic (dome-shaped) selectivity curve +#' parameters to a vector of relative F values. The double logistic function +#' combines ascending and descending logistic curves: +#' S(a) = 1 / (1 + exp(-slope_asc * (a - inflection_asc))) * +#' (1 - 1 / (1 + exp(-slope_desc * (a - inflection_desc)))) +#' +#' @param f_values A numeric vector of normalized F values (0-1) for each +#' functional group, in the order they should appear on the x-axis. +#' +#' @return A tibble with one row per parameter, containing the standard FIMS +#' parameter format columns. +#' +#' @details +#' The function attempts to fit a dome-shaped selectivity curve. If the data +#' appears monotonic (no dome shape), it will still fit the parameters but may +#' warn or produce parameters that effectively reduce to a simple logistic. +#' +#' The fitting uses nonlinear least squares with constraints to ensure: +#' - Ascending inflection point comes before descending +#' - Slopes are positive +#' - The curve shape makes biological sense +#' +#' @keywords internal +#' @author Kelli F. Johnson +fit_double_logistic_selectivity <- function(f_values) { + # Create age index (1, 2, 3, ...) + ages <- seq_along(f_values) + + # Define double logistic function + double_logistic <- function(age, infl_asc, slope_asc, infl_desc, slope_desc) { + asc <- 1 / (1 + exp(-slope_asc * (age - infl_asc))) + desc <- 1 - 1 / (1 + exp(-slope_desc * (age - infl_desc))) + asc * desc + } + + # Initial parameter guesses + # Find the peak of the selectivity + peak_idx <- which.max(f_values) + peak_age <- ages[peak_idx] + + # Ascending parameters: before peak + if (peak_idx > 1) { + ascending_f <- f_values[1:peak_idx] + ascending_ages <- ages[1:peak_idx] + mid_asc_idx <- which.min(abs(ascending_f - 0.5)) + init_infl_asc <- if (length(mid_asc_idx) > 0) ascending_ages[mid_asc_idx] else peak_age - 1 + } else { + init_infl_asc <- peak_age - 1 + } + + # Descending parameters: after peak + if (peak_idx < length(ages)) { + descending_f <- f_values[peak_idx:length(ages)] + descending_ages <- ages[peak_idx:length(ages)] + # For descending, look for where it drops below peak/2 + mid_desc_idx <- which.min(abs(descending_f - max(f_values) / 2)) + init_infl_desc <- if (length(mid_desc_idx) > 0) descending_ages[mid_desc_idx] else peak_age + 1 + } else { + init_infl_desc <- peak_age + 1 + } + + # Ensure descending inflection is after ascending + if (init_infl_desc <= init_infl_asc) { + init_infl_desc <- init_infl_asc + 1 + } + + init_slope_asc <- 1 + init_slope_desc <- 1 + + # Fit using nonlinear least squares + fit_result <- tryCatch( + { + stats::nls( + f_values ~ double_logistic(ages, infl_asc, slope_asc, infl_desc, slope_desc), + start = list( + infl_asc = init_infl_asc, + slope_asc = init_slope_asc, + infl_desc = init_infl_desc, + slope_desc = init_slope_desc + ), + control = stats::nls.control(maxiter = 200, warnOnly = TRUE), + algorithm = "port", + lower = c( + infl_asc = min(ages), + slope_asc = 0.01, + infl_desc = init_infl_asc + 0.5, + slope_desc = 0.01 + ), + upper = c( + infl_asc = max(ages), + slope_asc = 10, + infl_desc = max(ages) + 10, + slope_desc = 10 + ) + ) + }, + error = function(e) { + NULL + } + ) + + if (!is.null(fit_result)) { + params <- stats::coef(fit_result) + infl_asc <- params["infl_asc"] + slope_asc <- params["slope_asc"] + infl_desc <- params["infl_desc"] + slope_desc <- params["slope_desc"] + } else { + # Fallback to initial guesses + infl_asc <- init_infl_asc + slope_asc <- init_slope_asc + infl_desc <- init_infl_desc + slope_desc <- init_slope_desc + } + + # Create output tibble in FIMS format + tibble::tibble( + model_family = "catch_at_age", + module_name = "Selectivity", + module_type = "DoubleLogistic", + label = c( + "inflection_point_asc", + "slope_asc", + "inflection_point_desc", + "slope_desc" + ), + distribution_link = NA_character_, + age = NA_real_, + length = NA_real_, + time = NA_real_, + value = c(infl_asc, slope_asc, infl_desc, slope_desc), + estimation_type = "fixed_effects", + distribution_type = NA_character_, + distribution = NA_character_ + ) +} diff --git a/R/generate_selectivity_curve.R b/R/generate_selectivity_curve.R new file mode 100644 index 0000000..15d7b7c --- /dev/null +++ b/R/generate_selectivity_curve.R @@ -0,0 +1,182 @@ +utils::globalVariables(c("functional_group")) + +#' Generate selectivity curve parameters from ecosystem model F values +#' +#' @description +#' This function takes fishing mortality (F) values by functional group from an +#' ecosystem model and fits selectivity curve parameters based on a specified +#' functional form. The function is modular and can be extended to support +#' additional selectivity curve types. +#' +#' @param ewe_output A tibble containing ecosystem model output, typically from +#' [load_model()] with type = "ewe". Must contain columns: functional_group, +#' type, and value. The function will filter for type == "mortality". +#' @param functional_groups A character vector of functional group names in the +#' order desired for the x-axis of the selectivity curve. These should match +#' names in the functional_group column of ewe_output. +#' @param functional_form A character string specifying the selectivity curve +#' type. Currently supports "logistic" and "double_logistic". Additional +#' forms can be added by creating new helper functions. +#' @param fleet_name A character string specifying the fleet name to use in the +#' output tibble. Default is "fleet1". +#' +#' @return A tibble with the following columns: +#' \itemize{ +#' \item model_family: Always "catch_at_age" +#' \item module_name: Always "Selectivity" +#' \item fleet_name: Fleet name as specified in the input +#' \item module_type: The selectivity curve type (e.g., "Logistic", "DoubleLogistic") +#' \item label: Parameter name (e.g., "inflection_point", "slope") +#' \item distribution_link: Always NA +#' \item age: Always NA +#' \item length: Always NA +#' \item time: Always NA +#' \item value: The fitted parameter value +#' \item estimation_type: Always "fixed_effects" +#' \item distribution_type: Always NA +#' \item distribution: Always NA +#' } +#' +#' @details +#' The function extracts fishing mortality values for the specified functional +#' groups and uses a helper function specific to the chosen functional form to +#' fit selectivity parameters. The F values are normalized to be between 0 and 1 +#' before fitting, representing relative selectivity. +#' +#' New functional forms can be added by creating helper functions with the +#' naming pattern `fit_
_selectivity()` that take a vector of F values and +#' return parameter estimates. +#' +#' @export +#' @author Kelli F. Johnson +#' @examples +#' \dontrun{ +#' # Load example data +#' data("ewe_nwatlantic_base", package = "ecosystemdata") +#' +#' # Generate logistic selectivity for spiny dogfish age groups +#' generate_selectivity_curve( +#' ewe_output = ewe_nwatlantic_base, +#' functional_groups = c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +"), +#' functional_form = "logistic", +#' fleet_name = "fleet1" +#' ) +#' +#' # Generate double logistic selectivity +#' generate_selectivity_curve( +#' ewe_output = ewe_nwatlantic_base, +#' functional_groups = c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +"), +#' functional_form = "double_logistic", +#' fleet_name = "fleet1" +#' ) +#' } +generate_selectivity_curve <- function(ewe_output, + functional_groups, + functional_form, + fleet_name = "fleet1") { + # Input validation + if (!inherits(ewe_output, "data.frame")) { + cli::cli_abort("{.var ewe_output} must be a data frame or tibble.") + } + + required_cols <- c("functional_group", "type", "value") + missing_cols <- setdiff(required_cols, names(ewe_output)) + if (length(missing_cols) > 0) { + cli::cli_abort(c( + "{.var ewe_output} is missing required columns:", + "x" = "Missing: {.var {missing_cols}}" + )) + } + + if (!inherits(functional_groups, "character")) { + cli::cli_abort("{.var functional_groups} must be a character vector.") + } + + if (length(functional_groups) < 2) { + cli::cli_abort(c( + "{.var functional_groups} must contain at least 2 groups.", + "i" = "Selectivity curves require multiple age/size classes." + )) + } + + if (!inherits(functional_form, "character") || length(functional_form) != 1) { + cli::cli_abort("{.var functional_form} must be a single character string.") + } + + # Check that functional groups exist in the data + missing_groups <- setdiff(functional_groups, unique(ewe_output$functional_group)) + if (length(missing_groups) > 0) { + cli::cli_abort(c( + "Some functional groups not found in {.var ewe_output}:", + "x" = "Missing: {.var {missing_groups}}" + )) + } + + # Extract mortality data for the specified functional groups + mortality_data <- ewe_output |> + dplyr::filter( + type == "mortality", + functional_group %in% functional_groups + ) + + if (nrow(mortality_data) == 0) { + cli::cli_abort(c( + "No mortality data found for the specified functional groups.", + "i" = "Check that type == 'mortality' exists in {.var ewe_output}" + )) + } + + # Calculate mean F value for each functional group (averaging over time/fleet) + f_values <- mortality_data |> + dplyr::group_by(functional_group) |> + dplyr::summarise(mean_f = mean(value, na.rm = TRUE), .groups = "drop") |> + # Ensure the order matches the input functional_groups + dplyr::right_join( + tibble::tibble(functional_group = functional_groups), + by = "functional_group" + ) |> + dplyr::pull(mean_f) + + # Check for missing F values + if (any(is.na(f_values))) { + na_groups <- functional_groups[is.na(f_values)] + cli::cli_abort(c( + "Missing mortality data for some functional groups:", + "x" = "Groups with no data: {.var {na_groups}}" + )) + } + + # Normalize F values to 0-1 range (representing relative selectivity) + if (max(f_values) == 0) { + cli::cli_abort("All F values are zero. Cannot fit selectivity curve.") + } + normalized_f <- f_values / max(f_values) + + # Call the appropriate helper function based on functional_form + helper_function_name <- paste0("fit_", tolower(functional_form), "_selectivity") + + if (!exists(helper_function_name, mode = "function")) { + cli::cli_abort(c( + "Unsupported functional form: {.val {functional_form}}", + "i" = "Currently supported: 'logistic', 'double_logistic'", + "i" = "To add support, create a function named {.fn {helper_function_name}}" + )) + } + + # Call the helper function + helper_function <- get(helper_function_name, mode = "function") + result <- helper_function(normalized_f) + + # Add fleet_name to the result + result$fleet_name <- fleet_name + + # Reorder columns to match expected output format + result <- result |> + dplyr::select( + model_family, module_name, fleet_name, module_type, + label, distribution_link, age, length, time, value, + estimation_type, distribution_type, distribution + ) + + return(result) +} diff --git a/man/generate_selectivity_curve.Rd b/man/generate_selectivity_curve.Rd new file mode 100644 index 0000000..e6d631c --- /dev/null +++ b/man/generate_selectivity_curve.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_selectivity_curve.R +\name{generate_selectivity_curve} +\alias{generate_selectivity_curve} +\title{Generate selectivity curve parameters from ecosystem model F values} +\usage{ +generate_selectivity_curve( + ewe_output, + functional_groups, + functional_form, + fleet_name = "fleet1" +) +} +\arguments{ +\item{ewe_output}{A tibble containing ecosystem model output, typically from +\code{\link[=load_model]{load_model()}} with type = "ewe". Must contain columns: functional_group, +type, and value. The function will filter for type == "mortality".} + +\item{functional_groups}{A character vector of functional group names in the +order desired for the x-axis of the selectivity curve. These should match +names in the functional_group column of ewe_output.} + +\item{functional_form}{A character string specifying the selectivity curve +type. Currently supports "logistic" and "double_logistic". Additional +forms can be added by creating new helper functions.} + +\item{fleet_name}{A character string specifying the fleet name to use in the +output tibble. Default is "fleet1".} +} +\value{ +A tibble with the following columns: +\itemize{ +\item model_family: Always "catch_at_age" +\item module_name: Always "Selectivity" +\item fleet_name: Fleet name as specified in the input +\item module_type: The selectivity curve type (e.g., "Logistic", "DoubleLogistic") +\item label: Parameter name (e.g., "inflection_point", "slope") +\item distribution_link: Always NA +\item age: Always NA +\item length: Always NA +\item time: Always NA +\item value: The fitted parameter value +\item estimation_type: Always "fixed_effects" +\item distribution_type: Always NA +\item distribution: Always NA +} +} +\description{ +This function takes fishing mortality (F) values by functional group from an +ecosystem model and fits selectivity curve parameters based on a specified +functional form. The function is modular and can be extended to support +additional selectivity curve types. +} +\details{ +The function extracts fishing mortality values for the specified functional +groups and uses a helper function specific to the chosen functional form to +fit selectivity parameters. The F values are normalized to be between 0 and 1 +before fitting, representing relative selectivity. + +New functional forms can be added by creating helper functions with the +naming pattern \code{fit__selectivity()} that take a vector of F values and +return parameter estimates. +} +\examples{ +\dontrun{ +# Load example data +data("ewe_nwatlantic_base", package = "ecosystemdata") + +# Generate logistic selectivity for spiny dogfish age groups +generate_selectivity_curve( + ewe_output = ewe_nwatlantic_base, + functional_groups = c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +"), + functional_form = "logistic", + fleet_name = "fleet1" +) + +# Generate double logistic selectivity +generate_selectivity_curve( + ewe_output = ewe_nwatlantic_base, + functional_groups = c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +"), + functional_form = "double_logistic", + fleet_name = "fleet1" +) +} +} +\author{ +Kelli F. Johnson +} diff --git a/tests/testthat/test-generate_selectivity_curve.R b/tests/testthat/test-generate_selectivity_curve.R new file mode 100644 index 0000000..f4c745a --- /dev/null +++ b/tests/testthat/test-generate_selectivity_curve.R @@ -0,0 +1,313 @@ +# Instructions ---- +#' This file follows the format generated by ecosystemdata:::use_testthat_template(). +#' Necessary tests include input and output (IO) correctness [IO +#' correctness], edge-case handling [Edge handling], and built-in errors and +#' warnings [Error handling]. See `?ecosystemdata:::use_testthat_template` for more +#' information. Every test should have a @description tag that takes up just +#' one line, which will be used in the bookdown report of {testthat} results. + +# Setup ---- +# Load or prepare any necessary data for testing +data("ewe_nwatlantic_base", package = "ecosystemdata") + +# Create a minimal test dataset +test_data <- tibble::tibble( + functional_group = rep(c("species1 0", "species1 1", "species1 2"), each = 12), + type = "mortality", + value = c( + rep(0.1, 12), # Low F for age 0 + rep(0.5, 12), # Medium F for age 1 + rep(0.8, 12) # High F for age 2 + ), + year = rep(2000, 36), + month = rep(1:12, 3) +) + +# generate_selectivity_curve ---- +## IO correctness ---- +test_that("generate_selectivity_curve() works with logistic form", { + #' @description Test that generate_selectivity_curve() returns a tibble with + #' correct structure for logistic selectivity. + result <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "logistic", + fleet_name = "fleet1" + ) + + expect_s3_class(result, "tbl_df") + expect_equal(nrow(result), 2) # inflection_point and slope + expect_equal(result$model_family, rep("catch_at_age", 2)) + expect_equal(result$module_name, rep("Selectivity", 2)) + expect_equal(result$fleet_name, rep("fleet1", 2)) + expect_equal(result$module_type, rep("Logistic", 2)) + expect_equal(result$label, c("inflection_point", "slope")) + expect_equal(result$estimation_type, rep("fixed_effects", 2)) + expect_true(all(is.na(result$distribution_link))) + expect_true(all(is.numeric(result$value))) + expect_true(all(!is.na(result$value))) +}) + +test_that("generate_selectivity_curve() works with double_logistic form", { + #' @description Test that generate_selectivity_curve() returns a tibble with + #' correct structure for double logistic selectivity. + result <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "double_logistic", + fleet_name = "fleet1" + ) + + expect_s3_class(result, "tbl_df") + expect_equal(nrow(result), 4) # 4 parameters for double logistic + expect_equal(result$model_family, rep("catch_at_age", 4)) + expect_equal(result$module_name, rep("Selectivity", 4)) + expect_equal(result$fleet_name, rep("fleet1", 4)) + expect_equal(result$module_type, rep("DoubleLogistic", 4)) + expect_equal( + result$label, + c("inflection_point_asc", "slope_asc", "inflection_point_desc", "slope_desc") + ) + expect_equal(result$estimation_type, rep("fixed_effects", 4)) + expect_true(all(is.numeric(result$value))) + expect_true(all(!is.na(result$value))) +}) + +test_that("generate_selectivity_curve() works with real EWE data", { + #' @description Test that generate_selectivity_curve() works with actual + #' ewe_nwatlantic_base data for spiny dogfish. + + # Get spiny dogfish functional groups + dogfish_groups <- unique(ewe_nwatlantic_base$functional_group) + dogfish_groups <- dogfish_groups[grepl("spiny dogfish", dogfish_groups)] + + skip_if(length(dogfish_groups) < 2, "Not enough spiny dogfish groups in data") + + result <- generate_selectivity_curve( + ewe_output = ewe_nwatlantic_base, + functional_groups = dogfish_groups, + functional_form = "logistic", + fleet_name = "commercial" + ) + + expect_s3_class(result, "tbl_df") + expect_equal(nrow(result), 2) + expect_equal(result$fleet_name, rep("commercial", 2)) + expect_true(all(is.numeric(result$value))) +}) + +## Edge handling ---- +test_that("generate_selectivity_curve() handles case-insensitive functional forms", { + #' @description Test that functional_form parameter accepts different cases. + result1 <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "LOGISTIC", + fleet_name = "fleet1" + ) + + result2 <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "Logistic", + fleet_name = "fleet1" + ) + + expect_equal(result1$module_type, result2$module_type) +}) + +test_that("generate_selectivity_curve() preserves functional_groups order", { + #' @description Test that the order of functional groups affects the fitted + #' parameters (reversed order should give different results). + + result_forward <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "logistic", + fleet_name = "fleet1" + ) + + result_reversed <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 2", "species1 1", "species1 0"), + functional_form = "logistic", + fleet_name = "fleet1" + ) + + # The inflection points should be different because the order is reversed + expect_false( + isTRUE(all.equal( + result_forward$value[1], + result_reversed$value[1] + )) + ) +}) + +## Error handling ---- +test_that("generate_selectivity_curve() errors on invalid ewe_output", { + #' @description Test that the function errors when ewe_output is not a data frame. + expect_error( + generate_selectivity_curve( + ewe_output = "not a data frame", + functional_groups = c("species1 0", "species1 1"), + functional_form = "logistic" + ), + "must be a data frame" + ) +}) + +test_that("generate_selectivity_curve() errors on missing columns", { + #' @description Test that the function errors when required columns are missing. + bad_data <- test_data[, c("functional_group", "value")] + + expect_error( + generate_selectivity_curve( + ewe_output = bad_data, + functional_groups = c("species1 0", "species1 1"), + functional_form = "logistic" + ), + "missing required columns" + ) +}) + +test_that("generate_selectivity_curve() errors on invalid functional_groups", { + #' @description Test that the function errors when functional_groups is not + #' a character vector. + expect_error( + generate_selectivity_curve( + ewe_output = test_data, + functional_groups = 123, + functional_form = "logistic" + ), + "must be a character vector" + ) +}) + +test_that("generate_selectivity_curve() errors with too few functional groups", { + #' @description Test that the function errors when fewer than 2 functional + #' groups are provided. + expect_error( + generate_selectivity_curve( + ewe_output = test_data, + functional_groups = "species1 0", + functional_form = "logistic" + ), + "at least 2 groups" + ) +}) + +test_that("generate_selectivity_curve() errors on unsupported functional form", { + #' @description Test that the function errors when an unsupported functional + #' form is specified. + expect_error( + generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1"), + functional_form = "random_walk" + ), + "Unsupported functional form" + ) +}) + +test_that("generate_selectivity_curve() errors on missing functional groups", { + #' @description Test that the function errors when specified functional groups + #' are not in the data. + expect_error( + generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("nonexistent 0", "nonexistent 1"), + functional_form = "logistic" + ), + "not found in" + ) +}) + +test_that("generate_selectivity_curve() errors on missing mortality data", { + #' @description Test that the function errors when no mortality data exists + #' for the specified groups. + biomass_data <- test_data + biomass_data$type <- "biomass" + + expect_error( + generate_selectivity_curve( + ewe_output = biomass_data, + functional_groups = c("species1 0", "species1 1"), + functional_form = "logistic" + ), + "No mortality data found" + ) +}) + +test_that("generate_selectivity_curve() errors on all-zero F values", { + #' @description Test that the function errors when all F values are zero. + zero_data <- test_data + zero_data$value <- 0 + + expect_error( + generate_selectivity_curve( + ewe_output = zero_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "logistic" + ), + "All F values are zero" + ) +}) + +# fit_logistic_selectivity ---- +## IO correctness ---- +test_that("fit_logistic_selectivity() returns correct structure", { + #' @description Test that fit_logistic_selectivity() returns a tibble with + #' the correct columns and parameter labels. + f_values <- c(0.1, 0.5, 0.9) + result <- ecosystemdata:::fit_logistic_selectivity(f_values) + + expect_s3_class(result, "tbl_df") + expect_equal(nrow(result), 2) + expect_equal(result$label, c("inflection_point", "slope")) + expect_equal(result$module_type, rep("Logistic", 2)) + expect_true(all(is.numeric(result$value))) +}) + +test_that("fit_logistic_selectivity() handles monotonic increasing pattern", { + #' @description Test that the function fits an increasing selectivity pattern. + f_values <- seq(0, 1, length.out = 5) + result <- ecosystemdata:::fit_logistic_selectivity(f_values) + + expect_true(result$value[result$label == "slope"] > 0) + expect_true(result$value[result$label == "inflection_point"] >= 1) + expect_true(result$value[result$label == "inflection_point"] <= 5) +}) + +# fit_double_logistic_selectivity ---- +## IO correctness ---- +test_that("fit_double_logistic_selectivity() returns correct structure", { + #' @description Test that fit_double_logistic_selectivity() returns a tibble + #' with the correct columns and parameter labels. + f_values <- c(0.1, 0.5, 0.9, 0.6, 0.3) + result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) + + expect_s3_class(result, "tbl_df") + expect_equal(nrow(result), 4) + expect_equal( + result$label, + c("inflection_point_asc", "slope_asc", "inflection_point_desc", "slope_desc") + ) + expect_equal(result$module_type, rep("DoubleLogistic", 4)) + expect_true(all(is.numeric(result$value))) +}) + +test_that("fit_double_logistic_selectivity() handles dome-shaped pattern", { + #' @description Test that the function fits a dome-shaped selectivity pattern. + f_values <- c(0.1, 0.5, 1.0, 0.7, 0.3) + result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) + + infl_asc <- result$value[result$label == "inflection_point_asc"] + infl_desc <- result$value[result$label == "inflection_point_desc"] + + # Ascending inflection should come before descending + expect_true(infl_asc < infl_desc) + + # Slopes should be positive + expect_true(result$value[result$label == "slope_asc"] > 0) + expect_true(result$value[result$label == "slope_desc"] > 0) +}) From 92a369a5125c75aaee5af4ca20d7cf5611644bc4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Feb 2026 18:01:11 +0000 Subject: [PATCH 03/11] Add selectivity curves usage guide and validate implementation Co-authored-by: Bai-Li-NOAA <59936250+Bai-Li-NOAA@users.noreply.github.com> --- inst/selectivity_curves_guide.md | 159 +++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 inst/selectivity_curves_guide.md diff --git a/inst/selectivity_curves_guide.md b/inst/selectivity_curves_guide.md new file mode 100644 index 0000000..14b0a6c --- /dev/null +++ b/inst/selectivity_curves_guide.md @@ -0,0 +1,159 @@ +# Selectivity Curve Generation from EWE Model Output + +This document describes the new functionality for generating selectivity curves from Ecopath with Ecosim (EWE) model fishing mortality (F) values. + +## Overview + +The `generate_selectivity_curve()` function takes fishing mortality values by functional group from an ecosystem model and fits selectivity curve parameters. This allows users to convert EWE model outputs into selectivity parameters that can be used in age-structured stock assessment models like FIMS. + +## Key Features + +- **Modular Design**: Easy to extend with new selectivity curve types +- **Multiple Forms**: Currently supports logistic and double logistic selectivity +- **FIMS Compatible**: Outputs parameters in FIMS-compatible format +- **Flexible Input**: Works with any EWE model output containing mortality data + +## Function Overview + +### Main Function + +`generate_selectivity_curve(ewe_output, functional_groups, functional_form, fleet_name = "fleet1")` + +**Arguments:** +- `ewe_output`: Tibble from `load_model()` containing EWE output data +- `functional_groups`: Character vector of functional group names (ordered) +- `functional_form`: "logistic" or "double_logistic" +- `fleet_name`: Fleet identifier for output (default: "fleet1") + +**Returns:** +A tibble with FIMS-compatible parameter format containing fitted selectivity parameters. + +### Helper Functions + +The package includes helper functions for each supported selectivity form: + +1. `fit_logistic_selectivity()` - Fits logistic curve parameters +2. `fit_double_logistic_selectivity()` - Fits dome-shaped selectivity parameters + +## Usage Examples + +### Example 1: Logistic Selectivity + +```r +library(ecosystemdata) + +# Load example data +data("ewe_nwatlantic_base", package = "ecosystemdata") + +# Define functional groups in age order +functional_groups <- c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +") + +# Generate logistic selectivity parameters +params <- generate_selectivity_curve( + ewe_output = ewe_nwatlantic_base, + functional_groups = functional_groups, + functional_form = "logistic", + fleet_name = "commercial" +) + +print(params) +``` + +Expected output structure: +``` +# A tibble: 2 × 13 + model_family module_name fleet_name module_type label value + +1 catch_at_age Selectivity commercial Logistic inflection_point 2.00 +2 catch_at_age Selectivity commercial Logistic slope 1.50 +``` + +### Example 2: Double Logistic Selectivity + +```r +# Generate dome-shaped selectivity parameters +params_dome <- generate_selectivity_curve( + ewe_output = ewe_nwatlantic_base, + functional_groups = functional_groups, + functional_form = "double_logistic", + fleet_name = "commercial" +) + +print(params_dome) +``` + +Expected output structure: +``` +# A tibble: 4 × 13 + model_family module_name fleet_name module_type label value + +1 catch_at_age Selectivity commercial DoubleLogistic inflection_point_asc 1.50 +2 catch_at_age Selectivity commercial DoubleLogistic slope_asc 1.20 +3 catch_at_age Selectivity commercial DoubleLogistic inflection_point_desc 2.80 +4 catch_at_age Selectivity commercial DoubleLogistic slope_desc 1.00 +``` + +## How It Works + +1. **Data Extraction**: The function filters the EWE output for mortality data matching the specified functional groups. + +2. **Aggregation**: Mean F values are calculated for each functional group (averaged over time/fleet). + +3. **Normalization**: F values are normalized to a 0-1 scale to represent relative selectivity. + +4. **Curve Fitting**: The appropriate helper function fits selectivity parameters using nonlinear least squares. + +5. **Output Formatting**: Results are returned in FIMS-compatible tibble format. + +## Adding New Selectivity Forms + +The modular design makes it easy to add new selectivity curve types: + +1. Create a new helper function following the naming pattern: + ```r + fit__selectivity <- function(f_values) { + # Fit parameters to f_values + # Return tibble with parameter values + } + ``` + +2. Update the `functional_form` parameter documentation + +3. Users can now use the new form: + ```r + generate_selectivity_curve(..., functional_form = "new_form") + ``` + +Example forms that could be added: +- `"double_normal"` - Double normal selectivity +- `"random_walk"` - Random walk at age +- `"spline"` - Spline-based selectivity + +## Output Format Details + +The output tibble contains these columns: +- `model_family`: Always "catch_at_age" +- `module_name`: Always "Selectivity" +- `fleet_name`: User-specified fleet name +- `module_type`: Selectivity form (e.g., "Logistic", "DoubleLogistic") +- `label`: Parameter name (e.g., "inflection_point", "slope") +- `distribution_link`: NA (reserved for future use) +- `age`: NA (parameters apply to all ages) +- `length`: NA (not length-based selectivity) +- `time`: NA (parameters are time-invariant) +- `value`: Fitted parameter value +- `estimation_type`: Always "fixed_effects" +- `distribution_type`: NA (reserved for future use) +- `distribution`: NA (reserved for future use) + +## Notes and Caveats + +- The function requires at least 2 functional groups to fit a curve +- All functional groups must have mortality data available +- F values are normalized to represent relative selectivity +- The order of functional groups matters - it determines the x-axis +- Nonlinear fitting may not converge for all data patterns; fallback heuristics are used in such cases + +## References + +This functionality was developed to support the integration of ecosystem models (EWE) with single-species assessment models (FIMS) in the NOAA Fisheries Integrated Modeling System. From 2dec906710b8088277c5a1c024df31f773092725 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Feb 2026 18:02:01 +0000 Subject: [PATCH 04/11] Fix comment in fit_logistic_selectivity about max derivative Co-authored-by: Bai-Li-NOAA <59936250+Bai-Li-NOAA@users.noreply.github.com> --- R/fit_selectivity_helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fit_selectivity_helpers.R b/R/fit_selectivity_helpers.R index c2a5030..f22a7d5 100644 --- a/R/fit_selectivity_helpers.R +++ b/R/fit_selectivity_helpers.R @@ -66,7 +66,7 @@ fit_logistic_selectivity <- function(f_values) { # Estimate slope from differences if (length(ages) > 1) { diffs <- diff(f_values) / diff(ages) - slope <- max(abs(diffs), 0.1) * 4 # Scale up since logistic slope is 1/4 of max derivative + slope <- max(abs(diffs), 0.1) * 4 # Scale up since max derivative of logistic is slope/4 } else { slope <- 1 } From a3d9e2f9d9d66625c8c20f7ba2c724c39c70c9f6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Feb 2026 18:03:06 +0000 Subject: [PATCH 05/11] Use clearly invalid form name in test to avoid confusion with future features Co-authored-by: Bai-Li-NOAA <59936250+Bai-Li-NOAA@users.noreply.github.com> --- tests/testthat/test-generate_selectivity_curve.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-generate_selectivity_curve.R b/tests/testthat/test-generate_selectivity_curve.R index f4c745a..26062fd 100644 --- a/tests/testthat/test-generate_selectivity_curve.R +++ b/tests/testthat/test-generate_selectivity_curve.R @@ -203,7 +203,7 @@ test_that("generate_selectivity_curve() errors on unsupported functional form", generate_selectivity_curve( ewe_output = test_data, functional_groups = c("species1 0", "species1 1"), - functional_form = "random_walk" + functional_form = "invalid_form" ), "Unsupported functional form" ) From 75414fc71af988464641da0738e908fc18bcaf6d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 6 Feb 2026 18:42:18 +0000 Subject: [PATCH 06/11] Replace $ with [[""]] for field extraction per tidyverse style guide Co-authored-by: Bai-Li-NOAA <59936250+Bai-Li-NOAA@users.noreply.github.com> --- R/generate_selectivity_curve.R | 7 +- .../test-generate_selectivity_curve.R | 78 +++++++++---------- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/R/generate_selectivity_curve.R b/R/generate_selectivity_curve.R index 15d7b7c..adcad25 100644 --- a/R/generate_selectivity_curve.R +++ b/R/generate_selectivity_curve.R @@ -104,7 +104,10 @@ generate_selectivity_curve <- function(ewe_output, } # Check that functional groups exist in the data - missing_groups <- setdiff(functional_groups, unique(ewe_output$functional_group)) + missing_groups <- setdiff( + functional_groups, + unique(ewe_output[["functional_group"]]) + ) if (length(missing_groups) > 0) { cli::cli_abort(c( "Some functional groups not found in {.var ewe_output}:", @@ -168,7 +171,7 @@ generate_selectivity_curve <- function(ewe_output, result <- helper_function(normalized_f) # Add fleet_name to the result - result$fleet_name <- fleet_name + result[["fleet_name"]] <- fleet_name # Reorder columns to match expected output format result <- result |> diff --git a/tests/testthat/test-generate_selectivity_curve.R b/tests/testthat/test-generate_selectivity_curve.R index 26062fd..afe52fe 100644 --- a/tests/testthat/test-generate_selectivity_curve.R +++ b/tests/testthat/test-generate_selectivity_curve.R @@ -37,15 +37,15 @@ test_that("generate_selectivity_curve() works with logistic form", { expect_s3_class(result, "tbl_df") expect_equal(nrow(result), 2) # inflection_point and slope - expect_equal(result$model_family, rep("catch_at_age", 2)) - expect_equal(result$module_name, rep("Selectivity", 2)) - expect_equal(result$fleet_name, rep("fleet1", 2)) - expect_equal(result$module_type, rep("Logistic", 2)) - expect_equal(result$label, c("inflection_point", "slope")) - expect_equal(result$estimation_type, rep("fixed_effects", 2)) - expect_true(all(is.na(result$distribution_link))) - expect_true(all(is.numeric(result$value))) - expect_true(all(!is.na(result$value))) + expect_equal(result[["model_family"]], rep("catch_at_age", 2)) + expect_equal(result[["module_name"]], rep("Selectivity", 2)) + expect_equal(result[["fleet_name"]], rep("fleet1", 2)) + expect_equal(result[["module_type"]], rep("Logistic", 2)) + expect_equal(result[["label"]], c("inflection_point", "slope")) + expect_equal(result[["estimation_type"]], rep("fixed_effects", 2)) + expect_true(all(is.na(result[["distribution_link"]]))) + expect_true(all(is.numeric(result[["value"]]))) + expect_true(all(!is.na(result[["value"]]))) }) test_that("generate_selectivity_curve() works with double_logistic form", { @@ -60,17 +60,17 @@ test_that("generate_selectivity_curve() works with double_logistic form", { expect_s3_class(result, "tbl_df") expect_equal(nrow(result), 4) # 4 parameters for double logistic - expect_equal(result$model_family, rep("catch_at_age", 4)) - expect_equal(result$module_name, rep("Selectivity", 4)) - expect_equal(result$fleet_name, rep("fleet1", 4)) - expect_equal(result$module_type, rep("DoubleLogistic", 4)) + expect_equal(result[["model_family"]], rep("catch_at_age", 4)) + expect_equal(result[["module_name"]], rep("Selectivity", 4)) + expect_equal(result[["fleet_name"]], rep("fleet1", 4)) + expect_equal(result[["module_type"]], rep("DoubleLogistic", 4)) expect_equal( - result$label, + result[["label"]], c("inflection_point_asc", "slope_asc", "inflection_point_desc", "slope_desc") ) - expect_equal(result$estimation_type, rep("fixed_effects", 4)) - expect_true(all(is.numeric(result$value))) - expect_true(all(!is.na(result$value))) + expect_equal(result[["estimation_type"]], rep("fixed_effects", 4)) + expect_true(all(is.numeric(result[["value"]]))) + expect_true(all(!is.na(result[["value"]]))) }) test_that("generate_selectivity_curve() works with real EWE data", { @@ -78,7 +78,7 @@ test_that("generate_selectivity_curve() works with real EWE data", { #' ewe_nwatlantic_base data for spiny dogfish. # Get spiny dogfish functional groups - dogfish_groups <- unique(ewe_nwatlantic_base$functional_group) + dogfish_groups <- unique(ewe_nwatlantic_base[["functional_group"]]) dogfish_groups <- dogfish_groups[grepl("spiny dogfish", dogfish_groups)] skip_if(length(dogfish_groups) < 2, "Not enough spiny dogfish groups in data") @@ -92,8 +92,8 @@ test_that("generate_selectivity_curve() works with real EWE data", { expect_s3_class(result, "tbl_df") expect_equal(nrow(result), 2) - expect_equal(result$fleet_name, rep("commercial", 2)) - expect_true(all(is.numeric(result$value))) + expect_equal(result[["fleet_name"]], rep("commercial", 2)) + expect_true(all(is.numeric(result[["value"]]))) }) ## Edge handling ---- @@ -113,7 +113,7 @@ test_that("generate_selectivity_curve() handles case-insensitive functional form fleet_name = "fleet1" ) - expect_equal(result1$module_type, result2$module_type) + expect_equal(result1[["module_type"]], result2[["module_type"]]) }) test_that("generate_selectivity_curve() preserves functional_groups order", { @@ -137,8 +137,8 @@ test_that("generate_selectivity_curve() preserves functional_groups order", { # The inflection points should be different because the order is reversed expect_false( isTRUE(all.equal( - result_forward$value[1], - result_reversed$value[1] + result_forward[["value"]][1], + result_reversed[["value"]][1] )) ) }) @@ -226,7 +226,7 @@ test_that("generate_selectivity_curve() errors on missing mortality data", { #' @description Test that the function errors when no mortality data exists #' for the specified groups. biomass_data <- test_data - biomass_data$type <- "biomass" + biomass_data[["type"]] <- "biomass" expect_error( generate_selectivity_curve( @@ -241,7 +241,7 @@ test_that("generate_selectivity_curve() errors on missing mortality data", { test_that("generate_selectivity_curve() errors on all-zero F values", { #' @description Test that the function errors when all F values are zero. zero_data <- test_data - zero_data$value <- 0 + zero_data[["value"]] <- 0 expect_error( generate_selectivity_curve( @@ -263,9 +263,9 @@ test_that("fit_logistic_selectivity() returns correct structure", { expect_s3_class(result, "tbl_df") expect_equal(nrow(result), 2) - expect_equal(result$label, c("inflection_point", "slope")) - expect_equal(result$module_type, rep("Logistic", 2)) - expect_true(all(is.numeric(result$value))) + expect_equal(result[["label"]], c("inflection_point", "slope")) + expect_equal(result[["module_type"]], rep("Logistic", 2)) + expect_true(all(is.numeric(result[["value"]]))) }) test_that("fit_logistic_selectivity() handles monotonic increasing pattern", { @@ -273,9 +273,9 @@ test_that("fit_logistic_selectivity() handles monotonic increasing pattern", { f_values <- seq(0, 1, length.out = 5) result <- ecosystemdata:::fit_logistic_selectivity(f_values) - expect_true(result$value[result$label == "slope"] > 0) - expect_true(result$value[result$label == "inflection_point"] >= 1) - expect_true(result$value[result$label == "inflection_point"] <= 5) + expect_true(result[["value"]][result[["label"]] == "slope"] > 0) + expect_true(result[["value"]][result[["label"]] == "inflection_point"] >= 1) + expect_true(result[["value"]][result[["label"]] == "inflection_point"] <= 5) }) # fit_double_logistic_selectivity ---- @@ -289,11 +289,11 @@ test_that("fit_double_logistic_selectivity() returns correct structure", { expect_s3_class(result, "tbl_df") expect_equal(nrow(result), 4) expect_equal( - result$label, + result[["label"]], c("inflection_point_asc", "slope_asc", "inflection_point_desc", "slope_desc") ) - expect_equal(result$module_type, rep("DoubleLogistic", 4)) - expect_true(all(is.numeric(result$value))) + expect_equal(result[["module_type"]], rep("DoubleLogistic", 4)) + expect_true(all(is.numeric(result[["value"]]))) }) test_that("fit_double_logistic_selectivity() handles dome-shaped pattern", { @@ -301,13 +301,13 @@ test_that("fit_double_logistic_selectivity() handles dome-shaped pattern", { f_values <- c(0.1, 0.5, 1.0, 0.7, 0.3) result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) - infl_asc <- result$value[result$label == "inflection_point_asc"] - infl_desc <- result$value[result$label == "inflection_point_desc"] + inflection_ascending <- result[["value"]][result[["label"]] == "inflection_point_asc"] + inflection_descending <- result[["value"]][result[["label"]] == "inflection_point_desc"] # Ascending inflection should come before descending - expect_true(infl_asc < infl_desc) + expect_true(inflection_ascending < inflection_descending) # Slopes should be positive - expect_true(result$value[result$label == "slope_asc"] > 0) - expect_true(result$value[result$label == "slope_desc"] > 0) + expect_true(result[["value"]][result[["label"]] == "slope_asc"] > 0) + expect_true(result[["value"]][result[["label"]] == "slope_desc"] > 0) }) From 2a6ebbf6a5fbc1de58bd0f6e744e0e9603997ccc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 6 Feb 2026 18:43:40 +0000 Subject: [PATCH 07/11] Use full descriptive variable names instead of abbreviations Co-authored-by: Bai-Li-NOAA <59936250+Bai-Li-NOAA@users.noreply.github.com> --- R/fit_selectivity_helpers.R | 82 ++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/R/fit_selectivity_helpers.R b/R/fit_selectivity_helpers.R index f22a7d5..ae69104 100644 --- a/R/fit_selectivity_helpers.R +++ b/R/fit_selectivity_helpers.R @@ -33,18 +33,18 @@ fit_logistic_selectivity <- function(f_values) { # Initial parameter guesses # Inflection point: age where selectivity is around 0.5 - mid_idx <- which.min(abs(f_values - 0.5)) - init_inflection <- if (length(mid_idx) > 0) ages[mid_idx] else mean(ages) + midpoint_index <- which.min(abs(f_values - 0.5)) + initial_inflection <- if (length(midpoint_index) > 0) ages[midpoint_index] else mean(ages) # Slope: positive value, start with 1 - init_slope <- 1 + initial_slope <- 1 # Fit using nonlinear least squares fit_result <- tryCatch( { stats::nls( f_values ~ logistic(ages, inflection_point, slope), - start = list(inflection_point = init_inflection, slope = init_slope), + start = list(inflection_point = initial_inflection, slope = initial_slope), control = stats::nls.control(maxiter = 100, warnOnly = TRUE) ) }, @@ -61,7 +61,7 @@ fit_logistic_selectivity <- function(f_values) { } else { # Fallback method: find inflection as midpoint of rise # and estimate slope from the steepest part of the curve - inflection_point <- init_inflection + inflection_point <- initial_inflection # Estimate slope from differences if (length(ages) > 1) { @@ -122,10 +122,10 @@ fit_double_logistic_selectivity <- function(f_values) { ages <- seq_along(f_values) # Define double logistic function - double_logistic <- function(age, infl_asc, slope_asc, infl_desc, slope_desc) { - asc <- 1 / (1 + exp(-slope_asc * (age - infl_asc))) - desc <- 1 - 1 / (1 + exp(-slope_desc * (age - infl_desc))) - asc * desc + double_logistic <- function(age, inflection_ascending, slope_ascending, inflection_descending, slope_descending) { + ascending_component <- 1 / (1 + exp(-slope_ascending * (age - inflection_ascending))) + descending_component <- 1 - 1 / (1 + exp(-slope_descending * (age - inflection_descending))) + ascending_component * descending_component } # Initial parameter guesses @@ -137,10 +137,10 @@ fit_double_logistic_selectivity <- function(f_values) { if (peak_idx > 1) { ascending_f <- f_values[1:peak_idx] ascending_ages <- ages[1:peak_idx] - mid_asc_idx <- which.min(abs(ascending_f - 0.5)) - init_infl_asc <- if (length(mid_asc_idx) > 0) ascending_ages[mid_asc_idx] else peak_age - 1 + midpoint_ascending_index <- which.min(abs(ascending_f - 0.5)) + initial_inflection_ascending <- if (length(midpoint_ascending_index) > 0) ascending_ages[midpoint_ascending_index] else peak_age - 1 } else { - init_infl_asc <- peak_age - 1 + initial_inflection_ascending <- peak_age - 1 } # Descending parameters: after peak @@ -148,44 +148,44 @@ fit_double_logistic_selectivity <- function(f_values) { descending_f <- f_values[peak_idx:length(ages)] descending_ages <- ages[peak_idx:length(ages)] # For descending, look for where it drops below peak/2 - mid_desc_idx <- which.min(abs(descending_f - max(f_values) / 2)) - init_infl_desc <- if (length(mid_desc_idx) > 0) descending_ages[mid_desc_idx] else peak_age + 1 + midpoint_descending_index <- which.min(abs(descending_f - max(f_values) / 2)) + initial_inflection_descending <- if (length(midpoint_descending_index) > 0) descending_ages[midpoint_descending_index] else peak_age + 1 } else { - init_infl_desc <- peak_age + 1 + initial_inflection_descending <- peak_age + 1 } # Ensure descending inflection is after ascending - if (init_infl_desc <= init_infl_asc) { - init_infl_desc <- init_infl_asc + 1 + if (initial_inflection_descending <= initial_inflection_ascending) { + initial_inflection_descending <- initial_inflection_ascending + 1 } - init_slope_asc <- 1 - init_slope_desc <- 1 + initial_slope_ascending <- 1 + initial_slope_descending <- 1 # Fit using nonlinear least squares fit_result <- tryCatch( { stats::nls( - f_values ~ double_logistic(ages, infl_asc, slope_asc, infl_desc, slope_desc), + f_values ~ double_logistic(ages, inflection_ascending, slope_ascending, inflection_descending, slope_descending), start = list( - infl_asc = init_infl_asc, - slope_asc = init_slope_asc, - infl_desc = init_infl_desc, - slope_desc = init_slope_desc + inflection_ascending = initial_inflection_ascending, + slope_ascending = initial_slope_ascending, + inflection_descending = initial_inflection_descending, + slope_descending = initial_slope_descending ), control = stats::nls.control(maxiter = 200, warnOnly = TRUE), algorithm = "port", lower = c( - infl_asc = min(ages), - slope_asc = 0.01, - infl_desc = init_infl_asc + 0.5, - slope_desc = 0.01 + inflection_ascending = min(ages), + slope_ascending = 0.01, + inflection_descending = initial_inflection_ascending + 0.5, + slope_descending = 0.01 ), upper = c( - infl_asc = max(ages), - slope_asc = 10, - infl_desc = max(ages) + 10, - slope_desc = 10 + inflection_ascending = max(ages), + slope_ascending = 10, + inflection_descending = max(ages) + 10, + slope_descending = 10 ) ) }, @@ -196,16 +196,16 @@ fit_double_logistic_selectivity <- function(f_values) { if (!is.null(fit_result)) { params <- stats::coef(fit_result) - infl_asc <- params["infl_asc"] - slope_asc <- params["slope_asc"] - infl_desc <- params["infl_desc"] - slope_desc <- params["slope_desc"] + inflection_ascending <- params["inflection_ascending"] + slope_ascending <- params["slope_ascending"] + inflection_descending <- params["inflection_descending"] + slope_descending <- params["slope_descending"] } else { # Fallback to initial guesses - infl_asc <- init_infl_asc - slope_asc <- init_slope_asc - infl_desc <- init_infl_desc - slope_desc <- init_slope_desc + inflection_ascending <- initial_inflection_ascending + slope_ascending <- initial_slope_ascending + inflection_descending <- initial_inflection_descending + slope_descending <- initial_slope_descending } # Create output tibble in FIMS format @@ -223,7 +223,7 @@ fit_double_logistic_selectivity <- function(f_values) { age = NA_real_, length = NA_real_, time = NA_real_, - value = c(infl_asc, slope_asc, infl_desc, slope_desc), + value = c(inflection_ascending, slope_ascending, inflection_descending, slope_descending), estimation_type = "fixed_effects", distribution_type = NA_character_, distribution = NA_character_ From 135e899eaae5dd900cbedc49c68a728f7eddb54e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 6 Feb 2026 18:45:21 +0000 Subject: [PATCH 08/11] Improve variable name consistency: use inflection_point_ prefix for all inflection variables Co-authored-by: Bai-Li-NOAA <59936250+Bai-Li-NOAA@users.noreply.github.com> --- R/fit_selectivity_helpers.R | 42 ++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/R/fit_selectivity_helpers.R b/R/fit_selectivity_helpers.R index ae69104..0495a61 100644 --- a/R/fit_selectivity_helpers.R +++ b/R/fit_selectivity_helpers.R @@ -122,9 +122,9 @@ fit_double_logistic_selectivity <- function(f_values) { ages <- seq_along(f_values) # Define double logistic function - double_logistic <- function(age, inflection_ascending, slope_ascending, inflection_descending, slope_descending) { - ascending_component <- 1 / (1 + exp(-slope_ascending * (age - inflection_ascending))) - descending_component <- 1 - 1 / (1 + exp(-slope_descending * (age - inflection_descending))) + double_logistic <- function(age, inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending) { + ascending_component <- 1 / (1 + exp(-slope_ascending * (age - inflection_point_ascending))) + descending_component <- 1 - 1 / (1 + exp(-slope_descending * (age - inflection_point_descending))) ascending_component * descending_component } @@ -138,9 +138,9 @@ fit_double_logistic_selectivity <- function(f_values) { ascending_f <- f_values[1:peak_idx] ascending_ages <- ages[1:peak_idx] midpoint_ascending_index <- which.min(abs(ascending_f - 0.5)) - initial_inflection_ascending <- if (length(midpoint_ascending_index) > 0) ascending_ages[midpoint_ascending_index] else peak_age - 1 + initial_inflection_point_ascending <- if (length(midpoint_ascending_index) > 0) ascending_ages[midpoint_ascending_index] else peak_age - 1 } else { - initial_inflection_ascending <- peak_age - 1 + initial_inflection_point_ascending <- peak_age - 1 } # Descending parameters: after peak @@ -149,14 +149,14 @@ fit_double_logistic_selectivity <- function(f_values) { descending_ages <- ages[peak_idx:length(ages)] # For descending, look for where it drops below peak/2 midpoint_descending_index <- which.min(abs(descending_f - max(f_values) / 2)) - initial_inflection_descending <- if (length(midpoint_descending_index) > 0) descending_ages[midpoint_descending_index] else peak_age + 1 + initial_inflection_point_descending <- if (length(midpoint_descending_index) > 0) descending_ages[midpoint_descending_index] else peak_age + 1 } else { - initial_inflection_descending <- peak_age + 1 + initial_inflection_point_descending <- peak_age + 1 } # Ensure descending inflection is after ascending - if (initial_inflection_descending <= initial_inflection_ascending) { - initial_inflection_descending <- initial_inflection_ascending + 1 + if (initial_inflection_point_descending <= initial_inflection_point_ascending) { + initial_inflection_point_descending <- initial_inflection_point_ascending + 1 } initial_slope_ascending <- 1 @@ -166,25 +166,25 @@ fit_double_logistic_selectivity <- function(f_values) { fit_result <- tryCatch( { stats::nls( - f_values ~ double_logistic(ages, inflection_ascending, slope_ascending, inflection_descending, slope_descending), + f_values ~ double_logistic(ages, inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending), start = list( - inflection_ascending = initial_inflection_ascending, + inflection_point_ascending = initial_inflection_point_ascending, slope_ascending = initial_slope_ascending, - inflection_descending = initial_inflection_descending, + inflection_point_descending = initial_inflection_point_descending, slope_descending = initial_slope_descending ), control = stats::nls.control(maxiter = 200, warnOnly = TRUE), algorithm = "port", lower = c( - inflection_ascending = min(ages), + inflection_point_ascending = min(ages), slope_ascending = 0.01, - inflection_descending = initial_inflection_ascending + 0.5, + inflection_point_descending = initial_inflection_point_ascending + 0.5, slope_descending = 0.01 ), upper = c( - inflection_ascending = max(ages), + inflection_point_ascending = max(ages), slope_ascending = 10, - inflection_descending = max(ages) + 10, + inflection_point_descending = max(ages) + 10, slope_descending = 10 ) ) @@ -196,15 +196,15 @@ fit_double_logistic_selectivity <- function(f_values) { if (!is.null(fit_result)) { params <- stats::coef(fit_result) - inflection_ascending <- params["inflection_ascending"] + inflection_point_ascending <- params["inflection_point_ascending"] slope_ascending <- params["slope_ascending"] - inflection_descending <- params["inflection_descending"] + inflection_point_descending <- params["inflection_point_descending"] slope_descending <- params["slope_descending"] } else { # Fallback to initial guesses - inflection_ascending <- initial_inflection_ascending + inflection_point_ascending <- initial_inflection_point_ascending slope_ascending <- initial_slope_ascending - inflection_descending <- initial_inflection_descending + inflection_point_descending <- initial_inflection_point_descending slope_descending <- initial_slope_descending } @@ -223,7 +223,7 @@ fit_double_logistic_selectivity <- function(f_values) { age = NA_real_, length = NA_real_, time = NA_real_, - value = c(inflection_ascending, slope_ascending, inflection_descending, slope_descending), + value = c(inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending), estimation_type = "fixed_effects", distribution_type = NA_character_, distribution = NA_character_ From e209078cf76f87c61601b15b876861917be8d31e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Feb 2026 21:26:40 +0000 Subject: [PATCH 09/11] Remove guide file, make docs generic, split tests, add validation tests - Removed inst/selectivity_curves_guide.md (content integrated into function docs) - Made fit_*_selectivity() documentation generic (not fishing-mortality-specific) - Removed @author fields from all roxygen documentation - Split tests into test-fit_selectivity_helpers.R and test-generate_selectivity_curve.R - Changed real EWE test to use menhaden instead of dogfish - Added validation tests that verify parameter recovery from known true values Co-authored-by: Bai-Li-NOAA <59936250+Bai-Li-NOAA@users.noreply.github.com> --- R/fit_selectivity_helpers.R | 106 ++++++----- R/generate_selectivity_curve.R | 15 +- inst/selectivity_curves_guide.md | 159 ---------------- tests/testthat/test-fit_selectivity_helpers.R | 179 ++++++++++++++++++ .../test-generate_selectivity_curve.R | 132 +++++++------ 5 files changed, 323 insertions(+), 268 deletions(-) delete mode 100644 inst/selectivity_curves_guide.md create mode 100644 tests/testthat/test-fit_selectivity_helpers.R diff --git a/R/fit_selectivity_helpers.R b/R/fit_selectivity_helpers.R index 0495a61..3e26f46 100644 --- a/R/fit_selectivity_helpers.R +++ b/R/fit_selectivity_helpers.R @@ -1,40 +1,40 @@ #' Fit logistic selectivity parameters #' #' @description -#' Helper function to fit logistic selectivity curve parameters to a vector of -#' relative F values. The logistic function is defined as: -#' S(a) = 1 / (1 + exp(-slope * (a - inflection_point))) -#' where a is the age/group index. +#' Fit logistic curve parameters to a vector of normalized values. The logistic +#' function is defined as: S(x) = 1 / (1 + exp(-slope * (x - inflection_point))) +#' where x is the index position (1, 2, 3, ...). #' -#' @param f_values A numeric vector of normalized F values (0-1) for each -#' functional group, in the order they should appear on the x-axis. +#' @param f_values A numeric vector of normalized values (0-1) for each +#' group, in the order they should appear on the x-axis. These values +#' represent relative selectivity, maturity, or any other quantity that +#' follows a logistic pattern. #' #' @return A tibble with one row per parameter, containing the standard FIMS #' parameter format columns. #' #' @details #' The function uses nonlinear least squares (nls) to fit a logistic curve to -#' the normalized F values. The age/group index is simply 1, 2, 3, ... based on -#' the position in the f_values vector. +#' the normalized values. The x-axis index is simply 1, 2, 3, ... based on +#' the position in the values vector. #' -#' If the optimization fails, the function falls back to a simple linear -#' approximation to estimate the inflection point and slope. +#' If the optimization fails, the function falls back to a simple heuristic +#' method to estimate the inflection point and slope. #' #' @keywords internal -#' @author Kelli F. Johnson fit_logistic_selectivity <- function(f_values) { - # Create age index (1, 2, 3, ...) - ages <- seq_along(f_values) + # Create index (1, 2, 3, ...) + index_values <- seq_along(f_values) # Define logistic function - logistic <- function(age, inflection_point, slope) { - 1 / (1 + exp(-slope * (age - inflection_point))) + logistic <- function(index, inflection_point, slope) { + 1 / (1 + exp(-slope * (index - inflection_point))) } # Initial parameter guesses - # Inflection point: age where selectivity is around 0.5 + # Inflection point: index where value is around 0.5 midpoint_index <- which.min(abs(f_values - 0.5)) - initial_inflection <- if (length(midpoint_index) > 0) ages[midpoint_index] else mean(ages) + initial_inflection <- if (length(midpoint_index) > 0) index_values[midpoint_index] else mean(index_values) # Slope: positive value, start with 1 initial_slope <- 1 @@ -43,7 +43,7 @@ fit_logistic_selectivity <- function(f_values) { fit_result <- tryCatch( { stats::nls( - f_values ~ logistic(ages, inflection_point, slope), + f_values ~ logistic(index_values, inflection_point, slope), start = list(inflection_point = initial_inflection, slope = initial_slope), control = stats::nls.control(maxiter = 100, warnOnly = TRUE) ) @@ -64,8 +64,8 @@ fit_logistic_selectivity <- function(f_values) { inflection_point <- initial_inflection # Estimate slope from differences - if (length(ages) > 1) { - diffs <- diff(f_values) / diff(ages) + if (length(index_values) > 1) { + diffs <- diff(f_values) / diff(index_values) slope <- max(abs(diffs), 0.1) * 4 # Scale up since max derivative of logistic is slope/4 } else { slope <- 1 @@ -93,65 +93,67 @@ fit_logistic_selectivity <- function(f_values) { #' Fit double logistic selectivity parameters #' #' @description -#' Helper function to fit double logistic (dome-shaped) selectivity curve -#' parameters to a vector of relative F values. The double logistic function -#' combines ascending and descending logistic curves: -#' S(a) = 1 / (1 + exp(-slope_asc * (a - inflection_asc))) * -#' (1 - 1 / (1 + exp(-slope_desc * (a - inflection_desc)))) +#' Fit double logistic (dome-shaped) curve parameters to a vector of normalized +#' values. The double logistic function combines ascending and descending logistic +#' curves: S(x) = 1 / (1 + exp(-slope_asc * (x - inflection_asc))) * +#' (1 - 1 / (1 + exp(-slope_desc * (x - inflection_desc)))) #' -#' @param f_values A numeric vector of normalized F values (0-1) for each -#' functional group, in the order they should appear on the x-axis. +#' @param f_values A numeric vector of normalized values (0-1) for each +#' group, in the order they should appear on the x-axis. These values +#' represent relative selectivity, maturity, or any other quantity that +#' follows a dome-shaped pattern. #' #' @return A tibble with one row per parameter, containing the standard FIMS #' parameter format columns. #' #' @details -#' The function attempts to fit a dome-shaped selectivity curve. If the data +#' The function attempts to fit a dome-shaped curve. If the data #' appears monotonic (no dome shape), it will still fit the parameters but may -#' warn or produce parameters that effectively reduce to a simple logistic. +#' produce parameters that effectively reduce to a simple logistic. #' #' The fitting uses nonlinear least squares with constraints to ensure: -#' - Ascending inflection point comes before descending -#' - Slopes are positive -#' - The curve shape makes biological sense +#' \itemize{ +#' \item Ascending inflection point comes before descending +#' \item Slopes are positive +#' \item The curve shape is biologically reasonable +#' } #' #' @keywords internal -#' @author Kelli F. Johnson fit_double_logistic_selectivity <- function(f_values) { - # Create age index (1, 2, 3, ...) - ages <- seq_along(f_values) + # Create index (1, 2, 3, ...) + index_values <- seq_along(f_values) # Define double logistic function - double_logistic <- function(age, inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending) { - ascending_component <- 1 / (1 + exp(-slope_ascending * (age - inflection_point_ascending))) - descending_component <- 1 - 1 / (1 + exp(-slope_descending * (age - inflection_point_descending))) + double_logistic <- function(index, inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending) { + ascending_component <- 1 / (1 + exp(-slope_ascending * (index - inflection_point_ascending))) + descending_component <- 1 - 1 / (1 + exp(-slope_descending * (index - inflection_point_descending))) ascending_component * descending_component } # Initial parameter guesses - # Find the peak of the selectivity + # Find the peak of the values peak_idx <- which.max(f_values) - peak_age <- ages[peak_idx] + peak_index <- index_values[peak_idx] # Ascending parameters: before peak if (peak_idx > 1) { ascending_f <- f_values[1:peak_idx] - ascending_ages <- ages[1:peak_idx] + ascending_indices <- index_values[1:peak_idx] midpoint_ascending_index <- which.min(abs(ascending_f - 0.5)) - initial_inflection_point_ascending <- if (length(midpoint_ascending_index) > 0) ascending_ages[midpoint_ascending_index] else peak_age - 1 + initial_inflection_point_ascending <- if (length(midpoint_ascending_index) > 0) ascending_indices[midpoint_ascending_index] else peak_index - 1 } else { - initial_inflection_point_ascending <- peak_age - 1 + initial_inflection_point_ascending <- peak_index - 1 } # Descending parameters: after peak - if (peak_idx < length(ages)) { - descending_f <- f_values[peak_idx:length(ages)] - descending_ages <- ages[peak_idx:length(ages)] + if (peak_idx < length(index_values)) { + descending_f <- f_values[peak_idx:length(index_values)] + descending_indices <- index_values[peak_idx:length(index_values)] # For descending, look for where it drops below peak/2 midpoint_descending_index <- which.min(abs(descending_f - max(f_values) / 2)) - initial_inflection_point_descending <- if (length(midpoint_descending_index) > 0) descending_ages[midpoint_descending_index] else peak_age + 1 + initial_inflection_point_descending <- if (length(midpoint_descending_index) > 0) descending_indices[midpoint_descending_index] else peak_index + 1 } else { - initial_inflection_point_descending <- peak_age + 1 + initial_inflection_point_descending <- peak_index + 1 } # Ensure descending inflection is after ascending @@ -166,7 +168,7 @@ fit_double_logistic_selectivity <- function(f_values) { fit_result <- tryCatch( { stats::nls( - f_values ~ double_logistic(ages, inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending), + f_values ~ double_logistic(index_values, inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending), start = list( inflection_point_ascending = initial_inflection_point_ascending, slope_ascending = initial_slope_ascending, @@ -176,15 +178,15 @@ fit_double_logistic_selectivity <- function(f_values) { control = stats::nls.control(maxiter = 200, warnOnly = TRUE), algorithm = "port", lower = c( - inflection_point_ascending = min(ages), + inflection_point_ascending = min(index_values), slope_ascending = 0.01, inflection_point_descending = initial_inflection_point_ascending + 0.5, slope_descending = 0.01 ), upper = c( - inflection_point_ascending = max(ages), + inflection_point_ascending = max(index_values), slope_ascending = 10, - inflection_point_descending = max(ages) + 10, + inflection_point_descending = max(index_values) + 10, slope_descending = 10 ) ) diff --git a/R/generate_selectivity_curve.R b/R/generate_selectivity_curve.R index adcad25..8fcb5bd 100644 --- a/R/generate_selectivity_curve.R +++ b/R/generate_selectivity_curve.R @@ -44,11 +44,20 @@ utils::globalVariables(c("functional_group")) #' before fitting, representing relative selectivity. #' #' New functional forms can be added by creating helper functions with the -#' naming pattern `fit__selectivity()` that take a vector of F values and -#' return parameter estimates. +#' naming pattern `fit__selectivity()` that take a vector of values and +#' return parameter estimates. To add a new selectivity form: +#' \enumerate{ +#' \item Create a helper function named `fit__selectivity()` that +#' takes a numeric vector of normalized values (0-1) and returns a tibble +#' with columns: model_family, module_name, module_type, label, +#' distribution_link, age, length, time, value, estimation_type, +#' distribution_type, and distribution. +#' \item The function will automatically be available by passing the form name +#' to the `functional_form` argument (e.g., "new_form" will call +#' `fit_new_form_selectivity()`). +#' } #' #' @export -#' @author Kelli F. Johnson #' @examples #' \dontrun{ #' # Load example data diff --git a/inst/selectivity_curves_guide.md b/inst/selectivity_curves_guide.md deleted file mode 100644 index 14b0a6c..0000000 --- a/inst/selectivity_curves_guide.md +++ /dev/null @@ -1,159 +0,0 @@ -# Selectivity Curve Generation from EWE Model Output - -This document describes the new functionality for generating selectivity curves from Ecopath with Ecosim (EWE) model fishing mortality (F) values. - -## Overview - -The `generate_selectivity_curve()` function takes fishing mortality values by functional group from an ecosystem model and fits selectivity curve parameters. This allows users to convert EWE model outputs into selectivity parameters that can be used in age-structured stock assessment models like FIMS. - -## Key Features - -- **Modular Design**: Easy to extend with new selectivity curve types -- **Multiple Forms**: Currently supports logistic and double logistic selectivity -- **FIMS Compatible**: Outputs parameters in FIMS-compatible format -- **Flexible Input**: Works with any EWE model output containing mortality data - -## Function Overview - -### Main Function - -`generate_selectivity_curve(ewe_output, functional_groups, functional_form, fleet_name = "fleet1")` - -**Arguments:** -- `ewe_output`: Tibble from `load_model()` containing EWE output data -- `functional_groups`: Character vector of functional group names (ordered) -- `functional_form`: "logistic" or "double_logistic" -- `fleet_name`: Fleet identifier for output (default: "fleet1") - -**Returns:** -A tibble with FIMS-compatible parameter format containing fitted selectivity parameters. - -### Helper Functions - -The package includes helper functions for each supported selectivity form: - -1. `fit_logistic_selectivity()` - Fits logistic curve parameters -2. `fit_double_logistic_selectivity()` - Fits dome-shaped selectivity parameters - -## Usage Examples - -### Example 1: Logistic Selectivity - -```r -library(ecosystemdata) - -# Load example data -data("ewe_nwatlantic_base", package = "ecosystemdata") - -# Define functional groups in age order -functional_groups <- c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +") - -# Generate logistic selectivity parameters -params <- generate_selectivity_curve( - ewe_output = ewe_nwatlantic_base, - functional_groups = functional_groups, - functional_form = "logistic", - fleet_name = "commercial" -) - -print(params) -``` - -Expected output structure: -``` -# A tibble: 2 × 13 - model_family module_name fleet_name module_type label value - -1 catch_at_age Selectivity commercial Logistic inflection_point 2.00 -2 catch_at_age Selectivity commercial Logistic slope 1.50 -``` - -### Example 2: Double Logistic Selectivity - -```r -# Generate dome-shaped selectivity parameters -params_dome <- generate_selectivity_curve( - ewe_output = ewe_nwatlantic_base, - functional_groups = functional_groups, - functional_form = "double_logistic", - fleet_name = "commercial" -) - -print(params_dome) -``` - -Expected output structure: -``` -# A tibble: 4 × 13 - model_family module_name fleet_name module_type label value - -1 catch_at_age Selectivity commercial DoubleLogistic inflection_point_asc 1.50 -2 catch_at_age Selectivity commercial DoubleLogistic slope_asc 1.20 -3 catch_at_age Selectivity commercial DoubleLogistic inflection_point_desc 2.80 -4 catch_at_age Selectivity commercial DoubleLogistic slope_desc 1.00 -``` - -## How It Works - -1. **Data Extraction**: The function filters the EWE output for mortality data matching the specified functional groups. - -2. **Aggregation**: Mean F values are calculated for each functional group (averaged over time/fleet). - -3. **Normalization**: F values are normalized to a 0-1 scale to represent relative selectivity. - -4. **Curve Fitting**: The appropriate helper function fits selectivity parameters using nonlinear least squares. - -5. **Output Formatting**: Results are returned in FIMS-compatible tibble format. - -## Adding New Selectivity Forms - -The modular design makes it easy to add new selectivity curve types: - -1. Create a new helper function following the naming pattern: - ```r - fit__selectivity <- function(f_values) { - # Fit parameters to f_values - # Return tibble with parameter values - } - ``` - -2. Update the `functional_form` parameter documentation - -3. Users can now use the new form: - ```r - generate_selectivity_curve(..., functional_form = "new_form") - ``` - -Example forms that could be added: -- `"double_normal"` - Double normal selectivity -- `"random_walk"` - Random walk at age -- `"spline"` - Spline-based selectivity - -## Output Format Details - -The output tibble contains these columns: -- `model_family`: Always "catch_at_age" -- `module_name`: Always "Selectivity" -- `fleet_name`: User-specified fleet name -- `module_type`: Selectivity form (e.g., "Logistic", "DoubleLogistic") -- `label`: Parameter name (e.g., "inflection_point", "slope") -- `distribution_link`: NA (reserved for future use) -- `age`: NA (parameters apply to all ages) -- `length`: NA (not length-based selectivity) -- `time`: NA (parameters are time-invariant) -- `value`: Fitted parameter value -- `estimation_type`: Always "fixed_effects" -- `distribution_type`: NA (reserved for future use) -- `distribution`: NA (reserved for future use) - -## Notes and Caveats - -- The function requires at least 2 functional groups to fit a curve -- All functional groups must have mortality data available -- F values are normalized to represent relative selectivity -- The order of functional groups matters - it determines the x-axis -- Nonlinear fitting may not converge for all data patterns; fallback heuristics are used in such cases - -## References - -This functionality was developed to support the integration of ecosystem models (EWE) with single-species assessment models (FIMS) in the NOAA Fisheries Integrated Modeling System. diff --git a/tests/testthat/test-fit_selectivity_helpers.R b/tests/testthat/test-fit_selectivity_helpers.R new file mode 100644 index 0000000..1e266bf --- /dev/null +++ b/tests/testthat/test-fit_selectivity_helpers.R @@ -0,0 +1,179 @@ +# Instructions ---- +#' This file follows the format generated by ecosystemdata:::use_testthat_template(). +#' Necessary tests include input and output (IO) correctness [IO +#' correctness], edge-case handling [Edge handling], and built-in errors and +#' warnings [Error handling]. See `?ecosystemdata:::use_testthat_template` for more +#' information. Every test should have a @description tag that takes up just +#' one line, which will be used in the bookdown report of {testthat} results. + +# Setup ---- +# Load or prepare any necessary data for testing + +# fit_logistic_selectivity ---- +## IO correctness ---- +test_that("fit_logistic_selectivity() returns correct structure", { + #' @description Test that fit_logistic_selectivity() returns a tibble with + #' the correct columns and parameter labels. + f_values <- c(0.1, 0.5, 0.9) + result <- ecosystemdata:::fit_logistic_selectivity(f_values) + + expect_s3_class(result, "tbl_df") + expect_equal(nrow(result), 2) + expect_equal(result[["label"]], c("inflection_point", "slope")) + expect_equal(result[["module_type"]], rep("Logistic", 2)) + expect_true(all(is.numeric(result[["value"]]))) +}) + +test_that("fit_logistic_selectivity() handles monotonic increasing pattern", { + #' @description Test that the function fits an increasing selectivity pattern. + f_values <- seq(0, 1, length.out = 5) + result <- ecosystemdata:::fit_logistic_selectivity(f_values) + + expect_true(result[["value"]][result[["label"]] == "slope"] > 0) + expect_true(result[["value"]][result[["label"]] == "inflection_point"] >= 1) + expect_true(result[["value"]][result[["label"]] == "inflection_point"] <= 5) +}) + +## Validation tests ---- +test_that("fit_logistic_selectivity() recovers known parameters", { + #' @description Test that the function can recover known logistic parameters from generated data. + + # True parameters + true_inflection <- 3 + true_slope <- 1.5 + + # Generate selectivity values from known parameters + index_values <- 1:6 + true_selectivity <- 1 / (1 + exp(-true_slope * (index_values - true_inflection))) + + # Fit the model + result <- ecosystemdata:::fit_logistic_selectivity(true_selectivity) + + # Extract estimated parameters + estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] + estimated_slope <- result[["value"]][result[["label"]] == "slope"] + + # Check that estimates are close to true values (within 10% tolerance) + expect_equal(estimated_inflection, true_inflection, tolerance = 0.1 * true_inflection) + expect_equal(estimated_slope, true_slope, tolerance = 0.1 * true_slope) +}) + +test_that("fit_logistic_selectivity() handles different slopes", { + #' @description Test parameter recovery with a different slope value. + + # True parameters with steeper slope + true_inflection <- 4 + true_slope <- 3.0 + + # Generate selectivity values + index_values <- 1:8 + true_selectivity <- 1 / (1 + exp(-true_slope * (index_values - true_inflection))) + + # Fit the model + result <- ecosystemdata:::fit_logistic_selectivity(true_selectivity) + + # Extract estimated parameters + estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] + estimated_slope <- result[["value"]][result[["label"]] == "slope"] + + # Check estimates + expect_equal(estimated_inflection, true_inflection, tolerance = 0.15 * true_inflection) + expect_equal(estimated_slope, true_slope, tolerance = 0.15 * true_slope) +}) + +# fit_double_logistic_selectivity ---- +## IO correctness ---- +test_that("fit_double_logistic_selectivity() returns correct structure", { + #' @description Test that fit_double_logistic_selectivity() returns a tibble + #' with the correct columns and parameter labels. + f_values <- c(0.1, 0.5, 0.9, 0.6, 0.3) + result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) + + expect_s3_class(result, "tbl_df") + expect_equal(nrow(result), 4) + expect_equal( + result[["label"]], + c("inflection_point_asc", "slope_asc", "inflection_point_desc", "slope_desc") + ) + expect_equal(result[["module_type"]], rep("DoubleLogistic", 4)) + expect_true(all(is.numeric(result[["value"]]))) +}) + +test_that("fit_double_logistic_selectivity() handles dome-shaped pattern", { + #' @description Test that the function fits a dome-shaped selectivity pattern. + f_values <- c(0.1, 0.5, 1.0, 0.7, 0.3) + result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) + + inflection_ascending <- result[["value"]][result[["label"]] == "inflection_point_asc"] + inflection_descending <- result[["value"]][result[["label"]] == "inflection_point_desc"] + + # Ascending inflection should come before descending + expect_true(inflection_ascending < inflection_descending) + + # Slopes should be positive + expect_true(result[["value"]][result[["label"]] == "slope_asc"] > 0) + expect_true(result[["value"]][result[["label"]] == "slope_desc"] > 0) +}) + +## Validation tests ---- +test_that("fit_double_logistic_selectivity() recovers known parameters", { + #' @description Test that the function can recover known double logistic parameters from generated data. + + # True parameters + true_inflection_asc <- 2.5 + true_slope_asc <- 2.0 + true_inflection_desc <- 5.5 + true_slope_desc <- 1.5 + + # Generate selectivity values from known parameters + index_values <- 1:8 + asc_component <- 1 / (1 + exp(-true_slope_asc * (index_values - true_inflection_asc))) + desc_component <- 1 - 1 / (1 + exp(-true_slope_desc * (index_values - true_inflection_desc))) + true_selectivity <- asc_component * desc_component + + # Fit the model + result <- ecosystemdata:::fit_double_logistic_selectivity(true_selectivity) + + # Extract estimated parameters + est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] + est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] + est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] + est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] + + # Check that estimates are close to true values (within 20% tolerance for more complex model) + expect_equal(est_infl_asc, true_inflection_asc, tolerance = 0.2 * true_inflection_asc) + expect_equal(est_slope_asc, true_slope_asc, tolerance = 0.2 * true_slope_asc) + expect_equal(est_infl_desc, true_inflection_desc, tolerance = 0.2 * true_inflection_desc) + expect_equal(est_slope_desc, true_slope_desc, tolerance = 0.2 * true_slope_desc) +}) + +test_that("fit_double_logistic_selectivity() handles symmetric dome", { + #' @description Test parameter recovery with symmetric dome shape. + + # True parameters with symmetric slopes + true_inflection_asc <- 3.0 + true_slope_asc <- 1.5 + true_inflection_desc <- 7.0 + true_slope_desc <- 1.5 + + # Generate selectivity values + index_values <- 1:10 + asc_component <- 1 / (1 + exp(-true_slope_asc * (index_values - true_inflection_asc))) + desc_component <- 1 - 1 / (1 + exp(-true_slope_desc * (index_values - true_inflection_desc))) + true_selectivity <- asc_component * desc_component + + # Fit the model + result <- ecosystemdata:::fit_double_logistic_selectivity(true_selectivity) + + # Extract estimated parameters + est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] + est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] + est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] + est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] + + # Check estimates + expect_equal(est_infl_asc, true_inflection_asc, tolerance = 0.25 * true_inflection_asc) + expect_equal(est_slope_asc, true_slope_asc, tolerance = 0.25 * true_slope_asc) + expect_equal(est_infl_desc, true_inflection_desc, tolerance = 0.25 * true_inflection_desc) + expect_equal(est_slope_desc, true_slope_desc, tolerance = 0.25 * true_slope_desc) +}) diff --git a/tests/testthat/test-generate_selectivity_curve.R b/tests/testthat/test-generate_selectivity_curve.R index afe52fe..88ef15a 100644 --- a/tests/testthat/test-generate_selectivity_curve.R +++ b/tests/testthat/test-generate_selectivity_curve.R @@ -75,17 +75,17 @@ test_that("generate_selectivity_curve() works with double_logistic form", { test_that("generate_selectivity_curve() works with real EWE data", { #' @description Test that generate_selectivity_curve() works with actual - #' ewe_nwatlantic_base data for spiny dogfish. + #' ewe_nwatlantic_base data for menhaden. - # Get spiny dogfish functional groups - dogfish_groups <- unique(ewe_nwatlantic_base[["functional_group"]]) - dogfish_groups <- dogfish_groups[grepl("spiny dogfish", dogfish_groups)] + # Get menhaden functional groups + menhaden_groups <- unique(ewe_nwatlantic_base[["functional_group"]]) + menhaden_groups <- menhaden_groups[grepl("menhaden", menhaden_groups, ignore.case = TRUE)] - skip_if(length(dogfish_groups) < 2, "Not enough spiny dogfish groups in data") + skip_if(length(menhaden_groups) < 2, "Not enough menhaden groups in data") result <- generate_selectivity_curve( ewe_output = ewe_nwatlantic_base, - functional_groups = dogfish_groups, + functional_groups = menhaden_groups, functional_form = "logistic", fleet_name = "commercial" ) @@ -253,61 +253,85 @@ test_that("generate_selectivity_curve() errors on all-zero F values", { ) }) -# fit_logistic_selectivity ---- -## IO correctness ---- -test_that("fit_logistic_selectivity() returns correct structure", { - #' @description Test that fit_logistic_selectivity() returns a tibble with - #' the correct columns and parameter labels. - f_values <- c(0.1, 0.5, 0.9) - result <- ecosystemdata:::fit_logistic_selectivity(f_values) +## Validation tests ---- +test_that("generate_selectivity_curve() recovers parameters from known selectivity", { + #' @description Test that the full workflow can recover known parameters. - expect_s3_class(result, "tbl_df") - expect_equal(nrow(result), 2) - expect_equal(result[["label"]], c("inflection_point", "slope")) - expect_equal(result[["module_type"]], rep("Logistic", 2)) - expect_true(all(is.numeric(result[["value"]]))) -}) - -test_that("fit_logistic_selectivity() handles monotonic increasing pattern", { - #' @description Test that the function fits an increasing selectivity pattern. - f_values <- seq(0, 1, length.out = 5) - result <- ecosystemdata:::fit_logistic_selectivity(f_values) + # Create true selectivity parameters + true_inflection <- 2.0 + true_slope <- 1.5 - expect_true(result[["value"]][result[["label"]] == "slope"] > 0) - expect_true(result[["value"]][result[["label"]] == "inflection_point"] >= 1) - expect_true(result[["value"]][result[["label"]] == "inflection_point"] <= 5) -}) - -# fit_double_logistic_selectivity ---- -## IO correctness ---- -test_that("fit_double_logistic_selectivity() returns correct structure", { - #' @description Test that fit_double_logistic_selectivity() returns a tibble - #' with the correct columns and parameter labels. - f_values <- c(0.1, 0.5, 0.9, 0.6, 0.3) - result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) + # Generate F values from true selectivity + index_values <- 1:5 + true_selectivity <- 1 / (1 + exp(-true_slope * (index_values - true_inflection))) - expect_s3_class(result, "tbl_df") - expect_equal(nrow(result), 4) - expect_equal( - result[["label"]], - c("inflection_point_asc", "slope_asc", "inflection_point_desc", "slope_desc") + # Create test data with these F values + validation_data <- tibble::tibble( + functional_group = rep(paste("group", index_values), each = 12), + type = "mortality", + value = rep(true_selectivity, each = 12), + year = rep(2000, length(index_values) * 12), + month = rep(1:12, length(index_values)) ) - expect_equal(result[["module_type"]], rep("DoubleLogistic", 4)) - expect_true(all(is.numeric(result[["value"]]))) + + # Fit using generate_selectivity_curve + result <- generate_selectivity_curve( + ewe_output = validation_data, + functional_groups = paste("group", index_values), + functional_form = "logistic", + fleet_name = "test" + ) + + # Extract estimated parameters + estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] + estimated_slope <- result[["value"]][result[["label"]] == "slope"] + + # Check that estimates are close to true values + expect_equal(estimated_inflection, true_inflection, tolerance = 0.15 * true_inflection) + expect_equal(estimated_slope, true_slope, tolerance = 0.15 * true_slope) }) -test_that("fit_double_logistic_selectivity() handles dome-shaped pattern", { - #' @description Test that the function fits a dome-shaped selectivity pattern. - f_values <- c(0.1, 0.5, 1.0, 0.7, 0.3) - result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) +test_that("generate_selectivity_curve() recovers double logistic parameters", { + #' @description Test recovery of double logistic parameters through full workflow. - inflection_ascending <- result[["value"]][result[["label"]] == "inflection_point_asc"] - inflection_descending <- result[["value"]][result[["label"]] == "inflection_point_desc"] + # Create true parameters + true_infl_asc <- 2.0 + true_slope_asc <- 2.0 + true_infl_desc <- 5.0 + true_slope_desc <- 1.5 + + # Generate F values from true double logistic + index_values <- 1:7 + asc_component <- 1 / (1 + exp(-true_slope_asc * (index_values - true_infl_asc))) + desc_component <- 1 - 1 / (1 + exp(-true_slope_desc * (index_values - true_infl_desc))) + true_selectivity <- asc_component * desc_component + + # Create test data + validation_data <- tibble::tibble( + functional_group = rep(paste("group", index_values), each = 12), + type = "mortality", + value = rep(true_selectivity, each = 12), + year = rep(2000, length(index_values) * 12), + month = rep(1:12, length(index_values)) + ) + + # Fit using generate_selectivity_curve + result <- generate_selectivity_curve( + ewe_output = validation_data, + functional_groups = paste("group", index_values), + functional_form = "double_logistic", + fleet_name = "test" + ) - # Ascending inflection should come before descending - expect_true(inflection_ascending < inflection_descending) + # Extract estimated parameters + est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] + est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] + est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] + est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] - # Slopes should be positive - expect_true(result[["value"]][result[["label"]] == "slope_asc"] > 0) - expect_true(result[["value"]][result[["label"]] == "slope_desc"] > 0) + # Check estimates (more lenient tolerance for complex model) + expect_equal(est_infl_asc, true_infl_asc, tolerance = 0.25 * true_infl_asc) + expect_equal(est_slope_asc, true_slope_asc, tolerance = 0.25 * true_slope_asc) + expect_equal(est_infl_desc, true_infl_desc, tolerance = 0.25 * true_infl_desc) + expect_equal(est_slope_desc, true_slope_desc, tolerance = 0.25 * true_slope_desc) }) From f7ac1d6885740baee3d3f4f040ac6cd29a79f82b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Feb 2026 21:27:47 +0000 Subject: [PATCH 10/11] Update documentation file to reflect roxygen changes Co-authored-by: Bai-Li-NOAA <59936250+Bai-Li-NOAA@users.noreply.github.com> --- man/generate_selectivity_curve.Rd | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/man/generate_selectivity_curve.Rd b/man/generate_selectivity_curve.Rd index e6d631c..f4c5c01 100644 --- a/man/generate_selectivity_curve.Rd +++ b/man/generate_selectivity_curve.Rd @@ -58,8 +58,18 @@ fit selectivity parameters. The F values are normalized to be between 0 and 1 before fitting, representing relative selectivity. New functional forms can be added by creating helper functions with the -naming pattern \code{fit__selectivity()} that take a vector of F values and -return parameter estimates. +naming pattern \code{fit__selectivity()} that take a vector of values and +return parameter estimates. To add a new selectivity form: +\enumerate{ +\item Create a helper function named \code{fit__selectivity()} that +takes a numeric vector of normalized values (0-1) and returns a tibble +with columns: model_family, module_name, module_type, label, +distribution_link, age, length, time, value, estimation_type, +distribution_type, and distribution. +\item The function will automatically be available by passing the form name +to the \code{functional_form} argument (e.g., "new_form" will call +\code{fit_new_form_selectivity()}). +} } \examples{ \dontrun{ @@ -83,6 +93,3 @@ generate_selectivity_curve( ) } } -\author{ -Kelli F. Johnson -} From 5a49cf1d0de269df79f8d4bdbdcd345df686bc3e Mon Sep 17 00:00:00 2001 From: Bai Li - NOAA <59936250+Bai-Li-NOAA@users.noreply.github.com> Date: Thu, 19 Feb 2026 15:00:09 +0000 Subject: [PATCH 11/11] Checkpoint from VS Code for cloud agent session --- R/fit_selectivity_helpers.R | 10 +- R/generate_selectivity_curve.R | 206 +++++++++++++----- tests/testthat/test-fit_selectivity_helpers.R | 88 ++++---- .../test-generate_selectivity_curve.R | 129 +++++++---- 4 files changed, 280 insertions(+), 153 deletions(-) diff --git a/R/fit_selectivity_helpers.R b/R/fit_selectivity_helpers.R index 3e26f46..7dca7b2 100644 --- a/R/fit_selectivity_helpers.R +++ b/R/fit_selectivity_helpers.R @@ -72,7 +72,7 @@ fit_logistic_selectivity <- function(f_values) { } } - # Create output tibble in FIMS format + # Create output tibble in FIMS format (replace time with year/month) tibble::tibble( model_family = "catch_at_age", module_name = "Selectivity", @@ -81,7 +81,8 @@ fit_logistic_selectivity <- function(f_values) { distribution_link = NA_character_, age = NA_real_, length = NA_real_, - time = NA_real_, + year = NA_integer_, + month = NA_integer_, value = c(inflection_point, slope), estimation_type = "fixed_effects", distribution_type = NA_character_, @@ -210,7 +211,7 @@ fit_double_logistic_selectivity <- function(f_values) { slope_descending <- initial_slope_descending } - # Create output tibble in FIMS format + # Create output tibble in FIMS format (replace time with year/month) tibble::tibble( model_family = "catch_at_age", module_name = "Selectivity", @@ -224,7 +225,8 @@ fit_double_logistic_selectivity <- function(f_values) { distribution_link = NA_character_, age = NA_real_, length = NA_real_, - time = NA_real_, + year = NA_integer_, + month = NA_integer_, value = c(inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending), estimation_type = "fixed_effects", distribution_type = NA_character_, diff --git a/R/generate_selectivity_curve.R b/R/generate_selectivity_curve.R index 8fcb5bd..7dd8250 100644 --- a/R/generate_selectivity_curve.R +++ b/R/generate_selectivity_curve.R @@ -1,4 +1,8 @@ -utils::globalVariables(c("functional_group")) +# ...existing code... +## At the end of the file, add: +utils::globalVariables(c( + "functional_group", "type", "value", "year", "mean_f", "model_family", "module_name", "fleet_name", "module_type", "label", "distribution_link", "age", "length", "time", "estimation_type", "distribution_type", "distribution" +)) #' Generate selectivity curve parameters from ecosystem model F values #' @@ -130,65 +134,157 @@ generate_selectivity_curve <- function(ewe_output, type == "mortality", functional_group %in% functional_groups ) - + if (nrow(mortality_data) == 0) { cli::cli_abort(c( "No mortality data found for the specified functional groups.", "i" = "Check that type == 'mortality' exists in {.var ewe_output}" )) } - - # Calculate mean F value for each functional group (averaging over time/fleet) - f_values <- mortality_data |> - dplyr::group_by(functional_group) |> - dplyr::summarise(mean_f = mean(value, na.rm = TRUE), .groups = "drop") |> - # Ensure the order matches the input functional_groups - dplyr::right_join( - tibble::tibble(functional_group = functional_groups), - by = "functional_group" - ) |> - dplyr::pull(mean_f) - - # Check for missing F values - if (any(is.na(f_values))) { - na_groups <- functional_groups[is.na(f_values)] - cli::cli_abort(c( - "Missing mortality data for some functional groups:", - "x" = "Groups with no data: {.var {na_groups}}" - )) - } - - # Normalize F values to 0-1 range (representing relative selectivity) - if (max(f_values) == 0) { - cli::cli_abort("All F values are zero. Cannot fit selectivity curve.") - } - normalized_f <- f_values / max(f_values) - - # Call the appropriate helper function based on functional_form - helper_function_name <- paste0("fit_", tolower(functional_form), "_selectivity") - - if (!exists(helper_function_name, mode = "function")) { - cli::cli_abort(c( - "Unsupported functional form: {.val {functional_form}}", - "i" = "Currently supported: 'logistic', 'double_logistic'", - "i" = "To add support, create a function named {.fn {helper_function_name}}" - )) + + # If year and month columns exist, fit parameters for each year-month separately + if (all(c("year", "month") %in% names(mortality_data))) { + year_months <- unique(mortality_data[, c("year", "month")]) + year_months <- year_months[order(year_months$year, year_months$month), ] + results_list <- apply(year_months, 1, function(ym) { + yr <- ym[["year"]] + mo <- ym[["month"]] + ym_data <- mortality_data |> + dplyr::filter(year == yr, month == mo) + # Calculate mean F value for each functional group for this year-month + f_values <- ym_data |> + dplyr::group_by(functional_group) |> + dplyr::summarise(mean_f = mean(value, na.rm = TRUE), .groups = "drop") |> + dplyr::right_join( + tibble::tibble(functional_group = functional_groups), + by = "functional_group" + ) |> + dplyr::pull(mean_f) + # Check for missing F values + if (any(is.na(f_values))) { + na_groups <- functional_groups[is.na(f_values)] + cli::cli_abort(c( + "Missing mortality data for some functional groups:", + "x" = "Groups with no data: {.var {na_groups}}" + )) + } + if (max(f_values) == 0) { + cli::cli_abort("All F values are zero. Cannot fit selectivity curve.") + } + normalized_f <- f_values / max(f_values) + helper_function_name <- paste0("fit_", tolower(functional_form), "_selectivity") + if (!exists(helper_function_name, mode = "function")) { + cli::cli_abort(c( + "Unsupported functional form: {.val {functional_form}}", + "i" = "Currently supported: 'logistic', 'double_logistic'", + "i" = "To add support, create a function named {.fn {helper_function_name}}" + )) + } + helper_function <- get(helper_function_name, mode = "function") + result <- helper_function(normalized_f) + result[["fleet_name"]] <- fleet_name + result[["year"]] <- as.integer(yr) + result[["month"]] <- as.integer(mo) + result + }) + result <- dplyr::bind_rows(results_list) + # Reorder columns to match expected output format + result <- result |> + dplyr::select( + model_family, module_name, fleet_name, module_type, + label, distribution_link, age, length, year, month, value, + estimation_type, distribution_type, distribution + ) + return(result) + } else if ("year" %in% names(mortality_data)) { + years <- sort(unique(mortality_data$year)) + results_list <- lapply(years, function(yr) { + year_data <- mortality_data |> + dplyr::filter(year == yr) + # Calculate mean F value for each functional group for this year + f_values <- year_data |> + dplyr::group_by(functional_group) |> + dplyr::summarise(mean_f = mean(value, na.rm = TRUE), .groups = "drop") |> + dplyr::right_join( + tibble::tibble(functional_group = functional_groups), + by = "functional_group" + ) |> + dplyr::pull(mean_f) + # Check for missing F values + if (any(is.na(f_values))) { + na_groups <- functional_groups[is.na(f_values)] + cli::cli_abort(c( + "Missing mortality data for some functional groups:", + "x" = "Groups with no data: {.var {na_groups}}" + )) + } + if (max(f_values) == 0) { + cli::cli_abort("All F values are zero. Cannot fit selectivity curve.") + } + normalized_f <- f_values / max(f_values) + helper_function_name <- paste0("fit_", tolower(functional_form), "_selectivity") + if (!exists(helper_function_name, mode = "function")) { + cli::cli_abort(c( + "Unsupported functional form: {.val {functional_form}}", + "i" = "Currently supported: 'logistic', 'double_logistic'", + "i" = "To add support, create a function named {.fn {helper_function_name}}" + )) + } + helper_function <- get(helper_function_name, mode = "function") + result <- helper_function(normalized_f) + result[["fleet_name"]] <- fleet_name + result[["year"]] <- as.integer(yr) + result[["month"]] <- NA_integer_ + result + }) + result <- dplyr::bind_rows(results_list) + # Reorder columns to match expected output format + result <- result |> + dplyr::select( + model_family, module_name, fleet_name, module_type, + label, distribution_link, age, length, year, month, value, + estimation_type, distribution_type, distribution + ) + return(result) + } else { + # No year column: fit to all data (original behavior) + f_values <- mortality_data |> + dplyr::group_by(functional_group) |> + dplyr::summarise(mean_f = mean(value, na.rm = TRUE), .groups = "drop") |> + dplyr::right_join( + tibble::tibble(functional_group = functional_groups), + by = "functional_group" + ) |> + dplyr::pull(mean_f) + if (any(is.na(f_values))) { + na_groups <- functional_groups[is.na(f_values)] + cli::cli_abort(c( + "Missing mortality data for some functional groups:", + "x" = "Groups with no data: {.var {na_groups}}" + )) + } + if (max(f_values) == 0) { + cli::cli_abort("All F values are zero. Cannot fit selectivity curve.") + } + normalized_f <- f_values / max(f_values) + helper_function_name <- paste0("fit_", tolower(functional_form), "_selectivity") + if (!exists(helper_function_name, mode = "function")) { + cli::cli_abort(c( + "Unsupported functional form: {.val {functional_form}}", + "i" = "Currently supported: 'logistic', 'double_logistic'", + "i" = "To add support, create a function named {.fn {helper_function_name}}" + )) + } + helper_function <- get(helper_function_name, mode = "function") + result <- helper_function(normalized_f) + result[["fleet_name"]] <- fleet_name + # time column remains NA + result <- result |> + dplyr::select( + model_family, module_name, fleet_name, module_type, + label, distribution_link, age, length, year, month, value, + estimation_type, distribution_type, distribution + ) + return(result) } - - # Call the helper function - helper_function <- get(helper_function_name, mode = "function") - result <- helper_function(normalized_f) - - # Add fleet_name to the result - result[["fleet_name"]] <- fleet_name - - # Reorder columns to match expected output format - result <- result |> - dplyr::select( - model_family, module_name, fleet_name, module_type, - label, distribution_link, age, length, time, value, - estimation_type, distribution_type, distribution - ) - - return(result) } diff --git a/tests/testthat/test-fit_selectivity_helpers.R b/tests/testthat/test-fit_selectivity_helpers.R index 1e266bf..3e180fc 100644 --- a/tests/testthat/test-fit_selectivity_helpers.R +++ b/tests/testthat/test-fit_selectivity_helpers.R @@ -11,33 +11,7 @@ # fit_logistic_selectivity ---- ## IO correctness ---- -test_that("fit_logistic_selectivity() returns correct structure", { - #' @description Test that fit_logistic_selectivity() returns a tibble with - #' the correct columns and parameter labels. - f_values <- c(0.1, 0.5, 0.9) - result <- ecosystemdata:::fit_logistic_selectivity(f_values) - - expect_s3_class(result, "tbl_df") - expect_equal(nrow(result), 2) - expect_equal(result[["label"]], c("inflection_point", "slope")) - expect_equal(result[["module_type"]], rep("Logistic", 2)) - expect_true(all(is.numeric(result[["value"]]))) -}) - -test_that("fit_logistic_selectivity() handles monotonic increasing pattern", { - #' @description Test that the function fits an increasing selectivity pattern. - f_values <- seq(0, 1, length.out = 5) - result <- ecosystemdata:::fit_logistic_selectivity(f_values) - - expect_true(result[["value"]][result[["label"]] == "slope"] > 0) - expect_true(result[["value"]][result[["label"]] == "inflection_point"] >= 1) - expect_true(result[["value"]][result[["label"]] == "inflection_point"] <= 5) -}) - -## Validation tests ---- test_that("fit_logistic_selectivity() recovers known parameters", { - #' @description Test that the function can recover known logistic parameters from generated data. - # True parameters true_inflection <- 3 true_slope <- 1.5 @@ -50,18 +24,29 @@ test_that("fit_logistic_selectivity() recovers known parameters", { result <- ecosystemdata:::fit_logistic_selectivity(true_selectivity) # Extract estimated parameters - estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] - estimated_slope <- result[["value"]][result[["label"]] == "slope"] + estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] |> + unname() + estimated_slope <- result[["value"]][result[["label"]] == "slope"] |> + unname() # Check that estimates are close to true values (within 10% tolerance) + #' @description Test that the function can recover known inflection parameter from generated data. expect_equal(estimated_inflection, true_inflection, tolerance = 0.1 * true_inflection) + #' @description Test that the function can recover known slope parameter from generated data. expect_equal(estimated_slope, true_slope, tolerance = 0.1 * true_slope) + + #' @description Test that fit_logistic_selectivity() returns a tibble with the correct columns and parameter labels. + expect_true(tibble::is_tibble(result)) + expect_equal(nrow(result), 2) + expect_equal(result[["label"]], c("inflection_point", "slope")) + expect_equal(result[["module_type"]], rep("Logistic", 2)) + expect_true(all(is.numeric(result[["value"]]))) }) test_that("fit_logistic_selectivity() handles different slopes", { - #' @description Test parameter recovery with a different slope value. - + # True parameters with steeper slope + #' @description Test parameter recovery with a different slope value. true_inflection <- 4 true_slope <- 3.0 @@ -73,23 +58,24 @@ test_that("fit_logistic_selectivity() handles different slopes", { result <- ecosystemdata:::fit_logistic_selectivity(true_selectivity) # Extract estimated parameters - estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] - estimated_slope <- result[["value"]][result[["label"]] == "slope"] + estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] |> + unname() + estimated_slope <- result[["value"]][result[["label"]] == "slope"] |> + unname() # Check estimates - expect_equal(estimated_inflection, true_inflection, tolerance = 0.15 * true_inflection) - expect_equal(estimated_slope, true_slope, tolerance = 0.15 * true_slope) + expect_equal(estimated_inflection, true_inflection, tolerance = 0.1 * true_inflection) + expect_equal(estimated_slope, true_slope, tolerance = 0.1 * true_slope) }) # fit_double_logistic_selectivity ---- ## IO correctness ---- test_that("fit_double_logistic_selectivity() returns correct structure", { - #' @description Test that fit_double_logistic_selectivity() returns a tibble - #' with the correct columns and parameter labels. - f_values <- c(0.1, 0.5, 0.9, 0.6, 0.3) + #' @description Test that fit_double_logistic_selectivity() returns a tibble with the correct columns and parameter labels. + f_values <- c(0.1, 0.3, 0.5, 0.9, 0.6, 0.3) result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) - expect_s3_class(result, "tbl_df") + expect_true(tibble::is_tibble(result)) expect_equal(nrow(result), 4) expect_equal( result[["label"]], @@ -101,7 +87,7 @@ test_that("fit_double_logistic_selectivity() returns correct structure", { test_that("fit_double_logistic_selectivity() handles dome-shaped pattern", { #' @description Test that the function fits a dome-shaped selectivity pattern. - f_values <- c(0.1, 0.5, 1.0, 0.7, 0.3) + f_values <- c(0.1, 0.3, 0.5, 1.0, 0.7, 0.3) result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) inflection_ascending <- result[["value"]][result[["label"]] == "inflection_point_asc"] @@ -135,10 +121,14 @@ test_that("fit_double_logistic_selectivity() recovers known parameters", { result <- ecosystemdata:::fit_double_logistic_selectivity(true_selectivity) # Extract estimated parameters - est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] - est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] - est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] - est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] + est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] |> + unname() + est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] |> + unname() + est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] |> + unname() + est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] |> + unname() # Check that estimates are close to true values (within 20% tolerance for more complex model) expect_equal(est_infl_asc, true_inflection_asc, tolerance = 0.2 * true_inflection_asc) @@ -166,10 +156,14 @@ test_that("fit_double_logistic_selectivity() handles symmetric dome", { result <- ecosystemdata:::fit_double_logistic_selectivity(true_selectivity) # Extract estimated parameters - est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] - est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] - est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] - est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] + est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] |> + unname() + est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] |> + unname() + est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] |> + unname() + est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] |> + unname() # Check estimates expect_equal(est_infl_asc, true_inflection_asc, tolerance = 0.25 * true_inflection_asc) diff --git a/tests/testthat/test-generate_selectivity_curve.R b/tests/testthat/test-generate_selectivity_curve.R index 88ef15a..039a3ca 100644 --- a/tests/testthat/test-generate_selectivity_curve.R +++ b/tests/testthat/test-generate_selectivity_curve.R @@ -10,89 +10,125 @@ # Load or prepare any necessary data for testing data("ewe_nwatlantic_base", package = "ecosystemdata") -# Create a minimal test dataset +functional_groups <- c("species1 0", "species1 1", "species1 2", "species1 3", "species1 4", "species1 5") +# True parameters +true_inflection <- 3 +true_slope <- 1.5 + +# Generate selectivity values from known parameters +index_values <- 1:6 +true_selectivity <- 1 / (1 + exp(-true_slope * (index_values - true_inflection))) + test_data <- tibble::tibble( - functional_group = rep(c("species1 0", "species1 1", "species1 2"), each = 12), + functional_group = rep(functional_groups, each = 12), type = "mortality", - value = c( - rep(0.1, 12), # Low F for age 0 - rep(0.5, 12), # Medium F for age 1 - rep(0.8, 12) # High F for age 2 - ), - year = rep(2000, 36), - month = rep(1:12, 3) + # Replicate each selectivity value for 12 months + value = rep(true_selectivity, each = 12), + year = rep(2000, 72), + month = rep(1:12, 6) ) # generate_selectivity_curve ---- ## IO correctness ---- test_that("generate_selectivity_curve() works with logistic form", { - #' @description Test that generate_selectivity_curve() returns a tibble with - #' correct structure for logistic selectivity. result <- generate_selectivity_curve( ewe_output = test_data, - functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_groups = functional_groups, functional_form = "logistic", fleet_name = "fleet1" ) - - expect_s3_class(result, "tbl_df") - expect_equal(nrow(result), 2) # inflection_point and slope - expect_equal(result[["model_family"]], rep("catch_at_age", 2)) - expect_equal(result[["module_name"]], rep("Selectivity", 2)) - expect_equal(result[["fleet_name"]], rep("fleet1", 2)) - expect_equal(result[["module_type"]], rep("Logistic", 2)) - expect_equal(result[["label"]], c("inflection_point", "slope")) - expect_equal(result[["estimation_type"]], rep("fixed_effects", 2)) + + estimated_inflection <- result |> + dplyr::filter(label == "inflection_point") |> + dplyr::pull(value) |> + unname() + estimated_slope <- result |> + dplyr::filter(label == "slope") |> + dplyr::pull(value) |> + unname() + + #' @description Test that generate_selectivity_curve() returns a tibble with correct structure for logistic selectivity. + expect_equal(estimated_inflection, rep(true_inflection, 12), tolerance = 0.1 * true_inflection) + #' @description Test that estimated slope matches true slope. + expect_equal(estimated_slope, rep(true_slope, 12), tolerance = 0.1 * true_slope) + #' @description Test that result is a tibble. + expect_true(tibble::is_tibble(result)) + #' @description Test that result has two rows. + expect_equal(nrow(result), 2 * 12) # inflection_point and slope + #' @description Test that model_family is catch_at_age. + expect_equal(result[["model_family"]], rep("catch_at_age", 2 * 12)) + #' @description Test that module_name is Selectivity. + expect_equal(result[["module_name"]], rep("Selectivity", 2 * 12)) + #' @description Test that fleet_name is fleet1. + expect_equal(result[["fleet_name"]], rep("fleet1", 2 * 12)) + #' @description Test that module_type is Logistic. + expect_equal(result[["module_type"]], rep("Logistic", 2 * 12)) + #' @description Test that label is inflection_point and slope. + expect_equal(result[["label"]], rep(c("inflection_point", "slope"), 12)) + #' @description Test that estimation_type is fixed_effects. + expect_equal(result[["estimation_type"]], rep("fixed_effects", 2 * 12)) + #' @description Test that distribution_link is NA. expect_true(all(is.na(result[["distribution_link"]]))) + #' @description Test that value is numeric. expect_true(all(is.numeric(result[["value"]]))) + #' @description Test that value is not NA. expect_true(all(!is.na(result[["value"]]))) }) test_that("generate_selectivity_curve() works with double_logistic form", { - #' @description Test that generate_selectivity_curve() returns a tibble with - #' correct structure for double logistic selectivity. result <- generate_selectivity_curve( ewe_output = test_data, - functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_groups = functional_groups, functional_form = "double_logistic", fleet_name = "fleet1" ) - expect_s3_class(result, "tbl_df") - expect_equal(nrow(result), 4) # 4 parameters for double logistic - expect_equal(result[["model_family"]], rep("catch_at_age", 4)) - expect_equal(result[["module_name"]], rep("Selectivity", 4)) - expect_equal(result[["fleet_name"]], rep("fleet1", 4)) - expect_equal(result[["module_type"]], rep("DoubleLogistic", 4)) + #' @description Test that result is a tibble for double logistic. + expect_true(tibble::is_tibble(result)) + #' @description Test that result has four rows for double logistic. + expect_equal(nrow(result), 4 * 12) # 4 parameters for double logistic + #' @description Test that model_family is catch_at_age for double logistic. + expect_equal(result[["model_family"]], rep("catch_at_age", 4 * 12)) + #' @description Test that module_name is Selectivity for double logistic. + expect_equal(result[["module_name"]], rep("Selectivity", 4 * 12)) + #' @description Test that fleet_name is fleet1 for double logistic. + expect_equal(result[["fleet_name"]], rep("fleet1", 4 * 12)) + #' @description Test that module_type is DoubleLogistic. + expect_equal(result[["module_type"]], rep("DoubleLogistic", 4 * 12)) + #' @description Test that label is correct for double logistic. expect_equal( result[["label"]], - c("inflection_point_asc", "slope_asc", "inflection_point_desc", "slope_desc") + rep(c("inflection_point_asc", "slope_asc", "inflection_point_desc", "slope_desc"), 12) ) - expect_equal(result[["estimation_type"]], rep("fixed_effects", 4)) + #' @description Test that estimation_type is fixed_effects for double logistic. + expect_equal(result[["estimation_type"]], rep("fixed_effects", 4 * 12)) + #' @description Test that value is numeric for double logistic. expect_true(all(is.numeric(result[["value"]]))) + #' @description Test that value is not NA for double logistic. expect_true(all(!is.na(result[["value"]]))) }) test_that("generate_selectivity_curve() works with real EWE data", { - #' @description Test that generate_selectivity_curve() works with actual - #' ewe_nwatlantic_base data for menhaden. - - # Get menhaden functional groups - menhaden_groups <- unique(ewe_nwatlantic_base[["functional_group"]]) - menhaden_groups <- menhaden_groups[grepl("menhaden", menhaden_groups, ignore.case = TRUE)] - - skip_if(length(menhaden_groups) < 2, "Not enough menhaden groups in data") - + menhaden_data <- ewe_nwatlantic_base |> + dplyr::filter(species == "menhaden" & type == "mortality") + menhaden_groups <- menhaden_data |> + dplyr::pull(functional_group) |> + unique() + result <- generate_selectivity_curve( - ewe_output = ewe_nwatlantic_base, + ewe_output = menhaden_data, functional_groups = menhaden_groups, - functional_form = "logistic", - fleet_name = "commercial" + functional_form = "double_logistic", + fleet_name = "NA" ) - expect_s3_class(result, "tbl_df") + #' @description Test that result is a tibble for real EWE data. + expect_true(tibble::is_tibble(result)) + #' @description Test that result has two rows for real EWE data. expect_equal(nrow(result), 2) + #' @description Test that fleet_name is commercial for real EWE data. expect_equal(result[["fleet_name"]], rep("commercial", 2)) + #' @description Test that value is numeric for real EWE data. expect_true(all(is.numeric(result[["value"]]))) }) @@ -117,8 +153,7 @@ test_that("generate_selectivity_curve() handles case-insensitive functional form }) test_that("generate_selectivity_curve() preserves functional_groups order", { - #' @description Test that the order of functional groups affects the fitted - #' parameters (reversed order should give different results). + #' @description Test that the order of functional groups affects the fitted parameters (reversed order should give different results). result_forward <- generate_selectivity_curve( ewe_output = test_data,