From 13125acc32d331a47e4836da66caaa0f5e107baa Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Feb 2026 17:53:11 +0000 Subject: [PATCH 01/11] Initial plan From da30f0cd3cf1feb22a76488a2492b959e8fa07e1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Feb 2026 17:59:01 +0000 Subject: [PATCH 02/11] Add modular selectivity curve generation functions Co-authored-by: Bai-Li-NOAA <59936250+Bai-Li-NOAA@users.noreply.github.com> --- NAMESPACE | 1 + R/fit_selectivity_helpers.R | 231 +++++++++++++ R/generate_selectivity_curve.R | 182 ++++++++++ man/generate_selectivity_curve.Rd | 88 +++++ .../test-generate_selectivity_curve.R | 313 ++++++++++++++++++ 5 files changed, 815 insertions(+) create mode 100644 R/fit_selectivity_helpers.R create mode 100644 R/generate_selectivity_curve.R create mode 100644 man/generate_selectivity_curve.Rd create mode 100644 tests/testthat/test-generate_selectivity_curve.R diff --git a/NAMESPACE b/NAMESPACE index c59c727..4fc63ef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(generate_selectivity_curve) export(get_functional_groups) export(load_csv_environmental_data) export(load_csv_ewe) diff --git a/R/fit_selectivity_helpers.R b/R/fit_selectivity_helpers.R new file mode 100644 index 0000000..c2a5030 --- /dev/null +++ b/R/fit_selectivity_helpers.R @@ -0,0 +1,231 @@ +#' Fit logistic selectivity parameters +#' +#' @description +#' Helper function to fit logistic selectivity curve parameters to a vector of +#' relative F values. The logistic function is defined as: +#' S(a) = 1 / (1 + exp(-slope * (a - inflection_point))) +#' where a is the age/group index. +#' +#' @param f_values A numeric vector of normalized F values (0-1) for each +#' functional group, in the order they should appear on the x-axis. +#' +#' @return A tibble with one row per parameter, containing the standard FIMS +#' parameter format columns. +#' +#' @details +#' The function uses nonlinear least squares (nls) to fit a logistic curve to +#' the normalized F values. The age/group index is simply 1, 2, 3, ... based on +#' the position in the f_values vector. +#' +#' If the optimization fails, the function falls back to a simple linear +#' approximation to estimate the inflection point and slope. +#' +#' @keywords internal +#' @author Kelli F. Johnson +fit_logistic_selectivity <- function(f_values) { + # Create age index (1, 2, 3, ...) + ages <- seq_along(f_values) + + # Define logistic function + logistic <- function(age, inflection_point, slope) { + 1 / (1 + exp(-slope * (age - inflection_point))) + } + + # Initial parameter guesses + # Inflection point: age where selectivity is around 0.5 + mid_idx <- which.min(abs(f_values - 0.5)) + init_inflection <- if (length(mid_idx) > 0) ages[mid_idx] else mean(ages) + + # Slope: positive value, start with 1 + init_slope <- 1 + + # Fit using nonlinear least squares + fit_result <- tryCatch( + { + stats::nls( + f_values ~ logistic(ages, inflection_point, slope), + start = list(inflection_point = init_inflection, slope = init_slope), + control = stats::nls.control(maxiter = 100, warnOnly = TRUE) + ) + }, + error = function(e) { + # Fallback: use simple heuristic + NULL + } + ) + + if (!is.null(fit_result)) { + params <- stats::coef(fit_result) + inflection_point <- params["inflection_point"] + slope <- params["slope"] + } else { + # Fallback method: find inflection as midpoint of rise + # and estimate slope from the steepest part of the curve + inflection_point <- init_inflection + + # Estimate slope from differences + if (length(ages) > 1) { + diffs <- diff(f_values) / diff(ages) + slope <- max(abs(diffs), 0.1) * 4 # Scale up since logistic slope is 1/4 of max derivative + } else { + slope <- 1 + } + } + + # Create output tibble in FIMS format + tibble::tibble( + model_family = "catch_at_age", + module_name = "Selectivity", + module_type = "Logistic", + label = c("inflection_point", "slope"), + distribution_link = NA_character_, + age = NA_real_, + length = NA_real_, + time = NA_real_, + value = c(inflection_point, slope), + estimation_type = "fixed_effects", + distribution_type = NA_character_, + distribution = NA_character_ + ) +} + + +#' Fit double logistic selectivity parameters +#' +#' @description +#' Helper function to fit double logistic (dome-shaped) selectivity curve +#' parameters to a vector of relative F values. The double logistic function +#' combines ascending and descending logistic curves: +#' S(a) = 1 / (1 + exp(-slope_asc * (a - inflection_asc))) * +#' (1 - 1 / (1 + exp(-slope_desc * (a - inflection_desc)))) +#' +#' @param f_values A numeric vector of normalized F values (0-1) for each +#' functional group, in the order they should appear on the x-axis. +#' +#' @return A tibble with one row per parameter, containing the standard FIMS +#' parameter format columns. +#' +#' @details +#' The function attempts to fit a dome-shaped selectivity curve. If the data +#' appears monotonic (no dome shape), it will still fit the parameters but may +#' warn or produce parameters that effectively reduce to a simple logistic. +#' +#' The fitting uses nonlinear least squares with constraints to ensure: +#' - Ascending inflection point comes before descending +#' - Slopes are positive +#' - The curve shape makes biological sense +#' +#' @keywords internal +#' @author Kelli F. Johnson +fit_double_logistic_selectivity <- function(f_values) { + # Create age index (1, 2, 3, ...) + ages <- seq_along(f_values) + + # Define double logistic function + double_logistic <- function(age, infl_asc, slope_asc, infl_desc, slope_desc) { + asc <- 1 / (1 + exp(-slope_asc * (age - infl_asc))) + desc <- 1 - 1 / (1 + exp(-slope_desc * (age - infl_desc))) + asc * desc + } + + # Initial parameter guesses + # Find the peak of the selectivity + peak_idx <- which.max(f_values) + peak_age <- ages[peak_idx] + + # Ascending parameters: before peak + if (peak_idx > 1) { + ascending_f <- f_values[1:peak_idx] + ascending_ages <- ages[1:peak_idx] + mid_asc_idx <- which.min(abs(ascending_f - 0.5)) + init_infl_asc <- if (length(mid_asc_idx) > 0) ascending_ages[mid_asc_idx] else peak_age - 1 + } else { + init_infl_asc <- peak_age - 1 + } + + # Descending parameters: after peak + if (peak_idx < length(ages)) { + descending_f <- f_values[peak_idx:length(ages)] + descending_ages <- ages[peak_idx:length(ages)] + # For descending, look for where it drops below peak/2 + mid_desc_idx <- which.min(abs(descending_f - max(f_values) / 2)) + init_infl_desc <- if (length(mid_desc_idx) > 0) descending_ages[mid_desc_idx] else peak_age + 1 + } else { + init_infl_desc <- peak_age + 1 + } + + # Ensure descending inflection is after ascending + if (init_infl_desc <= init_infl_asc) { + init_infl_desc <- init_infl_asc + 1 + } + + init_slope_asc <- 1 + init_slope_desc <- 1 + + # Fit using nonlinear least squares + fit_result <- tryCatch( + { + stats::nls( + f_values ~ double_logistic(ages, infl_asc, slope_asc, infl_desc, slope_desc), + start = list( + infl_asc = init_infl_asc, + slope_asc = init_slope_asc, + infl_desc = init_infl_desc, + slope_desc = init_slope_desc + ), + control = stats::nls.control(maxiter = 200, warnOnly = TRUE), + algorithm = "port", + lower = c( + infl_asc = min(ages), + slope_asc = 0.01, + infl_desc = init_infl_asc + 0.5, + slope_desc = 0.01 + ), + upper = c( + infl_asc = max(ages), + slope_asc = 10, + infl_desc = max(ages) + 10, + slope_desc = 10 + ) + ) + }, + error = function(e) { + NULL + } + ) + + if (!is.null(fit_result)) { + params <- stats::coef(fit_result) + infl_asc <- params["infl_asc"] + slope_asc <- params["slope_asc"] + infl_desc <- params["infl_desc"] + slope_desc <- params["slope_desc"] + } else { + # Fallback to initial guesses + infl_asc <- init_infl_asc + slope_asc <- init_slope_asc + infl_desc <- init_infl_desc + slope_desc <- init_slope_desc + } + + # Create output tibble in FIMS format + tibble::tibble( + model_family = "catch_at_age", + module_name = "Selectivity", + module_type = "DoubleLogistic", + label = c( + "inflection_point_asc", + "slope_asc", + "inflection_point_desc", + "slope_desc" + ), + distribution_link = NA_character_, + age = NA_real_, + length = NA_real_, + time = NA_real_, + value = c(infl_asc, slope_asc, infl_desc, slope_desc), + estimation_type = "fixed_effects", + distribution_type = NA_character_, + distribution = NA_character_ + ) +} diff --git a/R/generate_selectivity_curve.R b/R/generate_selectivity_curve.R new file mode 100644 index 0000000..15d7b7c --- /dev/null +++ b/R/generate_selectivity_curve.R @@ -0,0 +1,182 @@ +utils::globalVariables(c("functional_group")) + +#' Generate selectivity curve parameters from ecosystem model F values +#' +#' @description +#' This function takes fishing mortality (F) values by functional group from an +#' ecosystem model and fits selectivity curve parameters based on a specified +#' functional form. The function is modular and can be extended to support +#' additional selectivity curve types. +#' +#' @param ewe_output A tibble containing ecosystem model output, typically from +#' [load_model()] with type = "ewe". Must contain columns: functional_group, +#' type, and value. The function will filter for type == "mortality". +#' @param functional_groups A character vector of functional group names in the +#' order desired for the x-axis of the selectivity curve. These should match +#' names in the functional_group column of ewe_output. +#' @param functional_form A character string specifying the selectivity curve +#' type. Currently supports "logistic" and "double_logistic". Additional +#' forms can be added by creating new helper functions. +#' @param fleet_name A character string specifying the fleet name to use in the +#' output tibble. Default is "fleet1". +#' +#' @return A tibble with the following columns: +#' \itemize{ +#' \item model_family: Always "catch_at_age" +#' \item module_name: Always "Selectivity" +#' \item fleet_name: Fleet name as specified in the input +#' \item module_type: The selectivity curve type (e.g., "Logistic", "DoubleLogistic") +#' \item label: Parameter name (e.g., "inflection_point", "slope") +#' \item distribution_link: Always NA +#' \item age: Always NA +#' \item length: Always NA +#' \item time: Always NA +#' \item value: The fitted parameter value +#' \item estimation_type: Always "fixed_effects" +#' \item distribution_type: Always NA +#' \item distribution: Always NA +#' } +#' +#' @details +#' The function extracts fishing mortality values for the specified functional +#' groups and uses a helper function specific to the chosen functional form to +#' fit selectivity parameters. The F values are normalized to be between 0 and 1 +#' before fitting, representing relative selectivity. +#' +#' New functional forms can be added by creating helper functions with the +#' naming pattern `fit_