diff --git a/NAMESPACE b/NAMESPACE index c59c727..4fc63ef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(generate_selectivity_curve) export(get_functional_groups) export(load_csv_environmental_data) export(load_csv_ewe) diff --git a/R/fit_selectivity_helpers.R b/R/fit_selectivity_helpers.R new file mode 100644 index 0000000..3e26f46 --- /dev/null +++ b/R/fit_selectivity_helpers.R @@ -0,0 +1,233 @@ +#' Fit logistic selectivity parameters +#' +#' @description +#' Fit logistic curve parameters to a vector of normalized values. The logistic +#' function is defined as: S(x) = 1 / (1 + exp(-slope * (x - inflection_point))) +#' where x is the index position (1, 2, 3, ...). +#' +#' @param f_values A numeric vector of normalized values (0-1) for each +#' group, in the order they should appear on the x-axis. These values +#' represent relative selectivity, maturity, or any other quantity that +#' follows a logistic pattern. +#' +#' @return A tibble with one row per parameter, containing the standard FIMS +#' parameter format columns. +#' +#' @details +#' The function uses nonlinear least squares (nls) to fit a logistic curve to +#' the normalized values. The x-axis index is simply 1, 2, 3, ... based on +#' the position in the values vector. +#' +#' If the optimization fails, the function falls back to a simple heuristic +#' method to estimate the inflection point and slope. +#' +#' @keywords internal +fit_logistic_selectivity <- function(f_values) { + # Create index (1, 2, 3, ...) + index_values <- seq_along(f_values) + + # Define logistic function + logistic <- function(index, inflection_point, slope) { + 1 / (1 + exp(-slope * (index - inflection_point))) + } + + # Initial parameter guesses + # Inflection point: index where value is around 0.5 + midpoint_index <- which.min(abs(f_values - 0.5)) + initial_inflection <- if (length(midpoint_index) > 0) index_values[midpoint_index] else mean(index_values) + + # Slope: positive value, start with 1 + initial_slope <- 1 + + # Fit using nonlinear least squares + fit_result <- tryCatch( + { + stats::nls( + f_values ~ logistic(index_values, inflection_point, slope), + start = list(inflection_point = initial_inflection, slope = initial_slope), + control = stats::nls.control(maxiter = 100, warnOnly = TRUE) + ) + }, + error = function(e) { + # Fallback: use simple heuristic + NULL + } + ) + + if (!is.null(fit_result)) { + params <- stats::coef(fit_result) + inflection_point <- params["inflection_point"] + slope <- params["slope"] + } else { + # Fallback method: find inflection as midpoint of rise + # and estimate slope from the steepest part of the curve + inflection_point <- initial_inflection + + # Estimate slope from differences + if (length(index_values) > 1) { + diffs <- diff(f_values) / diff(index_values) + slope <- max(abs(diffs), 0.1) * 4 # Scale up since max derivative of logistic is slope/4 + } else { + slope <- 1 + } + } + + # Create output tibble in FIMS format + tibble::tibble( + model_family = "catch_at_age", + module_name = "Selectivity", + module_type = "Logistic", + label = c("inflection_point", "slope"), + distribution_link = NA_character_, + age = NA_real_, + length = NA_real_, + time = NA_real_, + value = c(inflection_point, slope), + estimation_type = "fixed_effects", + distribution_type = NA_character_, + distribution = NA_character_ + ) +} + + +#' Fit double logistic selectivity parameters +#' +#' @description +#' Fit double logistic (dome-shaped) curve parameters to a vector of normalized +#' values. The double logistic function combines ascending and descending logistic +#' curves: S(x) = 1 / (1 + exp(-slope_asc * (x - inflection_asc))) * +#' (1 - 1 / (1 + exp(-slope_desc * (x - inflection_desc)))) +#' +#' @param f_values A numeric vector of normalized values (0-1) for each +#' group, in the order they should appear on the x-axis. These values +#' represent relative selectivity, maturity, or any other quantity that +#' follows a dome-shaped pattern. +#' +#' @return A tibble with one row per parameter, containing the standard FIMS +#' parameter format columns. +#' +#' @details +#' The function attempts to fit a dome-shaped curve. If the data +#' appears monotonic (no dome shape), it will still fit the parameters but may +#' produce parameters that effectively reduce to a simple logistic. +#' +#' The fitting uses nonlinear least squares with constraints to ensure: +#' \itemize{ +#' \item Ascending inflection point comes before descending +#' \item Slopes are positive +#' \item The curve shape is biologically reasonable +#' } +#' +#' @keywords internal +fit_double_logistic_selectivity <- function(f_values) { + # Create index (1, 2, 3, ...) + index_values <- seq_along(f_values) + + # Define double logistic function + double_logistic <- function(index, inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending) { + ascending_component <- 1 / (1 + exp(-slope_ascending * (index - inflection_point_ascending))) + descending_component <- 1 - 1 / (1 + exp(-slope_descending * (index - inflection_point_descending))) + ascending_component * descending_component + } + + # Initial parameter guesses + # Find the peak of the values + peak_idx <- which.max(f_values) + peak_index <- index_values[peak_idx] + + # Ascending parameters: before peak + if (peak_idx > 1) { + ascending_f <- f_values[1:peak_idx] + ascending_indices <- index_values[1:peak_idx] + midpoint_ascending_index <- which.min(abs(ascending_f - 0.5)) + initial_inflection_point_ascending <- if (length(midpoint_ascending_index) > 0) ascending_indices[midpoint_ascending_index] else peak_index - 1 + } else { + initial_inflection_point_ascending <- peak_index - 1 + } + + # Descending parameters: after peak + if (peak_idx < length(index_values)) { + descending_f <- f_values[peak_idx:length(index_values)] + descending_indices <- index_values[peak_idx:length(index_values)] + # For descending, look for where it drops below peak/2 + midpoint_descending_index <- which.min(abs(descending_f - max(f_values) / 2)) + initial_inflection_point_descending <- if (length(midpoint_descending_index) > 0) descending_indices[midpoint_descending_index] else peak_index + 1 + } else { + initial_inflection_point_descending <- peak_index + 1 + } + + # Ensure descending inflection is after ascending + if (initial_inflection_point_descending <= initial_inflection_point_ascending) { + initial_inflection_point_descending <- initial_inflection_point_ascending + 1 + } + + initial_slope_ascending <- 1 + initial_slope_descending <- 1 + + # Fit using nonlinear least squares + fit_result <- tryCatch( + { + stats::nls( + f_values ~ double_logistic(index_values, inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending), + start = list( + inflection_point_ascending = initial_inflection_point_ascending, + slope_ascending = initial_slope_ascending, + inflection_point_descending = initial_inflection_point_descending, + slope_descending = initial_slope_descending + ), + control = stats::nls.control(maxiter = 200, warnOnly = TRUE), + algorithm = "port", + lower = c( + inflection_point_ascending = min(index_values), + slope_ascending = 0.01, + inflection_point_descending = initial_inflection_point_ascending + 0.5, + slope_descending = 0.01 + ), + upper = c( + inflection_point_ascending = max(index_values), + slope_ascending = 10, + inflection_point_descending = max(index_values) + 10, + slope_descending = 10 + ) + ) + }, + error = function(e) { + NULL + } + ) + + if (!is.null(fit_result)) { + params <- stats::coef(fit_result) + inflection_point_ascending <- params["inflection_point_ascending"] + slope_ascending <- params["slope_ascending"] + inflection_point_descending <- params["inflection_point_descending"] + slope_descending <- params["slope_descending"] + } else { + # Fallback to initial guesses + inflection_point_ascending <- initial_inflection_point_ascending + slope_ascending <- initial_slope_ascending + inflection_point_descending <- initial_inflection_point_descending + slope_descending <- initial_slope_descending + } + + # Create output tibble in FIMS format + tibble::tibble( + model_family = "catch_at_age", + module_name = "Selectivity", + module_type = "DoubleLogistic", + label = c( + "inflection_point_asc", + "slope_asc", + "inflection_point_desc", + "slope_desc" + ), + distribution_link = NA_character_, + age = NA_real_, + length = NA_real_, + time = NA_real_, + value = c(inflection_point_ascending, slope_ascending, inflection_point_descending, slope_descending), + estimation_type = "fixed_effects", + distribution_type = NA_character_, + distribution = NA_character_ + ) +} diff --git a/R/generate_selectivity_curve.R b/R/generate_selectivity_curve.R new file mode 100644 index 0000000..0f10c37 --- /dev/null +++ b/R/generate_selectivity_curve.R @@ -0,0 +1,223 @@ +utils::globalVariables(c("functional_group", "year", "month", "time_step")) + +#' Generate selectivity curve parameters from ecosystem model F values +#' +#' @description +#' This function takes fishing mortality (F) values by functional group from an +#' ecosystem model and fits selectivity curve parameters based on a specified +#' functional form. The function is modular and can be extended to support +#' additional selectivity curve types. +#' +#' @param ewe_output A tibble containing ecosystem model output, typically from +#' [load_model()] with type = "ewe". Must contain columns: functional_group, +#' type, value, year, and month. The function will filter for type == "mortality" +#' and generate selectivity parameters for each year-month combination. +#' @param functional_groups A character vector of functional group names in the +#' order desired for the x-axis of the selectivity curve. These should match +#' names in the functional_group column of ewe_output. +#' @param functional_form A character string specifying the selectivity curve +#' type. Currently supports "logistic" and "double_logistic". Additional +#' forms can be added by creating new helper functions. +#' @param fleet_name A character string specifying the fleet name to use in the +#' output tibble. Default is "fleet1". +#' +#' @return A tibble with one row per parameter per time step. Contains the following columns: +#' \itemize{ +#' \item model_family: Always "catch_at_age" +#' \item module_name: Always "Selectivity" +#' \item fleet_name: Fleet name as specified in the input +#' \item module_type: The selectivity curve type (e.g., "Logistic", "DoubleLogistic") +#' \item label: Parameter name (e.g., "inflection_point", "slope") +#' \item distribution_link: Always NA +#' \item age: Always NA +#' \item length: Always NA +#' \item time: Time step identifier (year-month combination) +#' \item value: The fitted parameter value +#' \item estimation_type: Always "fixed_effects" +#' \item distribution_type: Always NA +#' \item distribution: Always NA +#' } +#' +#' @details +#' The function extracts fishing mortality values for the specified functional +#' groups for each year-month combination and uses a helper function specific to +#' the chosen functional form to fit selectivity parameters. For each time step, +#' the F values are normalized to be between 0 and 1 before fitting, representing +#' relative selectivity. This results in time-varying selectivity parameters. +#' +#' New functional forms can be added by creating helper functions with the +#' naming pattern `fit_
_selectivity()` that take a vector of values and +#' return parameter estimates. To add a new selectivity form: +#' \enumerate{ +#' \item Create a helper function named `fit__selectivity()` that +#' takes a numeric vector of normalized values (0-1) and returns a tibble +#' with columns: model_family, module_name, module_type, label, +#' distribution_link, age, length, time, value, estimation_type, +#' distribution_type, and distribution. +#' \item The function will automatically be available by passing the form name +#' to the `functional_form` argument (e.g., "new_form" will call +#' `fit_new_form_selectivity()`). +#' } +#' +#' @export +#' @examples +#' \dontrun{ +#' # Load example data +#' data("ewe_nwatlantic_base", package = "ecosystemdata") +#' +#' # Generate logistic selectivity for spiny dogfish age groups +#' generate_selectivity_curve( +#' ewe_output = ewe_nwatlantic_base, +#' functional_groups = c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +"), +#' functional_form = "logistic", +#' fleet_name = "fleet1" +#' ) +#' +#' # Generate double logistic selectivity +#' generate_selectivity_curve( +#' ewe_output = ewe_nwatlantic_base, +#' functional_groups = c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +"), +#' functional_form = "double_logistic", +#' fleet_name = "fleet1" +#' ) +#' } +generate_selectivity_curve <- function(ewe_output, + functional_groups, + functional_form, + fleet_name = "fleet1") { + # Input validation + if (!inherits(ewe_output, "data.frame")) { + cli::cli_abort("{.var ewe_output} must be a data frame or tibble.") + } + + required_cols <- c("functional_group", "type", "value", "year", "month") + missing_cols <- setdiff(required_cols, names(ewe_output)) + if (length(missing_cols) > 0) { + cli::cli_abort(c( + "{.var ewe_output} is missing required columns:", + "x" = "Missing: {.var {missing_cols}}" + )) + } + + if (!inherits(functional_groups, "character")) { + cli::cli_abort("{.var functional_groups} must be a character vector.") + } + + if (length(functional_groups) < 2) { + cli::cli_abort(c( + "{.var functional_groups} must contain at least 2 groups.", + "i" = "Selectivity curves require multiple age/size classes." + )) + } + + if (!inherits(functional_form, "character") || length(functional_form) != 1) { + cli::cli_abort("{.var functional_form} must be a single character string.") + } + + # Check that functional groups exist in the data + missing_groups <- setdiff( + functional_groups, + unique(ewe_output[["functional_group"]]) + ) + if (length(missing_groups) > 0) { + cli::cli_abort(c( + "Some functional groups not found in {.var ewe_output}:", + "x" = "Missing: {.var {missing_groups}}" + )) + } + + # Extract mortality data for the specified functional groups + mortality_data <- ewe_output |> + dplyr::filter( + type == "mortality", + functional_group %in% functional_groups + ) + + if (nrow(mortality_data) == 0) { + cli::cli_abort(c( + "No mortality data found for the specified functional groups.", + "i" = "Check that type == 'mortality' exists in {.var ewe_output}" + )) + } + + # Create time step identifier for each year-month combination + mortality_data <- mortality_data |> + dplyr::mutate(time_step = paste(year, month, sep = "-")) + + # Get unique time steps + time_steps <- unique(mortality_data[["time_step"]]) + + # Call the appropriate helper function based on functional_form + helper_function_name <- paste0("fit_", tolower(functional_form), "_selectivity") + + if (!exists(helper_function_name, mode = "function")) { + cli::cli_abort(c( + "Unsupported functional form: {.val {functional_form}}", + "i" = "Currently supported: 'logistic', 'double_logistic'", + "i" = "To add support, create a function named {.fn {helper_function_name}}" + )) + } + + helper_function <- get(helper_function_name, mode = "function") + + # Process each time step separately + results_list <- lapply(time_steps, function(ts) { + # Extract F-at-age vector for this time step + time_step_data <- mortality_data |> + dplyr::filter(time_step == ts) |> + dplyr::select(functional_group, value) + + # Ensure the order matches the input functional_groups + f_values_data <- tibble::tibble(functional_group = functional_groups) |> + dplyr::left_join(time_step_data, by = "functional_group") + + f_values <- f_values_data[["value"]] + + # Check for missing F values + if (any(is.na(f_values))) { + na_groups <- functional_groups[is.na(f_values)] + cli::cli_abort(c( + "Missing mortality data for some functional groups in time step {.val {ts}}:", + "x" = "Groups with no data: {.var {na_groups}}" + )) + } + + # Normalize F values to 0-1 range (representing relative selectivity) + if (max(f_values) == 0) { + cli::cli_warn( + "All F values are zero for time step {.val {ts}}. Skipping this time step." + ) + return(NULL) + } + normalized_f <- f_values / max(f_values) + + # Call the helper function + result <- helper_function(normalized_f) + + # Add time step and fleet name to the result + result[["time"]] <- ts + result[["fleet_name"]] <- fleet_name + + return(result) + }) + + # Remove NULL results (time steps with all zero F values) + results_list <- Filter(Negate(is.null), results_list) + + if (length(results_list) == 0) { + cli::cli_abort("All time steps have zero F values. Cannot fit selectivity curve.") + } + + # Combine all results + result <- dplyr::bind_rows(results_list) + + # Reorder columns to match expected output format + result <- result |> + dplyr::select( + model_family, module_name, fleet_name, module_type, + label, distribution_link, age, length, time, value, + estimation_type, distribution_type, distribution + ) + + return(result) +} diff --git a/man/generate_selectivity_curve.Rd b/man/generate_selectivity_curve.Rd new file mode 100644 index 0000000..1e23715 --- /dev/null +++ b/man/generate_selectivity_curve.Rd @@ -0,0 +1,97 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generate_selectivity_curve.R +\name{generate_selectivity_curve} +\alias{generate_selectivity_curve} +\title{Generate selectivity curve parameters from ecosystem model F values} +\usage{ +generate_selectivity_curve( + ewe_output, + functional_groups, + functional_form, + fleet_name = "fleet1" +) +} +\arguments{ +\item{ewe_output}{A tibble containing ecosystem model output, typically from +\code{\link[=load_model]{load_model()}} with type = "ewe". Must contain columns: functional_group, +type, value, year, and month. The function will filter for type == "mortality" +and generate selectivity parameters for each year-month combination.} + +\item{functional_groups}{A character vector of functional group names in the +order desired for the x-axis of the selectivity curve. These should match +names in the functional_group column of ewe_output.} + +\item{functional_form}{A character string specifying the selectivity curve +type. Currently supports "logistic" and "double_logistic". Additional +forms can be added by creating new helper functions.} + +\item{fleet_name}{A character string specifying the fleet name to use in the +output tibble. Default is "fleet1".} +} +\value{ +A tibble with one row per parameter per time step. Contains the following columns: +\itemize{ +\item model_family: Always "catch_at_age" +\item module_name: Always "Selectivity" +\item fleet_name: Fleet name as specified in the input +\item module_type: The selectivity curve type (e.g., "Logistic", "DoubleLogistic") +\item label: Parameter name (e.g., "inflection_point", "slope") +\item distribution_link: Always NA +\item age: Always NA +\item length: Always NA +\item time: Time step identifier (year-month combination) +\item value: The fitted parameter value +\item estimation_type: Always "fixed_effects" +\item distribution_type: Always NA +\item distribution: Always NA +} +} +\description{ +This function takes fishing mortality (F) values by functional group from an +ecosystem model and fits selectivity curve parameters based on a specified +functional form. The function is modular and can be extended to support +additional selectivity curve types. +} +\details{ +The function extracts fishing mortality values for the specified functional +groups for each year-month combination and uses a helper function specific to +the chosen functional form to fit selectivity parameters. For each time step, +the F values are normalized to be between 0 and 1 before fitting, representing +relative selectivity. This results in time-varying selectivity parameters. + +New functional forms can be added by creating helper functions with the +naming pattern \code{fit__selectivity()} that take a vector of values and +return parameter estimates. To add a new selectivity form: +\enumerate{ +\item Create a helper function named \code{fit__selectivity()} that +takes a numeric vector of normalized values (0-1) and returns a tibble +with columns: model_family, module_name, module_type, label, +distribution_link, age, length, time, value, estimation_type, +distribution_type, and distribution. +\item The function will automatically be available by passing the form name +to the \code{functional_form} argument (e.g., "new_form" will call +\code{fit_new_form_selectivity()}). +} +} +\examples{ +\dontrun{ +# Load example data +data("ewe_nwatlantic_base", package = "ecosystemdata") + +# Generate logistic selectivity for spiny dogfish age groups +generate_selectivity_curve( + ewe_output = ewe_nwatlantic_base, + functional_groups = c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +"), + functional_form = "logistic", + fleet_name = "fleet1" +) + +# Generate double logistic selectivity +generate_selectivity_curve( + ewe_output = ewe_nwatlantic_base, + functional_groups = c("spiny dogfish 0", "spiny dogfish 1-2", "spiny dogfish +"), + functional_form = "double_logistic", + fleet_name = "fleet1" +) +} +} diff --git a/tests/testthat/test-fit_selectivity_helpers.R b/tests/testthat/test-fit_selectivity_helpers.R new file mode 100644 index 0000000..1e266bf --- /dev/null +++ b/tests/testthat/test-fit_selectivity_helpers.R @@ -0,0 +1,179 @@ +# Instructions ---- +#' This file follows the format generated by ecosystemdata:::use_testthat_template(). +#' Necessary tests include input and output (IO) correctness [IO +#' correctness], edge-case handling [Edge handling], and built-in errors and +#' warnings [Error handling]. See `?ecosystemdata:::use_testthat_template` for more +#' information. Every test should have a @description tag that takes up just +#' one line, which will be used in the bookdown report of {testthat} results. + +# Setup ---- +# Load or prepare any necessary data for testing + +# fit_logistic_selectivity ---- +## IO correctness ---- +test_that("fit_logistic_selectivity() returns correct structure", { + #' @description Test that fit_logistic_selectivity() returns a tibble with + #' the correct columns and parameter labels. + f_values <- c(0.1, 0.5, 0.9) + result <- ecosystemdata:::fit_logistic_selectivity(f_values) + + expect_s3_class(result, "tbl_df") + expect_equal(nrow(result), 2) + expect_equal(result[["label"]], c("inflection_point", "slope")) + expect_equal(result[["module_type"]], rep("Logistic", 2)) + expect_true(all(is.numeric(result[["value"]]))) +}) + +test_that("fit_logistic_selectivity() handles monotonic increasing pattern", { + #' @description Test that the function fits an increasing selectivity pattern. + f_values <- seq(0, 1, length.out = 5) + result <- ecosystemdata:::fit_logistic_selectivity(f_values) + + expect_true(result[["value"]][result[["label"]] == "slope"] > 0) + expect_true(result[["value"]][result[["label"]] == "inflection_point"] >= 1) + expect_true(result[["value"]][result[["label"]] == "inflection_point"] <= 5) +}) + +## Validation tests ---- +test_that("fit_logistic_selectivity() recovers known parameters", { + #' @description Test that the function can recover known logistic parameters from generated data. + + # True parameters + true_inflection <- 3 + true_slope <- 1.5 + + # Generate selectivity values from known parameters + index_values <- 1:6 + true_selectivity <- 1 / (1 + exp(-true_slope * (index_values - true_inflection))) + + # Fit the model + result <- ecosystemdata:::fit_logistic_selectivity(true_selectivity) + + # Extract estimated parameters + estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] + estimated_slope <- result[["value"]][result[["label"]] == "slope"] + + # Check that estimates are close to true values (within 10% tolerance) + expect_equal(estimated_inflection, true_inflection, tolerance = 0.1 * true_inflection) + expect_equal(estimated_slope, true_slope, tolerance = 0.1 * true_slope) +}) + +test_that("fit_logistic_selectivity() handles different slopes", { + #' @description Test parameter recovery with a different slope value. + + # True parameters with steeper slope + true_inflection <- 4 + true_slope <- 3.0 + + # Generate selectivity values + index_values <- 1:8 + true_selectivity <- 1 / (1 + exp(-true_slope * (index_values - true_inflection))) + + # Fit the model + result <- ecosystemdata:::fit_logistic_selectivity(true_selectivity) + + # Extract estimated parameters + estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] + estimated_slope <- result[["value"]][result[["label"]] == "slope"] + + # Check estimates + expect_equal(estimated_inflection, true_inflection, tolerance = 0.15 * true_inflection) + expect_equal(estimated_slope, true_slope, tolerance = 0.15 * true_slope) +}) + +# fit_double_logistic_selectivity ---- +## IO correctness ---- +test_that("fit_double_logistic_selectivity() returns correct structure", { + #' @description Test that fit_double_logistic_selectivity() returns a tibble + #' with the correct columns and parameter labels. + f_values <- c(0.1, 0.5, 0.9, 0.6, 0.3) + result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) + + expect_s3_class(result, "tbl_df") + expect_equal(nrow(result), 4) + expect_equal( + result[["label"]], + c("inflection_point_asc", "slope_asc", "inflection_point_desc", "slope_desc") + ) + expect_equal(result[["module_type"]], rep("DoubleLogistic", 4)) + expect_true(all(is.numeric(result[["value"]]))) +}) + +test_that("fit_double_logistic_selectivity() handles dome-shaped pattern", { + #' @description Test that the function fits a dome-shaped selectivity pattern. + f_values <- c(0.1, 0.5, 1.0, 0.7, 0.3) + result <- ecosystemdata:::fit_double_logistic_selectivity(f_values) + + inflection_ascending <- result[["value"]][result[["label"]] == "inflection_point_asc"] + inflection_descending <- result[["value"]][result[["label"]] == "inflection_point_desc"] + + # Ascending inflection should come before descending + expect_true(inflection_ascending < inflection_descending) + + # Slopes should be positive + expect_true(result[["value"]][result[["label"]] == "slope_asc"] > 0) + expect_true(result[["value"]][result[["label"]] == "slope_desc"] > 0) +}) + +## Validation tests ---- +test_that("fit_double_logistic_selectivity() recovers known parameters", { + #' @description Test that the function can recover known double logistic parameters from generated data. + + # True parameters + true_inflection_asc <- 2.5 + true_slope_asc <- 2.0 + true_inflection_desc <- 5.5 + true_slope_desc <- 1.5 + + # Generate selectivity values from known parameters + index_values <- 1:8 + asc_component <- 1 / (1 + exp(-true_slope_asc * (index_values - true_inflection_asc))) + desc_component <- 1 - 1 / (1 + exp(-true_slope_desc * (index_values - true_inflection_desc))) + true_selectivity <- asc_component * desc_component + + # Fit the model + result <- ecosystemdata:::fit_double_logistic_selectivity(true_selectivity) + + # Extract estimated parameters + est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] + est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] + est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] + est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] + + # Check that estimates are close to true values (within 20% tolerance for more complex model) + expect_equal(est_infl_asc, true_inflection_asc, tolerance = 0.2 * true_inflection_asc) + expect_equal(est_slope_asc, true_slope_asc, tolerance = 0.2 * true_slope_asc) + expect_equal(est_infl_desc, true_inflection_desc, tolerance = 0.2 * true_inflection_desc) + expect_equal(est_slope_desc, true_slope_desc, tolerance = 0.2 * true_slope_desc) +}) + +test_that("fit_double_logistic_selectivity() handles symmetric dome", { + #' @description Test parameter recovery with symmetric dome shape. + + # True parameters with symmetric slopes + true_inflection_asc <- 3.0 + true_slope_asc <- 1.5 + true_inflection_desc <- 7.0 + true_slope_desc <- 1.5 + + # Generate selectivity values + index_values <- 1:10 + asc_component <- 1 / (1 + exp(-true_slope_asc * (index_values - true_inflection_asc))) + desc_component <- 1 - 1 / (1 + exp(-true_slope_desc * (index_values - true_inflection_desc))) + true_selectivity <- asc_component * desc_component + + # Fit the model + result <- ecosystemdata:::fit_double_logistic_selectivity(true_selectivity) + + # Extract estimated parameters + est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] + est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] + est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] + est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] + + # Check estimates + expect_equal(est_infl_asc, true_inflection_asc, tolerance = 0.25 * true_inflection_asc) + expect_equal(est_slope_asc, true_slope_asc, tolerance = 0.25 * true_slope_asc) + expect_equal(est_infl_desc, true_inflection_desc, tolerance = 0.25 * true_inflection_desc) + expect_equal(est_slope_desc, true_slope_desc, tolerance = 0.25 * true_slope_desc) +}) diff --git a/tests/testthat/test-generate_selectivity_curve.R b/tests/testthat/test-generate_selectivity_curve.R new file mode 100644 index 0000000..23a77ba --- /dev/null +++ b/tests/testthat/test-generate_selectivity_curve.R @@ -0,0 +1,353 @@ +# Instructions ---- +#' This file follows the format generated by ecosystemdata:::use_testthat_template(). +#' Necessary tests include input and output (IO) correctness [IO +#' correctness], edge-case handling [Edge handling], and built-in errors and +#' warnings [Error handling]. See `?ecosystemdata:::use_testthat_template` for more +#' information. Every test should have a @description tag that takes up just +#' one line, which will be used in the bookdown report of {testthat} results. + +# Setup ---- +# Load or prepare any necessary data for testing +data("ewe_nwatlantic_base", package = "ecosystemdata") + +# Create a minimal test dataset +test_data <- tibble::tibble( + functional_group = rep(c("species1 0", "species1 1", "species1 2"), each = 12), + type = "mortality", + value = c( + rep(0.1, 12), # Low F for age 0 + rep(0.5, 12), # Medium F for age 1 + rep(0.8, 12) # High F for age 2 + ), + year = rep(2000, 36), + month = rep(1:12, 3) +) + +# generate_selectivity_curve ---- +## IO correctness ---- +test_that("generate_selectivity_curve() works with logistic form", { + #' @description Test that generate_selectivity_curve() returns a tibble with + #' correct structure for logistic selectivity. + result <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "logistic", + fleet_name = "fleet1" + ) + + expect_s3_class(result, "tbl_df") + # 2 parameters (inflection_point and slope) × 12 time steps = 24 rows + expect_equal(nrow(result), 24) + expect_equal(unique(result[["model_family"]]), "catch_at_age") + expect_equal(unique(result[["module_name"]]), "Selectivity") + expect_equal(unique(result[["fleet_name"]]), "fleet1") + expect_equal(unique(result[["module_type"]]), "Logistic") + expect_equal(unique(result[["label"]]), c("inflection_point", "slope")) + expect_equal(unique(result[["estimation_type"]]), "fixed_effects") + expect_true(all(is.na(result[["distribution_link"]]))) + expect_true(all(is.numeric(result[["value"]]))) + expect_true(all(!is.na(result[["value"]]))) + # Check that time column contains the year-month combinations + expect_true(all(!is.na(result[["time"]]))) + expect_equal(length(unique(result[["time"]])), 12) +}) + +test_that("generate_selectivity_curve() works with double_logistic form", { + #' @description Test that generate_selectivity_curve() returns a tibble with + #' correct structure for double logistic selectivity. + result <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "double_logistic", + fleet_name = "fleet1" + ) + + expect_s3_class(result, "tbl_df") + # 4 parameters for double logistic × 12 time steps = 48 rows + expect_equal(nrow(result), 48) + expect_equal(unique(result[["model_family"]]), "catch_at_age") + expect_equal(unique(result[["module_name"]]), "Selectivity") + expect_equal(unique(result[["fleet_name"]]), "fleet1") + expect_equal(unique(result[["module_type"]]), "DoubleLogistic") + expect_equal( + unique(result[["label"]]), + c("inflection_point_asc", "slope_asc", "inflection_point_desc", "slope_desc") + ) + expect_equal(unique(result[["estimation_type"]]), "fixed_effects") + expect_true(all(is.numeric(result[["value"]]))) + expect_true(all(!is.na(result[["value"]]))) + # Check that time column contains the year-month combinations + expect_true(all(!is.na(result[["time"]]))) + expect_equal(length(unique(result[["time"]])), 12) +}) + +test_that("generate_selectivity_curve() works with real EWE data", { + #' @description Test that generate_selectivity_curve() works with actual + #' ewe_nwatlantic_base data for menhaden. + + # Get menhaden functional groups + menhaden_groups <- unique(ewe_nwatlantic_base[["functional_group"]]) + menhaden_groups <- menhaden_groups[grepl("menhaden", menhaden_groups, ignore.case = TRUE)] + + skip_if(length(menhaden_groups) < 2, "Not enough menhaden groups in data") + + result <- generate_selectivity_curve( + ewe_output = ewe_nwatlantic_base, + functional_groups = menhaden_groups, + functional_form = "logistic", + fleet_name = "commercial" + ) + + expect_s3_class(result, "tbl_df") + # Should have results for multiple time steps (2 parameters per time step) + expect_true(nrow(result) > 2) + expect_true(nrow(result) %% 2 == 0) # Should be even number (2 params per time step) + expect_equal(unique(result[["fleet_name"]]), "commercial") + expect_true(all(is.numeric(result[["value"]]))) + expect_true(all(!is.na(result[["time"]]))) +}) + +## Edge handling ---- +test_that("generate_selectivity_curve() handles case-insensitive functional forms", { + #' @description Test that functional_form parameter accepts different cases. + result1 <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "LOGISTIC", + fleet_name = "fleet1" + ) + + result2 <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "Logistic", + fleet_name = "fleet1" + ) + + expect_equal(result1[["module_type"]], result2[["module_type"]]) +}) + +test_that("generate_selectivity_curve() preserves functional_groups order", { + #' @description Test that the order of functional groups affects the fitted + #' parameters (reversed order should give different results). + + result_forward <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "logistic", + fleet_name = "fleet1" + ) + + result_reversed <- generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 2", "species1 1", "species1 0"), + functional_form = "logistic", + fleet_name = "fleet1" + ) + + # Check inflection points for the first time step + forward_infl <- result_forward[["value"]][result_forward[["label"]] == "inflection_point"][1] + reversed_infl <- result_reversed[["value"]][result_reversed[["label"]] == "inflection_point"][1] + + # The inflection points should be different because the order is reversed + expect_false(isTRUE(all.equal(forward_infl, reversed_infl))) +}) + +## Error handling ---- +test_that("generate_selectivity_curve() errors on invalid ewe_output", { + #' @description Test that the function errors when ewe_output is not a data frame. + expect_error( + generate_selectivity_curve( + ewe_output = "not a data frame", + functional_groups = c("species1 0", "species1 1"), + functional_form = "logistic" + ), + "must be a data frame" + ) +}) + +test_that("generate_selectivity_curve() errors on missing columns", { + #' @description Test that the function errors when required columns are missing. + bad_data <- test_data[, c("functional_group", "value")] + + expect_error( + generate_selectivity_curve( + ewe_output = bad_data, + functional_groups = c("species1 0", "species1 1"), + functional_form = "logistic" + ), + "missing required columns" + ) +}) + +test_that("generate_selectivity_curve() errors on invalid functional_groups", { + #' @description Test that the function errors when functional_groups is not + #' a character vector. + expect_error( + generate_selectivity_curve( + ewe_output = test_data, + functional_groups = 123, + functional_form = "logistic" + ), + "must be a character vector" + ) +}) + +test_that("generate_selectivity_curve() errors with too few functional groups", { + #' @description Test that the function errors when fewer than 2 functional + #' groups are provided. + expect_error( + generate_selectivity_curve( + ewe_output = test_data, + functional_groups = "species1 0", + functional_form = "logistic" + ), + "at least 2 groups" + ) +}) + +test_that("generate_selectivity_curve() errors on unsupported functional form", { + #' @description Test that the function errors when an unsupported functional + #' form is specified. + expect_error( + generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("species1 0", "species1 1"), + functional_form = "invalid_form" + ), + "Unsupported functional form" + ) +}) + +test_that("generate_selectivity_curve() errors on missing functional groups", { + #' @description Test that the function errors when specified functional groups + #' are not in the data. + expect_error( + generate_selectivity_curve( + ewe_output = test_data, + functional_groups = c("nonexistent 0", "nonexistent 1"), + functional_form = "logistic" + ), + "not found in" + ) +}) + +test_that("generate_selectivity_curve() errors on missing mortality data", { + #' @description Test that the function errors when no mortality data exists + #' for the specified groups. + biomass_data <- test_data + biomass_data[["type"]] <- "biomass" + + expect_error( + generate_selectivity_curve( + ewe_output = biomass_data, + functional_groups = c("species1 0", "species1 1"), + functional_form = "logistic" + ), + "No mortality data found" + ) +}) + +test_that("generate_selectivity_curve() errors on all-zero F values", { + #' @description Test that the function errors when all F values are zero across all time steps. + zero_data <- test_data + zero_data[["value"]] <- 0 + + expect_error( + generate_selectivity_curve( + ewe_output = zero_data, + functional_groups = c("species1 0", "species1 1", "species1 2"), + functional_form = "logistic" + ), + "All time steps have zero F values" + ) +}) + +## Validation tests ---- +test_that("generate_selectivity_curve() recovers parameters from known selectivity", { + #' @description Test that the full workflow can recover known parameters. + + # Create true selectivity parameters + true_inflection <- 2.0 + true_slope <- 1.5 + + # Generate F values from true selectivity + index_values <- 1:5 + true_selectivity <- 1 / (1 + exp(-true_slope * (index_values - true_inflection))) + + # Create test data with these F values (single time step for simplicity) + validation_data <- tibble::tibble( + functional_group = paste("group", index_values), + type = "mortality", + value = true_selectivity, + year = 2000, + month = 1 + ) + + # Fit using generate_selectivity_curve + result <- generate_selectivity_curve( + ewe_output = validation_data, + functional_groups = paste("group", index_values), + functional_form = "logistic", + fleet_name = "test" + ) + + # Should have 2 rows (one time step, 2 parameters) + expect_equal(nrow(result), 2) + + # Extract estimated parameters + estimated_inflection <- result[["value"]][result[["label"]] == "inflection_point"] + estimated_slope <- result[["value"]][result[["label"]] == "slope"] + + # Check that estimates are close to true values + expect_equal(estimated_inflection, true_inflection, tolerance = 0.15 * true_inflection) + expect_equal(estimated_slope, true_slope, tolerance = 0.15 * true_slope) +}) + +test_that("generate_selectivity_curve() recovers double logistic parameters", { + #' @description Test recovery of double logistic parameters through full workflow. + + # Create true parameters + true_infl_asc <- 2.0 + true_slope_asc <- 2.0 + true_infl_desc <- 5.0 + true_slope_desc <- 1.5 + + # Generate F values from true double logistic + index_values <- 1:7 + asc_component <- 1 / (1 + exp(-true_slope_asc * (index_values - true_infl_asc))) + desc_component <- 1 - 1 / (1 + exp(-true_slope_desc * (index_values - true_infl_desc))) + true_selectivity <- asc_component * desc_component + + # Create test data (single time step for simplicity) + validation_data <- tibble::tibble( + functional_group = paste("group", index_values), + type = "mortality", + value = true_selectivity, + year = 2000, + month = 1 + ) + + # Fit using generate_selectivity_curve + result <- generate_selectivity_curve( + ewe_output = validation_data, + functional_groups = paste("group", index_values), + functional_form = "double_logistic", + fleet_name = "test" + ) + + # Should have 4 rows (one time step, 4 parameters) + expect_equal(nrow(result), 4) + + # Extract estimated parameters + est_infl_asc <- result[["value"]][result[["label"]] == "inflection_point_asc"] + est_slope_asc <- result[["value"]][result[["label"]] == "slope_asc"] + est_infl_desc <- result[["value"]][result[["label"]] == "inflection_point_desc"] + est_slope_desc <- result[["value"]][result[["label"]] == "slope_desc"] + + # Check estimates (more lenient tolerance for complex model) + expect_equal(est_infl_asc, true_infl_asc, tolerance = 0.25 * true_infl_asc) + expect_equal(est_slope_asc, true_slope_asc, tolerance = 0.25 * true_slope_asc) + expect_equal(est_infl_desc, true_infl_desc, tolerance = 0.25 * true_infl_desc) + expect_equal(est_slope_desc, true_slope_desc, tolerance = 0.25 * true_slope_desc) +})