diff --git a/README.md b/README.md index fe19099179..7b993d1e4b 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,10 @@ -[![Flake8](https://img.shields.io/badge/Flake8-passed-brightgreen)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) -[![Pytest](https://img.shields.io/badge/Pytest-passed-brightgreen)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) -[![Coverage](https://img.shields.io/badge/Coverage-96%25-brightgreen)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) +[![Flake8](https://img.shields.io/badge/Flake8-failed-red)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) +[![Pytest](https://img.shields.io/badge/Pytest-failed-red)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) +[![Coverage](https://img.shields.io/badge/Coverage-%25-red)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) [![Mypy](https://img.shields.io/badge/Mypy-3453%20errors-red)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) + # Vision To support research and sustainable decision-making in ruminant animal production through a state-of-the-art, open-source modeling environment that is continuously adapting as technology and scientific knowledge advance. 
diff --git a/RUFAS/routines/animal/animal_manager.py b/RUFAS/routines/animal/animal_manager.py index 05e8f775fe..f579b1a7eb 100644 --- a/RUFAS/routines/animal/animal_manager.py +++ b/RUFAS/routines/animal/animal_manager.py @@ -549,7 +549,7 @@ def _calc_max_animal_spaces_per_pen(cls, num_stalls: int, max_stocking_density: if num_stalls < 0 or max_stocking_density < 0: raise ValueError("The number of stalls and maximum stocking density must be greater than or equal to 0.") - return int(num_stalls * max_stocking_density) + return max(int(num_stalls * max_stocking_density), 1) @classmethod def _calc_animal_space_shortage(cls, num_animals: int, pens: List[Pen]) -> int: diff --git a/RUFAS/routines/animal/life_cycle/cow.py b/RUFAS/routines/animal/life_cycle/cow.py index 79e6b02dd4..ef4fea9346 100644 --- a/RUFAS/routines/animal/life_cycle/cow.py +++ b/RUFAS/routines/animal/life_cycle/cow.py @@ -390,7 +390,7 @@ def milking_update(self, sim_day: int, calving_interval: int | float) -> None: Journal of Dairy Science, Volume 105, Issue 9, 2022, Pages 7525-7538, ISSN 0022-0302. 
""" - if self.days_in_preg == AnimalBase.config["days_in_preg_when_dry"]: + if self.days_in_preg >= AnimalBase.config["days_in_preg_when_dry"]: self.milking = False self.events.add_event(self.days_born, sim_day, const.DRY) self.days_in_milk = 0 @@ -716,9 +716,13 @@ def get_bw_change(self, CI: int | float) -> float: # noqa if self.days_in_preg == AnimalBase.config["days_in_preg_when_dry"] - 1: self.tissue_changed = 40 * self.days_in_milk / 70 * math.exp(1 - self.days_in_milk / 70) else: # dry period - bodyweight_tissue = self.tissue_changed / ( - self.gestation_length - AnimalBase.config["days_in_preg_when_dry"] - ) + # TODO + try: + bodyweight_tissue = self.tissue_changed / ( + self.gestation_length - AnimalBase.config["days_in_preg_when_dry"] + ) + except Exception as e: + bodyweight_tissue = 0.0 return target_adg_cow + conceptus_growth + bodyweight_tissue @@ -748,7 +752,7 @@ def update(self, sim_day: int, calving_interval: int | float) -> bool: # noqa new_born = False self.days_born += 1 - if self.days_in_preg > 0 and self.days_in_preg == self.gestation_length: + if self.days_in_preg > 0 and self.days_in_preg >= self.gestation_length: self._repro_state_manager.reset() self.calves += 1 self.milking = True diff --git a/RUFAS/routines/animal/life_cycle/heiferIII.py b/RUFAS/routines/animal/life_cycle/heiferIII.py index 090daf34f8..99d3fe1c06 100644 --- a/RUFAS/routines/animal/life_cycle/heiferIII.py +++ b/RUFAS/routines/animal/life_cycle/heiferIII.py @@ -183,7 +183,7 @@ def update(self, sim_day: int) -> bool: self.body_weight = self.mature_body_weight self.events.add_event(self.days_born, sim_day, const.MATURE_BODY_WEIGHT_REGULAR) - if self.days_in_preg == self.gestation_length: + if self.days_in_preg >= self.gestation_length: self.days_born -= 1 # will be incremented again in next stage cow_stage = True return cow_stage diff --git a/RUFAS/routines/animal/life_cycle/life_cycle.py b/RUFAS/routines/animal/life_cycle/life_cycle.py index 8bbc58a618..da820ca0e8 100644 --- 
a/RUFAS/routines/animal/life_cycle/life_cycle.py +++ b/RUFAS/routines/animal/life_cycle/life_cycle.py @@ -337,7 +337,7 @@ def daily_update( self._calculate_cull_reason_stats_percent() self._calculate_percent_cow_per_parity() - self.daily_milk_production = sum(cow.estimated_daily_milk_produced for cow in cows) + self.daily_milk_production = max(1.0, sum(cow.estimated_daily_milk_produced for cow in cows)) self.dry_cows_daily_milk_production = sum(cow.estimated_daily_milk_produced for cow in cows if not cow.milking) self.herd_milk_fat_kg = sum(cow.milk_fat_kg for cow in cows if cow.milking) self.herd_milk_fat_percent = (self.herd_milk_fat_kg / self.daily_milk_production) * 100 diff --git a/RUFAS/util.py b/RUFAS/util.py index 3a942ba7f3..9c02ce1266 100644 --- a/RUFAS/util.py +++ b/RUFAS/util.py @@ -72,6 +72,8 @@ def flatten_keys_to_nested_structure(input_dict: Dict[str, Any]) -> Dict[str, An current.append([] if next_key_is_digit else {}) current = current[key] else: + if key.startswith("numberaskey"): + key = key.replace("numberaskey", "") if isinstance(current, list): current = current[-1] if key not in current: diff --git a/helpful_scripts/get_runtime.py b/helpful_scripts/get_runtime.py new file mode 100644 index 0000000000..3b0a877040 --- /dev/null +++ b/helpful_scripts/get_runtime.py @@ -0,0 +1,22 @@ +""" +This script simply checks the runtimes for all simulations that have been run. +Useful for benchmarking when running large batches of simulations, especially sensitivity analyses. 
+""" + +import json +import os + +import numpy as np + +logs_dir = "output/logs/" + +logs_files = [log_file for log_file in os.listdir(logs_dir) if "logs" in log_file and "benchmark" in log_file] + +run_times = [] + +for log_file in logs_files: + with open(logs_dir + log_file) as log_fp: + log_dict = json.load(log_fp) + run_times.append(float(log_dict["SimulationEngine.simulate.total_simulation_time"]["values"][0].split(": ")[1])) + +print(np.mean(run_times)) diff --git a/input/data/animal/default_animal_100x.json b/input/data/animal/default_animal_100x.json new file mode 100644 index 0000000000..e3f9f58e82 --- /dev/null +++ b/input/data/animal/default_animal_100x.json @@ -0,0 +1,202 @@ +{ + "herd_information": { + "calf_num": 800, + "heiferI_num": 4400, + "heiferII_num": 3800, + "heiferIII_num_springers": 500, + "cow_num": 10000, + "replace_num": 50000, + "herd_num": 10000, + "breed": "HO", + "parity_fractions": { + "1": 0.386, + "2": 0.281, + "3": 0.333 + }, + "annual_milk_yield": null + }, + "herd_initialization": { + "initial_animal_num": 10000, + "simulation_days": 5000 + }, + "animal_config": { + "management_decisions": { + "breeding_start_day_h": 380, + "heifer_repro_method": "ED", + "cow_repro_method": "ED-TAI", + "semen_type": "conventional", + "days_in_preg_when_dry": 218, + "heifer_repro_cull_time": 500, + "do_not_breed_time": 185, + "cull_milk_production": 30, + "cow_times_milked_per_day": 3, + "milk_fat_percent": 4, + "milk_protein_percent": 3.2 + }, + "farm_level": { + "calf": { + "male_calf_rate_sexed_semen": 0.1, + "male_calf_rate_conventional_semen": 0.53, + "keep_female_calf_rate": 1, + "wean_day": 60, + "wean_length": 7, + "milk_type": "whole" + }, + "repro": { + "voluntary_waiting_period": 50, + "conception_rate_decrease": 0.026, + "decrease_conception_rate_in_rebreeding": false, + "decrease_conception_rate_by_parity": false, + "avg_gestation_len": 276, + "std_gestation_len": 6, + "prefresh_day": 21, + "calving_interval": 400, + "heifers": { + 
"estrus_detection_rate": 0.9, + "estrus_conception_rate": 0.6, + "repro_sub_protocol": "5dCG2P", + "repro_sub_properties": { + "conception_rate": 0.6, + "estrus_detection_rate": 0.9 + } + }, + "cows": { + "estrus_detection_rate": 0.6, + "ED_conception_rate": 0.5, + "presynch_program": "Double OvSynch", + "presynch_program_start_day": 50, + "ovsynch_program": "OvSynch 56", + "ovsynch_program_start_day": 64, + "ovsynch_program_conception_rate": 0.6, + "resynch_program": "TAIafterPD" + } + }, + "bodyweight": { + "birth_weight_avg_ho": 43.9, + "birth_weight_std_ho": 1, + "birth_weight_avg_je": 27.2, + "birth_weight_std_je": 1, + "target_heifer_preg_day": 399, + "mature_body_weight_avg": 740.1, + "mature_body_weight_std": 73.5 + } + }, + "from_literature": { + "repro": { + "preg_check_day_1": 32, + "preg_loss_rate_1": 0.02, + "preg_check_day_2": 60, + "preg_loss_rate_2": 0.096, + "preg_check_day_3": 200, + "preg_loss_rate_3": 0.017, + "avg_estrus_cycle_return": 23, + "std_estrus_cycle_return": 6, + "avg_estrus_cycle_heifer": 21, + "std_estrus_cycle_heifer": 2.5, + "avg_estrus_cycle_cow": 21, + "std_estrus_cycle_cow": 4, + "avg_estrus_cycle_after_pgf": 5, + "std_estrus_cycle_after_pgf": 2 + }, + "culling": { + "cull_day_count": [0, 5, 15, 45, 90, 135, 180, 225, 270, 330, 380, 430, 480, 530], + "feet_leg_cull": { + "probability": 0.1633, + "cull_day_prob": [0, 0.03, 0.08, 0.16, 0.25, 0.36, 0.48, 0.59, 0.69, 0.78, 0.85, 0.90, 0.95, 1] + }, + "injury_cull": { + "probability": 0.2883, + "cull_day_prob": [0, 0.08, 0.18, 0.28, 0.38, 0.47, 0.56, 0.64, 0.71, 0.78, 0.85, 0.90, 0.95, 1] + }, + "mastitis_cull": { + "probability": 0.2439, + "cull_day_prob": [0, 0.06, 0.12, 0.19, 0.30, 0.43, 0.56, 0.68, 0.78, 0.85, 0.90, 0.94, 0.97, 1] + }, + "disease_cull": { + "probability": 0.1391, + "cull_day_prob": [0, 0.04, 0.12, 0.24, 0.34, 0.42, 0.50, 0.57, 0.64, 0.72, 0.81, 0.89, 0.95, 1] + }, + "udder_cull": { + "probability": 0.0645, + "cull_day_prob": [0, 0.12, 0.24, 0.33, 0.41, 0.48, 
0.55, 0.62, 0.68, 0.76, 0.82, 0.89, 0.95, 1] + }, + "unknown_cull": { + "probability": 0.1009, + "cull_day_prob": [0, 0.05, 0.11, 0.18, 0.27, 0.37, 0.45, 0.54, 0.62, 0.70, 0.77, 0.84, 0.92, 1] + }, + "parity_death_prob": [0.039,0.056,0.085,0.117], + "parity_cull_prob": [0.169, 0.233, 0.301, 0.408], + "death_day_prob": [0, 0.18, 0.32, 0.42, 0.48, 0.54, 0.60, 0.65, 0.70, 0.77, 0.83, 0.89, 0.95, 1] + }, + "life_cycle": { + "still_birth_rate": 0.065 + } + } + }, + "methane_mitigation": { + "methane_mitigation_method": "None", + "methane_mitigation_additive_amount": 0, + "3-NOP_additive_amount": 70, + "monensin_additive_amount": 24, + "essential_oils_additive_amount": 0, + "seaweed_additive_amount": 0 + }, + "housing": "barn", + "pasture_concentrate": 0, + "methane_model": "IPCC", + "ration": { + "phosphorus_requirement_buffer": 0, + "user_input": false, + "formulation_interval": 30 + }, + "pen_information": [ + { + "id": 0, + "pen_name": "", + "animal_combination": "CALF", + "vertical_dist_to_milking_parlor": 0.1, + "horizontal_dist_to_milking_parlor": 10, + "number_of_stalls": 3000, + "housing_type": "open air barn", + "pen_type": "freestall", + "max_stocking_density": 1.2, + "manure_management_scenario_id": 0 + }, + { + "id": 1, + "pen_name": "", + "animal_combination": "GROWING", + "vertical_dist_to_milking_parlor": 0.1, + "horizontal_dist_to_milking_parlor": 10, + "number_of_stalls": 12500, + "housing_type": "open air barn", + "pen_type": "freestall", + "max_stocking_density": 1.2, + "manure_management_scenario_id": 1 + }, + { + "id": 2, + "pen_name": "", + "animal_combination": "CLOSE_UP", + "vertical_dist_to_milking_parlor": 0.1, + "horizontal_dist_to_milking_parlor": 10, + "number_of_stalls": 6000, + "housing_type": "open air barn", + "pen_type": "freestall", + "max_stocking_density": 1.2, + "manure_management_scenario_id": 2 + }, + { + "id": 3, + "pen_name": "", + "animal_combination": "LAC_COW", + "vertical_dist_to_milking_parlor": 0.1, + 
"horizontal_dist_to_milking_parlor": 10, + "number_of_stalls": 15000, + "housing_type": "open air barn", + "pen_type": "freestall", + "max_stocking_density": 1.2, + "manure_management_scenario_id": 5 + } + ] +} \ No newline at end of file diff --git a/input/data/animal/default_animal_10x.json b/input/data/animal/default_animal_10x.json new file mode 100644 index 0000000000..5a883078f2 --- /dev/null +++ b/input/data/animal/default_animal_10x.json @@ -0,0 +1,202 @@ +{ + "herd_information": { + "calf_num": 80, + "heiferI_num": 440, + "heiferII_num": 380, + "heiferIII_num_springers": 50, + "cow_num": 1000, + "replace_num": 5000, + "herd_num": 1000, + "breed": "HO", + "parity_fractions": { + "1": 0.346, + "2": 0.272, + "3": 0.379 + }, + "annual_milk_yield": null + }, + "herd_initialization": { + "initial_animal_num": 10000, + "simulation_days": 5000 + }, + "animal_config": { + "management_decisions": { + "breeding_start_day_h": 380, + "heifer_repro_method": "ED", + "cow_repro_method": "ED-TAI", + "semen_type": "conventional", + "days_in_preg_when_dry": 218, + "heifer_repro_cull_time": 500, + "do_not_breed_time": 185, + "cull_milk_production": 30, + "cow_times_milked_per_day": 3, + "milk_fat_percent": 4, + "milk_protein_percent": 3.2 + }, + "farm_level": { + "calf": { + "male_calf_rate_sexed_semen": 0.1, + "male_calf_rate_conventional_semen": 0.53, + "keep_female_calf_rate": 1, + "wean_day": 60, + "wean_length": 7, + "milk_type": "whole" + }, + "repro": { + "voluntary_waiting_period": 50, + "conception_rate_decrease": 0.026, + "decrease_conception_rate_in_rebreeding": false, + "decrease_conception_rate_by_parity": false, + "avg_gestation_len": 276, + "std_gestation_len": 6, + "prefresh_day": 21, + "calving_interval": 400, + "heifers": { + "estrus_detection_rate": 0.9, + "estrus_conception_rate": 0.6, + "repro_sub_protocol": "5dCG2P", + "repro_sub_properties": { + "conception_rate": 0.6, + "estrus_detection_rate": 0.9 + } + }, + "cows": { + "estrus_detection_rate": 0.6, + 
"ED_conception_rate": 0.5, + "presynch_program": "Double OvSynch", + "presynch_program_start_day": 50, + "ovsynch_program": "OvSynch 56", + "ovsynch_program_start_day": 64, + "ovsynch_program_conception_rate": 0.6, + "resynch_program": "TAIafterPD" + } + }, + "bodyweight": { + "birth_weight_avg_ho": 43.9, + "birth_weight_std_ho": 1, + "birth_weight_avg_je": 27.2, + "birth_weight_std_je": 1, + "target_heifer_preg_day": 399, + "mature_body_weight_avg": 740.1, + "mature_body_weight_std": 73.5 + } + }, + "from_literature": { + "repro": { + "preg_check_day_1": 32, + "preg_loss_rate_1": 0.02, + "preg_check_day_2": 60, + "preg_loss_rate_2": 0.096, + "preg_check_day_3": 200, + "preg_loss_rate_3": 0.017, + "avg_estrus_cycle_return": 23, + "std_estrus_cycle_return": 6, + "avg_estrus_cycle_heifer": 21, + "std_estrus_cycle_heifer": 2.5, + "avg_estrus_cycle_cow": 21, + "std_estrus_cycle_cow": 4, + "avg_estrus_cycle_after_pgf": 5, + "std_estrus_cycle_after_pgf": 2 + }, + "culling": { + "cull_day_count": [0, 5, 15, 45, 90, 135, 180, 225, 270, 330, 380, 430, 480, 530], + "feet_leg_cull": { + "probability": 0.1633, + "cull_day_prob": [0, 0.03, 0.08, 0.16, 0.25, 0.36, 0.48, 0.59, 0.69, 0.78, 0.85, 0.90, 0.95, 1] + }, + "injury_cull": { + "probability": 0.2883, + "cull_day_prob": [0, 0.08, 0.18, 0.28, 0.38, 0.47, 0.56, 0.64, 0.71, 0.78, 0.85, 0.90, 0.95, 1] + }, + "mastitis_cull": { + "probability": 0.2439, + "cull_day_prob": [0, 0.06, 0.12, 0.19, 0.30, 0.43, 0.56, 0.68, 0.78, 0.85, 0.90, 0.94, 0.97, 1] + }, + "disease_cull": { + "probability": 0.1391, + "cull_day_prob": [0, 0.04, 0.12, 0.24, 0.34, 0.42, 0.50, 0.57, 0.64, 0.72, 0.81, 0.89, 0.95, 1] + }, + "udder_cull": { + "probability": 0.0645, + "cull_day_prob": [0, 0.12, 0.24, 0.33, 0.41, 0.48, 0.55, 0.62, 0.68, 0.76, 0.82, 0.89, 0.95, 1] + }, + "unknown_cull": { + "probability": 0.1009, + "cull_day_prob": [0, 0.05, 0.11, 0.18, 0.27, 0.37, 0.45, 0.54, 0.62, 0.70, 0.77, 0.84, 0.92, 1] + }, + "parity_death_prob": 
[0.039,0.056,0.085,0.117], + "parity_cull_prob": [0.169, 0.233, 0.301, 0.408], + "death_day_prob": [0, 0.18, 0.32, 0.42, 0.48, 0.54, 0.60, 0.65, 0.70, 0.77, 0.83, 0.89, 0.95, 1] + }, + "life_cycle": { + "still_birth_rate": 0.065 + } + } + }, + "methane_mitigation": { + "methane_mitigation_method": "None", + "methane_mitigation_additive_amount": 0, + "3-NOP_additive_amount": 70, + "monensin_additive_amount": 24, + "essential_oils_additive_amount": 0, + "seaweed_additive_amount": 0 + }, + "housing": "barn", + "pasture_concentrate": 0, + "methane_model": "IPCC", + "ration": { + "phosphorus_requirement_buffer": 0, + "user_input": false, + "formulation_interval": 30 + }, + "pen_information": [ + { + "id": 0, + "pen_name": "", + "animal_combination": "CALF", + "vertical_dist_to_milking_parlor": 0.1, + "horizontal_dist_to_milking_parlor": 10, + "number_of_stalls": 300, + "housing_type": "open air barn", + "pen_type": "freestall", + "max_stocking_density": 1.2, + "manure_management_scenario_id": 0 + }, + { + "id": 1, + "pen_name": "", + "animal_combination": "GROWING", + "vertical_dist_to_milking_parlor": 0.1, + "horizontal_dist_to_milking_parlor": 10, + "number_of_stalls": 1250, + "housing_type": "open air barn", + "pen_type": "freestall", + "max_stocking_density": 1.2, + "manure_management_scenario_id": 1 + }, + { + "id": 2, + "pen_name": "", + "animal_combination": "CLOSE_UP", + "vertical_dist_to_milking_parlor": 0.1, + "horizontal_dist_to_milking_parlor": 10, + "number_of_stalls": 600, + "housing_type": "open air barn", + "pen_type": "freestall", + "max_stocking_density": 1.2, + "manure_management_scenario_id": 2 + }, + { + "id": 3, + "pen_name": "", + "animal_combination": "LAC_COW", + "vertical_dist_to_milking_parlor": 0.1, + "horizontal_dist_to_milking_parlor": 10, + "number_of_stalls": 1500, + "housing_type": "open air barn", + "pen_type": "freestall", + "max_stocking_density": 1.2, + "manure_management_scenario_id": 5 + } + ] +} \ No newline at end of file diff 
--git a/input/data/tasks/sobol1k_128_tasks.json b/input/data/tasks/sobol1k_128_tasks.json new file mode 100644 index 0000000000..91e71516b5 --- /dev/null +++ b/input/data/tasks/sobol1k_128_tasks.json @@ -0,0 +1,331 @@ +{ + "parallel_workers": 6, + "tasks": [ + { + "task_type": "SENSITIVITY_ANALYSIS", + "output_prefix": "sobol_1k_128", + "log_verbosity": "errors", + "sampler": "sobol", + "saltelli_number": 128, + "saltelli_skip": 2048, + "SA_load_balancing_start": 0.9, + "SA_load_balancing_stop": 1, + "random_seed": 42, + "SA_input_variables": [ + { + "variable_name": "animal.herd_information.calf_num", + "lower_bound": 60, + "upper_bound": 80, + "data_type":"int" + }, + { + "variable_name": "animal.herd_information.cow_num", + "lower_bound": 700, + "upper_bound": 1000, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.management_decisions.breeding_start_day_h", + "lower_bound": 270, + "upper_bound": 440, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.management_decisions.days_in_preg_when_dry", + "lower_bound": 200, + "upper_bound": 230, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.management_decisions.heifer_repro_cull_time", + "lower_bound": 270, + "upper_bound": 1000, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.management_decisions.do_not_breed_time", + "lower_bound": 60, + "upper_bound": 190, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.management_decisions.cull_milk_production", + "lower_bound": 20, + "upper_bound": 30, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.calf.male_calf_rate_sexed_semen", + "lower_bound": 0.01, + "upper_bound": 0.2, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.calf.wean_day", + "lower_bound": 1, + "upper_bound": 120, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.voluntary_waiting_period", + "lower_bound": 
45, + "upper_bound": 55, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.conception_rate_decrease", + "lower_bound": 0.01, + "upper_bound": 0.1, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.avg_gestation_len", + "lower_bound": 240, + "upper_bound": 280, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.prefresh_day", + "lower_bound": 15, + "upper_bound": 30, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.calving_interval", + "lower_bound": 300, + "upper_bound": 800, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.cows.estrus_detection_rate", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.heifers.estrus_detection_rate", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.heifers.estrus_conception_rate", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.cows.estrus_detection_rate", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.cows.ED_conception_rate", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.bodyweight.birth_weight_avg_ho", + "lower_bound": 8, + "upper_bound": 80, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.bodyweight.target_heifer_preg_day", + "lower_bound": 350, + "upper_bound": 399, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.bodyweight.mature_body_weight_avg", + "lower_bound": 400, + "upper_bound": 1500, + "data_type":"float" + }, + + + { + "variable_name": 
"animal.animal_config.from_literature.repro.preg_check_day_1", + "lower_bound": 28, + "upper_bound": 40, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.preg_check_day_2", + "lower_bound": 61, + "upper_bound": 120, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.preg_check_day_3", + "lower_bound": 121, + "upper_bound": 240, + "data_type":"int" + }, + + { + "variable_name": "animal.animal_config.from_literature.repro.preg_loss_rate_1", + "lower_bound": 0.0, + "upper_bound": 0.5, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.preg_loss_rate_2", + "lower_bound": 0.0, + "upper_bound": 0.2, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.preg_loss_rate_3", + "lower_bound": 0.0, + "upper_bound": 0.1, + "data_type":"float" + }, + + + + { + "variable_name": "animal.animal_config.from_literature.repro.avg_estrus_cycle_return", + "lower_bound": 1, + "upper_bound": 46, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.avg_estrus_cycle_heifer", + "lower_bound": 7, + "upper_bound": 42, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.avg_estrus_cycle_cow", + "lower_bound": 7, + "upper_bound": 42, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.avg_estrus_cycle_after_pgf", + "lower_bound": 1, + "upper_bound": 10, + "data_type":"int" + }, + + + + { + "variable_name": "animal.animal_config.from_literature.culling.feet_leg_cull.probability", + "lower_bound": 0, + "upper_bound": 1, + "data_type":"float" + },{ + "variable_name": "animal.animal_config.from_literature.culling.injury_cull.probability", + "lower_bound": 0, + "upper_bound": 1, + "data_type":"float" + },{ + "variable_name": "animal.animal_config.from_literature.culling.mastitis_cull.probability", + "lower_bound": 0, + 
"upper_bound": 1, + "data_type":"float" + },{ + "variable_name": "animal.animal_config.from_literature.culling.disease_cull.probability", + "lower_bound": 0, + "upper_bound": 1, + "data_type":"float" + },{ + "variable_name": "animal.animal_config.from_literature.culling.udder_cull.probability", + "lower_bound": 0, + "upper_bound": 1, + "data_type":"float" + },{ + "variable_name": "animal.animal_config.from_literature.culling.unknown_cull.probability", + "lower_bound": 0, + "upper_bound": 1, + "data_type":"float" + }, + + + + { + "variable_name": "animal.pen_information.3.vertical_dist_to_milking_parlor", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "animal.pen_information.3.horizontal_dist_to_milking_parlor", + "lower_bound": 0.1, + "upper_bound": 10, + "data_type":"float" + }, + + { + "variable_name": "animal.pen_information.3.max_stocking_density", + "lower_bound": 0.75, + "upper_bound": 2, + "data_type":"float" + }, + + + + + { + "variable_name": "soil.second_moisture_condition_parameter", + "lower_bound": 20, + "upper_bound": 999, + "data_type":"float" + }, + { + "variable_name": "field.minimum_daylength", + "lower_bound": 7, + "upper_bound": 11, + "data_type":"float" + }, + { + "variable_name": "field.watering_amount_in_liters", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "manure_management.bedding_configs.0.bedding_mass_per_day", + "lower_bound": 1, + "upper_bound": 3, + "data_type":"float" + }, + { + "variable_name": "manure_management.bedding_configs.0.bedding_density", + "lower_bound": 200, + "upper_bound": 300, + "data_type":"float" + }, + { + "variable_name": "manure_management.bedding_configs.0.bedding_dry_matter_content", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "manure_management.bedding_configs.0.bedding_cleaned_fraction", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + 
"variable_name": "manure_management.manure_handler_configs.1.cleaning_water_use_rate", + "lower_bound": 9, + "upper_bound": 11, + "data_type":"float" + }, + { + "variable_name": "manure_management.manure_handler_configs.1.cleanings_per_day", + "lower_bound": 1, + "upper_bound": 3, + "data_type":"int" + } + ] + } + ] + } + \ No newline at end of file diff --git a/input/data/tasks/sobol1k_32_tasks.json b/input/data/tasks/sobol1k_32_tasks.json new file mode 100644 index 0000000000..efa95e0217 --- /dev/null +++ b/input/data/tasks/sobol1k_32_tasks.json @@ -0,0 +1,337 @@ +{ + "parallel_workers": 6, + "tasks": [ + { + "task_type": "SENSITIVITY_ANALYSIS", + "output_prefix": "sobol_1k", + "log_verbosity": "errors", + "sampler": "sobol", + "saltelli_number": 32, + "saltelli_skip": 128, + "SA_load_balancing_start": 0.5, + "SA_load_balancing_stop": 1, + "random_seed": 42, + "SA_input_variables": [ + { + "variable_name": "animal.herd_information.calf_num", + "lower_bound": 60, + "upper_bound": 80, + "data_type":"int" + }, + { + "variable_name": "animal.herd_information.cow_num", + "lower_bound": 700, + "upper_bound": 1000, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.management_decisions.breeding_start_day_h", + "lower_bound": 360, + "upper_bound": 385, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.management_decisions.days_in_preg_when_dry", + "lower_bound": 170, + "upper_bound": 220, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.management_decisions.heifer_repro_cull_time", + "lower_bound": 470, + "upper_bound": 500, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.management_decisions.do_not_breed_time", + "lower_bound": 170, + "upper_bound": 190, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.management_decisions.cull_milk_production", + "lower_bound": 20, + "upper_bound": 30, + "data_type":"int" + }, + { + "variable_name": 
"animal.animal_config.management_decisions.breeding_start_day_h", + "lower_bound": 360, + "upper_bound": 385, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.calf.male_calf_rate_sexed_semen", + "lower_bound": 0.01, + "upper_bound": 0.2, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.calf.wean_day", + "lower_bound": 55, + "upper_bound": 65, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.voluntary_waiting_period", + "lower_bound": 45, + "upper_bound": 55, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.conception_rate_decrease", + "lower_bound": 0.01, + "upper_bound": 0.036, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.avg_gestation_len", + "lower_bound": 260, + "upper_bound": 280, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.prefresh_day", + "lower_bound": 15, + "upper_bound": 22, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.calving_interval", + "lower_bound": 300, + "upper_bound": 400, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.cows.estrus_detection_rate", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.heifers.estrus_detection_rate", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.heifers.estrus_conception_rate", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.cows.estrus_detection_rate", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.repro.cows.ED_conception_rate", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" 
+ }, + { + "variable_name": "animal.animal_config.farm_level.bodyweight.birth_weight_avg_ho", + "lower_bound": 39, + "upper_bound": 50, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.farm_level.bodyweight.target_heifer_preg_day", + "lower_bound": 350, + "upper_bound": 399, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.farm_level.bodyweight.mature_body_weight_avg", + "lower_bound": 650, + "upper_bound": 760, + "data_type":"float" + }, + + + { + "variable_name": "animal.animal_config.from_literature.repro.preg_check_day_1", + "lower_bound": 25, + "upper_bound": 35, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.preg_check_day_2", + "lower_bound": 55, + "upper_bound": 65, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.preg_check_day_3", + "lower_bound": 190, + "upper_bound": 210, + "data_type":"int" + }, + + { + "variable_name": "animal.animal_config.from_literature.repro.preg_loss_rate_1", + "lower_bound": 0.0, + "upper_bound": 0.02, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.preg_loss_rate_2", + "lower_bound": 0.0, + "upper_bound": 0.096, + "data_type":"float" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.preg_loss_rate_3", + "lower_bound": 0.0, + "upper_bound": 0.017, + "data_type":"float" + }, + + + + { + "variable_name": "animal.animal_config.from_literature.repro.avg_estrus_cycle_return", + "lower_bound": 1, + "upper_bound": 23, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.avg_estrus_cycle_heifer", + "lower_bound": 1, + "upper_bound": 23, + "data_type":"int" + }, + { + "variable_name": "animal.animal_config.from_literature.repro.avg_estrus_cycle_cow", + "lower_bound": 1, + "upper_bound": 23, + "data_type":"int" + }, + { + "variable_name": 
"animal.animal_config.from_literature.repro.avg_estrus_cycle_after_pgf", + "lower_bound": 1, + "upper_bound": 5, + "data_type":"int" + }, + + + + { + "variable_name": "animal.animal_config.from_literature.culling.feet_leg_cull.probability", + "lower_bound": 0, + "upper_bound": 1, + "data_type":"float" + },{ + "variable_name": "animal.animal_config.from_literature.culling.injury_cull.probability", + "lower_bound": 0, + "upper_bound": 1, + "data_type":"float" + },{ + "variable_name": "animal.animal_config.from_literature.culling.mastitis_cull.probability", + "lower_bound": 0, + "upper_bound": 1, + "data_type":"float" + },{ + "variable_name": "animal.animal_config.from_literature.culling.disease_cull.probability", + "lower_bound": 0, + "upper_bound": 1, + "data_type":"float" + },{ + "variable_name": "animal.animal_config.from_literature.culling.udder_cull.probability", + "lower_bound": 0, + "upper_bound": 1, + "data_type":"float" + },{ + "variable_name": "animal.animal_config.from_literature.culling.unknown_cull.probability", + "lower_bound": 0, + "upper_bound": 1, + "data_type":"float" + }, + + + + { + "variable_name": "animal.pen_information.3.vertical_dist_to_milking_parlor", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "animal.pen_information.3.horizontal_dist_to_milking_parlor", + "lower_bound": 0.1, + "upper_bound": 10, + "data_type":"float" + }, + + { + "variable_name": "animal.pen_information.3.max_stocking_density", + "lower_bound": 0.75, + "upper_bound": 2, + "data_type":"float" + }, + + + + + { + "variable_name": "soil.second_moisture_condition_parameter", + "lower_bound": 20, + "upper_bound": 999, + "data_type":"float" + }, + { + "variable_name": "field.minimum_daylength", + "lower_bound": 7, + "upper_bound": 11, + "data_type":"float" + }, + { + "variable_name": "field.watering_amount_in_liters", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": 
"manure_management.bedding_configs.0.bedding_mass_per_day", + "lower_bound": 1, + "upper_bound": 3, + "data_type":"float" + }, + { + "variable_name": "manure_management.bedding_configs.0.bedding_density", + "lower_bound": 200, + "upper_bound": 300, + "data_type":"float" + }, + { + "variable_name": "manure_management.bedding_configs.0.bedding_dry_matter_content", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "manure_management.bedding_configs.0.bedding_cleaned_fraction", + "lower_bound": 0.1, + "upper_bound": 1, + "data_type":"float" + }, + { + "variable_name": "manure_management.manure_handler_configs.1.cleaning_water_use_rate", + "lower_bound": 9, + "upper_bound": 11, + "data_type":"float" + }, + { + "variable_name": "manure_management.manure_handler_configs.1.cleanings_per_day", + "lower_bound": 1, + "upper_bound": 3, + "data_type":"int" + } + ] + } + ] + } + \ No newline at end of file diff --git a/input/metadata/default_metadata_10x.json b/input/metadata/default_metadata_10x.json new file mode 100644 index 0000000000..f67aafd249 --- /dev/null +++ b/input/metadata/default_metadata_10x.json @@ -0,0 +1,173 @@ +{ + "files": { + "config": { + "title": "Config Data", + "description": "Configuration file for general simulation parameters.", + "path": "input/data/config/default_config.json", + "type": "json", + "properties": "config_properties" + }, + "animal": { + "title": "Animal data", + "description": "Input data and configuration information for animal management decisions, herd attributes, and housing properties.", + "path": "input/data/animal/default_animal_10x.json", + "type": "json", + "properties": "animal_properties" + }, + "animal_population": { + "title": "Animal population data", + "description": "Animal objects that herd initialization draws from during the simulation. 
A similar file can be generated using the -I and -s command line arguments simultaneously.", + "path": "input/data/animal/animal_population.json", + "type": "json", + "properties": "animal_population_properties" + }, + "lactation": { + "title": "Lactation curve adjustment values", + "description": "Values for the adjustment of the three Wood lactation curve parameters, based on farm-specific data", + "path": "input/data/animal/lactation_curve_adjustment_inputs.json", + "type": "json", + "properties": "lactation_properties" + }, + "crop": { + "title": "Crop data", + "description": "Crop selection and detailed rotation schedules.", + "path": "input/data/crop/default_rotation.json", + "type": "json", + "properties": "crop_schedule_properties" + }, + "economy": { + "title": "Economy data", + "description": "Energy prices used in the EEE module.", + "path": "input/data/EEE/default_costs.csv", + "type": "csv", + "properties": "economic_properties" + }, + "emission": { + "title": "Emissions data", + "description": "General emission values used in the EEE module.", + "path": "input/data/EEE/default_emissions.csv", + "type": "csv", + "properties": "emissions_properties" + }, + "purchased_feeds_emissions": { + "title": "Emissions from purchased feeds", + "description": "Purchased feeds emission values used in the EEE module.", + "path": "input/data/EEE/full_feeds_emissions_July2024.csv", + "type": "csv", + "properties": "feed_emissions_properties" + }, + "purchased_feed_land_use_change_emissions": { + "title": "Land Use Change emissions from purchased feeds", + "description": "Purchased feeds land use change emission values used in the EEE module.", + "path": "input/data/EEE/full_feeds_land_use_change_emissions_July2024.csv", + "type": "csv", + "properties": "feed_emissions_properties" + }, + "feed": { + "title": "Feed data", + "description": "Feeds available for each animal combination, purchased feeds and their prices, feed storage options, and user-defined ration percentages and 
associated parameters.", + "path": "input/data/feed/default_feed.json", + "type": "json", + "properties": "feed_properties" + }, + "NRC_Comp": { + "title": "NRC Comp data", + "description": "Nutritional information for each feed, following NRC (2001) guidelines.", + "path": "input/data/feed/NRC_comp.csv", + "type": "csv", + "properties": "NRC_Comp_properties" + }, + "NASEM_Comp": { + "title": "NASEM Comp data", + "description": "Nutritional information for each feed, following NASEM (2021) guidelines.", + "path": "input/data/feed/NASEM_Comp_with_TDN.csv", + "type": "csv", + "properties": "NASEM_Comp_properties" + }, + "manure_management": { + "title": "Manure data", + "description": "Manure management decisions, including bedding type, handler, separator, and treatment information.", + "path": "input/data/manure/manure_management.json", + "type": "json", + "properties": "manure_management_properties" + }, + "soil_1": { + "title": "Soil data", + "description": "Characteristic soil information, including composition, slope, and layer details.", + "path": "input/data/soil/default_soil.json", + "type": "json", + "properties": "soil_profile_properties" + }, + "soil_2": { + "title": "Soil data", + "description": "Characteristic soil information, including composition, slope, and layer details.", + "path": "input/data/soil/default_soil_2.json", + "type": "json", + "properties": "soil_profile_properties" + }, + "fertilizer_schedule": { + "title": "Fertilizer schedule.", + "description": "Fertilizer types available, and their application schedule.", + "path": "input/data/fertilizer_schedule/default_fertilizer_schedule.json", + "type": "json", + "properties": "fertilizer_schedule_properties" + }, + "manure_schedule": { + "title": "Manure schedule.", + "description": "Specifies manure applications to a field.", + "path": "input/data/manure_schedule/default_manure_schedule.json", + "type": "json", + "properties": "manure_schedule_properties" + }, + "tillage_schedule": { + 
"title": "Tillage schedule.", + "description": "Schedule of tillage applications for a field.", + "path": "input/data/tillage_schedule/default_tillage_schedule.json", + "type": "json", + "properties": "tillage_schedule_properties" + }, + "field": { + "title": "Field specification", + "description": "Field characteristics and references to field management specifications.", + "path": "input/data/field/default_field.json", + "type": "json", + "properties": "field_properties" + }, + "weather": { + "title": "Weather data", + "description": "Weather data used during the simulation, including date, precipitation, temperature, etc.", + "path": "input/data/weather/barnyard_weather.csv", + "type": "csv", + "properties": "weather_properties" + }, + "user_feeds": { + "title": "User Feed", + "description": "Summary of feeds available for use in the simulation, including their RuFaS ID, short descriptors, and which nutrient composition file they are included in.", + "path": "input/data/feed/user_feeds.csv", + "type": "csv", + "properties": "user_feeds_properties" + }, + "tractor_dataset": { + "title": "Tractor Dataset", + "description": "Details agricultural machinery operations for various crops, including tractor size, operations (planting, mowing, collection, etc.), implement details, operational parameters (depth, width, mass), and throughput metrics, spanning different crop types and soil management practices.", + "path": "input/data/EEE/tractor_dataset.csv", + "type": "csv", + "properties": "tractor_dataset_properties" + }, + "EEE_constants": { + "title": "EEE Constants", + "description": "The constants that are used in EEE module.", + "path": "input/data/EEE/constants.json", + "type": "json", + "properties": "EEE_constants_properties" + }, + "properties": { + "title": "Metadata Properties", + "description": "The properties of input data.", + "path": "input/metadata/properties/default.json", + "type": "json", + "properties": "NA" + } + }, + "cross-validation": {} +} diff 
--git a/input/metadata/task_manager_metadata.json b/input/metadata/task_manager_metadata.json index 8919c7a46d..4d274c0a30 100644 --- a/input/metadata/task_manager_metadata.json +++ b/input/metadata/task_manager_metadata.json @@ -3,7 +3,7 @@ "tasks": { "title": "Task manager data", "description": "Configuration file for general simulation parameters.", - "path": "input/data/tasks/default_task.json", + "path": "input/data/tasks/sobol_animal_tasks_v5.json", "type": "json", "properties": "tasks_properties" }, diff --git a/model_evaluation/sensitivity_analysis/SA_helpers.py b/model_evaluation/sensitivity_analysis/SA_helpers.py new file mode 100644 index 0000000000..f3f7d88f61 --- /dev/null +++ b/model_evaluation/sensitivity_analysis/SA_helpers.py @@ -0,0 +1,426 @@ +# flake8: noqa +import os + +# import csv +# import json +import pandas as pd +import numpy as np +import seaborn as sns +import matplotlib.pyplot as plt + +from typing import List, Dict, Any + +from SALib.sample import ff as fractional_factorial_sampler +from SALib.sample import saltelli as saltelli_sampler +from SALib.sample import sobol as sobol_sampler +from SALib.sample import morris as morris_sampler +from SALib.analyze import ff as ff_analyzer +from SALib.analyze import sobol as sobol_analyzer +from SALib.analyze import morris as morris_analyzer + +# from SALib.test_functions import Ishigami +# from sklearn import datasets, linear_model +# from sklearn.metrics import mean_squared_error, r2_score +import statsmodels.api as sm + +# from scipy import stats + + +def rewrite_ff_analysis(analysis: Dict[str, Any]) -> List[Any]: + """ + This reformats the output of the ff_analysis function + Forces into something easier to parse into csvs for printing + This will place the main effects and interaction effects into grouped columns in a single dataframe + """ + intnames = analysis["interaction_names"] + intnames = [str(x).replace("(", "") for x in intnames] + intnames = [str(x).replace(")", "") for x in 
intnames] + intnames = [str(x).replace(",", "*") for x in intnames] + intnames = [str(x).replace(" ", "") for x in intnames] + colnames = ["ME:" + x for x in analysis["names"]] + ["IE:" + str(x) for x in intnames] + rowvalues = list(analysis["ME"]) + list(analysis["IE"]) + analysis_out = [colnames, rowvalues] + return analysis_out + + +def rewrite_sobol_analysis(analysis: Dict[str, Any], p: Dict[str, Any]) -> List[Any]: + """ + This reformats the output of the sobol function + Forces into something easier to parse into csvs for printing + This will place the main effects and interaction effects into grouped columns in a single dataframe + """ + intnames = p["names"] + colnames = ( + ["S1:" + x for x in intnames] + + ["ST:" + str(x) for x in intnames] + + ["S2:" + str(x) for x in intnames] + + ["S1_conf:" + x for x in intnames] + + ["ST_conf:" + str(x) for x in intnames] + + ["S2_conf:" + str(x) for x in intnames] + ) + rowvalues = ( + list(analysis["S1"]) + + list(analysis["ST"]) + + list(analysis["S2"]) + + list(analysis["S1_conf"]) + + list(analysis["ST_conf"]) + + list(analysis["S2_conf"]) + ) + analysis_out = [colnames, rowvalues] + + expanded_names = [str(x) for x in intnames] + interaction_names = [] + for _ in expanded_names: + for pos, name in enumerate(expanded_names): + interaction_names.append(f"{name} * {expanded_names[pos]}") + + interaction_values = [] + as2 = analysis["S2"] + for a in as2: + # print(a) + for i in a: + # print(i) + interaction_values.append(i) + + colnames_expanded = ( + ["S1:" + x for x in intnames] + + ["ST:" + str(x) for x in intnames] + + interaction_names + + ["S1_conf:" + x for x in intnames] + + ["ST_conf:" + str(x) for x in intnames] + + ["S2_conf:" + str(x) for x in intnames] + ) + rowvalues_expanded = ( + list(analysis["S1"]) + + list(analysis["ST"]) + + interaction_values + + list(analysis["S1_conf"]) + + list(analysis["ST_conf"]) + + list(analysis["S2_conf"]) + ) + analysis_out_expanded = [colnames_expanded, 
rowvalues_expanded] + + return analysis_out_expanded + + +def rewrite_morris_analysis(analysis: Dict[str, Any], p: Dict[str, Any]) -> List[Any]: + """ + This reformats the output of the morris function + Forces into something easier to parse into csvs for printing + This will place the main effects and interaction effects into grouped columns in a single dataframe + """ + intnames = p["names"] + colnames = ( + ["mu:" + x for x in intnames] + + ["mu_star:" + x for x in intnames] + + ["sigma:" + str(x) for x in intnames] + + ["mu_st_conf:" + str(x) for x in intnames] + ) + rowvalues = ( + list(analysis["mu"]) + list(analysis["mu_star"]) + list(analysis["sigma"]) + list(analysis["mu_st_conf"]) + ) + + analysis_out = [colnames, rowvalues] + return analysis_out + + +def get_all_output_files(basedirectory: str, output_prefix: str, report_name: str) -> List[str]: + return [ + filename + for filename in os.listdir(basedirectory) + if filename.startswith(output_prefix) and report_name in filename + ] + + +def collate_outputs( + basedirectory: str, all_report_filenames: List[str], total_num_files: int +) -> Dict[str, List[float]]: + collected: Dict[str, List[float]] = {} + digits = len(str(total_num_files)) + + for i in range(0, total_num_files): + file_ID = f"{i+1}".zfill(digits) + "_" + try: + file_ID_found = [filename for filename in all_report_filenames if file_ID in filename][-1] + except: + file_ID_backup = f"{1}".zfill(digits) + "_" + file_ID_found = [filename for filename in all_report_filenames if file_ID_backup in filename][-1] + print("used dummy") + print(i / total_num_files) + print(i) + file = pd.read_csv(basedirectory + file_ID_found) + variable_names: List[str] = [] + if not variable_names: + variable_names = list(file.columns) + for variable_name in variable_names: + if variable_name not in collected.keys(): + collected[variable_name] = [] + valuetoappend = file[variable_name].values[0] + if type(valuetoappend) is not str: + valuetoappend = 
float(valuetoappend) + collected[variable_name].append(valuetoappend) + return collected + + +def analyze_it( + task_specified: Dict[str, Any], + parsed_SA_input_variables: Dict[str, Any], + sampled_values: np.ndarray[Any, Any], + output_to_analyze: List[float], +) -> List[Any]: + print_analysis = False + if task_specified["sampler"] == "sobol": + analyzed = sobol_analyzer.analyze( + parsed_SA_input_variables, + np.array(output_to_analyze), + print_to_console=print_analysis, + seed=task_specified["random_seed"], + ) + analyzed_formatted = rewrite_sobol_analysis(analyzed, parsed_SA_input_variables) + elif task_specified["sampler"] == "saltelli": + analyzed = sobol_analyzer.analyze( + parsed_SA_input_variables, + np.array(output_to_analyze), + print_to_console=print_analysis, + seed=task_specified["random_seed"], + ) + analyzed_formatted = rewrite_sobol_analysis(analyzed, parsed_SA_input_variables) + elif task_specified["sampler"] == "morris": + analyzed = morris_analyzer.analyze( + parsed_SA_input_variables, + sampled_values, + np.array(output_to_analyze), + print_to_console=print_analysis, + seed=task_specified["random_seed"], + ) + analyzed_formatted = rewrite_morris_analysis(analyzed, parsed_SA_input_variables) + else: + analyzed = ff_analyzer.analyze( + parsed_SA_input_variables, + sampled_values, + output_to_analyze, + second_order=True, + seed=task_specified["random_seed"], + print_to_console=print_analysis, + ) + analyzed_formatted = rewrite_ff_analysis(analyzed) + return analyzed_formatted + + +def get_sampled_values(task_to_analyze: Dict[str, Any], parsed_SA_input_variables: Dict[str, Any]) -> np.ndarray | Any: + if task_to_analyze["sampler"] == "sobol": + sampled_values = sobol_sampler.sample( + parsed_SA_input_variables, + task_to_analyze["saltelli_number"], + skip_values=task_to_analyze["saltelli_skip"], + seed=task_to_analyze["random_seed"], + ) + elif task_to_analyze["sampler"] == "saltelli_sobol": + sampled_values = saltelli_sampler.sample( + 
parsed_SA_input_variables, + task_to_analyze["saltelli_number"], + skip_values=task_to_analyze["saltelli_skip"], + ) + elif task_to_analyze["sampler"] == "morris": + sampled_values = morris_sampler.sample( + parsed_SA_input_variables, + task_to_analyze["saltelli_number"], + seed=task_to_analyze["random_seed"], + ) + else: + sampled_values = fractional_factorial_sampler.sample( + parsed_SA_input_variables, + ) + return sampled_values + + +def parse_input_variables(task_to_analyze: Dict[str, Any]) -> Dict[str, Any]: + SA_input_variables: List[Dict[str, float | str]] = task_to_analyze["SA_input_variables"] + names: List[str] = [str(input_variable["variable_name"]) for input_variable in SA_input_variables] + variables_count = len(names) + bounds: List[List[float]] = [ + [float(input_variable["lower_bound"]), float(input_variable["upper_bound"])] + for input_variable in SA_input_variables + ] + parsed_SA_input_variables = { + "num_vars": variables_count, + "names": names, + "bounds": bounds, + "sample_scaled": True, + } + return parsed_SA_input_variables + + +def get_whole_output( + collated_outputs: Dict[str, Any], + sampled_values: np.ndarray | Any, + task_to_analyze: Dict[str, Any], + parsed_SA_input_variables: Dict[str, Any], +) -> List[Any]: + whole_output: List[Any] = [] + for variable_name_for_analysis in list(collated_outputs.keys()): + output_as_list = collated_outputs[variable_name_for_analysis] + if len(output_as_list) == len(sampled_values): + out = analyze_it(task_to_analyze, parsed_SA_input_variables, sampled_values, output_as_list) + # outdf = pd.DataFrame(out) + # prettier_variable_name = variable_name_for_analysis.replace("/", " per ") + # prettier_variable_name = prettier_variable_name.replace(",", " ") + # outdf.to_csv(path_or_buf=basedirectory + output_prefix + '_' + prettier_variable_name + '.csv') + if not whole_output: + names_and_header = [""] + for name in out[0]: + names_and_header.append(name) + whole_output.append(names_and_header) + 
line_for_whole_output = [] + line_for_whole_output.append(variable_name_for_analysis) + for thing in out[1]: + line_for_whole_output.append(thing) + whole_output.append(line_for_whole_output) + + colidx = [] + colidx.append(999) + for column in range(1, len(whole_output[0])): + colidx.append(0) + for row in range(1, len(whole_output)): + val = whole_output[row][column] + if type(val) == np.ndarray or str(val) == "nan": + pass + else: + colidx[column] += 1 + + whole_output_expanded = [] + for row in whole_output: + whole_output_expanded.append([row[i] for i in range(len(row)) if colidx[i]]) + + # print(len(whole_output)) + # print(len(whole_output[0])) + # print(len(whole_output_expanded)) + # print(len(whole_output_expanded[0])) + + return whole_output_expanded + + +def get_new_whole_output(whole_output: List[Any]) -> List[Any]: + new_whole_output: List[Any] = [] + updated_names = [] + for item in whole_output[0]: + updated_name = item.replace(".", " ") + updated_name = updated_name.replace("'", "") + updated_name = updated_name.replace("*", " X ") + updated_names.append(updated_name) + new_whole_output.append(updated_names) + for line_num in range(1, len(whole_output)): + reformatted_line = [] + for item in whole_output[line_num]: + if type(item) is float or type(item) is np.float64: + if item < 0.001: + item = 0 + reformatted_line.append(item) + new_whole_output.append(reformatted_line) + return new_whole_output + + +def plot_whole_new( + output_path: str, + output_prefix: str, +) -> None: + whole_new_output_report = pd.read_csv(output_path + output_prefix + "_new whole analysis.csv", index_col=0) + # , + # names=0, encoding='utf-8' + # ) + + df = pd.DataFrame(whole_new_output_report) + df_trimmed = df.select_dtypes(include=["float"]) + # df_trimmed = df.drop(df.columns[99:], axis=1) + + plt.figure() + sns.heatmap(df_trimmed, annot=True) + # plt.show(block=False) + plt.savefig(output_path + output_prefix + "whole_new_heatmap.jpg") + plt.close() + + +def 
regression_stuff(X: List[float], xname: str, Y: List[float], yname: str, plot_inputs: bool) -> Any: + # Create linear regression object + # regr = linear_model.LinearRegression() + + # X = input_values + # Y = output_values + + # # diabetes = datasets.load_diabetes() + # # X = diabetes.data + # y = diabetes.target + + X2 = sm.add_constant(X) + est = sm.OLS(Y, X2) + est2 = est.fit() + print(est2.summary()) + R2_etc = est2.summary2().tables[0] + r2_value = R2_etc[3][0] + p_etc = est2.summary2().tables[1] + intercept_value = p_etc["Coef."]["const"] + slope = p_etc["Coef."]["x1"] + p_value = p_etc["P>|t|"]["x1"] + + # X = np.array(X).reshape(-1,1) + # Y = np.array(Y) + # # Train the model using the training sets + # regr.fit(X, Y) + + # # Make predictions using the testing set + # y_pred = regr.predict(X) + + # # The coefficients + # print("Coefficients: \n", regr.coef_) + # # The mean squared error + # print("Mean squared error: %.2f" % mean_squared_error(Y, y_pred)) + # # The coefficient of determination: 1 is perfect prediction + # r2 = r2_score(Y, y_pred) + # print("Coefficient of determination: %.2f" % r2) + + if plot_inputs: + # Plot outputs + fig, ax = plt.subplots() + plt.scatter(X, Y, color="black") + ax.axline((0, intercept_value), slope=slope) + ax.set_xlim(min(X) * 0.99, max(X) * 1.01) + + # plt.xticks(()) + # plt.yticks(()) + plt.xlabel(xname) + plt.ylabel(yname) + plt.show() + + return (slope, r2_value, p_value) + + +def collate_raw(basedirectory: str, all_report_filenames: List[str], total_num_files: int) -> Dict[str, List[float]]: + collected: Dict[str, List[float]] = {} + digits = len(str(total_num_files)) + + for i in range(0, total_num_files): + file_ID = f"{i+1}".zfill(digits) + "_" + try: + file_ID_found = [filename for filename in all_report_filenames if file_ID in filename][-1] + except: + file_ID_backup = f"{1}".zfill(digits) + "_" + file_ID_found = [filename for filename in all_report_filenames if file_ID_backup in filename][-1] + print("used 
dummy") + print(i / total_num_files) + print(i) + file = pd.read_csv(basedirectory + file_ID_found) + variable_names: List[str] = [] + if not variable_names: + variable_names = list(file.columns) + for variable_name in variable_names: + if variable_name not in collected.keys(): + collected[variable_name] = [] + valuetoappend = file[variable_name].values[0] + # if type(valuetoappend) is not str: + if valuetoappend == "Under construction, use the results with caution.": + pass + else: + valuetoappend = float(valuetoappend) + collected[variable_name].append(valuetoappend) + + return collected diff --git a/model_evaluation/sensitivity_analysis/analyze_SA_analysis_main.py b/model_evaluation/sensitivity_analysis/analyze_SA_analysis_main.py new file mode 100644 index 0000000000..907aa97647 --- /dev/null +++ b/model_evaluation/sensitivity_analysis/analyze_SA_analysis_main.py @@ -0,0 +1,262 @@ +# flake8: noqa +import csv +import json +import pandas as pd + +# import numpy as np +from typing import Dict, Any +from model_evaluation.sensitivity_analysis import SA_helpers + +""" +This script carries out the main sensitivity analysis, generating sensitivity indices for the desired analysis, as set up in the config file in this folder (config_analyze_SA.json). 
+""" + +config_json_filename = "model_evaluation/sensitivity_analysis/config_analyze_SA.json" +with open(config_json_filename) as json_file: + config_json = json.load(json_file) + +for analysis in config_json["analyses"]: # noqa + print(analysis) + input_file = analysis["input_file"] + output_path = analysis["output_path"] + "reports/" + report_name = analysis["report_name"] + only_inputs = analysis["only_inputs"] + plot_whole_new = analysis["plot_whole_new"] + + plot_inputs = analysis["plot_inputs"] + plot_outputs = analysis["plot_outputs"] + + with open(input_file) as json_file: + input_config = json.load(json_file) + + task_to_analyze: Dict[str, Any] = input_config["tasks"][0] + + if analysis["inputs_to_collate_override"]: + inputs_to_collate = analysis["inputs_to_collate"] + else: + inputs_to_collate = [ + (variable["variable_name"]).replace(".", " ") for variable in task_to_analyze["SA_input_variables"] + ] + + output_prefix = task_to_analyze["output_prefix"] + sampler = task_to_analyze["sampler"] + + parsed_SA_input_variables = SA_helpers.parse_input_variables(task_to_analyze) + sampled_values = SA_helpers.get_sampled_values(task_to_analyze, parsed_SA_input_variables) + total_num_files = len(sampled_values) + namesfornames = [name for name in parsed_SA_input_variables["names"]] + with open(output_path + "analyzed/" + output_prefix + "_inputs.csv", "w", newline="") as f: + writer = csv.writer(f) + writer.writerow(namesfornames) + writer.writerows(sampled_values) + if only_inputs: + break + all_report_filenames = SA_helpers.get_all_output_files( + basedirectory=output_path, output_prefix=output_prefix, report_name=report_name + ) + + collated_outputs = SA_helpers.collate_outputs( + basedirectory=output_path, all_report_filenames=all_report_filenames, total_num_files=total_num_files + ) + + whole_output = SA_helpers.get_whole_output( + collated_outputs, sampled_values, task_to_analyze, parsed_SA_input_variables + ) + with open(output_path + "analyzed/" + 
output_prefix + "_whole analysis.csv", "w", newline="") as f: + writer = csv.writer(f) + writer.writerows(whole_output) + + new_whole_output = SA_helpers.get_new_whole_output(whole_output) + with open(output_path + "analyzed/" + output_prefix + "_new whole analysis.csv", "w", newline="") as f: + writer = csv.writer(f) + writer.writerows(new_whole_output) + + if inputs_to_collate: + new_whole_output_pd = pd.DataFrame(new_whole_output) + column_names = new_whole_output_pd.iloc[0] + row_names = new_whole_output_pd[0] + new_whole_output_pd = new_whole_output_pd.iloc[1:, 1:] + new_whole_output_pd.columns = column_names[1:] + new_whole_output_pd.index = row_names[1:] + new_whole_output_pd = new_whole_output_pd.loc[:, ~new_whole_output_pd.columns.duplicated(keep="last")] + column_names = list(new_whole_output_pd.columns) + + for input in inputs_to_collate: + pass + try: + if sampler == "fractional_factorial": + # get the input column starts with ME: + main_effects = new_whole_output_pd["ME:" + input] + + # get the total column sum of all that include it and start with IE: + IEnames = [name for name in column_names if input in name and "IE:" in name] + IEtable = new_whole_output_pd[IEnames] + + IEsums = IEtable.sum(axis=1) + IEsums.columns = ["IE"] + + total_effect = IEsums + main_effects + threethings = pd.concat([main_effects, IEsums, total_effect], axis=1) + threethings.rename({0: "interaction_effects", 1: "total_effects"}, axis=1, inplace=True) + elif sampler == "sobol" or sampler == "saltelli_sobol": + # get the input column main effect: starts with S1: + main_effects = new_whole_output_pd["S1:" + input] + total_effect = new_whole_output_pd["ST:" + input] + interaction_effects = total_effect - main_effects + threethings = pd.concat([main_effects, interaction_effects, total_effect], axis=1) + threethings.rename( + {0: "interaction_effects", threethings.columns[-1]: "total_effects"}, axis=1, inplace=True + ) + elif sampler == "morris": + mu = new_whole_output_pd["mu:" + 
input] + total_effects = new_whole_output_pd["mu_star:" + input] + sigma = new_whole_output_pd["sigma:" + input] + mu_st_conf = new_whole_output_pd["mu_st_conf:" + input] + threethings = pd.concat([mu, total_effects, sigma, mu_st_conf], axis=1) + threethings.rename( + { + threethings.columns[0]: "mu", + threethings.columns[1]: "total_effects", + threethings.columns[2]: "sigma", + threethings.columns[3]: "mu_st_conf", + }, + axis=1, + inplace=True, + ) + + newcols = [] + for rowname in row_names[1:]: + # get the list of inputs (X) + input_reformatted = input.replace(" ", ".") + input_reformatted2 = input.replace(" ", ".").replace("ME:", "").replace("S1:", "") + + input_index = parsed_SA_input_variables["names"].index(input_reformatted) + input_values = [value[input_index] for value in sampled_values] + # get the list of outputs (Y) + output_values = collated_outputs[rowname] + slope, r2_value, p_value = SA_helpers.regression_stuff( + X=input_values, xname=input, Y=output_values, yname=rowname, plot_inputs=plot_inputs + ) + newcols.append([slope, r2_value, p_value]) + newcols_pd = pd.DataFrame(newcols) + newcols_pd.index = new_whole_output_pd.index + + minmax = pd.DataFrame( + [ + (name["lower_bound"], name["upper_bound"]) + for name in task_to_analyze["SA_input_variables"] + if name["variable_name"] == input_reformatted2 + ] + ) + output_pd = pd.concat([threethings, newcols_pd], axis=1) + output_pd.rename( + {output_pd.columns[0]: output_pd.columns[0].replace("S1:", "").replace("numberaskey", "")}, + axis=1, + inplace=True, + ) + # to rename with min.max in the name + # + str(minmax.iloc[0].values) + # output_pd.sort_values(by="total_effects", axis=0, ascending=False, inplace=True) + output_pd.sort_values(by=output_pd.columns[0], axis=0, ascending=False, inplace=True) + output_pd.rename({0: "slope", 1: "R2 value", 2: "p-value"}, axis=1, inplace=True) + filenameout = output_path + "analyzed/" + output_prefix + "_inputs_META_" + input + "_summarytable.csv" + 
output_pd.to_csv(filenameout) + # ENDTRY + except: + print(f"analysis of {input} failed") + + if analysis["outputs_to_collate_override"]: + outputs_to_collate = analysis["outputs_to_collate"] + else: + outputs_to_collate = output_pd.index.tolist() + + # here for a single output, we sort and explore all the inputs + for output in outputs_to_collate: + try: + just_output = pd.DataFrame(new_whole_output_pd.loc[output]) + MEnames = [name for name in column_names if "ME:" in name or "S1:" in name or "mu_star" in name] + temp_output = just_output.loc[MEnames] + + # get the input column starts with ME: + # MEnames = [name for name in column_names if "ME:" in name] + # main_effects = new_whole_output_pd[MEnames].loc[output] + + # get the total column sum of all that include it and start with IE: + interaction_effects = [] + total_effects = [] + for MEname in MEnames: + if sampler == "fractional_factorial": + IEnames_temp = [ + name + for name in column_names + if MEname.replace(".", " ").replace("ME:", "") in name and "IE:" in name + ] + IEtable = just_output.loc[IEnames_temp] + interaction_effects.append(IEtable.sum(axis=0).values[0]) + total_effects.append(interaction_effects[-1] + just_output.loc[MEname].values[0]) + elif sampler == "sobol" or sampler == "saltelli": + total_effects.append(just_output.loc[MEname.replace("S1:", "ST:")].values[0]) + interaction_effects.append(total_effects[-1] - just_output.loc[MEname].values) + else: + total_effects.append(just_output.loc[MEname].values[0]) + interaction_effects.append(just_output.loc[MEname.replace("mu_star:", "sigma:")].values[0]) + + inteffs = pd.DataFrame(interaction_effects) + inteffs.index = temp_output.index + inteffs.rename({0: "interaction_effects"}, axis=1, inplace=True) + toteffs = pd.DataFrame(total_effects) + toteffs.index = temp_output.index + toteffs.rename({0: "total_effects"}, axis=1, inplace=True) + + # get ranges + variable_names = [name["variable_name"] for name in 
task_to_analyze["SA_input_variables"]] + bounds = [(name["lower_bound"], name["upper_bound"]) for name in task_to_analyze["SA_input_variables"]] + + output_temp = pd.concat([temp_output, inteffs, toteffs], axis=1) + + input_names = [name for name in output_temp.index] + bounds2 = [] + newcols = [] + for input_name in input_names: + input_reformatted = ( + input_name.replace(" ", ".").replace("ME:", "").replace("S1:", "").replace("mu_star:", "") + ) + if input_reformatted in variable_names: + idx = variable_names.index(input_reformatted) + bounds2.append(bounds[idx]) + # Append the output for that input to the single_output + input_index = parsed_SA_input_variables["names"].index(input_reformatted) + input_values = [value[input_index] for value in sampled_values] + # get the list of outputs (Y) + output_values = collated_outputs[output] + slope, r2_value, p_value = SA_helpers.regression_stuff( + X=input_values, xname=input, Y=output_values, yname=output, plot_inputs=plot_inputs + ) + newcols.append([slope, r2_value, p_value]) + newcols_pd = pd.DataFrame(newcols) + bounds2_pd = pd.DataFrame(bounds2) + bounds2_pd.rename({0: "input_min", 1: "input_max"}, axis=1, inplace=True) + bounds2_pd.index = output_temp.index[0 : len(bounds2)] # TODO + # the previous line might need to be reverted to remove the info in hard brackets + newcols_pd.index = output_temp.index + index_names = pd.DataFrame( + [val.replace("S1:", "").replace("numberaskey", "") for val in output_temp.index.values] + ) + index_names.index = output_temp.index + index_names.rename({0: "Input name"}) + output_pd = pd.concat([bounds2_pd, output_temp, newcols_pd, index_names], axis=1) + # output_pd.sort_values(by="total_effects", axis=0, ascending=False, inplace=True) + output_pd.sort_values(by=output_pd.columns[2], axis=0, ascending=False, inplace=True) + output_pd.rename({0: "slope", 1: "r2_value", 2: "p_value"}, axis=1, inplace=True) + output_reformatted = output.replace("/", " per ") + filenameout = ( + 
output_path + "analyzed/" + output_prefix + "_outputs_META_" + output_reformatted + "_summarytable.csv" + ) + output_pd.to_csv(filenameout) + except: + print(f"analysis of {output} failed") + + if plot_whole_new: + SA_helpers.plot_whole_new(output_path=output_path, output_prefix=output_prefix) + +print("did all the stuff!") diff --git a/model_evaluation/sensitivity_analysis/analyze_SA_input_timing.py b/model_evaluation/sensitivity_analysis/analyze_SA_input_timing.py new file mode 100644 index 0000000000..808e8d0c1f --- /dev/null +++ b/model_evaluation/sensitivity_analysis/analyze_SA_input_timing.py @@ -0,0 +1,232 @@ +# flake8: noqa +# type: ignore +import os +import json +import pandas as pd +from typing import Any, List +import statistics + +# from itertools import combinations, chain +# import time +# import gc +import matplotlib.pyplot as plt +import seaborn as sns +import numpy as np + +""" +This script generates a file that should help the user determine if certain combinations of inputs are failing outright or taking an inordinately long time. +Additionally plots the simulation time associated with each input, e.g. each value of a given input plotted against runtime. 
+""" +config_json_filename = "model_evaluation/sensitivity_analysis/config_analyze_SA.json" + +with open(config_json_filename) as json_file: + config_json = json.load(json_file) + +analysis = config_json["analyses"][0] + +logfilepath = analysis["output_path"] + "logs/" +reportfilepath = analysis["output_path"] + "reports/" +fileprefix = analysis["analysis_prefix"] + +# read the CSV +input_list = reportfilepath + "analyzed/" + f"{fileprefix}_inputs.csv" +input_list_read = pd.read_csv(input_list) +# add the index for each, 0-X is a columns for run +df = pd.DataFrame(input_list_read) +num_rows = len(df.index) +df.insert(0, "Simulation_number", list(range(1, num_rows + 1)), True) +print(df) + + +def load_json(json_filename: str) -> Any: # noqa + with open(json_filename) as json_file: + json_loaded = json.load(json_file) + return json_loaded + + +# Get the list of files +logfiles = [filename for filename in os.listdir(logfilepath) if filename.startswith(fileprefix) and "logs" in filename] + +errorfiles = [ + filename for filename in os.listdir(logfilepath) if filename.startswith(fileprefix) and "errors" in filename +] + +total_time_float_list: List[float | str] = [] +initornotlist: List[str | bool] = [] +digits = len(str(num_rows)) + +for row_num in range(num_rows): + print(row_num) + # INSIDE THE LOOP + try: + logfilefound = [file for file in logfiles if "run " + f"{row_num + 1}".zfill(digits) in file][-1] + logfile = load_json(logfilepath + logfilefound) + except Exception as ex: + print(ex) + total_time_float_list.append(999999999999999999999999) + initornotlist.append("Unknown") + continue + + # list(logfile.keys()) + try: + total_time_string = logfile["SimulationEngine.simulate.total_simulation_time"]["values"][0] + total_time_float = float(total_time_string[total_time_string.rfind(":") + 1 :]) + total_time_float_list.append(total_time_float) + except Exception as ex: + print(ex) + total_time_float_list.append(999999999999999999999999) + + errorfilefound = [file 
for file in errorfiles if "run " + f"{row_num + 1}".zfill(digits) in file][-1] + errorfile = load_json(logfilepath + errorfilefound) + list(errorfile.keys()) + initornot = "TaskManager.task.Failed to finish the task" in list(errorfile.keys()) + initornotlist.append(initornot) + +len(initornotlist) +len(total_time_float_list) +df.insert(1, "Simulation time", total_time_float_list, True) +df.insert(2, "Failed simulation", initornotlist, True) +summarized_filename = reportfilepath + "analyzed/" + f"{fileprefix} summarized.csv" +df.to_csv(summarized_filename) + +long_simulations = df.loc[df["Simulation time"] > 600] +normal_simulations = df.loc[df["Simulation time"] <= 600] +failed_simulations = df.loc[df["Failed simulation"] == True] +passed_simulations = df.loc[df["Failed simulation"] == False] + +otherlist = ["Simulation_number", "Simulation time", "Failed simulation"] +input_variable_list = [colname for colname in list(df.columns) if "dummy" not in colname and colname not in otherlist] + +passfail_list = [] +timing_list = [] + +almost_uniquely_failed_list = [] +almost_uniquely_long_list = [] + +uniquely_failed_list = [] +uniquely_long_list = [] + +pass_mean_list = [] +fail_mean_list = [] +long_mean_list = [] +normal_mean_list = [] + +input_variable = "animal.ration.formulation_interval" +for input_variable in input_variable_list: + print(input_variable) + failed_inputs = list(failed_simulations[input_variable]) + passed_inputs = list(passed_simulations[input_variable]) + + try: + failed_mean = f'{float(f"{statistics.mean(failed_inputs):.3g}"):g}' + passed_mean = f'{float(f"{statistics.mean(passed_inputs):.3g}"):g}' + except: + failed_mean = "" + passed_mean = "" + pass_mean_list.append(passed_mean) + fail_mean_list.append(failed_mean) + passfail_list.append((passed_mean, failed_mean)) + + if len(set(failed_inputs)) == 1: + almost_uniquely_failed_list.append(failed_inputs[0]) + else: + almost_uniquely_failed_list.append("") + uniquely_failed = [input for input in 
failed_inputs if input not in passed_inputs] + # print(uniquely_failed) + uniquely_failed_list.append(uniquely_failed) + + long_inputs = list(long_simulations[input_variable]) + normal_inputs = list(normal_simulations[input_variable]) + + if len(set(long_inputs)) == 1: + almost_uniquely_long_list.append(long_inputs[0]) + else: + almost_uniquely_long_list.append("") + long_mean = f'{float(f"{statistics.mean(long_inputs):.3g}"):g}' + normal_mean = f'{float(f"{statistics.mean(normal_inputs):.3g}"):g}' + + long_mean_list.append(long_mean) + normal_mean_list.append(normal_mean) + timing_list.append((normal_mean, long_mean)) + + uniquely_long = [input for input in long_inputs if input not in normal_inputs] + uniquely_long_list.append(uniquely_long) + +meta = pd.DataFrame( + { + "input_variable_list": input_variable_list, + "pass_mean_list": pass_mean_list, + "fail_mean_list": fail_mean_list, + "normal_mean_list": normal_mean_list, + "long_mean_list": long_mean_list, + "almost_uniquely_failed_list": almost_uniquely_failed_list, + "almost_uniquely_long_list": almost_uniquely_long_list, + "uniquely_failed_list": uniquely_failed_list, + "uniquely_long_list": uniquely_long_list, + } +) +meta.to_csv(reportfilepath + "analyzed/" + f"{fileprefix} timing divided.csv") + + +summarized = pd.read_csv(summarized_filename) +colnames = list(summarized.columns) +howmanydidntrun = 0 +newarray = [] +coltoplot = 2 +yprime = summarized[colnames[coltoplot]] +yprime = list(yprime) + +newarray.append(list(yprime)) +for columname in colnames: + print(columname) + if columname not in colnames[0:4]: + xprime = summarized[columname] + xprime = list(xprime) + + y = [] + x = [] + for idx, val in enumerate(yprime): + if val < 2000: + y.append(val) + x.append(xprime[idx]) + + fig = plt.figure() + plt.scatter(x, y) + # plt.xscale('log') + # plt.yscale('log') + m, b = np.polyfit(x, y, deg=1) + plt.axline(xy1=(0, b), slope=m, label=f"$y = {m:.1f}x {b:+.1f}$") + plt.ylabel(colnames[coltoplot]) + 
plt.xlabel(columname) + # plt.show(block=False) + fig.savefig(reportfilepath + "analyzed/" + f"pngs/scatter {columname}.png") + newarray.append(list(y)) + + xy = pd.DataFrame([x, y]).T + xy.columns = ["x", "y"] + yhigh = xy["y"][xy["x"] > np.mean(xy["x"])] + ylow = xy["y"][xy["x"] < np.mean(xy["x"])] + + fig = plt.figure() + _, bins, _ = plt.hist(yhigh, bins=75) + _ = plt.hist(ylow, bins=bins, alpha=0.5) + plt.xlabel(colnames[coltoplot]) + plt.ylabel(columname) + # plt.show() + fig.savefig(reportfilepath + "analyzed/" + f"pngs/hist {columname}.png") + +x = summarized[colnames[1]] +plt.figure() +plt.hist(x) +plt.show(block=False) + + +df = pd.DataFrame(newarray) +df = df.T +df_norm_col = (df - df.mean()) / df.std() + +plt.figure() +ax = sns.heatmap(df_norm_col, linewidth=0.5) +plt.show(block=False) + +# c = ax.pcolormesh(x, y, z, cmap='RdBu', vmin=z_min, vmax=z_max) diff --git a/model_evaluation/sensitivity_analysis/analyze_SA_inputs_v_outputs.py b/model_evaluation/sensitivity_analysis/analyze_SA_inputs_v_outputs.py new file mode 100644 index 0000000000..0e1cecf48a --- /dev/null +++ b/model_evaluation/sensitivity_analysis/analyze_SA_inputs_v_outputs.py @@ -0,0 +1,139 @@ +# flake8: noqa +# type: ignore +import os +import json +import pandas as pd +from typing import Dict, Any, List +import statistics +from model_evaluation.sensitivity_analysis import SA_helpers +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np +import seaborn as sns +import json + +""" +This script allows for analysis of different inputs plotted against specific outputs. +Useful if the user wants to evaluate the variance or nonlinearity of specific input/output combinations. +Note: this requires modification of the script below in the user input section. +Note: this requires the analyze_SA_analysis_main.py script to be run prior to use. 
+""" + +## USER DEFINED INPUTS + +inputlist = [0, 1, 2, 5, 14, 15, 16] +outputlist = [ + 0, + 1, + 2, + 3, + 15, + 16, + 24, + 25, + 26, + 31, + 37, + 39, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 64, + 65, + 66, + 67, + 68, + 69, + 73, + 97, +] + +## END USER INPUT + +config_json_filename = "model_evaluation/sensitivity_analysis/config_analyze_SA.json" +with open(config_json_filename) as json_file: + config_json = json.load(json_file) + +analysis = config_json["analyses"][0] + +input_file = analysis["input_file"] +output_path = analysis["output_path"] + "reports/" +report_name = analysis["report_name"] +fileprefix = analysis["analysis_prefix"] + +with open(input_file) as json_file: + input_config = json.load(json_file) + +task_to_analyze: Dict[str, Any] = input_config["tasks"][0] + +parsed_SA_input_variables = SA_helpers.parse_input_variables(task_to_analyze) +sampled_values = SA_helpers.get_sampled_values(task_to_analyze, parsed_SA_input_variables) + +total_num_files = len(sampled_values) + +all_report_filenames = SA_helpers.get_all_output_files( + basedirectory=output_path, output_prefix=fileprefix, report_name=report_name +) +basedirectory = output_path +all_report_filenames = all_report_filenames +total_num_files = total_num_files + + +raw_collated = SA_helpers.collate_raw(basedirectory, all_report_filenames, total_num_files) + +target = max([len(raw_collated[thing]) for thing in raw_collated]) +raw_collated_filtered = {} +for thing in raw_collated: + if len(raw_collated[thing]) != target: + pass + else: + raw_collated_filtered[thing] = raw_collated[thing] + +raw_collated_pd = pd.DataFrame(raw_collated_filtered) + +rawcollatedfilenameout = output_path + "analyzed/" + fileprefix + " raw collated.csv" +raw_collated_pd.to_csv(rawcollatedfilenameout) + + +# read the CSV +input_list = output_path + "analyzed/" + f"{fileprefix}_inputs.csv" +input_list_read = pd.read_csv(input_list) +# add the index for each, 0-X is a columns for run +input_list_read_pd = 
pd.DataFrame(input_list_read) + +# PLOTTING IN PROGRESS BELOW +list_of_inputs = list(input_list_read_pd.columns) +list_of_outputs = list(raw_collated_pd.columns) + +pd.DataFrame(list_of_inputs).to_csv(output_path + "analyzed/FOR_PLOTS_list_of_inputs.csv") +pd.DataFrame(list_of_outputs).to_csv(output_path + "analyzed/FOR_PLOTS_list_of_outputs.csv") + + +for i in inputlist: + for j in outputlist: + input_of_choice = list_of_inputs[i] + output_of_choice = list_of_outputs[j] + print(input_of_choice) + print(output_of_choice) + + y = raw_collated_pd[output_of_choice] + x = input_list_read_pd[input_of_choice] + + fig = plt.figure() + plt.scatter(x, y) + # plt.xscale('log') + # plt.yscale('log') + m, b = np.polyfit(x, y, deg=1) + plt.axline(xy1=(0, b), slope=m, label=f"$y = {m:.1f}x {b:+.1f}$") + plt.ylabel(output_of_choice) + plt.xlabel(input_of_choice) + # plt.show(block=False) + fig.savefig( + output_path + + f'analyzed/pngs/scatter {input_of_choice.replace("/","")} v {output_of_choice.replace("/","")}.png' + ) diff --git a/model_evaluation/sensitivity_analysis/config_analyze_SA.json b/model_evaluation/sensitivity_analysis/config_analyze_SA.json new file mode 100644 index 0000000000..cad807407b --- /dev/null +++ b/model_evaluation/sensitivity_analysis/config_analyze_SA.json @@ -0,0 +1,34 @@ +{ + "analyses": [ + { + "input_file": "D:/RUFAS/S256/sobol_animal_tasks_v4.json", + "output_path": "D:/RUFAS/S256/", + "report_name": "report_S365", + "analysis_prefix": "animal_S256", + "only_inputs": false, + "plot_whole_new": false, + "inputs_to_collate_override": false, + "inputs_to_collate": [ + "animal animal_config management_decisions breeding_start_day_h", + "animal animal_config management_decisions days_in_preg_when_dry", + "animal animal_config management_decisions heifer_repro_cull_time", + "animal animal_config management_decisions cull_milk_production", + "animal animal_config farm_level repro cows estrus_detection_rate", + "animal animal_config farm_level bodyweight 
mature_body_weight_avg", + "animal animal_config from_literature repro avg_estrus_cycle_cow", + "animal animal_config from_literature culling mastitis_cull probability", + "manure_management bedding_configs 0 bedding_mass_per_day" + ], + "plot_inputs": false, + "outputs_to_collate_override": false, + "outputs_to_collate": [ + "total herd enteric methane, kg_ver_hor_agg", + "Total milk, kg_ver_agg", + "adjusted herd enteric CH4 per FPCM, kg/kg_hor_agg", + "adjusted whole herd enteric methane, kg_hor_agg", + "Lact cow feed CO2 kg/cow/d_hor_agg" + ], + "plot_outputs": false + } + ] +} \ No newline at end of file diff --git a/output/output_filters/_csv_all_variables.txt b/output/output_filters/_csv_all_variables.txt deleted file mode 100644 index 9abb766cee..0000000000 --- a/output/output_filters/_csv_all_variables.txt +++ /dev/null @@ -1 +0,0 @@ -.* \ No newline at end of file