diff --git a/README.md b/README.md
index 151cb31..3bb0522 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,10 @@
 including food consumption patterns, environmental impact and emissions data,
 population and land use. It also provides an interface to run external models
 by using xarray as the data container.
 
+AgriFoodPy also provides a pipeline manager to build end-to-end simulations and
+analysis toolchains. Modules can also be executed in standalone mode, which
+does not require a pipeline to be defined.
+
 In addition to this package, we have also pre-packaged some datasets for use
 with agrifood. These can be found on the agrifoodpy_data repository
 https://github.com/FixOurFood/agrifoodpy-data
@@ -36,69 +40,46 @@ pip install git+https://github.com/FixOurFood/agrifoodpy-data.git@importable
 
 ## Usage:
 
-Each of the four basic modules on AgriFoodPy (Food, Land, Impact, Population)
-has its own set of basic array manipulation functionality, a set of
-modelling methods to extract basic metrics from datasets, and interfaces with
-external modelling packages and code.
-
-Agrifoodpy employs _xarray_ accesors to provide additional functionality on top
-of the array manipulation provided by xarray.
+AgriFoodPy modules can be used to manipulate food system data in standalone mode
+or by constructing a pipeline of modules which can be executed partially or
+completely.
 
-Basic usage of the accesors depend on the type of array being manipulated.
-The following examples uses the **food** module with the importable UK data
-mentioned above:
+To build a pipeline:
 
 ```python
-# import the FoodBalanceSheet accessor and FAOSTAT from agrifoodpy_data
-from agrifoodpy.food.food import FoodBalanceSheet
-from agrifoodpy_data.food import FAOSTAT
+import numpy as np
+from agrifoodpy.pipeline import Pipeline
+from agrifoodpy.utils.load_dataset import load_dataset
+from agrifoodpy.food.model import fbs_convert
 import matplotlib.pyplot as plt
 
-# Extract data for the UK (Region=229)
-food_uk = FAOSTAT.sel(Region=229)
+# Create pipeline object
+fs = Pipeline()
 
-# Compute the Self-sufficiency ratio using the fbs accessor SSR function
-SSR = food_uk.fbs.SSR(per_item=True)
+# Add node to load food balance sheet data from an external module
+fs.add_node(load_dataset,
+            {
+                "datablock_path": "food",
+                "module": "agrifoodpy_data.food",
+                "data_attr": "FAOSTAT",
+                "coords": {"Year": np.arange(1990, 2010), "Region": 229}
+            })
 
-# Plot the results using the fbs accessor plot_years function
-SSR.fbs.plot_years()
-plt.show()
-```
-To use the specific models and interfaces to external code, these need to be
-imported
+# Add node to scale the food balance sheet quantities by a constant factor
+fs.add_node(fbs_convert,
+            {
+                "fbs": "food",
+                "convertion_arr": 1e6  # From 1000 tonnes to kg
+            })
 
-```python
-# import the FoodBalanceSheet accessor and FAOSTAT from agrifoodpy_data
-from agrifoodpy.food.food import FoodBalanceSheet
-from agrifoodpy_data.food import FAOSTAT
-import agrifoodpy.food.model as food_model
-import matplotlib.pyplot as plt
+fs.run()
 
-# Extract data for the UK in 2020 (Region=229, Year=2020)
-food_uk = FAOSTAT.sel(Region=229, Year=2020)
-
-# Scale consumption of meat to 50%,
-food_uk_scaled = food_model.balanced_scaling(food_uk,
-                                             items=2731,
-                                             element="food",
-                                             origin="production",
-                                             scale=0.5,
-                                             constant=True)
-
-# Plot bar summary of resultant food quantities
-food_uk_scaled.fbs.plot_bars(elements=["production","imports"],
-                             inverted_elements=["exports","food"])
-plt.show()
+results = fs.datablock
 ```
 
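+Modules can also be run in standalone mode, passing datasets directly instead
+of datablock paths. As an illustrative sketch using the importable UK data
+mentioned above, the self-sufficiency ratio can be computed without defining
+a pipeline:
+
+```python
+from agrifoodpy.food.model import SSR
+from agrifoodpy_data.food import FAOSTAT
+
+# Extract UK data (Region=229) and compute the per-item SSR
+ssr = SSR(FAOSTAT.sel(Region=229), per_item=True)
+```
+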
-In he future, we plan to implement a pipeline manager to automatize certain
-aspects of the agrifood execution, and to simulate a comprehensive model where
-all aspects of the food system are considered simultaneously.
-
 ## Examples and documentation
 
-[Examples](https://agrifoodpy.readthedocs.io/en/latest/examples/index.html#modules)
+[Examples](https://agrifoodpy.readthedocs.io/en/latest/examples/index.html)
 demonstrating the functionality of AgriFoodPy can be found in the
 [package documentation](https://agrifoodpy.readthedocs.io/en/latest/).
 These include the use of accessors to manipulate data and access to basic
diff --git a/agrifoodpy/food/food.py b/agrifoodpy/food/food.py
index 933f856..ee327f2 100644
--- a/agrifoodpy/food/food.py
+++ b/agrifoodpy/food/food.py
@@ -1,7 +1,7 @@
 """ Food supply module.
 
 The Food module provides the FoodBalanceSheet and FoodElementSheet accessor
-classes to manipulate and analyse Food data stored in xarray.Dataset and 
+classes to manipulate and analyse Food data stored in xarray.Dataset and
 xarray.DataArray formats, respectively.
 
 It also provides a constructor style function which allows the creation of a
@@ -11,20 +11,25 @@
 import numpy as np
 import xarray as xr
 import copy
-import warnings
-
-from agrifoodpy.array_accessor import XarrayAccessorBase
+from ..array_accessor import XarrayAccessorBase
 import matplotlib.pyplot as plt
 
-def FoodSupply(items, years, quantities, regions=None, elements=None,
-               long_format=True):
+
+def FoodSupply(
+    items,
+    years,
+    quantities,
+    regions=None,
+    elements=None,
+    long_format=True
+):
     """ Food Supply style dataset constructor
 
-    Constructs a food balance sheet style xarray.Dataset or xarray.DataArray for
-    a given list of items, years and regions, and an array data shaped
-    accordingly.
-    
+    Constructs a food balance sheet style xarray.Dataset or xarray.DataArray
+    for a given list of items, years and regions, and an array data shaped
+    accordingly.
+
     Parameters
     ----------
     items : (ni,) array_like
@@ -42,14 +47,15 @@ def FoodSupply(items, years, quantities, regions=None, elements=None,
         Boolean flag to interpret data in long or wide format
     elements : (ne,) array_like, optional
         Array with element name strings. If `elements` is provided, a dataset
-        is created for each element in `elements` with the quantities being each
-        of the sub-arrays indexed by the first coordinate of the input array.
+        is created for each element in `elements` with the quantities being
+        each of the sub-arrays indexed by the first coordinate of the input
+        array.
 
     Returns
     -------
     fbs : xarray.Dataset
-        Food Supply dataset containing the food quantity for each `Item`, `Year`
-        and `Region` with one dataarray per element in `elements`.
+        Food Supply dataset containing the food quantity for each `Item`,
+        `Year` and `Region` with one dataarray per element in `elements`.
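+
+    Examples
+    --------
+    A minimal long-format constructor call (illustrative values only; the
+    layout of the returned dataset depends on the optional `elements`
+    argument):
+
+    >>> fbs = FoodSupply(items=["Beef", "Apples"],
+    ...                  years=[2020, 2020],
+    ...                  quantities=np.array([10., 20.]))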
     """
 
     # if the input has a single element, proceed with long format
@@ -61,14 +67,14 @@ def FoodSupply(items, years, quantities, regions=None, elements=None,
     # Identify unique values in coordinates
     _items = np.unique(items)
     _years = np.unique(years)
-    coords = {"Item" : _items,
-              "Year" : _years,}
+    coords = {"Item": _items,
+              "Year": _years}
 
     # find positions in output array to organize data
     ii = [np.searchsorted(_items, items), np.searchsorted(_years, years)]
     size = (len(_items), len(_years))
 
-    # If regions and are provided, add the coordinate information 
+    # If regions are provided, add the coordinate information
     if regions is not None:
         _regions = np.unique(regions)
         ii.append(np.searchsorted(_regions, regions))
@@ -76,7 +82,7 @@ def FoodSupply(items, years, quantities, regions=None, elements=None,
         coords["Region"] = _regions
 
     # Create empty dataset
-    fbs = xr.Dataset(coords = coords)
+    fbs = xr.Dataset(coords=coords)
 
     if long_format:
         # dataset, quantities
@@ -84,7 +90,7 @@ def FoodSupply(items, years, quantities, regions=None, elements=None,
     else:
         # dataset, coords
         ndims = len(coords)+1
-    
+
     # make sure the long format has the right number of dimensions
     while len(quantities.shape) < ndims:
         quantities = np.expand_dims(quantities, axis=0)
@@ -124,12 +130,18 @@ def FoodSupply(items, years, quantities, regions=None, elements=None,
 
     return fbs
 
+
 @xr.register_dataset_accessor("fbs")
 class FoodBalanceSheet(XarrayAccessorBase):
 
-    def scale_element(self, element, scale, items=None):
-        """Scales list of items from an element in a food balance sheet like
-        DataSet.
+    def scale_element(
+        self,
+        element,
+        scale,
+        items=None
+    ):
+        """Scales list of items from an element in a Food Balance Sheet like
+        Dataset.
 
         Parameters
         ----------
@@ -143,12 +155,12 @@ def scale_element(self, element, scale, items=None):
            Destination element DataArray to which the difference is added
         items : list of int or list of str, optional
             List of items to be scaled. If not provided, all items are scaled.
-        
+
         Returns
         -------
         out : xarray.Dataset
             FAOSTAT formatted Food Supply dataset with scaled quantities.
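+
+        Examples
+        --------
+        Halve the "food" quantities of a single item (illustrative sketch):
+
+        >>> scaled = fbs.fbs.scale_element(element="food",
+        ...                                scale=0.5,
+        ...                                items=["Beef"])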
- + """ fbs = self._obj @@ -166,17 +178,25 @@ def scale_element(self, element, scale, items=None): out = copy.deepcopy(fbs) # Scale items - sel = {"Item":items} + sel = {"Item": items} out[element].loc[sel] = out[element].loc[sel] * scale return out - def scale_add(self, element_in, element_out, scale, items=None, add=True, - elasticity=None): + def scale_add( + self, + element_in, + element_out, + scale, + items=None, + add=True, + elasticity=None + ): + """Scales item quantities of an element and adds the difference to another element DataArray - + Parameters ---------- fbs : xarray.Dataset @@ -194,7 +214,7 @@ def scale_add(self, element_in, element_out, scale, items=None, add=True, elasticity : float, float array_like optional Fractional percentage of the difference that is added to each element in element_out. - + Returns ------- out : xarray.Dataset @@ -203,7 +223,7 @@ def scale_add(self, element_in, element_out, scale, items=None, add=True, """ fbs = self._obj - + if np.isscalar(element_out): element_out = [element_out] @@ -221,11 +241,18 @@ def scale_add(self, element_in, element_out, scale, items=None, add=True, for elmnt, add_el, elast in zip(element_out, add, elasticity): out[elmnt] = out[elmnt] + np.where(add_el, -1, 1)*dif*elast - + return out - def SSR(self, items=None, per_item=False, domestic=None, - production="production", imports="imports", exports="exports"): + def SSR( + self, + items=None, + per_item=False, + domestic=None, + production="production", + imports="imports", + exports="exports" + ): """Self-sufficiency ratio Self-sufficiency ratio (SSR) or ratios for a list of item imports, @@ -253,8 +280,8 @@ def SSR(self, items=None, per_item=False, domestic=None, Returns ------- data : xarray.Dataarray - Self-sufficiency ratio or ratios for the list of items, one for each - year of the input food Dataset "Year" coordinate. + Self-sufficiency ratio or ratios for the list of items, one for + each year of the input food Dataset "Year" coordinate. """ @@ -275,8 +302,15 @@ def SSR(self, items=None, per_item=False, domestic=None, return fbs[production].sum(dim="Item") / domestic_use.sum(dim="Item") - def IDR(self, items=None, per_item=False, imports="imports", domestic=None, - production="production", exports="exports"): + def IDR( + self, + items=None, + per_item=False, + imports="imports", + domestic=None, + production="production", + exports="exports" + ): """Import-dependency ratio Import-ependency ratio (IDR) or ratios for a list of item imports, @@ -291,7 +325,8 @@ def IDR(self, items=None, per_item=False, imports="imports", domestic=None, list of items to compute the IDR for from the food Dataset. If no list is provided, the IDR is computed for all items. per_item : bool, optional - Whether to return an IDR for each item separately. Default is false. + Whether to return an IDR for each item separately. Default is + false. 
domestic : string, optional Name of the DataArray containing the domestic use data imports : string, optional @@ -300,7 +335,7 @@ def IDR(self, items=None, per_item=False, imports="imports", domestic=None, Name of the DataArray containing the exports data production : string, optional Name of the DataArray containing the production data - + Returns ------- @@ -327,18 +362,26 @@ def IDR(self, items=None, per_item=False, imports="imports", domestic=None, return fbs["imports"].sum(dim="Item") / domestic_use.sum(dim="Item") - def plot_bars(self, show="Item", elements=None, inverted_elements=None, - ax=None, colors=None, labels=None, **kwargs): + def plot_bars( + self, + show="Item", + elements=None, + inverted_elements=None, + ax=None, + colors=None, + labels=None, + **kwargs + ): """Plot total quantities per element on a horizontal bar plot Produces a horizontal bar plot with a bar per element on the vertical axis plotted on a cumulative form. Each bar is the sum of quantities on each element, broken down by the selected coordinate "show". The - starting x-axis position of each bar will depend on the cumulative value - up to that element. The order of elements can be defined by the + starting x-axis position of each bar will depend on the cumulative + value up to that element. The order of elements can be defined by the "element" parameter. A second set of "inverted_elements" can be given, and these will be plotted from right to left starting from the previous - cumulative sum, minus the corresponding sum of the inverted elements. + cumulative sum, minus the corresponding sum of the inverted elements. Parameters ---------- @@ -359,11 +402,12 @@ def plot_bars(self, show="Item", elements=None, inverted_elements=None, colors : list of str, optional String list containing the colors for each of the elements in the "show" coordinate. - If not defined, a color list is generated from the standard cycling. + If not defined, a color list is generated from the standard + cycling. labels : str, list of str, optional String list containing the labels for the legend of the elements in - the "show" coordinate. If not set, no labels are printed. If "show", - the values of the "show" dimension are used. + the "show" coordinate. If not set, no labels are printed. + If "show", the values of the "show" dimension are used. **kwargs : dict Style options to be passed on to the actual plot function, such as linewidth, alpha, etc. 
@@ -395,10 +439,14 @@ def plot_bars(self, show="Item", elements=None, inverted_elements=None, new_fbs = FoodBalanceSheet(fbs) new_fbs = FoodBalanceSheet(new_fbs.group_sum(coordinate=show)) - return new_fbs.plot_bars(show=show, elements=elements, - inverted_elements=inverted_elements, - ax=ax, colors=colors, labels=labels, - **kwargs) + return new_fbs.plot_bars( + show=show, + elements=elements, + inverted_elements=inverted_elements, + ax=ax, + colors=colors, + labels=labels, + **kwargs) else: raise ValueError(f"The coordinate {show} is not a valid " "dimension or coordinate of the Dataset.") @@ -423,25 +471,24 @@ def plot_bars(self, show="Item", elements=None, inverted_elements=None, elif np.all(labels == "show"): labels = [str(val) for val in fbs[show].values] - # Plot non inverted elements first cumul = 0 for ie, element in enumerate(elements): ax.hlines(ie, 0, cumul, color="k", alpha=0.2, linestyle="dashed", - linewidth=0.5) + linewidth=0.5) if size_show == 1: - ax.barh(ie, left = cumul, width=food_sum[element], + ax.barh(ie, left=cumul, width=food_sum[element], color=colors[0]) - cumul +=food_sum[element] + cumul += food_sum[element] else: for ii, val in enumerate(food_sum[element]): - ax.barh(ie, left = cumul, width=val, color=colors[ii], + ax.barh(ie, left=cumul, width=val, color=colors[ii], label=labels[ii]) cumul += val # Then the inverted elements if inverted_elements is not None: - + if np.isscalar(inverted_elements): inverted_elements = [inverted_elements] @@ -451,22 +498,22 @@ def plot_bars(self, show="Item", elements=None, inverted_elements=None, cumul = 0 for ie, element in enumerate(reversed(inverted_elements)): ax.hlines(len_elements-1 - ie, 0, cumul, color="k", alpha=0.2, - linestyle="dashed", linewidth=0.5) + linestyle="dashed", linewidth=0.5) if size_show == 1: - ax.barh(len_elements-1 - ie, left = cumul, + ax.barh(len_elements-1 - ie, left=cumul, width=food_sum[element], color=colors[0]) - cumul +=food_sum[element] + cumul += food_sum[element] else: for ii, val in enumerate(food_sum[element]): - ax.barh(len_elements-1 - ie, left = cumul, width=val, + ax.barh(len_elements-1 - ie, left=cumul, width=val, color=colors[ii], label=labels[ii]) cumul += val # Plot decorations ax.set_yticks(np.arange(len_elements), labels=elements) - ax.tick_params(axis="x",direction="in", pad=-12) + ax.tick_params(axis="x", direction="in", pad=-12) ax.invert_yaxis() # labels read top-to-bottom - ax.set_ylim(len_elements,-1) + ax.set_ylim(len_elements, -1) # Unique labels if print_labels: @@ -476,16 +523,23 @@ def plot_bars(self, show="Item", elements=None, inverted_elements=None, return ax + @xr.register_dataarray_accessor("fbs") class FoodElementSheet(XarrayAccessorBase): - - def plot_years(self, show=None, stack=True,ax=None, colors=None, - labels=None, **kwargs): + def plot_years( + self, + show=None, + stack=True, + ax=None, + colors=None, + labels=None, + **kwargs + ): """ Fill plot with quantities at each year value Produces a vertical fill plot with quantities for each year on the "Year" coordinate of the input dataset in the horizontal axis. If the - "show" coordinate exists, then the vertical fill plot is a stack of the + "show" coordinate exists, then the vertical fill plot is a stack of the sums of the other coordinates at that year for each item in the "show" coordinate. @@ -502,21 +556,22 @@ def plot_years(self, show=None, stack=True,ax=None, colors=None, returned. stack : boolean, optional Whether to stack fill plots or not. 
If 'True', the fill curves are - stacked on top of each other and the upper fill curve represents the - sum of all elements for a given year. + stacked on top of each other and the upper fill curve represents + the sum of all elements for a given year. If 'false', each element along the 'show' dimension is plotted - starting from the origin. + starting from the origin. ax : matplotlib.pyplot.artist, optional Axes on which to draw the plot. If not provided, a new artist is created. colors : list of str, optional String list containing the colors for each of the elements in the "show" coordinate. - If not defined, a color list is generated from the standard cycling. + If not defined, a color list is generated from the standard + cycling. labels : str, list of str, optional String list containing the labels for the legend of the elements in - the "show" coordinate. If not set, no labels are printed. If "show", - the values of the "show" dimension are used. + the "show" coordinate. If not set, no labels are printed. + If "show", the values of the "show" dimension are used. **kwargs : dict Style options to be passed on to the actual plot function, such as linewidth, alpha, etc. @@ -549,7 +604,7 @@ def plot_years(self, show=None, stack=True,ax=None, colors=None, elif show in fbs.coords: new_fbs = FoodElementSheet(fbs) new_fbs = FoodElementSheet(new_fbs.group_sum(coordinate=show)) - + return new_fbs.plot_years(show=show, stack=stack, ax=ax, colors=colors, labels=labels, **kwargs) elif show is None: @@ -583,18 +638,19 @@ def plot_years(self, show=None, stack=True,ax=None, colors=None, # Plot if size_cumsum == 1: ax.fill_between(years, cumsum, color=colors[0], alpha=0.5) - ax.plot(years, cumsum, color=colors[0], linewidth=0.5, label=labels) + ax.plot(years, cumsum, color=colors[0], linewidth=0.5, + label=labels) else: - ax.fill_between(years, cumsum.isel({show:0}), color=colors[0], + ax.fill_between(years, cumsum.isel({show: 0}), color=colors[0], alpha=0.5) - ax.plot(years, cumsum.isel({show:0}), color=colors[0], + ax.plot(years, cumsum.isel({show: 0}), color=colors[0], linewidth=0.5, label=labels[0]) - for id in range(1,size_cumsum): - ax.fill_between(years, cumsum.isel({show:id}), - cumsum.isel({show:id-1}), color=colors[id], + for id in range(1, size_cumsum): + ax.fill_between(years, cumsum.isel({show: id}), + cumsum.isel({show: id-1}), color=colors[id], alpha=0.5) - ax.plot(years, cumsum.isel({show:id}), color=colors[id], - linewidth=0.5,label=labels[id]) + ax.plot(years, cumsum.isel({show: id}), color=colors[id], + linewidth=0.5, label=labels[id]) ax.set_xlim(years.min(), years.max()) ax.set_ylim(bottom=0) diff --git a/agrifoodpy/food/model.py b/agrifoodpy/food/model.py index 3f106b0..d1b8021 100644 --- a/agrifoodpy/food/model.py +++ b/agrifoodpy/food/model.py @@ -3,146 +3,328 @@ import xarray as xr import numpy as np -# from .food_supply import FoodBalanceSheet -import warnings - -def balanced_scaling(fbs, items, scale, element, year=None, adoption=None, - timescale=10, origin=None, constant=False, - fallback=None): - """Scale items quantities across multiple elements in a FoodBalanceSheet - Dataset - - Scales selected item quantities on a food balance sheet and with the - posibility to keep the sum of selected elements constant. - Optionally, produce an Dataset with a sequence of quantities over the years - following a smooth scaling according to the selected functional form. - - The elements used to supply the modified quantities can be selected to keep - a balanced food balance sheet. 
+import copy
+from ..pipeline import standalone
+from ..utils.dict_utils import get_dict, set_dict, item_parser
+
+
+@standalone(input_keys=["fbs"], return_keys=["out_key"])
+def balanced_scaling(
+    fbs,
+    scale,
+    element,
+    items=None,
+    constant=False,
+    origin=None,
+    add_to_origin=True,
+    elasticity=None,
+    fallback=None,
+    add_to_fallback=True,
+    out_key=None,
+    datablock=None
+):
+    """ Scales items in a Food Balance Sheet, while optionally maintaining
+    total quantities
+
+    Scales selected item quantities on a Food Balance Sheet, with the option
+    to keep the sum over an element DataArray constant.
+    Changes can be propagated to a set of origin FBS elements according to an
+    elasticity parameter.
 
     Parameters
     ----------
     fbs : xarray.Dataset
-        Input food balance sheet Dataset.
-    items : list
-        List of items to scale in the food balance sheet.
-    element : string
-        Name of the DataArray to scale.
+        Input food balance sheet Dataset
     scale : float
-        Scaling parameter after full adoption.
-    year : int, optional
-        Year of the Food Balance Sheet to use. If not set, the last year of the
-        array is used
-    adoption : string, optional
-        Shape of the scaling adoption curve. "logistic" uses a logistic model
-        for a slow-fast-slow adoption. "linear" uses a constant slope adoption
-        during the the "timescale period"
-    timescale : int, optional
-        Timescale for the scaling to be applied completely. If "year" +
-        "timescale" is greater than the last year in the array, it is extended
-        to accomodate the extra years.
-    origin : string, optional
-        Name of the DataArray which will be used to balance the food balance
-        sheets. Any change to the "element" DataArray will be reflected in this
-        DataArray.
+        Scaling parameter after full adoption
+    element : string
+        Name of the DataArray to scale
+    items : list, optional
+        List of items to be scaled in the food balance sheet. If None, all
+        items are scaled and 'constant' is ignored
     constant : bool, optional
         If set to True, the sum of element remains constant by scaling the non
-        selected items accordingly.
-    fallback : string, optional
-        Name of the DataArray used to provide the excess required to balance the
-        food balance sheet in case the "origin" falls below zero.
+        selected items accordingly
+    origin : string, list, optional
+        Names of the DataArrays which will be used as source for the quantity
+        changes. Any change to the "element" DataArray will be reflected in
+        these DataArrays
+    add_to_origin : bool, array, optional
+        Whether to add or subtract the difference from the respective origins
+    elasticity : float, array, optional
+        Relative fraction of the total difference to be assigned to each origin
+        element. Values are not normalized.
+    fallback : string, optional
+        Name of the DataArray to use as fallback in case the origin quantities
+        fall below zero
+    add_to_fallback : bool, optional
+        Whether to add or subtract the difference below zero in the origin
+        DataArray to the fallback array.
+    out_key : string, tuple, optional
+        Output datablock path to write results to. If not given, the input
+        path is overwritten
+    datablock : dict, optional
+        Dictionary containing data
 
     Returns
     -------
     data : xarray.Dataarray
-        Food balance sheet Dataset with scaled "food" values.
+        Food balance sheet Dataset with scaled values.
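+
+    Examples
+    --------
+    Double the "food" quantity of a single item while keeping the element
+    total constant (illustrative sketch, mirroring the unit tests):
+
+    >>> out = balanced_scaling(fbs, scale=2.0, element="food",
+    ...                        items="Beef", constant=True)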
""" - # Check for single item inputs - if np.isscalar(items): - items = [items] - - # Check for single item list fbs - input_item_list = fbs.Item.values - if np.isscalar(input_item_list): - input_item_list = [input_item_list] - if constant: - warnings.warn("Constant set to true but input only has a single item.") - constant = False - - # If no items are provided, we scale all of them. - if items is None or np.sort(items) is np.sort(input_item_list): - items = fbs.Item.values - if constant: - warnings.warn("Cannot keep food constant when scaling all items.") - constant = False - - # Define Dataarray to use as pivot - if "Year" in fbs.dims: - if year is None: - if np.isscalar(fbs.Year.values): - year = fbs.Year.values - fbs_toscale = fbs - else: - year = fbs.Year.values[-1] - fbs_toscale = fbs.isel(Year=-1) - else: - fbs_toscale = fbs.sel(Year=year) + # Pepare inputs + data = copy.deepcopy(get_dict(datablock, fbs)) + out = copy.deepcopy(data) + + if out_key is None: + out_key = fbs + if items is None: + scaled_items = data.Item.values + constant = False else: - fbs_toscale = fbs - try: - year = fbs.Year.values - except AttributeError: - year=0 - - # Define scale array based on year range - if adoption is not None: - if adoption == "linear": - from agrifoodpy.utils.scaling import linear_scale as scale_func - elif adoption == "logistic": - from agrifoodpy.utils.scaling import logistic_scale as scale_func + scaled_items = item_parser(data, items) + + if origin is not None and np.isscalar(origin): + origin = [origin] + + if np.isscalar(add_to_origin): + add_to_origin = [add_to_origin]*len(origin) + + if elasticity is None: + elasticity = [1/len(origin)]*len(origin) + + # Scale input + if origin is None: + out = out.fbs.scale_element( + element=element, + scale=scale, + items=scaled_items + ) + + else: + out = out.fbs.scale_add( + element_in=element, + element_out=origin, + scale=scale, + items=scaled_items, + add=add_to_origin, + elasticity=elasticity + ) + + # If quantities are set to be constant + if constant: + + delta = out[element] - data[element] + + # Identify non selected items and scaling + non_sel_items = np.setdiff1d(data.Item.values, scaled_items) + non_sel_scale = (data.sel(Item=non_sel_items)[element].sum(dim="Item") + - delta.sum(dim="Item")) \ + / data.sel(Item=non_sel_items)[element].sum(dim="Item") + + # Make sure no scaling occurs on inf and nan + non_sel_scale = non_sel_scale.where( + np.isfinite(non_sel_scale)).fillna(1.0) + + if origin is None: + out = out.fbs.scale_element( + element=element, + scale=non_sel_scale, + items=non_sel_items + ) + else: - raise ValueError("Adoption must be one of 'linear' or 'logistic'") - - scale_arr = scale_func(year, year, year+timescale-1, year+timescale-1, - c_init=1, c_end = scale) - - fbs_toscale = fbs_toscale * xr.ones_like(scale_arr) - + out = out.fbs.scale_add( + element_in=element, + element_out=origin, + scale=non_sel_scale, + items=non_sel_items, + add=add_to_origin, + elasticity=elasticity + ) + + # If a fallback DataArray is defined, transfer the excess negative + # quantities to it + if fallback is not None: + for orig in origin: + dif = out[orig].where(out[orig] < 0).fillna(0) + out[fallback] -= np.where(add_to_fallback, 1, -1)*dif + out[orig] = out[orig].where(out[orig] > 0, 0) + + set_dict(datablock, out_key, out) + + return datablock + + +@standalone(input_keys=["fbs", "convertion_arr"], return_keys=["out_key"]) +def fbs_convert( + fbs, + convertion_arr, + out_key=None, + datablock=None +): + """Scales quantities in a food 
+    """
+
+    # Retrieve target array
+    data = get_dict(datablock, fbs)
+
+    # Retrieve conversion array
+    if isinstance(convertion_arr, xr.DataArray):
+        convertion_arr = convertion_arr.where(
+            np.isfinite(convertion_arr), other=0)
     else:
-        scale_arr = scale
+        convertion_arr = get_dict(datablock, convertion_arr)
 
-    # Create a deep copy to modify and return
-    out = fbs_toscale.copy(deep=True)
-    osplit = origin.split("-")[-1]
-
-    out = out.fbs.scale_add(element, osplit, scale_arr, items,
-                            add = origin.startswith("-"))
-
+    # If no output key is provided, overwrite original dataset
+    if out_key is None:
+        out_key = fbs
 
-    if constant:
+    out = data*convertion_arr
+    set_dict(datablock, out_key, out)
+
+    return datablock
+
+
+@standalone(["fbs"], ["out_key"])
+def SSR(
+    fbs,
+    items=None,
+    per_item=False,
+    production="production",
+    imports="imports",
+    exports="exports",
+    out_key=None,
+    datablock=None,
+):
+    """Self-sufficiency ratio
+
+    Self-sufficiency ratio (SSR) or ratios for a list of item imports,
+    exports and production quantities.
+
+    Parameters
+    ----------
+    fbs : xarray.Dataset
+        Input Dataset containing an "Item" coordinate and, optionally, a
+        "Year" coordinate.
+    items : list, optional
+        list of items to compute the SSR for from the food Dataset. If no
+        list is provided, the SSR is computed for all items.
+    per_item : bool, optional
+        Whether to return an SSR for each item separately. Default is false
+    production : string, optional
+        Name of the DataArray containing the production data
+    imports : string, optional
+        Name of the DataArray containing the imports data
+    exports : string, optional
+        Name of the DataArray containing the exports data
+    datablock : dict, optional
+        Dictionary containing the food balance sheet Dataset.
+
+    Returns
+    -------
+    data : xarray.Dataarray
+        Self-sufficiency ratio or ratios for the list of items, one for each
+        year of the input food Dataset "Year" coordinate.
+
+    """
+
+    fbs = get_dict(datablock, fbs)
+
+    if items is not None:
+        if np.isscalar(items):
+            items = [items]
+        fbs = fbs.sel(Item=items)
+
+    domestic_use = fbs[production] + fbs[imports] - fbs[exports]
+
+    if per_item:
+        ssr = fbs[production] / domestic_use
+    else:
+        ssr = fbs[production].sum(dim="Item") / domestic_use.sum(dim="Item")
+
+    set_dict(datablock, out_key, ssr)
+
+    return datablock
+
+
+@standalone(["fbs"], ["out_key"])
+def IDR(
+    fbs,
+    items=None,
+    per_item=False,
+    imports="imports",
+    production="production",
+    exports="exports",
+    out_key=None,
+    datablock=None,
+):
+    """Import-dependency ratio
+
+    Import-dependency ratio (IDR) or ratios for a list of item imports,
+    exports and production quantities.
+
+    Parameters
+    ----------
+    fbs : xarray.Dataset
+        Input Dataset containing an "Item" coordinate and, optionally, a
+        "Year" coordinate.
+    items : list, optional
+        list of items to compute the IDR for from the food Dataset. If no
+        list is provided, the IDR is computed for all items.
+    per_item : bool, optional
+        Whether to return an IDR for each item separately. Default is false.
+    imports : string, optional
+        Name of the DataArray containing the imports data
+    exports : string, optional
+        Name of the DataArray containing the exports data
+    production : string, optional
+        Name of the DataArray containing the production data
+    datablock : dict, optional
+        Dictionary containing the food balance sheet Dataset.
+
+    Returns
+    -------
+    data : xarray.Dataarray
+        Import-dependency ratio or ratios for the list of items, one for
+        each year of the input food Dataset "Year" coordinate.
+    """
+
+    fbs = get_dict(datablock, fbs)
+
+    if items is not None:
+        if np.isscalar(items):
+            items = [items]
+        fbs = fbs.sel(Item=items)
+
+    domestic_use = fbs[production] + fbs[imports] - fbs[exports]
+
+    if per_item:
+        idr = fbs[imports] / domestic_use
+    else:
+        idr = fbs[imports].sum(dim="Item") / domestic_use.sum(dim="Item")
+
+    set_dict(datablock, out_key, idr)
 
-    delta = out[element] - fbs_toscale[element]
-
-    # Scale non selected items
-    non_sel_items = np.setdiff1d(fbs_toscale.Item.values, items)
-    non_sel_scale = (fbs_toscale.sel(Item=non_sel_items)[element].sum(dim="Item") - delta.sum(dim="Item")) / fbs_toscale.sel(Item=non_sel_items)[element].sum(dim="Item")
-
-    # Make sure inf and nan values are not scaled
-    non_sel_scale = non_sel_scale.where(np.isfinite(non_sel_scale)).fillna(1.0)
-
-    if np.any(non_sel_scale < 0):
-        warnings.warn("Additional consumption cannot be compensated by \
-                      reduction of non-selected items")
-
-    out = out.fbs.scale_add(element, osplit, non_sel_scale,
-                            non_sel_items, add = origin.startswith("-"))
-
-    # If fallback is defined, adjust to prevent negative values
-    if fallback is not None:
-        df = out[osplit].where(out[osplit] < 0).fillna(0)
-        out[fallback.split("-")[-1]] -= np.where(fallback.startswith("-"), 1, -1)*df
-        out[osplit] = out[osplit].where(out[osplit] > 0, 0)
-
-    return out
\ No newline at end of file
+    return datablock
diff --git a/agrifoodpy/food/tests/test_model.py b/agrifoodpy/food/tests/test_model.py
index b18aa65..1da1db8 100644
--- a/agrifoodpy/food/tests/test_model.py
+++ b/agrifoodpy/food/tests/test_model.py
@@ -4,5 +4,367 @@ import warnings
 
 def test_balanced_scaling():
-    
+    from agrifoodpy.food.model import balanced_scaling
+
+    items = ["Beef", "Apples"]
+    years = [2020, 2021]
+
+    fbs = xr.Dataset(
+        data_vars=dict(
+            imports=(["Year", "Item"], [[10., 20.], [30., 40.]]),
+            production=(["Year", "Item"], [[50., 60.], [70., 80.]]),
+            exports=(["Year", "Item"], [[5., 10.], [15., 20.]]),
+            food=(["Year", "Item"], [[55., 70.], [85., 100.]])
+        ),
+
+        coords=dict(Item=("Item", items), Year=("Year", years))
+    )
+
+    # Test basic result
+    result_basic = balanced_scaling(
+        fbs,
+        scale=1.0,
+        element="food"
+    )
+
+    xr.testing.assert_equal(result_basic, fbs)
+
+    # Test result with scalar scaling factor
+    result_scalar = balanced_scaling(
+        fbs,
+        scale=2.0,
+        element="food"
+    )
+
+    ex_result_scalar = xr.Dataset(
+        data_vars=dict(
+            imports=(["Year", "Item"], [[10., 20.], [30., 40.]]),
+            production=(["Year", "Item"], [[50., 60.], [70., 80.]]),
+            exports=(["Year", "Item"], [[5., 10.], [15., 20.]]),
+            food=(["Year", "Item"], [[110., 140.], [170., 200.]])
+        ),
+
+        coords=dict(Item=("Item", items), Year=("Year", years))
+    )
+
+    xr.testing.assert_equal(result_scalar, ex_result_scalar)
+
+    # Test results with 
selected items + result_items = balanced_scaling( + fbs, + scale=2.0, + element="food", + items="Beef" + ) + + ex_result_items = xr.Dataset( + data_vars=dict( + imports=(["Year", "Item"], [[10., 20.], [30., 40.]]), + production=(["Year", "Item"], [[50., 60.], [70., 80.]]), + exports=(["Year", "Item"], [[5., 10.], [15., 20.]]), + food=(["Year", "Item"], [[110., 70.], [170., 100.]]) + ), + + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + xr.testing.assert_equal(result_items, ex_result_items) + + # Test results with selected items and setting constant to True + result_constant = balanced_scaling( + fbs, + scale=2.0, + element="food", + items="Beef", + constant=True + ) + + ex_result_constant = xr.Dataset( + data_vars=dict( + imports=(["Year", "Item"], [[10., 20.], [30., 40.]]), + production=(["Year", "Item"], [[50., 60.], [70., 80.]]), + exports=(["Year", "Item"], [[5., 10.], [15., 20.]]), + food=(["Year", "Item"], [[110., 15.], [170., 15.]]) + ), + + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + xr.testing.assert_equal(result_constant, ex_result_constant) + + # Test selected items, constant to True, origin from "production" + result_origin = balanced_scaling( + fbs, + scale=2.0, + element="food", + items="Beef", + constant=True, + origin="production" + ) + + ex_result_origin = xr.Dataset( + data_vars=dict( + imports=(["Year", "Item"], [[10., 20.], [30., 40.]]), + # production=(["Year", "Item"], [[50., 60.], [70., 80.]]), + production=(["Year", "Item"], [[105., 5.], [155., -5.]]), + exports=(["Year", "Item"], [[5., 10.], [15., 20.]]), + # food=(["Year", "Item"], [[55., 70.], [85., 100.]]) + food=(["Year", "Item"], [[110., 15.], [170., 15.]]) + ), + + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + xr.testing.assert_equal(result_origin, ex_result_origin) + + # Test with fallback + result_fallback = balanced_scaling( + fbs, + scale=2.0, + element="food", + items="Beef", + constant=True, + origin="production", + fallback="imports" + ) + + ex_result_fallback = xr.Dataset( + data_vars=dict( + # imports=(["Year", "Item"], [[10., 20.], [30., 40.]]), + imports=(["Year", "Item"], [[10., 20.], [30., 45.]]), + # production=(["Year", "Item"], [[50., 60.], [70., 80.]]), + production=(["Year", "Item"], [[105., 5.], [155., 0.]]), + exports=(["Year", "Item"], [[5., 10.], [15., 20.]]), + # food=(["Year", "Item"], [[55., 70.], [85., 100.]]) + food=(["Year", "Item"], [[110., 15.], [170., 15.]]) + ), + + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + xr.testing.assert_equal(result_fallback, ex_result_fallback) + + # Test selected items, constant to True, origin from "exports" + # add_to_origin set to False + result_add_origin = balanced_scaling( + fbs, + scale=2.0, + element="food", + items="Beef", + constant=True, + origin="exports", + add_to_origin=False + ) + + ex_result_add_origin = xr.Dataset( + data_vars=dict( + imports=(["Year", "Item"], [[10., 20.], [30., 40.]]), + production=(["Year", "Item"], [[50., 60.], [70., 80.]]), + # exports=(["Year", "Item"], [[5., 10.], [15., 20.]]), + exports=(["Year", "Item"], [[-50., 65.], [-70., 105.]]), + # food=(["Year", "Item"], [[55., 70.], [85., 100.]]) + food=(["Year", "Item"], [[110., 15.], [170., 15.]]) + ), + + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + xr.testing.assert_equal(result_add_origin, ex_result_add_origin) + + + # Test with multiple origins and separate elasticity values + result_elasticity = balanced_scaling( + fbs, + scale=2.0, + element="food", + items="Beef", + 
constant=True, + origin=["production", "imports"], + elasticity=[0.8, 0.2] + ) + + ex_result_elasticity = xr.Dataset( + data_vars=dict( + # imports=(["Year", "Item"], [[10., 20.], [30., 40.]]), + imports=(["Year", "Item"], [[21., 9.], [47., 23.]]), + # production=(["Year", "Item"], [[50., 60.], [70., 80.]]), + production=(["Year", "Item"], [[94., 16.], [138., 12.]]), + exports=(["Year", "Item"], [[5., 10.], [15., 20.]]), + # food=(["Year", "Item"], [[55., 70.], [85., 100.]]) + food=(["Year", "Item"], [[110., 15.], [170., 15.]]) + ), + + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + xr.testing.assert_equal(result_elasticity, ex_result_elasticity) + + # Test with a scaling array + from agrifoodpy.utils.scaling import linear_scale + + scale_arr = linear_scale( + 2020, + 2020, + 2021, + 2021, + c_init=1.0, + c_end=2.0 + ) + + result_scale_arr = balanced_scaling( + fbs, + scale=scale_arr, + element="food", + items="Beef", + ) + + ex_result_scale_arr = xr.Dataset( + data_vars=dict( + imports=(["Year", "Item"], [[10., 20.], [30., 40.]]), + production=(["Year", "Item"], [[50., 60.], [70., 80.]]), + exports=(["Year", "Item"], [[5., 10.], [15., 20.]]), + # food=(["Year", "Item"], [[55., 70.], [85., 100.]]) + food=(["Year", "Item"], [[55., 70.], [170., 100.]]) + ), + + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + xr.testing.assert_equal(result_scale_arr, ex_result_scale_arr) + + +def test_SSR(): + + from agrifoodpy.food.model import SSR + + items = ["Beef", "Apples"] + years = [2020, 2021] + + fbs = xr.Dataset( + data_vars=dict( + imports=(["Year", "Item"], [[10, 20], [30, 40]]), + exports=(["Year", "Item"], [[5, 10], [15, 20]]), + production=(["Year", "Item"], [[50, 60], [70, 80]]), + ), + + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + # Test basic result on all items + result_basic = SSR(fbs) + ex_result_basic = xr.DataArray([0.88, 0.810810], dims=("Year"), + coords={"Year": years}) + + xr.testing.assert_allclose(result_basic, ex_result_basic) + + # Test for an item subset + result_subset = SSR(fbs, items="Beef") + ex_result_subset = xr.DataArray([0.909090, 0.823529], dims=("Year"), + coords={"Year": years}) + + xr.testing.assert_allclose(result_subset, ex_result_subset) + + # Test per item + result_peritem = SSR(fbs, per_item=True) + ex_result_peritem = xr.DataArray([[0.909090, 0.857142], [0.823529, 0.8]], + dims=(["Year", "Item"]), + coords={"Year": years, "Item": items}) + + xr.testing.assert_allclose(result_peritem, ex_result_peritem) + +def test_IDR(): + + from agrifoodpy.food.model import IDR + + items = ["Beef", "Apples"] + years = [2020, 2021] + + fbs = xr.Dataset( + data_vars=dict( + imports=(["Year", "Item"], [[10, 20], [30, 40]]), + exports=(["Year", "Item"], [[5, 10], [15, 20]]), + production=(["Year", "Item"], [[50, 60], [70, 80]]), + ), + + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + # Test basic result on all items + result_basic = IDR(fbs) + ex_result_basic = xr.DataArray([0.24, 0.37837838], dims=("Year"), + coords={"Year": years}) + + xr.testing.assert_allclose(result_basic, ex_result_basic) + + # Test for an item subset + result_subset = IDR(fbs, items="Beef") + ex_result_subset = xr.DataArray([0.1818181, 0.352941], dims=("Year"), + coords={"Year": years}) + + xr.testing.assert_allclose(result_subset, ex_result_subset) + + # Test per item + result_peritem = IDR(fbs, per_item=True) + ex_result_peritem = xr.DataArray([[0.1818181, 0.285714], [0.352941, 0.4]], + dims=(["Year", "Item"]), + coords={"Year": 
years, "Item": items}) + + xr.testing.assert_allclose(result_peritem, ex_result_peritem) + +def test_fbs_convert(): + + from agrifoodpy.food.model import fbs_convert + + items = ["Beef", "Apples"] + years = [2020, 2021] + + fbs = xr.Dataset( + data_vars=dict( + imports=(["Year", "Item"], [[10, 20], [30, 40]]), + exports=(["Year", "Item"], [[5, 10], [15, 20]]), + production=(["Year", "Item"], [[50, 60], [70, 80]]), + ), + + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + # Test basic result + result_basic = fbs_convert(fbs, convertion_arr=1.0) + ex_result_basic = xr.Dataset( + data_vars=dict( + imports=(["Year", "Item"], [[10, 20], [30, 40]]), + exports=(["Year", "Item"], [[5, 10], [15, 20]]), + production=(["Year", "Item"], [[50, 60], [70, 80]]), + ), + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + xr.testing.assert_allclose(result_basic, ex_result_basic) + + # Test with a conversion array + conversion_arr = xr.DataArray([1.0, 2.0], dims=["Item"], coords={"Item": items}) + result_conversion = fbs_convert(fbs, convertion_arr=conversion_arr) + ex_result_conversion = xr.Dataset( + data_vars=dict( + imports=(["Year", "Item"], [[10, 40], [30, 80]]), + exports=(["Year", "Item"], [[5, 20], [15, 40]]), + production=(["Year", "Item"], [[50, 120], [70, 160]]), + ), + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + xr.testing.assert_allclose(result_conversion, ex_result_conversion) + + # Test with a conversion factor + result_factor = fbs_convert(fbs, convertion_arr=2.0) + ex_result_factor = xr.Dataset( + data_vars=dict( + imports=(["Year", "Item"], [[20, 40], [60, 80]]), + exports=(["Year", "Item"], [[10, 20], [30, 40]]), + production=(["Year", "Item"], [[100, 120], [140, 160]]), + ), + coords=dict(Item=("Item", items), Year=("Year", years)) + ) + + xr.testing.assert_allclose(result_factor, ex_result_factor) diff --git a/agrifoodpy/impact/data/PN18.nc b/agrifoodpy/impact/data/PN18.nc deleted file mode 100644 index c3a369e..0000000 Binary files a/agrifoodpy/impact/data/PN18.nc and /dev/null differ diff --git a/agrifoodpy/impact/data/PN18_FAOSTAT.nc b/agrifoodpy/impact/data/PN18_FAOSTAT.nc deleted file mode 100644 index 828d474..0000000 Binary files a/agrifoodpy/impact/data/PN18_FAOSTAT.nc and /dev/null differ diff --git a/agrifoodpy/impact/impact.py b/agrifoodpy/impact/impact.py index 863e7dd..92b0897 100644 --- a/agrifoodpy/impact/impact.py +++ b/agrifoodpy/impact/impact.py @@ -3,9 +3,16 @@ import numpy as np import xarray as xr -from agrifoodpy.array_accessor import XarrayAccessorBase +from ..array_accessor import XarrayAccessorBase -def impact(items, regions, quantities, datasets=None, long_format=True): + +def impact( + items, + regions, + quantities, + datasets=None, + long_format=True +): """Impact style dataset constructor Parameters @@ -41,15 +48,15 @@ def impact(items, regions, quantities, datasets=None, long_format=True): _items = np.unique(items) _regions = np.unique(regions) - coords = {"Item" : _items, - "Region" : _regions} + coords = {"Item": _items, + "Region": _regions} # find positions in output array to organize data ii = [np.searchsorted(_items, items), np.searchsorted(_regions, regions)] size = (len(_items), len(_regions)) # Create empty dataset - data = xr.Dataset(coords = coords) + data = xr.Dataset(coords=coords) if long_format: ndims = 2 @@ -89,6 +96,7 @@ def impact(items, regions, quantities, datasets=None, long_format=True): return data + @xr.register_dataset_accessor("impact") class Impact(XarrayAccessorBase): @@ -98,8 
+106,8 @@ def __init__(self, xarray_obj): @staticmethod def _validate(obj): - """Check that the Impact object is an xarray.DataArray or xarray.Dataset - """ + """Check that the Impact object is an xarray.DataArray or xarray. + Dataset""" if not isinstance(obj, xr.Dataset): raise TypeError("Food Balance Sheet must be an xarray.DataSet") @@ -111,10 +119,10 @@ def match(self, matching_matrix): impact: xarray.DataSet xarray dataset including a list of items and impacts matching_matrix: pandas dataframe - Defines how items are matched from the input to the output datasets, - with the values of the matrix indicating the scaling of the - impact quantities. Column names indicate the original item list, while - row names indicate the new item list + Defines how items are matched from the input to the output + datasets, with the values of the matrix indicating the scaling of + the impact quantities. Column names indicate the original item + list, while row names indicate the new item list Returns ------- @@ -132,20 +140,20 @@ def match(self, matching_matrix): # First column is the item code column in_items_mat = matching_matrix.columns[1:] - assert np.equal(in_items, in_items_mat).all() , \ + assert np.equal(in_items, in_items_mat).all(), \ "Input items do not match assignment matrix" # Again, we avoid first column mat = matching_matrix.iloc[:, 1:].fillna(0).to_numpy() dataset_out = xr.Dataset( - coords = dict( + coords=dict( Item=("Item", out_items), ) ) for var in list(impact.keys()): data_out = np.matmul(mat, impact[var].values) - dataset_out = dataset_out.assign({var:("Item", data_out)}) + dataset_out = dataset_out.assign({var: ("Item", data_out)}) - return dataset_out \ No newline at end of file + return dataset_out diff --git a/agrifoodpy/impact/model.py b/agrifoodpy/impact/model.py index db88ca1..a4e9792 100644 --- a/agrifoodpy/impact/model.py +++ b/agrifoodpy/impact/model.py @@ -1,29 +1,32 @@ """ Module for impact intervention models """ - -import xarray as xr import numpy as np -def fbs_impacts(fbs, impact_element, population=None, sum_dims=None): - """Computes total impacts from quantities in a food balance sheet Dataset or - food element quantity DataArray, summing over items, regions or years if + +def fbs_impacts( + fbs, + impact_element, + population=None, + sum_dims=None +): + """Computes total impacts from quantities in a food balance sheet Dataset + or food element quantity DataArray, summing over items, regions or years if instructed. Parameters ---------- fbs : xarray.DataArray or xarray.Dataset Food Balance Sheet array with food quantities for a set of items, - regions and/or years. + regions and/or years. impact_element : xarray.DataArray - Impact DataArray containing the impacts for set of items, regions and/or - years. + Impact DataArray containing the impacts for set of items, regions + and/or years. population : xarray.DataArray If given, the input impacts are considered per-capita values and multiplied by the population array - sum_dims : str Dimension labels to sum over - + Returns ------- total_impact : xarray.DataArray or xarray.Dataset @@ -36,28 +39,29 @@ def fbs_impacts(fbs, impact_element, population=None, sum_dims=None): if sum_dims is not None: total_impact = total_impact.sum(dim=sum_dims) - + return total_impact + def fair_co2_only(emissions): """Simple Interface to FaIR, the Finite-amplitude Impulse-Response atmosferic model. 
 
-    Computes the concentration, radiative forcing and temperature anomaly for an
-    array of CO2 emissions per year assuming a clean atmosphere and default
+    Computes the concentration, radiative forcing and temperature anomaly for
+    an array of CO2 emissions per year assuming a clean atmosphere and default
     values for atmospheric parameters.
 
     Parameters
    ----------
     emissions : xarray.DataArray or xarray.Dataset
-        Array containing GHG emissions in Gt CO2e per year 
+        Array containing GHG emissions in Gt CO2e per year
 
     Returns
     -------
     T : xarray.DataArray
         Temperature anomaly in Kelvin degrees at the zero layer
     C : xarray.DataArray
-        Atmosferic CO2e concetration in ppm 
+        Atmospheric CO2e concentration in ppm
     F : xarray.DataArray
         Effective radiative forcing in W m^-2
     """
@@ -69,7 +73,7 @@ def fair_co2_only(emissions):
     years = np.unique(emissions.Year.values)
 
     # Configure method, timebounds, and labels
-    f.ghg_method='myhre1998'
+    f.ghg_method = 'myhre1998'
     f.define_time(years[0]-0.5, years[-1]+0.5, 1)
     f.define_scenarios(["default"])
     f.define_configs(["default"])
@@ -80,12 +84,12 @@ def fair_co2_only(emissions):
         'CO2': {
             'type': 'co2',
             'input_mode': 'emissions',
-            # it doesn't behave as a GHG itself in the model, but as a precursor
-            'greenhouse_gas': True,
+            # doesn't behave as a GHG itself in the model, but as a precursor
+            'greenhouse_gas': True,
             'aerosol_chemistry_from_emissions': False,
             'aerosol_chemistry_from_concentration': False,
         }}
-    
+
     f.define_species(species, properties)
 
     # Allocate arrays
@@ -94,10 +98,10 @@ def fair_co2_only(emissions):
     # Set default values
     fill(f.climate_configs["ocean_heat_transfer"], [1.1, 1.6, 0.9],
          config='default')
-    
+
     fill(f.climate_configs["ocean_heat_capacity"], [8, 14, 100],
          config='default')
-    
+
     fill(f.climate_configs["deep_ocean_efficacy"], 1.1, config='default')
 
     # Set initial conditions.
@@ -109,22 +113,22 @@ def fair_co2_only(emissions): # Fill species configs f.fill_species_configs() - - f.emissions.loc[{"scenario":"default", - "specie":"CO2", - "config":"default"}] = emissions.to_numpy() - + + f.emissions.loc[{"scenario": "default", + "specie": "CO2", + "config": "default"}] = emissions.to_numpy() + # Run and return f.run(progress=False) - return_dict = {"scenario":"default", "config":"default"} + return_dict = {"scenario": "default", "config": "default"} T = f.temperature.sel(return_dict).drop_vars( ["scenario", "config", "layer"]).squeeze() - + C = f.concentration.sel(return_dict).drop_vars( ["scenario", "config", "specie"]).squeeze() - + F = f.forcing.sel(return_dict).drop_vars( ["scenario", "config", "specie"]).squeeze() diff --git a/agrifoodpy/land/land.py b/agrifoodpy/land/land.py index 9a06076..baae270 100644 --- a/agrifoodpy/land/land.py +++ b/agrifoodpy/land/land.py @@ -3,11 +3,11 @@ import numpy as np import xarray as xr -import matplotlib import matplotlib.pyplot as plt from matplotlib import colors as mcolors import matplotlib.patches as mpatches + @xr.register_dataarray_accessor("land") class LandDataArray: def __init__(self, xarray_obj): @@ -16,19 +16,19 @@ def __init__(self, xarray_obj): @staticmethod def _validate(obj): - """Validate land xarray, checking it is a DataSet and has the minimum set - of coordinates + """Validate land xarray, checking it is a DataSet and has the minimum + set of coordinates """ if not isinstance(obj, xr.DataArray): raise TypeError("Land array must be an xarray.Dataarray") - + if "x" not in obj.dims or "y" not in obj.dims: raise AttributeError("Land array must have 'x' and 'y' dimensions") def plot(self, ax=None, class_coord=None, colors=None, labels=None, **kwargs): """Plot a LandDataArray - + Generates a plot of a LandDataArray using matplotlib imshow, without interpolation and setting the origin low to align north at the top. If a dominant classification map type is provided, the plot will be @@ -46,12 +46,12 @@ def plot(self, ax=None, class_coord=None, colors=None, labels=None, Dictionary of colors to use for each land class. If not provided, the default matplotlib colour map is used. labels : list of strings - Dictionary of labels to use for each land class. If not provided and - the map is a class percentage map, the coordinate values are used - as labels. + Dictionary of labels to use for each land class. If not provided + and the map is a class percentage map, the coordinate values are + used as labels. **kwargs : dict Style options to be passed to the imshow function. 
- + Returns ------- ax : matplotlib axes instance @@ -62,7 +62,7 @@ def plot(self, ax=None, class_coord=None, colors=None, labels=None, if ax is None: f, ax = plt.subplots() - # Check the map type by checking if there are any additional coordinates + # Check the map type by checking for additional coordinates extra_coords = [dim for dim in map.dims if dim not in ["x", "y"]] if len(extra_coords) >= 1: if labels is None: @@ -89,32 +89,40 @@ def plot(self, ax=None, class_coord=None, colors=None, labels=None, ymin, ymax = map.y.values[[0, -1]] ax.imshow(map, interpolation="none", origin="lower", - extent=[xmin-dx_low, xmax+dx_high, ymin-dy_low, ymax+dy_high], + extent=[xmin-dx_low, + xmax+dx_high, + ymin-dy_low, + ymax+dy_high], cmap=cmap, norm=norm) - + patches = [mpatches.Patch(color=colors[i], - label=labels[i]) for i in np.arange(len(labels))] - + label=labels[i]) + for i in np.arange(len(labels))] + ax.legend(handles=patches, loc="best") - + return ax - - def area_by_type(self, values = None, dim = None): + + def area_by_type( + self, + values=None, + dim=None + ): """Area per map category in a LandDataArray - - Returns a DataArray with the total number of pixels for each category or - category subset of the LandDataArray. + + Returns a DataArray with the total number of pixels for each category + or category subset of the LandDataArray. Parameters ---------- values : int, array - List of category types to return the total area for. If not set, the - function returns areas for all values found on the map, excluding - nan values. + List of category types to return the total area for. If not set, + the function returns areas for all values found on the map, + excluding nan values. dim : string Name to assign to the categories coordinate. If not set, the input DataArray name is used instead. - + Returns ------- xarray.DataArray @@ -136,17 +144,25 @@ def area_by_type(self, values = None, dim = None): # Prevent nan values from being counted nan_indices = np.isnan(values) values = values[~nan_indices] - area = [ones.where(map==value).sum() for value in values] - - area_arr = xr.DataArray(area, dims=dim, coords={dim:values}) + area = [ones.where(map == value).sum() for value in values] + + area_arr = xr.DataArray(area, dims=dim, coords={dim: values}) return area_arr - def area_overlap(self, map_right, values_left = None, values_right = None, dim_left=None, dim_right=None): + def area_overlap( + self, + map_right, + values_left=None, + values_right=None, + dim_left=None, + dim_right=None + ): """Area overlap of selected categories between two maps - - Returns a DataArray with the total number of pixels for each combination - of categories from the left and right map selected categories. Casa + + Returns a DataArray with the total number of pixels for each + combination of categories from the left and right map selected + categories. Parameters ---------- @@ -161,12 +177,14 @@ def area_overlap(self, map_right, values_left = None, values_right = None, dim_l overlaps for. If not set, all category types are used, except nan values. dim_left : string - Names to assign to the category coordinates on the output DataArray. + Names to assign to the category coordinates on the output + DataArray. If not set, the input DataArray name is used instead. dim_right : string - Names to assign to the category coordinates on the output DataArray. + Names to assign to the category coordinates on the output + DataArray. If not set, the input DataArray name is used instead. 
-        
+
         Returns
         -------
         area_arr : xarray.DataArray
@@ -175,8 +193,8 @@ def area_overlap(self, map_right, values_left = None, values_right = None, dim_l
         """
         map_left = self._obj
 
-        # Check that both maps have the same dimensions and coordinates. if not,
-        # this raises a ValueError (alternatively, align the maps and use )
+        # Check that both maps have the same dimensions and coordinates.
+        # Otherwise, this raises a ValueError
         xr.align(map_left, map_right, join='exact')
 
         if dim_left is None:
@@ -203,14 +221,23 @@ def area_overlap(self, map_right, values_left = None, values_right = None, dim_l
         values_right = values_right[~nan_indices_right]
 
         ones = xr.ones_like(map_left)
-        area = [[ones.where(map_left==vl).where(map_right==vr).sum().values for vr in values_right] for vl in values_left]
+        area = [[ones.where(map_left == vl).where(map_right == vr).sum().values
+                 for vr in values_right] for vl in values_left]
 
-        area_arr = xr.DataArray(area, dims=[dim_left, dim_right], coords={dim_left:values_left, dim_right:values_right})
+        area_arr = xr.DataArray(area,
+                                dims=[dim_left, dim_right],
+                                coords={dim_left: values_left,
+                                        dim_right: values_right})
 
         return area_arr
-    
-    def category_match(self, map_right, values_left=None, values_right=None,
-                       join="left"):
+
+    def category_match(
+        self,
+        map_right,
+        values_left=None,
+        values_right=None,
+        join="left"
+    ):
         """Returns a land Dataarray with values where a selected overlap
         occurs between categories from two maps. This returns the values from
         the left map where coincidence occurs between the left and right map.
@@ -225,7 +252,7 @@ def category_match(self, map_right, values_left=None, values_right=None,
         values_right : int, array
             List of category types from the right map to match. If not set,
             all category types are used, except nan values.
-        
+
         Returns
         -------
         category_match : xarray.DataArray
@@ -257,8 +284,13 @@ def category_match(self, map_right, values_left=None, values_right=None,
 
         return category_match
 
-    def dominant_class(self, class_coord=None, return_index=False):
-        """Returns a land DataArray with the dominant land class for each pixel.
+    def dominant_class(
+        self,
+        class_coord=None,
+        return_index=False
+    ):
+        """Returns a land DataArray with the dominant land class for each
+        pixel.
 
         Parameters
         ----------
@@ -282,8 +314,8 @@ class value.
 
         if return_index:
             len_class = len(map[class_coord].values)
-            map = map.assign_coords({class_coord:np.arange(len_class)})
+            map = map.assign_coords({class_coord: np.arange(len_class)})
 
         map = map.idxmax(dim=class_coord, skipna=True)
 
-        return map
\ No newline at end of file
+        return map
diff --git a/agrifoodpy/land/model.py b/agrifoodpy/land/model.py
index 0021dc8..0134455 100644
--- a/agrifoodpy/land/model.py
+++ b/agrifoodpy/land/model.py
@@ -3,16 +3,25 @@
 """
 
 import numpy as np
-from agrifoodpy.land.land import LandDataArray
+from ..land.land import LandDataArray
+
+
+def land_sequestration(
+    land_da,
+    use_id,
+    fraction,
+    max_seq,
+    years=None,
+    growth_timescale=10,
+    growth="linear",
+    ha_per_pixel=1
+):
 
-def land_sequestration(land_da, use_id, fraction, max_seq, years=None,
-                       growth_timescale=10, growth="linear", ha_per_pixel=1):
-    """Additional land use sequestration model.
-    
+    """Additional land use sequestration model.
+
+    Computes the annual additional sequestration from land use change as a
     function of the different land category converted fractional areas.
-    
+
     Given a Land Data Array map with pixel id values for the different land
     use types, the model computes additional sequestration from land given the
     new value in [t CO2e / yr].
@@ -25,7 +34,7 @@ def land_sequestration(land_da, use_id, fraction, max_seq, years=None, use_id : int, array Land category identifiers for the land uses to be converted. fraction : float, array - Fraction of each repurposed land category + Fraction of each repurposed land category max_seq : float Maximum sequestration achieved at the end of the growth period in [t CO2e / yr] @@ -44,7 +53,7 @@ def land_sequestration(land_da, use_id, fraction, max_seq, years=None, seq : xarray.DataArray DataArray with the per year sequestration """ - + if np.isscalar(use_id): use_id = np.array(use_id) @@ -56,9 +65,9 @@ def land_sequestration(land_da, use_id, fraction, max_seq, years=None, if not (fraction >= 0).all() and (fraction <= 1).all(): raise ValueError("Input fraction values must be between 0 and 1") - + pixel_count_category = land_da.land.area_by_type(values=use_id) - + # area in hectares area_category = pixel_count_category * ha_per_pixel @@ -68,7 +77,7 @@ def land_sequestration(land_da, use_id, fraction, max_seq, years=None, from agrifoodpy.utils.scaling import logistic_scale as growth_shape else: raise ValueError("Growth must be one of 'linear' or 'logistic'") - + if years is not None: # single scalar value if np.isscalar(years): @@ -83,8 +92,8 @@ def land_sequestration(land_da, use_id, fraction, max_seq, years=None, else: scale = 1 - # agroforestry + # agroforestry area = area_category * fraction total_seq = area.sum() * scale * max_seq - return total_seq \ No newline at end of file + return total_seq diff --git a/agrifoodpy/pipeline/__init__.py b/agrifoodpy/pipeline/__init__.py new file mode 100644 index 0000000..26952e7 --- /dev/null +++ b/agrifoodpy/pipeline/__init__.py @@ -0,0 +1,6 @@ +""" +This module provides methods to build a pipeline for the AgriFoodPy package. +""" + +from .pipeline import * +from ..utils.dict_utils import * \ No newline at end of file diff --git a/agrifoodpy/pipeline/pipeline.py b/agrifoodpy/pipeline/pipeline.py new file mode 100644 index 0000000..ecc3f60 --- /dev/null +++ b/agrifoodpy/pipeline/pipeline.py @@ -0,0 +1,200 @@ +"""Pipeline implementation + +This class provides methods to build and manage a pipeline for end to end +simulations using the agrifoodpy package. +""" + +import copy +from functools import wraps +from inspect import signature +import time + + +class Pipeline(): + '''Class for constructing and running pipelines of functions with + individual sets of parameters.''' + def __init__(self, datablock=None): + self.nodes = [] + self.params = [] + self.names = [] + if datablock is not None: + self.datablock = datablock + else: + self.datablock = {} + + @classmethod + def read(cls, filename): + """Read a pipeline from a configuration file + + Parameters + ---------- + filename : str + The name of the configuration file. + + Returns + ------- + pipeline : Pipeline + The pipeline object. + """ + raise NotImplementedError("This method is not yet implemented.") + + def datablock_write(self, path, value): + """Writes a single value to the datablock at the specified path. + + Parameters + ---------- + path : list + The datablock path to the value to be written. + value : any + The value to be written. + """ + current = self.datablock + + for key in path[:-1]: + current = current.setdefault(key, {}) + current[path[-1]] = value + + def add_node(self, node, params={}, name=None): + """Adds a node to the pipeline, including its function and execution + parameters. + + Parameters + ---------- + node : function + The function to be executed on this node. 
+ params : dict, optional + The parameters to be passed to the node function. + name : str, optional + The name of the node. If not provided, a generic name will be + assigned. + """ + + # Copy the parameters to avoid modifying the original dictionaries + params = copy.deepcopy(params) + + if name is None: + name = "Node {}".format(len(self.nodes) + 1) + + self.names.append(name) + self.nodes.append(node) + self.params.append(params) + + def run(self, from_node=0, to_node=None, timing=False): + """Runs the pipeline + + Parameters + ---------- + from_node : int, optional + The index of the first node to be executed. Defaults to 0. + + to_node : int, optional + The index of the last node to be executed. If not provided, all + nodes will be executed + + timing : bool, optional + If True, the execution time of each node will be printed. Defaults + to False. + """ + + if to_node is None: + to_node = len(self.nodes) + + pipeline_start_time = time.time() + + # Execute the node functions within the specified range + for i in range(from_node, to_node): + node = self.nodes[i] + params = self.params[i] + + node_start_time = time.time() + + # Run node + self.datablock = node(datablock=self.datablock, **params) + + node_end_time = time.time() + node_time = node_end_time - node_start_time + + if timing: + print(f"Node {i + 1}: {self.names[i]}, \ + executed in {node_time:.4f} seconds.") + + pipeline_end_time = time.time() + pipeline_time = pipeline_end_time - pipeline_start_time + + if timing: + print(f"Pipeline executed in {pipeline_time:.4f} seconds.") + + +def standalone(input_keys, return_keys): + """ Decorator to make a pipeline node available as a standalone function + + If datablock is not passed as a kwarg, and datasets are passed directly + instead of datablock keys, a temporary datablock is created and the + datasets associated with the arguments in input_keys are added to it. + The function then returns the specified datasets in return_keys. + + Parameters + ---------- + input_keys: list of strings + List of dataset keys to be added to the temporary datablock + return_keys: list of strings + List of keys to datablock datasets to be returned by the decorated + function. 
+ + Returns + ------- + + wrapper: function + The decorated function + + """ + def pipeline_decorator(test_func): + @wraps(test_func) + def wrapper(*args, **kwargs): + + # Identify positional arguments + func_sig = signature(test_func) + func_params = func_sig.parameters + + kwargs.update({key: arg for key, arg in zip(func_params.keys(), + args)}) + + # Make sure the datablock is passed as a kwarg, if not, create it + datablock = kwargs.get("datablock", None) + + # Fill in missing arguments with their default values + for key, param in func_params.items(): + if key not in kwargs: + # Check if there is a default value + if param.default is not param.empty: + kwargs[key] = param.default + + standalone = datablock is None + if standalone: + # Create datablock + datablock = {key: kwargs[key] + for key in kwargs if key in input_keys} + kwargs["datablock"] = datablock + + # Create list of keys for passed arguments only + for key in input_keys: + if kwargs.get(key, None) is not None: + kwargs[key] = key + + # Fill return keys if they are not passed or are None + for key in return_keys: + if kwargs.get(key, None) is None: + kwargs[key] = key + + result = test_func(**kwargs) + + # return tuple of results + if standalone: + if len(return_keys) == 1: + return result[kwargs[return_keys[0]]] + else: + return tuple(result[key] for key in return_keys) + + return result + return wrapper + return pipeline_decorator diff --git a/agrifoodpy/pipeline/tests/__init__.py b/agrifoodpy/pipeline/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agrifoodpy/pipeline/tests/test_pipeline.py b/agrifoodpy/pipeline/tests/test_pipeline.py new file mode 100644 index 0000000..1562c91 --- /dev/null +++ b/agrifoodpy/pipeline/tests/test_pipeline.py @@ -0,0 +1,158 @@ +from agrifoodpy.pipeline.pipeline import Pipeline, standalone + +def test_init(): + pipeline = Pipeline() + +def test_add_node(): + pipeline = Pipeline() + def dummy_node(datablock, param1): + datablock['result'] = param1 + return datablock + + pipeline.add_node(dummy_node, params={'param1': 10}, name='Test Node') + assert(len(pipeline.nodes) == 1) + assert(pipeline.names[0] == 'Test Node') + assert(pipeline.params[0] == {'param1': 10}) + +def test_run_pipeline(): + pipeline = Pipeline() + def node1(datablock, param1): + datablock['result1'] = param1 + return datablock + + def node2(datablock, param2): + datablock['result2'] = param2 + return datablock + + pipeline.add_node(node1, params={'param1': 10}) + pipeline.add_node(node2, params={'param2': 20}) + + pipeline.run() + assert(pipeline.datablock['result1'] == 10) + assert(pipeline.datablock['result2'] == 20) + +def test_run_first_node_only(): + pipeline = Pipeline() + def node1(datablock, param1): + datablock['result1'] = param1 + return datablock + + def node2(datablock, param2): + datablock['result2'] = param2 + return datablock + + pipeline.add_node(node1, params={'param1': 10}) + pipeline.add_node(node2, params={'param2': 20}) + + pipeline.run(to_node=1) + assert(pipeline.datablock['result1'] == 10) + assert('result2' not in pipeline.datablock) + +def test_run_nodes_separately(): + pipeline = Pipeline() + def node1(datablock, param1): + datablock['result1'] = param1 + return datablock + + def node2(datablock, param2): + datablock['result1'] *= param2 + return datablock + + pipeline.add_node(node1, params={'param1': 10}) + pipeline.add_node(node2, params={'param2': 2}) + + pipeline.run(to_node=1) + assert(pipeline.datablock['result1'] == 10) + assert('result2' not in 
pipeline.datablock) + + pipeline.run(from_node=1) + assert(pipeline.datablock['result1'] == 20) + +def test_standalone_decorator(): + pipeline = Pipeline() + @standalone([], ['output1']) + def test_func(input1, output1="output1", datablock=None): + datablock[output1] = input1 * 2 + return datablock + + result = test_func(5) + assert(result == 10) + + pipeline.add_node(test_func, params={'input1': 5, 'output1': 'output1'}) + pipeline.run() + assert(pipeline.datablock['output1'] == 10) + +def test_datablock_write(): + pipeline = Pipeline() + pipeline.datablock_write(['a', 'b', 'c'], 10) + assert(pipeline.datablock['a']['b']['c'] == 10) + + pipeline.datablock_write(['a', 'b', 'd'], 20) + assert(pipeline.datablock['a']['b']['c'] == 10) + assert(pipeline.datablock['a']['b']['d'] == 20) + + pipeline.datablock_write(['a', 'e'], 30) + assert(pipeline.datablock['a']['b']['c'] == 10) + assert(pipeline.datablock['a']['b']['d'] == 20) + assert(pipeline.datablock['a']['e'] == 30) + + pipeline.datablock_write(['f'], 40) + assert(pipeline.datablock['a']['b']['c'] == 10) + assert(pipeline.datablock['a']['b']['d'] == 20) + assert(pipeline.datablock['a']['e'] == 30) + assert(pipeline.datablock['f'] == 40) + +def test_standalone_decorator(): + + from agrifoodpy.pipeline.pipeline import Pipeline, standalone + + test_datablock = {'x': 5, 'y': 10} + + # Test decorated function with single input key + @standalone(['x'], ['out_key']) + def double_numbers_decorated(x, out_key, datablock=None): + datablock[out_key] = datablock[x]*2 + return datablock + + result_double = double_numbers_decorated(5) + assert result_double == 10 + + # Test decorated function with multiple input keys + @standalone(['x', 'y'], ['out_key']) + def sum_numbers_decorated(x, y, out_key, datablock=None): + datablock[out_key] = datablock[x] + datablock[y] + return datablock + + result_sum = sum_numbers_decorated(5, 10) + assert result_sum == 15 + + # Test decorated function with no input keys + @standalone([], ['out_key']) + def return_constant_decorated(out_key, datablock=None): + datablock[out_key] = 42 + return datablock + + result_constant = return_constant_decorated() + assert result_constant == 42 + + # Test decorated function with multiple return keys + @standalone(['x'], ['out_key1', 'out_key2']) + def multiple_returns_decorated(x, out_key1, out_key2, datablock=None): + datablock[out_key1] = datablock[x] * 2 + datablock[out_key2] = datablock[x] + 10 + return datablock + + result_multiple = multiple_returns_decorated(5) + assert result_multiple[0] == 10 + assert result_multiple[1] == 15 + + # Test decorated function inside a pipeline + pipeline = Pipeline(test_datablock) + @standalone(['x'], ['out_key']) + def pipeline_decorated(x, out_key, datablock=None): + datablock[out_key] = datablock[x] * 3 + return datablock + + pipeline.add_node(pipeline_decorated, params={'x': 'x', 'out_key': 'result'}) + pipeline.run() + assert pipeline.datablock['result'] == 15 \ No newline at end of file diff --git a/agrifoodpy/population/population.py b/agrifoodpy/population/population.py index 36be12d..40e8c34 100644 --- a/agrifoodpy/population/population.py +++ b/agrifoodpy/population/population.py @@ -4,9 +4,16 @@ import numpy as np import xarray as xr -from agrifoodpy.array_accessor import XarrayAccessorBase +from ..array_accessor import XarrayAccessorBase -def population(years, regions, quantities, datasets=None, long_format=True): + +def population( + years, + regions, + quantities, + datasets=None, + long_format=True +): """Population style dataset 
constructor Parameters @@ -42,15 +49,15 @@ def population(years, regions, quantities, datasets=None, long_format=True): # Identify unique values in coordinates _years = np.unique(years) _regions = np.unique(regions) - coords = {"Year" : _years, - "Region" : _regions} + coords = {"Year": _years, + "Region": _regions} # find positions in output array to organize data ii = [np.searchsorted(_years, years), np.searchsorted(_regions, regions)] size = (len(_years), len(_regions)) # Create empty dataset - data = xr.Dataset(coords = coords) + data = xr.Dataset(coords=coords) if long_format: # dataset, quantities @@ -95,6 +102,7 @@ def population(years, regions, quantities, datasets=None, long_format=True): return data + @xr.register_dataarray_accessor("pop") class PopulationDataArray(XarrayAccessorBase): pass diff --git a/agrifoodpy/tests/test_base_class.py b/agrifoodpy/tests/test_base_class.py index b385b80..f2a062a 100644 --- a/agrifoodpy/tests/test_base_class.py +++ b/agrifoodpy/tests/test_base_class.py @@ -1,6 +1,6 @@ import numpy as np import xarray as xr -from agrifoodpy.array_accessor import XarrayAccessorBase +from ..array_accessor import XarrayAccessorBase def test_add_years(): diff --git a/agrifoodpy/utils/dict_utils.py b/agrifoodpy/utils/dict_utils.py new file mode 100644 index 0000000..c662fda --- /dev/null +++ b/agrifoodpy/utils/dict_utils.py @@ -0,0 +1,94 @@ +"""Pipeline utilities""" + +import numpy as np + + +def item_parser(fbs, items): + """Extracts a list of items from a dataset using a coordinate-key tuple, + or converts a scalar item to a list + + Parameters + ---------- + + fbs : xarray.Dataset or xarray.DataArray + The dataset containing the coordinate-key to extract items from. + items : tuple, scalar + If a tuple, the first element is the name of the coordinate and the + second element is a list of items to extract. If a scalar, the item + is converted to a list. + + Returns + ------- + list + A list of items matching the coordinate-key description, or containing + the scalar item. + """ + + if items is None: + return None + + if isinstance(items, tuple): + items = fbs.sel(Item=fbs[items[0]].isin(items[1])).Item.values + elif np.isscalar(items): + items = [items] + + return items + + +def get_dict(datablock, keys): + """Returns an element from a dictionary using a key or tuple of keys used + to describe a path of keys + + Parameters + ---------- + + datablock : dict + The input dictionary + keys : str or tuple + Dictionary key, or tuple of keys + """ + + if isinstance(keys, tuple): + out = datablock + for key in keys: + out = out[key] + else: + out = datablock[keys] + + return out + + +def set_dict(datablock, keys, object, create_missing=True): + """Sets an element in a dictionary using a key or tuple of keys used to + describe a path of keys + + Parameters + ---------- + + datablock : dict + The input dictionary + keys : str or tuple + Dictionary key, or tuple of keys + object : any + The object to set in the dictionary + create_missing : bool, optional + If True, creates missing keys in the dictionary. Defaults to True. + + Raises + ------ + KeyError + If a key in the path does not exist and create_missing is False. 
+ """ + + if isinstance(keys, tuple): + out = datablock + for key in keys[:-1]: + if key not in out: + if create_missing: + out[key] = {} + else: + raise KeyError(f"Key '{key}' not found in datablock.") + out = out[key] + out[keys[-1]] = object + else: + datablock[keys] = object diff --git a/agrifoodpy/utils/nodes.py b/agrifoodpy/utils/nodes.py new file mode 100644 index 0000000..77a7dfe --- /dev/null +++ b/agrifoodpy/utils/nodes.py @@ -0,0 +1,289 @@ +import copy +import xarray as xr +import importlib + +from ..pipeline import standalone +from ..utils.dict_utils import get_dict, set_dict + + +@standalone(["dataset"], ["dataset"]) +def add_items( + dataset, + items, + values=None, + copy_from=None, + datablock=None +): + """Adds a list of items to a selected dataset in the datablock and + initializes their values. + + Parameters + ---------- + datablock : Datablock + Datablock object. + dataset : dict + Datablock path to the datasets to modify. + items : list, dict + List of items or dictionary of items attributes to add. If a + dictionary, keys are the item names, and non-dimension coordinates. + values : list, optional + List of values to initialize the items. If not set, values are set to + 0, unless the copy from parameter is set. + copy_from : list, optional + Items to copy the values from. + + Returns + ------- + dict or xarray.Dataset + - If no datablock is provided, returns a xarray.Dataset with the new + items. + - If a datablock is provided, returns the datablock with the modified + datasets on the corresponding keys. + + """ + + # Check if items is a dictionary + if isinstance(items, dict): + items_src = copy.deepcopy(items) + new_items = items_src.pop("Item") + else: + new_items = items + items_src = {} + + # Add new items to the datasets + data = get_dict(datablock, dataset) + + data = data.fbs.add_items(new_items, copy_from=copy_from) + for key, val in items_src.items(): + data[key].loc[{"Item": new_items}] = val + + if values is not None: + data.loc[{"Item": new_items}] = values + + elif copy_from is None: + data.loc[{"Item": new_items}] = 0 + + set_dict(datablock, dataset, data) + + return datablock + + +@standalone(["dataset"], ["dataset"]) +def add_years( + dataset, + years, + projection='empty', + datablock=None +): + """ + Extends the Year coordinates of a dataset. + + Parameters + ---------- + datablock : dict + The datablock dictionary where the dataset is stored. + dataset : str + Datablock key of the dataset to extend. + years : list + List of years to extend the dataset to. + projection : str + Projection mode. If "constant", the last year of the input array + is copied to every new year. If "empty", values are initialized and + set to zero. 
If a float array is given, these are used to populate + the new year using a scaling of the last year of the array + """ + + data = get_dict(datablock, dataset) + + data = data.fbs.add_years(years, projection) + + set_dict(datablock, dataset, data) + + return datablock + + +def copy_datablock(datablock, key, out_key): + """Copy a datablock element into a new key in the datablock + + Parameters + ---------- + datablock : xarray.Dataset + The datablock to print + key : str + The key of the datablock to print + out_key : str + The key of the datablock to copy to + + Returns + ------- + datablock : dict + Datablock to with added key + """ + + datablock[out_key] = copy.deepcopy(datablock[key]) + + return datablock + + +def print_datablock( + datablock, + key, + attr=None, + method=None, + args=None, + kwargs=None, + preffix="", + suffix="" +): + """Prints a datablock element or its attributes/methods at any point in the + pipeline execution. + + Parameters + ---------- + datablock : dict + The datablock to print from. + key : str + The key of the datablock to print. + attr : str, optional + Name of an attribute of the object to print. + method : str, optional + Name of a method of the object to call and print. + args : list, optional + Positional arguments for the method call. + kwargs : dict, optional + Keyword arguments for the method call. + + Returns + ------- + datablock : dict + Unmodified datablock to continue execution. + """ + obj = datablock[key] + + # Extract attribute + if attr is not None: + if hasattr(obj, attr): + obj = getattr(obj, attr) + else: + print(f"Object has no attribute '{attr}'") + return datablock + + # Call method + if method is not None: + if hasattr(obj, method): + func = getattr(obj, method) + if callable(func): + args = args or [] + kwargs = kwargs or {} + try: + obj = func(*args, **kwargs) + except Exception as e: + print(f"Error calling {method} on {key}: {e}") + return datablock + else: + print(f"'{method}' is not callable on {key}") + return datablock + else: + print(f"Object has no method '{method}'") + return datablock + + # Final print + print(f"{preffix}{obj}{suffix}") + return datablock + + +def write_to_datablock(datablock, key, value, overwrite=True): + """Writes a value to a specified key in the datablock. + + Parameters + ---------- + datablock : dict + The datablock to write to. + key : str + The key in the datablock where the value will be written. + value : any + The value to write to the datablock. + overwrite : bool, optional + If True, overwrite the existing value at the key. + If False, do not overwrite. + + Returns + ------- + datablock : dict + The updated datablock with the new key-value pair. + """ + + if not overwrite and key in datablock: + raise KeyError( + "Key already exists in datablock and overwrite is set to False.") + + set_dict(datablock, key, value) + + return datablock + + +def _import_dataset(module_name, dataset_name): + module = importlib.import_module(module_name) + dataset = getattr(module, dataset_name) + return dataset + + +@standalone([], ['datablock_path']) +def load_dataset( + datablock_path, + path=None, + module=None, + data_attr=None, + da=None, + coords=None, + scale=1., + datablock=None +): + """Loads a dataset to the specified datablock dictionary. + + Parameters + ---------- + datablock : dict + The datablock path where the dataset is stored + path : str + The path to the dataset stored in a netCDF file. + module : str + The module name where the dataset will be imported from. 
+ data_attr : str + The attribute name of the dataset in the module. + da : str + The dataarray to be loaded. + coords : dict + Dictionary containing the coordinates of the dataset to be loaded. + scale : float + Optional multiplicative factor to be applied to the dataset on load. + datablock_path : str + The path to the datablock where the dataset is stored. + + """ + + # Load dataset from Netcdf file + if path is not None: + try: + with xr.open_dataset(path) as data: + dataset = data.load() + + except ValueError: + with xr.open_dataarray(path) as data: + dataset = data.load() + + # Load dataset from module + elif module is not None and data_attr is not None: + dataset = _import_dataset(module, data_attr) + + # Select dataarray and coords from dataset + if da is not None: + dataset = dataset[da] + + if coords is not None: + dataset = dataset.sel(coords) + + # Add dataset to datablock + datablock[datablock_path] = dataset * scale + + return datablock diff --git a/agrifoodpy/utils/scaling.py b/agrifoodpy/utils/scaling.py index 945893d..a98ff9d 100644 --- a/agrifoodpy/utils/scaling.py +++ b/agrifoodpy/utils/scaling.py @@ -3,6 +3,7 @@ import numpy as np import xarray as xr + def logistic_scale(y0, y1, y2, y3, c_init, c_end): """ Create an xarray DataArray with a logistic growth interval @@ -20,7 +21,7 @@ def logistic_scale(y0, y1, y2, y3, c_init, c_end): The initial constant value. c_end : (float) The final constant value. - + Returns: xarray DataArray An xarray DataArray object with 'year' as the coordinate and values set by a logistic growth between the user defined intervals. @@ -33,8 +34,8 @@ def logistic_scale(y0, y1, y2, y3, c_init, c_end): # Set values between y1 and y2 using a logistic curve var_segment = np.logical_and(years >= y1, years < y2) t = (years[var_segment] - y1) / (y2 - y1) - values[var_segment] = c_init + \ - (c_end - c_init) *(1 / (1 + np.exp(-10 * (t - 0.5)))) + values[var_segment] = c_init \ + + (c_end - c_init)*(1 / (1 + np.exp(-10 * (t - 0.5)))) # Set values between y2 and y3 to c_end values[years >= y2] = c_end @@ -42,15 +43,16 @@ def logistic_scale(y0, y1, y2, y3, c_init, c_end): data_array = xr.DataArray(values, dims='Year', coords={'Year': years}) return data_array + def linear_scale(y0, y1, y2, y3, c_init, c_end): """ Create an xarray DataArray with a single coordinate called 'year'. - + The values from the first year 'y0' up to a given year 'y1' will be constant, then they will vary linearly between 'y1' and another given year 'y2', and from 'y2' until the end of the array 'y3', the values will continue with a constant value. - + Parameters: y0 : (int) Starting year. @@ -63,12 +65,12 @@ def linear_scale(y0, y1, y2, y3, c_init, c_end): c_init : (float) Value to use for initial constant scale segment. c_end : (float) - + Returns: xr.DataArray An xarray DataArray object with 'year' as the coordinate and values set by a linear growth between the user defined intervals. 
""" - + # Create arrays and set values between y0 and y1 to c_init years = np.arange(y0, y3 + 1) values = np.ones_like(years, dtype=float) * c_init @@ -80,10 +82,10 @@ def linear_scale(y0, y1, y2, y3, c_init, c_end): else: slope = float((c_end - c_init) / (y2 - y1)) values[var_segment] = slope * (years[var_segment] - y1) + c_init - + # Set values between y2 and y3 to c_end values[years >= y2] = c_end - + data_array = xr.DataArray(values, coords={'Year': years}, dims=['Year']) - - return data_array \ No newline at end of file + + return data_array diff --git a/agrifoodpy/utils/tests/__init__.py b/agrifoodpy/utils/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agrifoodpy/utils/tests/data/generate_dataset.py b/agrifoodpy/utils/tests/data/generate_dataset.py new file mode 100644 index 0000000..ad7db80 --- /dev/null +++ b/agrifoodpy/utils/tests/data/generate_dataset.py @@ -0,0 +1,13 @@ +import xarray as xr +import numpy as np + +items = ["Beef", "Apples", "Poultry"] + +shape = (3, 2, 2) + +data = np.reshape(np.arange(np.prod(shape)), shape) + +ds = xr.Dataset({"data": (("Item", "X", "Y"), data)}, + coords={"Item": items, "X": [0, 1], "Y": [0, 1]}) + +xr.Dataset.to_netcdf(ds, "test_dataset.nc") \ No newline at end of file diff --git a/agrifoodpy/utils/tests/data/test_dataset.nc b/agrifoodpy/utils/tests/data/test_dataset.nc new file mode 100644 index 0000000..ad1cefa Binary files /dev/null and b/agrifoodpy/utils/tests/data/test_dataset.nc differ diff --git a/agrifoodpy/utils/tests/test_add_items.py b/agrifoodpy/utils/tests/test_add_items.py new file mode 100644 index 0000000..2e8dc1b --- /dev/null +++ b/agrifoodpy/utils/tests/test_add_items.py @@ -0,0 +1,57 @@ +import numpy as np +import xarray as xr + + +def test_add_items(): + + from agrifoodpy.utils.nodes import add_items + + items = ["Beef", "Apples", "Poultry"] + item_origin = ["Animal", "Vegetal", "Animal"] + new_items = ["Tomatoes", "Potatoes", "Eggs"] + + data = np.random.rand(3, 2, 2) + expected_items = np.concatenate([items, new_items]) + + ds = xr.Dataset({"data": (("Item", "X", "Y"), data)}, + coords={"Item": items, "X": [0, 1], "Y": [0, 1]}) + ds = ds.assign_coords({"Item_origin": ("Item", item_origin)}) + + # Test basic functionality + result_add = add_items(ds, new_items) + + assert np.array_equal(result_add["Item"].values, expected_items) + for item in new_items: + assert np.all(result_add["data"].sel(Item=item).values == 0) + + # Test copying from a single existing item + result_copy = add_items(ds, new_items, copy_from="Beef") + + assert np.array_equal(result_copy["Item"], expected_items) + for item_i in new_items: + assert np.array_equal(result_copy["data"].sel(Item=item_i), + ds.data.sel(Item="Beef")) + + # Test copying from multiple existing items + result_copy_multiple = add_items(ds, new_items, copy_from=["Beef", + "Apples", + "Poultry"]) + + assert np.array_equal(result_copy_multiple["Item"], expected_items) + assert np.array_equal(result_copy_multiple["data"].sel(Item=new_items), + ds.data.sel(Item=["Beef", "Apples", "Poultry"])) + + # Test providing values as dictionary + new_items_dict = { + "Item": new_items, + "Item_origin": ["Vegetal", "Vegetal", "Animal"], + } + + result_dict = add_items(ds, new_items_dict) + + assert np.array_equal(result_dict["Item"].values, expected_items) + assert np.array_equal(result_dict["Item_origin"].values, + ["Animal", "Vegetal", "Animal", + "Vegetal", "Vegetal", "Animal"]) + for item in new_items: + assert np.all(result_dict["data"].sel(Item=item).values == 0) 
diff --git a/agrifoodpy/utils/tests/test_add_years.py b/agrifoodpy/utils/tests/test_add_years.py
new file mode 100644
index 0000000..ed57e54
--- /dev/null
+++ b/agrifoodpy/utils/tests/test_add_years.py
@@ -0,0 +1,40 @@
+import numpy as np
+import xarray as xr
+
+
+def test_add_years():
+
+    from agrifoodpy.utils.nodes import add_years
+
+    items = ["Beef", "Apples", "Poultry"]
+    years = [2010, 2011, 2012]
+
+    shape = (3, 3)
+    data = np.reshape(np.arange(np.prod(shape)), shape)
+
+    ds = xr.Dataset({"data": (("Item", "Year"), data)},
+                    coords={"Item": items, "Year": years})
+
+    # Test basic functionality
+    new_years = [2013, 2014]
+    result_add = add_years(ds, new_years)
+    expected_years = years + new_years
+    assert np.array_equal(result_add["Year"].values, expected_years)
+    for year in new_years:
+        assert np.all(np.isnan(result_add["data"].sel(Year=year).values))
+
+    # Test projection mode 'constant'
+    result_constant = add_years(ds, new_years, projection='constant')
+    assert np.array_equal(result_constant["Year"].values, expected_years)
+    for year in new_years:
+        assert np.array_equal(result_constant["data"].sel(Year=year).values,
+                              ds.data.isel(Year=-1).values)
+
+    # Test projection mode with float array
+    scaling_factors = np.array([1.0, 2.0])
+    result_scaled = add_years(ds, new_years, projection=scaling_factors)
+    assert np.array_equal(result_scaled["Year"].values, expected_years)
+    for i, year in enumerate(new_years):
+        expected_values = ds.data.isel(Year=-1).values * scaling_factors[i]
+        assert np.array_equal(result_scaled["data"].sel(Year=year).values,
+                              expected_values)
diff --git a/agrifoodpy/utils/tests/test_copy_datablock.py b/agrifoodpy/utils/tests/test_copy_datablock.py
new file mode 100644
index 0000000..f176eea
--- /dev/null
+++ b/agrifoodpy/utils/tests/test_copy_datablock.py
@@ -0,0 +1,36 @@
+import numpy as np
+
+
+def test_copy_datablock():
+
+    from agrifoodpy.pipeline import Pipeline
+    from agrifoodpy.utils.nodes import copy_datablock
+
+    datablock = {
+        'test_dataset': {
+            'fbs': {
+                'data': np.array([[1, 2], [3, 4]]),
+                'years': [2020, 2021]
+            }
+        }
+    }
+
+    # Test copying the datablock
+    pipeline = Pipeline(datablock=datablock)
+    pipeline.add_node(
+        copy_datablock,
+        params={
+            'key': 'test_dataset',
+            'out_key': 'copied_dataset'
+        },
+    )
+
+    # Execute the pipeline
+    pipeline.run()
+
+    # Check if the copied dataset exists in the datablock
+    assert 'copied_dataset' in pipeline.datablock
+    assert np.array_equal(
+        pipeline.datablock['copied_dataset']['fbs']['data'],
+        datablock['test_dataset']['fbs']['data']
+    )
diff --git a/agrifoodpy/utils/tests/test_dict_utils.py b/agrifoodpy/utils/tests/test_dict_utils.py
new file mode 100644
index 0000000..4eca715
--- /dev/null
+++ b/agrifoodpy/utils/tests/test_dict_utils.py
@@ -0,0 +1,83 @@
+import numpy as np
+import xarray as xr
+
+
+def test_item_parser():
+
+    from agrifoodpy.utils.dict_utils import item_parser
+    items = ["Beef", "Apples", "Poultry"]
+
+    item_origin = ["Animal", "Vegetal", "Animal"]
+
+    data = np.random.rand(3, 2, 2)
+
+    fbs = xr.Dataset({"data": (("Item", "X", "Y"), data)},
+                     coords={"Item": items, "X": [0, 1], "Y": [0, 1]})
+    fbs = fbs.assign_coords({"Item_origin": ("Item", item_origin)})
+
+    # Test case for item_parser with None input
+    items = item_parser(fbs, None)
+    assert items is None
+
+    # Test case for item_parser with tuple input
+    items = item_parser(fbs, ("Item_origin", ["Animal"]))
+    assert np.array_equal(items, ["Beef", "Poultry"])
+
+    # Test case for item_parser with scalar input
+    items = item_parser(fbs, "Beef")
+    assert np.array_equal(items, ["Beef"])
+
+
+def test_get_dict():
+
+    from agrifoodpy.utils.dict_utils import get_dict
+
+    datablock = {
+        "key1": {
+            "key2": {
+                "key3": "value"
+            }
+        },
+        "key4": "another_value"
+    }
+
+    # Test case for get_dict with a single key
+    value = get_dict(datablock, "key4")
+    assert value == "another_value"
+
+    # Test case for get_dict with a tuple of keys
+    value = get_dict(datablock, ("key1", "key2", "key3"))
+    assert value == "value"
+
+
+def test_set_dict():
+
+    from agrifoodpy.utils.dict_utils import set_dict
+
+    datablock = {
+        "key1": {
+            "key2": {
+                "key3": "value"
+            }
+        },
+        "key4": "another_value"
+    }
+
+    # Test case for set_dict with a single key
+    set_dict(datablock, "key4", "new_value")
+    assert datablock["key4"] == "new_value"
+
+    # Test case for set_dict with a tuple of keys
+    set_dict(datablock, ("key1", "key2", "key3"), "new_nested_value")
+    assert datablock["key1"]["key2"]["key3"] == "new_nested_value"
+
+    # Test case for set_dict with create_missing=True
+    set_dict(datablock, ("key5", "key6"), "missing_value", create_missing=True)
+    assert datablock["key5"]["key6"] == "missing_value"
+
+    # Test case for set_dict with create_missing=False
+    try:
+        set_dict(datablock, ("key7", "key8"), "should_fail",
+                 create_missing=False)
+        assert False, "set_dict should have raised a KeyError"
+    except KeyError:
+        pass
diff --git a/agrifoodpy/utils/tests/test_load_dataset.py b/agrifoodpy/utils/tests/test_load_dataset.py
new file mode 100644
index 0000000..55b44ff
--- /dev/null
+++ b/agrifoodpy/utils/tests/test_load_dataset.py
@@ -0,0 +1,39 @@
+import numpy as np
+import xarray as xr
+
+from agrifoodpy.utils.nodes import load_dataset
+import os
+
+
+def test_load_dataset():
+
+    items = ["Beef", "Apples", "Poultry"]
+    shape = (3, 2, 2)
+    data = np.reshape(np.arange(np.prod(shape)), shape)
+
+    expected_ds = xr.Dataset({"data": (("Item", "X", "Y"), data)},
+                             coords={"Item": items, "X": [0, 1], "Y": [0, 1]})
+
+    script_dir = os.path.dirname(__file__)
+    test_data_path = os.path.join(script_dir, "data/test_dataset.nc")
+
+    # Test loading a dataset from a file path
+    ds = load_dataset(path=test_data_path)
+    assert isinstance(ds, xr.Dataset)
+    assert ds.equals(expected_ds)
+
+    # Test loading a dataset and selecting a specific dataarray
+    ds_dataarray = load_dataset(path=test_data_path, da="data")
+    assert isinstance(ds_dataarray, xr.DataArray)
+    assert ds_dataarray.equals(expected_ds["data"])
+
+    # Test loading a dataset and selecting specific items
+    sel = {"Item": ["Beef", "Apples"]}
+    ds_selected = load_dataset(path=test_data_path, coords=sel)
+    expected_selected_ds = expected_ds.sel(sel)
+    assert ds_selected.equals(expected_selected_ds)
+
+    # Test loading a dataset and applying a scale factor
+    scaled_ds = load_dataset(path=test_data_path, scale=2.0)
+    expected_scaled_data = expected_ds * 2.0
+    assert scaled_ds.equals(expected_scaled_data)
diff --git a/agrifoodpy/utils/tests/test_print_datablock.py b/agrifoodpy/utils/tests/test_print_datablock.py
new file mode 100644
index 0000000..9d3622b
--- /dev/null
+++ b/agrifoodpy/utils/tests/test_print_datablock.py
@@ -0,0 +1,58 @@
+import numpy as np
+import xarray as xr
+
+
+def test_print_datablock():
+
+    from agrifoodpy.utils.nodes import print_datablock
+
+    items = ["Beef", "Apples", "Poultry"]
+
+    data = np.random.rand(3, 2, 2)
+
+    ds = xr.Dataset({"data": (("Item", "X", "Y"), data)},
+                    coords={"Item": items, "X": [0, 1], "Y": [0, 1]})
+
+    datablock = {
+        'test_dict': {
+            'data': np.array([[1, 2], [3, 4]]),
+            'years': [2020, 2021]
+        },
+
+
'test_xarray': ds, + 'test_string': "Hello, World!", + 'test_list': items, + 'test_array': data + + } + + # Test printing a dictionary element + datablock = print_datablock(datablock, 'test_dict') + + # Test printing an xarray Dataset + datablock = print_datablock(datablock, 'test_xarray') + + # Test printing a string + datablock = print_datablock(datablock, 'test_string') + + # Test printing a list + datablock = print_datablock(datablock, 'test_list') + + # Test printing an array + datablock = print_datablock(datablock, 'test_array') + + # Test printing an attribute of the xarray Dataset + datablock = print_datablock(datablock, 'test_xarray', + attr='data_vars') + + # Test calling a method on the xarray Dataset + datablock = print_datablock(datablock, 'test_xarray', + method='mean', args=[('X', 'Y')]) + + # Test calling a method with keyword arguments + datablock = print_datablock(datablock, 'test_xarray', + method='sel', kwargs={'Item': 'Beef'}) + + # Test error handling for non-existent attribute + datablock = print_datablock(datablock, 'test_xarray', + attr='non_existent_attr') diff --git a/agrifoodpy/utils/tests/test_scaling.py b/agrifoodpy/utils/tests/test_scaling.py index a71c90d..5b6e86e 100644 --- a/agrifoodpy/utils/tests/test_scaling.py +++ b/agrifoodpy/utils/tests/test_scaling.py @@ -1,7 +1,7 @@ import numpy as np -import xarray as xr from agrifoodpy.utils.scaling import linear_scale, logistic_scale + def test_logistic_scale(): # Basic functionality test @@ -12,7 +12,7 @@ def test_logistic_scale(): truth = [0, 0, 0, 0, 0, 0.06692851, 0.47425873, 2.68941421, 7.31058579, 9.52574127, 10, 10, 10, 10, 10, 10] - + assert np.allclose(basic_result, truth) assert np.array_equal(basic_result["Year"].values, np.arange(2000, 2016)) @@ -23,7 +23,7 @@ def test_logistic_scale(): truth = [-1, -1, -1, -1, -1, -1.06023566, -1.42683286, -3.42047279, -7.57952721, -9.57316714, -10, -10, -10, -10, -10, -10] - + assert np.allclose(negative_result, truth) # Instant change test @@ -46,11 +46,12 @@ def test_logistic_scale(): c_init, c_end = 5.5, 5.5 constant_value = logistic_scale(y0, y1, y2, y3, c_init, c_end) - assert np.array_equal(constant_value, c_init * np.ones(y3+1-y0)) + assert np.array_equal(constant_value, c_init * np.ones(y3+1-y0)) assert np.array_equal(constant_value, c_end * np.ones(y3+1-y0)) + def test_linear_scale(): - + # Basic functionality test basic_result = linear_scale(2000, 2005, 2010, 2015, 0, 10) truth = [0, 0, 0, 0, 0, 0, 2, 4, 6, 8, 10, 10, 10, 10, 10, 10] @@ -62,8 +63,8 @@ def test_linear_scale(): y0, y1, y2, y3 = 2000, 2005, 2010, 2015 c_init, c_end = -1, -10 negative_result = linear_scale(y0, y1, y2, y3, c_init, c_end) - truth = [ -1, -1, -1, -1, -1, -1, -2.8, -4.6, -6.4, -8.2, - -10. , -10. , -10. , -10. , -10. , -10. ] + truth = [-1, -1, -1, -1, -1, -1, -2.8, -4.6, -6.4, -8.2, + -10., -10., -10., -10., -10., -10.] 
assert np.allclose(negative_result, truth) @@ -89,5 +90,3 @@ def test_linear_scale(): assert np.array_equal(constant_value, c_init * np.ones(y3+1-y0)) assert np.array_equal(constant_value, c_end * np.ones(y3+1-y0)) - - diff --git a/agrifoodpy/utils/tests/test_write_to_datablock.py b/agrifoodpy/utils/tests/test_write_to_datablock.py new file mode 100644 index 0000000..8986e31 --- /dev/null +++ b/agrifoodpy/utils/tests/test_write_to_datablock.py @@ -0,0 +1,34 @@ +def test_write_to_datablock(): + from agrifoodpy.utils.nodes import write_to_datablock + + datablock_basic = {} + + # Basic write to the datablock + write_to_datablock(datablock_basic, "test_key", "test_value") + assert datablock_basic["test_key"] == "test_value" + + # Write to the datablock with a tuple value + datablock_tuple = {} + write_to_datablock( + datablock=datablock_tuple, + key=("test_key_1", "test_key_2"), + value="test_tuple_value" + ) + assert datablock_tuple["test_key_1"]["test_key_2"] == "test_tuple_value" + + # Overwrite existing key + datablock_overwrite = {"test_key": "old_value"} + write_to_datablock(datablock_overwrite, "test_key", "new_value") + assert datablock_overwrite["test_key"] == "new_value" + + # Attempt to write without overwriting an existing key + datablock_no_overwrite = {"test_key": "existing_value"} + try: + write_to_datablock( + datablock=datablock_no_overwrite, + key="test_key", + value="new_value", + overwrite=False) + + except KeyError as e: + assert str(e) == "'Key already exists in datablock and overwrite is set to False.'" \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 1e3bcd6..9b60950 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,7 +9,7 @@ project = 'agrifoodpy' copyright = '2023, AgriFoodPy developers' author = 'Juan P. Cordero' -release = '0.1.0' +release = '0.2.0' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/examples/modules/plot_animal_consumption_scaling.py b/examples/modules/plot_animal_consumption_scaling.py index 7caede3..c2e2829 100644 --- a/examples/modules/plot_animal_consumption_scaling.py +++ b/examples/modules/plot_animal_consumption_scaling.py @@ -2,10 +2,10 @@ ======================================== Reducing UK's animal product consumption ======================================== - -This example demonstrates how to combine a food supply array with a scaling -model to reduce animal product consumption. -It also employs a few functions from the ``fbs`` accessor to group and plot +This example demonstrates how to combine a Food Balance Sheet array with a +scaling model to reduce animal product consumption, while keeping total +consumption constant across all items. +It also employs a few ``fbs`` accessor class functions to group and plot food balance sheet arrays. 
Consumption of animal-based products is halved, while keeping total consumed
@@ -13,31 +13,33 @@
 """

 import numpy as np
+from matplotlib import pyplot as plt
 from agrifoodpy_data.food import FAOSTAT
-
-import agrifoodpy.food
 from agrifoodpy.food.model import balanced_scaling
-
-from matplotlib import pyplot as plt
-
 # Select food items and production values for the last year of data in the UK
 # Values are in 1000 Tonnes
 country_code = 229
 food_uk = FAOSTAT.isel(Year=-1).sel(Region=country_code)

+# Select all Animal Products according to their Item_origin, which is a
+# non-dimension coordinate
 animal_items = food_uk.sel(Item=food_uk.Item_origin=="Animal Products").Item.values
-
 # Scale domestic use of animal items by a factor of 0.5, while keeping
 # the sum of domestic use constant. Reduce imports to account for the new
-# consumption values
-food_uk_scaled = balanced_scaling(food_uk,
-                                  element="domestic",
-                                  items=animal_items,
-                                  scale=0.5,
-                                  origin="-imports",
-                                  fallback="-exports",
-                                  constant=True)
+# consumption values and use production as fallback, subtracting any negative
+# excess if required
+
+food_uk_scaled = balanced_scaling(
+    food_uk,
+    element="domestic",
+    scale=0.5,
+    items=animal_items,
+    constant=True,
+    origin="imports",
+    fallback="production",
+    add_to_fallback=False
+)

 # We group the original and scaled quantities by origin and plot to compare
 food_uk_origin = food_uk.fbs.group_sum("Item_origin")
@@ -45,8 +47,8 @@

 #%%
 # From the plot we can see that domestic use of animal products is reduced by
-# half, while keeping total weight constant. We used ``-exports`` as the
-# fallback for any extra origin required. If any item domestic use reduction
+# half, while keeping total weight constant. We used ``production`` as the
+# fallback for any extra origin required. If any domestic use reduction
 # requires more origin reduction than available, the remaining is taken from
 # the ``fallback`` DataArray element.
diff --git a/examples/modules/plot_emissions_animal_scaling.py b/examples/modules/plot_emissions_animal_scaling.py
index 13dea0f..0c4680c 100644
--- a/examples/modules/plot_emissions_animal_scaling.py
+++ b/examples/modules/plot_emissions_animal_scaling.py
@@ -2,10 +2,9 @@
 ===================================================
 Plot emissions from different item groups and years
 ===================================================
-
-This example demonstrates how manipulate a Food Balance Sheet array, add
+This example demonstrates how to manipulate a Food Balance Sheet array, add
 items and years to it and combine it with impact data to plot total GHG
-emissions dissagregated by selected coordinates.
+emissions disaggregated by a selected coordinate.

 Two datasets are imported from the agrifoodpy_data package:

@@ -24,16 +23,16 @@

 from matplotlib import pyplot as plt

-# Load FAOSTAT array to memory.
-FAOSTAT.load();
-
 # Select food items and production values for the UK and the US
 # Values are in [1000 Tonnes]
-country_codes = [229, 231]
+UK_FAO_CODE = 229
+US_FAO_CODE = 231
+
+country_codes = [UK_FAO_CODE, US_FAO_CODE]
 food = FAOSTAT.sel(Region=country_codes)["production"]

 # Convert emissions from [g CO2e] to [Gt CO2e]
-ghg_emissions = PN18["GHG Emissions"] / 1e6
+ghg_emissions = PN18["GHG Emissions (IPCC 2013)"] / 1e6

 food_emissions = fbs_impacts(food, ghg_emissions)
 ax = food_emissions.fbs.plot_years(show="Region", labels=["UK", "USA"])
diff --git a/examples/modules/plot_reforest_ALC_4_5.py b/examples/modules/plot_reforest_ALC_4_5.py
index ea9ff97..d995d06 100644
--- a/examples/modules/plot_reforest_ALC_4_5.py
+++ b/examples/modules/plot_reforest_ALC_4_5.py
@@ -56,13 +56,18 @@
 coniferous_max_seq = 14
 broadleaf_fraction = 0.5

-seq_forest= broadleaf_max_seq * (broadleaf_fraction) + \
+seq_forest = broadleaf_max_seq * (broadleaf_fraction) + \
             coniferous_max_seq * (1-broadleaf_fraction)

-co2e_seq = land_sequestration(land_use, [1,2], max_seq=seq_forest,
-                              fraction=[0.0, pasture_4_5/total_area_england*0.5],
-                              years = np.arange(2020,2070),
-                              growth_timescale=25)
+# We can now compute the additional carbon sequestration from reforesting
+co2e_seq = land_sequestration(
+    land_da=land_use,
+    use_id=[1, 2],
+    max_seq=seq_forest,
+    fraction=[0.0, pasture_4_5/total_area_england*0.5],
+    years=np.arange(2020, 2070),
+    growth_timescale=25
+    )

 ax = co2e_seq.fbs.plot_years()
 ax.set_ylabel("[t CO2 / yr]")
diff --git a/examples/pipeline/README.rst b/examples/pipeline/README.rst
new file mode 100644
index 0000000..38b3bea
--- /dev/null
+++ b/examples/pipeline/README.rst
@@ -0,0 +1,6 @@
+.. _pipeline_examples:
+
+Pipeline
+--------
+
+Basic pipeline examples
\ No newline at end of file
diff --git a/examples/pipeline/plot_balanced_scaling_food_balance_sheet_pipeline.py b/examples/pipeline/plot_balanced_scaling_food_balance_sheet_pipeline.py
new file mode 100644
index 0000000..59e04e4
--- /dev/null
+++ b/examples/pipeline/plot_balanced_scaling_food_balance_sheet_pipeline.py
@@ -0,0 +1,208 @@
+"""
+======================================
+Building a Food Balance Sheet Pipeline
+======================================
+
+This example demonstrates the use of the pipeline manager to create a simple
+pipeline of modules.
+
+In this particular example, we will load a food balance sheet dataset, compute
+preliminary Self-Sufficiency and Import Dependency Ratios (SSR, IDR) and add
+new items and years to the dataset.
+We will also print the SSR and IDR values to the console.
+Finally, we will scale the food balance sheet to reduce animal products and
+plot the results.
+"""
+
+# %%
+# We start by creating a pipeline object, which will manage the flow of data
+# through the different modules.
+
+import numpy as np
+from matplotlib import pyplot as plt
+
+from agrifoodpy.pipeline.pipeline import Pipeline
+
+from agrifoodpy.food import model
+from agrifoodpy.utils import nodes
+from agrifoodpy.utils.scaling import linear_scale
+
+# Create a pipeline object
+pipeline = Pipeline()
+
+# %%
+# We add a node to the pipeline to load a food balance sheet dataset.
+
+# Load a dataset
+pipeline.add_node(
+    nodes.load_dataset,
+    name="Load Dataset",
+    params={
+        "datablock_path": "food",
+        "module": "agrifoodpy_data.food",
+        "data_attr": "FAOSTAT",
+        "coords": {
+            "Item": [2731, 2511],
+            "Year": [2019, 2020],
+            "Region": 229},
+    }
+)
+
+# %%
+# We add a node to the pipeline to store a conversion factor in the
+# datablock. This conversion factor will be used to convert the food balance
+# sheet data from 1000 tonnes to kgs.
+
+# Add conversion factors to the datablock
+pipeline.add_node(
+    nodes.write_to_datablock,
+    name="Write to datablock",
+    params={
+        "key": "tonnes_to_kgs",
+        "value": 1e6,
+    }
+)
+
+# Convert food data from 1000 tonnes to kgs
+pipeline.add_node(
+    model.fbs_convert,
+    name="Convert from 1000 tonnes to kgs",
+    params={
+        "fbs": "food",
+        "convertion_arr": "tonnes_to_kgs",
+    }
+)
+
+# %%
+# Compute preliminary Self-Sufficiency Ratio (SSR) and Import Dependency Ratio
+# (IDR)
+
+
+# Compute SSR for food
+pipeline.add_node(
+    model.SSR,
+    name="Compute SSR for food",
+    params={
+        "fbs": "food",
+        "out_key": "SSR"
+    }
+)
+
+# Compute IDR for food
+pipeline.add_node(
+    model.IDR,
+    name="Compute IDR for food",
+    params={
+        "fbs": "food",
+        "out_key": "IDR"
+    }
+)
+
+# %%
+# Print the SSR and IDR values to the console
+
+# Add a print node to display the SSR
+pipeline.add_node(
+    nodes.print_datablock,
+    name="Print SSR",
+    params={
+        "key": "SSR",
+        "method": "to_numpy",
+        "preffix": "SSR values: ",
+    }
+)
+
+# %%
+# Now we can add new items to the food balance sheet dataset.
+
+# Add an item to the food dataset
+pipeline.add_node(
+    nodes.add_items,
+    name="Add item to food",
+    params={
+        "dataset": "food",
+        "items": {
+            "Item": 5000,
+            "Item_name": "Cultured meat",
+            "Item_group": "Cultured products",
+            "Item_origin": "Synthetic origin",
+        },
+        "copy_from": 2731
+    }
+)
+
+# %%
+# We can also add new years to the food balance sheet dataset.
+
+projection = np.linspace(1.1, 2.0, 10)
+new_years = np.arange(2021, 2031)
+
+# Extend the year range of the food dataset
+pipeline.add_node(
+    nodes.add_years,
+    name="Add years to food",
+    params={
+        "dataset": "food",
+        "years": new_years,
+        "projection": projection,
+    }
+)
+
+# %%
+# We execute the pipeline to run all the nodes in order.
+
+pipeline.run(timing=True)
+
+# %%
+# Finally, we plot the results.
+
+# Get the food results from the pipeline and plot using the fbs accessor
+food_results = pipeline.datablock["food"]["food"]
+
+f, ax = plt.subplots(figsize=(10, 6))
+food_results.fbs.plot_years(show="Item_name", labels="show", ax=ax)
+plt.show()
+
+# %%
+# We can continue adding nodes to the pipeline, even after it has been
+# executed once. To pick up where we left off, we indicate which node to
+# start execution from.
+
+# Define a year-dependent linear scale decreasing from 1 to 0.5 between 2021
+# and 2030
+scaling = linear_scale(
+    2019,
+    2021,
+    2030,
+    2030,
+    1,
+    0.5
+)
+
+# We will add a node to scale consumption
+pipeline.add_node(
+    model.balanced_scaling,
+    name="Balanced scaling of items",
+    params={
+        "fbs": "food",
+        "scale": scaling,
+        "element": "food",
+        "items": ("Item_name", "Bovine Meat"),
+        "constant": True,
+        "out_key": "food_scaled"
+    }
+)
+
+# Execute the recently added node
+pipeline.run(from_node=8, timing=True)
+
+# Get the food results from the pipeline and plot using the fbs accessor
+scaled_food_results = pipeline.datablock["food_scaled"]["food"]
+
+f, ax = plt.subplots(figsize=(10, 6))
+scaled_food_results.fbs.plot_years(show="Item_name", labels="show", ax=ax)
+plt.show()
+
+# %%
+# We can see in the scaled Food Balance Sheet that Bovine Meat consumption is
+# reduced by half by 2030, while the total sum across all items remains
+# constant.
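Every intermediate product of this example ends up in the nested datablock dictionary, so the `dict_utils` helpers introduced in this changeset offer a compact way to inspect the results once the pipeline has run. A small sketch, assuming the pipeline above has been executed (the "notes" path is purely illustrative):

```python
# Sketch: inspecting pipeline results with the dict_utils helpers.
# Assumes the pipeline defined in the example above has been run.
from agrifoodpy.utils.dict_utils import get_dict, set_dict

# Single key: the SSR stored by the "Compute SSR for food" node
ssr = get_dict(pipeline.datablock, "SSR")

# Tuple of keys: walk the nested path to the scaled food balance sheet
scaled = get_dict(pipeline.datablock, ("food_scaled", "food"))

# set_dict creates intermediate keys by default (create_missing=True);
# the "notes" path here is purely illustrative
set_dict(pipeline.datablock, ("notes", "scenario"), "halve bovine meat")
```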
diff --git a/setup.py b/setup.py index 649a794..046c7f3 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ HERE = pathlib.Path(__file__).parent -VERSION = '0.1.1' +VERSION = '0.2.0' PACKAGE_NAME = 'AgriFoodPy' AUTHOR = 'FixOurFood developers' AUTHOR_EMAIL = 'juanpablo.cordero@york.ac.uk'
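As a quick sanity check of the pipeline machinery released in 0.2.0, the following self-contained snippet, mirroring the behaviour exercised in `test_pipeline.py`, runs one decorated node both standalone and inside a pipeline:

```python
# Self-contained sanity check for the Pipeline and standalone machinery,
# mirroring the behaviour exercised in test_pipeline.py.
from agrifoodpy.pipeline.pipeline import Pipeline, standalone


@standalone(["x"], ["out"])
def triple(x, out="out", datablock=None):
    # Inside the node, x and out are datablock keys
    datablock[out] = datablock[x] * 3
    return datablock


# Standalone mode: pass a value directly and get the result back
assert triple(4) == 12

# Pipeline mode: pass datablock keys instead of values
fs = Pipeline(datablock={"x": 4})
fs.add_node(triple, params={"x": "x", "out": "out"})
fs.run()
assert fs.datablock["out"] == 12
```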