From be22ceccc538db4444291e84c6d028f23472419e Mon Sep 17 00:00:00 2001 From: Romain Sacchi Date: Fri, 7 Nov 2025 17:08:55 +0100 Subject: [PATCH] Improve documentation and README --- README.md | 80 ++++-- brightpath/bwconverter.py | 107 +++++++- brightpath/simaproconverter.py | 79 +++++- brightpath/utils.py | 436 ++++++++++++++++++++++----------- 4 files changed, 522 insertions(+), 180 deletions(-) diff --git a/README.md b/README.md index 6f3f7b7..95c330b 100644 --- a/README.md +++ b/README.md @@ -1,41 +1,83 @@ # BrightPath [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -BrightPath is a Python library designed to convert Brightway2 LCA inventories -into a format that can be imported into Simapro 9.x. +BrightPath bridges life-cycle assessment (LCA) data between +[Brightway2](https://brightway.dev/) and [SimaPro](https://simapro.com/). It +bundles the mappings that are required to translate units, flow names and +metadata between both tools and exposes high-level helpers to perform the +conversion in either direction. + +## Features + +* Convert Brightway2 inventories exported as Excel spreadsheets to the SimaPro + CSV format. +* Import SimaPro CSV exports and normalise them so that they can be registered + as Brightway databases. +* Ship curated mappings for biosphere flows, technosphere exchanges, + sub-compartments and blacklist entries required during the conversion. ## Installation -Use the package manager [pip](https://pip.pypa.io/en/stable/) to install BrightPath. 
+Install BrightPath from PyPI using [pip](https://pip.pypa.io/): ```bash - - pip install brightpath - +pip install brightpath ``` ## Usage -```python - -import brightpath +### Convert Brightway inventories to SimaPro CSV -# Create a converter object with the path to the Brightway LCA inventory -converter = brightpath.BrightwayConverter('path_to_inventory') +```python +from brightpath import BrightwayConverter + +converter = BrightwayConverter( + filepath="/path/to/brightway-export.xlsx", + metadata="/path/to/metadata.yaml", # optional + ecoinvent_version="3.9", +) + +# Write the converted inventory to a CSV file (defaults to the current +# working directory unless ``export_dir`` is provided during initialisation). +output_path = converter.convert_to_simapro(database="ecoinvent") +print(output_path) +``` -# Convert the inventory to a format compatible with Simapro 9.x -sima_inventory = converter.to_simapro() +The converter also accepts inventory data that has already been loaded into +memory via the ``data`` argument and can return the converted rows directly by +calling ``convert_to_simapro(format="data")``. -# Save the converted inventory to a CSV file -sima_inventory.to_file('output_path') +### Convert SimaPro CSV exports to Brightway datasets +```python +from brightpath import SimaproConverter + +converter = SimaproConverter( + filepath="/path/to/simapro-export.csv", + ecoinvent_version="3.9", + db_name="my-simapro-import", +) + +# Normalise exchange names, locations and metadata so that they align with +# Brightway conventions. +converter.convert_to_brightway() + +# The processed data lives on ``converter.i`` (an instance of +# ``bw2io.SimaProCSVImporter``). You can now write the database to your +# Brightway project if desired: +# converter.i.write_database() ``` -## License +## Development -[BSD-3-Clause](https://github.com/romainsacchi/brightpath/blob/master/LICENSE). +* Source code is formatted with standard Python tools and tested with + `pytest`. 
+* Data files required for the conversions live under `brightpath/data`. +* Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidance on running tests + and submitting pull requests. -## Contributing +## License -See [contributing](https://github.com/romainsacchi/brightpath/blob/master/CONTRIBUTING.md). +BrightPath is distributed under the +[BSD-3-Clause license](LICENSE). diff --git a/brightpath/bwconverter.py b/brightpath/bwconverter.py index 8edf70f..1a6d43b 100644 --- a/brightpath/bwconverter.py +++ b/brightpath/bwconverter.py @@ -36,8 +36,52 @@ class BrightwayConverter: - """ - Convert Brightway2 inventories to Simapro CSV files. + """Convert Brightway2 inventories to SimaPro CSV files. + + The converter loads inventories exported from Brightway2 and prepares + them for the SimaPro import format. The instance keeps references to the + different lookup tables that are needed throughout the conversion. + + :param filepath: Path to the Brightway inventory spreadsheet. + :type filepath: str | None + :param data: Brightway inventories provided directly instead of loading + them from ``filepath``. + :type data: list | None + :param metadata: Optional path to a YAML file containing additional + metadata to append to the export. + :type metadata: str | None + :param ecoinvent_version: Version string of the ecoinvent database the + inventories are linked to. + :type ecoinvent_version: str + :param export_dir: Directory where generated SimaPro CSV files are saved. + :type export_dir: str | None + :ivar filepath: Path to the Brightway inventory spreadsheet. + :vartype filepath: str | None + :ivar inventories: Brightway activities that are going to be converted. + :vartype inventories: list[dict] | None + :ivar simapro_blacklist: Exchanges that should not be exported. + :vartype simapro_blacklist: dict + :ivar simapro_fields: Order and structure of SimaPro sections. + :vartype simapro_fields: list[str] + :ivar simapro_units: Mapping of Brightway units to SimaPro units. 
+ :vartype simapro_units: dict[str, str] + :ivar simapro_headers: Header rows used when writing the CSV file. + :vartype simapro_headers: list[str] + :ivar simapro_technosphere: Mapping from technosphere exchanges to + SimaPro names. + :vartype simapro_technosphere: dict[tuple[str, str], str] + :ivar simapro_biosphere: Mapping from biosphere exchanges to SimaPro + names. + :vartype simapro_biosphere: dict[str, str] + :ivar simapro_subcompartment: Mapping of biosphere subcompartments to + SimaPro names. + :vartype simapro_subcompartment: dict[str, str] + :ivar ei_version: Version of the ecoinvent database in use. + :vartype ei_version: str + :ivar metadata: Optional metadata loaded from the YAML file. + :vartype metadata: dict | None + :ivar export_dir: Output directory for converted CSV files. + :vartype export_dir: pathlib.Path """ def __init__( @@ -48,8 +92,26 @@ def __init__( ecoinvent_version: str = "3.9", export_dir: str = None, ): - """ - :param filepath: path to the BW inventory spreadsheet file + """Instantiate a converter that targets the SimaPro CSV format. + + When ``filepath`` is provided the inventories are loaded from the + spreadsheet using :func:`brightpath.utils.import_bw_inventories`. If + ``filepath`` is omitted, pre-loaded ``data`` can be supplied instead. + Optional ``metadata`` is validated and attached to the export. + + :param filepath: Path to the Brightway inventory spreadsheet file. + :type filepath: str | None + :param data: Inventories loaded in memory. Used when ``filepath`` is + ``None``. + :type data: list | None + :param metadata: Path to the metadata YAML file. + :type metadata: str | None + :param ecoinvent_version: Version of the linked ecoinvent database. + :type ecoinvent_version: str + :param export_dir: Directory where SimaPro exports will be written. + :type export_dir: str | None + :raises FileNotFoundError: If the metadata file does not exist. + :raises ValueError: If the metadata file is not a YAML document. 
""" self.filepath = filepath self.inventories = import_bw_inventories(filepath) if self.filepath else data @@ -70,10 +132,20 @@ def __init__( self.export_dir = Path(export_dir) or Path.cwd() def format_inventories_for_simapro(self, database: str): - """ - Format inventories to Simapro format. - :param database: name of the database to link to. - :return: list + """Transform the Brightway inventories into the SimaPro structure. + + This method orchestrates the conversion of each activity in the + Brightway dataset into the row-based structure expected by SimaPro. + The resulting structure is compatible with the CSV export performed + by :meth:`convert_to_simapro`. + + :param database: Name of the target database to link to. Valid values + are ``"ecoinvent"`` and ``"uvek"``. + :type database: str + :return: Rows ready to be written to a SimaPro CSV file. + :rtype: list[list[str]] + :raises ValueError: If required information is missing from the + inventories. """ rows = [ @@ -500,9 +572,22 @@ def format_inventories_for_simapro(self, database: str): def convert_to_simapro( self, database: str = "ecoinvent", format: str = "csv" ) -> [str, list]: - """ - Convert the inventories to Simapro CSV files. - :param database: Name of the database to link to. Default is `ecoinvent`, but can be `uvek`. + """Export the converted inventories. + + The inventories are formatted using + :meth:`format_inventories_for_simapro` and either returned as raw data + or written to disk as a CSV file, depending on ``format``. + + :param database: Database to use when resolving exchanges. Accepted + values are ``"ecoinvent"`` and ``"uvek"``. + :type database: str + :param format: Output mode. Use ``"data"`` to receive the converted + rows instead of writing a CSV file. + :type format: str + :return: The CSV filepath when ``format`` is ``"csv"`` or the raw + SimaPro data rows when ``format`` is ``"data"``. 
+ :rtype: str | list[list[str]] + :raises ValueError: If an unsupported ``database`` value is supplied. """ if database not in ("ecoinvent", "uvek"): diff --git a/brightpath/simaproconverter.py b/brightpath/simaproconverter.py index 6b64be9..8945708 100644 --- a/brightpath/simaproconverter.py +++ b/brightpath/simaproconverter.py @@ -27,6 +27,13 @@ def format_technosphere_exchange(txt: str): + """Split and normalise a technosphere exchange name from SimaPro. + + :param txt: Raw exchange string as found in a SimaPro CSV export. + :type txt: str + :return: Tuple containing the cleaned name, reference product and location. + :rtype: tuple[str, str, str] + """ location_correction = { "WECC, US only": "US-WECC", @@ -151,6 +158,13 @@ def format_technosphere_exchange(txt: str): def load_ecoinvent_activities(version: str) -> list: + """Load the list of ecoinvent activities for the given version. + + :param version: Ecoinvent version identifier, e.g. ``"3.9"``. + :type version: str + :return: Rows describing ecoinvent activities. + :rtype: list[list[str]] + """ with open(DATA_DIR / "export" / f"list_ei{version}_cutoff_activities.csv") as f: reader = csv.reader(f) next(reader) @@ -158,6 +172,19 @@ def load_ecoinvent_activities(version: str) -> list: def format_biosphere_exchange(exc, ei_version, bio_flows, bio_mapping): + """Normalise a biosphere exchange to match ecoinvent conventions. + + :param exc: Exchange to adjust in-place. + :type exc: dict + :param ei_version: Version of ecoinvent used for interpretation. + :type ei_version: str + :param bio_flows: Known biosphere flows for the version. + :type bio_flows: list[tuple[str, str, str]] + :param bio_mapping: Mapping to resolve outdated flow names. + :type bio_mapping: dict + :return: The updated exchange dictionary. 
+ :rtype: dict + """ if "in ground" in exc["name"]: if ei_version not in ["3.5", "3.6", "3.7", "3.8"]: exc["name"] = exc["name"].replace(", in ground", "") @@ -244,14 +271,48 @@ def format_biosphere_exchange(exc, ei_version, bio_flows, bio_mapping): class SimaproConverter: + """Convert SimaPro CSV exports into Brightway-compatible datasets. + + :param filepath: Path to the SimaPro CSV file. + :type filepath: str + :param ecoinvent_version: Version of ecoinvent to align biosphere data to. + :type ecoinvent_version: str + :param db_name: Optional name of the Brightway database to create. + :type db_name: str | None + :ivar filepath: Normalised path to the validated CSV file. + :vartype filepath: pathlib.Path + :ivar i: Instance of :class:`bw2io.SimaProCSVImporter` handling the data. + :vartype i: bw2io.SimaProCSVImporter + :ivar db_name: Name of the Brightway database that will be created. + :vartype db_name: str + :ivar ecoinvent_version: Version of the ecoinvent database in use. + :vartype ecoinvent_version: str + :ivar biosphere: Mapping between SimaPro and Brightway biosphere flows. + :vartype biosphere: dict + :ivar technosphere: Mapping between SimaPro and Brightway technosphere + exchanges. + :vartype technosphere: dict + :ivar subcompartments: Mapping of sub-compartments between the databases. + :vartype subcompartments: dict + :ivar ei_biosphere_flows: Known biosphere flows for the selected version. + :vartype ei_biosphere_flows: list[tuple[str, str, str]] + :ivar biosphere_flows_correspondence: Mapping of outdated biosphere names. + :vartype biosphere_flows_correspondence: dict + """ + def __init__( self, filepath: str, ecoinvent_version: str = "3.9", db_name: str = None ): - """ - Initialize the SimaproConverter object. - - :param data: list of Simapro inventories - :param ecoinvent_version: ecoinvent version to use + """Initialise the converter and load the SimaPro inventory. + + :param filepath: Path to the SimaPro CSV export to convert. 
+ :type filepath: str + :param ecoinvent_version: Ecoinvent version that should be used when + reconciling biosphere flows. + :type ecoinvent_version: str + :param db_name: Optional Brightway database name override. + :type db_name: str | None + :raises FileNotFoundError: If the provided CSV file cannot be found. """ logging.basicConfig( @@ -283,7 +344,7 @@ def __init__( self.i.db_name = self.db_name def check_database_name(self): - + """Ensure exchanges reference the correct Brightway database name.""" for act in self.i.data: act["database"] = self.i.db_name @@ -294,7 +355,7 @@ def check_database_name(self): exc["input"] = (self.i.db_name, exc["input"][1]) def convert_to_brightway(self): - + """Convert the imported SimaPro data into Brightway inventories.""" print("- format exchanges") internal_datasets = [] for ds in self.i.data: @@ -366,14 +427,16 @@ def convert_to_brightway(self): print("Done!") def remove_empty_datasets(self): + """Remove datasets that contain no exchanges.""" self.i.data = [ds for ds in self.i.data if len(ds["exchanges"]) >= 1] def remove_empty_exchanges(self): + """Remove exchanges that have a zero amount.""" for ds in self.i.data: ds["exchanges"] = [e for e in ds["exchanges"] if e["amount"] != 0.0] def check_inventories(self): - + """Perform basic validation of the converted inventories.""" for ds in self.i.data: if len([x for x in ds["exchanges"] if x["type"] == "production"]) != 1: print( diff --git a/brightpath/utils.py b/brightpath/utils.py index 417db7d..b3391e2 100644 --- a/brightpath/utils.py +++ b/brightpath/utils.py @@ -24,8 +24,15 @@ def get_simapro_biosphere() -> Dict[str, str]: - # Load the matching dictionary between ecoinvent and Simapro biosphere flows - # for each ecoinvent biosphere flow name, it gives the corresponding Simapro name + """Load the correspondence between ecoinvent and SimaPro biosphere flows. + + :return: Mapping from an ecoinvent biosphere flow name to its SimaPro + equivalent name. 
+ :rtype: dict[str, str] + :raises FileNotFoundError: If the mapping file is missing from + ``brightpath/data/export``. + :raises json.JSONDecodeError: If the mapping file cannot be parsed. + """ filename = "simapro-biosphere.json" filepath = DATA_DIR / "export" / filename @@ -44,8 +51,14 @@ def get_simapro_biosphere() -> Dict[str, str]: def get_simapro_subcompartments() -> Dict[str, str]: - # Load the matching dictionary between ecoinvent and Simapro subcompartments - # contained in simapro_subcompartments.yaml + """Load the mapping of biosphere sub-compartments. + + :return: Mapping from ecoinvent sub-compartment names to their SimaPro + equivalents. + :rtype: dict[str, str] + :raises FileNotFoundError: If the YAML file with the mapping is missing. + :raises yaml.YAMLError: If the YAML file cannot be parsed. + """ filename = "simapro_subcompartments.yaml" filepath = DATA_DIR / "export" / filename @@ -66,7 +79,14 @@ def get_simapro_subcompartments() -> Dict[str, str]: def get_simapro_technosphere() -> Dict[Tuple[str, str], str]: - # Load the matching dictionary between ecoinvent and Simapro product flows + """Load the correspondence between ecoinvent and SimaPro product flows. + + :return: Mapping where the key is the pair ``(name, location)`` of an + ecoinvent technosphere exchange and the value is the SimaPro exchange + name. + :rtype: dict[tuple[str, str], str] + :raises FileNotFoundError: If the CSV mapping file is missing. + """ filename = "simapro-technosphere-3.5.csv" filepath = DATA_DIR / "export" / filename @@ -84,8 +104,14 @@ def get_simapro_technosphere() -> Dict[Tuple[str, str], str]: def get_simapro_ecoinvent_blacklist(): - # Load the list of Simapro biosphere flows that - # should be excluded from the export + """Load the list of exchanges to exclude when exporting to SimaPro. + + :return: Dictionary describing exchanges that must be skipped for the + ecoinvent export. + :rtype: dict + :raises FileNotFoundError: If the blacklist file is missing. 
+ :raises yaml.YAMLError: If the blacklist file cannot be parsed. + """ filename = "simapro_blacklist.yaml" filepath = DATA_DIR / "export" / filename @@ -102,8 +128,14 @@ def get_simapro_ecoinvent_blacklist(): def get_simapro_uvek_blacklist(): - # Load the list of Simapro uvek flows that - # should be excluded from the export + """Load the blacklist of SimaPro UVEK exchanges. + + :return: Dictionary describing exchanges that must be skipped when + targeting the UVEK database. + :rtype: dict + :raises FileNotFoundError: If the blacklist YAML file cannot be found. + :raises yaml.YAMLError: If the blacklist file cannot be parsed. + """ filename = "uvek_blacklist.yaml" filepath = DATA_DIR / "export" / filename @@ -120,9 +152,13 @@ def get_simapro_uvek_blacklist(): def get_ecoinvent_to_uvek_mapping(): - """ - Load ecoinvent_to_uvek_mapping.csv into a dictionary. - :return: dictionary with tuples of ecoinvent flow name and location as keys + """Load the mapping between ecoinvent flows and UVEK identifiers. + + :return: Dictionary keyed by a tuple consisting of the ecoinvent flow + name, location and additional qualifiers, pointing to the UVEK + identifier. + :rtype: dict[tuple[str, str, str, str], str] + :raises FileNotFoundError: If the CSV mapping file cannot be found. """ filename = "ecoinvent_to_uvek_mapping.csv" filepath = DATA_DIR / "export" / filename @@ -135,9 +171,12 @@ def get_ecoinvent_to_uvek_mapping(): def get_ecoinvent_transport_distances(): - """ - Load ei_transport.csv into a dictionary. - :return: dictionary with tuples of ecoinvent flow name and location as keys + """Load default transport distances for ecoinvent flows. + + :return: Mapping from exchange name to a dictionary containing transport + distances per mode and region. + :rtype: dict[str, dict[str, str]] + :raises FileNotFoundError: If the CSV with transport distances is missing. 
""" filename = "ei_transport.csv" filepath = DATA_DIR / "export" / filename @@ -164,13 +203,14 @@ def get_ecoinvent_transport_distances(): def get_simapro_fields_list() -> list[str]: + """Return the ordered list of SimaPro section names. + + :return: Sequence of field names that describes the structure of a + SimaPro CSV export. + :rtype: list[str] + :raises FileNotFoundError: If the YAML definition file is missing. + :raises yaml.YAMLError: If the YAML file cannot be parsed. """ - Load the list of Simapro fields that - should be included in the export. - :return: list of Simapro fields - """ - # Load the list of Simapro fields that - # should be included in the export filename = "simapro_fields.yaml" filepath = DATA_DIR / "export" / filename @@ -185,13 +225,13 @@ def get_simapro_fields_list() -> list[str]: def get_simapro_units(): + """Load the mapping of Brightway units to SimaPro units. + + :return: Dictionary mapping source units to their SimaPro counterparts. + :rtype: dict[str, str] + :raises FileNotFoundError: If the YAML definition file is missing. + :raises yaml.YAMLError: If the YAML file cannot be parsed. """ - Load the list of Simapro fields that - should be included in the export. - :return: list of Simapro fields - """ - # Load the list of Simapro fields that - # should be included in the export filename = "simapro_units.yaml" filepath = DATA_DIR / "export" / filename @@ -206,13 +246,13 @@ def get_simapro_units(): def get_simapro_headers(): + """Load the SimaPro header rows that precede each export. + + :return: List of header strings used when generating SimaPro CSV files. + :rtype: list[str] + :raises FileNotFoundError: If the YAML definition file is missing. + :raises yaml.YAMLError: If the YAML file cannot be parsed. """ - Load the list of Simapro fields that - should be included in the export. 
- :return: list of Simapro fields - """ - # Load the list of Simapro fields that - # should be included in the export filename = "simapro_headers.yaml" filepath = DATA_DIR / "export" / filename @@ -228,10 +268,13 @@ def get_simapro_headers(): def get_simapro_ecoinvent_exceptions(): - """ - Load the YAML file "simapro_ei_exceptions.yaml" - and return it as a dictionary. - :return: + """Load the list of special-case ecoinvent flows. + + :return: Dictionary describing exchanges that require bespoke handling + during the conversion. + :rtype: dict + :raises FileNotFoundError: If the YAML exception file is missing. + :raises yaml.YAMLError: If the YAML file cannot be parsed. """ filename = "simapro_ei_exceptions.yaml" @@ -251,11 +294,13 @@ def get_simapro_ecoinvent_exceptions(): def get_waste_exchange_names(): - """ - Load the list of names that - indicate that the input is a - waste treatment. - :return: list of name + """Return the keywords that identify waste-treatment exchanges. + + :return: List of strings that indicate an exchange represents waste + treatment. + :rtype: list[str] + :raises FileNotFoundError: If the YAML file cannot be found. + :raises yaml.YAMLError: If the YAML file cannot be parsed. """ filename = "waste_exchange_names.yaml" @@ -271,11 +316,15 @@ def get_waste_exchange_names(): def check_inventories(data: list) -> None: - """ - Check that inventories, and the exchanges they contain - have all the mandatory fields. - :param data: list of activities - :return: list of activities or error + """Validate that inventories contain the required information. + + The function verifies that each exchange includes the mandatory keys + expected by the conversion logic. A :class:`ValueError` is raised when + missing data is detected and the offending exchanges are displayed. + + :param data: Brightway activities that should be checked. + :type data: list[dict] + :raises ValueError: If an exchange misses mandatory attributes. 
""" MANDATORY_TECH_EXC_KEYS = ["name", "reference product", "location", "unit"] @@ -330,10 +379,17 @@ def check_inventories(data: list) -> None: def import_bw_inventories(filepath: str) -> list[dict]: - """ - Import inventories from a spreadsheet file. - :param filepath: - :return: list of inventories + """Load Brightway inventories from an Excel workbook. + + The importer relies on :mod:`bw2io` to read Brightway exports and ensures + that the required migrations are available. + + :param filepath: Path to the Excel inventory spreadsheet. + :type filepath: str + :return: List of activities in Brightway format. + :rtype: list[dict] + :raises FileNotFoundError: If ``filepath`` does not exist. + :raises ValueError: If the provided file does not have the ``.xlsx`` suffix. """ # using bw2io, we load the inventories contained # in the spreadsheet file @@ -363,6 +419,14 @@ def import_bw_inventories(filepath: str) -> list[dict]: def check_metadata(metadata: dict) -> dict: + """Validate metadata against the expected schema. + + :param metadata: Raw metadata read from the YAML file. + :type metadata: dict + :return: Sanitised metadata that matches the schema. + :rtype: dict + :raises voluptuous.error.MultipleInvalid: If validation fails. + """ # metadata dictionary should conform to the following schema: # Define the validation schema system_description_schema = Schema( @@ -401,10 +465,17 @@ def check_metadata(metadata: dict) -> dict: def load_inventory_metadata(filepath: str) -> dict: - """ - Load the metadata of the inventory. - :param filepath: - :return: metadata + """Load and validate inventory metadata from disk. + + :param filepath: Path to the YAML document containing the metadata. + :type filepath: str + :return: Validated metadata dictionary. + :rtype: dict + :raises FileNotFoundError: If the metadata file does not exist. + :raises ValueError: If the file does not have a ``.yaml`` extension. + :raises yaml.YAMLError: If the metadata file cannot be parsed. 
+ :raises voluptuous.error.MultipleInvalid: If the metadata structure is + invalid. """ # if filepath is a string, convert to Path object if isinstance(filepath, str): @@ -431,11 +502,14 @@ def load_inventory_metadata(filepath: str) -> dict: def is_activity_waste_treatment(activity: dict, database: str) -> bool: - """ - Detect whether the given activity is a - process or a waste treatment. - :param activity: - :return: True or False + """Determine whether an activity represents waste treatment. + + :param activity: Brightway activity dictionary to inspect. + :type activity: dict + :param database: Name of the target database used for heuristics. + :type database: str + :return: ``True`` if the activity is a waste treatment process. + :rtype: bool """ if "type" in activity: @@ -451,11 +525,14 @@ def is_activity_waste_treatment(activity: dict, database: str) -> bool: def is_a_waste_treatment(name: str, database: str) -> bool: - """ - Detect if name contains typical to waste treatment. - :param name: exchange name - :param database: database to link to - :return: bool. + """Check whether an exchange name matches waste-treatment keywords. + + :param name: Exchange name to analyse. + :type name: str + :param database: Target database used to refine the decision. + :type database: str + :return: ``True`` if the exchange is considered waste treatment. + :rtype: bool """ WASTE_TERMS = get_waste_exchange_names() NOT_WASTE_TERMS = [ @@ -478,10 +555,13 @@ def is_a_waste_treatment(name: str, database: str) -> bool: def find_production_exchange(activity: dict) -> dict: - """ - Find the production exchange of the given activity. - :param activity: - :return: production exchange + """Retrieve the production exchange from an activity. + + :param activity: Activity whose production exchange should be returned. + :type activity: dict + :return: The production exchange of the activity. + :rtype: dict + :raises ValueError: If the activity does not contain a production exchange. 
""" for exc in activity["exchanges"]: if exc["type"] == "production": @@ -492,10 +572,13 @@ def find_production_exchange(activity: dict) -> dict: def get_technosphere_exchanges(activity: dict) -> list: - """ - Get the technosphere exchanges of the given activity. - :param activity: - :return: technosphere exchanges + """Return the technosphere exchanges from an activity. + + :param activity: Activity for which technosphere exchanges should be + collected. + :type activity: dict + :return: Technosphere exchanges with non-zero amounts. + :rtype: list[dict] """ return [ exc @@ -505,11 +588,15 @@ def get_technosphere_exchanges(activity: dict) -> list: def get_biosphere_exchanges(activity: dict, category: str = None) -> list: - """ - Get the technosphere exchanges of the given activity. - :param activity: activity - :param category: biosphere category - :return: biosphere exchanges + """Return biosphere exchanges optionally filtered by category. + + :param activity: Activity for which biosphere exchanges should be + collected. + :type activity: dict + :param category: Biosphere compartment to filter for, e.g. ``"air"``. + :type category: str | None + :return: Biosphere exchanges that match the optional category. + :rtype: list[dict] """ return [ exc @@ -523,13 +610,20 @@ def get_biosphere_exchanges(activity: dict, category: str = None) -> list: def format_exchange_name( name: str, reference_product: str, location: str, unit: str, database: str ) -> str: - """ - Format the name of the exchange. - :param name: exchange name. - :param reference_product: exchange reference product. - :param location: exchange location. - :param database: database to link to. - :return: + """Format a Brightway exchange name for SimaPro compatibility. + + :param name: Exchange name from the Brightway inventory. + :type name: str + :param reference_product: Reference product of the exchange. + :type reference_product: str + :param location: Location code associated with the exchange. 
+ :type location: str + :param unit: Unit of the exchange. + :type unit: str + :param database: Target database used to select the formatting logic. + :type database: str + :return: Name formatted according to SimaPro conventions. + :rtype: str """ if database == "ecoinvent": @@ -565,12 +659,12 @@ def format_exchange_name( def get_simapro_uncertainty_type(uncertainty_type: int) -> str: - """ - Brightway uses integers to define uncertianty distribution types. - https://stats-arrays.readthedocs.io/en/latest/#mapping-parameter-array-columns-to-uncertainty-distributions - Simapro uses strings. - :param uncertainty_type: - :return: uncertainty name + """Map Brightway uncertainty codes to the SimaPro string representation. + + :param uncertainty_type: Integer identifier of the uncertainty type. + :type uncertainty_type: int + :return: Human-readable uncertainty label used by SimaPro. + :rtype: str """ UNCERTAINITY_TYPES = { @@ -586,11 +680,14 @@ def get_simapro_uncertainty_type(uncertainty_type: int) -> str: def is_blacklisted(exchange: dict, database: str) -> bool: - """ - Check whether a name is blacklisted or not - :param name: name - :param database: database to link to. - :return: bool + """Check whether an exchange should be excluded during conversion. + + :param exchange: Exchange dictionary to inspect. + :type exchange: dict + :param database: Target database, ``"ecoinvent"`` or ``"uvek"``. + :type database: str + :return: ``True`` when the exchange must be ignored. + :rtype: bool """ if exchange["name"] in simapro_ecoinvent_blacklist: @@ -604,11 +701,14 @@ def is_blacklisted(exchange: dict, database: str) -> bool: def convert_sd_to_sd2(value: float, uncertainty_type: str) -> float: - """ - Convert standard deviation of underlying lognormal distirbution - to standard deviation squared. - :param value: - :return: squared standard deviation + """Convert standard deviations according to SimaPro expectations. 
+ + :param value: Standard deviation or lognormal sigma from Brightway. + :type value: float + :param uncertainty_type: Uncertainty distribution label. + :type uncertainty_type: str + :return: Converted standard deviation compatible with SimaPro. + :rtype: float """ if uncertainty_type == "Lognormal": @@ -624,9 +724,12 @@ def convert_sd_to_sd2(value: float, uncertainty_type: str) -> float: def get_uvek_conversion_factors() -> dict: - """ - Get conversion factors for uvek database. - :return: dictionary + """Load conversion factors specific to the UVEK database. + + :return: Mapping of exchange names to conversion factors and units. + :rtype: dict + :raises FileNotFoundError: If the YAML file is missing. + :raises yaml.YAMLError: If the YAML file cannot be parsed. """ filename = "uvek_conversion_factors.yaml" filepath = DATA_DIR / "export" / filename @@ -642,6 +745,13 @@ def get_uvek_conversion_factors() -> dict: def round_floats_in_string(s): + """Round floating point numbers found inside a string to two decimals. + + :param s: Arbitrary text potentially containing floating point numbers. + :type s: str + :return: String where the embedded numbers have been rounded. + :rtype: str + """ # Pattern to detect float numbers in a string pattern = re.compile(r"[-+]?\d*\.\d+") @@ -654,10 +764,12 @@ def round_match(match): def get_subcategory(category: str) -> str: - """ - Extract Simapro subcategory from string - :param category: - :return: + """Extract the SimaPro subcategory from a combined category string. + + :param category: Category string containing ``/``-separated components. + :type category: str + :return: Subcategory formatted with backslashes as required by SimaPro. + :rtype: str """ if len(category.split("/")) > 1: @@ -671,11 +783,12 @@ def get_subcategory(category: str) -> str: def flag_exchanges(activity: dict) -> dict: - """ - We flag exchanges to keep track of whether they have been - processed or not. 
- :param activity: activity - :return: activity with flagged exchanged + """Mark exchanges as unused before processing. + + :param activity: Activity whose exchanges should be flagged. + :type activity: dict + :return: Activity with a ``used`` flag initialised on each exchange. + :rtype: dict """ for exc in activity["exchanges"]: @@ -685,10 +798,10 @@ def flag_exchanges(activity: dict) -> dict: def print_unused_exchanges(inventories: list) -> None: - """ - Print unused exchanges - :param inventories: - :return: None + """Display exchanges that were not converted. + + :param inventories: Converted activities to inspect. + :type inventories: list[dict] """ unused_exchanges = [] @@ -726,12 +839,14 @@ def print_unused_exchanges(inventories: list) -> None: def check_exchanges_for_conversion(exchanges: list, database: str) -> list: - """ - Check if some exchanges need to be converted. - Specifically when linking to uvek. - :param exchanges: exchanges to potentially convert. - :param database: converted exchanges. - :return: list of exchanges + """Apply database-specific conversion factors to exchanges. + + :param exchanges: Exchanges that might require conversion. + :type exchanges: list[dict] + :param database: Target database identifier. + :type database: str + :return: Updated list of exchanges. + :rtype: list[dict] """ if database == "uvek": @@ -745,14 +860,14 @@ def check_exchanges_for_conversion(exchanges: list, database: str) -> list: def fetch_transport_distance(name: str, location: str) -> tuple: - """ - Depending on the exchange name `name`, return one or - several exchanges representing additional transport. - The uvek database does not have market datasets, - hence transport has to be added manually. - :param name: exchange name - :param location: location of the consuming activity - :return: one or several transport exchanges + """Return default transport distances for a product and location. + + :param name: Name of the technosphere exchange. 
+ :type name: str + :param location: Location code of the consuming activity. + :type location: str + :return: Distances for train, lorry and barge transport. + :rtype: tuple[float, float, float] """ if name in ecoinvent_transport_distances: @@ -773,10 +888,13 @@ def fetch_transport_distance(name: str, location: str) -> tuple: def add_distri_transport(activity: dict) -> dict: - """ - Add transport exchanges for distribution. - :param activity: activity - :return: activity with added transport exchanges. + """Add distribution transport exchanges required by the UVEK database. + + :param activity: Activity that should receive additional transport + exchanges. + :type activity: dict + :return: Activity enriched with transport exchanges. + :rtype: dict """ train_ch, lorry_ch, barge_ch = (0.0, 0.0, 0.0) @@ -928,6 +1046,13 @@ def add_distri_transport(activity: dict) -> dict: def remove_duplicates(data): + """Remove datasets that share the same name from a list of activities. + + :param data: Activities to deduplicate. + :type data: list[dict] + :return: New list containing only the first occurrence of each dataset. + :rtype: list[dict] + """ a = [] acts = [] for x in data: @@ -940,6 +1065,13 @@ def remove_duplicates(data): def check_simapro_inventory(file): + """Check a SimaPro CSV file for forbidden units. + + :param file: Path to the CSV inventory file. + :type file: str + :return: Path to the cleaned CSV file with forbidden units replaced. + :rtype: str + """ # read CSV file new_file_data = [] with open(file, "r", encoding="latin-1") as f: @@ -968,13 +1100,12 @@ def check_simapro_inventory(file): def search_for_forbidden_units(row: list) -> list: - """ - Search for forbidden units. - Returns the csv row. - - :param row: list of values - :return: list of values + """Replace forbidden units found in a CSV row. + :param row: Row values to inspect. + :type row: list[str] + :return: Row with forbidden units replaced by allowed ones. 
+ :rtype: list[str] """ FORBIDDEN_UNITS = { "min": "minute", @@ -989,6 +1120,13 @@ def search_for_forbidden_units(row: list) -> list: def load_biosphere_correspondence(): + """Load the correspondence between SimaPro and ecoinvent biosphere flows. + + :return: Mapping of biosphere flows grouped by compartment. + :rtype: dict + :raises FileNotFoundError: If the correspondence file cannot be found. + :raises yaml.YAMLError: If the YAML file cannot be parsed. + """ filename = "correspondence_biosphere_flows.yaml" filepath = DATA_DIR / "export" / filename if not filepath.is_file(): @@ -1008,6 +1146,12 @@ def load_biosphere_correspondence(): def load_ei_biosphere_flows(): + """Load the list of biosphere flows available in ecoinvent. + + :return: Unique set of tuples ``(name, category, subcategory)``. + :rtype: list[tuple[str, str, str]] + :raises FileNotFoundError: If the biosphere flow file cannot be found. + """ filename = "flows_biosphere_39.csv" filepath = DATA_DIR / "export" / filename if not filepath.is_file(): @@ -1023,7 +1167,15 @@ def load_ei_biosphere_flows(): def lower_cap_first_letter(s): - # Check if the string starts with an acronym (all uppercase letters followed by a space, end of string, dash, or comma) + """Lowercase the first character unless the input starts with an acronym. + + :param s: String to normalise. + :type s: str + :return: Adjusted string that preserves acronyms. + :rtype: str + """ + # Check if the string starts with an acronym (all uppercase letters + # followed by a space, end of string, dash, or comma) if re.match(r"^[A-Z]+(\s|$|-|,)", s): return s # Keep acronyms unchanged return s[0].lower() + s[1:] if s else s # Lowercase first letter otherwise