diff --git a/.gitignore b/.gitignore index dfe58380d..ae3b3cce2 100644 --- a/.gitignore +++ b/.gitignore @@ -69,3 +69,37 @@ docs/_build # Mypy Cache .mypy_cache/ + + +# Local data and backups +/data_bundle* +/datasets/ +/demandregio* +/pypsa_technology_data_egon_data.zip +/PyPSA-technology-data-94085a8/ +/industrial_sites/ +/openstreetmap/ +/tyndp/ +/vg250/ +/bnetza_mastr/ +*.orig +*.pid* +*.csv +*.sql +*.zip +*.yaml + +# Python and IDE folders +/airflow/ +/.spyproject/ +/docker/ + +# Other +/hstore_Extension + +*__BACKUP* +*__BASE* +*__LOCAL* +*__REMOTE* diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py index ca5b98ca2..63c7813d3 100644 --- a/src/egon/data/datasets/DSM_cts_ind.py +++ b/src/egon/data/datasets/DSM_cts_ind.py @@ -20,7 +20,7 @@ import pandas as pd from egon.data import config, db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.electricity_demand.temporal import calc_load_curve from egon.data.datasets.industry.temporal import identify_bus from egon.data.metadata import ( @@ -134,7 +134,72 @@ class DsmPotential(Dataset): #: name: str = "DsmPotential" #: - version: str = "0.0.7" + version: str = "0.0.10" + + sources = DatasetSources( + tables={ + "cts_loadcurves": { + "schema": "demand", + "table": "egon_etrago_electricity_cts", + }, + "ind_osm_loadcurves": { + "schema": "demand", + "table": "egon_osm_ind_load_curves", + }, + "ind_osm_loadcurves_individual": { + "schema": "demand", + "table": "egon_osm_ind_load_curves_individual", + }, + "ind_sites_loadcurves": { + "schema": "demand", + "table": "egon_sites_ind_load_curves", + }, + "ind_sites_loadcurves_individual": { + "schema": "demand", + "table": "egon_sites_ind_load_curves_individual", + }, + "ind_sites": {"schema": "demand", "table": "egon_industrial_sites"}, + "ind_sites_schmidt": { + "schema": "demand", + "table": 
"egon_schmidt_industrial_sites", + }, + "demandregio_ind_sites": { + "schema": "demand", + "table": "egon_demandregio_sites_ind_electricity", + }, + } + ) + targets = DatasetTargets( + tables={ + "bus": {"schema": "grid", "table": "egon_etrago_bus"}, + "link": {"schema": "grid", "table": "egon_etrago_link"}, + "link_timeseries": { + "schema": "grid", + "table": "egon_etrago_link_timeseries", + }, + "store": {"schema": "grid", "table": "egon_etrago_store"}, + "store_timeseries": { + "schema": "grid", + "table": "egon_etrago_store_timeseries", + }, + "cts_loadcurves_dsm": { + "schema": "demand", + "table": "egon_etrago_electricity_cts_dsm_timeseries", + }, + "ind_osm_loadcurves_individual_dsm": { + "schema": "demand", + "table": "egon_osm_ind_load_curves_individual_dsm_timeseries", + }, + "demandregio_ind_sites_dsm": { + "schema": "demand", + "table": "egon_demandregio_sites_ind_electricity_dsm_timeseries", + }, + "ind_sites_loadcurves_individual": { + "schema": "demand", + "table": "egon_sites_ind_load_curves_individual_dsm_timeseries", + }, + } + ) def __init__(self, dependencies): super().__init__( @@ -147,9 +212,7 @@ def __init__(self, dependencies): # Datasets class EgonEtragoElectricityCtsDsmTimeseries(Base): - target = config.datasets()["DSM_CTS_industry"]["targets"][ - "cts_loadcurves_dsm" - ] + target = DsmPotential.targets.tables["cts_loadcurves_dsm"] __tablename__ = target["table"] __table_args__ = {"schema": target["schema"]} @@ -164,9 +227,7 @@ class EgonEtragoElectricityCtsDsmTimeseries(Base): class EgonOsmIndLoadCurvesIndividualDsmTimeseries(Base): - target = config.datasets()["DSM_CTS_industry"]["targets"][ - "ind_osm_loadcurves_individual_dsm" - ] + target = DsmPotential.targets.tables["ind_osm_loadcurves_individual_dsm"] __tablename__ = target["table"] __table_args__ = {"schema": target["schema"]} @@ -182,9 +243,7 @@ class EgonOsmIndLoadCurvesIndividualDsmTimeseries(Base): class EgonDemandregioSitesIndElectricityDsmTimeseries(Base): - target = 
config.datasets()["DSM_CTS_industry"]["targets"][ - "demandregio_ind_sites_dsm" - ] + target = DsmPotential.targets.tables["demandregio_ind_sites_dsm"] __tablename__ = target["table"] __table_args__ = {"schema": target["schema"]} @@ -201,9 +260,7 @@ class EgonDemandregioSitesIndElectricityDsmTimeseries(Base): class EgonSitesIndLoadCurvesIndividualDsmTimeseries(Base): - target = config.datasets()["DSM_CTS_industry"]["targets"][ - "ind_sites_loadcurves_individual" - ] + target = DsmPotential.targets.tables["ind_sites_loadcurves_individual"] __tablename__ = target["table"] __table_args__ = {"schema": target["schema"]} @@ -219,7 +276,7 @@ class EgonSitesIndLoadCurvesIndividualDsmTimeseries(Base): def add_metadata_individual(): - targets = config.datasets()["DSM_CTS_industry"]["targets"] + targets = DsmPotential.targets.tables targets = { k: v for k, v in targets.items() if "dsm_timeseries" in v["table"] @@ -422,9 +479,7 @@ def cts_data_import(cts_cool_vent_ac_share): # import load data - sources = config.datasets()["DSM_CTS_industry"]["sources"][ - "cts_loadcurves" - ] + sources = DsmPotential.sources.tables["cts_loadcurves"] ts = db.select_dataframe( f"""SELECT bus_id, scn_name, p_set FROM @@ -465,14 +520,12 @@ def ind_osm_data_import(ind_vent_cool_share): # import load data - sources = config.datasets()["DSM_CTS_industry"]["sources"][ - "ind_osm_loadcurves" - ] + sources = DsmPotential.sources.tables["ind_osm_loadcurves"] dsm = db.select_dataframe( f""" SELECT bus, scn_name, p_set FROM - {sources['schema']}.{sources['table']} + {sources["schema"]}.{sources["table"]} """ ) @@ -503,14 +556,12 @@ def ind_osm_data_import_individual(ind_vent_cool_share): # import load data - sources = config.datasets()["DSM_CTS_industry"]["sources"][ - "ind_osm_loadcurves_individual" - ] + sources = DsmPotential.sources.tables["ind_osm_loadcurves_individual"] dsm = db.select_dataframe( f""" SELECT osm_id, bus_id as bus, scn_name, p_set FROM - {sources['schema']}.{sources['table']} + 
{sources["schema"]}.{sources["table"]} """ ) @@ -543,14 +594,12 @@ def ind_sites_vent_data_import(ind_vent_share, wz): # import load data - sources = config.datasets()["DSM_CTS_industry"]["sources"][ - "ind_sites_loadcurves" - ] + sources = DsmPotential.sources.tables["ind_sites_loadcurves"] dsm = db.select_dataframe( f""" SELECT bus, scn_name, p_set FROM - {sources['schema']}.{sources['table']} + {sources["schema"]}.{sources["table"]} WHERE wz = {wz} """ ) @@ -582,14 +631,12 @@ def ind_sites_vent_data_import_individual(ind_vent_share, wz): # import load data - sources = config.datasets()["DSM_CTS_industry"]["sources"][ - "ind_sites_loadcurves_individual" - ] + sources = DsmPotential.sources.tables["ind_sites_loadcurves_individual"] dsm = db.select_dataframe( f""" SELECT site_id, bus_id as bus, scn_name, p_set FROM - {sources['schema']}.{sources['table']} + {sources["schema"]}.{sources["table"]} WHERE wz = {wz} """ ) @@ -613,24 +660,22 @@ def calc_ind_site_timeseries(scenario): # calc_load_curves_ind_sites # select demands per industrial site including the subsector information - source1 = config.datasets()["DSM_CTS_industry"]["sources"][ - "demandregio_ind_sites" - ] + source1 = DsmPotential.sources.tables["demandregio_ind_sites"] demands_ind_sites = db.select_dataframe( - f"""SELECT industrial_sites_id, wz, demand - FROM {source1['schema']}.{source1['table']} - WHERE scenario = '{scenario}' - AND demand > 0 - """ + f"""SELECT industrial_sites_id, wz, demand + FROM {source1["schema"]}.{source1["table"]} + WHERE scenario = '{scenario}' + AND demand > 0 + """ ).set_index(["industrial_sites_id"]) # select industrial sites as demand_areas from database - source2 = config.datasets()["DSM_CTS_industry"]["sources"]["ind_sites"] + source2 = DsmPotential.sources.tables["ind_sites"] demand_area = db.select_geodataframe( f"""SELECT id, geom, subsector FROM - {source2['schema']}.{source2['table']}""", + {source2["schema"]}.{source2["table"]}""", index_col="id", 
geom_col="geom", epsg=3035, @@ -685,13 +730,11 @@ def calc_ind_site_timeseries(scenario): def relate_to_schmidt_sites(dsm): # import industrial sites by Schmidt - source = config.datasets()["DSM_CTS_industry"]["sources"][ - "ind_sites_schmidt" - ] + source = DsmPotential.sources.tables["ind_sites_schmidt"] schmidt = db.select_dataframe( f"""SELECT application, geom FROM - {source['schema']}.{source['table']}""" + {source["schema"]}.{source["table"]}""" ) # relate calculated timeseries (dsm) to Schmidt's industrial sites @@ -879,10 +922,10 @@ def create_dsm_components( dsm_buses["scn_name"] = dsm["scn_name"].copy() # get original buses and add copy of relevant information - target1 = config.datasets()["DSM_CTS_industry"]["targets"]["bus"] + target1 = DsmPotential.targets.tables["bus"] original_buses = db.select_geodataframe( f"""SELECT bus_id, v_nom, scn_name, x, y, geom FROM - {target1['schema']}.{target1['table']}""", + {target1["schema"]}.{target1["table"]}""", geom_col="geom", epsg=4326, ) @@ -934,8 +977,8 @@ def create_dsm_components( dsm_links["scn_name"] = dsm_buses["scn_name"].copy() # set link_id - target2 = config.datasets()["DSM_CTS_industry"]["targets"]["link"] - sql = f"""SELECT link_id FROM {target2['schema']}.{target2['table']}""" + target2 = DsmPotential.targets.tables["link"] + sql = f"""SELECT link_id FROM {target2["schema"]}.{target2["table"]}""" max_id = pd.read_sql_query(sql, con) max_id = max_id["link_id"].max() if np.isnan(max_id): @@ -971,8 +1014,8 @@ def create_dsm_components( dsm_stores["original_bus"] = dsm_buses["original_bus"].copy() # set store_id - target3 = config.datasets()["DSM_CTS_industry"]["targets"]["store"] - sql = f"""SELECT store_id FROM {target3['schema']}.{target3['table']}""" + target3 = DsmPotential.targets.tables["store"] + sql = f"""SELECT store_id FROM {target3["schema"]}.{target3["table"]}""" max_id = pd.read_sql_query(sql, con) max_id = max_id["store_id"].max() if np.isnan(max_id): @@ -1109,7 +1152,7 @@ def 
data_export(dsm_buses, dsm_links, dsm_stores, carrier): Remark to be filled in column 'carrier' identifying DSM-potential """ - targets = config.datasets()["DSM_CTS_industry"]["targets"] + targets = DsmPotential.targets.tables # dsm_buses @@ -1217,8 +1260,7 @@ def delete_dsm_entries(carrier): Remark in column 'carrier' identifying DSM-potential """ - targets = config.datasets()["DSM_CTS_industry"]["targets"] - + targets = DsmPotential.targets.tables # buses sql = ( @@ -1230,12 +1272,10 @@ def delete_dsm_entries(carrier): # links sql = f""" - DELETE FROM {targets["link_timeseries"]["schema"]}. - {targets["link_timeseries"]["table"]} t + DELETE FROM {targets['link_timeseries']['schema']}.{targets['link_timeseries']['table']} t WHERE t.link_id IN ( - SELECT l.link_id FROM {targets["link"]["schema"]}. - {targets["link"]["table"]} l + SELECT l.link_id FROM {targets['link']['schema']}.{targets['link']['table']} l WHERE l.carrier LIKE '{carrier}' ); """ @@ -1243,8 +1283,7 @@ def delete_dsm_entries(carrier): db.execute_sql(sql) sql = f""" - DELETE FROM {targets["link"]["schema"]}. - {targets["link"]["table"]} l + DELETE FROM {targets['link']['schema']}.{targets['link']['table']} l WHERE (l.carrier LIKE '{carrier}'); """ @@ -1253,12 +1292,10 @@ def delete_dsm_entries(carrier): # stores sql = f""" - DELETE FROM {targets["store_timeseries"]["schema"]}. - {targets["store_timeseries"]["table"]} t + DELETE FROM {targets['store_timeseries']['schema']}.{targets['store_timeseries']['table']} t WHERE t.store_id IN ( - SELECT s.store_id FROM {targets["store"]["schema"]}. 
- {targets["store"]["table"]} s + SELECT s.store_id FROM {targets['store']['schema']}.{targets['store']['table']} s WHERE s.carrier LIKE '{carrier}' ); """ @@ -1266,7 +1303,7 @@ def delete_dsm_entries(carrier): db.execute_sql(sql) sql = f""" - DELETE FROM {targets["store"]["schema"]}.{targets["store"]["table"]} s + DELETE FROM {targets['store']['schema']}.{targets['store']['table']} s WHERE (s.carrier LIKE '{carrier}'); """ @@ -1852,4 +1889,4 @@ def dsm_cts_ind_processing(): dsm_cts_ind_individual() - add_metadata_individual() + add_metadata_individual() \ No newline at end of file diff --git a/src/egon/data/datasets/__init__.py b/src/egon/data/datasets/__init__.py index d65339d01..19ced4a92 100644 --- a/src/egon/data/datasets/__init__.py +++ b/src/egon/data/datasets/__init__.py @@ -3,15 +3,18 @@ from __future__ import annotations from collections import abc -from dataclasses import dataclass +from dataclasses import dataclass, field from functools import partial, reduce, update_wrapper -from typing import Callable, Iterable, Set, Tuple, Union +from typing import Callable, Dict, Iterable, Set, Tuple, Union import re +import json +from pathlib import Path from airflow.models.baseoperator import BaseOperator as Operator from airflow.operators.python import PythonOperator from sqlalchemy import Column, ForeignKey, Integer, String, Table, orm, tuple_ from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.dialects.postgresql import JSONB from egon.data import config, db, logger @@ -79,6 +82,9 @@ class Model(Base): version = Column(String, nullable=False) epoch = Column(Integer, default=0) scenarios = Column(String, nullable=False) + sources = Column(JSONB, nullable=True) + targets = Column(JSONB, nullable=True) + dependencies = orm.relationship( "Model", secondary=DependencyGraph, @@ -88,6 +94,78 @@ class Model(Base): ) +@dataclass +class DatasetSources: + tables: Dict[str, str] = field(default_factory=dict) + files: Dict[str, str] = 
field(default_factory=dict) + urls: Dict[str, str] = field(default_factory=dict) + + def empty(self): + return not (self.tables or self.files or self.urls) + + def get_table_schema(self, key: str) -> str: + """Returns the schema of the table identified by key.""" + try: + return self.tables[key].split(".", 1)[0] + except (KeyError, AttributeError, IndexError): + raise ValueError(f"Invalid table reference: {self.tables.get(key)}") + + def get_table_name(self, key: str) -> str: + """Returns the table name of the table identified by key.""" + try: + return self.tables[key].split(".", 1)[1] + except (KeyError, AttributeError, IndexError): + raise ValueError(f"Invalid table reference: {self.tables.get(key)}") + + def to_dict(self): + return { + "tables": self.tables, + "urls": self.urls, + "files": self.files, + } + + @classmethod + def from_dict(cls, data): + return cls( + tables=data.get("tables", {}), + urls=data.get("urls", {}), + files=data.get("files", {}), + ) + +@dataclass +class DatasetTargets: + tables: Dict[str, str] = field(default_factory=dict) + files: Dict[str, str] = field(default_factory=dict) + + def empty(self): + return not (self.tables or self.files) + + def get_table_schema(self, key: str) -> str: + """Returns the schema of the table identified by key.""" + try: + return self.tables[key].split(".", 1)[0] + except (KeyError, AttributeError, IndexError): + raise ValueError(f"Invalid table reference: {self.tables.get(key)}") + + def get_table_name(self, key: str) -> str: + """Returns the table name of the table identified by key.""" + try: + return self.tables[key].split(".", 1)[1] + except (KeyError, AttributeError, IndexError): + raise ValueError(f"Invalid table reference: {self.tables.get(key)}") + + def to_dict(self): + return { + "tables": self.tables, + "files": self.files, + } + + @classmethod + def from_dict(cls, data): + return cls( + tables=data.get("tables", {}), + files=data.get("files", {}), + ) + #: A :class:`Task` is an Airflow :class:`Operator` or 
any #: :class:`Callable ` taking no arguments and returning #: :obj:`None`. :class:`Callables ` will be converted @@ -189,6 +267,12 @@ class Dataset: #: and a sequential number in case the data changes without the date #: or region changing, for example due to implementation changes. version: str + #: The sources used by the datasets. + #: Could be tables, files and urls + sources: DatasetSources = field(init=False) + #: The targets created by the datasets. + #: Could be tables and files + targets: DatasetTargets = field(init=False) #: The first task(s) of this :class:`Dataset` will be marked as #: downstream of any of the listed dependencies. In case of bare #: :class:`Task`, a direct link will be created whereas for a @@ -235,7 +319,10 @@ def update(self, session): name=self.name, version=self.version, scenarios=config.settings()["egon-data"]["--scenarios"], + sources=self.sources.to_dict() if hasattr(self.sources, "to_dict") else dict(self.sources), + targets=self.targets.to_dict() if hasattr(self.targets, "to_dict") else dict(self.targets), ) + dependencies = ( session.query(Model) .filter( @@ -262,6 +349,43 @@ def update(self, session): def __post_init__(self): self.dependencies = list(self.dependencies) + + class_sources = getattr(type(self), "sources", None) + + if not isinstance(class_sources, DatasetSources): + logger.warning( + f"Dataset '{type(self).__name__}' has no valid class-level 'sources' attribute. " + "Defaulting to empty DatasetSources().", + stacklevel=2 + ) + self.sources = DatasetSources() + else: + self.sources = class_sources + if self.sources.empty(): + logger.warning( + f"Dataset '{type(self).__name__}' defines 'sources', but it is empty. " + "Please check if this is intentional.", + stacklevel=2 + ) + + + class_targets = getattr(type(self), "targets", None) + + if not isinstance(class_targets, DatasetTargets): + logger.warning( + f"Dataset '{type(self).__name__}' has no valid class-level 'targets' attribute. 
" + "Defaulting to empty DatasetTargets().", + stacklevel=2 + ) + self.targets = DatasetTargets() + else: + self.targets = class_targets + if self.targets.empty(): + logger.warning( + f"Dataset '{type(self).__name__}' defines 'targets', but it is empty. " + "Please check if this is intentional.", + stacklevel=2 + ) if not isinstance(self.tasks, Tasks_): self.tasks = Tasks_(self.tasks) if len(self.tasks.last) > 1: @@ -302,3 +426,105 @@ def __post_init__(self): for p in predecessors: for first in self.tasks.first: p.set_downstream(first) + + self.register() + + def __init_subclass__(cls) -> None: + # Warn about missing or invalid class attributes + if not isinstance(getattr(cls, "sources", None), DatasetSources): + logger.warning( + f"Dataset '{cls.__name__}' does not define a valid class-level 'sources'.", + stacklevel=2 + ) + if not isinstance(getattr(cls, "targets", None), DatasetTargets): + logger.warning( + f"Dataset '{cls.__name__}' does not define a valid class-level 'targets'.", + stacklevel=2 + ) + + def register(self): + with db.session_scope() as session: + existing = session.query(Model).filter_by( + name=self.name + ).first() + + if not existing: + entry = Model( + name=self.name, + version="will be filled after execution", + scenarios="{}", + sources=self.sources.to_dict(), + targets=self.targets.to_dict() + ) + session.add(entry) + +def load_sources_and_targets( + name: str, +) -> tuple[DatasetSources, DatasetTargets]: + """ + Load DatasetSources and DatasetTargets from the datasets table. + + Parameters + ---------- + name (str): Name of the dataset. 
+ + Returns + ------- + Tuple[DatasetSources, DatasetTargets] + """ + with db.session_scope() as session: + dataset_entry = ( + session.query(Model) + .filter_by(name=name) + .first() + ) + + if dataset_entry is None: + raise ValueError(f"Dataset '{name}' not found in the database.") + + # Extract raw JSON dicts within the session + raw_sources = dict(dataset_entry.sources or {}) + raw_targets = dict(dataset_entry.targets or {}) + + # Recreate objects *outside the session* (now safe) + sources = DatasetSources(**raw_sources) + targets = DatasetTargets(**raw_targets) + + return sources, targets + + +def export_dataset_io_to_json( + output_path: str = "dataset_io_overview.json", +) -> None: + """ + Export all sources and targets of datasets to a JSON file. + + Parameters + ---------- + output_path : str + Path to the output JSON file. + """ + + result = {} + + with db.session_scope() as session: + datasets = session.query(Model).all() + + for dataset in datasets: + name = dataset.name + + try: + raw_sources = dict(dataset.sources or {}) + raw_targets = dict(dataset.targets or {}) + + result[name] = { + "sources": raw_sources, + "targets": raw_targets, + } + except Exception as e: + print(f"⚠️ Could not process dataset '{name}': {e}") + + # Save to JSON + output_file = Path(output_path) + output_file.write_text(json.dumps(result, indent=2, ensure_ascii=False)) + print(f"✅ Dataset I/O overview written to {output_file.resolve()}") diff --git a/src/egon/data/datasets/calculate_dlr.py b/src/egon/data/datasets/calculate_dlr.py index 2bb70c524..33f438aea 100644 --- a/src/egon/data/datasets/calculate_dlr.py +++ b/src/egon/data/datasets/calculate_dlr.py @@ -15,7 +15,7 @@ import xarray as xr from egon.data import config, db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.scenario_parameters import get_sector_parameters @@ -40,7 +40,23 @@ class Calculate_dlr(Dataset): #: name: str = "dlr" #: 
- version: str = "0.0.2" + version: str = "0.0.3" + + sources = DatasetSources( + files={ + "regions_shape": "data_bundle_egon_data/regions_dynamic_line_rating/Germany_regions.shp", + "weather_cutout": "data_bundle_egon_data/cutouts/germany-{weather_year}-era5.nc", + }, + tables={ + "trans_lines": {"schema": "grid", "table": "egon_etrago_line"}, + "line_timeseries": {"schema": "grid", "table": "egon_etrago_line_timeseries"}, + }, + ) + targets = DatasetTargets( + tables={ + "line_timeseries": {"schema": "grid", "table": "egon_etrago_line_timeseries"} + } + ) def __init__(self, dependencies): super().__init__( @@ -59,16 +75,10 @@ def dlr(): *No parameters required """ - cfg = config.datasets()["dlr"] for scn in set(config.settings()["egon-data"]["--scenarios"]): weather_year = get_sector_parameters("global", scn)["weather_year"] - regions_shape_path = ( - Path(".") - / "data_bundle_egon_data" - / "regions_dynamic_line_rating" - / "Germany_regions.shp" - ) + regions_shape_path = Path(Calculate_dlr.sources.files["regions_shape"]) # Calculate hourly DLR per region dlr_hourly_dic, dlr_hourly = DLR_Regions( @@ -83,8 +93,8 @@ def dlr(): sql = f""" SELECT scn_name, line_id, topo, s_nom FROM - {cfg['sources']['trans_lines']['schema']}. - {cfg['sources']['trans_lines']['table']} + {Calculate_dlr.sources.tables["trans_lines"]["schema"]}. + {Calculate_dlr.sources.tables["trans_lines"]["table"]} """ df = gpd.GeoDataFrame.from_postgis( sql, con, crs="EPSG:4326", geom_col="topo" @@ -155,15 +165,15 @@ def dlr(): # Delete existing data db.execute_sql( f""" - DELETE FROM {cfg['sources']['line_timeseries']['schema']}. - {cfg['sources']['line_timeseries']['table']}; + DELETE FROM {Calculate_dlr.sources.tables["line_timeseries"]["schema"]}. 
+ {Calculate_dlr.sources.tables["line_timeseries"]["table"]}; """ ) # Insert into database trans_lines.to_sql( - f"{cfg['targets']['line_timeseries']['table']}", - schema=f"{cfg['targets']['line_timeseries']['schema']}", + Calculate_dlr.targets.tables["line_timeseries"]["table"], + schema=Calculate_dlr.targets.tables["line_timeseries"]["schema"], con=db.engine(), if_exists="append", index=False, @@ -188,9 +198,10 @@ def DLR_Regions(weather_year, regions_shape_path): regions = regions.sort_values(by=["Region"]) # The data downloaded using Atlite is loaded in 'weather_data_raw'. - file_name = f"germany-{weather_year}-era5.nc" - weather_info_path = ( - Path(".") / "data_bundle_egon_data" / "cutouts" / file_name + weather_info_path = Path( + Calculate_dlr.sources.files["weather_cutout"].format( + weather_year=weather_year + ) ) weather_data_raw = xr.open_mfdataset(str(weather_info_path)) weather_data_raw = weather_data_raw.rio.write_crs(4326) diff --git a/src/egon/data/datasets/ch4_prod.py b/src/egon/data/datasets/ch4_prod.py index faaff35c5..e00cef706 100755 --- a/src/egon/data/datasets/ch4_prod.py +++ b/src/egon/data/datasets/ch4_prod.py @@ -21,7 +21,7 @@ from egon.data import config, db from egon.data.config import settings -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.scenario_parameters import get_sector_parameters @@ -45,7 +45,37 @@ class CH4Production(Dataset): name: str = "CH4Production" #: - version: str = "0.0.9" + version: str = "0.0.10" + + sources = DatasetSources( + tables={ + "buses": { + "schema": "grid", + "table": "egon_etrago_bus", + }, + "gas_voronoi": { + "schema": "grid", + "table": "egon_gas_voronoi", + }, + "vg250_sta_union": { + "schema": "boundaries", + "table": "vg250_sta_union", + }, + }, + ) + + targets = DatasetTargets( + tables={ + "stores": { + "schema": "grid", + "table": "egon_etrago_generator", + }, + "biogas_generator": { + "schema": "grid", + 
"table": "egon_biogas_generator", + }, + } + ) def __init__(self, dependencies): super().__init__( @@ -233,23 +263,25 @@ def load_biogas_generators(scn_name): boundary = settings()["egon-data"]["--dataset-boundary"] if boundary != "Everything": db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_biogas_generator CASCADE; + f""" + DROP TABLE IF EXISTS {CH4Production.targets.tables['biogas_generator']['schema']}.{CH4Production.targets.tables['biogas_generator']['table']} CASCADE; """ ) biogas_generators_list.to_postgis( - "egon_biogas_generator", + CH4Production.targets.tables["biogas_generator"]["table"], engine, - schema="grid", + schema=CH4Production.targets.tables["biogas_generator"]["schema"], index=False, if_exists="replace", ) - sql = """SELECT * - FROM grid.egon_biogas_generator, boundaries.vg250_sta_union as vg + sql = f""" + SELECT * + FROM {CH4Production.targets.tables['biogas_generator']['schema']}.{CH4Production.targets.tables['biogas_generator']['table']} AS egon_biogas_generator, + {CH4Production.sources.tables['vg250_sta_union']['schema']}.{CH4Production.sources.tables['vg250_sta_union']['table']} AS vg WHERE ST_Transform(vg.geometry,4326) && egon_biogas_generator.geom - AND ST_Contains(ST_Transform(vg.geometry,4326), egon_biogas_generator.geom)""" - + AND ST_Contains(ST_Transform(vg.geometry,4326), egon_biogas_generator.geom) + """ biogas_generators_list = gpd.GeoDataFrame.from_postgis( sql, con=engine, geom_col="geom", crs=4326 ) @@ -257,8 +289,8 @@ def load_biogas_generators(scn_name): columns=["id", "bez", "area_ha", "geometry"] ) db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_biogas_generator CASCADE; + f""" + DROP TABLE IF EXISTS {CH4Production.targets.tables['biogas_generator']['schema']}.{CH4Production.targets.tables['biogas_generator']['table']} CASCADE; """ ) @@ -323,17 +355,17 @@ def import_gas_generators(): engine = db.engine() # Select source and target from dataset configuration - source = 
config.datasets()["gas_prod"]["source"] - target = config.datasets()["gas_prod"]["target"] + for scn_name in config.settings()["egon-data"]["--scenarios"]: # Clean table db.execute_sql( f""" - DELETE FROM {target['stores']['schema']}.{target['stores']['table']} + DELETE FROM {CH4Production.targets.tables['stores']['schema']}.{CH4Production.targets.tables['stores']['table']} WHERE "carrier" = 'CH4' AND scn_name = '{scn_name}' AND bus not IN ( - SELECT bus_id FROM {source['buses']['schema']}.{source['buses']['table']} + SELECT bus_id + FROM {CH4Production.sources.tables['buses']['schema']}.{CH4Production.sources.tables['buses']['table']} WHERE scn_name = '{scn_name}' AND country != 'DE' ); """ @@ -375,7 +407,7 @@ def import_gas_generators(): CH4_generators_list = db.select_dataframe( f""" SELECT bus_id as bus, scn_name, carrier - FROM grid.egon_gas_voronoi + FROM {CH4Production.sources.tables['gas_voronoi']['schema']}.{CH4Production.sources.tables['gas_voronoi']['table']} WHERE scn_name = '{scn_name}' AND carrier = 'CH4' """ @@ -426,9 +458,9 @@ def import_gas_generators(): # Insert data to db CH4_generators_list.to_sql( - target["stores"]["table"], + CH4Production.targets.tables["stores"]["table"], engine, - schema=target["stores"]["schema"], + schema=CH4Production.targets.tables["stores"]["schema"], index=False, if_exists="append", - ) + ) \ No newline at end of file diff --git a/src/egon/data/datasets/ch4_storages.py b/src/egon/data/datasets/ch4_storages.py index 81a20e48d..24472c73c 100755 --- a/src/egon/data/datasets/ch4_storages.py +++ b/src/egon/data/datasets/ch4_storages.py @@ -17,7 +17,7 @@ from egon.data import config, db from egon.data.config import settings -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.gas_grid import ( ch4_nodes_number_G, define_gas_nodes_list, @@ -47,15 +47,29 @@ class CH4Storages(Dataset): #: name: str = "CH4Storages" #: - version: str = "0.0.3" + 
version: str = "0.0.4" + + sources = DatasetSources( + files={ + "scigrid_storages": "datasets/gas_data/data/IGGIELGN_Storages.csv" + }, + tables={ + "gas_buses": "grid.egon_etrago_bus", + }, + ) + targets = DatasetTargets( + tables={ + "stores": "grid.egon_etrago_store", + } + ) def __init__(self, dependencies): super().__init__( name=self.name, version=self.version, dependencies=dependencies, - # tasks=(insert_ch4_storages), - tasks=(notasks), + tasks=(insert_ch4_storages), + #tasks=(notasks), ) @@ -85,12 +99,10 @@ def import_installed_ch4_storages(scn_name): Dataframe containing the CH4 cavern store units in Germany """ - target_file = ( - Path(".") / "datasets" / "gas_data" / "data" / "IGGIELGN_Storages.csv" - ) + storage_file = CH4Storages.sources.files["scigrid_storages"] Gas_storages_list = pd.read_csv( - target_file, + storage_file, delimiter=";", decimal=".", usecols=["lat", "long", "country_code", "param", "method"], @@ -235,8 +247,6 @@ def import_ch4_grid_capacity(scn_name): List of gas stores in Germany modelling the gas grid storage capacity """ - # Select source from dataset configuration - source = config.datasets()["gas_stores"]["source"] Gas_grid_capacity = 130000 # Storage capacity of the CH4 grid - G.Volk "Die Herauforderung an die Bundesnetzagentur die Energiewende zu meistern" Berlin, Dec 2012 N_ch4_nodes_G = ch4_nodes_number_G( @@ -247,9 +257,9 @@ def import_ch4_grid_capacity(scn_name): ) # Storage capacity associated to each CH4 node of the German grid sql_gas = f"""SELECT bus_id, scn_name, carrier, geom - FROM {source['buses']['schema']}.{source['buses']['table']} - WHERE carrier = 'CH4' AND scn_name = '{scn_name}' - AND country = 'DE';""" + FROM {CH4Storages.sources.tables['gas_buses']} + WHERE carrier = 'CH4' AND scn_name = '{scn_name}' + AND country = 'DE';""" Gas_storages_list = db.select_geodataframe(sql_gas, epsg=4326) # Add missing column @@ -301,18 +311,15 @@ def insert_ch4_stores(scn_name): # Connect to local database engine = 
db.engine() - # Select target from dataset configuration - source = config.datasets()["gas_stores"]["source"] - target = config.datasets()["gas_stores"]["target"] # Clean table db.execute_sql( f""" - DELETE FROM {target['stores']['schema']}.{target['stores']['table']} + DELETE FROM {CH4Storages.targets.tables['stores']} WHERE "carrier" = 'CH4' AND scn_name = '{scn_name}' AND bus IN ( - SELECT bus_id FROM {source['buses']['schema']}.{source['buses']['table']} + SELECT bus_id FROM {CH4Storages.sources.tables['gas_buses']} WHERE scn_name = '{scn_name}' AND country = 'DE' ); @@ -340,9 +347,9 @@ def insert_ch4_stores(scn_name): # Insert data to db gas_storages_list.to_sql( - target["stores"]["table"], + CH4Storages.targets.get_table_name("stores"), engine, - schema=target["stores"]["schema"], + schema=CH4Storages.targets.get_table_schema("stores"), index=False, if_exists="append", ) diff --git a/src/egon/data/datasets/chp/.spyproject/config/codestyle.ini b/src/egon/data/datasets/chp/.spyproject/config/codestyle.ini new file mode 100644 index 000000000..0f54b4c43 --- /dev/null +++ b/src/egon/data/datasets/chp/.spyproject/config/codestyle.ini @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/src/egon/data/datasets/chp/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini b/src/egon/data/datasets/chp/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini new file mode 100644 index 000000000..0b95e5cee --- /dev/null +++ b/src/egon/data/datasets/chp/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini @@ -0,0 +1,5 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + diff --git a/src/egon/data/datasets/chp/.spyproject/config/defaults/defaults-encoding-0.2.0.ini b/src/egon/data/datasets/chp/.spyproject/config/defaults/defaults-encoding-0.2.0.ini new file mode 100644 index 000000000..0ce193c1e --- /dev/null +++ 
b/src/egon/data/datasets/chp/.spyproject/config/defaults/defaults-encoding-0.2.0.ini @@ -0,0 +1,3 @@ +[encoding] +text_encoding = utf-8 + diff --git a/src/egon/data/datasets/chp/.spyproject/config/defaults/defaults-vcs-0.2.0.ini b/src/egon/data/datasets/chp/.spyproject/config/defaults/defaults-vcs-0.2.0.ini new file mode 100644 index 000000000..ee2548333 --- /dev/null +++ b/src/egon/data/datasets/chp/.spyproject/config/defaults/defaults-vcs-0.2.0.ini @@ -0,0 +1,4 @@ +[vcs] +use_version_control = False +version_control_system = + diff --git a/src/egon/data/datasets/chp/.spyproject/config/defaults/defaults-workspace-0.2.0.ini b/src/egon/data/datasets/chp/.spyproject/config/defaults/defaults-workspace-0.2.0.ini new file mode 100644 index 000000000..2a73ab7ad --- /dev/null +++ b/src/egon/data/datasets/chp/.spyproject/config/defaults/defaults-workspace-0.2.0.ini @@ -0,0 +1,6 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + diff --git a/src/egon/data/datasets/chp/.spyproject/config/encoding.ini b/src/egon/data/datasets/chp/.spyproject/config/encoding.ini new file mode 100644 index 000000000..a17acedd7 --- /dev/null +++ b/src/egon/data/datasets/chp/.spyproject/config/encoding.ini @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/src/egon/data/datasets/chp/.spyproject/config/vcs.ini b/src/egon/data/datasets/chp/.spyproject/config/vcs.ini new file mode 100644 index 000000000..fd66eae01 --- /dev/null +++ b/src/egon/data/datasets/chp/.spyproject/config/vcs.ini @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/src/egon/data/datasets/chp/.spyproject/config/workspace.ini b/src/egon/data/datasets/chp/.spyproject/config/workspace.ini new file mode 100644 index 000000000..405acabfa --- /dev/null +++ b/src/egon/data/datasets/chp/.spyproject/config/workspace.ini @@ -0,0 +1,12 @@ +[workspace] 
+restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False +project_type = 'empty-project-type' +recent_files = ['__init__.py', '..\\..\\datasets.yml'] + +[main] +version = 0.2.0 +recent_files = [] + diff --git a/src/egon/data/datasets/chp/__init__.py b/src/egon/data/datasets/chp/__init__.py index ac51ff881..c3d7163d8 100644 --- a/src/egon/data/datasets/chp/__init__.py +++ b/src/egon/data/datasets/chp/__init__.py @@ -19,7 +19,7 @@ import pypsa from egon.data import config, db -from egon.data.datasets import Dataset, wrapped_partial +from egon.data.datasets import Dataset, wrapped_partial, DatasetSources, DatasetTargets from egon.data.datasets.chp.match_nep import insert_large_chp, map_carrier from egon.data.datasets.chp.small_chp import ( assign_use_case, @@ -259,15 +259,12 @@ def assign_heat_bus(): None. """ - sources = config.datasets()["chp_location"]["sources"] - target = config.datasets()["chp_location"]["targets"]["chp_table"] - for scenario in config.settings()["egon-data"]["--scenarios"]: # Select CHP with use_case = 'district_heating' chp = db.select_geodataframe( f""" SELECT * FROM - {target['schema']}.{target['table']} + {Chp.targets.tables['chp_table']} WHERE scenario = '{scenario}' AND district_heating = True """, @@ -284,8 +281,7 @@ def assign_heat_bus(): f""" SELECT area_id, ST_Centroid(geom_polygon) as geom FROM - {sources['district_heating_areas']['schema']}. - {sources['district_heating_areas']['table']} + {Chp.sources.tables['district_heating_areas']} WHERE scenario = '{scenario}' """, epsg=4326, @@ -306,7 +302,7 @@ def assign_heat_bus(): # Drop district heating CHP without heat_bus_id db.execute_sql( f""" - DELETE FROM {target['schema']}.{target['table']} + DELETE FROM {Chp.targets.tables['chp_table']} WHERE scenario = '{scenario}' AND district_heating = True """ @@ -363,29 +359,28 @@ def insert_biomass_chp(scenario): None. 
""" - cfg = config.datasets()["chp_location"] # import target values from NEP 2021, scneario C 2035 target = select_target("biomass", scenario) # import data for MaStR mastr = pd.read_csv( - WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_biomass"] + WORKING_DIR_MASTR_OLD / Chp.sources.files["mastr_biomass"] ).query("EinheitBetriebsstatus=='InBetrieb'") # Drop entries without federal state or 'AusschließlichWirtschaftszone' mastr = mastr[ - mastr.Bundesland.isin( - pd.read_sql( - f"""SELECT DISTINCT ON (gen) - REPLACE(REPLACE(gen, '-', ''), 'ü', 'ue') as states - FROM {cfg['sources']['vg250_lan']['schema']}. - {cfg['sources']['vg250_lan']['table']}""", - con=db.engine(), - ).states.values - ) - ] - + mastr.Bundesland.isin( + pd.read_sql( + # The f-string now correctly ends after the FROM clause + f"""SELECT DISTINCT ON (gen) + REPLACE(REPLACE(gen, '-', ''), 'ü', 'ue') as states + FROM {Chp.sources.tables['vg250_lan']}""", + # con=db.engine() is now a separate argument to pd.read_sql + con=db.engine(), + ).states.values + ) +] # Scaling will be done per federal state in case of eGon2035 scenario. 
if scenario == "eGon2035": level = "federal_state" @@ -400,10 +395,10 @@ def insert_biomass_chp(scenario): # Assign bus_id if len(mastr_loc) > 0: mastr_loc["voltage_level"] = assign_voltage_level( - mastr_loc, cfg, WORKING_DIR_MASTR_OLD + mastr_loc, Chp.sources, WORKING_DIR_MASTR_OLD ) - mastr_loc = assign_bus_id(mastr_loc, cfg) - mastr_loc = assign_use_case(mastr_loc, cfg["sources"], scenario) + mastr_loc = assign_bus_id(mastr_loc, Chp.sources) + mastr_loc = assign_use_case(mastr_loc, Chp.sources, scenario) # Insert entries with location session = sessionmaker(bind=db.engine())() @@ -430,15 +425,15 @@ def insert_biomass_chp(scenario): def insert_chp_statusquo(scn="status2019"): - cfg = config.datasets()["chp_location"] + # import data for MaStR mastr = pd.read_csv( - WORKING_DIR_MASTR_NEW / "bnetza_mastr_combustion_cleaned.csv" + WORKING_DIR_MASTR_NEW / Chp.sources.files["mastr_combustion"] ) mastr_biomass = pd.read_csv( - WORKING_DIR_MASTR_NEW / "bnetza_mastr_biomass_cleaned.csv" + WORKING_DIR_MASTR_NEW / Chp.sources.files["mastr_biomass"] ) mastr = pd.concat([mastr, mastr_biomass]).reset_index(drop=True) @@ -479,11 +474,9 @@ def insert_chp_statusquo(scn="status2019"): mastr.groupby("Energietraeger").Nettonennleistung.sum().mul(1e-6) geom_municipalities = db.select_geodataframe( - """ - SELECT gen, ST_UNION(geometry) as geom + """SELECT gen, ST_UNION(geometry) as geom FROM boundaries.vg250_gem - GROUP BY gen - """ + GROUP BY gen""" ).set_index("gen") # Assing Laengengrad and Breitengrad to chps without location data @@ -532,16 +525,16 @@ def insert_chp_statusquo(scn="status2019"): # Assign bus_id if len(mastr) > 0: mastr["voltage_level"] = assign_voltage_level( - mastr, cfg, WORKING_DIR_MASTR_NEW + mastr, Chp.sources, WORKING_DIR_MASTR_NEW ) gas_bus_id = db.assign_gas_bus_id(mastr, scn, "CH4").bus - mastr = assign_bus_id(mastr, cfg, drop_missing=True) + mastr = assign_bus_id(mastr, Chp.sources, drop_missing=True) mastr["gas_bus_id"] = gas_bus_id - mastr = 
assign_use_case(mastr, cfg["sources"], scn) + mastr = assign_use_case(mastr, Chp.sources, scn) # Insert entries with location session = sessionmaker(bind=db.engine())() @@ -577,17 +570,13 @@ def insert_chp_egon2035(): """ - sources = config.datasets()["chp_location"]["sources"] - - targets = config.datasets()["chp_location"]["targets"] - insert_biomass_chp("eGon2035") # Insert large CHPs based on NEP's list of conventional power plants - MaStR_konv = insert_large_chp(sources, targets["chp_table"], EgonChp) + MaStR_konv = insert_large_chp(Chp.sources, Chp.targets.tables["chp_table"], EgonChp) # Insert smaller CHPs (< 10MW) based on existing locations from MaStR - existing_chp_smaller_10mw(sources, MaStR_konv, EgonChp) + existing_chp_smaller_10mw(Chp.sources, MaStR_konv, EgonChp) gpd.GeoDataFrame( MaStR_konv[ @@ -602,8 +591,8 @@ def insert_chp_egon2035(): ] ] ).to_postgis( - targets["mastr_conventional_without_chp"]["table"], - schema=targets["mastr_conventional_without_chp"]["schema"], + Chp.targets.get_table_name("mastr_conventional_without_chp"), + schema=Chp.targets.get_table_schema("mastr_conventional_without_chp"), con=db.engine(), if_exists="replace", ) @@ -682,11 +671,9 @@ def insert_chp_egon100re(): """ - sources = config.datasets()["chp_location"]["sources"] - db.execute_sql( f""" - DELETE FROM {EgonChp.__table__.schema}.{EgonChp.__table__.name} + DELETE FROM {Chp.targets.tables['chp_table']} WHERE scenario = 'eGon100RE' """ ) @@ -695,7 +682,7 @@ def insert_chp_egon100re(): additional_capacity = db.select_dataframe( - """ + f""" SELECT capacity - FROM supply.egon_scenario_capacities + FROM {Chp.sources.tables['scenario_capacities']} WHERE scenario_name = 'eGon100RE' AND carrier = 'urban_central_gas_CHP' """ ) @@ -721,8 +708,7 @@ def insert_chp_egon100re(): residential_and_service_demand as demand, area_id, ST_Transform(ST_PointOnSurface(geom_polygon), 4326) as geom FROM - {sources['district_heating_areas']['schema']}.
- {sources['district_heating_areas']['table']} + {Chp.sources.tables['district_heating_areas']} WHERE scenario = 'eGon100RE' """ ) @@ -814,6 +800,36 @@ def insert_chp_egon100re(): class Chp(Dataset): + + + sources = DatasetSources( + tables={ + "list_conv_pp": "supply.egon_nep_2021_conventional_powerplants", + "egon_mv_grid_district": "grid.egon_mv_grid_district", + "ehv_voronoi": "grid.egon_ehv_substation_voronoi", + "etrago_buses": "grid.egon_etrago_bus", + "osm_landuse": "openstreetmap.osm_landuse", + "osm_polygon": "openstreetmap.osm_polygon", + "district_heating_areas": "demand.egon_district_heating_areas", + "industrial_demand_osm": "demand.egon_demandregio_osm_ind_electricity", + "vg250_lan": "boundaries.vg250_lan", + "scenario_capacities": "supply.egon_scenario_capacities", + }, + files={ + "mastr_combustion": "bnetza_mastr_combustion_cleaned.csv", + "mastr_location": "location_elec_generation_raw.csv", + "mastr_biomass": "bnetza_mastr_biomass_cleaned.csv", + }, + ) + targets = DatasetTargets( + tables={ + "chp_table": "supply.egon_chp_plants", + "mastr_conventional_without_chp": "supply.egon_mastr_conventional_without_chp", + } + ) + + + """ Extract combined heat and power plants for each scenario @@ -845,7 +861,7 @@ class Chp(Dataset): #: name: str = "Chp" #: - version: str = "0.0.10" + version: str = "0.0.15" def __init__(self, dependencies): super().__init__( @@ -853,4 +869,4 @@ def __init__(self, dependencies): version=self.version, dependencies=dependencies, tasks=tasks, - ) + ) \ No newline at end of file diff --git a/src/egon/data/datasets/chp/match_nep.py b/src/egon/data/datasets/chp/match_nep.py index d91824ffc..7785d050f 100755 --- a/src/egon/data/datasets/chp/match_nep.py +++ b/src/egon/data/datasets/chp/match_nep.py @@ -7,6 +7,7 @@ import pandas as pd from egon.data import config, db +from egon.data.datasets import load_sources_and_targets from egon.data.datasets.chp.small_chp import assign_use_case from egon.data.datasets.mastr import 
WORKING_DIR_MASTR_OLD from egon.data.datasets.power_plants import ( @@ -28,14 +29,14 @@ def select_chp_from_nep(sources): CHP plants from NEP list """ + table_nep = sources.tables['list_conv_pp'] # Select CHP plants with geolocation from list of conventional power plants chp_NEP_data = db.select_dataframe( f""" SELECT bnetza_id, name, carrier, chp, postcode, capacity, city, federal_state, c2035_chp, c2035_capacity - FROM {sources['list_conv_pp']['schema']}. - {sources['list_conv_pp']['table']} + FROM {table_nep} WHERE bnetza_id != 'KW<10 MW' AND (chp = 'Ja' OR c2035_chp = 'Ja') AND c2035_capacity > 0 @@ -124,7 +125,7 @@ def select_chp_from_mastr(sources): # Read-in data from MaStR MaStR_konv = pd.read_csv( - WORKING_DIR_MASTR_OLD / sources["mastr_combustion"], + WORKING_DIR_MASTR_OLD / sources.files["mastr_combustion"], delimiter=",", usecols=[ "Nettonennleistung", @@ -347,7 +348,7 @@ def insert_large_chp(sources, target, EgonChp): # Assign voltage level to MaStR MaStR_konv["voltage_level"] = assign_voltage_level( MaStR_konv.rename({"el_capacity": "Nettonennleistung"}, axis=1), - config.datasets()["chp_location"], + sources, WORKING_DIR_MASTR_OLD, ) @@ -399,9 +400,10 @@ def insert_large_chp(sources, target, EgonChp): MaStR_konv["geometry"] = geopandas.points_from_xy( MaStR_konv["Laengengrad"], MaStR_konv["Breitengrad"] ) + MaStR_konv["voltage_level"] = assign_voltage_level( MaStR_konv.rename({"el_capacity": "Nettonennleistung"}, axis=1), - config.datasets()["chp_location"], + sources, WORKING_DIR_MASTR_OLD, ) @@ -535,7 +537,7 @@ def insert_large_chp(sources, target, EgonChp): # Assign bus_id insert_chp["bus_id"] = assign_bus_id( - insert_chp, config.datasets()["chp_location"] + insert_chp, sources ).bus_id # Assign gas bus_id @@ -546,8 +548,10 @@ def insert_large_chp(sources, target, EgonChp): insert_chp = assign_use_case(insert_chp, sources, scenario="eGon2035") # Delete existing CHP in the target table + target_schema, target_table = target.split('.')[-2:] + 
db.execute_sql( - f""" DELETE FROM {target['schema']}.{target['table']} + f""" DELETE FROM {target_schema}.{target_table} WHERE carrier IN ('gas', 'other_non_renewable', 'oil') AND scenario='eGon2035';""" ) @@ -575,4 +579,4 @@ def insert_large_chp(sources, target, EgonChp): session.add(entry) session.commit() - return MaStR_konv + return MaStR_konv \ No newline at end of file diff --git a/src/egon/data/datasets/chp/small_chp.py b/src/egon/data/datasets/chp/small_chp.py index 63ab202b0..25d0511b8 100755 --- a/src/egon/data/datasets/chp/small_chp.py +++ b/src/egon/data/datasets/chp/small_chp.py @@ -8,6 +8,7 @@ import pandas as pd from egon.data import config, db +from egon.data.datasets import load_sources_and_targets from egon.data.datasets.power_plants import ( assign_bus_id, filter_mastr_geometry, @@ -94,7 +95,7 @@ def existing_chp_smaller_10mw(sources, MaStR_konv, EgonChp): # Assign bus_id mastr_chp["bus_id"] = assign_bus_id( - mastr_chp, config.datasets()["chp_location"] + mastr_chp, sources ).bus_id mastr_chp = assign_use_case(mastr_chp, sources, "eGon2035") @@ -159,6 +160,8 @@ def extension_to_areas( None. """ + sources, _ = load_sources_and_targets("Chp") + session = sessionmaker(bind=db.engine())() np.random.seed(seed=config.settings()["egon-data"]["--random-seed"]) @@ -221,7 +224,7 @@ def extension_to_areas( selected_areas["voltage_level"] = selected_chp["voltage_level"] selected_areas.loc[:, "bus_id"] = assign_bus_id( - selected_areas, config.datasets()["chp_location"] + selected_areas, sources ).bus_id entry = EgonChp( @@ -317,17 +320,14 @@ def extension_district_heating( """ - sources = config.datasets()["chp_location"]["sources"] - targets = config.datasets()["chp_location"]["targets"] + sources, targets = load_sources_and_targets("Chp") existing_chp = db.select_dataframe( f""" SELECT el_capacity, th_capacity, voltage_level, b.area_id FROM - {targets['chp_table']['schema']}. 
- {targets['chp_table']['table']} a, - {sources['district_heating_areas']['schema']}. - {sources['district_heating_areas']['table']} b + {targets.tables['chp_table']} a, + {sources.tables['district_heating_areas']} b WHERE a.scenario = 'eGon2035' AND b.scenario = 'eGon2035' AND district_heating = True @@ -335,8 +335,7 @@ def extension_district_heating( ST_Transform( ST_Centroid(geom_polygon), 4326), (SELECT ST_Union(geometry) - FROM {sources['vg250_lan']['schema']}. - {sources['vg250_lan']['table']} + FROM {sources.tables['vg250_lan']} WHERE REPLACE(REPLACE(gen, '-', ''), 'ü', 'ue') ='{federal_state}')) AND el_capacity < 10 ORDER BY el_capacity, residential_and_service_demand @@ -353,18 +352,16 @@ def extension_district_heating( residential_and_service_demand as demand, area_id, ST_Transform(ST_PointOnSurface(geom_polygon), 4326) as geom FROM - {sources['district_heating_areas']['schema']}. - {sources['district_heating_areas']['table']} + {sources.tables['district_heating_areas']} WHERE scenario = 'eGon2035' AND ST_Intersects(ST_Transform(ST_Centroid(geom_polygon), 4326), ( SELECT ST_Union(d.geometry) FROM - {sources['vg250_lan']['schema']}.{sources['vg250_lan']['table']} d + {sources.tables['vg250_lan']} d WHERE REPLACE(REPLACE(gen, '-', ''), 'ü', 'ue') ='{federal_state}')) AND area_id NOT IN ( SELECT district_heating_area_id - FROM {targets['chp_table']['schema']}. - {targets['chp_table']['table']} + FROM {targets.tables['chp_table']} WHERE scenario = 'eGon2035' AND district_heating = TRUE) """ @@ -388,17 +385,14 @@ def extension_district_heating( as demand, b.area_id, ST_Transform(ST_PointOnSurface(geom_polygon), 4326) as geom FROM - {targets['chp_table']['schema']}. - {targets['chp_table']['table']} a, - {sources['district_heating_areas']['schema']}. 
- {sources['district_heating_areas']['table']} b + {targets.tables['chp_table']} a, + {sources.tables['district_heating_areas']} b WHERE b.scenario = 'eGon2035' AND a.scenario = 'eGon2035' AND ST_Intersects( ST_Transform(ST_Centroid(geom_polygon), 4326), (SELECT ST_Union(d.geometry) - FROM {sources['vg250_lan']['schema']}. - {sources['vg250_lan']['table']} d + FROM {sources.tables['vg250_lan']} d WHERE REPLACE(REPLACE(gen, '-', ''), 'ü', 'ue') ='{federal_state}')) AND a.district_heating_area_id = b.area_id GROUP BY ( @@ -447,15 +441,13 @@ def extension_industrial(federal_state, additional_capacity, flh_chp, EgonChp): """ - sources = config.datasets()["chp_location"]["sources"] - targets = config.datasets()["chp_location"]["targets"] + sources, targets = load_sources_and_targets("Chp") existing_chp = db.select_dataframe( f""" SELECT el_capacity, th_capacity, voltage_level FROM - {targets['chp_table']['schema']}. - {targets['chp_table']['table']} a + {targets.tables['chp_table']} a WHERE a.scenario = 'eGon2035' AND district_heating = False AND el_capacity < 10 @@ -471,17 +463,14 @@ def extension_industrial(federal_state, additional_capacity, flh_chp, EgonChp): SUM(demand) as demand, a.osm_id, ST_PointOnSurface(b.geom) as geom, b.name FROM - {sources['industrial_demand_osm']['schema']}. - {sources['industrial_demand_osm']['table']} a, - {sources['osm_landuse']['schema']}. - {sources['osm_landuse']['table']} b + {sources.tables['industrial_demand_osm']} a, + {sources.tables['osm_landuse']} b WHERE a.scenario = 'eGon2035' AND b.id = a.osm_id AND NOT ST_Intersects( ST_Transform(b.geom, 4326), (SELECT ST_Union(geom) FROM - {targets['chp_table']['schema']}. 
- {targets['chp_table']['table']} + {targets.tables['chp_table']} )) AND b.tags::json->>'landuse' = 'industrial' AND b.name NOT LIKE '%%kraftwerk%%' @@ -497,8 +486,7 @@ def extension_industrial(federal_state, additional_capacity, flh_chp, EgonChp): AND ST_Intersects( ST_Transform(ST_Centroid(b.geom), 4326), (SELECT ST_Union(d.geometry) - FROM {sources['vg250_lan']['schema']}. - {sources['vg250_lan']['table']} d + FROM {sources.tables['vg250_lan']} d WHERE REPLACE(REPLACE(gen, '-', ''), 'ü', 'ue') ='{federal_state}')) GROUP BY (a.osm_id, b.geom, b.name) @@ -546,34 +534,37 @@ def extension_per_federal_state(federal_state, EgonChp): """ - sources = config.datasets()["chp_location"]["sources"] - target_table = config.datasets()["chp_location"]["targets"]["chp_table"] + sources, targets = load_sources_and_targets("Chp") + + # Get separate schema and table name for SQL construction + target_schema = targets.get_table_schema("chp_table") + target_table_only = targets.get_table_name("chp_table").split('.')[-1] - targets = select_target("small_chp", "eGon2035") + capacity_targets = select_target("small_chp", "eGon2035") existing_capacity = db.select_dataframe( f""" SELECT SUM(el_capacity) as capacity, district_heating - FROM {target_table['schema']}. - {target_table['table']} + FROM {target_schema}. 
+ {target_table_only} WHERE sources::json->>'el_capacity' = 'MaStR' AND carrier != 'biomass' AND scenario = 'eGon2035' AND ST_Intersects(geom, ( SELECT ST_Union(geometry) FROM - {sources['vg250_lan']['schema']}.{sources['vg250_lan']['table']} b + {sources.tables['vg250_lan']} b WHERE REPLACE(REPLACE(gen, '-', ''), 'ü', 'ue') ='{federal_state}')) GROUP BY district_heating """ ) - print(f"Target capacity in {federal_state}: {targets[federal_state]}") + print(f"Target capacity in {federal_state}: {capacity_targets[federal_state]}") print( f"Existing capacity in {federal_state}: {existing_capacity.capacity.sum()}" ) additional_capacity = ( - targets[federal_state] - existing_capacity.capacity.sum() + capacity_targets[federal_state] - existing_capacity.capacity.sum() ) if additional_capacity > 0: @@ -655,14 +646,18 @@ def assign_use_case(chp, sources, scenario): """ + + table_landuse = sources.tables['osm_landuse'] + table_polygon = sources.tables['osm_polygon'] + table_dh = sources.tables['district_heating_areas'] + # Select osm industrial areas which don't include power or heat supply # (name not includes 'Stadtwerke', 'Kraftwerk', 'Müllverbrennung'...) landuse_industrial = db.select_geodataframe( f""" SELECT ST_Buffer(geom, 100) as geom, tags::json->>'name' as name - FROM {sources['osm_landuse']['schema']}. - {sources['osm_landuse']['table']} + FROM {table_landuse} WHERE tags::json->>'landuse' = 'industrial' AND(name NOT LIKE '%%kraftwerk%%' OR name NOT LIKE '%%Müllverbrennung%%' @@ -673,15 +668,13 @@ def assign_use_case(chp, sources, scenario): """, epsg=4326, ) - # Select osm polygons where a district heating chp is likely # (name includes 'Stadtwerke', 'Kraftwerk', 'Müllverbrennung'...) possible_dh_locations = db.select_geodataframe( f""" SELECT ST_Buffer(geom, 100) as geom, tags::json->>'name' as name - FROM {sources['osm_polygon']['schema']}. 
- {sources['osm_polygon']['table']} + FROM {table_polygon} WHERE name LIKE '%%Stadtwerke%%' OR name LIKE '%%kraftwerk%%' OR name LIKE '%%Müllverbrennung%%' @@ -700,8 +693,7 @@ def assign_use_case(chp, sources, scenario): district_heating = db.select_geodataframe( f""" SELECT area_id, ST_Buffer(geom_polygon, 1000) as geom - FROM {sources['district_heating_areas']['schema']}. - {sources['district_heating_areas']['table']} + FROM {table_dh} WHERE scenario = '{scenario}' """, epsg=4326, @@ -743,4 +735,4 @@ def assign_use_case(chp, sources, scenario): # Set district_heating = True for all district heating chp chp.loc[district_heating_chp.index, "district_heating"] = True - return chp + return chp \ No newline at end of file diff --git a/src/egon/data/datasets/chp_etrago.py b/src/egon/data/datasets/chp_etrago.py index fd12470dc..6622fbb59 100644 --- a/src/egon/data/datasets/chp_etrago.py +++ b/src/egon/data/datasets/chp_etrago.py @@ -6,7 +6,7 @@ import pandas as pd from egon.data import config, db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.etrago_setup import link_geom_from_buses from egon.data.datasets.scenario_parameters import get_sector_parameters @@ -33,8 +33,20 @@ class ChpEtrago(Dataset): #: name: str = "ChpEtrago" #: - version: str = "0.0.6" - + version: str = "0.0.7" + sources = DatasetSources( + tables={ + "chp_table": "supply.egon_chp_plants", + "district_heating_areas": "demand.egon_district_heating_areas", + "etrago_buses": "grid.egon_etrago_bus", + } + ) + targets = DatasetTargets( + tables={ + "link": "grid.egon_etrago_link", + "generator": "grid.egon_etrago_generator", + } + ) def __init__(self, dependencies): super().__init__( name=self.name, @@ -45,23 +57,21 @@ def __init__(self, dependencies): def insert_egon100re(): - sources = config.datasets()["chp_etrago"]["sources"] - targets = config.datasets()["chp_etrago"]["targets"] db.execute_sql( f""" - DELETE FROM 
{targets['link']['schema']}.{targets['link']['table']} + DELETE FROM {ChpEtrago.targets.tables['link']} WHERE carrier LIKE '%%CHP%%' AND scn_name = 'eGon100RE' AND bus0 IN (SELECT bus_id - FROM {sources['etrago_buses']['schema']}.{sources['etrago_buses']['table']} + FROM {ChpEtrago.sources.tables['etrago_buses']} WHERE scn_name = 'eGon100RE' AND country = 'DE') AND bus1 IN (SELECT bus_id - FROM {sources['etrago_buses']['schema']}.{sources['etrago_buses']['table']} + FROM {ChpEtrago.sources.tables['etrago_buses']} WHERE scn_name = 'eGon100RE' AND country = 'DE') """ @@ -73,21 +83,17 @@ def insert_egon100re(): SELECT electrical_bus_id, ch4_bus_id, a.carrier, SUM(el_capacity) AS el_capacity, SUM(th_capacity) AS th_capacity, c.bus_id as heat_bus_id - FROM {sources['chp_table']['schema']}. - {sources['chp_table']['table']} a - JOIN {sources['district_heating_areas']['schema']}. - {sources['district_heating_areas']['table']} b + FROM {ChpEtrago.sources.tables['chp_table']} a + JOIN {ChpEtrago.sources.tables['district_heating_areas']} b ON a.district_heating_area_id = b.area_id JOIN grid.egon_etrago_bus c ON ST_Transform(ST_Centroid(b.geom_polygon), 4326) = c.geom - WHERE a.scenario='eGon100RE' AND b.scenario = 'eGon100RE' AND c.scn_name = 'eGon100RE' AND c.carrier = 'central_heat' AND NOT district_heating_area_id IS NULL - GROUP BY ( - electrical_bus_id, ch4_bus_id, a.carrier, c.bus_id) + GROUP BY (electrical_bus_id, ch4_bus_id, a.carrier, c.bus_id) """ ) @@ -100,7 +106,7 @@ def insert_egon100re(): gpd.GeoDataFrame( index=chp_dh.index, data={ - "scn_name": "eGon2035", + "scn_name": "eGon100RE", "bus0": chp_dh.loc[:, "ch4_bus_id"].astype(int), "bus1": chp_dh.loc[:, "electrical_bus_id"].astype(int), "p_nom": chp_dh.loc[:, "el_capacity"], @@ -121,8 +127,8 @@ def insert_egon100re(): # Insert into database chp_el.to_postgis( - targets["link"]["table"], - schema=targets["link"]["schema"], + ChpEtrago.targets.get_table_name("link"), + 
schema=ChpEtrago.targets.get_table_schema("link"), con=db.engine(), if_exists="append", ) @@ -147,38 +153,36 @@ def insert_egon100re(): ) chp_heat.to_postgis( - targets["link"]["table"], - schema=targets["link"]["schema"], + ChpEtrago.targets.get_table_name("link"), + schema=ChpEtrago.targets.get_table_schema("link"), con=db.engine(), if_exists="append", ) def insert_scenario(scenario): - sources = config.datasets()["chp_etrago"]["sources"] - targets = config.datasets()["chp_etrago"]["targets"] db.execute_sql( f""" - DELETE FROM {targets['link']['schema']}.{targets['link']['table']} + DELETE FROM {ChpEtrago.targets.tables['link']} WHERE carrier LIKE '%%CHP%%' AND scn_name = '{scenario}' AND bus0 IN (SELECT bus_id - FROM {sources['etrago_buses']['schema']}.{sources['etrago_buses']['table']} + FROM {ChpEtrago.sources.tables['etrago_buses']} WHERE scn_name = '{scenario}' AND country = 'DE') AND bus1 IN (SELECT bus_id - FROM {sources['etrago_buses']['schema']}.{sources['etrago_buses']['table']} + FROM {ChpEtrago.sources.tables['etrago_buses']} WHERE scn_name = '{scenario}' AND country = 'DE') """ ) db.execute_sql( f""" - DELETE FROM {targets['generator']['schema']}.{targets['generator']['table']} + DELETE FROM {ChpEtrago.targets.tables['generator']} WHERE carrier LIKE '%%CHP%%' AND scn_name = '{scenario}' """ @@ -189,21 +193,17 @@ def insert_scenario(scenario): SELECT electrical_bus_id, ch4_bus_id, a.carrier, SUM(el_capacity) AS el_capacity, SUM(th_capacity) AS th_capacity, c.bus_id as heat_bus_id - FROM {sources['chp_table']['schema']}. - {sources['chp_table']['table']} a - JOIN {sources['district_heating_areas']['schema']}. 
- {sources['district_heating_areas']['table']} b + FROM {ChpEtrago.sources.tables['chp_table']} a + JOIN {ChpEtrago.sources.tables['district_heating_areas']} b ON a.district_heating_area_id = b.area_id JOIN grid.egon_etrago_bus c ON ST_Transform(ST_Centroid(b.geom_polygon), 4326) = c.geom - WHERE a.scenario='{scenario}' AND b.scenario = '{scenario}' AND c.scn_name = '{scenario}' AND c.carrier = 'central_heat' AND NOT district_heating_area_id IS NULL - GROUP BY ( - electrical_bus_id, ch4_bus_id, a.carrier, c.bus_id) + GROUP BY (electrical_bus_id, ch4_bus_id, a.carrier, c.bus_id) """ ) @@ -243,8 +243,8 @@ def insert_scenario(scenario): # Insert into database chp_el.to_postgis( - targets["link"]["table"], - schema=targets["link"]["schema"], + ChpEtrago.targets.get_table_name("link"), + schema=ChpEtrago.targets.get_table_schema("link"), con=db.engine(), if_exists="append", ) @@ -269,8 +269,8 @@ def insert_scenario(scenario): ) chp_heat.to_postgis( - targets["link"]["table"], - schema=targets["link"]["schema"], + ChpEtrago.targets.get_table_name("link"), + schema=ChpEtrago.targets.get_table_schema("link"), con=db.engine(), if_exists="append", ) @@ -308,8 +308,8 @@ def insert_scenario(scenario): ) chp_el_gen.to_sql( - targets["generator"]["table"], - schema=targets["generator"]["schema"], + ChpEtrago.targets.get_table_name("generator"), + schema=ChpEtrago.targets.get_table_schema("generator"), con=db.engine(), if_exists="append", index=False, @@ -333,8 +333,8 @@ def insert_scenario(scenario): ) chp_heat_gen.to_sql( - targets["generator"]["table"], - schema=targets["generator"]["schema"], + ChpEtrago.targets.get_table_name("generator"), + schema=ChpEtrago.targets.get_table_schema("generator"), con=db.engine(), if_exists="append", index=False, @@ -344,7 +344,7 @@ def insert_scenario(scenario): f""" SELECT electrical_bus_id, ch4_bus_id, carrier, SUM(el_capacity) AS el_capacity, SUM(th_capacity) AS th_capacity - FROM 
{sources['chp_table']['schema']}.{sources['chp_table']['table']} + FROM {ChpEtrago.sources.tables['chp_table']} WHERE scenario='{scenario}' AND district_heating_area_id IS NULL GROUP BY (electrical_bus_id, ch4_bus_id, carrier) @@ -387,8 +387,8 @@ def insert_scenario(scenario): ]["chp_gas"] chp_el_ind.to_postgis( - targets["link"]["table"], - schema=targets["link"]["schema"], + ChpEtrago.targets.get_table_name("link"), + schema=ChpEtrago.targets.get_table_schema("link"), con=db.engine(), if_exists="append", ) @@ -424,8 +424,8 @@ def insert_scenario(scenario): chp_el_ind_gen["carrier"] = "industrial_" + chp_el_ind_gen.carrier + "_CHP" chp_el_ind_gen.to_sql( - targets["generator"]["table"], - schema=targets["generator"]["schema"], + ChpEtrago.targets.get_table_name("generator"), + schema=ChpEtrago.targets.get_table_schema("generator"), con=db.engine(), if_exists="append", index=False, diff --git a/src/egon/data/datasets/data_bundle/__init__.py b/src/egon/data/datasets/data_bundle/__init__.py index ffae46a9c..db0edcadc 100644 --- a/src/egon/data/datasets/data_bundle/__init__.py +++ b/src/egon/data/datasets/data_bundle/__init__.py @@ -1,13 +1,14 @@ """The central module containing all code dealing with small scale input-data """ + from pathlib import Path from urllib.request import urlretrieve import shutil import zipfile from egon.data import config -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets def download(): @@ -22,12 +23,9 @@ def download(): if data_bundle_path.exists() and data_bundle_path.is_dir(): shutil.rmtree(data_bundle_path) # Get parameters from config and set download URL - sources = config.datasets()["data-bundle"]["sources"]["zenodo"] - url = ( - f"https://zenodo.org/record/{sources['deposit_id']}/files/" - "data_bundle_egon_data.zip" - ) - target_file = config.datasets()["data-bundle"]["targets"]["file"] + + url = DataBundle.sources.urls["zenodo_data_bundle"]["url"] + target_file = 
DataBundle.targets.files["data_bundle"] # check if file exists if not Path(target_file).exists(): @@ -39,16 +37,27 @@ class DataBundle(Dataset): + + + sources = DatasetSources( + urls={ + "zenodo_data_bundle": { + "url": "https://zenodo.org/record/16576506/files/data_bundle_egon_data.zip" + } + } + ) + + targets = DatasetTargets( + files={ + "data_bundle": "data_bundle_egon_data.zip" + } + ) + def __init__(self, dependencies): - deposit_id = config.datasets()["data-bundle"]["sources"]["zenodo"][ - "deposit_id" - ] - deposit_id_powerd = config.datasets()["data-bundle"]["sources"][ - "zenodo" - ]["deposit_id"] + super().__init__( name="DataBundle", - version=f"{deposit_id}-{deposit_id_powerd}-0.0.3", + version="0.0.3", dependencies=dependencies, tasks=(download,), ) diff --git a/src/egon/data/datasets/demandregio/__init__.py b/src/egon/data/datasets/demandregio/__init__.py index b65005bbf..c3e9e565c 100644 --- a/src/egon/data/datasets/demandregio/__init__.py +++ b/src/egon/data/datasets/demandregio/__init__.py @@ -13,7 +13,7 @@ import pandas as pd from egon.data import db, logger -from egon.data.datasets import Dataset, wrapped_partial +from egon.data.datasets import Dataset, wrapped_partial, DatasetSources, DatasetTargets from egon.data.datasets.scenario_parameters import ( EgonScenario, get_sector_parameters, @@ -57,11 +57,39 @@ class DemandRegio(Dataset): * :py:class:`demand.egon_demandregio_timeseries_cts_ind ` is created and filled """ + + sources = DatasetSources( + files={ + "wz_cts": "WZ_definition/CTS_WZ_definition.csv", + "wz_industry": "WZ_definition/ind_WZ_definition.csv", + "new_consumers_2035": "new_largescale_consumers_nep.csv", + "cache_zip": "demand_regio_backup/cache.zip", + "dbdump_zip": "demand_regio_backup/status2019-egon-demandregio-cts-ind.zip", + "pes_demand_today": "industrial_energy_demand_per_country_today.csv",  # NOTE(review): key required by adjust_ind_pes; filename from pypsa-eur-sec resources — confirm + }, + tables={ + "vg250_krs": "boundaries.vg250_krs", + } + ) + targets = DatasetTargets( + files={ + "cache_dir": "demandregio/cache", + "dbdump_dir": "demandregio/dbdump", + },
tables={ + "hh_demand": "demand.egon_demandregio_hh", + "cts_ind_demand": "demand.egon_demandregio_cts_ind", + "population": "society.egon_demandregio_population", + "households": "society.egon_demandregio_household", + "wz_definitions": "demand.egon_demandregio_wz", + "timeseries_cts_ind": "demand.egon_demandregio_timeseries_cts_ind", + } + ) #: name: str = "DemandRegio" #: - version: str = "0.0.12" + version: str = "0.0.16" def __init__(self, dependencies): super().__init__( @@ -69,6 +96,7 @@ def __init__(self, dependencies): version=self.version, dependencies=dependencies, tasks=( + get_cached_tables, # adhoc workaround #180 create_tables, { @@ -196,7 +224,7 @@ def data_in_boundaries(df): return df[ df.index.isin( pd.read_sql( - "SELECT DISTINCT ON (nuts) nuts FROM boundaries.vg250_krs", + f"SELECT DISTINCT ON (nuts) nuts FROM {DemandRegio.sources.tables['vg250_krs']}", engine, ).nuts ) @@ -204,36 +232,19 @@ def data_in_boundaries(df): def insert_cts_ind_wz_definitions(): - """Insert demandregio's definitions of CTS and industrial branches - - Returns - ------- - None. 
- - """ - - source = egon.data.config.datasets()["demandregio_cts_ind_demand"][ - "sources" - ] - - target = egon.data.config.datasets()["demandregio_cts_ind_demand"][ - "targets" - ]["wz_definitions"] + """Insert demandregio's definitions of CTS and industrial branches""" engine = db.engine() - for sector in source["wz_definitions"]: - file_path = ( - Path(".") - / "data_bundle_egon_data" - / "WZ_definition" - / source["wz_definitions"][sector] - ) + # This dictionary replaces the logic from the old config file + wz_files = { + "CTS": "wz_cts", + "industry": "wz_industry" + } - if sector == "CTS": - delimiter = ";" - else: - delimiter = "," + for sector, file_key in wz_files.items(): + file_path = Path("data_bundle_egon_data") / DemandRegio.sources.files[file_key] + delimiter = ";" if sector == "CTS" else "," df = ( pd.read_csv(file_path, delimiter=delimiter, header=None) .rename({0: "wz", 1: "definition"}, axis="columns") @@ -241,9 +252,9 @@ def insert_cts_ind_wz_definitions(): ) df["sector"] = sector df.to_sql( - target["table"], + DemandRegio.targets.get_table_name("wz_definitions"), engine, - schema=target["schema"], + schema=DemandRegio.targets.get_table_schema("wz_definitions"), if_exists="append", ) @@ -293,17 +304,12 @@ def adjust_ind_pes(ec_cts_ind): """ - pes_path = ( - Path(".") / "data_bundle_powerd_data" / "pypsa_eur" / "resources" - ) + pes_path = Path("data_bundle_egon_data") - sources = egon.data.config.datasets()["demandregio_cts_ind_demand"][ - "sources" - ]["new_consumers_2050"] - # Extract today's industrial demand from pypsa-eur-sec + # All file paths now use the new class attributes demand_today = pd.read_csv( - pes_path / sources["pes-demand-today"], + pes_path / DemandRegio.sources.files["pes_demand_today"], header=None, ).transpose() @@ -325,18 +331,17 @@ def adjust_ind_pes(ec_cts_ind): # Calculate future industrial demand from pypsa-eur-sec # based on production and energy demands per carrier ('sector ratios') - prod_tomorrow = 
pd.read_csv(pes_path / sources["pes-production-tomorrow"]) - + prod_tomorrow = pd.read_csv(pes_path / DemandRegio.sources.files["pes_production_tomorrow"]) prod_tomorrow = prod_tomorrow[prod_tomorrow["kton/a"] == "DE"].set_index( "kton/a" ) sector_ratio = ( - pd.read_csv(pes_path / sources["pes-sector-ratios"]) + pd.read_csv(pes_path / DemandRegio.sources.files["pes_sector_ratios"]) .set_index("MWh/tMaterial") .loc["elec"] ) - + demand_tomorrow = prod_tomorrow.multiply( sector_ratio.div(1000) ).transpose()["DE"] @@ -412,18 +417,8 @@ def adjust_cts_ind_nep(ec_cts_ind, sector): ec_cts_ind : pandas.DataFrame CTS or industry demand including new largescale consumers. - """ - sources = egon.data.config.datasets()["demandregio_cts_ind_demand"][ - "sources" - ] - - file_path = ( - Path(".") - / "data_bundle_egon_data" - / "nep2035_version2021" - / sources["new_consumers_2035"] - ) - + """ + file_path = Path("data_bundle_egon_data") / DemandRegio.sources.files["new_consumers_2035"] # get data from NEP per federal state new_con = pd.read_csv(file_path, delimiter=";", decimal=",", index_col=0) @@ -625,9 +620,7 @@ def insert_hh_demand(scenario, year, engine): None. """ - targets = egon.data.config.datasets()["demandregio_household_demand"][ - "targets" - ]["household_demand"] + # get demands of private households per nuts and size from demandregio ec_hh = disagg_households_power(scenario, year) @@ -646,9 +639,9 @@ def insert_hh_demand(scenario, year, engine): df["hh_size"] = hh_size df = df.rename({hh_size: "demand"}, axis="columns") df.to_sql( - targets["table"], + DemandRegio.targets.get_table_name("hh_demand"), engine, - schema=targets["schema"], + schema=DemandRegio.targets.get_table_schema("hh_demand"), if_exists="append", ) @@ -672,9 +665,10 @@ def insert_hh_demand(scenario, year, engine): f"Couldnt get profiles from FFE, will use pickeld fallback! 
\n {e}" ) hh_load_timeseries = pd.read_csv( - "data_bundle_egon_data/demand_regio_backup/df_load_profiles.csv", + Path("data_bundle_egon_data") / "demand_regio_backup" / "df_load_profiles.csv", index_col="time", ) + hh_load_timeseries.index = pd.to_datetime( hh_load_timeseries.index, format="%Y-%m-%d %H:%M:%S" ) @@ -716,30 +710,23 @@ def insert_cts_ind(scenario, year, engine, target_values): None. """ - targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][ - "targets" - ] + #targets = egon.data.config.datasets()["demandregio_cts_ind_demand"]["targets"] wz_table = pd.read_sql( - "SELECT wz, sector FROM demand.egon_demandregio_wz", + f"SELECT wz, sector FROM {DemandRegio.targets.tables['wz_definitions']}", con=engine, - index_col="wz", + index_col="wz" ) # Workaround: Since the disaggregator does not work anymore, data from # previous runs is used for eGon2035 and eGon100RE if scenario == "eGon2035": - file2035_path = ( - Path(".") - / "data_bundle_egon_data" - / "demand_regio_backup" - / "egon_demandregio_cts_ind_egon2035.csv" - ) + file2035_path = Path("data_bundle_egon_data") / "demand_regio_backup" / "egon_demandregio_cts_ind_egon2035.csv" ec_cts_ind2 = pd.read_csv(file2035_path) ec_cts_ind2.to_sql( - targets["cts_ind_demand"]["table"], + DemandRegio.targets.get_table_name("cts_ind_demand"), engine, - targets["cts_ind_demand"]["schema"], + schema=DemandRegio.targets.get_table_schema("cts_ind_demand"), if_exists="append", index=False, ) @@ -747,8 +734,9 @@ def insert_cts_ind(scenario, year, engine, target_values): if scenario == "eGon100RE": ec_cts_ind2 = pd.read_csv( - "data_bundle_egon_data/demand_regio_backup/egon_demandregio_cts_ind.csv" + Path("data_bundle_egon_data") / "demand_regio_backup" / "egon_demandregio_cts_ind.csv" ) + ec_cts_ind2["sector"] = ec_cts_ind2["wz"].map(wz_table["sector"]) factor_ind = target_values[scenario]["industry"] / ( ec_cts_ind2[ec_cts_ind2["sector"] == "industry"]["demand"].sum() @@ -770,9 +758,9 @@ def 
insert_cts_ind(scenario, year, engine, target_values): ec_cts_ind2.drop(columns=["sector"], inplace=True) ec_cts_ind2.to_sql( - targets["cts_ind_demand"]["table"], + DemandRegio.targets.get_table_name("cts_ind_demand"), engine, - targets["cts_ind_demand"]["schema"], + schema=DemandRegio.targets.get_table_schema("cts_ind_demand"), if_exists="append", index=False, ) @@ -814,11 +802,11 @@ def insert_cts_ind(scenario, year, engine, target_values): df = df.rename({wz: "demand"}, axis="columns") df.index = df.index.rename("nuts3") df.to_sql( - targets["cts_ind_demand"]["table"], - engine, - targets["cts_ind_demand"]["schema"], - if_exists="append", - ) + DemandRegio.targets.get_table_name("cts_ind_demand"), + engine, + schema=DemandRegio.targets.get_table_schema("cts_ind_demand"), + if_exists="append", + ) def insert_household_demand(): @@ -830,18 +818,16 @@ def insert_household_demand(): None. """ - targets = egon.data.config.datasets()["demandregio_household_demand"][ - "targets" - ] + engine = db.engine() scenarios = egon.data.config.settings()["egon-data"]["--scenarios"] scenarios.append("eGon2021") - for t in targets: + for table_key in ["hh_demand"]: # Assuming this is the only target here db.execute_sql( - f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};" + f"DELETE FROM {DemandRegio.targets.tables[table_key]};" ) for scn in scenarios: @@ -864,14 +850,16 @@ def insert_cts_ind_demands(): None. """ - targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][ - "targets" - ] + engine = db.engine() - for t in targets: + for table_key in [ + "cts_ind_demand", + "wz_definitions", + "timeseries_cts_ind", + ]: db.execute_sql( - f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};" + f"DELETE FROM {DemandRegio.targets.tables[table_key]};" ) insert_cts_ind_wz_definitions() @@ -916,12 +904,12 @@ def insert_society_data(): None. 
""" - targets = egon.data.config.datasets()["demandregio_society"]["targets"] + engine = db.engine() - for t in targets: + for table_key in ["population", "households"]: db.execute_sql( - f"DELETE FROM {targets[t]['schema']}.{targets[t]['table']};" + f"DELETE FROM {DemandRegio.targets.tables[table_key]};" ) target_years = np.append( @@ -935,9 +923,9 @@ def insert_society_data(): # Select data for nuts3-regions in boundaries (needed for testmode) df_pop = data_in_boundaries(df_pop) df_pop.to_sql( - targets["population"]["table"], + DemandRegio.targets.get_table_name("population"), engine, - schema=targets["population"]["schema"], + schema=DemandRegio.targets.get_table_schema("population"), if_exists="append", ) @@ -951,9 +939,9 @@ def insert_society_data(): df["hh_size"] = hh_size df = df.rename({hh_size: "households"}, axis="columns") df.to_sql( - targets["household"]["table"], + DemandRegio.targets.get_table_name("households"), engine, - schema=targets["household"]["schema"], + schema=DemandRegio.targets.get_table_schema("households"), if_exists="append", ) @@ -973,9 +961,7 @@ def insert_timeseries_per_wz(sector, year): None. """ - targets = egon.data.config.datasets()["demandregio_cts_ind_demand"][ - "targets" - ] + if sector == "CTS": profiles = ( @@ -1023,18 +1009,16 @@ def insert_timeseries_per_wz(sector, year): db.execute_sql( f""" - DELETE FROM {targets['timeseries_cts_ind']['schema']}. - {targets['timeseries_cts_ind']['table']} + DELETE FROM {DemandRegio.targets.tables['timeseries_cts_ind']} WHERE wz IN ( - SELECT wz FROM {targets['wz_definitions']['schema']}. 
- {targets['wz_definitions']['table']} + SELECT wz FROM {DemandRegio.targets.tables['wz_definitions']} WHERE sector = '{sector}') """ ) df.to_sql( - targets["timeseries_cts_ind"]["table"], - schema=targets["timeseries_cts_ind"]["schema"], + DemandRegio.targets.get_table_name("timeseries_cts_ind"), + schema=DemandRegio.targets.get_table_schema("timeseries_cts_ind"), con=db.engine(), if_exists="append", ) diff --git a/src/egon/data/datasets/district_heating_areas/__init__.py b/src/egon/data/datasets/district_heating_areas/__init__.py index 29e1a932d..66352251e 100644 --- a/src/egon/data/datasets/district_heating_areas/__init__.py +++ b/src/egon/data/datasets/district_heating_areas/__init__.py @@ -30,7 +30,7 @@ import pandas as pd from egon.data import config, db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.district_heating_areas.plot import ( plot_heat_density_sorted, ) @@ -75,7 +75,52 @@ class DistrictHeatingAreas(Dataset): #: name: str = "district-heating-areas" #: - version: str = "0.0.2" + version: str = "0.0.3" + + sources = DatasetSources( + tables={ + # zensus_population.processed + "zensus_population": { + "schema": "society", + "table": "destatis_zensus_population_per_ha", + }, + # zensus_misc.processed.file_table_map -> Wohnungen + "zensus_apartment": { + "schema": "society", + "table": "egon_destatis_zensus_apartment_per_ha", + }, + # heat_demand_cts, heat_supply, etrago_heat, etc. + "peta_heat": { + "schema": "demand", + "table": "egon_peta_heat", + }, + # vg250.processed.file_table_map -> "VG250_KRS.shp": "vg250_krs" + "vg250_krs": { + "schema": "boundaries", + "table": "vg250_krs", + }, + }, + files={}, + ) + + targets = DatasetTargets( + tables={ + # used by many modules (heat_supply, etrago_heat, chp_location, PtH2, ...) 
+ "district_heating_areas": { + "schema": "demand", + "table": "egon_district_heating_areas", + }, + "map_district_heating_areas": { + "schema": "demand", + "table": "egon_map_zensus_district_heating_areas", + }, + }, + files={ + "results_path": { + "filepath": "district_heating_areas/", + }, + }, + ) def __init__(self, dependencies): super().__init__( @@ -220,28 +265,36 @@ def load_census_data(minimum_connection_rate=0.3): # only census cells where egon-data has a heat demand are considered district_heat = db.select_geodataframe( - """SELECT flats.zensus_population_id, flats.characteristics_text, + f"""SELECT flats.zensus_population_id, flats.characteristics_text, flats.quantity, flats.quantity_q, pop.geom_point, pop.geom AS geom_polygon - FROM society.egon_destatis_zensus_apartment_per_ha AS flats - JOIN society.destatis_zensus_population_per_ha AS pop + FROM {DistrictHeatingAreas.sources.tables["zensus_apartment"]["schema"]}. + {DistrictHeatingAreas.sources.tables["zensus_apartment"]["table"]} AS flats + JOIN {DistrictHeatingAreas.sources.tables["zensus_population"]["schema"]}. + {DistrictHeatingAreas.sources.tables["zensus_population"]["table"]} AS pop ON flats.zensus_population_id = pop.id AND flats.characteristics_text = 'Fernheizung (Fernwärme)' AND flats.zensus_population_id IN - (SELECT zensus_population_id FROM demand.egon_peta_heat);""", + (SELECT zensus_population_id FROM + {DistrictHeatingAreas.sources.tables["peta_heat"]["schema"]}. + {DistrictHeatingAreas.sources.tables["peta_heat"]["table"]});""", index_col="zensus_population_id", geom_col="geom_polygon", ) heating_type = db.select_geodataframe( - """SELECT flats.zensus_population_id, + f"""SELECT flats.zensus_population_id, SUM(flats.quantity) AS quantity, pop.geom AS geom_polygon - FROM society.egon_destatis_zensus_apartment_per_ha AS flats - JOIN society.destatis_zensus_population_per_ha AS pop + FROM {DistrictHeatingAreas.sources.tables["zensus_apartment"]["schema"]}. 
+ {DistrictHeatingAreas.sources.tables["zensus_apartment"]["table"]} AS flats + JOIN {DistrictHeatingAreas.sources.tables["zensus_population"]["schema"]}. + {DistrictHeatingAreas.sources.tables["zensus_population"]["table"]} AS pop ON flats.zensus_population_id = pop.id AND flats.attribute = 'HEIZTYP' AND flats.zensus_population_id IN - (SELECT zensus_population_id FROM demand.egon_peta_heat) + (SELECT zensus_population_id FROM + {DistrictHeatingAreas.sources.tables["peta_heat"]["schema"]}. + {DistrictHeatingAreas.sources.tables["peta_heat"]["table"]}) GROUP BY flats.zensus_population_id, pop.geom;""", index_col="zensus_population_id", geom_col="geom_polygon", @@ -287,8 +340,10 @@ def load_heat_demands(scenario_name): f"""SELECT demand.zensus_population_id, SUM(demand.demand) AS residential_and_service_demand, pop.geom AS geom_polygon - FROM demand.egon_peta_heat AS demand - JOIN society.destatis_zensus_population_per_ha AS pop + FROM {DistrictHeatingAreas.sources.tables["peta_heat"]["schema"]}. + {DistrictHeatingAreas.sources.tables["peta_heat"]["table"]} AS demand + JOIN {DistrictHeatingAreas.sources.tables["zensus_population"]["schema"]}. + {DistrictHeatingAreas.sources.tables["zensus_population"]["table"]} AS pop ON demand.zensus_population_id = pop.id AND demand.scenario = '{scenario_name}' GROUP BY demand.zensus_population_id, pop.geom;""", @@ -437,8 +492,10 @@ def area_grouping( ] nuts3_boundaries = db.select_geodataframe( - """ - SELECT gen, geometry as geom FROM boundaries.vg250_krs + f""" + SELECT gen, geometry as geom FROM + {DistrictHeatingAreas.sources.tables["vg250_krs"]["schema"]}. 
+ {DistrictHeatingAreas.sources.tables["vg250_krs"]["table"]} """ ) join_2 = gpd.sjoin( @@ -660,12 +717,14 @@ def district_heating_areas(scenario_name, plotting=False): scenario_dh_area["scenario"] = scenario_name db.execute_sql( - f"""DELETE FROM demand.egon_map_zensus_district_heating_areas - WHERE scenario = '{scenario_name}'""" + f"""DELETE FROM + {DistrictHeatingAreas.targets.tables["map_district_heating_areas"]["schema"]}. + {DistrictHeatingAreas.targets.tables["map_district_heating_areas"]["table"]} + WHERE scenario = '{scenario_name}'""" ) scenario_dh_area[["scenario", "area_id", "zensus_population_id"]].to_sql( - "egon_map_zensus_district_heating_areas", - schema="demand", + DistrictHeatingAreas.targets.tables["map_district_heating_areas"]["table"], + schema=DistrictHeatingAreas.targets.tables["map_district_heating_areas"]["schema"], con=db.engine(), if_exists="append", index=False, @@ -701,14 +760,16 @@ def district_heating_areas(scenario_name, plotting=False): # )].index.values}""") db.execute_sql( - f"""DELETE FROM demand.egon_district_heating_areas - WHERE scenario = '{scenario_name}'""" + f"""DELETE FROM + {DistrictHeatingAreas.targets.tables["district_heating_areas"]["schema"]}. 
+ {DistrictHeatingAreas.targets.tables["district_heating_areas"]["table"]} + WHERE scenario = '{scenario_name}'""" ) areas_dissolved.reset_index().drop( "zensus_population_id", axis="columns" ).to_postgis( - "egon_district_heating_areas", - schema="demand", + DistrictHeatingAreas.targets.tables["district_heating_areas"]["table"], + schema=DistrictHeatingAreas.targets.tables["district_heating_areas"]["schema"], con=db.engine(), if_exists="append", ) @@ -850,7 +911,11 @@ def add_metadata(): } meta_json = "'" + json.dumps(meta) + "'" - db.submit_comment(meta_json, "demand", "egon_district_heating_areas") + db.submit_comment( + meta_json, + DistrictHeatingAreas.targets.tables["district_heating_areas"]["schema"], + DistrictHeatingAreas.targets.tables["district_heating_areas"]["table"], + ) # Metadata creation for "id mapping" table meta = { @@ -941,7 +1006,9 @@ def add_metadata(): meta_json = "'" + json.dumps(meta) + "'" db.submit_comment( - meta_json, "demand", "egon_map_zensus_district_heating_areas" + meta_json, + DistrictHeatingAreas.targets.tables["map_district_heating_areas"]["schema"], + DistrictHeatingAreas.targets.tables["map_district_heating_areas"]["table"], ) return None @@ -978,7 +1045,8 @@ def study_prospective_district_heating_areas(): """ # create directory to store files - results_path = "district_heating_areas/" + results_path = DistrictHeatingAreas.targets.files["results_path"]["filepath"] + if not os.path.exists(results_path): os.mkdir(results_path) diff --git a/src/egon/data/datasets/electrical_neighbours.py b/src/egon/data/datasets/electrical_neighbours.py index 91498b80b..68453a103 100644 --- a/src/egon/data/datasets/electrical_neighbours.py +++ b/src/egon/data/datasets/electrical_neighbours.py @@ -15,7 +15,7 @@ import requests from egon.data import config, db, logger -from egon.data.datasets import Dataset, wrapped_partial +from egon.data.datasets import Dataset, wrapped_partial, DatasetSources, DatasetTargets from 
egon.data.datasets.fill_etrago_gen import add_marginal_costs from egon.data.datasets.fix_ehv_subnetworks import select_bus_id from egon.data.datasets.pypsaeur import prepared_network @@ -42,24 +42,34 @@ def get_cross_border_buses(scenario, sources): return db.select_geodataframe( f""" SELECT * - FROM {sources['electricity_buses']['schema']}. - {sources['electricity_buses']['table']} + FROM {ElectricalNeighbours.sources.tables['electricity_buses']['schema']}. + {ElectricalNeighbours.sources.tables['electricity_buses']['table']} WHERE - NOT ST_INTERSECTS ( + NOT ST_INTERSECTS( geom, - (SELECT ST_Transform(ST_Buffer(geometry, 5), 4326) FROM - {sources['german_borders']['schema']}. - {sources['german_borders']['table']})) - AND (bus_id IN ( - SELECT bus0 FROM - {sources['lines']['schema']}.{sources['lines']['table']}) - OR bus_id IN ( - SELECT bus1 FROM - {sources['lines']['schema']}.{sources['lines']['table']})) + ( + SELECT ST_Transform(ST_Buffer(geometry, 5), 4326) + FROM {ElectricalNeighbours.sources.tables['german_borders']['schema']}. + {ElectricalNeighbours.sources.tables['german_borders']['table']} + ) + ) + AND ( + bus_id IN ( + SELECT bus0 + FROM {ElectricalNeighbours.sources.tables['lines']['schema']}. + {ElectricalNeighbours.sources.tables['lines']['table']} + ) + OR bus_id IN ( + SELECT bus1 + FROM {ElectricalNeighbours.sources.tables['lines']['schema']}. + {ElectricalNeighbours.sources.tables['lines']['table']} + ) + ) AND scn_name = '{scenario}'; - """, - epsg=4326, - ) + """, + epsg=4326, + ) + def get_cross_border_lines(scenario, sources): @@ -79,13 +89,14 @@ def get_cross_border_lines(scenario, sources): return db.select_geodataframe( f""" SELECT * - FROM {sources['lines']['schema']}.{sources['lines']['table']} a + FROM {ElectricalNeighbours.sources.tables['lines']['schema']}. 
+ {ElectricalNeighbours.sources.tables['lines']['table']} a WHERE ST_INTERSECTS ( a.topo, (SELECT ST_Transform(ST_boundary(geometry), 4326) - FROM {sources['german_borders']['schema']}. - {sources['german_borders']['table']})) + FROM {ElectricalNeighbours.sources.tables['german_borders']['schema']}. + {ElectricalNeighbours.sources.tables['german_borders']['table']})) AND scn_name = '{scenario}'; """, epsg=4326, @@ -148,14 +159,14 @@ def buses(scenario, sources, targets): """ sql_delete = f""" - DELETE FROM {sources['electricity_buses']['schema']}. - {sources['electricity_buses']['table']} + DELETE FROM {ElectricalNeighbours.sources.tables['electricity_buses']['schema']}. + {ElectricalNeighbours.sources.tables['electricity_buses']['table']} WHERE country != 'DE' AND scn_name = '{scenario}' AND carrier = 'AC' AND bus_id NOT IN ( SELECT bus_i - FROM {sources['osmtgmod_bus']['schema']}. - {sources['osmtgmod_bus']['table']}) + FROM {ElectricalNeighbours.sources.tables['osmtgmod_bus']['schema']}. 
+ {ElectricalNeighbours.sources.tables['osmtgmod_bus']['table']}) """ # Delete existing buses @@ -282,8 +293,8 @@ def buses(scenario, sources, targets): "status2023", ]: # TODO: status2023 this is hardcoded shit central_buses.to_postgis( - targets["buses"]["table"], - schema=targets["buses"]["schema"], + ElectricalNeighbours.targets.tables["buses"]["table"], + schema=ElectricalNeighbours.targets.tables["buses"]["schema"], if_exists="append", con=db.engine(), index=False, @@ -292,8 +303,8 @@ def buses(scenario, sources, targets): # (buses with another voltage_level or inside Germany in test mode) else: central_buses[central_buses.carrier == "AC"].to_postgis( - targets["buses"]["table"], - schema=targets["buses"]["schema"], + ElectricalNeighbours.targets.tables["buses"]["table"], + schema=ElectricalNeighbours.targets.tables["buses"]["schema"], if_exists="append", con=db.engine(), index=False, @@ -302,7 +313,7 @@ def buses(scenario, sources, targets): return central_buses -def lines_between_foreign_countries(scenario, sorces, targets, central_buses): +def lines_between_foreign_countries(scenario, sources, targets, central_buses): # import network from pypsa-eur network = prepared_network() @@ -414,9 +425,9 @@ def lines_between_foreign_countries(scenario, sorces, targets, central_buses): gdf = gdf.set_index(f"{table_name}_id") gdf.to_postgis( - f"egon_etrago_{table_name}", + ElectricalNeighbours.targets.tables[f"{table_name}s"]["table"], db.engine(), - schema="grid", + schema=ElectricalNeighbours.targets.tables[f"{table_name}s"]["schema"], if_exists="append", index=True, index_label=f"{table_name}_id", @@ -445,23 +456,23 @@ def cross_border_lines(scenario, sources, targets, central_buses): # Delete existing data db.execute_sql( f""" - DELETE FROM {targets['lines']['schema']}. - {targets['lines']['table']} + DELETE FROM {ElectricalNeighbours.targets.tables['lines']['schema']}. 
+ {ElectricalNeighbours.targets.tables['lines']['table']} WHERE scn_name = '{scenario}' AND line_id NOT IN ( SELECT branch_id - FROM {sources['osmtgmod_branch']['schema']}. - {sources['osmtgmod_branch']['table']} + FROM {ElectricalNeighbours.sources.tables['osmtgmod_branch']['schema']}. + {ElectricalNeighbours.sources.tables['osmtgmod_branch']['table']} WHERE result_id = 1 and (link_type = 'line' or link_type = 'cable')) AND bus0 IN ( SELECT bus_i - FROM {sources['osmtgmod_bus']['schema']}. - {sources['osmtgmod_bus']['table']}) + FROM {ElectricalNeighbours.sources.tables['osmtgmod_bus']['schema']}. + {ElectricalNeighbours.sources.tables['osmtgmod_bus']['table']}) AND bus1 NOT IN ( SELECT bus_i - FROM {sources['osmtgmod_bus']['schema']}. - {sources['osmtgmod_bus']['table']}) + FROM {ElectricalNeighbours.sources.tables['osmtgmod_bus']['schema']}. + {ElectricalNeighbours.sources.tables['osmtgmod_bus']['table']}) """ ) @@ -558,8 +569,8 @@ def cross_border_lines(scenario, sources, targets, central_buses): # Insert lines to the database new_lines.to_postgis( - targets["lines"]["table"], - schema=targets["lines"]["schema"], + ElectricalNeighbours.targets.tables["lines"]["table"], + schema=ElectricalNeighbours.targets.tables["lines"]["schema"], if_exists="append", con=db.engine(), index=False, @@ -641,13 +652,13 @@ def central_transformer(scenario, sources, targets, central_buses, new_lines): # Delete existing transformers in foreign countries db.execute_sql( f""" - DELETE FROM {targets['transformers']['schema']}. - {targets['transformers']['table']} + DELETE FROM {ElectricalNeighbours.targets.tables['transformers']['schema']}. + {ElectricalNeighbours.targets.tables['transformers']['table']} WHERE scn_name = '{scenario}' AND trafo_id NOT IN ( SELECT branch_id - FROM {sources['osmtgmod_branch']['schema']}. - {sources['osmtgmod_branch']['table']} + FROM {ElectricalNeighbours.sources.tables['osmtgmod_branch']['schema']}. 
+ {ElectricalNeighbours.sources.tables['osmtgmod_branch']['table']} WHERE result_id = 1 and link_type = 'transformer') """ ) @@ -698,8 +709,8 @@ def central_transformer(scenario, sources, targets, central_buses, new_lines): # Insert transformers to the database trafo.to_sql( - targets["transformers"]["table"], - schema=targets["transformers"]["schema"], + ElectricalNeighbours.targets.tables["transformers"]["table"], + schema=ElectricalNeighbours.targets.tables["transformers"]["schema"], if_exists="append", con=db.engine(), index=False, @@ -726,21 +737,21 @@ def foreign_dc_lines(scenario, sources, targets, central_buses): # Delete existing dc lines to foreign countries db.execute_sql( f""" - DELETE FROM {targets['links']['schema']}. - {targets['links']['table']} + DELETE FROM {ElectricalNeighbours.targets.tables['links']['schema']}. + {ElectricalNeighbours.targets.tables['links']['table']} WHERE scn_name = '{scenario}' AND carrier = 'DC' AND bus0 IN ( SELECT bus_id - FROM {sources['electricity_buses']['schema']}. - {sources['electricity_buses']['table']} + FROM {ElectricalNeighbours.sources.tables['electricity_buses']['schema']}. + {ElectricalNeighbours.sources.tables['electricity_buses']['table']} WHERE scn_name = '{scenario}' AND carrier = 'AC' AND country = 'DE') AND bus1 IN ( SELECT bus_id - FROM {sources['electricity_buses']['schema']}. - {sources['electricity_buses']['table']} + FROM {ElectricalNeighbours.sources.tables['electricity_buses']['schema']}. 
+ {ElectricalNeighbours.sources.tables['electricity_buses']['table']} WHERE scn_name = '{scenario}' AND carrier = 'AC' AND country != 'DE') @@ -827,8 +838,8 @@ def foreign_dc_lines(scenario, sources, targets, central_buses): # Insert DC lines to the database foreign_links.to_postgis( - targets["links"]["table"], - schema=targets["links"]["schema"], + ElectricalNeighbours.targets.tables["links"]["table"], + schema=ElectricalNeighbours.targets.tables["links"]["schema"], if_exists="append", con=db.engine(), index=False, @@ -844,8 +855,8 @@ def grid(): """ # Select sources and targets from dataset configuration - sources = config.datasets()["electrical_neighbours"]["sources"] - targets = config.datasets()["electrical_neighbours"]["targets"] + sources = ElectricalNeighbours.sources.tables + targets = ElectricalNeighbours.targets.tables for scenario in config.settings()["egon-data"]["--scenarios"]: central_buses = buses(scenario, sources, targets) @@ -926,24 +937,26 @@ def get_foreign_bus_id(scenario): """ - sources = config.datasets()["electrical_neighbours"]["sources"] - bus_id = db.select_geodataframe( f"""SELECT bus_id, ST_Buffer(geom, 1) as geom, country - FROM grid.egon_etrago_bus + FROM {ElectricalNeighbours.sources.tables['electricity_buses']['schema']}. + {ElectricalNeighbours.sources.tables['electricity_buses']['table']} WHERE scn_name = '{scenario}' AND carrier = 'AC' AND v_nom = 380. AND country != 'DE' AND bus_id NOT IN ( SELECT bus_i - FROM osmtgmod_results.bus_data) + FROM {ElectricalNeighbours.sources.tables['osmtgmod_bus']['schema']}. 
+ {ElectricalNeighbours.sources.tables['osmtgmod_bus']['table']}) """, epsg=3035, ) # insert installed capacities - file = zipfile.ZipFile(f"tyndp/{sources['tyndp_capacities']}") + file = zipfile.ZipFile( + ElectricalNeighbours.sources.files['tyndp_capacities'] + ) # Select buses in neighbouring countries as geodataframe buses = pd.read_excel( @@ -978,8 +991,6 @@ def calc_capacities(): """ - sources = config.datasets()["electrical_neighbours"]["sources"] - countries = [ "AT", "BE", @@ -995,7 +1006,9 @@ def calc_capacities(): ] # insert installed capacities - file = zipfile.ZipFile(f"tyndp/{sources['tyndp_capacities']}") + file = zipfile.ZipFile( + ElectricalNeighbours.sources.files['tyndp_capacities'] + ) df = pd.read_excel( file.open("TYNDP-2020-Scenario-Datafile.xlsx").read(), sheet_name="Capacity", @@ -1057,14 +1070,13 @@ def insert_generators_tyndp(capacities): None. """ - targets = config.datasets()["electrical_neighbours"]["targets"] + targets = ElectricalNeighbours.targets.tables map_buses = get_map_buses() # Delete existing data db.execute_sql( f""" - DELETE FROM - {targets['generators']['schema']}.{targets['generators']['table']} + DELETE FROM {targets['generators']['schema']}.{targets['generators']['table']} WHERE bus IN ( SELECT bus_id FROM {targets['buses']['schema']}.{targets['buses']['table']} @@ -1077,9 +1089,8 @@ def insert_generators_tyndp(capacities): db.execute_sql( f""" - DELETE FROM - {targets['generators_timeseries']['schema']}. - {targets['generators_timeseries']['table']} + DELETE FROM {targets['generators_timeseries']['schema']}. + {targets['generators_timeseries']['table']} WHERE generator_id NOT IN ( SELECT generator_id FROM {targets['generators']['schema']}.{targets['generators']['table']} @@ -1161,7 +1172,7 @@ def insert_storage_tyndp(capacities): None. 
""" - targets = config.datasets()["electrical_neighbours"]["targets"] + targets = ElectricalNeighbours.targets.tables map_buses = get_map_buses() # Delete existing data @@ -1294,21 +1305,21 @@ def tyndp_demand(): """ map_buses = get_map_buses() - sources = config.datasets()["electrical_neighbours"]["sources"] - targets = config.datasets()["electrical_neighbours"]["targets"] + sources = ElectricalNeighbours.sources # class attributes + targets = ElectricalNeighbours.targets # Delete existing data db.execute_sql( f""" - DELETE FROM {targets['loads']['schema']}. - {targets['loads']['table']} + DELETE FROM {targets.tables['loads']['schema']}. + {targets.tables['loads']['table']} WHERE scn_name = 'eGon2035' AND carrier = 'AC' AND bus NOT IN ( SELECT bus_i - FROM {sources['osmtgmod_bus']['schema']}. - {sources['osmtgmod_bus']['table']}) + FROM {sources.tables['osmtgmod_bus']['schema']}. + {sources.tables['osmtgmod_bus']['table']}) """ ) @@ -1354,11 +1365,11 @@ def tyndp_demand(): # Read in data from TYNDP for 2030 and 2040 dataset_2030 = pd.read_excel( - f"tyndp/{sources['tyndp_demand_2030']}", sheet_name=nodes, skiprows=10 + sources.files['tyndp_demand_2030'], sheet_name=nodes, skiprows=10 ) dataset_2040 = pd.read_excel( - f"tyndp/{sources['tyndp_demand_2040']}", sheet_name=None, skiprows=10 + sources.files['tyndp_demand_2040'], sheet_name=None, skiprows=10 ) # Transform map_buses to pandas.Series and select only used values @@ -1710,7 +1721,7 @@ def insert_storage_units_sq(scn_name="status2019"): sto_sq = df_gen_sq.loc[:, df_gen_sq.columns == "Hydro Pumped Storage"] sto_sq.rename(columns={"Hydro Pumped Storage": "p_nom"}, inplace=True) - targets = config.datasets()["electrical_neighbours"]["targets"] + targets = ElectricalNeighbours.targets.tables # Delete existing data db.execute_sql( @@ -1888,12 +1899,11 @@ def insert_generators_sq(scn_name="status2019"): ) save_entsoe_data(df_gen_sq, file_path=file_path) - targets = 
config.datasets()["electrical_neighbours"]["targets"] + targets = ElectricalNeighbours.targets.tables # Delete existing data db.execute_sql( f""" - DELETE FROM - {targets['generators']['schema']}.{targets['generators']['table']} + DELETE FROM {targets['generators']['schema']}.{targets['generators']['table']} WHERE bus IN ( SELECT bus_id FROM {targets['buses']['schema']}.{targets['buses']['table']} @@ -1906,9 +1916,8 @@ def insert_generators_sq(scn_name="status2019"): db.execute_sql( f""" - DELETE FROM - {targets['generators_timeseries']['schema']}. - {targets['generators_timeseries']['table']} + DELETE FROM {targets['generators_timeseries']['schema']}. + {targets['generators_timeseries']['table']} WHERE generator_id NOT IN ( SELECT generator_id FROM {targets['generators']['schema']}.{targets['generators']['table']} @@ -1970,8 +1979,10 @@ def renewable_timeseries_pypsaeur(scn_name): foreign_re_generators = db.select_dataframe( f""" SELECT generator_id, a.carrier, country, x, y - FROM grid.egon_etrago_generator a - JOIN grid.egon_etrago_bus b + FROM {ElectricalNeighbours.targets.tables['generators']['schema']}. + {ElectricalNeighbours.targets.tables['generators']['table']} a + JOIN {ElectricalNeighbours.targets.tables['buses']['schema']}. + {ElectricalNeighbours.targets.tables['buses']['table']} b ON a.bus = b.bus_id WHERE a.scn_name = '{scn_name}' AND b.scn_name = '{scn_name}' @@ -2060,8 +2071,8 @@ def insert_loads_sq(scn_name="status2019"): None. """ - sources = config.datasets()["electrical_neighbours"]["sources"] - targets = config.datasets()["electrical_neighbours"]["targets"] + sources = ElectricalNeighbours.sources + targets = ElectricalNeighbours.targets if scn_name == "status2019": year_start_end = {"year_start": "20190101", "year_end": "20200101"} @@ -2090,34 +2101,33 @@ def insert_loads_sq(scn_name="status2019"): # Delete existing data db.execute_sql( f""" - DELETE FROM {targets['load_timeseries']['schema']}. 
- {targets['load_timeseries']['table']} + DELETE FROM {targets.tables['load_timeseries']['schema']}. + {targets.tables['load_timeseries']['table']} WHERE scn_name = '{scn_name}' AND load_id IN ( - SELECT load_id FROM {targets['loads']['schema']}. - {targets['loads']['table']} - WHERE - scn_name = '{scn_name}' - AND carrier = 'AC' - AND bus NOT IN ( - SELECT bus_i - FROM {sources['osmtgmod_bus']['schema']}. - {sources['osmtgmod_bus']['table']})) + SELECT load_id FROM {targets.tables['loads']['schema']}. + {targets.tables['loads']['table']} + WHERE scn_name = '{scn_name}' + AND carrier = 'AC' + AND bus NOT IN ( + SELECT bus_i + FROM {sources.tables['osmtgmod_bus']['schema']}. + {sources.tables['osmtgmod_bus']['table']})) """ ) db.execute_sql( f""" - DELETE FROM {targets['loads']['schema']}. - {targets['loads']['table']} + DELETE FROM {targets.tables['loads']['schema']}. + {targets.tables['loads']['table']} WHERE scn_name = '{scn_name}' AND carrier = 'AC' AND bus NOT IN ( SELECT bus_i - FROM {sources['osmtgmod_bus']['schema']}. - {sources['osmtgmod_bus']['table']}) + FROM {sources.tables['osmtgmod_bus']['schema']}. 
+ {sources.tables['osmtgmod_bus']['table']}) """ ) @@ -2210,7 +2220,36 @@ class ElectricalNeighbours(Dataset): #: name: str = "ElectricalNeighbours" #: - version: str = "0.0.11" + version: str = "0.0.13" + + sources = DatasetSources( + tables={ + "electricity_buses": {"schema": "grid", "table": "egon_etrago_bus"}, + "lines": {"schema": "grid", "table": "egon_etrago_line"}, + "german_borders": {"schema": "boundaries", "table": "vg250_sta_union"}, + "osmtgmod_bus": {"schema": "osmtgmod_results", "table": "bus_data"}, + "osmtgmod_branch": {"schema": "osmtgmod_results", "table": "branch_data"}, + }, + files={ + "tyndp_capacities": "tyndp/TYNDP-2020-Scenario-Datafile.xlsx.zip", + "tyndp_demand_2030": "tyndp/Demand_TimeSeries_2030_DistributedEnergy.xlsx", + "tyndp_demand_2040": "tyndp/Demand_TimeSeries_2040_DistributedEnergy.xlsx", + }, + ) + + targets = DatasetTargets( + tables={ + "buses": {"schema": "grid", "table": "egon_etrago_bus"}, + "lines": {"schema": "grid", "table": "egon_etrago_line"}, + "links": {"schema": "grid", "table": "egon_etrago_link"}, + "transformers": {"schema": "grid", "table": "egon_etrago_transformer"}, + "loads": {"schema": "grid", "table": "egon_etrago_load"}, + "load_timeseries": {"schema": "grid", "table": "egon_etrago_load_timeseries"}, + "generators": {"schema": "grid", "table": "egon_etrago_generator"}, + "generators_timeseries":{"schema": "grid", "table": "egon_etrago_generator_timeseries"}, + "storage": {"schema": "grid", "table": "egon_etrago_storage"}, + } + ) def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/electricity_demand/__init__.py b/src/egon/data/datasets/electricity_demand/__init__.py index f6ef464d5..3668b05fd 100644 --- a/src/egon/data/datasets/electricity_demand/__init__.py +++ b/src/egon/data/datasets/electricity_demand/__init__.py @@ -8,7 +8,7 @@ import pandas as pd from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, 
DatasetSources, DatasetTargets from egon.data.datasets.electricity_demand.temporal import insert_cts_load from egon.data.datasets.electricity_demand_timeseries.hh_buildings import ( HouseholdElectricityProfilesOfBuildings, @@ -45,7 +45,17 @@ class HouseholdElectricityDemand(Dataset): #: name: str = "HouseholdElectricityDemand" #: - version: str = "0.0.5" + version: str = "0.0.6" + + targets = DatasetTargets( + tables={ + "household_demands_zensus": { + "schema": "demand", + "table": "egon_demandregio_zensus_electricity", + } + } + ) + def __init__(self, dependencies): super().__init__( @@ -88,7 +98,34 @@ class CtsElectricityDemand(Dataset): #: name: str = "CtsElectricityDemand" #: - version: str = "0.0.2" + version: str = "0.0.3" + + sources = DatasetSources( + tables={ + "demandregio": {"schema": "demand", "table": "egon_demandregio_cts_ind"}, + "demandregio_wz": {"schema": "demand", "table": "egon_demandregio_wz"}, + "heat_demand_cts": {"schema": "demand", "table": "egon_peta_heat"}, + "map_zensus_vg250": {"schema": "boundaries", "table": "egon_map_zensus_vg250"}, + "demandregio_cts": {"schema": "demand", "table": "egon_demandregio_cts_ind"}, + "demandregio_timeseries": {"schema": "demand", "table": "egon_demandregio_timeseries_cts_ind"}, + "map_grid_districts": {"schema": "boundaries", "table": "egon_map_zensus_grid_districts"}, + "map_vg250": {"schema": "boundaries", "table": "egon_map_zensus_vg250"}, + "zensus_electricity": {"schema": "demand", "table": "egon_demandregio_zensus_electricity"}, + } + ) + + targets = DatasetTargets( + tables={ + "cts_demands_zensus": { + "schema": "demand", + "table": "egon_demandregio_zensus_electricity", + }, + "cts_demand_curves": { + "schema": "demand", + "table": "egon_etrago_electricity_cts", + }, + } + ) def __init__(self, dependencies): super().__init__( @@ -255,22 +292,18 @@ def distribute_cts_demands(): """ - sources = egon.data.config.datasets()["electrical_demands_cts"]["sources"] - - target = 
egon.data.config.datasets()["electrical_demands_cts"]["targets"][ - "cts_demands_zensus" - ] + db.execute_sql( - f"""DELETE FROM {target['schema']}.{target['table']} - WHERE sector = 'service'""" + f"""DELETE FROM {CtsElectricityDemand.targets.tables['cts_demands_zensus']['schema']}.{CtsElectricityDemand.targets.tables['cts_demands_zensus']['table']} + WHERE sector = 'service'""" ) # Select match between zensus cells and nuts3 regions of vg250 map_nuts3 = db.select_dataframe( f"""SELECT zensus_population_id, vg250_nuts3 as nuts3 FROM - {sources['map_zensus_vg250']['schema']}. - {sources['map_zensus_vg250']['table']}""", + {CtsElectricityDemand.sources.tables['map_zensus_vg250']['schema']}. + {CtsElectricityDemand.sources.tables['map_zensus_vg250']['table']}""", index_col="zensus_population_id", ) @@ -280,8 +313,8 @@ def distribute_cts_demands(): peta = db.select_dataframe( f"""SELECT zensus_population_id, demand as heat_demand, sector, scenario FROM - {sources['heat_demand_cts']['schema']}. - {sources['heat_demand_cts']['table']} + {CtsElectricityDemand.sources.tables['heat_demand_cts']['schema']}. + {CtsElectricityDemand.sources.tables['heat_demand_cts']['table']} WHERE scenario = '{scn}' AND sector = 'service'""", index_col="zensus_population_id", @@ -299,13 +332,13 @@ def distribute_cts_demands(): # Select forecasted electrical demands from demandregio table demand_nuts3 = db.select_dataframe( f"""SELECT nuts3, SUM(demand) as demand FROM - {sources['demandregio']['schema']}. - {sources['demandregio']['table']} + {CtsElectricityDemand.sources.tables['demandregio']['schema']}. + {CtsElectricityDemand.sources.tables['demandregio']['table']} WHERE scenario = '{scn}' AND wz IN ( SELECT wz FROM - {sources['demandregio_wz']['schema']}. - {sources['demandregio_wz']['table']} + {CtsElectricityDemand.sources.tables['demandregio_wz']['schema']}. 
+ {CtsElectricityDemand.sources.tables['demandregio_wz']['table']} WHERE sector = 'CTS') GROUP BY nuts3""", index_col="nuts3", @@ -321,8 +354,8 @@ def distribute_cts_demands(): # Insert data to target table peta[["scenario", "demand", "sector"]].to_sql( - target["table"], - schema=target["schema"], + CtsElectricityDemand.targets.tables["cts_demands_zensus"]["table"], + schema=CtsElectricityDemand.targets.tables["cts_demands_zensus"]["schema"], con=db.engine(), if_exists="append", ) diff --git a/src/egon/data/datasets/electricity_demand/temporal.py b/src/egon/data/datasets/electricity_demand/temporal.py index 69eb46ea6..b6d80a916 100644 --- a/src/egon/data/datasets/electricity_demand/temporal.py +++ b/src/egon/data/datasets/electricity_demand/temporal.py @@ -10,6 +10,7 @@ from egon.data import db import egon.data.config import egon.data.datasets.scenario_parameters.parameters as scenario_parameters +from egon.data.datasets import load_sources_and_targets Base = declarative_base() @@ -56,15 +57,12 @@ def calc_load_curve(share_wz, scn, annual_demand=1): """ year = int(scenario_parameters.global_settings(scn)["weather_year"]) - sources = egon.data.config.datasets()["electrical_load_curves_cts"][ - "sources" - ] + sources, _ = load_sources_and_targets("CtsElectricityDemand") # Select normalizes load curves per cts branch df_select = db.select_dataframe( f"""SELECT wz, load_curve - FROM {sources['demandregio_timeseries']['schema']}. 
- {sources['demandregio_timeseries']['table']} + FROM {sources.tables["demandregio_timeseries"]["schema"]}.{sources.tables["demandregio_timeseries"]["table"]} WHERE year = {year}""", index_col="wz", ).transpose() @@ -132,21 +130,17 @@ def calc_load_curves_cts(scenario): """ - sources = egon.data.config.datasets()["electrical_load_curves_cts"][ - "sources" - ] - + sources, _ = load_sources_and_targets("CtsElectricityDemand") # Select demands per cts branch and nuts3-region demands_nuts = db.select_dataframe( f"""SELECT nuts3, wz, demand - FROM {sources['demandregio_cts']['schema']}. - {sources['demandregio_cts']['table']} + FROM {sources.tables["demandregio_cts"]["schema"]}.{sources.tables["demandregio_cts"]["table"]} WHERE scenario = '{scenario}' AND demand > 0 AND wz IN ( SELECT wz FROM - {sources['demandregio_wz']['schema']}. - {sources['demandregio_wz']['table']} + {sources.tables["demandregio_wz"]["schema"]}. + {sources.tables["demandregio_wz"]["table"]} WHERE sector = 'CTS') """ ).set_index(["nuts3", "wz"]) @@ -156,14 +150,12 @@ def calc_load_curves_cts(scenario): f"""SELECT a.zensus_population_id, a.demand, b.vg250_nuts3 as nuts3, c.bus_id - FROM {sources['zensus_electricity']['schema']}. - {sources['zensus_electricity']['table']} a + FROM {sources.tables["zensus_electricity"]["schema"]}.{sources.tables["zensus_electricity"]["table"]} a INNER JOIN - {sources['map_vg250']['schema']}.{sources['map_vg250']['table']} b + {sources.tables["map_vg250"]["schema"]}.{sources.tables["map_vg250"]["table"]} b ON (a.zensus_population_id = b.zensus_population_id) INNER JOIN - {sources['map_grid_districts']['schema']}. 
- {sources['map_grid_districts']['table']} c + {sources.tables["map_grid_districts"]["schema"]}.{sources.tables["map_grid_districts"]["table"]} c ON (a.zensus_population_id = c.zensus_population_id) WHERE a.scenario = '{scenario}' AND a.sector = 'service' @@ -213,9 +205,7 @@ def insert_cts_load(): """ - targets = egon.data.config.datasets()["electrical_load_curves_cts"][ - "targets" - ] + _, targets = load_sources_and_targets("CtsElectricityDemand") create_table() @@ -224,8 +214,7 @@ def insert_cts_load(): db.execute_sql( f""" DELETE FROM - {targets['cts_demand_curves']['schema']} - .{targets['cts_demand_curves']['table']} + {targets.tables["cts_demand_curves"]["schema"]}.{targets.tables["cts_demand_curves"]["table"]} WHERE scn_name = '{scenario}' """ ) @@ -244,8 +233,8 @@ def insert_cts_load(): # Insert into database load_ts_df.to_sql( - targets["cts_demand_curves"]["table"], - schema=targets["cts_demand_curves"]["schema"], + targets.tables["cts_demand_curves"]["table"], + schema=targets.tables["cts_demand_curves"]["schema"], con=db.engine(), if_exists="append", ) diff --git a/src/egon/data/datasets/electricity_demand_etrago.py b/src/egon/data/datasets/electricity_demand_etrago.py index 51b9bd1bd..449d51a55 100644 --- a/src/egon/data/datasets/electricity_demand_etrago.py +++ b/src/egon/data/datasets/electricity_demand_etrago.py @@ -10,7 +10,7 @@ import pandas as pd from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets import egon.data.config @@ -29,13 +29,13 @@ def demands_per_bus(scenario): """ # Read information from configuration file - sources = egon.data.config.datasets()["etrago_electricity"]["sources"] + # Select data on CTS electricity demands per bus cts_curves = db.select_dataframe( f"""SELECT bus_id AS bus, p_set FROM - {sources['cts_curves']['schema']}. - {sources['cts_curves']['table']} + {ElectricalLoadEtrago.sources.tables['cts_curves']['schema']}. 
+ {ElectricalLoadEtrago.sources.tables['cts_curves']['table']} WHERE scn_name = '{scenario}'""", ) @@ -43,8 +43,8 @@ def demands_per_bus(scenario): ind_curves_osm = db.select_dataframe( f"""SELECT bus, p_set FROM - {sources['osm_curves']['schema']}. - {sources['osm_curves']['table']} + {ElectricalLoadEtrago.sources.tables['osm_curves']['schema']}. + {ElectricalLoadEtrago.sources.tables['osm_curves']['table']} WHERE scn_name = '{scenario}'""", ) @@ -52,8 +52,8 @@ def demands_per_bus(scenario): ind_curves_sites = db.select_dataframe( f"""SELECT bus, p_set FROM - {sources['sites_curves']['schema']}. - {sources['sites_curves']['table']} + {ElectricalLoadEtrago.sources.tables['sites_curves']['schema']}. + {ElectricalLoadEtrago.sources.tables['sites_curves']['table']} WHERE scn_name = '{scenario}'""", ) @@ -61,8 +61,8 @@ def demands_per_bus(scenario): hh_curves = db.select_dataframe( f"""SELECT bus_id AS bus, p_set FROM - {sources['household_curves']['schema']}. - {sources['household_curves']['table']} + {ElectricalLoadEtrago.sources.tables['household_curves']['schema']}. + {ElectricalLoadEtrago.sources.tables['household_curves']['table']} WHERE scn_name = '{scenario}'""", ) @@ -166,36 +166,33 @@ def export_to_db(): None. """ - sources = egon.data.config.datasets()["etrago_electricity"]["sources"] - targets = egon.data.config.datasets()["etrago_electricity"]["targets"] for scenario in egon.data.config.settings()["egon-data"]["--scenarios"]: # Delete existing data from database db.execute_sql( f""" DELETE FROM - {targets['etrago_load']['schema']}.{targets['etrago_load']['table']} + {ElectricalLoadEtrago.targets.tables['etrago_load']['schema']}.{ElectricalLoadEtrago.targets.tables['etrago_load']['table']} WHERE scn_name = '{scenario}' AND carrier = 'AC' AND bus IN ( SELECT bus_id FROM - {sources['etrago_buses']['schema']}. - {sources['etrago_buses']['table']} + {ElectricalLoadEtrago.sources.tables['etrago_buses']['schema']}. 
+ {ElectricalLoadEtrago.sources.tables['etrago_buses']['table']} WHERE country = 'DE' AND carrier = 'AC' AND scn_name = '{scenario}') """ ) - db.execute_sql( f""" DELETE FROM - {targets['etrago_load_curves']['schema']}.{targets['etrago_load_curves']['table']} + {ElectricalLoadEtrago.targets.tables['etrago_load_curves']['schema']}.{ElectricalLoadEtrago.targets.tables['etrago_load_curves']['table']} WHERE scn_name = '{scenario}' AND load_id NOT IN ( SELECT load_id FROM - {targets['etrago_load']['schema']}. - {targets['etrago_load']['table']} + {ElectricalLoadEtrago.targets.tables['etrago_load']['schema']}. + {ElectricalLoadEtrago.targets.tables['etrago_load']['table']} WHERE scn_name = '{scenario}') """ ) @@ -248,15 +245,16 @@ def export_to_db(): # Insert data into database load.to_sql( - targets["etrago_load"]["table"], - schema=targets["etrago_load"]["schema"], + ElectricalLoadEtrago.targets.tables["etrago_load"]["table"], + schema=ElectricalLoadEtrago.targets.tables["etrago_load"]["schema"], con=db.engine(), if_exists="append", ) + load_timeseries.to_sql( - targets["etrago_load_curves"]["table"], - schema=targets["etrago_load_curves"]["schema"], + ElectricalLoadEtrago.targets.tables["etrago_load_curves"]["table"], + schema=ElectricalLoadEtrago.targets.tables["etrago_load_curves"]["schema"], con=db.engine(), if_exists="append", ) @@ -285,7 +283,46 @@ class ElectricalLoadEtrago(Dataset): #: name: str = "Electrical_load_etrago" #: - version: str = "0.0.8" + version: str = "0.0.9" + + sources = DatasetSources( + tables={ + + "cts_curves": { + "schema": "demand", + "table": "egon_etrago_electricity_cts", + }, + "osm_curves": { + "schema": "demand", + "table": "egon_osm_ind_load_curves", + }, + "sites_curves": { + "schema": "demand", + "table": "egon_sites_ind_load_curves", + }, + "household_curves": { + "schema": "demand", + "table": "egon_etrago_electricity_households", + }, + "etrago_buses": { + "schema": "grid", + "table": "egon_etrago_bus", + }, + }, + ) + + 
targets = DatasetTargets( + tables={ + "etrago_load": { + "schema": "grid", + "table": "egon_etrago_load", + }, + "etrago_load_curves": { + "schema": "grid", + "table": "egon_etrago_load_timeseries", + }, + }, + ) def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/electricity_demand_timeseries/cts_buildings.py b/src/egon/data/datasets/electricity_demand_timeseries/cts_buildings.py index 9f8f64a8a..e05a4cf03 100644 --- a/src/egon/data/datasets/electricity_demand_timeseries/cts_buildings.py +++ b/src/egon/data/datasets/electricity_demand_timeseries/cts_buildings.py @@ -21,7 +21,7 @@ from egon.data import config, db from egon.data import logger as log -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.electricity_demand import ( EgonDemandRegioZensusElectricity, ) @@ -230,7 +230,82 @@ class CtsDemandBuildings(Dataset): #: name: str = "CtsDemandBuildings" #: - version: str = "0.0.4" + version: str = "0.0.5" + sources = DatasetSources( + tables={ + "osm_buildings_filtered": { + "schema": "openstreetmap", + "table": "osm_buildings_filtered", + }, + "osm_amenities_shops_filtered": { + "schema": "openstreetmap", + "table": "osm_amenities_shops_filtered", + }, + "osm_amenities_not_in_buildings_filtered": { + "schema": "openstreetmap", + "table": "osm_amenities_not_in_buildings_filtered", + }, + "osm_buildings_synthetic": { + "schema": "openstreetmap", + "table": "osm_buildings_synthetic", + }, + "map_zensus_buildings_filtered_all": { + "schema": "boundaries", + "table": "egon_map_zensus_buildings_filtered_all", + }, + + "zensus_electricity": { + "schema": "demand", + "table": "egon_demandregio_zensus_electricity", + }, + "peta_heat": { + "schema": "demand", + "table": "egon_peta_heat", + }, + "etrago_electricity_cts": { + "schema": "demand", + "table": "egon_etrago_electricity_cts", + }, + "etrago_heat_cts": { + "schema": "demand", + "table": 
"egon_etrago_heat_cts", + }, + } + ) + + targets = DatasetTargets( + tables={ + "cts_buildings": { + "schema": "openstreetmap", + "table": "egon_cts_buildings", + }, + "cts_electricity_building_share": { + "schema": "demand", + "table": "egon_cts_electricity_demand_building_share", + }, + "cts_heat_building_share": { + "schema": "demand", + "table": "egon_cts_heat_demand_building_share", + }, + "osm_buildings_synthetic": { + "schema": "openstreetmap", + "table": "osm_buildings_synthetic", + }, + "building_electricity_peak_loads": { + "schema": "demand", + "table": "egon_building_electricity_peak_loads", + }, + "building_heat_peak_loads": { + "schema": "demand", + "table": "egon_building_heat_peak_loads", + }, + "map_zensus_mvgd_buildings": { + "schema": "boundaries", + "table": "egon_map_zensus_mvgd_buildings", + }, + } + ) + def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py index 6de5a5b74..6231b5dd5 100755 --- a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py +++ b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py @@ -14,7 +14,7 @@ import pandas as pd from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.electricity_demand_timeseries.hh_profiles import ( HouseholdElectricityProfilesInCensusCells, get_iee_hh_demand_profiles_raw, @@ -27,7 +27,7 @@ engine = db.engine() Base = declarative_base() -data_config = egon.data.config.datasets() + RANDOM_SEED = egon.data.config.settings()["egon-data"]["--random-seed"] np.random.seed(RANDOM_SEED) @@ -1218,8 +1218,54 @@ class setup(Dataset): #: name: str = "Demand_Building_Assignment" #: - version: str = "0.0.7" + version: str = "0.0.8" #: + sources = DatasetSources( + tables={ + "hh_profiles_in_census_cells": { + "schema": 
"demand", + "table": "egon_household_electricity_profile_in_census_cell", + }, + "zensus_apartment_building_population_per_ha": { + "schema": "society", + "table": "egon_destatis_zensus_apartment_building_population_per_ha", + }, + "zensus_population_per_ha_inside_germany": { + "schema": "society", + "table": "destatis_zensus_population_per_ha_inside_germany", + }, + "osm_buildings": { + "schema": "openstreetmap", + "table": "osm_buildings", + }, + "osm_buildings_residential": { + "schema": "openstreetmap", + "table": "osm_buildings_residential", + }, + } + ) + + targets = DatasetTargets( + tables={ + "osm_buildings_synthetic": { + "schema": "openstreetmap", + "table": "osm_buildings_synthetic", + }, + "hh_profiles_of_buildings": { + "schema": "demand", + "table": "egon_household_electricity_profile_of_buildings", + }, + "hh_profiles_of_buildings_stats": { + "schema": "demand", + "table": "egon_household_electricity_profile_of_buildings_stats", + }, + "building_electricity_peak_loads": { + "schema": "demand", + "table": "egon_building_electricity_peak_loads", + }, + } + ) + tasks = ( map_houseprofiles_to_buildings, create_buildings_profiles_stats, diff --git a/src/egon/data/datasets/electricity_demand_timeseries/hh_profiles.py b/src/egon/data/datasets/electricity_demand_timeseries/hh_profiles.py index 7d613be6c..a4a58547b 100644 --- a/src/egon/data/datasets/electricity_demand_timeseries/hh_profiles.py +++ b/src/egon/data/datasets/electricity_demand_timeseries/hh_profiles.py @@ -22,7 +22,7 @@ import pandas as pd from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.scenario_parameters import get_scenario_year from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts import egon.data.config @@ -239,7 +239,71 @@ class HouseholdDemands(Dataset): #: name: str = "Household Demands" #: - version: str = "0.0.12" + version: str = "0.0.13" + sources 
= DatasetSources( + tables={ + "demandregio_hh": { + "schema": "demand", + "table": "egon_demandregio_hh", + }, + "destatis_zensus_population_per_ha_inside_germany": { + "schema": "society", + "table": "destatis_zensus_population_per_ha_inside_germany", + }, + "destatis_zensus_population_per_ha": { + "schema": "society", + "table": "destatis_zensus_population_per_ha", + }, + "egon_destatis_zensus_household_per_ha": { + "schema": "society", + "table": "egon_destatis_zensus_household_per_ha", + }, + "egon_map_zensus_vg250": { + "schema": "boundaries", + "table": "egon_map_zensus_vg250", + }, + "vg250_lan": { + "schema": "boundaries", + "table": "vg250_lan", + }, + "demandregio_household_load_profiles": { + "schema": "demand", + "table": "demandregio_household_load_profiles", + }, + }, + files={ + "household_electricity_demand_profiles": { + "path_testmode": "hh_el_load_profiles_2511.hdf", + "path": "hh_el_load_profiles_100k.hdf", + }, + "zensus_household_types": { + "path": "Zensus2011_Personen.csv", + }, + }, + + ) + + targets = DatasetTargets( + tables={ + "iee_household_load_profiles": { + "schema": "demand", + "table": "iee_household_load_profiles", + }, + "hh_profiles_in_census_cells": { + "schema": "demand", + "table": "egon_household_electricity_profile_in_census_cell", + }, + "zensus_household_per_ha_refined": { + "schema": "society", + "table": "egon_destatis_zensus_household_per_ha_refined", + }, + "etrago_electricity_households": { + "schema": "demand", + "table": "egon_etrago_electricity_households", + }, + } + ) + def __init__(self, dependencies): tasks = ( @@ -415,9 +479,7 @@ def get_iee_hh_demand_profiles_raw(): Table with profiles in columns and time as index. A pd.MultiIndex is used to distinguish load profiles from different EUROSTAT household types. 
- """ - data_config = egon.data.config.datasets() - pa_config = data_config["hh_demand_profiles"] + """ def ve(s): raise (ValueError(s)) @@ -434,17 +496,14 @@ def ve(s): ) ) - file_path = pa_config["sources"]["household_electricity_demand_profiles"][ - file_section - ] + file_path = HouseholdDemands.sources.files["household_electricity_demand_profiles"][file_section] download_directory = os.path.join( "data_bundle_egon_data", "household_electricity_demand_profiles" ) - hh_profiles_file = ( - Path(".") / Path(download_directory) / Path(file_path).name - ) + hh_profiles_file = Path(".") / Path(download_directory) / Path(file_path) + df_hh_profiles = pd.read_hdf(hh_profiles_file) @@ -518,17 +577,15 @@ def get_census_households_nuts1_raw(): pd.DataFrame Pre-processed zensus household data """ - data_config = egon.data.config.datasets() - pa_config = data_config["hh_demand_profiles"] - file_path = pa_config["sources"]["zensus_household_types"]["path"] + + file_path = HouseholdDemands.sources.files["zensus_household_types"]["path"] download_directory = os.path.join( "data_bundle_egon_data", "zensus_households" ) - households_file = ( - Path(".") / Path(download_directory) / Path(file_path).name - ) + households_file = Path(".") / Path(download_directory) / Path(file_path) + households_raw = pd.read_csv( households_file, @@ -868,9 +925,9 @@ def inhabitants_to_households(df_hh_people_distribution_abs): # As this is only used to estimate size of households for OR, OO # The hh types 1 P and 2 P households are dropped df_hh_size = db.select_dataframe( - sql=""" + sql=f""" SELECT characteristics_text, SUM(quantity) as summe - FROM society.egon_destatis_zensus_household_per_ha as egon_d + FROM {HouseholdDemands.sources.tables["egon_destatis_zensus_household_per_ha"]["schema"]}.{HouseholdDemands.sources.tables["egon_destatis_zensus_household_per_ha"]["table"]} as egon_d WHERE attribute = 'HHGROESS_KLASS' AND quantity_q < 2 GROUP BY characteristics_text """, 
index_col="characteristics_text", @@ -1018,10 +1075,10 @@ def get_census_households_grid(): # Retrieve information about households for each census cell # Only use cell-data which quality (quantity_q<2) is acceptable df_census_households_grid = db.select_dataframe( - sql=""" + sql=f""" SELECT grid_id, attribute, characteristics_code, characteristics_text, quantity - FROM society.egon_destatis_zensus_household_per_ha + FROM {HouseholdDemands.sources.tables["egon_destatis_zensus_household_per_ha"]["schema"]}.{HouseholdDemands.sources.tables["egon_destatis_zensus_household_per_ha"]["table"]} WHERE attribute = 'HHTYP_FAM' AND quantity_q <2""" ) df_census_households_grid = df_census_households_grid.drop( @@ -1030,7 +1087,7 @@ def get_census_households_grid(): # Missing data is detected df_missing_data = db.select_dataframe( - sql=""" + sql=f""" SELECT count(joined.quantity_gesamt) as amount, joined.quantity_gesamt as households FROM( @@ -1040,12 +1097,12 @@ def get_census_households_grid(): as insgesamt_minus_fam FROM ( SELECT grid_id, SUM(quantity) as quantity_sum_fam - FROM society.egon_destatis_zensus_household_per_ha + FROM {HouseholdDemands.sources.tables["egon_destatis_zensus_household_per_ha"]["schema"]}.{HouseholdDemands.sources.tables["egon_destatis_zensus_household_per_ha"]["table"]} WHERE attribute = 'HHTYP_FAM' GROUP BY grid_id) as t1 Full JOIN ( SELECT grid_id, sum(quantity) as quantity_gesamt - FROM society.egon_destatis_zensus_household_per_ha + FROM {HouseholdDemands.sources.tables["egon_destatis_zensus_household_per_ha"]["schema"]}.{HouseholdDemands.sources.tables["egon_destatis_zensus_household_per_ha"]["table"]} WHERE attribute = 'INSGESAMT' GROUP BY grid_id) as t2 ON t1.grid_id = t2.grid_id ) as joined @@ -1053,19 +1110,19 @@ def get_census_households_grid(): Group by quantity_gesamt """ ) missing_cells = db.select_dataframe( - sql=""" + sql=f""" SELECT t12.grid_id, t12.quantity FROM ( SELECT t2.grid_id, (case when quantity_sum_fam isnull then 
quantity_gesamt end) as quantity FROM ( SELECT grid_id, SUM(quantity) as quantity_sum_fam - FROM society.egon_destatis_zensus_household_per_ha + FROM {HouseholdDemands.sources.tables["egon_destatis_zensus_household_per_ha"]["schema"]}.{HouseholdDemands.sources.tables["egon_destatis_zensus_household_per_ha"]["table"]} WHERE attribute = 'HHTYP_FAM' GROUP BY grid_id) as t1 Full JOIN ( SELECT grid_id, sum(quantity) as quantity_gesamt - FROM society.egon_destatis_zensus_household_per_ha + FROM {HouseholdDemands.sources.tables["egon_destatis_zensus_household_per_ha"]["schema"]}.{HouseholdDemands.sources.tables["egon_destatis_zensus_household_per_ha"]["table"]} WHERE attribute = 'INSGESAMT' GROUP BY grid_id) as t2 ON t1.grid_id = t2.grid_id ) as t12 @@ -1088,14 +1145,14 @@ def get_census_households_grid(): # Census cells with nuts3 and nuts1 information df_grid_id = db.select_dataframe( - sql=""" + sql=f""" SELECT pop.grid_id, pop.id as cell_id, pop.population, vg250.vg250_nuts3 as nuts3, lan.nuts as nuts1, lan.gen FROM - society.destatis_zensus_population_per_ha_inside_germany as pop - LEFT JOIN boundaries.egon_map_zensus_vg250 as vg250 + {HouseholdDemands.sources.tables["destatis_zensus_population_per_ha_inside_germany"]["schema"]}.{HouseholdDemands.sources.tables["destatis_zensus_population_per_ha_inside_germany"]["table"]} as pop + LEFT JOIN {HouseholdDemands.sources.tables["egon_map_zensus_vg250"]["schema"]}.{HouseholdDemands.sources.tables["egon_map_zensus_vg250"]["table"]} as vg250 ON (pop.id=vg250.zensus_population_id) - LEFT JOIN boundaries.vg250_lan as lan + LEFT JOIN {HouseholdDemands.sources.tables["vg250_lan"]["schema"]}.{HouseholdDemands.sources.tables["vg250_lan"]["table"]} as lan ON (LEFT(vg250.vg250_nuts3, 3) = lan.nuts) WHERE lan.gf = 4 """ ) @@ -1583,7 +1640,7 @@ def houseprofiles_in_census_cells(): """ Allocate household electricity demand profiles for each census cell. 
- Creates table `emand.egon_household_electricity_profile_in_census_cell` that maps + Creates table demand.egon_household_electricity_profile_in_census_cell` that maps household electricity demand profiles to census cells. Each row represents one cell and contains a list of profile IDs. This table is fundamental for creating subsequent data like demand profiles on MV grid level or for @@ -1655,9 +1712,9 @@ def gen_profile_names(n): # Annual household electricity demand on NUTS-3 level (demand regio) df_demand_regio = db.select_dataframe( - sql=""" + sql=f""" SELECT year, nuts3, SUM (demand) as demand_mWha - FROM demand.egon_demandregio_hh as egon_d + FROM {HouseholdDemands.sources.tables["demandregio_hh"]["schema"]}.{HouseholdDemands.sources.tables["demandregio_hh"]["table"]} as egon_d GROUP BY nuts3, year ORDER BY year""", index_col=["year", "nuts3"], @@ -1853,7 +1910,9 @@ def get_demand_regio_hh_profiles_from_db(year): Selection of household demand profiles """ - query = """Select * from demand.demandregio_household_load_profiles + query = f""" + Select * + FROM {HouseholdDemands.sources.tables["demandregio_household_load_profiles"]["schema"]}.{HouseholdDemands.sources.tables["demandregio_household_load_profiles"]["table"]} Where year = year""" df_profile_loads = pd.read_sql(query, db.engine(), index_col="id") @@ -1913,8 +1972,8 @@ def tuple_format(x): if method == "slp": # Import demand regio timeseries demand per nuts3 area dr_series = pd.read_sql_query( - """ - SELECT year, nuts3, load_in_mwh FROM demand.demandregio_household_load_profiles + f""" + SELECT year, nuts3, load_in_mwh FROM {HouseholdDemands.sources.tables["demandregio_household_load_profiles"]["schema"]}.{HouseholdDemands.sources.tables["demandregio_household_load_profiles"]["table"]} """, con=engine, ) @@ -1925,8 +1984,8 @@ def tuple_format(x): # Population data per cell_id is used to scale the demand per nuts3 population = pd.read_sql_query( - """ - SELECT grid_id, population FROM 
society.destatis_zensus_population_per_ha + f""" + SELECT grid_id, population FROM {HouseholdDemands.sources.tables["destatis_zensus_population_per_ha"]["schema"]}.{HouseholdDemands.sources.tables["destatis_zensus_population_per_ha"]["table"]} """, con=engine, ) diff --git a/src/egon/data/datasets/emobility/heavy_duty_transport/__init__.py b/src/egon/data/datasets/emobility/heavy_duty_transport/__init__.py index de8aa5108..94bc3d624 100644 --- a/src/egon/data/datasets/emobility/heavy_duty_transport/__init__.py +++ b/src/egon/data/datasets/emobility/heavy_duty_transport/__init__.py @@ -109,7 +109,7 @@ class HeavyDutyTransport(Dataset): #: name: str = "HeavyDutyTransport" #: - version: str = "0.0.3" + version: str = "0.0.10" def __init__(self, dependencies): super().__init__( @@ -124,4 +124,4 @@ def __init__(self, dependencies): run_egon_truck, insert_hgv_h2_demand, ), - ) + ) \ No newline at end of file diff --git a/src/egon/data/datasets/emobility/heavy_duty_transport/create_h2_buses.py b/src/egon/data/datasets/emobility/heavy_duty_transport/create_h2_buses.py index 1ab9cca8d..35a65e032 100644 --- a/src/egon/data/datasets/emobility/heavy_duty_transport/create_h2_buses.py +++ b/src/egon/data/datasets/emobility/heavy_duty_transport/create_h2_buses.py @@ -81,10 +81,8 @@ def insert_new_entries(hgv_h2_demand_gdf: gpd.GeoDataFrame): Load data to insert. 
""" - new_id = db.next_etrago_id("load") - hgv_h2_demand_gdf["load_id"] = range( - new_id, new_id + len(hgv_h2_demand_gdf) - ) + start_id = db.next_etrago_id("load") + hgv_h2_demand_gdf["load_id"] = range(start_id, start_id + len(hgv_h2_demand_gdf)) # Add missing columns c = {"sign": -1, "type": np.nan, "p_set": np.nan, "q_set": np.nan} @@ -186,4 +184,4 @@ def read_hgv_h2_demand(scenario: str = "eGon2035"): return gpd.GeoDataFrame( df.merge(gdf_vg250[["geometry"]], left_index=True, right_index=True), crs=gdf_vg250.crs, - ).to_crs(epsg=srid_buses) + ).to_crs(epsg=srid_buses) \ No newline at end of file diff --git a/src/egon/data/datasets/emobility/heavy_duty_transport/data_io.py b/src/egon/data/datasets/emobility/heavy_duty_transport/data_io.py index 026cfb1a5..b484833ee 100644 --- a/src/egon/data/datasets/emobility/heavy_duty_transport/data_io.py +++ b/src/egon/data/datasets/emobility/heavy_duty_transport/data_io.py @@ -55,7 +55,7 @@ def bast_gdf(): df = pd.read_csv( path, - delimiter=r",", + delimiter=r";", decimal=r",", thousands=r".", encoding="ISO-8859-1", @@ -96,4 +96,4 @@ def nuts3_gdf(): logger.debug("Read in NUTS 3 districts.") - return gdf + return gdf \ No newline at end of file diff --git a/src/egon/data/datasets/emobility/heavy_duty_transport/db_classes.py b/src/egon/data/datasets/emobility/heavy_duty_transport/db_classes.py index bd8bbc6a7..517a36614 100644 --- a/src/egon/data/datasets/emobility/heavy_duty_transport/db_classes.py +++ b/src/egon/data/datasets/emobility/heavy_duty_transport/db_classes.py @@ -27,4 +27,4 @@ class EgonHeavyDutyTransportVoronoi(Base): truck_traffic = Column(Float) normalized_truck_traffic = Column(Float) hydrogen_consumption = Column(Float) - scenario = Column(String, ForeignKey(EgonScenario.name), primary_key=True) + scenario = Column(String, ForeignKey(EgonScenario.name), primary_key=True) \ No newline at end of file diff --git a/src/egon/data/datasets/emobility/heavy_duty_transport/h2_demand_distribution.py 
b/src/egon/data/datasets/emobility/heavy_duty_transport/h2_demand_distribution.py index 6d0ff2482..92fe71e6d 100644 --- a/src/egon/data/datasets/emobility/heavy_duty_transport/h2_demand_distribution.py +++ b/src/egon/data/datasets/emobility/heavy_duty_transport/h2_demand_distribution.py @@ -8,7 +8,7 @@ from shapely import wkt from shapely.geometry.multipolygon import MultiPolygon from shapely.geometry.polygon import Polygon -from shapely.ops import cascaded_union +from shapely.ops import unary_union import geopandas as gpd from egon.data import config, db @@ -144,7 +144,7 @@ def voronoi( # convert the boundary geometry into a union of the polygon # convert the Geopandas GeoSeries of Point objects to NumPy array of coordinates. - boundary_shape = cascaded_union(boundary.geometry) + boundary_shape = unary_union(boundary.geometry) coords = points_to_coords(points.geometry) # calculate Voronoi regions @@ -186,4 +186,4 @@ def voronoi( logger.info("Done.") - return poly_gdf + return poly_gdf \ No newline at end of file diff --git a/src/egon/data/datasets/emobility/motorized_individual_travel/__init__.py b/src/egon/data/datasets/emobility/motorized_individual_travel/__init__.py index 072a3e342..c6c5f089e 100644 --- a/src/egon/data/datasets/emobility/motorized_individual_travel/__init__.py +++ b/src/egon/data/datasets/emobility/motorized_individual_travel/__init__.py @@ -22,7 +22,7 @@ import pandas as pd from egon.data import config, db, subprocess -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.emobility.motorized_individual_travel.db_classes import ( # noqa: E501 EgonEvCountMunicipality, EgonEvCountMvGridDistrict, @@ -124,8 +124,8 @@ def download_and_preprocess(): ################################ # Download and import KBA data # ################################ - url = mit_sources["KBA"]["url"] - file = WORKING_DIR / mit_sources["KBA"]["file"] + url = 
MotorizedIndividualTravel.sources.urls["KBA"] + file = Path(MotorizedIndividualTravel.targets.files["KBA_download"]) if not os.path.isfile(file): urlretrieve(url, file) @@ -152,15 +152,15 @@ def download_and_preprocess(): kba_data.ags_reg_district = kba_data.ags_reg_district.astype("int") kba_data.to_csv( - WORKING_DIR / mit_sources["KBA"]["file_processed"], index=None + Path(MotorizedIndividualTravel.targets.files["KBA_processed"]), index=None ) ####################################### # Download and import RegioStaR7 data # ####################################### - url = mit_sources["RS7"]["url"] - file = WORKING_DIR / mit_sources["RS7"]["file"] + url = MotorizedIndividualTravel.sources.urls["RS7"] + file = Path(MotorizedIndividualTravel.targets.files["RS7_download"]) if not os.path.isfile(file): urlretrieve(url, file) @@ -175,7 +175,7 @@ def download_and_preprocess(): rs7_data.rs7_id = rs7_data.rs7_id.astype("int") rs7_data.to_csv( - WORKING_DIR / mit_sources["RS7"]["file_processed"], index=None + Path(MotorizedIndividualTravel.targets.files["RS7_processed"]), index=None ) @@ -185,11 +185,10 @@ def extract_trip_file(): for scenario_name in config.settings()["egon-data"]["--scenarios"]: print(f"SCENARIO: {scenario_name}") + trip_file_key = MotorizedIndividualTravel.source_trip_files[scenario_name] trip_file = trip_dir / Path( - DATASET_CFG["original_data"]["sources"]["trips"][scenario_name][ - "file" - ] - ) + MotorizedIndividualTravel.sources.files[trip_file_key] +) tar = tarfile.open(trip_file) if os.path.isfile(trip_file): @@ -213,11 +212,9 @@ def import_csv(f): for scenario_name in config.settings()["egon-data"]["--scenarios"]: print(f"SCENARIO: {scenario_name}") - trip_dir_name = Path( - DATASET_CFG["original_data"]["sources"]["trips"][scenario_name][ - "file" - ].split(".")[0] - ) + trip_file_key = MotorizedIndividualTravel.source_trip_files[scenario_name] + trip_file_path = MotorizedIndividualTravel.sources.files[trip_file_key] + trip_dir_name = 
Path(trip_file_path).stem trip_dir_root = DATA_BUNDLE_DIR / Path("mit_trip_data", trip_dir_name) @@ -395,6 +392,44 @@ class MotorizedIndividualTravel(Dataset): *emobility_mit*. """ + + sources = DatasetSources( + urls={ + "KBA": "https://www.kba.de/SharedDocs/Downloads/DE/Statistik/Fahrzeuge/FZ1/fz1_2021.xlsx?__blob=publicationFile&v=2", + "RS7": "https://www.bmvi.de/SharedDocs/DE/Anlage/G/regiostar-referenzdateien.xlsx?__blob=publicationFile", + }, + files={ + # These are the pre-generated trip data files from the data bundle + "trips_status2019": "mit_trip_data/eGon2035_RS7_min2k_2022-06-01_175429_simbev_run.tar.gz", + "trips_status2023": "mit_trip_data/eGon2035_RS7_min2k_2022-06-01_175429_simbev_run.tar.gz", + "trips_eGon2035": "mit_trip_data/eGon2035_RS7_min2k_2022-06-01_175429_simbev_run.tar.gz", + "trips_eGon100RE": "mit_trip_data/eGon100RE_RS7_min2k_2022-06-01_175444_simbev_run.tar.gz", + } + ) + targets = DatasetTargets( + files={ + "KBA_download": "motorized_individual_travel/fz1_2021.xlsx", + "KBA_processed": "motorized_individual_travel/fz1_2021_preprocessed.csv", + "RS7_download": "motorized_individual_travel/regiostar-referenzdateien.xlsx", + "RS7_processed": "motorized_individual_travel/regiostar-referenzdateien_preprocessed.csv", + }, + tables={ + "ev_pool": "emobility.egon_ev_pool", + "ev_trip": "emobility.egon_ev_trip", + "ev_count_reg_district": "emobility.egon_ev_count_registration_district", + "ev_count_municipality": "emobility.egon_ev_count_municipality", + "ev_count_mv_grid": "emobility.egon_ev_count_mv_grid_district", + "ev_mv_grid": "emobility.egon_ev_mv_grid_district", + "ev_metadata": "emobility.egon_ev_metadata", + } + ) + # A helper mapping to easily get the right trip file for each scenario + source_trip_files = { + "status2019": "trips_status2019", + "status2023": "trips_status2023", + "eGon2035": "trips_eGon2035", + "eGon100RE": "trips_eGon100RE", + } #: name: str = "MotorizedIndividualTravel" diff --git 
a/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/__init__.py b/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/__init__.py index 96ca504f7..02df37d67 100644 --- a/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/__init__.py +++ b/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/__init__.py @@ -15,7 +15,7 @@ import requests from egon.data import config, db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.emobility.motorized_individual_travel_charging_infrastructure.db_classes import ( # noqa: E501 EgonEmobChargingInfrastructure, add_metadata, @@ -25,7 +25,6 @@ ) WORKING_DIR = Path(".", "charging_infrastructure").resolve() -DATASET_CFG = config.datasets()["charging_infrastructure"] def create_tables() -> None: @@ -88,15 +87,36 @@ def get_tracbev_data() -> None: """ Wrapper function to get TracBEV data provided on Zenodo. 
""" - tracbev_cfg = DATASET_CFG["original_data"]["sources"]["tracbev"] - file = WORKING_DIR / tracbev_cfg["file"] + file = Path(MITChargingInfrastructure.targets.files["tracbev_download"]) + url = MITChargingInfrastructure.sources.urls["tracbev"] - download_zip(url=tracbev_cfg["url"], target=file) + download_zip(url=url, target=file) unzip_file(source=file, target=WORKING_DIR) class MITChargingInfrastructure(Dataset): + + + sources = DatasetSources( + urls={ + "tracbev": "https://zenodo.org/record/6466480/files/data.zip?download=1" + }, + tables={ + "mv_grid_districts": "grid.egon_mv_grid_district", + "buildings": "demand.egon_map_houseprofiles_buildings" + } + ) + targets = DatasetTargets( + files={ + "tracbev_download": "charging_infrastructure/data.zip" + }, + tables={ + "charging_infrastructure": "grid.egon_emob_charging_infrastructure" + } + ) + + """ Preparation of static model data for charging infrastructure for motorized individual travel. @@ -139,7 +159,7 @@ class MITChargingInfrastructure(Dataset): #: name: str = "MITChargingInfrastructure" #: - version: str = "0.0.1" + version: str = "0.0.3" def __init__(self, dependencies): super().__init__( @@ -154,4 +174,4 @@ def __init__(self, dependencies): run_tracbev, add_metadata, ), - ) + ) \ No newline at end of file diff --git a/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/db_classes.py b/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/db_classes.py index f64cff7bc..e1248d766 100644 --- a/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/db_classes.py +++ b/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/db_classes.py @@ -11,6 +11,7 @@ from sqlalchemy.ext.declarative import declarative_base from egon.data import config, db +from egon.data.datasets import load_sources_and_targets from egon.data.metadata import ( context, contributors, @@ -20,26 +21,24 @@ ) Base = 
declarative_base() -DATASET_CFG = config.datasets()["charging_infrastructure"] class EgonEmobChargingInfrastructure(Base): """ Class definition of table grid.egon_emob_charging_infrastructure. """ - - __tablename__ = DATASET_CFG["targets"]["charging_infrastructure"]["table"] - __table_args__ = { - "schema": DATASET_CFG["targets"]["charging_infrastructure"]["schema"] - } + __tablename__ = "egon_emob_charging_infrastructure" + __table_args__ = {"schema": "grid"} cp_id = Column(Integer, primary_key=True) mv_grid_id = Column(Integer) use_case = Column(String) weight = Column(Float) + + # SRID 3035 from YML) geometry = Column( Geometry( - srid=DATASET_CFG["original_data"]["sources"]["tracbev"]["srid"] + srid=3035 ) ) @@ -48,6 +47,8 @@ def add_metadata(): """ Add metadata to table grid.egon_emob_charging_infrastructure """ + sources, targets = load_sources_and_targets("MITChargingInfrastructure") + contris = contributors(["kh", "kh"]) contris[0]["date"] = "2023-03-14" @@ -110,10 +111,10 @@ def add_metadata(): "encoding": "UTF-8", "schema": { "fields": generate_resource_fields_from_db_table( - DATASET_CFG["targets"]["charging_infrastructure"][ + targets["charging_infrastructure"][ "schema" ], - DATASET_CFG["targets"]["charging_infrastructure"][ + targets["charging_infrastructure"][ "table" ], ), @@ -157,6 +158,6 @@ def add_metadata(): db.submit_comment( f"'{json.dumps(meta)}'", - DATASET_CFG["targets"]["charging_infrastructure"]["schema"], - DATASET_CFG["targets"]["charging_infrastructure"]["table"], + targets["charging_infrastructure"]["schema"], + targets["charging_infrastructure"]["table"], ) diff --git a/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/infrastructure_allocation.py b/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/infrastructure_allocation.py index c40aedf01..b68265251 100644 --- 
a/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/infrastructure_allocation.py +++ b/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/infrastructure_allocation.py @@ -15,6 +15,7 @@ import pandas as pd from egon.data import config, db +from egon.data.datasets import load_sources_and_targets from egon.data.datasets.emobility.motorized_individual_travel_charging_infrastructure.use_cases import ( # noqa: E501 home, hpc, @@ -23,7 +24,6 @@ ) WORKING_DIR = Path(".", "charging_infrastructure").resolve() -DATASET_CFG = config.datasets()["charging_infrastructure"] def write_to_db( @@ -42,21 +42,27 @@ def write_to_db( Calculated use case """ + sources, targets = load_sources_and_targets("MITChargingInfrastructure") + if gdf.empty: return if "energy" in gdf.columns: gdf = gdf.assign(weight=gdf.energy.div(gdf.energy.sum())) else: - rng = np.random.default_rng(DATASET_CFG["constants"]["random_seed"]) + rng = np.random.default_rng(sources.constants["random_seed"]) gdf = gdf.assign(weight=rng.integers(low=0, high=100, size=len(gdf))) gdf = gdf.assign(weight=gdf.weight.div(gdf.weight.sum())) + target_conf = targets.charging_infrastructure + target_table = target_conf["table"] + target_schema = target_conf["schema"] + max_id = db.select_dataframe( - """ - SELECT MAX(cp_id) FROM grid.egon_emob_charging_infrastructure + f""" + SELECT MAX(cp_id) FROM {target_schema}.{target_table} """ )["max"][0] @@ -69,12 +75,11 @@ def write_to_db( use_case=use_case, ) - targets = DATASET_CFG["targets"] - cols_to_export = targets["charging_infrastructure"]["cols_to_export"] + cols_to_export = target_conf["cols_to_export"] gpd.GeoDataFrame(gdf[cols_to_export], crs=gdf.crs).to_postgis( - targets["charging_infrastructure"]["table"], - schema=targets["charging_infrastructure"]["schema"], + target_table, + schema=target_schema, con=db.engine(), if_exists="append", ) @@ -155,10 +160,11 @@ def get_data() -> dict[gpd.GeoDataFrame]: * 
miscellaneous found in *datasets.yml* in section *charging_infrastructure* Returns - ------- - + # ... """ - tracbev_cfg = DATASET_CFG["original_data"]["sources"]["tracbev"] + sources, targets = load_sources_and_targets("MITChargingInfrastructure") + + tracbev_cfg = sources.original_data["sources"]["tracbev"] srid = tracbev_cfg["srid"] # TODO: get zensus housing data from DB instead of gpkg? @@ -247,26 +253,26 @@ def get_data() -> dict[gpd.GeoDataFrame]: ) data_dict["work_dict"] = { - "retail": DATASET_CFG["constants"]["work_weight_retail"], - "commercial": DATASET_CFG["constants"]["work_weight_commercial"], - "industrial": DATASET_CFG["constants"]["work_weight_industrial"], + "retail": sources.constants["work_weight_retail"], + "commercial": sources.constants["work_weight_commercial"], + "industrial": sources.constants["work_weight_industrial"], } - data_dict["sfh_available"] = DATASET_CFG["constants"][ + data_dict["sfh_available"] = sources.constants[ "single_family_home_share" ] - data_dict["sfh_avg_spots"] = DATASET_CFG["constants"][ + data_dict["sfh_avg_spots"] = sources.constants[ "single_family_home_spots" ] - data_dict["mfh_available"] = DATASET_CFG["constants"][ + data_dict["mfh_available"] = sources.constants[ "multi_family_home_share" ] - data_dict["mfh_avg_spots"] = DATASET_CFG["constants"][ + data_dict["mfh_avg_spots"] = sources.constants[ "multi_family_home_spots" ] data_dict["random_seed"] = np.random.default_rng( - DATASET_CFG["constants"]["random_seed"] + sources.constants["random_seed"] ) - return data_dict + return data_dict \ No newline at end of file diff --git a/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/use_cases.py b/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/use_cases.py index 1f543c829..0f1ee53bc 100644 --- a/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/use_cases.py +++ 
b/src/egon/data/datasets/emobility/motorized_individual_travel_charging_infrastructure/use_cases.py @@ -9,9 +9,7 @@ import numpy as np import pandas as pd -from egon.data import config - -DATASET_CFG = config.datasets()["charging_infrastructure"] +from egon.data.datasets import load_sources_and_targets def hpc(hpc_points: gpd.GeoDataFrame, uc_dict: dict) -> gpd.GeoDataFrame: @@ -278,6 +276,7 @@ def work( :param uc_dict: dict contains basic run info like region boundary and save directory """ + sources, targets = load_sources_and_targets("MITChargingInfrastructure") uc_id = "work" logger.debug(f"Use case: {uc_id}") @@ -292,7 +291,7 @@ def work( groups = in_region.groupby("landuse") group_labels = ["retail", "commercial", "industrial"] - srid = DATASET_CFG["original_data"]["sources"]["tracbev"]["srid"] + srid = sources.original_data["sources"]["tracbev"]["srid"] result = gpd.GeoDataFrame( columns=["geometry", "landuse", "potential"], crs=f"EPSG:{srid}" @@ -317,4 +316,4 @@ def work( f"{round(energy_sum, 1)} kWh got charged in region {uc_dict['key']}." 
) - return gpd.GeoDataFrame(result, crs=landuse.crs) + return gpd.GeoDataFrame(result, crs=landuse.crs) \ No newline at end of file diff --git a/src/egon/data/datasets/era5.py b/src/egon/data/datasets/era5.py index baaf3ed0c..06c43a7e8 100644 --- a/src/egon/data/datasets/era5.py +++ b/src/egon/data/datasets/era5.py @@ -12,7 +12,7 @@ import geopandas as gpd from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.scenario_parameters import get_sector_parameters import egon.data.config @@ -43,19 +43,43 @@ class WeatherData(Dataset): #: name: str = "Era5" #: - version: str = "0.0.4" + version: str = "0.0.6" + + sources = DatasetSources( + files={}, + tables={ + "vg250_bbox": { + "schema": "boundaries", + "table": "vg250_sta_bbox", + }, + }, + ) + + + targets = DatasetTargets( + tables={ + "weather_cells": { + "schema": "supply", + "table": "egon_era5_weather_cells", + }, + "renewable_feedin": { + "schema": "supply", + "table": "egon_era5_renewable_feedin", + }, + }, + files={ + "weather_data": { + "path": "data_bundle_egon_data/cutouts" + } + } + ) def __init__(self, dependencies): super().__init__( name=self.name, version=self.version, dependencies=dependencies, - tasks=( - { - create_tables, - }, - insert_weather_cells, - ), # download_era5 should be included once issue #1250 is solved + tasks=({create_tables, download_era5}, insert_weather_cells), ) @@ -87,11 +111,13 @@ class EgonRenewableFeedIn(Base): def create_tables(): - db.execute_sql("CREATE SCHEMA IF NOT EXISTS supply;") + db.execute_sql( + f"CREATE SCHEMA IF NOT EXISTS {WeatherData.targets.tables['weather_cells']['schema']};" + ) engine = db.engine() db.execute_sql( f""" - DROP TABLE IF EXISTS {EgonEra5Cells.__table__.schema}.{EgonEra5Cells.__table__.name} CASCADE; + DROP TABLE IF EXISTS {WeatherData.targets.tables['weather_cells']['schema']}.{WeatherData.targets.tables['weather_cells']['table']} CASCADE; 
""" ) EgonEra5Cells.__table__.create(bind=engine, checkfirst=True) @@ -118,7 +144,9 @@ def import_cutout(boundary="Europe"): elif boundary == "Germany": geom_de = ( gpd.read_postgis( - "SELECT geometry as geom FROM boundaries.vg250_sta_bbox", + f"SELECT geometry as geom FROM " + f"{WeatherData.sources.tables['vg250_bbox']['schema']}." + f"{WeatherData.sources.tables['vg250_bbox']['table']}", db.engine(), ) .to_crs(4326) @@ -139,11 +167,7 @@ def import_cutout(boundary="Europe"): directory = ( Path(".") - / ( - egon.data.config.datasets()["era5_weather_data"]["targets"][ - "weather_data" - ]["path"] - ) + / WeatherData.targets.files["weather_data"]["path"] / f"{boundary.lower()}-{str(weather_year)}-era5.nc" ) @@ -165,11 +189,7 @@ def download_era5(): """ - directory = Path(".") / ( - egon.data.config.datasets()["era5_weather_data"]["targets"][ - "weather_data" - ]["path"] - ) + directory = Path(".") / WeatherData.targets.files["weather_data"]["path"] if not os.path.exists(directory): os.mkdir(directory) @@ -198,13 +218,10 @@ def insert_weather_cells(): None. """ - cfg = egon.data.config.datasets()["era5_weather_data"] db.execute_sql( - f""" - DELETE FROM {cfg['targets']['weather_cells']['schema']}. - {cfg['targets']['weather_cells']['table']} - """ + f"DELETE FROM {WeatherData.targets.tables['weather_cells']['schema']}." + f"{WeatherData.targets.tables['weather_cells']['table']}" ) cutout = import_cutout() @@ -214,14 +231,14 @@ def insert_weather_cells(): ) df.to_postgis( - cfg["targets"]["weather_cells"]["table"], - schema=cfg["targets"]["weather_cells"]["schema"], + WeatherData.targets.tables["weather_cells"]["table"], + schema=WeatherData.targets.tables["weather_cells"]["schema"], con=db.engine(), if_exists="append", ) db.execute_sql( - f"""UPDATE {cfg['targets']['weather_cells']['schema']}. - {cfg['targets']['weather_cells']['table']} - SET geom_point=ST_Centroid(geom);""" + f"UPDATE {WeatherData.targets.tables['weather_cells']['schema']}." 
+ f"{WeatherData.targets.tables['weather_cells']['table']} " + f"SET geom_point=ST_Centroid(geom);" ) diff --git a/src/egon/data/datasets/etrago_setup.py b/src/egon/data/datasets/etrago_setup.py index f9f1659f3..683a983ee 100755 --- a/src/egon/data/datasets/etrago_setup.py +++ b/src/egon/data/datasets/etrago_setup.py @@ -23,7 +23,7 @@ import pypsa from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.metadata import ( context, contributors, @@ -133,15 +133,105 @@ def get_meta( class EtragoSetup(Dataset): + name: str = "EtragoSetup" + version: str = "0.0.13" + + sources = DatasetSources( + tables={}, + files={} + ) + + targets = DatasetTargets( + tables={ + "bus": { + "schema": "grid", + "table": "egon_etrago_bus", + }, + "bus_timeseries": { + "schema": "grid", + "table": "egon_etrago_bus_timeseries", + }, + "generator": { + "schema": "grid", + "table": "egon_etrago_generator", + }, + "generator_timeseries": { + "schema": "grid", + "table": "egon_etrago_generator_timeseries", + }, + "line": { + "schema": "grid", + "table": "egon_etrago_line", + }, + "line_timeseries": { + "schema": "grid", + "table": "egon_etrago_line_timeseries", + }, + "link": { + "schema": "grid", + "table": "egon_etrago_link", + }, + "link_timeseries": { + "schema": "grid", + "table": "egon_etrago_link_timeseries", + }, + "load": { + "schema": "grid", + "table": "egon_etrago_load", + }, + "load_timeseries": { + "schema": "grid", + "table": "egon_etrago_load_timeseries", + }, + "carrier": { + "schema": "grid", + "table": "egon_etrago_carrier", + }, + "storage": { + "schema": "grid", + "table": "egon_etrago_storage", + }, + "storage_timeseries": { + "schema": "grid", + "table": "egon_etrago_storage_timeseries", + }, + "store": { + "schema": "grid", + "table": "egon_etrago_store", + }, + "store_timeseries": { + "schema": "grid", + "table": "egon_etrago_store_timeseries", + }, + "temp_resolution": { + 
"schema": "grid", + "table": "egon_etrago_temp_resolution", + }, + "transformer": { + "schema": "grid", + "table": "egon_etrago_transformer", + }, + "transformer_timeseries": { + "schema": "grid", + "table": "egon_etrago_transformer_timeseries", + }, + "hv_busmap": { + "schema": "grid", + "table": "egon_etrago_hv_busmap", + }, + } + ) + def __init__(self, dependencies): super().__init__( - name="EtragoSetup", - version="0.0.11", + name=self.name, + version=self.version, dependencies=dependencies, tasks=(create_tables, {temp_resolution, insert_carriers}), ) + class EgonPfHvBus(Base): source_list = [ @@ -885,81 +975,45 @@ def create_tables(): ------- None. """ - db.execute_sql("CREATE SCHEMA IF NOT EXISTS grid;") + schema = EtragoSetup.targets.tables["bus"]["schema"] + db.execute_sql(f"CREATE SCHEMA IF NOT EXISTS {schema};") + engine = db.engine() ##################### drop tables with old names ######################### + db.execute_sql(f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_bus;") + db.execute_sql(f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_bus_timeseries;") + db.execute_sql(f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_carrier;") + db.execute_sql(f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_generator;") db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_bus;""" + f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_generator_timeseries;" ) + db.execute_sql(f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_line;") db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_bus_timeseries;""" + f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_line_timeseries;" ) + db.execute_sql(f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_link;") db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_carrier;""" + f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_link_timeseries;" ) + db.execute_sql(f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_load;") db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_generator;""" + f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_load_timeseries;" ) + 
db.execute_sql(f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_storage;") db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_generator_timeseries;""" + f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_storage_timeseries;" ) + db.execute_sql(f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_store;") db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_line;""" + f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_store_timeseries;" ) db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_line_timeseries;""" + f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_temp_resolution;" ) + db.execute_sql(f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_transformer;") db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_link;""" - ) - db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_link_timeseries;""" - ) - db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_load;""" - ) - db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_load_timeseries;""" - ) - db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_storage;""" - ) - db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_storage_timeseries;""" - ) - db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_store;""" - ) - db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_store_timeseries;""" - ) - db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_temp_resolution;""" - ) - db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_transformer;""" - ) - db.execute_sql( - """ - DROP TABLE IF EXISTS grid.egon_pf_hv_transformer_timeseries;""" + f"DROP TABLE IF EXISTS {schema}.egon_pf_hv_transformer_timeseries;" ) ########################################################################## @@ -1008,17 +1062,12 @@ def create_tables(): def temp_resolution(): - """Insert temporal resolution for etrago - - Returns - ------- - None. 
- - """ - + """Insert temporal resolution for eTraGo""" + schema = EtragoSetup.targets.tables['temp_resolution']['schema'] + table = EtragoSetup.targets.tables['temp_resolution']['table'] db.execute_sql( - """ - INSERT INTO grid.egon_etrago_temp_resolution + f""" + INSERT INTO {schema}.{table} (temp_id, timesteps, resolution, start_time) SELECT 1, 8760, 'h', TIMESTAMP '2011-01-01 00:00:00'; """ @@ -1026,20 +1075,14 @@ def temp_resolution(): def insert_carriers(): - """Insert list of carriers into eTraGo table - - Returns - ------- - None. - - """ - # Delete existing entries + """Insert list of carriers into eTraGo table""" + schema = EtragoSetup.targets.tables['carrier']['schema'] + table = EtragoSetup.targets.tables['carrier']['table'] db.execute_sql( - """ - DELETE FROM grid.egon_etrago_carrier + f""" + DELETE FROM {schema}.{table}; """ ) - # List carrier names from all components df = pd.DataFrame( data={ @@ -1095,14 +1138,13 @@ def insert_carriers(): # Insert data into database df.to_sql( - "egon_etrago_carrier", - schema="grid", + EtragoSetup.targets.tables["carrier"]["table"], + schema=EtragoSetup.targets.tables["carrier"]["schema"], con=db.engine(), if_exists="append", index=False, ) - def check_carriers(): """Check if any eTraGo table has carriers not included in the carrier table. @@ -1112,19 +1154,17 @@ def check_carriers(): used in any eTraGo table. """ carriers = db.select_dataframe( - f""" - SELECT name FROM grid.egon_etrago_carrier - """ - ) + f"SELECT name FROM {EtragoSetup.targets.tables['carrier']['schema']}." + f"{EtragoSetup.targets.tables['carrier']['table']}" + )["name"] + unknown_carriers = {} tables = ["bus", "store", "storage", "link", "line", "generator", "load"] for table in tables: - # Delete existing entries data = db.select_dataframe( - f""" - SELECT carrier FROM grid.egon_etrago_{table} - """ + f"SELECT carrier FROM {EtragoSetup.targets.tables[table]['schema']}." 
+ f"{EtragoSetup.targets.tables[table]['table']}" ) unknown_carriers[table] = data[~data["carrier"].isin(carriers)][ "carrier" @@ -1163,13 +1203,13 @@ def link_geom_from_buses(df, scn_name): geom_buses = db.select_geodataframe( f""" SELECT bus_id, geom - FROM grid.egon_etrago_bus - WHERE scn_name = '{scn_name}' + FROM {EtragoSetup.targets.tables['bus']['schema']}.{EtragoSetup.targets.tables['bus']['table']} + WHERE scn_name = '{scn_name}'; """, index_col="bus_id", epsg=4326, ) - + # Create geometry columns for bus0 and bus1 df["geom_0"] = geom_buses.geom[df.bus0.values].values df["geom_1"] = geom_buses.geom[df.bus1.values].values diff --git a/src/egon/data/datasets/fill_etrago_gen.py b/src/egon/data/datasets/fill_etrago_gen.py index 86254fa6a..fcc87ce69 100644 --- a/src/egon/data/datasets/fill_etrago_gen.py +++ b/src/egon/data/datasets/fill_etrago_gen.py @@ -3,7 +3,7 @@ import pandas as pd from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.scenario_parameters import get_sector_parameters import egon.data.config @@ -30,7 +30,23 @@ class Egon_etrago_gen(Dataset): #: name: str = "etrago_generators" #: - version: str = "0.0.8" + version: str = "0.0.9" + + sources = DatasetSources( + tables={ + "power_plants": {"schema": "supply", "table": "egon_power_plants"}, + "renewable_feedin": {"schema": "supply", "table": "egon_era5_renewable_feedin"}, + "weather_cells": {"schema": "supply", "table": "egon_era5_weather_cells"}, + "bus": {"schema": "grid", "table": "egon_etrago_bus"}, + } + ) + + targets = DatasetTargets( + tables={ + "etrago_generators": {"schema": "grid", "table": "egon_etrago_generator"}, + "etrago_gen_time": {"schema": "grid", "table": "egon_etrago_generator_timeseries"}, + } + ) def __init__(self, dependencies): super().__init__( @@ -44,7 +60,8 @@ def __init__(self, dependencies): def fill_etrago_generators(): # Connect to the data base con = 
db.engine() - cfg = egon.data.config.datasets()["generators_etrago"] + cfg = Egon_etrago_gen # use class-level sources/targets + # Load required tables ( @@ -72,7 +89,10 @@ def fill_etrago_generators(): etrago_pp = add_marginal_costs(etrago_pp) etrago_gen_table = fill_etrago_gen_table( - etrago_pp2=etrago_pp, etrago_gen_orig=etrago_gen_orig, cfg=cfg, con=con + etrago_pp2=etrago_pp, + etrago_gen_orig=etrago_gen_orig, + cfg=cfg, + con=con, ) etrago_gen_time_table = fill_etrago_gen_time_table( @@ -158,11 +178,12 @@ def fill_etrago_gen_table(etrago_pp2, etrago_gen_orig, cfg, con): ) etrago_pp.to_sql( - name=f"{cfg['targets']['etrago_generators']['table']}", - schema=f"{cfg['targets']['etrago_generators']['schema']}", + name=cfg.targets.tables['etrago_generators']['table'], + schema=cfg.targets.tables['etrago_generators']['schema'], con=con, if_exists="append", ) + return etrago_pp @@ -196,51 +217,53 @@ def fill_etrago_gen_time_table( etrago_pp_time["temp_id"] = 1 etrago_pp_time.to_sql( - name=f"{cfg['targets']['etrago_gen_time']['table']}", - schema=f"{cfg['targets']['etrago_gen_time']['schema']}", + name=cfg.targets.tables['etrago_gen_time']['table'], + schema=cfg.targets.tables['etrago_gen_time']['schema'], con=con, if_exists="append", ) + return etrago_pp_time def load_tables(con, cfg): sql = f""" - SELECT * FROM - {cfg['sources']['power_plants']['schema']}. - {cfg['sources']['power_plants']['table']} + SELECT * FROM {cfg.sources.tables['power_plants']['schema']}.{cfg.sources.tables['power_plants']['table']} WHERE carrier != 'gas' """ + + power_plants = gpd.GeoDataFrame.from_postgis( sql, con, crs="EPSG:4326", index_col="id" ) sql = f""" - SELECT * FROM - {cfg['sources']['renewable_feedin']['schema']}. 
- {cfg['sources']['renewable_feedin']['table']} + SELECT * FROM {cfg.sources.tables['renewable_feedin']['schema']}.{cfg.sources.tables['renewable_feedin']['table']} """ + + renew_feedin = pd.read_sql(sql, con) sql = f""" - SELECT * FROM - {cfg['sources']['weather_cells']['schema']}. - {cfg['sources']['weather_cells']['table']} + SELECT * FROM {cfg.sources.tables['weather_cells']['schema']}.{cfg.sources.tables['weather_cells']['table']} """ + + weather_cells = gpd.GeoDataFrame.from_postgis(sql, con, crs="EPSG:4326") sql = f""" - SELECT * FROM - {cfg['targets']['etrago_generators']['schema']}. - {cfg['targets']['etrago_generators']['table']} + SELECT * FROM {cfg.targets.tables['etrago_generators']['schema']}.{cfg.targets.tables['etrago_generators']['table']} """ + + etrago_gen_orig = pd.read_sql(sql, con) sql = f""" - SELECT * FROM - {cfg['targets']['etrago_gen_time']['schema']}. - {cfg['targets']['etrago_gen_time']['table']} + SELECT * FROM {cfg.targets.tables['etrago_gen_time']['schema']}.{cfg.targets.tables['etrago_gen_time']['table']} """ + + + pp_time = pd.read_sql(sql, con) return power_plants, renew_feedin, weather_cells, etrago_gen_orig, pp_time @@ -289,30 +312,27 @@ def delete_previuos_gen(cfg, con, etrago_gen_orig, power_plants): if carrier_delete: db.execute_sql( - f"""DELETE FROM - {cfg['targets']['etrago_generators']['schema']}. - {cfg['targets']['etrago_generators']['table']} - WHERE carrier IN {*carrier_delete,} - AND bus IN ( - SELECT bus_id FROM {cfg['sources']['bus']['schema']}. 
- {cfg['sources']['bus']['table']} - WHERE country = 'DE' - AND carrier = 'AC' - AND scn_name = '{scn_name}') - AND scn_name ='{scn_name}' - """ + f"""DELETE FROM {cfg.targets.tables['etrago_generators']['schema']}.{cfg.targets.tables['etrago_generators']['table']} + WHERE carrier IN {*carrier_delete,} + AND bus IN ( + SELECT bus_id + FROM {cfg.sources.tables['bus']['schema']}.{cfg.sources.tables['bus']['table']} + WHERE country = 'DE' + AND carrier = 'AC' + AND scn_name = '{scn_name}' + ) + AND scn_name = '{scn_name}' + """ ) + db.execute_sql( - f"""DELETE FROM - {cfg['targets']['etrago_gen_time']['schema']}. - {cfg['targets']['etrago_gen_time']['table']} - WHERE generator_id NOT IN ( - SELECT generator_id FROM - {cfg['targets']['etrago_generators']['schema']}. - {cfg['targets']['etrago_generators']['table']}) + f"""DELETE FROM {cfg.targets.tables['etrago_gen_time']['schema']}.{cfg.targets.tables['etrago_gen_time']['table']} + WHERE generator_id NOT IN ( + SELECT generator_id + FROM {cfg.targets.tables['etrago_generators']['schema']}.{cfg.targets.tables['etrago_generators']['table']}) AND scn_name ='{scn_name}' - """ + """ ) diff --git a/src/egon/data/datasets/fix_ehv_subnetworks.py b/src/egon/data/datasets/fix_ehv_subnetworks.py index 1908aea19..536b98774 100644 --- a/src/egon/data/datasets/fix_ehv_subnetworks.py +++ b/src/egon/data/datasets/fix_ehv_subnetworks.py @@ -7,7 +7,7 @@ from egon.data import config, db, logger from egon.data.config import settings -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.etrago_setup import link_geom_from_buses from egon.data.datasets.scenario_parameters import get_sector_parameters @@ -36,14 +36,30 @@ class FixEhvSubnetworks(Dataset): #: name: str = "FixEhvSubnetworks" #: - version: str = "0.0.2" + version: str = "0.0.3" + + sources = DatasetSources( + tables={ + "buses": {"schema": "grid", "table": "egon_etrago_bus"}, + "lines": {"schema": 
"grid", "table": "egon_etrago_line"}, + "transformers": {"schema": "grid", "table": "egon_etrago_transformer"}, + } + ) + + targets = DatasetTargets( + tables={ + "buses": {"schema": "grid", "table": "egon_etrago_bus"}, + "lines": {"schema": "grid", "table": "egon_etrago_line"}, + "transformers": {"schema": "grid", "table": "egon_etrago_transformer"}, + } + ) def __init__(self, dependencies): super().__init__( name=self.name, version=self.version, dependencies=dependencies, - tasks=run, + tasks=(run,) ) @@ -51,7 +67,7 @@ def select_bus_id(x, y, v_nom, scn_name, carrier, find_closest=False): bus_id = db.select_dataframe( f""" SELECT bus_id - FROM grid.egon_etrago_bus + FROM {FixEhvSubnetworks.sources.tables['buses']['schema']}.{FixEhvSubnetworks.sources.tables['buses']['table']} WHERE x = {x} AND y = {y} AND v_nom = {v_nom} @@ -67,7 +83,7 @@ def select_bus_id(x, y, v_nom, scn_name, carrier, find_closest=False): bus_id = db.select_dataframe( f""" SELECT bus_id, st_distance(geom, 'SRID=4326;POINT({x} {y})'::geometry) - FROM grid.egon_etrago_bus + FROM {FixEhvSubnetworks.sources.tables['buses']['schema']}.{FixEhvSubnetworks.sources.tables['buses']['table']} WHERE v_nom = {v_nom} AND scn_name = '{scn_name}' AND carrier = '{carrier}' @@ -103,7 +119,9 @@ def add_bus(x, y, v_nom, scn_name): gdf.index.name = "bus_id" gdf.reset_index().to_postgis( - "egon_etrago_bus", schema="grid", con=db.engine(), if_exists="append" + FixEhvSubnetworks.targets.tables['buses']['table'], + schema=FixEhvSubnetworks.targets.tables['buses']['schema'], + con=db.engine(), if_exists="append" ) @@ -113,7 +131,7 @@ def drop_bus(x, y, v_nom, scn_name): if bus is not None: db.execute_sql( f""" - DELETE FROM grid.egon_etrago_bus + DELETE FROM {FixEhvSubnetworks.targets.tables['buses']['schema']}.{FixEhvSubnetworks.targets.tables['buses']['table']} WHERE scn_name = '{scn_name}' AND bus_id = {bus} @@ -176,10 +194,13 @@ def add_line(x0, y0, x1, y1, v_nom, scn_name, cables): gdf["capital_cost"] = 
(cost_per_km * gdf["length"]) * (gdf["cables"] / 3) gdf.index.name = "line_id" gdf.reset_index().to_postgis( - "egon_etrago_line", schema="grid", con=db.engine(), if_exists="append" + FixEhvSubnetworks.targets.tables['lines']['table'], + schema=FixEhvSubnetworks.targets.tables['lines']['schema'], + con=db.engine(), if_exists="append" ) + def drop_line(x0, y0, x1, y1, v_nom, scn_name): bus0 = select_bus_id(x0, y0, v_nom, scn_name, carrier="AC") bus1 = select_bus_id(x1, y1, v_nom, scn_name, carrier="AC") @@ -187,7 +208,7 @@ def drop_line(x0, y0, x1, y1, v_nom, scn_name): if (bus0 is not None) and (bus1 is not None): db.execute_sql( f""" - DELETE FROM grid.egon_etrago_line + DELETE FROM {FixEhvSubnetworks.targets.tables['lines']['schema']}.{FixEhvSubnetworks.targets.tables['lines']['table']} WHERE scn_name = '{scn_name}' AND bus0 = {bus0} @@ -206,7 +227,7 @@ def add_trafo(x, y, v_nom0, v_nom1, scn_name, n=1): ) df = pd.DataFrame( - index=[db.next_etrago_id("line")], + index=[db.next_etrago_id("transformer")], data={ "bus0": bus0, "bus1": bus1, @@ -227,8 +248,8 @@ def add_trafo(x, y, v_nom0, v_nom1, scn_name, n=1): gdf.index.name = "trafo_id" gdf.reset_index().to_postgis( - "egon_etrago_transformer", - schema="grid", + FixEhvSubnetworks.targets.tables['transformers']['table'], + schema=FixEhvSubnetworks.targets.tables['transformers']['schema'], con=db.engine(), if_exists="append", ) @@ -241,7 +262,7 @@ def drop_trafo(x, y, v_nom0, v_nom1, scn_name): if (bus0 is not None) and (bus1 is not None): db.execute_sql( f""" - DELETE FROM grid.egon_etrago_transformer + DELETE FROM {FixEhvSubnetworks.targets.tables['transformers']['schema']}.{FixEhvSubnetworks.targets.tables['transformers']['table']} WHERE scn_name = '{scn_name}' AND bus0 = {bus0} diff --git a/src/egon/data/datasets/gas_areas.py b/src/egon/data/datasets/gas_areas.py index 7977be662..b79cf3fea 100755 --- a/src/egon/data/datasets/gas_areas.py +++ b/src/egon/data/datasets/gas_areas.py @@ -22,7 +22,7 @@ sources, ) - 
+from egon.data.datasets import DatasetSources, DatasetTargets class GasAreaseGon2035(Dataset): """ Create the gas voronoi table and the gas voronoi areas for eGon2035 @@ -45,7 +45,31 @@ class GasAreaseGon2035(Dataset): #: name: str = "GasAreaseGon2035" #: - version: str = "0.0.2" + version: str = "0.0.4" + + # Dataset sources (input tables) + sources = DatasetSources( + tables={ + "vg250_sta_union": { + "schema": "boundaries", + "table": "vg250_sta_union", + }, + "egon_etrago_bus": { + "schema": "grid", + "table": "egon_etrago_bus", + }, + } + ) + + # Dataset targets (output tables) + targets = DatasetTargets( + tables={ + "ch4_voronoi": { + "schema": "grid", + "table": "egon_gas_voronoi", + }, + } + ) def __init__(self, dependencies): super().__init__( @@ -56,6 +80,7 @@ def __init__(self, dependencies): ) + class GasAreaseGon100RE(Dataset): """Insert the gas voronoi areas for eGon100RE @@ -78,14 +103,38 @@ class GasAreaseGon100RE(Dataset): #: name: str = "GasAreaseGon100RE" #: - version: str = "0.0.1" + version: str = "0.0.3" + + # Same sources as GasAreaseGon2035 + sources = DatasetSources( + tables={ + "vg250_sta_union": { + "schema": "boundaries", + "table": "vg250_sta_union", + }, + "egon_etrago_bus": { + "schema": "grid", + "table": "egon_etrago_bus", + }, + } + ) + + # Same target table + targets = DatasetTargets( + tables={ + "ch4_voronoi": { + "schema": "grid", + "table": "egon_gas_voronoi", + }, + } + ) def __init__(self, dependencies): super().__init__( name=self.name, version=self.version, dependencies=dependencies, - tasks=(voronoi_egon100RE), + tasks=(voronoi_egon100RE,), ) @@ -240,11 +289,11 @@ def create_voronoi(scn_name, carrier): table_exist = ( len( pd.read_sql( - """ + f""" SELECT * FROM information_schema.tables - WHERE table_schema = 'grid' - AND table_name = 'egon_gas_voronoi' + WHERE table_schema = '{GasAreaseGon2035.targets.tables["ch4_voronoi"]["schema"]}' + AND table_name = '{GasAreaseGon2035.targets.tables["ch4_voronoi"]["table"]}' 
LIMIT 1; """, engine, @@ -257,9 +306,9 @@ def create_voronoi(scn_name, carrier): create_gas_voronoi_table() boundary = db.select_geodataframe( - """ + f""" SELECT id, geometry - FROM boundaries.vg250_sta_union; + FROM {GasAreaseGon2035.sources.tables["vg250_sta_union"]["schema"]}.{GasAreaseGon2035.sources.tables["vg250_sta_union"]["table"]}; """, geom_col="geometry", ).to_crs(epsg=4326) @@ -276,7 +325,7 @@ def create_voronoi(scn_name, carrier): db.execute_sql( f""" - DELETE FROM grid.egon_gas_voronoi + DELETE FROM {GasAreaseGon2035.targets.tables["ch4_voronoi"]["schema"]}.{GasAreaseGon2035.targets.tables["ch4_voronoi"]["table"]} WHERE "carrier" IN ('{carrier_strings}') and "scn_name" = '{scn_name}'; """ ) @@ -284,7 +333,7 @@ def create_voronoi(scn_name, carrier): buses = db.select_geodataframe( f""" SELECT bus_id, geom - FROM grid.egon_etrago_bus + FROM {GasAreaseGon100RE.sources.tables['egon_etrago_bus']['schema']}.{GasAreaseGon100RE.sources.tables['egon_etrago_bus']['table']} WHERE scn_name = '{scn_name}' AND country = 'DE' AND carrier IN ('{carrier_strings}'); @@ -314,9 +363,9 @@ def create_voronoi(scn_name, carrier): # Insert data to db gdf.set_crs(epsg=4326).to_postgis( - f"egon_gas_voronoi", + GasAreaseGon2035.targets.tables["ch4_voronoi"]["table"], engine, - schema="grid", + schema=GasAreaseGon2035.targets.tables["ch4_voronoi"]["schema"], index=False, if_exists="append", dtype={"geom": Geometry}, @@ -340,7 +389,7 @@ class GasAreas(Dataset): #: name: str = "GasAreas" #: - version: str = "0.0.3" + version: str = "0.0.4" tasks = (create_gas_voronoi_table,) extra_dependencies = () diff --git a/src/egon/data/datasets/gas_grid.py b/src/egon/data/datasets/gas_grid.py index 51cee0905..f144cae18 100755 --- a/src/egon/data/datasets/gas_grid.py +++ b/src/egon/data/datasets/gas_grid.py @@ -31,7 +31,7 @@ from egon.data import config, db from egon.data.config import settings -from egon.data.datasets import Dataset, wrapped_partial +from egon.data.datasets import Dataset, 
DatasetSources, DatasetTargets, wrapped_partial from egon.data.datasets.electrical_neighbours import central_buses_pypsaeur from egon.data.datasets.etrago_helpers import copy_and_modify_buses from egon.data.datasets.scenario_parameters import get_sector_parameters @@ -58,27 +58,28 @@ def download_SciGRID_gas_data(): None """ - path = Path(".") / "datasets" / "gas_data" + path = Path(GasNodesAndPipes.targets.files["scigrid_gas_data_dir"]["path"]) os.makedirs(path, exist_ok=True) - basename = "IGGIELGN" - zip_file = Path(".") / "datasets" / "gas_data" / "IGGIELGN.zip" + basename = GasNodesAndPipes.sources.tables["scigrid_gas"]["zenodo"]["basename"] + zip_file = path / GasNodesAndPipes.sources.tables["scigrid_gas"]["zenodo"]["zip_name"] zenodo_zip_file_url = ( - "https://zenodo.org/record/4767098/files/" + basename + ".zip" + f"https://zenodo.org/record/" + f"{GasNodesAndPipes.sources.tables['scigrid_gas']['zenodo']['deposit_id']}" + f"/files/{basename}.zip" ) if not os.path.isfile(zip_file): urlretrieve(zenodo_zip_file_url, zip_file) - components = [ - "Nodes", - "PipeSegments", - "Productions", - "Storages", - "LNGs", - ] #'Compressors' + + components = ["nodes", "pipes", "productions", "storages", "lngs"] + files = [] + for i in components: - files.append("data/" + basename + "_" + i + ".csv") + files.append( + "data/" + GasNodesAndPipes.sources.tables["scigrid_gas"]["files"][i] + ) with ZipFile(zip_file, "r") as zipObj: listOfFileNames = zipObj.namelist() @@ -106,9 +107,11 @@ def define_gas_nodes_list(): new_id = db.next_etrago_id("bus") target_file = ( - Path(".") / "datasets" / "gas_data" / "data" / "IGGIELGN_Nodes.csv" + Path(GasNodesAndPipes.targets.files["scigrid_gas_data_dir"]["path"]) + / "data" + / GasNodesAndPipes.sources.tables["scigrid_gas"]["files"]["nodes"] ) - + gas_nodes_list = pd.read_csv( target_file, delimiter=";", @@ -244,17 +247,17 @@ def insert_CH4_nodes_list(gas_nodes_list, scn_name="eGon2035"): # Insert data to db db.execute_sql( f""" - 
DELETE FROM grid.egon_etrago_bus WHERE "carrier" = 'CH4' AND - scn_name = '{c['scn_name']}' AND country = 'DE'; + DELETE FROM {GasNodesAndPipes.targets.tables["buses"]["schema"]}.{GasNodesAndPipes.targets.tables["buses"]["table"]} + WHERE "carrier" = 'CH4' AND scn_name = '{c['scn_name']}' AND country = 'DE'; """ ) # Insert CH4 data to db print(gas_nodes_list) gas_nodes_list.to_postgis( - "egon_etrago_bus", + GasNodesAndPipes.targets.tables["buses"]["table"], engine, - schema="grid", + schema=GasNodesAndPipes.targets.tables["buses"]["schema"], index=False, if_exists="append", dtype={"geom": Geometry()}, @@ -302,8 +305,8 @@ def define_gas_buses_abroad(scn_name="eGon2035"): if scn_name == "eGon100RE": gdf_abroad_buses = geopandas.read_postgis( f""" - SELECT * FROM grid.egon_etrago_bus WHERE "carrier" = '{gas_carrier}' AND - scn_name = '{scn_name}' AND country != 'DE'; + SELECT * FROM {GasNodesAndPipes.targets.tables["buses"]["schema"]}.{GasNodesAndPipes.targets.tables["buses"]["table"]} + WHERE "carrier" = '{gas_carrier}' AND scn_name = '{scn_name}' AND country != 'DE'; """, con=engine, crs=4326, @@ -347,8 +350,8 @@ def define_gas_buses_abroad(scn_name="eGon2035"): else: db.execute_sql( f""" - DELETE FROM grid.egon_etrago_bus WHERE "carrier" = '{gas_carrier}' AND - scn_name = '{scn_name}' AND country != 'DE'; + DELETE FROM {GasNodesAndPipes.targets.tables["buses"]["schema"]}.{GasNodesAndPipes.targets.tables["buses"]["table"]} + WHERE "carrier" = '{gas_carrier}' AND scn_name = '{scn_name}' AND country != 'DE'; """ ) @@ -469,9 +472,9 @@ def insert_gas_buses_abroad(scn_name="eGon2035"): # Insert to db if scn_name == "eGon100RE": gdf_abroad_buses[gdf_abroad_buses["country"] == "DE"].to_postgis( - "egon_etrago_bus", + GasNodesAndPipes.targets.tables["buses"]["table"], engine, - schema="grid", + schema=GasNodesAndPipes.targets.tables["buses"]["schema"], index=False, if_exists="append", dtype={"geom": Geometry()}, @@ -480,14 +483,14 @@ def 
insert_gas_buses_abroad(scn_name="eGon2035"): else: db.execute_sql( f""" - DELETE FROM grid.egon_etrago_bus WHERE "carrier" = '{gas_carrier}' AND - scn_name = '{scn_name}' AND country != 'DE'; + DELETE FROM {GasNodesAndPipes.targets.tables["buses"]["schema"]}.{GasNodesAndPipes.targets.tables["buses"]["table"]} + WHERE "carrier" = '{gas_carrier}' AND scn_name = '{scn_name}' AND country != 'DE'; """ ) gdf_abroad_buses.to_postgis( - "egon_etrago_bus", + GasNodesAndPipes.targets.tables["buses"]["table"], engine, - schema="grid", + schema=GasNodesAndPipes.targets.tables["buses"]["schema"], index=False, if_exists="append", dtype={"geom": Geometry()}, @@ -542,25 +545,20 @@ def define_gas_pipeline_list( # Select next id value new_id = db.next_etrago_id("link") - classifiaction_file = ( - Path(".") - / "data_bundle_egon_data" - / "pipeline_classification_gas" - / "pipeline_classification.csv" + classification_file = Path( + GasNodesAndPipes.sources.files["pipeline_classification"]["path"] ) classification = pd.read_csv( - classifiaction_file, + classification_file, delimiter=",", usecols=["classification", "max_transport_capacity_Gwh/d"], ) target_file = ( - Path(".") - / "datasets" - / "gas_data" - / "data" - / "IGGIELGN_PipeSegments.csv" + Path(GasNodesAndPipes.targets.files["scigrid_gas_data_dir"]["path"]) + / "data" + / GasNodesAndPipes.sources.tables["scigrid_gas"]["files"]["pipes"] ) gas_pipelines_list = pd.read_csv( @@ -944,18 +942,18 @@ def insert_gas_pipeline_list(gas_pipelines_list, scn_name="eGon2035"): # Clean db db.execute_sql( - f"""DELETE FROM grid.egon_etrago_link + f"""DELETE FROM {GasNodesAndPipes.targets.tables["links"]["schema"]}.{GasNodesAndPipes.targets.tables["links"]["table"]} WHERE "carrier" = '{gas_carrier}' AND scn_name = '{scn_name}' AND link_id IN( - SELECT link_id FROM grid.egon_etrago_link + SELECT link_id FROM {GasNodesAndPipes.targets.tables["links"]["schema"]}.{GasNodesAndPipes.targets.tables["links"]["table"]} WHERE bus0 IN ( - SELECT 
bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {GasNodesAndPipes.targets.tables["buses"]["schema"]}.{GasNodesAndPipes.targets.tables["buses"]["table"]} WHERE country = 'DE' AND scn_name = '{scn_name}' ) AND bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {GasNodesAndPipes.targets.tables["buses"]["schema"]}.{GasNodesAndPipes.targets.tables["buses"]["table"]} WHERE country = 'DE' AND scn_name = '{scn_name}' ) @@ -966,36 +964,39 @@ def insert_gas_pipeline_list(gas_pipelines_list, scn_name="eGon2035"): print(gas_pipelines_list) # Insert data to db gas_pipelines_list.to_postgis( - "egon_etrago_gas_link", + GasNodesAndPipes.targets.tables["gas_link"]["table"], engine, - schema="grid", + schema=GasNodesAndPipes.targets.tables["gas_link"]["schema"], index=False, if_exists="replace", dtype={"geom": Geometry(), "topo": Geometry()}, ) db.execute_sql( - """ - select UpdateGeometrySRID('grid', 'egon_etrago_gas_link', 'topo', 4326) ; - - INSERT INTO grid.egon_etrago_link (scn_name, - link_id, carrier, - bus0, bus1, p_min_pu, - p_nom, p_nom_extendable, length, - geom, topo) - SELECT scn_name, - link_id, carrier, - bus0, bus1, p_min_pu, - p_nom, p_nom_extendable, length, - geom, topo - - FROM grid.egon_etrago_gas_link; - - DROP TABLE grid.egon_etrago_gas_link; + f""" + SELECT UpdateGeometrySRID( + '{GasNodesAndPipes.targets.tables["gas_link"]["schema"]}', + '{GasNodesAndPipes.targets.tables["gas_link"]["table"]}', + 'topo', + 4326 + ); + + INSERT INTO {GasNodesAndPipes.targets.tables["links"]["schema"]}.{GasNodesAndPipes.targets.tables["links"]["table"]} + (scn_name, link_id, carrier, bus0, bus1, p_min_pu, + p_nom, p_nom_extendable, length, geom, topo) + SELECT scn_name, + link_id, carrier, + bus0, bus1, p_min_pu, + p_nom, p_nom_extendable, length, + geom, topo + FROM {GasNodesAndPipes.targets.tables["gas_link"]["schema"]}.{GasNodesAndPipes.targets.tables["gas_link"]["table"]}; + + DROP TABLE 
{GasNodesAndPipes.targets.tables["gas_link"]["schema"]}.{GasNodesAndPipes.targets.tables["gas_link"]["table"]}; """ ) + def remove_isolated_gas_buses(scn_name="eGon2035"): """ Delete CH4 buses which are disconnected of the CH4 grid for the required @@ -1006,20 +1007,20 @@ def remove_isolated_gas_buses(scn_name="eGon2035"): None """ - targets = config.datasets()["gas_grid"]["targets"] + #targets = config.datasets()["gas_grid"]["targets"] db.execute_sql( f""" - DELETE FROM {targets['buses']['schema']}.{targets['buses']['table']} + DELETE FROM {GasNodesAndPipes.targets.tables["buses"]["schema"]}.{GasNodesAndPipes.targets.tables["buses"]["table"]} WHERE "carrier" = 'CH4' AND scn_name = '{scn_name}' AND country = 'DE' AND "bus_id" NOT IN - (SELECT bus0 FROM {targets['links']['schema']}.{targets['links']['table']} + (SELECT bus0 FROM {GasNodesAndPipes.targets.tables["links"]["schema"]}.{GasNodesAndPipes.targets.tables["links"]["table"]} WHERE scn_name = '{scn_name}' AND carrier = 'CH4') AND "bus_id" NOT IN - (SELECT bus1 FROM {targets['links']['schema']}.{targets['links']['table']} + (SELECT bus1 FROM {GasNodesAndPipes.targets.tables["links"]["schema"]}.{GasNodesAndPipes.targets.tables["links"]["table"]} WHERE scn_name = '{scn_name}' AND carrier = 'CH4'); """ @@ -1091,13 +1092,13 @@ def insert_gas_data_status(scn_name): # delete old entries db.execute_sql( f""" - DELETE FROM grid.egon_etrago_link + DELETE FROM {GasNodesAndPipes.targets.tables["links"]["schema"]}.{GasNodesAndPipes.targets.tables["links"]["table"]} WHERE carrier = 'CH4' AND scn_name = '{scn_name}' """ ) db.execute_sql( f""" - DELETE FROM grid.egon_etrago_bus + DELETE FROM {GasNodesAndPipes.targets.tables["buses"]["schema"]}.{GasNodesAndPipes.targets.tables["buses"]["table"]} WHERE carrier = 'CH4' AND scn_name = '{scn_name}' """ ) @@ -1126,7 +1127,7 @@ def insert_gas_data_status(scn_name): gdf.index.name = "bus_id" gdf.reset_index().to_postgis( - "egon_etrago_bus", schema="grid", con=db.engine(), 
if_exists="append" + GasNodesAndPipes.targets.tables["buses"]["table"], schema=GasNodesAndPipes.targets.tables["buses"]["schema"], con=db.engine(), if_exists="append" ) @@ -1154,7 +1155,7 @@ class GasNodesAndPipes(Dataset): #: name: str = "GasNodesAndPipes" #: - version: str = "0.0.11" + version: str = "0.0.13" tasks = () @@ -1169,6 +1170,41 @@ class GasNodesAndPipes(Dataset): ) tasks += (insert_gas_data,) + + sources = DatasetSources( + tables={ + "scigrid_gas": { + "zenodo": { + "deposit_id": "4767098", + "basename": "IGGIELGN", + "zip_name": "IGGIELGN.zip", + }, + "files": { + "nodes": "IGGIELGN_Nodes.csv", + "pipes": "IGGIELGN_PipeSegments.csv", + "productions": "IGGIELGN_Productions.csv", + "storages": "IGGIELGN_Storages.csv", + "lngs": "IGGIELGN_LNGs.csv", + }, + } + }, + files={ + "pipeline_classification": { + "path": "./data_bundle_egon_data/pipeline_classification_gas/pipeline_classification.csv" + }, + }, + ) + + targets = DatasetTargets( + tables={ + "buses": {"schema": "grid", "table": "egon_etrago_bus"}, + "links": {"schema": "grid", "table": "egon_etrago_link"}, + "gas_link": {"schema": "grid", "table": "egon_etrago_gas_link"}, + }, + files={ + "scigrid_gas_data_dir": {"path": "./datasets/gas_data"}, + }, + ) def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/gas_neighbours/.spyproject/config/codestyle.ini b/src/egon/data/datasets/gas_neighbours/.spyproject/config/codestyle.ini new file mode 100644 index 000000000..0f54b4c43 --- /dev/null +++ b/src/egon/data/datasets/gas_neighbours/.spyproject/config/codestyle.ini @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/src/egon/data/datasets/gas_neighbours/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini b/src/egon/data/datasets/gas_neighbours/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini new file mode 100644 index 000000000..0b95e5cee --- /dev/null +++ 
b/src/egon/data/datasets/gas_neighbours/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini @@ -0,0 +1,5 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + diff --git a/src/egon/data/datasets/gas_neighbours/.spyproject/config/defaults/defaults-encoding-0.2.0.ini b/src/egon/data/datasets/gas_neighbours/.spyproject/config/defaults/defaults-encoding-0.2.0.ini new file mode 100644 index 000000000..0ce193c1e --- /dev/null +++ b/src/egon/data/datasets/gas_neighbours/.spyproject/config/defaults/defaults-encoding-0.2.0.ini @@ -0,0 +1,3 @@ +[encoding] +text_encoding = utf-8 + diff --git a/src/egon/data/datasets/gas_neighbours/.spyproject/config/defaults/defaults-vcs-0.2.0.ini b/src/egon/data/datasets/gas_neighbours/.spyproject/config/defaults/defaults-vcs-0.2.0.ini new file mode 100644 index 000000000..ee2548333 --- /dev/null +++ b/src/egon/data/datasets/gas_neighbours/.spyproject/config/defaults/defaults-vcs-0.2.0.ini @@ -0,0 +1,4 @@ +[vcs] +use_version_control = False +version_control_system = + diff --git a/src/egon/data/datasets/gas_neighbours/.spyproject/config/defaults/defaults-workspace-0.2.0.ini b/src/egon/data/datasets/gas_neighbours/.spyproject/config/defaults/defaults-workspace-0.2.0.ini new file mode 100644 index 000000000..2a73ab7ad --- /dev/null +++ b/src/egon/data/datasets/gas_neighbours/.spyproject/config/defaults/defaults-workspace-0.2.0.ini @@ -0,0 +1,6 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + diff --git a/src/egon/data/datasets/gas_neighbours/.spyproject/config/encoding.ini b/src/egon/data/datasets/gas_neighbours/.spyproject/config/encoding.ini new file mode 100644 index 000000000..a17acedd7 --- /dev/null +++ b/src/egon/data/datasets/gas_neighbours/.spyproject/config/encoding.ini @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/src/egon/data/datasets/gas_neighbours/.spyproject/config/vcs.ini 
b/src/egon/data/datasets/gas_neighbours/.spyproject/config/vcs.ini new file mode 100644 index 000000000..fd66eae01 --- /dev/null +++ b/src/egon/data/datasets/gas_neighbours/.spyproject/config/vcs.ini @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/src/egon/data/datasets/gas_neighbours/.spyproject/config/workspace.ini b/src/egon/data/datasets/gas_neighbours/.spyproject/config/workspace.ini new file mode 100644 index 000000000..405acabfa --- /dev/null +++ b/src/egon/data/datasets/gas_neighbours/.spyproject/config/workspace.ini @@ -0,0 +1,12 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False +project_type = 'empty-project-type' +recent_files = ['__init__.py', '..\\..\\datasets.yml'] + +[main] +version = 0.2.0 +recent_files = [] + diff --git a/src/egon/data/datasets/gas_neighbours/__init__.py b/src/egon/data/datasets/gas_neighbours/__init__.py index ebfac3534..71e02f2a2 100755 --- a/src/egon/data/datasets/gas_neighbours/__init__.py +++ b/src/egon/data/datasets/gas_neighbours/__init__.py @@ -2,8 +2,9 @@ The central module containing definition of the datasets dealing with gas neighbours """ +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data import config -from egon.data.datasets import Dataset + from egon.data.datasets.gas_neighbours.eGon100RE import ( insert_gas_neigbours_eGon100RE, ) @@ -43,6 +44,26 @@ def no_gas_neighbours_required(): class GasNeighbours(Dataset): + + + sources = DatasetSources( + files={ + "tyndp_capacities": "TYNDP-2020-Scenario-Datafile.xlsx.zip", + }, + tables={ + "buses": "grid.egon_etrago_bus", + "links": "grid.egon_etrago_link", + }, + ) + targets = DatasetTargets( + tables={ + "generators": "grid.egon_etrago_generator", + "loads": "grid.egon_etrago_load", + "load_timeseries": "grid.egon_etrago_load_timeseries", + "stores": "grid.egon_etrago_store", + "links": 
"grid.egon_etrago_link", + } + ) """ Insert the missing gas data abroad. @@ -72,7 +93,7 @@ class GasNeighbours(Dataset): #: name: str = "GasNeighbours" #: - version: str = "0.0.5" + version: str = "0.0.8" def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/gas_neighbours/eGon100RE.py b/src/egon/data/datasets/gas_neighbours/eGon100RE.py index 53b98eed8..6723050ce 100644 --- a/src/egon/data/datasets/gas_neighbours/eGon100RE.py +++ b/src/egon/data/datasets/gas_neighbours/eGon100RE.py @@ -22,6 +22,7 @@ insert_gas_grid_capacities, ) from egon.data.datasets.pypsaeur import read_network +from egon.data.datasets import load_sources_and_targets countries = [ "AT", @@ -132,32 +133,32 @@ def set_foreign_country(link, foreign): return country - sources = config.datasets()["gas_neighbours"]["sources"] + sources, _ = load_sources_and_targets("GasNeighbours") gas_pipelines_list_CH4 = db.select_geodataframe( f""" SELECT * FROM grid.egon_etrago_link WHERE ("bus0" IN ( SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country != 'DE' AND country != 'RU' AND carrier = 'CH4' AND scn_name = 'eGon100RE') AND "bus1" IN (SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country = 'DE' AND carrier = 'CH4' AND scn_name = 'eGon100RE')) OR ("bus0" IN ( SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country = 'DE' AND carrier = 'CH4' AND scn_name = 'eGon100RE') AND "bus1" IN ( SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country != 'DE' AND country != 'RU' AND carrier = 'CH4' @@ -167,17 +168,15 @@ def set_foreign_country(link, foreign): """, epsg=4326, ) - gas_nodes_list_100 = db.select_geodataframe( f""" - SELECT * FROM {sources['buses']['schema']}.{sources['buses']['table']} + SELECT * FROM 
{sources.tables['buses']} WHERE scn_name = 'eGon100RE' AND carrier = 'CH4' AND country <> 'RU' """, epsg=4326, ) - foreign_bus = gas_nodes_list_100[ gas_nodes_list_100.country != "DE" ].set_index("bus_id") diff --git a/src/egon/data/datasets/gas_neighbours/eGon2035.py b/src/egon/data/datasets/gas_neighbours/eGon2035.py index d29ac605e..f49dbd508 100755 --- a/src/egon/data/datasets/gas_neighbours/eGon2035.py +++ b/src/egon/data/datasets/gas_neighbours/eGon2035.py @@ -23,6 +23,7 @@ import pypsa from egon.data import config, db +from egon.data.datasets import load_sources_and_targets from egon.data.datasets.electrical_neighbours import ( get_foreign_bus_id, get_map_buses, @@ -68,7 +69,8 @@ def get_foreign_gas_bus_id(carrier="CH4"): List of mapped node_ids from TYNDP and etragos bus_id """ - sources = config.datasets()["gas_neighbours"]["sources"] + sources, _ = load_sources_and_targets("GasNeighbours") + scn_name = "eGon2035" bus_id = db.select_geodataframe( @@ -83,7 +85,7 @@ def get_foreign_gas_bus_id(carrier="CH4"): ) # insert installed capacities - file = zipfile.ZipFile(f"tyndp/{sources['tyndp_capacities']}") + file = zipfile.ZipFile(f"tyndp/{sources.files['tyndp_capacities']}") # Select buses in neighbouring countries as geodataframe buses = pd.read_excel( @@ -187,27 +189,12 @@ def read_LNG_capacities(): def calc_capacities(): """ Calculates gas production capacities of neighbouring countries - - For each neigbouring country, this function calculates the gas - generation capacity in 2035 using the function - :py:func:`calc_capacity_per_year` for 2030 and 2040 and - interpolates the results. These capacities include LNG import, as - well as conventional and biogas production. - Two conventional gas generators are added for Norway and Russia - interpolating the supply potential (min) values from the TYNPD 2020 - for 2030 and 2040. - - Returns - ------- - grouped_capacities: pandas.DataFrame - Gas production capacities per foreign node - + ... (docstring) ... 
""" - - sources = config.datasets()["gas_neighbours"]["sources"] + sources, _ = load_sources_and_targets("GasNeighbours") # insert installed capacities - file = zipfile.ZipFile(f"tyndp/{sources['tyndp_capacities']}") + file = zipfile.ZipFile(f"tyndp/{sources.files['tyndp_capacities']}") df0 = pd.read_excel( file.open("TYNDP-2020-Scenario-Datafile.xlsx").read(), sheet_name="Gas Data", @@ -470,27 +457,10 @@ def calc_capacity_per_year(df, lng, year): def insert_generators(gen): """Insert gas generators for foreign countries into the database - - Insert gas generators for foreign countries into the database. - The marginal cost of the methane is calculated as the sum of the - imported LNG cost, the conventional natural gas cost and the - biomethane cost, weighted by their share in the total import/ - production capacity. - LNG gas is considered to be 30% more expensive than the natural gas - transported by pipelines (source: iwd, 2022). - - Parameters - ---------- - gen : pandas.DataFrame - Gas production capacities per foreign node and energy carrier - - Returns - ------- - None - + ... (docstring) ... 
""" - sources = config.datasets()["gas_neighbours"]["sources"] - targets = config.datasets()["gas_neighbours"]["targets"] + sources, targets = load_sources_and_targets("GasNeighbours") + map_buses = get_map_buses() scn_params = get_sector_parameters("gas", "eGon2035") @@ -498,10 +468,10 @@ def insert_generators(gen): db.execute_sql( f""" DELETE FROM - {targets['generators']['schema']}.{targets['generators']['table']} + {targets.tables['generators']} WHERE bus IN ( SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country != 'DE' AND scn_name = 'eGon2035') AND scn_name = 'eGon2035' @@ -543,9 +513,9 @@ def insert_generators(gen): # Insert data to db gen.to_sql( - targets["generators"]["table"], + targets.get_table_name("generators").split('.')[-1], db.engine(), - schema=targets["generators"]["schema"], + schema=targets.get_table_schema("generators"), index=False, if_exists="append", ) @@ -554,21 +524,11 @@ def insert_generators(gen): def calc_global_ch4_demand(Norway_global_demand_1y): """ Calculates global CH4 demands abroad for eGon2035 scenario - - The data comes from TYNDP 2020 according to NEP 2021 from the - scenario 'Distributed Energy'; linear interpolates between 2030 - and 2040. - - Returns - ------- - pandas.DataFrame - Global (yearly) CH4 final demand per foreign node - + ... (docstring) ... 
""" + sources, _ = load_sources_and_targets("GasNeighbours") - sources = config.datasets()["gas_neighbours"]["sources"] - - file = zipfile.ZipFile(f"tyndp/{sources['tyndp_capacities']}") + file = zipfile.ZipFile(f"tyndp/{sources.files['tyndp_capacities']}") df = pd.read_excel( file.open("TYNDP-2020-Scenario-Datafile.xlsx").read(), sheet_name="Gas Data", @@ -679,21 +639,10 @@ def import_ch4_demandTS(): def insert_ch4_demand(global_demand, normalized_ch4_demandTS): """Insert CH4 demands abroad into the database for eGon2035 - - Parameters - ---------- - global_demand : pandas.DataFrame - Global CH4 demand per foreign node in 1 year - gas_demandTS : pandas.DataFrame - Normalized time series of the demand per foreign country - - Returns - ------- - None - + ... (docstring) ... """ - sources = config.datasets()["gas_neighbours"]["sources"] - targets = config.datasets()["gas_neighbours"]["targets"] + sources, targets = load_sources_and_targets("GasNeighbours") + map_buses = get_map_buses() scn_name = "eGon2035" @@ -703,17 +652,13 @@ def insert_ch4_demand(global_demand, normalized_ch4_demandTS): db.execute_sql( f""" DELETE FROM - { - targets['load_timeseries']['schema'] - }.{ - targets['load_timeseries']['table'] - } + {targets.tables['load_timeseries']} WHERE "load_id" IN ( SELECT load_id FROM - {targets['loads']['schema']}.{targets['loads']['table']} + {targets.tables['loads']} WHERE bus IN ( SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country != 'DE' AND scn_name = '{scn_name}') AND scn_name = '{scn_name}' @@ -725,10 +670,10 @@ def insert_ch4_demand(global_demand, normalized_ch4_demandTS): db.execute_sql( f""" DELETE FROM - {targets['loads']['schema']}.{targets['loads']['table']} + {targets.tables['loads']} WHERE bus IN ( SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country != 'DE' AND scn_name = '{scn_name}') AND scn_name = 
'{scn_name}' @@ -763,9 +708,9 @@ def insert_ch4_demand(global_demand, normalized_ch4_demandTS): # Insert data to db global_demand.to_sql( - targets["loads"]["table"], + targets.get_table_name("loads").split('.')[-1], db.engine(), - schema=targets["loads"]["schema"], + schema=targets.get_table_schema("loads"), index=False, if_exists="append", ) @@ -796,9 +741,9 @@ def insert_ch4_demand(global_demand, normalized_ch4_demandTS): # Insert data to DB ch4_demand_TS.to_sql( - targets["load_timeseries"]["table"], + targets.get_table_name("load_timeseries").split('.')[-1], db.engine(), - schema=targets["load_timeseries"]["schema"], + schema=targets.get_table_schema("load_timeseries"), index=False, if_exists="append", ) @@ -911,36 +856,19 @@ def calc_ch4_storage_capacities(): def insert_storage(ch4_storage_capacities): """ Inserts CH4 stores for foreign countries into the database - - This function inserts the CH4 stores for foreign countries - with the following steps: - * Receive as argument the CH4 store capacities per foreign node - * Clean the database - * Add missing columns (scn_name, carrier and store_id) - * Insert the table into the database - - Parameters - ---------- - ch4_storage_capacities : pandas.DataFrame - Methane gas storage capacities per country in MWh - - Returns - ------- - None - + ... (docstring) ... 
""" - sources = config.datasets()["gas_neighbours"]["sources"] - targets = config.datasets()["gas_neighbours"]["targets"] + sources, targets = load_sources_and_targets("GasNeighbours") # Clean table db.execute_sql( f""" - DELETE FROM {targets['stores']['schema']}.{targets['stores']['table']} + DELETE FROM {targets.tables['stores']} WHERE "carrier" = 'CH4' AND scn_name = 'eGon2035' AND bus IN ( SELECT bus_id - FROM {sources['buses']['schema']}.{sources['buses']['table']} + FROM {sources.tables['buses']} WHERE scn_name = 'eGon2035' AND country != 'DE' ); @@ -964,9 +892,9 @@ def insert_storage(ch4_storage_capacities): ch4_storage_capacities = ch4_storage_capacities.reset_index(drop=True) # Insert data to db ch4_storage_capacities.to_sql( - targets["stores"]["table"], + targets.get_table_name("stores").split('.')[-1], db.engine(), - schema=targets["stores"]["schema"], + schema=targets.get_table_schema("stores"), index=False, if_exists="append", ) @@ -974,21 +902,11 @@ def insert_storage(ch4_storage_capacities): def calc_global_power_to_h2_demand(): """Calculate H2 demand abroad for eGon2035 scenario - - Calculates global power demand abroad linked to H2 production. - The data comes from TYNDP 2020 according to NEP 2021 from the - scenario 'Distributed Energy'; linear interpolate between 2030 - and 2040. - - Returns - ------- - global_power_to_h2_demand : pandas.DataFrame - Global hourly power-to-h2 demand per foreign node - + ... (docstring) ... 
""" - sources = config.datasets()["gas_neighbours"]["sources"] + sources, _ = load_sources_and_targets("GasNeighbours") - file = zipfile.ZipFile(f"tyndp/{sources['tyndp_capacities']}") + file = zipfile.ZipFile(f"tyndp/{sources.files['tyndp_capacities']}") df = pd.read_excel( file.open("TYNDP-2020-Scenario-Datafile.xlsx").read(), sheet_name="Gas Data", @@ -1069,22 +987,10 @@ def calc_global_power_to_h2_demand(): def insert_power_to_h2_demand(global_power_to_h2_demand): """ Insert H2 demands into the database for eGon2035 - - These loads are considered as constant and are attributed to AC - buses. - - Parameters - ---------- - global_power_to_h2_demand : pandas.DataFrame - Global hourly power-to-h2 demand per foreign node - - Returns - ------- - None - + ... (docstring) ... """ - sources = config.datasets()["gas_neighbours"]["sources"] - targets = config.datasets()["gas_neighbours"]["targets"] + sources, targets = load_sources_and_targets("GasNeighbours") + map_buses = get_map_buses() scn_name = "eGon2035" @@ -1093,10 +999,10 @@ def insert_power_to_h2_demand(global_power_to_h2_demand): db.execute_sql( f""" DELETE FROM - {targets['loads']['schema']}.{targets['loads']['table']} + {targets.tables['loads']} WHERE bus IN ( SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country != 'DE' AND scn_name = '{scn_name}') AND scn_name = '{scn_name}' @@ -1144,9 +1050,9 @@ def insert_power_to_h2_demand(global_power_to_h2_demand): # Insert data to db global_power_to_h2_demand.to_sql( - targets["loads"]["table"], + targets.get_table_name("loads").split('.')[-1], db.engine(), - schema=targets["loads"]["schema"], + schema=targets.get_table_schema("loads"), index=False, if_exists="append", ) @@ -1168,7 +1074,7 @@ def calculate_ch4_grid_capacities(): country """ - sources = config.datasets()["gas_neighbours"]["sources"] + sources, _ = load_sources_and_targets("GasNeighbours") # Download file basename = 
"ENTSOG_TYNDP_2020_Annex_C2_Capacities_per_country.xlsx" @@ -1335,31 +1241,31 @@ def calculate_ch4_grid_capacities(): ].map(dict_cross_pipes_DE) DE_pipe_capacities_list = DE_pipe_capacities_list.set_index("country_code") - schema = sources["buses"]["schema"] - table = sources["buses"]["table"] + schema_bus = sources.get_table_schema("buses") + table_bus = sources.get_table_name("buses").split('.')[-1] for country_code in [e for e in countries if e not in ("GB", "SE", "UK")]: # Select cross-bording links cap_DE = db.select_dataframe( f"""SELECT link_id, bus0, bus1 - FROM {sources['links']['schema']}.{sources['links']['table']} + FROM {sources.tables['links']} WHERE scn_name = 'eGon2035' AND carrier = 'CH4' AND (("bus0" IN ( - SELECT bus_id FROM {schema}.{table} + SELECT bus_id FROM {schema_bus}.{table_bus} WHERE country = 'DE' AND carrier = 'CH4' AND scn_name = 'eGon2035') - AND "bus1" IN (SELECT bus_id FROM {schema}.{table} + AND "bus1" IN (SELECT bus_id FROM {schema_bus}.{table_bus} WHERE country = '{country_code}' AND carrier = 'CH4' AND scn_name = 'eGon2035') ) OR ("bus0" IN ( - SELECT bus_id FROM {schema}.{table} + SELECT bus_id FROM {schema_bus}.{table_bus} WHERE country = '{country_code}' AND carrier = 'CH4' AND scn_name = 'eGon2035') - AND "bus1" IN (SELECT bus_id FROM {schema}.{table} + AND "bus1" IN (SELECT bus_id FROM {schema_bus}.{table_bus} WHERE country = 'DE' AND carrier = 'CH4' AND scn_name = 'eGon2035')) @@ -1509,20 +1415,12 @@ def grid(): def calculate_ocgt_capacities(): """ Calculate gas turbine capacities abroad for eGon2035 - - Calculate gas turbine capacities abroad for eGon2035 based on TYNDP - 2020, scenario "Distributed Energy", interpolated between 2030 and 2040. - - Returns - ------- - df_ocgt: pandas.DataFrame - Gas turbine capacities per foreign node - + ... (docstring) ... 
""" - sources = config.datasets()["gas_neighbours"]["sources"] + sources, _ = load_sources_and_targets("GasNeighbours") # insert installed capacities - file = zipfile.ZipFile(f"tyndp/{sources['tyndp_capacities']}") + file = zipfile.ZipFile(f"tyndp/{sources.files['tyndp_capacities']}") df = pd.read_excel( file.open("TYNDP-2020-Scenario-Datafile.xlsx").read(), sheet_name="Capacity", @@ -1567,16 +1465,7 @@ def calculate_ocgt_capacities(): def insert_ocgt_abroad(): """Insert gas turbine capacities abroad for eGon2035 in the database - - Parameters - ---------- - df_ocgt: pandas.DataFrame - Gas turbine capacities per foreign node - - Returns - ------- - None - + ... (docstring) ... """ scn_name = "eGon2035" carrier = "OCGT" diff --git a/src/egon/data/datasets/gas_neighbours/gas_abroad.py b/src/egon/data/datasets/gas_neighbours/gas_abroad.py index b6ae5cf2e..645cc327c 100755 --- a/src/egon/data/datasets/gas_neighbours/gas_abroad.py +++ b/src/egon/data/datasets/gas_neighbours/gas_abroad.py @@ -9,6 +9,7 @@ from geoalchemy2.types import Geometry from egon.data import config, db +from egon.data.datasets import load_sources_and_targets def insert_gas_grid_capacities(Neighbouring_pipe_capacities_list, scn_name): @@ -31,8 +32,7 @@ def insert_gas_grid_capacities(Neighbouring_pipe_capacities_list, scn_name): Name of the scenario """ - sources = config.datasets()["gas_neighbours"]["sources"] - targets = config.datasets()["gas_neighbours"]["targets"] + sources, targets = load_sources_and_targets("GasNeighbours") # Delete existing data if scn_name == "eGon2035": @@ -42,16 +42,16 @@ def insert_gas_grid_capacities(Neighbouring_pipe_capacities_list, scn_name): db.execute_sql( f""" DELETE FROM - {sources['links']['schema']}.{sources['links']['table']} + {targets.tables['links']} WHERE "bus0" IN ( SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country != 'DE' AND carrier = '{carrier_bus}' AND scn_name = '{scn_name}') OR 
"bus1" IN ( SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country != 'DE' AND carrier = '{carrier_bus}' AND scn_name = '{scn_name}') @@ -71,27 +71,27 @@ def insert_gas_grid_capacities(Neighbouring_pipe_capacities_list, scn_name): db.execute_sql( f""" DELETE FROM - {sources['links']['schema']}.{sources['links']['table']} + {targets.tables['links']} WHERE ("bus0" IN ( SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country != 'DE' AND carrier = '{carriers[c]["bus_abroad"]}' AND scn_name = '{scn_name}') AND "bus1" IN (SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country = 'DE' AND carrier = '{carriers[c]["bus_inDE"]}' AND scn_name = '{scn_name}')) OR ("bus0" IN ( SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country = 'DE' AND carrier = '{carriers[c]["bus_inDE"]}' AND scn_name = '{scn_name}') AND "bus1" IN ( SELECT bus_id FROM - {sources['buses']['schema']}.{sources['buses']['table']} + {sources.tables['buses']} WHERE country != 'DE' AND carrier = '{carriers[c]["bus_abroad"]}' AND scn_name = '{scn_name}')) @@ -117,7 +117,7 @@ def insert_gas_grid_capacities(Neighbouring_pipe_capacities_list, scn_name): f""" select UpdateGeometrySRID('grid', 'egon_etrago_gas_link', 'topo', 4326) ; - INSERT INTO {targets['links']['schema']}.{targets['links']['table']} ( + INSERT INTO {targets.tables['links']} ( scn_name, link_id, carrier, bus0, bus1, p_nom, p_min_pu, length, geom, topo) diff --git a/src/egon/data/datasets/heat_demand/__init__.py b/src/egon/data/datasets/heat_demand/__init__.py index c0f9ce682..9b7879163 100644 --- a/src/egon/data/datasets/heat_demand/__init__.py +++ b/src/egon/data/datasets/heat_demand/__init__.py @@ -34,13 +34,22 @@ import rasterio from egon.data import db, subprocess -from egon.data.datasets 
import Dataset -from egon.data.datasets.scenario_parameters import get_sector_parameters -from egon.data.metadata import context, license_ccby, meta_metadata, sources +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets +from egon.data.datasets.scenario_parameters import ( + get_sector_parameters, +) +from egon.data.metadata import ( + context, + license_ccby, + meta_metadata, + sources, +) import egon.data.config + class HeatDemandImport(Dataset): + """ Insert the annual heat demand per census cell for each scenario @@ -62,10 +71,43 @@ class HeatDemandImport(Dataset): """ + #: name: str = "heat-demands" #: - version: str = "0.0.4" + version: str = "0.0.6" + + sources = DatasetSources( + tables={ + "boundaries": "boundaries.vg250_sta_union", + "zensus_population": "society.destatis_zensus_population_per_ha", + }, + urls={ + "peta_res_zip": "https://arcgis.com/sharing/rest/content/items/d7d18b63250240a49eb81db972aa573e/data", + "peta_ser_zip": "https://arcgis.com/sharing/rest/content/items/52ff5e02111142459ed5c2fe3d80b3a0/data", + }, + files={ + "peta_res_zip": "Peta5_0_1_HD_res.zip", + "peta_ser_zip": "Peta5_0_1_HD_ser.zip", + "res_cutout_tif": "Peta_5_0_1/res_hd_2015_GER.tif", + "ser_cutout_tif": "Peta_5_0_1/ser_hd_2015_GER.tif", + "scenario_res_glob": "heat_scenario_raster/res_HD_*.tif", + "scenario_ser_glob": "heat_scenario_raster/ser_HD_*.tif", + }, + ) + + targets = DatasetTargets( + tables={ + "heat_demand": { + "schema": "demand", + "table": "egon_peta_heat", + } + }, + files={ + "scenario_dir": "heat_scenario_raster", + }, + ) + def __init__(self, dependencies): super().__init__( @@ -122,31 +164,17 @@ def download_peta5_0_1_heat_demands(): """ - data_config = egon.data.config.datasets() - - # residential heat demands 2015 - peta5_resheatdemands_config = data_config["peta5_0_1_res_heat_demands"][ - "original_data" - ] - - target_file_res = peta5_resheatdemands_config["target"]["path"] + target_file_res = 
HeatDemandImport.sources.files["peta_res_zip"] if not os.path.isfile(target_file_res): - urlretrieve( - peta5_resheatdemands_config["source"]["url"], target_file_res - ) + urlretrieve(HeatDemandImport.sources.urls["peta_res_zip"], target_file_res) # service-sector heat demands 2015 - peta5_serheatdemands_config = data_config["peta5_0_1_ser_heat_demands"][ - "original_data" - ] - - target_file_ser = peta5_serheatdemands_config["target"]["path"] + + target_file_ser = HeatDemandImport.sources.files["peta_ser_zip"] if not os.path.isfile(target_file_ser): - urlretrieve( - peta5_serheatdemands_config["source"]["url"], target_file_ser - ) + urlretrieve(HeatDemandImport.sources.urls["peta_ser_zip"], target_file_ser) return None @@ -170,21 +198,11 @@ def unzip_peta5_0_1_heat_demands(): """ - # Get information from data configuration file - data_config = egon.data.config.datasets() - peta5_res_heatdemands_orig = data_config["peta5_0_1_res_heat_demands"][ - "original_data" - ] - # path to the downloaded residential heat demand 2015 data - filepath_zip_res = peta5_res_heatdemands_orig["target"]["path"] + filepath_zip_res = HeatDemandImport.sources.files["peta_res_zip"] + filepath_zip_ser = HeatDemandImport.sources.files["peta_ser_zip"] - peta5_ser_heatdemands_orig = data_config["peta5_0_1_ser_heat_demands"][ - "original_data" - ] - # path to the downloaded service-sector heat demand 2015 data - filepath_zip_ser = peta5_ser_heatdemands_orig["target"]["path"] + directory_to_extract_to = os.path.dirname(HeatDemandImport.sources.files["res_cutout_tif"]) - directory_to_extract_to = "Peta_5_0_1" # Create the folder, if it does not exists already if not os.path.exists(directory_to_extract_to): os.mkdir(directory_to_extract_to) @@ -243,8 +261,7 @@ def cutout_heat_demand_germany(): # Load the German boundaries from the local database using a dissolved # dataset which provides one multipolygon - table_name = "vg250_sta_union" - schema = "boundaries" + local_engine = db.engine() # 
Recommened way: gpd.read_postgis() @@ -253,14 +270,10 @@ def cutout_heat_demand_germany(): # using ST_Dump: https://postgis.net/docs/ST_Dump.html gdf_boundaries = gpd.read_postgis( - ( - f"SELECT (ST_Dump(geometry)).geom As geometry" - f" FROM {schema}.{table_name}" - ), + f"SELECT (ST_Dump(geometry)).geom AS geometry FROM {HeatDemandImport.sources.tables['boundaries']}", local_engine, geom_col="geometry", ) - # rasterio wants the mask to be a GeoJSON-like dict or an object that # implements the Python geo interface protocol (such as a Shapely Polygon) @@ -301,9 +314,7 @@ def cutout_heat_demand_germany(): } ) - with rasterio.open( - "Peta_5_0_1/res_hd_2015_GER.tif", "w", **out_meta - ) as dest: + with rasterio.open(HeatDemandImport.sources.files["res_cutout_tif"], "w", **out_meta) as dest: dest.write(out_image) # Do the same for the service-sector @@ -327,9 +338,7 @@ def cutout_heat_demand_germany(): } ) - with rasterio.open( - "Peta_5_0_1/ser_hd_2015_GER.tif", "w", **out_meta - ) as dest: + with rasterio.open(HeatDemandImport.sources.files["ser_cutout_tif"], "w", **out_meta) as dest: dest.write(out_image) return None @@ -415,7 +424,7 @@ def future_heat_demand_germany(scenario_name): ser_hd_reduction = heat_parameters["DE_demand_reduction_service"] # Define the directory where the created rasters will be saved - scenario_raster_directory = "heat_scenario_raster" + scenario_raster_directory = HeatDemandImport.targets.files["scenario_dir"] if not os.path.exists(scenario_raster_directory): os.mkdir(scenario_raster_directory) @@ -426,7 +435,7 @@ def future_heat_demand_germany(scenario_name): # the new file's profile, the profile of the source is adjusted. # Residential heat demands first - res_cutout = "Peta_5_0_1/res_hd_2015_GER.tif" + res_cutout = HeatDemandImport.sources.files["res_cutout_tif"] with rasterio.open(res_cutout) as src: # open raster dataset res_hd_2015 = src.read(1) # read as numpy array; band 1; masked=True?? 
@@ -442,15 +451,13 @@ def future_heat_demand_germany(scenario_name): ) # Save the scenario's residential heat demands as tif file # Define the filename for export - res_result_filename = ( - scenario_raster_directory + "/res_HD_" + scenario_name + ".tif" - ) + res_result_filename = os.path.join(scenario_raster_directory, f"res_HD_{scenario_name}.tif") # Open raster dataset in 'w' write mode using the adjusted meta data with rasterio.open(res_result_filename, "w", **res_profile) as dst: dst.write(res_scenario_raster.astype(rasterio.float32), 1) # Do the same for the service-sector - ser_cutout = "Peta_5_0_1/ser_hd_2015_GER.tif" + ser_cutout = HeatDemandImport.sources.files["ser_cutout_tif"] with rasterio.open(ser_cutout) as src: # open raster dataset ser_hd_2015 = src.read(1) # read as numpy array; band 1; masked=True?? @@ -462,9 +469,7 @@ def future_heat_demand_germany(scenario_name): ser_profile.update(dtype=rasterio.float32, count=1, compress="lzw") # Save the scenario's service-sector heat demands as tif file # Define the filename for export - ser_result_filename = ( - scenario_raster_directory + "/ser_HD_" + scenario_name + ".tif" - ) + ser_result_filename = os.path.join(scenario_raster_directory, f"ser_HD_{scenario_name}.tif") # Open raster dataset in 'w' write mode using the adjusted meta data with rasterio.open(ser_result_filename, "w", **ser_profile) as dst: dst.write(ser_scenario_raster.astype(rasterio.float32), 1) @@ -505,14 +510,13 @@ def heat_demand_to_db_table(): Define version number correctly """ - # Define the raster file type to be imported - sources = ["*.tif"] - # Define the directory from with all raster files having the defined type - # will be imported sources = [ path - for pattern in sources - for path in Path("heat_scenario_raster").glob(pattern) + for pattern in ( + HeatDemandImport.sources.files["scenario_res_glob"], + HeatDemandImport.sources.files["scenario_ser_glob"], + ) + for path in Path(".").glob(pattern) ] # Create the schema for 
the final table, if needed @@ -522,7 +526,11 @@ def heat_demand_to_db_table(): os.path.dirname(__file__), "raster2cells-and-centroids.sql" ) - db.execute_sql("DELETE FROM demand.egon_peta_heat;") + db.execute_sql( + f"DELETE FROM {HeatDemandImport.targets.tables['heat_demand']['schema']}." + f"{HeatDemandImport.targets.tables['heat_demand']['table']};" + ) + for source in sources: if not "2015" in source.stem: @@ -578,7 +586,7 @@ def adjust_residential_heat_to_zensus(scenario): # Select overall residential heat demand overall_demand = db.select_dataframe( f"""SELECT SUM(demand) as overall_demand - FROM demand.egon_peta_heat + FROM {HeatDemandImport.targets.tables['heat_demand']['schema']}.{HeatDemandImport.targets.tables['heat_demand']['table']} WHERE scenario = {'scenario'} and sector = 'residential' """ ).overall_demand[0] @@ -586,11 +594,11 @@ def adjust_residential_heat_to_zensus(scenario): # Select heat demand in populated cells df = db.select_dataframe( f"""SELECT * - FROM demand.egon_peta_heat + FROM {HeatDemandImport.targets.tables['heat_demand']['schema']}.{HeatDemandImport.targets.tables['heat_demand']['table']} WHERE scenario = {'scenario'} and sector = 'residential' AND zensus_population_id IN ( SELECT id - FROM society.destatis_zensus_population_per_ha_inside_germany + FROM {HeatDemandImport.sources.tables['zensus_population']} )""", index_col="id", ) @@ -600,8 +608,8 @@ def adjust_residential_heat_to_zensus(scenario): # Drop residential heat demands db.execute_sql( - f"""DELETE FROM demand.egon_peta_heat - WHERE scenario = {'scenario'} and sector = 'residential'""" + f"""DELETE FROM {HeatDemandImport.targets.tables['heat_demand']['schema']}.{HeatDemandImport.targets.tables['heat_demand']['table']} + WHERE scenario = {'scenario'} and sector = 'residential'""" ) # Insert adjusted heat demands in populated cells @@ -748,8 +756,16 @@ def scenario_data_import(): db.execute_sql("CREATE SCHEMA IF NOT EXISTS demand;") # drop table if exists # can be removed 
when table structure doesn't change anymore - db.execute_sql("DROP TABLE IF EXISTS demand.egon_peta_heat CASCADE") - db.execute_sql("DROP SEQUENCE IF EXISTS demand.egon_peta_heat_seq CASCADE") + db.execute_sql( + f"DROP TABLE IF EXISTS {HeatDemandImport.targets.tables['heat_demand']['schema']}." + f"{HeatDemandImport.targets.tables['heat_demand']['table']} CASCADE" + ) + + db.execute_sql( + f"DROP SEQUENCE IF EXISTS {HeatDemandImport.targets.tables['heat_demand']['schema']}." + f"{HeatDemandImport.targets.tables['heat_demand']['table']}_seq CASCADE" + ) + # create table EgonPetaHeat.__table__.create(bind=db.engine(), checkfirst=True) diff --git a/src/egon/data/datasets/heat_demand_europe.py b/src/egon/data/datasets/heat_demand_europe.py index bfb45b5d4..faec66652 100644 --- a/src/egon/data/datasets/heat_demand_europe.py +++ b/src/egon/data/datasets/heat_demand_europe.py @@ -18,7 +18,7 @@ import os from egon.data import subprocess -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets import egon.data.config @@ -35,14 +35,18 @@ class HeatDemandEurope(Dataset): """ - #: name: str = "heat-demands-europe" - #: - version: str = ( - egon.data.config.datasets()[ - "hotmaps_current_policy_scenario_heat_demands_buildings" - ]["targets"]["path"] - + "_hotmaps.0.1" + version: str = "0.4.0" + + sources = DatasetSources( + urls={ + "hotmaps_heat_demand": "https://gitlab.com/hotmaps/building-stock/-/raw/master/output_csv/3_indicator/1_Data_for_graphs/part_2_energy_demands/CSV_Actions_Total_energy_demand_by_building_type_in_2050_NUTS0.csv" + } + ) + targets = DatasetTargets( + files={ + "heat_demand_europe": "pypsa-eur/resources/heat_demands_in_2050_NUTS0_hotmaps.csv" + } ) def __init__(self, dependencies): @@ -71,18 +75,14 @@ def download(): """ - data_config = egon.data.config.datasets() - - # heat demands - hotmapsheatdemands_config = data_config[ - "hotmaps_current_policy_scenario_heat_demands_buildings" - ] + url = 
HeatDemandEurope.sources.urls["hotmaps_heat_demand"] + target_file = HeatDemandEurope.targets.files["heat_demand_europe"] - target_file = hotmapsheatdemands_config["targets"]["path"] + os.makedirs(os.path.dirname(target_file), exist_ok=True) if not os.path.isfile(target_file): subprocess.run( - f"curl { hotmapsheatdemands_config['sources']['url']} > {target_file}", + f"curl {url} > {target_file}", shell=True, ) - return None + return None \ No newline at end of file diff --git a/src/egon/data/datasets/heat_demand_timeseries/__init__.py b/src/egon/data/datasets/heat_demand_timeseries/__init__.py index a2f011f97..0cc5c8d0f 100644 --- a/src/egon/data/datasets/heat_demand_timeseries/__init__.py +++ b/src/egon/data/datasets/heat_demand_timeseries/__init__.py @@ -21,7 +21,9 @@ from math import ceil -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets + + from egon.data.datasets.heat_demand_timeseries.daily import ( daily_demand_shares_per_climate_zone, map_climate_zones_to_zensus, @@ -103,37 +105,37 @@ def create_timeseries_for_building(building_id, scenario): FROM (SELECT demand FROM - demand.egon_peta_heat + {HeatTimeSeries.sources.tables['heat_demand_cts']} WHERE scenario = '{scenario}' AND sector = 'residential' AND zensus_population_id IN( SELECT zensus_population_id FROM - demand.egon_heat_timeseries_selected_profiles + {HeatTimeSeries.sources.tables['selected_profiles']} WHERE building_id = {building_id})) as demand, - (SELECT COUNT(building_id) - FROM demand.egon_heat_timeseries_selected_profiles + (SELECT COUNT(building_id) FROM + {HeatTimeSeries.sources.tables['selected_profiles']} WHERE zensus_population_id IN( SELECT zensus_population_id FROM - demand.egon_heat_timeseries_selected_profiles + {HeatTimeSeries.sources.tables['selected_profiles']} WHERE building_id = {building_id})) as building, (SELECT daily_demand_share, day_of_year FROM - demand.egon_daily_heat_demand_per_climate_zone + 
{HeatTimeSeries.sources.tables['daily_heat_demand_per_climate_zone']} WHERE climate_zone = ( - SELECT climate_zone FROM boundaries.egon_map_zensus_climate_zones + SELECT climate_zone FROM {HeatTimeSeries.sources.tables['climate_zones']} WHERE zensus_population_id = ( SELECT zensus_population_id - FROM demand.egon_heat_timeseries_selected_profiles + FROM {HeatTimeSeries.sources.tables['selected_profiles']} WHERE building_id = {building_id} ) )) as daily_demand) as daily_demand JOIN (SELECT b.idp, ordinality as day - FROM demand.egon_heat_timeseries_selected_profiles a, + FROM {HeatTimeSeries.sources.tables['selected_profiles']} a, UNNEST (a.selected_idp_profiles) WITH ORDINALITY as selected_idp - JOIN demand.egon_heat_idp_pool b + JOIN {HeatTimeSeries.sources.tables['idp_pool']} b ON selected_idp = b.index WHERE a.building_id = {building_id}) as demand_profile ON demand_profile.day = daily_demand.day_of_year @@ -181,31 +183,31 @@ def create_district_heating_profile(scenario, area_id): FROM (SELECT zensus_population_id, demand FROM - demand.egon_peta_heat + {HeatTimeSeries.sources.tables['heat_demand_cts']} WHERE scenario = '{scenario}' AND sector = 'residential' AND zensus_population_id IN( SELECT zensus_population_id FROM - demand.egon_map_zensus_district_heating_areas + {HeatTimeSeries.sources.tables['district_heating_areas']} WHERE scenario = '{scenario}' AND area_id = {area_id} )) as demand - JOIN boundaries.egon_map_zensus_climate_zones b + JOIN {HeatTimeSeries.sources.tables['climate_zones']} b ON demand.zensus_population_id = b.zensus_population_id - JOIN demand.egon_daily_heat_demand_per_climate_zone c + JOIN {HeatTimeSeries.sources.tables['daily_heat_demand_per_climate_zone']} c ON c.climate_zone = b.climate_zone JOIN ( SELECT e.idp, ordinality as day, zensus_population_id, building_id - FROM demand.egon_heat_timeseries_selected_profiles d, + FROM {HeatTimeSeries.sources.tables['selected_profiles']} d, UNNEST (d.selected_idp_profiles) WITH ORDINALITY as 
selected_idp - JOIN demand.egon_heat_idp_pool e + JOIN {HeatTimeSeries.sources.tables['idp_pool']} e ON selected_idp = e.index WHERE zensus_population_id IN ( SELECT zensus_population_id FROM - demand.egon_map_zensus_district_heating_areas + {HeatTimeSeries.sources.tables['district_heating_areas']} WHERE scenario = '{scenario}' AND area_id = {area_id} )) demand_profile @@ -213,13 +215,13 @@ def create_district_heating_profile(scenario, area_id): demand_profile.zensus_population_id = b.zensus_population_id) JOIN (SELECT COUNT(building_id), zensus_population_id - FROM demand.egon_heat_timeseries_selected_profiles + FROM {HeatTimeSeries.sources.tables['selected_profiles']} WHERE zensus_population_id IN( - SELECT zensus_population_id FROM - demand.egon_heat_timeseries_selected_profiles + SELECT zensus_population_id + FROM {HeatTimeSeries.sources.tables['selected_profiles']} WHERE zensus_population_id IN ( SELECT zensus_population_id FROM - demand.egon_map_zensus_district_heating_areas + {HeatTimeSeries.sources.tables['district_heating_areas']} WHERE scenario = '{scenario}' AND area_id = {area_id} )) @@ -262,8 +264,8 @@ def create_district_heating_profile_python_like(scenario="eGon2035"): start_time = datetime.now() idp_df = db.select_dataframe( - """ - SELECT index, idp FROM demand.egon_heat_idp_pool + f""" + SELECT index, idp FROM {HeatTimeSeries.sources.tables['idp_pool']} """, index_col="index", ) @@ -271,7 +273,7 @@ def create_district_heating_profile_python_like(scenario="eGon2035"): district_heating_grids = db.select_dataframe( f""" SELECT area_id - FROM demand.egon_district_heating_areas + FROM {HeatTimeSeries.sources.tables['district_heating_areas']} WHERE scenario = '{scenario}' """ ) @@ -284,20 +286,20 @@ def create_district_heating_profile_python_like(scenario="eGon2035"): area_id, demand as demand_total FROM - demand.egon_peta_heat a + {HeatTimeSeries.sources.tables['heat_demand_cts']} a INNER JOIN ( - SELECT * FROM 
demand.egon_map_zensus_district_heating_areas + SELECT * FROM {HeatTimeSeries.sources.tables['district_heating_areas']} WHERE scenario = '{scenario}' ) b ON a.zensus_population_id = b.zensus_population_id JOIN (SELECT COUNT(building_id), zensus_population_id - FROM demand.egon_heat_timeseries_selected_profiles + FROM {HeatTimeSeries.sources.tables['selected_profiles']} WHERE zensus_population_id IN( - SELECT zensus_population_id FROM - demand.egon_heat_timeseries_selected_profiles + SELECT zensus_population_id + FROM {HeatTimeSeries.sources.tables['selected_profiles']} WHERE zensus_population_id IN ( SELECT zensus_population_id FROM - boundaries.egon_map_zensus_grid_districts + {HeatTimeSeries.sources.tables['map_zensus_grid_districts']} )) GROUP BY zensus_population_id)c ON a.zensus_population_id = c.zensus_population_id @@ -316,9 +318,9 @@ def create_district_heating_profile_python_like(scenario="eGon2035"): ] daily_demand_shares = db.select_dataframe( - """ + f""" SELECT climate_zone, day_of_year as day, daily_demand_share FROM - demand.egon_daily_heat_demand_per_climate_zone + {HeatTimeSeries.sources.tables['daily_heat_demand_per_climate_zone']} """ ) @@ -335,14 +337,14 @@ def create_district_heating_profile_python_like(scenario="eGon2035"): f""" SELECT a.zensus_population_id, building_id, c.climate_zone, selected_idp, ordinality as day, b.area_id - FROM demand.egon_heat_timeseries_selected_profiles a - INNER JOIN boundaries.egon_map_zensus_climate_zones c + FROM {HeatTimeSeries.sources.tables['selected_profiles']} a + INNER JOIN {HeatTimeSeries.sources.tables['climate_zones']} c ON a.zensus_population_id = c.zensus_population_id INNER JOIN ( - SELECT * FROM demand.egon_map_zensus_district_heating_areas + SELECT * FROM {HeatTimeSeries.sources.tables['district_heating_areas']} WHERE scenario = '{scenario}' AND area_id = '{area}' - ) b ON a.zensus_population_id = b.zensus_population_id , + ) b ON a.zensus_population_id = b.zensus_population_id, UNNEST 
(selected_idp_profiles) WITH ORDINALITY as selected_idp @@ -486,44 +488,44 @@ def create_individual_heat_per_mv_grid(scenario="eGon2035", mv_grid_id=1564): FROM (SELECT zensus_population_id, demand FROM - demand.egon_peta_heat + {HeatTimeSeries.sources.tables['heat_demand_cts']} WHERE scenario = '{scenario}' AND sector = 'residential' AND zensus_population_id IN ( SELECT zensus_population_id FROM - boundaries.egon_map_zensus_grid_districts + {HeatTimeSeries.sources.tables['map_zensus_grid_districts']} WHERE bus_id = {mv_grid_id} )) as demand - JOIN boundaries.egon_map_zensus_climate_zones b + JOIN {HeatTimeSeries.sources.tables['climate_zones']} b ON demand.zensus_population_id = b.zensus_population_id - JOIN demand.egon_daily_heat_demand_per_climate_zone c + JOIN {HeatTimeSeries.sources.tables['daily_heat_demand_per_climate_zone']} c ON c.climate_zone = b.climate_zone JOIN ( SELECT e.idp, ordinality as day, zensus_population_id, building_id - FROM demand.egon_heat_timeseries_selected_profiles d, + FROM {HeatTimeSeries.sources.tables['selected_profiles']} d, UNNEST (d.selected_idp_profiles) WITH ORDINALITY as selected_idp - JOIN demand.egon_heat_idp_pool e + JOIN {HeatTimeSeries.sources.tables['idp_pool']} e ON selected_idp = e.index WHERE zensus_population_id IN ( SELECT zensus_population_id FROM - boundaries.egon_map_zensus_grid_districts + {HeatTimeSeries.sources.tables['map_zensus_grid_districts']} WHERE bus_id = {mv_grid_id} )) demand_profile ON (demand_profile.day = c.day_of_year AND demand_profile.zensus_population_id = b.zensus_population_id) JOIN (SELECT COUNT(building_id), zensus_population_id - FROM demand.egon_heat_timeseries_selected_profiles + FROM {HeatTimeSeries.sources.tables['selected_profiles']} WHERE zensus_population_id IN( SELECT zensus_population_id FROM - demand.egon_heat_timeseries_selected_profiles + {HeatTimeSeries.sources.tables['selected_profiles']} WHERE zensus_population_id IN ( SELECT zensus_population_id FROM - 
boundaries.egon_map_zensus_grid_districts + {HeatTimeSeries.sources.tables['map_zensus_grid_districts']} WHERE bus_id = {mv_grid_id} )) GROUP BY zensus_population_id) building @@ -580,8 +582,9 @@ def create_individual_heating_peak_loads(scenario="eGon2035"): start_time = datetime.now() idp_df = db.select_dataframe( - """ - SELECT index, idp FROM demand.egon_heat_idp_pool + f""" + SELECT index, idp + FROM {HeatTimeSeries.sources.tables['idp_pool']} """, index_col="index", ) @@ -589,29 +592,29 @@ def create_individual_heating_peak_loads(scenario="eGon2035"): annual_demand = db.select_dataframe( f""" SELECT a.zensus_population_id, demand/c.count as per_building, bus_id - FROM demand.egon_peta_heat a + FROM {HeatTimeSeries.sources.tables['heat_demand_cts']} a JOIN (SELECT COUNT(building_id), zensus_population_id - FROM demand.egon_heat_timeseries_selected_profiles + FROM {HeatTimeSeries.sources.tables['selected_profiles']} WHERE zensus_population_id IN( SELECT zensus_population_id FROM - demand.egon_heat_timeseries_selected_profiles + {HeatTimeSeries.sources.tables['selected_profiles']} WHERE zensus_population_id IN ( SELECT zensus_population_id FROM - boundaries.egon_map_zensus_grid_districts + {HeatTimeSeries.sources.tables['map_zensus_grid_districts']} )) GROUP BY zensus_population_id)c ON a.zensus_population_id = c.zensus_population_id - JOIN boundaries.egon_map_zensus_grid_districts d + JOIN {HeatTimeSeries.sources.tables['map_zensus_grid_districts']} d ON a.zensus_population_id = d.zensus_population_id WHERE a.scenario = '{scenario}' AND a.sector = 'residential' AND a.zensus_population_id NOT IN ( SELECT zensus_population_id - FROM demand.egon_map_zensus_district_heating_areas + FROM {HeatTimeSeries.sources.tables['district_heating_areas']} WHERE scenario = '{scenario}' ) @@ -620,9 +623,11 @@ def create_individual_heating_peak_loads(scenario="eGon2035"): ) daily_demand_shares = db.select_dataframe( - """ - SELECT climate_zone, day_of_year as day, 
daily_demand_share FROM - demand.egon_daily_heat_demand_per_climate_zone + f""" + SELECT climate_zone, day_of_year as day, daily_demand_share + FROM {HeatTimeSeries.sources.tables['daily_heat_demand_per_climate_zone']} + + """ ) @@ -632,8 +637,8 @@ def create_individual_heating_peak_loads(scenario="eGon2035"): f""" SELECT a.zensus_population_id, building_id, c.climate_zone, selected_idp, ordinality as day - FROM demand.egon_heat_timeseries_selected_profiles a - INNER JOIN boundaries.egon_map_zensus_climate_zones c + FROM {HeatTimeSeries.sources.tables['selected_profiles']} a + INNER JOIN {HeatTimeSeries.sources.tables['climate_zones']} c ON a.zensus_population_id = c.zensus_population_id , @@ -641,12 +646,12 @@ def create_individual_heating_peak_loads(scenario="eGon2035"): WHERE a.zensus_population_id NOT IN ( SELECT zensus_population_id - FROM demand.egon_map_zensus_district_heating_areas + FROM {HeatTimeSeries.sources.tables['district_heating_areas']} WHERE scenario = '{scenario}' ) AND a.zensus_population_id IN ( SELECT zensus_population_id - FROM boundaries.egon_map_zensus_grid_districts + FROM {HeatTimeSeries.sources.tables['map_zensus_grid_districts']} WHERE bus_id = '{grid}' ) @@ -683,7 +688,7 @@ def create_individual_heating_profile_python_like(scenario="eGon2035"): idp_df = db.select_dataframe( f""" - SELECT index, idp FROM demand.egon_heat_idp_pool + SELECT index, idp FROM {HeatTimeSeries.sources.tables['idp_pool']} """, index_col="index", ) @@ -695,29 +700,29 @@ def create_individual_heating_profile_python_like(scenario="eGon2035"): demand / c.count as per_building, demand as demand_total, bus_id - FROM demand.egon_peta_heat a + FROM {HeatTimeSeries.sources.tables['heat_demand_cts']} a JOIN (SELECT COUNT(building_id), zensus_population_id - FROM demand.egon_heat_timeseries_selected_profiles + FROM {HeatTimeSeries.sources.tables['selected_profiles']} WHERE zensus_population_id IN( SELECT zensus_population_id FROM - 
demand.egon_heat_timeseries_selected_profiles + {HeatTimeSeries.sources.tables['selected_profiles']} WHERE zensus_population_id IN ( SELECT zensus_population_id FROM - boundaries.egon_map_zensus_grid_districts + {HeatTimeSeries.sources.tables['map_zensus_grid_districts']} )) GROUP BY zensus_population_id)c ON a.zensus_population_id = c.zensus_population_id - JOIN boundaries.egon_map_zensus_grid_districts d + JOIN {HeatTimeSeries.sources.tables['map_zensus_grid_districts']} d ON a.zensus_population_id = d.zensus_population_id WHERE a.scenario = '{scenario}' AND a.sector = 'residential' AND a.zensus_population_id NOT IN ( SELECT zensus_population_id - FROM demand.egon_map_zensus_district_heating_areas + FROM {HeatTimeSeries.sources.tables['district_heating_areas']} WHERE scenario = '{scenario}' ) @@ -726,9 +731,10 @@ def create_individual_heating_profile_python_like(scenario="eGon2035"): ) daily_demand_shares = db.select_dataframe( - """ - SELECT climate_zone, day_of_year as day, daily_demand_share FROM - demand.egon_daily_heat_demand_per_climate_zone + f""" + SELECT climate_zone, day_of_year as day, daily_demand_share + + FROM {HeatTimeSeries.sources.tables['daily_heat_demand_per_climate_zone']} """ ) @@ -752,20 +758,21 @@ def create_individual_heating_profile_python_like(scenario="eGon2035"): f""" SELECT a.zensus_population_id, building_id, c.climate_zone, selected_idp, ordinality as day - FROM demand.egon_heat_timeseries_selected_profiles a - INNER JOIN boundaries.egon_map_zensus_climate_zones c + FROM {HeatTimeSeries.sources.tables['selected_profiles']} a + INNER JOIN {HeatTimeSeries.sources.tables['climate_zones']} c ON a.zensus_population_id = c.zensus_population_id , UNNEST (selected_idp_profiles) WITH ORDINALITY as selected_idp WHERE a.zensus_population_id NOT IN ( - SELECT zensus_population_id FROM demand.egon_map_zensus_district_heating_areas + SELECT zensus_population_id + FROM {HeatTimeSeries.sources.tables['district_heating_areas']} WHERE scenario = 
'{scenario}' ) AND a.zensus_population_id IN ( SELECT zensus_population_id - FROM boundaries.egon_map_zensus_grid_districts + FROM {HeatTimeSeries.sources.tables['map_zensus_grid_districts']} WHERE bus_id = '{grid}' ) @@ -856,9 +863,9 @@ def district_heating(method="python"): ) ids = db.select_dataframe( - """ + f""" SELECT area_id, scenario - FROM demand.egon_district_heating_areas + FROM demand.egon_district_heating_areas """ ) @@ -953,9 +960,9 @@ def individual_heating_per_mv_grid(method="python"): df = pd.DataFrame(columns=["bus_id", "scenario", "dist_aggregated_mw"]) ids = db.select_dataframe( - """ + f""" SELECT bus_id - FROM grid.egon_mv_grid_district + FROM grid.egon_mv_grid_district """ ) @@ -1019,32 +1026,32 @@ def store_national_profiles(): FROM (SELECT zensus_population_id, demand FROM - demand.egon_peta_heat + {HeatTimeSeries.sources.tables['heat_demand_cts']} WHERE scenario = '{scenario}' AND sector = 'residential' ) as demand - JOIN boundaries.egon_map_zensus_climate_zones b + JOIN {HeatTimeSeries.sources.tables['climate_zones']} b ON demand.zensus_population_id = b.zensus_population_id - JOIN demand.egon_daily_heat_demand_per_climate_zone c + JOIN {HeatTimeSeries.sources.tables['daily_heat_demand_per_climate_zone']} c ON c.climate_zone = b.climate_zone JOIN ( SELECT e.idp, ordinality as day, zensus_population_id, building_id - FROM demand.egon_heat_timeseries_selected_profiles d, + FROM {HeatTimeSeries.sources.tables['selected_profiles']} d, UNNEST (d.selected_idp_profiles) WITH ORDINALITY as selected_idp - JOIN demand.egon_heat_idp_pool e + JOIN {HeatTimeSeries.sources.tables['idp_pool']} e ON selected_idp = e.index ) demand_profile ON (demand_profile.day = c.day_of_year AND demand_profile.zensus_population_id = b.zensus_population_id) JOIN (SELECT COUNT(building_id), zensus_population_id - FROM demand.egon_heat_timeseries_selected_profiles + FROM 
{HeatTimeSeries.sources.tables['selected_profiles']} WHERE zensus_population_id IN( SELECT zensus_population_id FROM - demand.egon_heat_timeseries_selected_profiles + {HeatTimeSeries.sources.tables['selected_profiles']} ) GROUP BY zensus_population_id) building ON building.zensus_population_id = b.zensus_population_id, @@ -1072,7 +1079,8 @@ def store_national_profiles(): f""" SELECT sum(nullif(demand, 'NaN')) as "urban central" - FROM demand.egon_timeseries_district_heating, + FROM {HeatTimeSeries.targets.tables['district_heating_timeseries']['schema']}. + {HeatTimeSeries.targets.tables['district_heating_timeseries']['table']}, UNNEST (dist_aggregated_mw) WITH ORDINALITY as demand WHERE scenario = '{scenario}' @@ -1147,7 +1155,8 @@ def metadata(): ] meta_district = { - "name": "demand.egon_timeseries_district_heating", + "name": f"{HeatTimeSeries.targets.tables['district_heating_timeseries']['schema']}." + f"{HeatTimeSeries.targets.tables['district_heating_timeseries']['table']}", "title": "eGon heat demand time series for district heating grids", "id": "WILL_BE_SET_AT_PUBLICATION", "description": "Heat demand time series for district heating grids", @@ -1179,7 +1188,8 @@ def metadata(): "resources": [ { "profile": "tabular-data-resource", - "name": "demand.egon_timeseries_district_heating", + "name": f"{HeatTimeSeries.targets.tables['district_heating_timeseries']['schema']}." 
+ f"{HeatTimeSeries.targets.tables['district_heating_timeseries']['table']}", "path": None, "format": "PostgreSQL", "encoding": "UTF-8", @@ -1244,7 +1254,44 @@ class HeatTimeSeries(Dataset): #: name: str = "HeatTimeSeries" #: - version: str = "0.0.12" + version: str = "0.0.16" + + sources = DatasetSources( + tables={ + "heat_demand_cts": "demand.egon_peta_heat", + "district_heating_areas": "demand.egon_map_zensus_district_heating_areas", + "map_zensus_grid_districts": "boundaries.egon_map_zensus_grid_districts", + "climate_zones": "boundaries.egon_map_zensus_climate_zones", + "daily_heat_demand_per_climate_zone": "demand.egon_daily_heat_demand_per_climate_zone", + "selected_profiles": "demand.egon_heat_timeseries_selected_profiles", + "idp_pool": "demand.egon_heat_idp_pool", + "map_zensus_vg250": "boundaries.egon_map_zensus_vg250", + "zensus_population": "society.destatis_zensus_population_per_ha_inside_germany", + "era5_weather_cells": "supply.egon_era5_weather_cells", + "household_electricity_profiles": "demand.egon_household_electricity_profile_of_buildings" + }, + ) + + targets = DatasetTargets( + tables={ + "district_heating_timeseries": { + "schema": "demand", + "table": "egon_timeseries_district_heating", + }, + "etrago_timeseries_individual_heating": { + "schema": "demand", + "table": "egon_etrago_timeseries_individual_heating", + }, + "individual_heating_peak_loads": { + "schema": "demand", + "table": "egon_individual_heating_peak_loads", + }, + "etrago_heat_cts": { + "schema": "demand", + "table": "egon_etrago_heat_cts", + }, + } + ) def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/heat_demand_timeseries/daily.py b/src/egon/data/datasets/heat_demand_timeseries/daily.py index 61bf12b14..199911e4a 100644 --- a/src/egon/data/datasets/heat_demand_timeseries/daily.py +++ b/src/egon/data/datasets/heat_demand_timeseries/daily.py @@ -12,6 +12,9 @@ from egon.data.datasets.scenario_parameters import get_sector_parameters 
import egon.data.datasets.era5 as era +from egon.data.datasets import load_sources_and_targets + + Base = declarative_base() @@ -107,6 +110,7 @@ def map_climate_zones_to_zensus(): None. """ + sources, targets = load_sources_and_targets("HeatTimeSeries") # Drop old table and create new one engine = db.engine() EgonMapZensusClimateZones.__table__.drop(bind=engine, checkfirst=True) @@ -127,12 +131,12 @@ def map_climate_zones_to_zensus(): census_cells = db.select_geodataframe( f""" SELECT id as zensus_population_id, geom_point as geom - FROM society.destatis_zensus_population_per_ha_inside_germany + FROM {sources.tables["zensus_population"]} """, index_col="zensus_population_id", epsg=4326, ) - + # Join climate zones and census cells join = ( census_cells.sjoin(temperature_zones) @@ -281,6 +285,7 @@ def temperature_profile_extract(): Temperatur profile of all TRY Climate Zones 2011 """ + sources, targets = load_sources_and_targets("HeatTimeSeries") cutout = era.import_cutout(boundary="Germany") @@ -295,8 +300,8 @@ def temperature_profile_extract(): ) weather_cells = db.select_geodataframe( - """ - SELECT geom FROM supply.egon_era5_weather_cells + f""" + SELECT geom FROM {sources.tables["era5_weather_cells"]} """, epsg=4326, ) diff --git a/src/egon/data/datasets/heat_demand_timeseries/idp_pool.py b/src/egon/data/datasets/heat_demand_timeseries/idp_pool.py index b32860c8e..e7102753b 100644 --- a/src/egon/data/datasets/heat_demand_timeseries/idp_pool.py +++ b/src/egon/data/datasets/heat_demand_timeseries/idp_pool.py @@ -9,6 +9,11 @@ from egon.data import db import egon +from egon.data.datasets import load_sources_and_targets + + + + Base = declarative_base() @@ -104,6 +109,7 @@ def idp_pool_generator(): "household_heat_demand_profiles", "household_heat_demand_profiles.hdf5", ) + index = pd.date_range(datetime(2011, 1, 1, 0), periods=8760, freq="H") sfh = pd.read_hdf(path, key="SFH") @@ -371,26 +377,27 @@ def annual_demand_generator(scenario): respective associated Station 
""" + sources, targets = load_sources_and_targets("HeatTimeSeries") demand_zone = db.select_dataframe( f""" - SELECT a.demand, a.zensus_population_id, a.scenario, c.climate_zone - FROM demand.egon_peta_heat a - JOIN boundaries.egon_map_zensus_climate_zones c - ON a.zensus_population_id = c.zensus_population_id - WHERE a.sector = 'residential' - AND a.scenario = '{scenario}' - """, + SELECT a.demand, a.zensus_population_id, a.scenario, c.climate_zone + FROM {sources.tables["heat_demand_cts"]} a + JOIN {sources.tables["climate_zones"]} c + ON a.zensus_population_id = c.zensus_population_id + WHERE a.sector = 'residential' + AND a.scenario = '{scenario}' + """, index_col="zensus_population_id", ) house_count_MFH = db.select_dataframe( - """ + f""" SELECT cell_id as zensus_population_id, COUNT(*) as number FROM ( SELECT cell_id, COUNT(*), building_id - FROM demand.egon_household_electricity_profile_of_buildings + FROM {sources.tables["household_electricity_profiles"]} GROUP BY (cell_id, building_id) ) a @@ -401,12 +408,12 @@ def annual_demand_generator(scenario): ) house_count_SFH = db.select_dataframe( - """ + f""" SELECT cell_id as zensus_population_id, COUNT(*) as number FROM ( SELECT cell_id, COUNT(*), building_id - FROM demand.egon_household_electricity_profile_of_buildings + FROM {sources.tables["household_electricity_profiles"]} GROUP BY (cell_id, building_id) ) a WHERE a.count = 1 @@ -441,21 +448,22 @@ def select(): engine = db.engine() EgonHeatTimeseries.__table__.drop(bind=engine, checkfirst=True) EgonHeatTimeseries.__table__.create(bind=engine, checkfirst=True) + sources, targets = load_sources_and_targets("HeatTimeSeries") # Select all intra-day-profiles idp_df = db.select_dataframe( - """ + f""" SELECT index, house, temperature_class - FROM demand.egon_heat_idp_pool + FROM {sources.tables["idp_pool"]} """, index_col="index", ) # Select daily heat demand shares per climate zone from table temperature_classes = db.select_dataframe( - """ + f""" SELECT 
climate_zone, day_of_year, temperature_class - FROM demand.egon_daily_heat_demand_per_climate_zone + FROM {sources.tables["daily_heat_demand_per_climate_zone"]} """ ) @@ -522,12 +530,12 @@ def select(): result_SFH["building_id"] = ( db.select_dataframe( - """ + f""" SELECT cell_id as zensus_population_id, building_id FROM ( SELECT cell_id, COUNT(*), building_id - FROM demand.egon_household_electricity_profile_of_buildings + FROM {sources.tables["household_electricity_profiles"]} GROUP BY (cell_id, building_id) ) a WHERE a.count = 1 @@ -554,12 +562,12 @@ def select(): result_MFH["building_id"] = ( db.select_dataframe( - """ + f""" SELECT cell_id as zensus_population_id, building_id FROM ( SELECT cell_id, COUNT(*), building_id - FROM demand.egon_household_electricity_profile_of_buildings + FROM {sources.tables["household_electricity_profiles"]} GROUP BY (cell_id, building_id) ) a WHERE a.count > 1 @@ -590,12 +598,12 @@ def select(): ), "building_id": ( db.select_dataframe( - """ + f""" SELECT cell_id as zensus_population_id, building_id FROM ( SELECT cell_id, COUNT(*), building_id - FROM demand.egon_household_electricity_profile_of_buildings + FROM {sources.tables["household_electricity_profiles"]} GROUP BY (cell_id, building_id) ) a WHERE a.count = 1 @@ -642,12 +650,12 @@ def select(): ), "building_id": ( db.select_dataframe( - """ + f""" SELECT cell_id as zensus_population_id, building_id FROM ( SELECT cell_id, COUNT(*), building_id - FROM demand.egon_household_electricity_profile_of_buildings + FROM {sources.tables["household_electricity_profiles"]} GROUP BY (cell_id, building_id) ) a WHERE a.count > 1 diff --git a/src/egon/data/datasets/heat_demand_timeseries/service_sector.py b/src/egon/data/datasets/heat_demand_timeseries/service_sector.py index fcd120917..016d2eccb 100644 --- a/src/egon/data/datasets/heat_demand_timeseries/service_sector.py +++ b/src/egon/data/datasets/heat_demand_timeseries/service_sector.py @@ -9,6 +9,8 @@ from disaggregator import temporal 
except ImportError as e: pass +from egon.data.datasets import load_sources_and_targets + Base = declarative_base() @@ -48,12 +50,13 @@ def cts_demand_per_aggregation_level(aggregation_level, scenario): zensu population id """ + sources, targets = load_sources_and_targets("HeatTimeSeries") demand_nuts = db.select_dataframe( f""" SELECT demand, a.zensus_population_id, b.vg250_nuts3 - FROM demand.egon_peta_heat a - JOIN boundaries.egon_map_zensus_vg250 b + FROM {sources.tables["heat_demand_cts"]} a + JOIN {sources.tables['map_zensus_vg250']} b ON a.zensus_population_id = b.zensus_population_id WHERE a.sector = 'service' @@ -91,7 +94,7 @@ def cts_demand_per_aggregation_level(aggregation_level, scenario): district_heating = db.select_dataframe( f""" SELECT area_id, zensus_population_id - FROM demand.egon_map_zensus_district_heating_areas + FROM {sources.tables["district_heating_areas"]} WHERE scenario = '{scenario}' """ ) @@ -117,9 +120,9 @@ def cts_demand_per_aggregation_level(aggregation_level, scenario): mv_grid_ind = db.select_dataframe( f""" SELECT bus_id, a.zensus_population_id - FROM boundaries.egon_map_zensus_grid_districts a + FROM {sources.tables["map_zensus_grid_districts"]} a - JOIN demand.egon_peta_heat c + JOIN {sources.tables["heat_demand_cts"]} c ON a.zensus_population_id = c.zensus_population_id WHERE c.scenario = '{scenario}' @@ -195,6 +198,7 @@ def CTS_demand_scale(aggregation_level): Profiles scaled up to annual demand """ + sources, targets = load_sources_and_targets("HeatTimeSeries") scenarios = config.settings()["egon-data"]["--scenarios"] CTS_district = pd.DataFrame() @@ -214,7 +218,7 @@ def CTS_demand_scale(aggregation_level): demand = db.select_dataframe( f""" SELECT demand, zensus_population_id - FROM demand.egon_peta_heat + FROM {sources.tables["heat_demand_cts"]} WHERE sector = 'service' AND scenario = '{scenario}' ORDER BY zensus_population_id @@ -225,7 +229,7 @@ def CTS_demand_scale(aggregation_level): district_heating = 
db.select_dataframe( f""" SELECT area_id, zensus_population_id - FROM demand.egon_map_zensus_district_heating_areas + FROM {sources.tables["district_heating_areas"]} WHERE scenario = '{scenario}' """ ) @@ -270,9 +274,9 @@ def CTS_demand_scale(aggregation_level): mv_grid_ind = db.select_dataframe( f""" SELECT bus_id, a.zensus_population_id - FROM boundaries.egon_map_zensus_grid_districts a + FROM {sources.tables["map_zensus_grid_districts"]} a - JOIN demand.egon_peta_heat c + JOIN {sources.tables["heat_demand_cts"]} c ON a.zensus_population_id = c.zensus_population_id WHERE c.scenario = '{scenario}' diff --git a/src/egon/data/datasets/heat_etrago/__init__.py b/src/egon/data/datasets/heat_etrago/__init__.py index 8139417a6..30d11c5ce 100644 --- a/src/egon/data/datasets/heat_etrago/__init__.py +++ b/src/egon/data/datasets/heat_etrago/__init__.py @@ -5,7 +5,7 @@ import pandas as pd from egon.data import config, db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.etrago_setup import link_geom_from_buses from egon.data.datasets.heat_etrago.power_to_heat import ( insert_central_power_to_heat, @@ -27,8 +27,8 @@ def insert_buses(carrier, scenario): Name of the scenario. """ - sources = config.datasets()["etrago_heat"]["sources"] - target = config.datasets()["etrago_heat"]["targets"]["heat_buses"] + sources = HeatEtrago.sources + target = HeatEtrago.targets.tables["heat_buses"] # Delete existing heat buses (central or rural) db.execute_sql( f""" @@ -56,8 +56,8 @@ def insert_buses(carrier, scenario): areas = db.select_geodataframe( f""" SELECT area_id, geom_polygon as geom - FROM {sources['district_heating_areas']['schema']}. - {sources['district_heating_areas']['table']} + FROM {sources.tables['map_district_heating_areas']['schema']}. 
+ {sources.tables['map_district_heating_areas']['table']} WHERE scenario = '{scenario}' """, index_col="area_id", @@ -69,17 +69,20 @@ def insert_buses(carrier, scenario): mv_grids = db.select_geodataframe( f""" SELECT ST_Centroid(geom) AS geom - FROM {sources['mv_grids']['schema']}. - {sources['mv_grids']['table']} + FROM {sources.tables['mv_grids']['schema']}. + {sources.tables['mv_grids']['table']} WHERE bus_id IN (SELECT DISTINCT bus_id - FROM boundaries.egon_map_zensus_grid_districts a - JOIN demand.egon_peta_heat b + FROM {sources.tables['map_zensus_grid_districts']['schema']}. + {sources.tables['map_zensus_grid_districts']['table']} a + JOIN {sources.tables['heat_demand']['schema']}. + {sources.tables['heat_demand']['table']} b ON a.zensus_population_id = b.zensus_population_id WHERE b.scenario = '{scenario}' AND b.zensus_population_id NOT IN ( SELECT zensus_population_id FROM - demand.egon_map_zensus_district_heating_areas + {sources.tables['map_district_heating_areas']['schema']}. + {sources.tables['map_district_heating_areas']['table']} WHERE scenario = '{scenario}' ) ) @@ -105,13 +108,13 @@ def insert_buses(carrier, scenario): def insert_store(scenario, carrier): - sources = config.datasets()["etrago_heat"]["sources"] - targets = config.datasets()["etrago_heat"]["targets"] + sources = HeatEtrago.sources + targets = HeatEtrago.targets db.execute_sql( f""" - DELETE FROM {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + DELETE FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE carrier = '{carrier}_store' AND scn_name = '{scenario}' AND country = 'DE' @@ -119,34 +122,34 @@ def insert_store(scenario, carrier): ) db.execute_sql( f""" - DELETE FROM {targets['heat_links']['schema']}. - {targets['heat_links']['table']} + DELETE FROM {targets.tables['heat_links']['schema']}. 
+ {targets.tables['heat_links']['table']} WHERE carrier LIKE '{carrier}_store%' AND scn_name = '{scenario}' AND bus0 IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') AND bus1 IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') """ ) db.execute_sql( f""" - DELETE FROM {targets['heat_stores']['schema']}. - {targets['heat_stores']['table']} + DELETE FROM {targets.tables['heat_stores']['schema']}. + {targets.tables['heat_stores']['table']} WHERE carrier = '{carrier}_store' AND scn_name = '{scenario}' AND bus IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') """ @@ -155,8 +158,8 @@ def insert_store(scenario, carrier): dh_bus = db.select_geodataframe( f""" SELECT * FROM - {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + {targets.tables['heat_buses']['schema']}. 
+ {targets.tables['heat_buses']['table']} WHERE carrier = '{carrier}' AND scn_name = '{scenario}' AND country = 'DE' @@ -172,8 +175,8 @@ def insert_store(scenario, carrier): ) water_tank_bus.to_postgis( - targets["heat_buses"]["table"], - schema=targets["heat_buses"]["schema"], + targets.tables["heat_buses"]["table"], + schema=targets.tables["heat_buses"]["schema"], con=db.engine(), if_exists="append", index=False, @@ -200,8 +203,8 @@ def insert_store(scenario, carrier): ) water_tank_charger.to_sql( - targets["heat_links"]["table"], - schema=targets["heat_links"]["schema"], + targets.tables["heat_links"]["table"], + schema=targets.tables["heat_links"]["schema"], con=db.engine(), if_exists="append", index=False, @@ -228,8 +231,8 @@ def insert_store(scenario, carrier): ) water_tank_discharger.to_sql( - targets["heat_links"]["table"], - schema=targets["heat_links"]["schema"], + targets.tables["heat_links"]["table"], + schema=targets.tables["heat_links"]["schema"], con=db.engine(), if_exists="append", index=False, @@ -255,8 +258,8 @@ def insert_store(scenario, carrier): ) water_tank_store.to_sql( - targets["heat_stores"]["table"], - schema=targets["heat_stores"]["schema"], + targets.tables["heat_stores"]["table"], + schema=targets.tables["heat_stores"]["schema"], con=db.engine(), if_exists="append", index=False, @@ -283,19 +286,19 @@ def insert_rural_direct_heat(scenario): None. """ - sources = config.datasets()["etrago_heat"]["sources"] - targets = config.datasets()["etrago_heat"]["targets"] + sources = HeatEtrago.sources + targets = HeatEtrago.targets db.execute_sql( f""" - DELETE FROM {targets['heat_generators']['schema']}. - {targets['heat_generators']['table']} + DELETE FROM {targets.tables['heat_generators']['schema']}. + {targets.tables['heat_generators']['table']} WHERE carrier IN ('rural_solar_thermal') AND scn_name = '{scenario}' AND bus IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. 
- {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') """ @@ -303,13 +306,13 @@ def insert_rural_direct_heat(scenario): db.execute_sql( f""" - DELETE FROM {targets['heat_generator_timeseries']['schema']}. - {targets['heat_generator_timeseries']['table']} + DELETE FROM {targets.tables['heat_generator_timeseries']['schema']}. + {targets.tables['heat_generator_timeseries']['table']} WHERE scn_name = '{scenario}' AND generator_id NOT IN ( SELECT generator_id FROM - {targets['heat_generators']['schema']}. - {targets['heat_generators']['table']} + {targets.tables['heat_generators']['schema']}. + {targets.tables['heat_generators']['table']} WHERE scn_name = '{scenario}') """ ) @@ -318,10 +321,10 @@ def insert_rural_direct_heat(scenario): f""" SELECT mv_grid_id as power_bus, a.carrier, capacity, b.bus_id as heat_bus, geom as geometry - FROM {sources['individual_heating_supply']['schema']}. - {sources['individual_heating_supply']['table']} a - JOIN {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} b + FROM {sources.tables['individual_heating_supply']['schema']}. + {sources.tables['individual_heating_supply']['table']} a + JOIN {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} b ON ST_Intersects( ST_Buffer(ST_Transform(ST_Centroid(a.geometry), 4326), 0.00000001), geom) @@ -352,8 +355,8 @@ def insert_rural_direct_heat(scenario): weather_cells = db.select_geodataframe( f""" SELECT w_id, geom - FROM {sources['weather_cells']['schema']}. - {sources['weather_cells']['table']} + FROM {sources.tables['weather_cells']['schema']}. + {sources.tables['weather_cells']['table']} """, index_col="w_id", ) @@ -366,8 +369,8 @@ def insert_rural_direct_heat(scenario): feedin = db.select_dataframe( f""" SELECT w_id, feedin - FROM {sources['feedin_timeseries']['schema']}. 
- {sources['feedin_timeseries']['table']} + FROM {sources.tables['feedin_timeseries']['schema']}. + {sources.tables['feedin_timeseries']['table']} WHERE carrier = 'solar_thermal' AND weather_year = {weather_year} """, @@ -388,15 +391,15 @@ def insert_rural_direct_heat(scenario): generator = generator.set_index("generator_id") generator.to_sql( - targets["heat_generators"]["table"], - schema=targets["heat_generators"]["schema"], + targets.tables["heat_generators"]["table"], + schema=targets.tables["heat_generators"]["schema"], if_exists="append", con=db.engine(), ) timeseries.to_sql( - targets["heat_generator_timeseries"]["table"], - schema=targets["heat_generator_timeseries"]["schema"], + targets.tables["heat_generator_timeseries"]["table"], + schema=targets.tables["heat_generator_timeseries"]["schema"], if_exists="append", con=db.engine(), ) @@ -415,19 +418,19 @@ def insert_central_direct_heat(scenario): None. """ - sources = config.datasets()["etrago_heat"]["sources"] - targets = config.datasets()["etrago_heat"]["targets"] + sources = HeatEtrago.sources + targets = HeatEtrago.targets db.execute_sql( f""" - DELETE FROM {targets['heat_generators']['schema']}. - {targets['heat_generators']['table']} + DELETE FROM {targets.tables['heat_generators']['schema']}. + {targets.tables['heat_generators']['table']} WHERE carrier IN ('solar_thermal_collector', 'geo_thermal') AND scn_name = '{scenario}' AND bus IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') """ @@ -435,13 +438,13 @@ def insert_central_direct_heat(scenario): db.execute_sql( f""" - DELETE FROM {targets['heat_generator_timeseries']['schema']}. - {targets['heat_generator_timeseries']['table']} + DELETE FROM {targets.tables['heat_generator_timeseries']['schema']}. 
+ {targets.tables['heat_generator_timeseries']['table']} WHERE scn_name = '{scenario}' AND generator_id NOT IN ( SELECT generator_id FROM - {targets['heat_generators']['schema']}. - {targets['heat_generators']['table']} + {targets.tables['heat_generators']['schema']}. + {targets.tables['heat_generators']['table']} WHERE scn_name = '{scenario}') """ ) @@ -449,8 +452,8 @@ def insert_central_direct_heat(scenario): central_thermal = db.select_geodataframe( f""" SELECT district_heating_id, capacity, geometry, carrier - FROM {sources['district_heating_supply']['schema']}. - {sources['district_heating_supply']['table']} + FROM {sources.tables['district_heating_supply']['schema']}. + {sources.tables['district_heating_supply']['table']} WHERE scenario = '{scenario}' AND carrier IN ( 'solar_thermal_collector', 'geo_thermal') @@ -462,10 +465,10 @@ def insert_central_direct_heat(scenario): map_dh_id_bus_id = db.select_dataframe( f""" SELECT bus_id, area_id, id FROM - {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} - JOIN {sources['district_heating_areas']['schema']}. - {sources['district_heating_areas']['table']} + {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} + JOIN {sources.tables['map_district_heating_areas']['schema']}. + {sources.tables['map_district_heating_areas']['table']} ON ST_Intersects( ST_Transform( ST_Buffer(ST_Centroid(geom_polygon), @@ -496,8 +499,8 @@ def insert_central_direct_heat(scenario): weather_cells = db.select_geodataframe( f""" SELECT w_id, geom - FROM {sources['weather_cells']['schema']}. - {sources['weather_cells']['table']} + FROM {sources.tables['weather_cells']['schema']}. + {sources.tables['weather_cells']['table']} """, index_col="w_id", ) @@ -510,8 +513,8 @@ def insert_central_direct_heat(scenario): feedin = db.select_dataframe( f""" SELECT w_id, feedin - FROM {sources['feedin_timeseries']['schema']}. 
- {sources['feedin_timeseries']['table']} + FROM {sources.tables['feedin_timeseries']['schema']}. + {sources.tables['feedin_timeseries']['table']} WHERE carrier = 'solar_thermal' AND weather_year = {weather_year} """, @@ -532,15 +535,15 @@ def insert_central_direct_heat(scenario): generator = generator.set_index("generator_id") generator.to_sql( - targets["heat_generators"]["table"], - schema=targets["heat_generators"]["schema"], + targets.tables["heat_generators"]["table"], + schema=targets.tables["heat_generators"]["schema"], if_exists="append", con=db.engine(), ) timeseries.to_sql( - targets["heat_generator_timeseries"]["table"], - schema=targets["heat_generator_timeseries"]["schema"], + targets.tables["heat_generator_timeseries"]["table"], + schema=targets.tables["heat_generator_timeseries"]["schema"], if_exists="append", con=db.engine(), ) @@ -560,24 +563,30 @@ def insert_central_gas_boilers(scenario): """ - sources = config.datasets()["etrago_heat"]["sources"] - targets = config.datasets()["etrago_heat"]["targets"] + sources = HeatEtrago.sources + targets = HeatEtrago.targets db.execute_sql( f""" - DELETE FROM {targets['heat_links']['schema']}. - {targets['heat_links']['table']} + DELETE FROM {targets.tables['heat_links']['schema']}. + {targets.tables['heat_links']['table']} WHERE carrier LIKE '%central_gas_boiler%' AND scn_name = '{scenario}' AND link_id IN( - SELECT link_id FROM grid.egon_etrago_link + SELECT link_id FROM + {targets.tables['heat_links']['schema']}. + {targets.tables['heat_links']['table']} WHERE bus0 IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM + {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE country = 'DE' AND scn_name = '{scenario}' ) AND bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM + {targets.tables['heat_buses']['schema']}. 
+ {targets.tables['heat_buses']['table']} WHERE country = 'DE' AND scn_name = '{scenario}' ) @@ -589,13 +598,13 @@ def insert_central_gas_boilers(scenario): f""" SELECT c.bus_id as bus0, b.bus_id as bus1, capacity, a.carrier, scenario as scn_name - FROM {sources['district_heating_supply']['schema']}. - {sources['district_heating_supply']['table']} a - JOIN {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} b + FROM {sources.tables['district_heating_supply']['schema']}. + {sources.tables['district_heating_supply']['table']} a + JOIN {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} b ON ST_Transform(ST_Centroid(geometry), 4326) = geom - JOIN {sources['ch4_voronoi']['schema']}. - {sources['ch4_voronoi']['table']} c + JOIN {sources.tables['ch4_voronoi']['schema']}. + {sources.tables['ch4_voronoi']['table']} c ON ST_Intersects(ST_Transform(a.geometry, 4326), c.geom) WHERE scenario = '{scenario}' AND b.scn_name = '{scenario}' @@ -633,8 +642,8 @@ def insert_central_gas_boilers(scenario): central_boilers.carrier = "central_gas_boiler" central_boilers.reset_index().to_postgis( - targets["heat_links"]["table"], - schema=targets["heat_links"]["schema"], + targets.tables["heat_links"]["table"], + schema=targets.tables["heat_links"]["schema"], con=db.engine(), if_exists="append", ) @@ -654,25 +663,25 @@ def insert_rural_gas_boilers(scenario): """ - sources = config.datasets()["etrago_heat"]["sources"] - targets = config.datasets()["etrago_heat"]["targets"] + sources = HeatEtrago.sources + targets = HeatEtrago.targets db.execute_sql( f""" - DELETE FROM {targets['heat_links']['schema']}. - {targets['heat_links']['table']} + DELETE FROM {targets.tables['heat_links']['schema']}. + {targets.tables['heat_links']['table']} WHERE carrier = 'rural_gas_boiler' AND scn_name = '{scenario}' AND bus0 IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. 
- {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') AND bus1 IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') """ @@ -682,13 +691,13 @@ def insert_rural_gas_boilers(scenario): f""" SELECT c.bus_id as bus0, b.bus_id as bus1, capacity, a.carrier, scenario as scn_name - FROM {sources['individual_heating_supply']['schema']}. - {sources['individual_heating_supply']['table']} a - JOIN {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} b + FROM {sources.tables['individual_heating_supply']['schema']}. + {sources.tables['individual_heating_supply']['table']} a + JOIN {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} b ON ST_Transform(ST_Centroid(a.geometry), 4326) = b.geom - JOIN {sources['ch4_voronoi']['schema']}. - {sources['ch4_voronoi']['table']} c + JOIN {sources.tables['ch4_voronoi']['schema']}. 
+ {sources.tables['ch4_voronoi']['table']} c ON ST_Intersects(ST_Transform(a.geometry, 4326), c.geom) WHERE scenario = '{scenario}' AND b.scn_name = '{scenario}' @@ -727,8 +736,8 @@ def insert_rural_gas_boilers(scenario): rural_boilers.carrier = "rural_gas_boiler" rural_boilers.reset_index().to_postgis( - targets["heat_links"]["table"], - schema=targets["heat_links"]["schema"], + targets.tables["heat_links"]["table"], + schema=targets.tables["heat_links"]["schema"], con=db.engine(), if_exists="append", ) @@ -804,7 +813,42 @@ class HeatEtrago(Dataset): #: name: str = "HeatEtrago" #: - version: str = "0.0.10" + version: str = "0.0.12" + + sources = DatasetSources( + tables={ + "scenario_capacities": {"schema": "supply", "table": "egon_scenario_capacities"}, + "district_heating_areas": {"schema": "demand", "table": "egon_district_heating_areas"}, + "map_district_heating_areas": {"schema": "demand", "table": "egon_map_zensus_district_heating_areas"}, + "mv_grids": {"schema": "grid", "table": "egon_mv_grid_district"}, + "district_heating_supply": {"schema": "supply", "table": "egon_district_heating"}, + "individual_heating_supply": {"schema": "supply", "table": "egon_individual_heating"}, + "weather_cells": {"schema": "supply", "table": "egon_era5_weather_cells"}, + "feedin_timeseries": {"schema": "supply", "table": "egon_era5_renewable_feedin"}, + "egon_mv_grid_district": {"schema": "grid", "table": "egon_mv_grid_district"}, + "heat_demand": {"schema": "demand", "table": "egon_peta_heat"}, + "ch4_voronoi": {"schema": "grid", "table": "egon_gas_voronoi"}, + "map_zensus_grid_districts": { + "schema": "boundaries", + "table": "egon_map_zensus_grid_districts", + }, + "zensus_population": { + "schema": "society", + "table": "destatis_zensus_population_per_ha", + }, + }, + ) + + targets = DatasetTargets( + tables={ + "heat_buses": {"schema": "grid", "table": "egon_etrago_bus"}, + "heat_generators": {"schema": "grid", "table": "egon_etrago_generator"}, + 
"heat_generator_timeseries": {"schema": "grid", "table": "egon_etrago_generator_timeseries"}, + "heat_links": {"schema": "grid", "table": "egon_etrago_link"}, + "heat_link_timeseries": {"schema": "grid", "table": "egon_etrago_link_timeseries"}, + "heat_stores": {"schema": "grid", "table": "egon_etrago_store"}, + }, + ) def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/heat_etrago/hts_etrago.py b/src/egon/data/datasets/heat_etrago/hts_etrago.py index 8b2ab0783..32939cdbe 100644 --- a/src/egon/data/datasets/heat_etrago/hts_etrago.py +++ b/src/egon/data/datasets/heat_etrago/hts_etrago.py @@ -6,14 +6,14 @@ import pandas as pd from egon.data import config, db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.scenario_parameters import get_sector_parameters from egon.data.db import next_etrago_id def hts_to_etrago(scenario): - sources = config.datasets()["etrago_heat"]["sources"] - targets = config.datasets()["etrago_heat"]["targets"] + sources = HtsEtragoTable.sources.tables + targets = HtsEtragoTable.targets.tables carriers = ["central_heat", "rural_heat", "rural_gas_boiler"] if "status" in scenario: @@ -26,8 +26,8 @@ def hts_to_etrago(scenario): bus_area = db.select_dataframe( f""" SELECT bus_id, area_id, id FROM - {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + {sources['heat_buses']['schema']}. + {sources['heat_buses']['table']} JOIN {sources['district_heating_areas']['schema']}. {sources['district_heating_areas']['table']} ON ST_Transform(ST_Centroid(geom_polygon), 4326) = geom @@ -42,7 +42,8 @@ def hts_to_etrago(scenario): disct_time_series = db.select_dataframe( f""" SELECT * FROM - demand.egon_timeseries_district_heating + {sources['district_heating_timeseries']['schema']}. 
+ {sources['district_heating_timeseries']['table']} WHERE scenario ='{scenario}' """ ) @@ -55,19 +56,19 @@ def hts_to_etrago(scenario): # interlinking heat_bus_id and mv_grid bus_id bus_sub = db.select_dataframe( f""" - SELECT {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']}.bus_id as heat_bus_id, + SELECT {sources['heat_buses']['schema']}. + {sources['heat_buses']['table']}.bus_id as heat_bus_id, {sources['egon_mv_grid_district']['schema']}. {sources['egon_mv_grid_district']['table']}.bus_id as bus_id FROM - {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + {sources['heat_buses']['schema']}. + {sources['heat_buses']['table']} JOIN {sources['egon_mv_grid_district']['schema']}. {sources['egon_mv_grid_district']['table']} ON ST_Transform(ST_Centroid({sources['egon_mv_grid_district']['schema']}. - {sources['egon_mv_grid_district']['table']}.geom), - 4326) = {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']}.geom + {sources['egon_mv_grid_district']['table']}.geom), + 4326) = {sources['heat_buses']['schema']}. + {sources['heat_buses']['table']}.geom WHERE carrier = '{carrier}' AND scn_name = '{scenario}' """ @@ -78,7 +79,8 @@ def hts_to_etrago(scenario): ind_time_series = db.select_dataframe( f""" SELECT scenario, bus_id, dist_aggregated_mw FROM - demand.egon_etrago_timeseries_individual_heating + {sources['individual_heating_timeseries']['schema']}. + {sources['individual_heating_timeseries']['table']} WHERE scenario ='{scenario}' AND carrier = 'heat_pump' """ @@ -101,7 +103,8 @@ def hts_to_etrago(scenario): ind_time_series = db.select_dataframe( f""" SELECT * FROM - demand.egon_etrago_timeseries_individual_heating + {sources['individual_heating_timeseries']['schema']}. 
+ {sources['individual_heating_timeseries']['table']} WHERE scenario ='{scenario}' AND carrier = 'CH4' """ @@ -120,7 +123,8 @@ def hts_to_etrago(scenario): gas_voronoi = db.select_geodataframe( f""" SELECT bus_id, geom FROM - grid.egon_gas_voronoi + {sources['ch4_voronoi']['schema']}. + {sources['ch4_voronoi']['table']} WHERE scn_name = '{scenario}' AND carrier = 'CH4' """ @@ -161,11 +165,12 @@ def hts_to_etrago(scenario): # Delete existing data from database db.execute_sql( f""" - DELETE FROM grid.egon_etrago_load + DELETE FROM {targets['loads']['schema']}.{targets['loads']['table']} WHERE scn_name = '{scenario}' AND carrier = '{carrier}' AND bus IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {sources['heat_buses']['schema']}. + {sources['heat_buses']['table']} WHERE country = 'DE' AND scn_name = '{scenario}' ) @@ -175,11 +180,11 @@ def hts_to_etrago(scenario): db.execute_sql( f""" DELETE FROM - grid.egon_etrago_load_timeseries + {targets['load_timeseries']['schema']}.{targets['load_timeseries']['table']} WHERE scn_name = '{scenario}' AND load_id NOT IN ( SELECT load_id FROM - grid.egon_etrago_load + {targets['loads']['schema']}.{targets['loads']['table']} WHERE scn_name = '{scenario}') """ ) @@ -196,8 +201,8 @@ def hts_to_etrago(scenario): etrago_load["sign"] = -1 etrago_load.to_sql( - "egon_etrago_load", - schema="grid", + targets["loads"]["table"], + schema=targets["loads"]["schema"], con=db.engine(), if_exists="append", index=False, @@ -210,8 +215,8 @@ def hts_to_etrago(scenario): etrago_load_timeseries["p_set"] = bus_ts.loc[:, "dist_aggregated_mw"] etrago_load_timeseries.to_sql( - "egon_etrago_load_timeseries", - schema="grid", + targets["load_timeseries"]["table"], + schema=targets["load_timeseries"]["schema"], con=db.engine(), if_exists="append", index=False, @@ -253,7 +258,50 @@ class HtsEtragoTable(Dataset): #: name: str = "HtsEtragoTable" #: - version: str = "0.0.6" + version: str = "0.0.7" + + sources = DatasetSources( + tables={ + 
# buses coming from HeatEtrago (used as source here) + "heat_buses": {"schema": "grid", "table": "egon_etrago_bus"}, + # polygons & MV grid districts + "district_heating_areas": { + "schema": "demand", + "table": "egon_district_heating_areas", + }, + "egon_mv_grid_district": { + "schema": "grid", + "table": "egon_mv_grid_district", + }, + # gas voronoi for CH4 + "ch4_voronoi": { + "schema": "grid", + "table": "egon_gas_voronoi", + }, + # time series inputs + "district_heating_timeseries": { + "schema": "demand", + "table": "egon_timeseries_district_heating", + }, + "individual_heating_timeseries": { + "schema": "demand", + "table": "egon_etrago_timeseries_individual_heating", + }, + }, + ) + + targets = DatasetTargets( + tables={ + "loads": { + "schema": "grid", + "table": "egon_etrago_load", + }, + "load_timeseries": { + "schema": "grid", + "table": "egon_etrago_load_timeseries", + }, + }, + ) def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/heat_etrago/power_to_heat.py b/src/egon/data/datasets/heat_etrago/power_to_heat.py index 4fa159869..6cf6b6bc6 100644 --- a/src/egon/data/datasets/heat_etrago/power_to_heat.py +++ b/src/egon/data/datasets/heat_etrago/power_to_heat.py @@ -5,8 +5,10 @@ import geopandas as gpd import pandas as pd -from egon.data import config, db +from egon.data import db from egon.data.datasets.scenario_parameters import get_sector_parameters +from egon.data.datasets import load_sources_and_targets + def insert_individual_power_to_heat(scenario): @@ -22,18 +24,16 @@ def insert_individual_power_to_heat(scenario): None. """ - - sources = config.datasets()["etrago_heat"]["sources"] - targets = config.datasets()["etrago_heat"]["targets"] + sources, targets = load_sources_and_targets("HeatEtrago") # Delete existing entries db.execute_sql( f""" - DELETE FROM {targets['heat_link_timeseries']['schema']}. - {targets['heat_link_timeseries']['table']} + DELETE FROM {targets.tables['heat_link_timeseries']['schema']}. 
+ {targets.tables['heat_link_timeseries']['table']} WHERE link_id IN ( - SELECT link_id FROM {targets['heat_links']['schema']}. - {targets['heat_links']['table']} + SELECT link_id FROM {targets.tables['heat_links']['schema']}. + {targets.tables['heat_links']['table']} WHERE carrier IN ('individual_heat_pump', 'rural_heat_pump', 'rural_resisitive_heater') AND scn_name = '{scenario}') @@ -42,20 +42,20 @@ def insert_individual_power_to_heat(scenario): ) db.execute_sql( f""" - DELETE FROM {targets['heat_links']['schema']}. - {targets['heat_links']['table']} + DELETE FROM {targets.tables['heat_links']['schema']}. + {targets.tables['heat_links']['table']} WHERE carrier IN ('individual_heat_pump', 'rural_heat_pump', 'rural_resisitive_heater') AND bus0 IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') AND bus1 IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') """ @@ -66,19 +66,19 @@ def insert_individual_power_to_heat(scenario): f""" SELECT mv_grid_id as power_bus, a.carrier, capacity, b.bus_id as heat_bus, d.feedin as cop - FROM {sources['individual_heating_supply']['schema']}. - {sources['individual_heating_supply']['table']} a - JOIN {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} b + FROM {sources.tables['individual_heating_supply']['schema']}. + {sources.tables['individual_heating_supply']['table']} a + JOIN {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} b ON ST_Intersects( ST_Buffer(ST_Transform(ST_Centroid(a.geometry), 4326), 0.00000001), geom) - JOIN {sources['weather_cells']['schema']}. 
- {sources['weather_cells']['table']} c + JOIN {sources.tables['weather_cells']['schema']}. + {sources.tables['weather_cells']['table']} c ON ST_Intersects( b.geom, c.geom) - JOIN {sources['feedin_timeseries']['schema']}. - {sources['feedin_timeseries']['table']} d + JOIN {sources.tables['feedin_timeseries']['schema']}. + {sources.tables['feedin_timeseries']['table']} d ON c.w_id = d.w_id WHERE scenario = '{scenario}' AND scn_name = '{scenario}' @@ -110,10 +110,10 @@ def insert_individual_power_to_heat(scenario): f""" SELECT mv_grid_id as power_bus, a.carrier, capacity, b.bus_id as heat_bus - FROM {sources['individual_heating_supply']['schema']}. - {sources['individual_heating_supply']['table']} a - JOIN {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} b + FROM {sources.tables['individual_heating_supply']['schema']}. + {sources.tables['individual_heating_supply']['table']} a + JOIN {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} b ON ST_Intersects( ST_Buffer(ST_Transform(ST_Centroid(a.geometry), 4326), 0.00000001), geom) @@ -155,18 +155,16 @@ def insert_central_power_to_heat(scenario): None. """ - - sources = config.datasets()["etrago_heat"]["sources"] - targets = config.datasets()["etrago_heat"]["targets"] + sources, targets = load_sources_and_targets("HeatEtrago") # Delete existing entries db.execute_sql( f""" - DELETE FROM {targets['heat_link_timeseries']['schema']}. - {targets['heat_link_timeseries']['table']} + DELETE FROM {targets.tables['heat_link_timeseries']['schema']}. + {targets.tables['heat_link_timeseries']['table']} WHERE link_id IN ( - SELECT link_id FROM {targets['heat_links']['schema']}. - {targets['heat_links']['table']} + SELECT link_id FROM {targets.tables['heat_links']['schema']}. 
+ {targets.tables['heat_links']['table']} WHERE carrier = 'central_heat_pump' AND scn_name = '{scenario}') AND scn_name = '{scenario}' @@ -175,19 +173,19 @@ def insert_central_power_to_heat(scenario): db.execute_sql( f""" - DELETE FROM {targets['heat_links']['schema']}. - {targets['heat_links']['table']} + DELETE FROM {targets.tables['heat_links']['schema']}. + {targets.tables['heat_links']['table']} WHERE carrier = 'central_heat_pump' AND bus0 IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') AND bus1 IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') """ @@ -197,14 +195,14 @@ def insert_central_power_to_heat(scenario): central_heat_pumps = db.select_geodataframe( f""" SELECT a.index, a.district_heating_id, a.carrier, a.category, a.capacity, a.geometry, a.scenario, d.feedin as cop - FROM {sources['district_heating_supply']['schema']}. - {sources['district_heating_supply']['table']} a - JOIN {sources['weather_cells']['schema']}. - {sources['weather_cells']['table']} c + FROM {sources.tables['district_heating_supply']['schema']}. + {sources.tables['district_heating_supply']['table']} a + JOIN {sources.tables['weather_cells']['schema']}. + {sources.tables['weather_cells']['table']} c ON ST_Intersects( ST_Transform(a.geometry, 4326), c.geom) - JOIN {sources['feedin_timeseries']['schema']}. - {sources['feedin_timeseries']['table']} d + JOIN {sources.tables['feedin_timeseries']['schema']}. 
+ {sources.tables['feedin_timeseries']['table']} d ON c.w_id = d.w_id WHERE scenario = '{scenario}' AND a.carrier = 'heat_pump' @@ -244,19 +242,19 @@ def insert_central_power_to_heat(scenario): # Delete existing entries db.execute_sql( f""" - DELETE FROM {targets['heat_links']['schema']}. - {targets['heat_links']['table']} + DELETE FROM {targets.tables['heat_links']['schema']}. + {targets.tables['heat_links']['table']} WHERE carrier = 'central_resistive_heater' AND bus0 IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') AND bus1 IN (SELECT bus_id - FROM {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} + FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' AND country = 'DE') """ @@ -266,8 +264,8 @@ def insert_central_power_to_heat(scenario): f""" SELECT district_heating_id, carrier, category, SUM(capacity) as capacity, geometry, scenario - FROM {sources['district_heating_supply']['schema']}. - {sources['district_heating_supply']['table']} + FROM {sources.tables['district_heating_supply']['schema']}. + {sources.tables['district_heating_supply']['table']} WHERE scenario = '{scenario}' AND carrier = 'resistive_heater' GROUP BY (district_heating_id, carrier, category, geometry, scenario) @@ -336,9 +334,7 @@ def insert_power_to_heat_per_level( None. """ - sources = config.datasets()["etrago_heat"]["sources"] - targets = config.datasets()["etrago_heat"]["targets"] - + sources, targets = load_sources_and_targets("HeatEtrago") if "central" in carrier: # Calculate heat pumps per electrical bus gdf = assign_electrical_bus( @@ -351,8 +347,8 @@ def insert_power_to_heat_per_level( # Select geometry of buses geom_buses = db.select_geodataframe( f""" - SELECT bus_id, geom FROM {targets['heat_buses']['schema']}. 
- {targets['heat_buses']['table']} + SELECT bus_id, geom FROM {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} WHERE scn_name = '{scenario}' """, index_col="bus_id", @@ -397,8 +393,8 @@ def insert_power_to_heat_per_level( # Insert data into database links.to_postgis( - targets["heat_links"]["table"], - schema=targets["heat_links"]["schema"], + targets.tables["heat_links"]["table"], + schema=targets.tables["heat_links"]["schema"], if_exists="append", con=db.engine(), ) @@ -418,8 +414,8 @@ def insert_power_to_heat_per_level( # Insert time-dependent data to database links_timeseries.to_sql( - targets["heat_link_timeseries"]["table"], - schema=targets["heat_link_timeseries"]["schema"], + targets.tables["heat_link_timeseries"]["table"], + schema=targets.tables["heat_link_timeseries"]["schema"], if_exists="append", con=db.engine(), index=False, @@ -495,18 +491,15 @@ def assign_electrical_bus( Heat pumps per electrical bus """ - - sources = config.datasets()["etrago_heat"]["sources"] - targets = config.datasets()["etrago_heat"]["targets"] - + sources, targets = load_sources_and_targets("HeatEtrago") # Map heat buses to district heating id and area_id heat_buses = db.select_dataframe( f""" SELECT bus_id, area_id, id FROM - {targets['heat_buses']['schema']}. - {targets['heat_buses']['table']} - JOIN {sources['district_heating_areas']['schema']}. - {sources['district_heating_areas']['table']} + {targets.tables['heat_buses']['schema']}. + {targets.tables['heat_buses']['table']} + JOIN {sources.tables['district_heating_areas']['schema']}. + {sources.tables['district_heating_areas']['table']} ON ST_Intersects( ST_Transform(ST_Buffer( ST_Centroid(geom_polygon), 0.0000001), 4326), geom) @@ -523,8 +516,8 @@ def assign_electrical_bus( mv_grid_district = db.select_geodataframe( f""" SELECT bus_id, geom FROM - {sources['egon_mv_grid_district']['schema']}. 
- {sources['egon_mv_grid_district']['table']} + {sources.tables['egon_mv_grid_district']['schema']}. + {sources.tables['egon_mv_grid_district']['table']} """, epsg=4326, ) @@ -534,14 +527,13 @@ def assign_electrical_bus( f""" SELECT area_id, a.zensus_population_id, geom_point as geom, sum(a.demand) as demand - FROM {sources['map_district_heating_areas']['schema']}. - {sources['map_district_heating_areas']['table']} b - JOIN {sources['heat_demand']['schema']}. - {sources['heat_demand']['table']} a + FROM {sources.tables['map_district_heating_areas']['schema']}. + {sources.tables['map_district_heating_areas']['table']} b + JOIN {sources.tables['heat_demand']['schema']}. + {sources.tables['heat_demand']['table']} a ON b.zensus_population_id = a.zensus_population_id - JOIN society.destatis_zensus_population_per_ha - ON society.destatis_zensus_population_per_ha.id = - a.zensus_population_id + JOIN {sources.tables['zensus_population']['schema']}.{sources.tables['zensus_population']['table']} + ON {sources.tables['zensus_population']['schema']}.{sources.tables['zensus_population']['table']}.id = a.zensus_population_id WHERE a.scenario = '{scenario}' AND b.scenario = '{scenario}' GROUP BY (area_id, a.zensus_population_id, geom_point) diff --git a/src/egon/data/datasets/heat_supply/__init__.py b/src/egon/data/datasets/heat_supply/__init__.py index bc4417257..b7870274f 100644 --- a/src/egon/data/datasets/heat_supply/__init__.py +++ b/src/egon/data/datasets/heat_supply/__init__.py @@ -12,7 +12,7 @@ import pandas as pd from egon.data import config, db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.district_heating_areas import EgonDistrictHeatingAreas from egon.data.datasets.heat_supply.district_heating import ( backup_gas_boilers, @@ -85,13 +85,12 @@ def district_heating(): None. 
""" - sources = config.datasets()["heat_supply"]["sources"] - targets = config.datasets()["heat_supply"]["targets"] + sources = HeatSupply.sources + targets = HeatSupply.targets db.execute_sql( f""" - DELETE FROM {targets['district_heating_supply']['schema']}. - {targets['district_heating_supply']['table']} + DELETE FROM {HeatSupply.targets.tables["district_heating_supply"]} """ ) @@ -101,8 +100,10 @@ def district_heating(): supply["scenario"] = scenario supply.to_postgis( - targets["district_heating_supply"]["table"], - schema=targets["district_heating_supply"]["schema"], + HeatSupply.targets.get_table_name("district_heating_supply"), + schema=HeatSupply.targets.get_table_schema( + "district_heating_supply" + ), con=db.engine(), if_exists="append", ) @@ -114,10 +115,8 @@ def district_heating(): f""" SELECT a.carrier, (SUM(a.capacity) - b.capacity) / SUM(a.capacity) as deviation - FROM {targets['district_heating_supply']['schema']}. - {targets['district_heating_supply']['table']} a, - {sources['scenario_capacities']['schema']}. 
- {sources['scenario_capacities']['table']} b + FROM {targets.tables['district_heating_supply']} a, + {sources.tables['scenario_capacities']} b WHERE a.scenario = '{scenario}' AND b.scenario_name = '{scenario}' AND b.carrier = CONCAT('urban_central_', a.carrier) @@ -135,8 +134,8 @@ def district_heating(): backup = backup_gas_boilers(scenario) backup.to_postgis( - targets["district_heating_supply"]["table"], - schema=targets["district_heating_supply"]["schema"], + targets.get_table_name("district_heating_supply"), + schema=targets.get_table_schema("district_heating_supply"), con=db.engine(), if_exists="append", ) @@ -147,8 +146,8 @@ def district_heating(): if not backup_rh.empty: backup_rh.to_postgis( - targets["district_heating_supply"]["table"], - schema=targets["district_heating_supply"]["schema"], + targets.get_table_name("district_heating_supply"), + schema=targets.get_table_schema("district_heating_supply"), con=db.engine(), if_exists="append", ) @@ -162,13 +161,12 @@ def individual_heating(): None. """ - targets = config.datasets()["heat_supply"]["targets"] + targets = HeatSupply.targets for scenario in config.settings()["egon-data"]["--scenarios"]: db.execute_sql( f""" - DELETE FROM {targets['individual_heating_supply']['schema']}. 
- {targets['individual_heating_supply']['table']} + DELETE FROM {targets.tables['individual_heating_supply']} WHERE scenario = '{scenario}' """ ) @@ -184,8 +182,8 @@ def individual_heating(): supply["scenario"] = scenario supply.to_postgis( - targets["individual_heating_supply"]["table"], - schema=targets["individual_heating_supply"]["schema"], + targets.get_table_name("individual_heating_supply"), + schema=targets.get_table_schema("individual_heating_supply"), con=db.engine(), if_exists="append", ) @@ -389,7 +387,29 @@ class HeatSupply(Dataset): #: name: str = "HeatSupply" #: - version: str = "0.0.13" + version: str = "0.0.14" + + sources = DatasetSources( + tables={ + "scenario_capacities": "supply.egon_scenario_capacities", + "district_heating_areas": "demand.egon_district_heating_areas", + "chp": "supply.egon_chp_plants", + "federal_states": "boundaries.vg250_lan", + "heat_demand": "demand.egon_peta_heat", + "map_zensus_grid": "boundaries.egon_map_zensus_grid_districts", + "map_vg250_grid": "boundaries.egon_map_mvgriddistrict_vg250", + "mv_grids": "grid.egon_mv_grid_district", + "map_dh": "demand.egon_map_zensus_district_heating_areas", + "etrago_buses": "grid.egon_etrago_bus", + } + ) + + targets = DatasetTargets( + tables={ + "district_heating_supply": "supply.egon_district_heating", + "individual_heating_supply": "supply.egon_individual_heating", + } + ) def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/heat_supply/district_heating.py b/src/egon/data/datasets/heat_supply/district_heating.py index 565a01ebf..2404fe303 100644 --- a/src/egon/data/datasets/heat_supply/district_heating.py +++ b/src/egon/data/datasets/heat_supply/district_heating.py @@ -8,6 +8,7 @@ from egon.data import config, db from egon.data.datasets.heat_supply.geothermal import calc_geothermal_costs +from egon.data.datasets import load_sources_and_targets def capacity_per_district_heating_category(district_heating_areas, scenario): @@ -26,13 +27,12 @@ 
def capacity_per_district_heating_category(district_heating_areas, scenario): Installed capacities per technology and size category """ - sources = config.datasets()["heat_supply"]["sources"] + sources, targets = load_sources_and_targets("HeatSupply") target_values = db.select_dataframe( f""" SELECT capacity, split_part(carrier, 'urban_central_', 2) as technology - FROM {sources['scenario_capacities']['schema']}. - {sources['scenario_capacities']['table']} + FROM {sources.tables['scenario_capacities']} WHERE carrier IN ( 'urban_central_heat_pump', 'urban_central_resistive_heater', @@ -125,7 +125,7 @@ def select_district_heating_areas(scenario): """ - sources = config.datasets()["heat_supply"]["sources"] + sources, targets = load_sources_and_targets("HeatSupply") max_demand_medium_district_heating = 96000 @@ -136,8 +136,7 @@ def select_district_heating_areas(scenario): SELECT id as district_heating_id, residential_and_service_demand as demand, geom_polygon as geom - FROM {sources['district_heating_areas']['schema']}. - {sources['district_heating_areas']['table']} + FROM {sources.tables['district_heating_areas']} WHERE scenario = '{scenario}' """, index_col="district_heating_id", @@ -195,7 +194,7 @@ def cascade_per_technology( List of plants per district heating grid for the selected technology """ - sources = config.datasets()["heat_supply"]["sources"] + sources, targets = load_sources_and_targets("HeatSupply") tech = technologies[technologies.priority == technologies.priority.max()] @@ -205,10 +204,8 @@ def cascade_per_technology( # Select chp plants from database gdf_chp = db.select_geodataframe( f"""SELECT a.geom, th_capacity as capacity, c.area_id - FROM {sources['chp']['schema']}. - {sources['chp']['table']} a, - {sources['district_heating_areas']['schema']}. 
- {sources['district_heating_areas']['table']} c + FROM {sources.tables['chp']} a, + {sources.tables['district_heating_areas']} c WHERE a.district_heating = True AND a.district_heating_area_id = c.area_id AND a.scenario = '{scenario}' @@ -431,6 +428,8 @@ def backup_resistive_heaters(scenario): """ + sources, targets = load_sources_and_targets("HeatSupply") + # Select district heating areas from database district_heating_areas = select_district_heating_areas(scenario) @@ -438,7 +437,7 @@ def backup_resistive_heaters(scenario): target_value = db.select_dataframe( f""" SELECT capacity - FROM supply.egon_scenario_capacities + FROM {sources.tables['scenario_capacities']} WHERE carrier = 'urban_central_resistive_heater' AND scenario_name = '{scenario}' """ @@ -447,7 +446,7 @@ def backup_resistive_heaters(scenario): distributed = db.select_dataframe( f""" SELECT SUM(capacity) as capacity - FROM supply.egon_district_heating + FROM {targets.tables['district_heating_supply']} WHERE carrier = 'resistive_heater' AND scenario = '{scenario}' """ diff --git a/src/egon/data/datasets/heat_supply/individual_heating.py b/src/egon/data/datasets/heat_supply/individual_heating.py index 0b9b6f552..439051a55 100644 --- a/src/egon/data/datasets/heat_supply/individual_heating.py +++ b/src/egon/data/datasets/heat_supply/individual_heating.py @@ -50,6 +50,8 @@ # get zensus cells with district heating from egon.data.datasets.zensus_mv_grid_districts import MapZensusGridDistricts +from egon.data.datasets import load_sources_and_targets + engine = db.engine() Base = declarative_base() @@ -602,7 +604,7 @@ def cascade_per_technology( List of plants per mv grid for the selected technology """ - sources = config.datasets()["heat_supply"]["sources"] + sources, targets = load_sources_and_targets("HeatSupply") tech = technologies[technologies.priority == technologies.priority.max()] @@ -613,10 +615,8 @@ def cascade_per_technology( target = db.select_dataframe( f""" SELECT DISTINCT ON (gen) gen as 
state, capacity - FROM {sources['scenario_capacities']['schema']}. - {sources['scenario_capacities']['table']} a - JOIN {sources['federal_states']['schema']}. - {sources['federal_states']['table']} b + FROM {sources.tables['scenario_capacities']} a + JOIN {sources.tables['federal_states']} b ON a.nuts = b.nuts WHERE scenario_name = '{scenario}' AND carrier = 'residential_rural_heat_pump' @@ -639,8 +639,7 @@ def cascade_per_technology( target = db.select_dataframe( f""" SELECT SUM(capacity) AS capacity - FROM {sources['scenario_capacities']['schema']}. - {sources['scenario_capacities']['table']} a + FROM {sources.tables['scenario_capacities']} a WHERE scenario_name = '{scenario}' AND carrier = 'rural_heat_pump' """ @@ -685,8 +684,7 @@ def cascade_per_technology( target = db.select_dataframe( f""" SELECT SUM(capacity) AS capacity - FROM {sources['scenario_capacities']['schema']}. - {sources['scenario_capacities']['table']} a + FROM {sources.tables['scenario_capacities']} a WHERE scenario_name = '{scenario}' AND carrier = 'rural_{tech.index[0]}' """ @@ -758,28 +756,24 @@ def cascade_heat_supply_indiv(scenario, distribution_level, plotting=True): """ - sources = config.datasets()["heat_supply"]["sources"] + sources, targets = load_sources_and_targets("HeatSupply") # Select residential heat demand per mv grid district and federal state heat_per_mv = db.select_geodataframe( f""" SELECT d.bus_id as bus_id, SUM(demand) as demand, c.vg250_lan as state, d.geom - FROM {sources['heat_demand']['schema']}. - {sources['heat_demand']['table']} a - JOIN {sources['map_zensus_grid']['schema']}. - {sources['map_zensus_grid']['table']} b + FROM {sources.tables['heat_demand']} a + JOIN {sources.tables['map_zensus_grid']} b ON a.zensus_population_id = b.zensus_population_id - JOIN {sources['map_vg250_grid']['schema']}. - {sources['map_vg250_grid']['table']} c + JOIN {sources.tables['map_vg250_grid']} c ON b.bus_id = c.bus_id - JOIN {sources['mv_grids']['schema']}. 
- {sources['mv_grids']['table']} d + JOIN {sources.tables['mv_grids']} d ON d.bus_id = c.bus_id WHERE scenario = '{scenario}' AND a.zensus_population_id NOT IN ( SELECT zensus_population_id - FROM {sources['map_dh']['schema']}.{sources['map_dh']['table']} + FROM {sources.tables['map_dh']} WHERE scenario = '{scenario}') GROUP BY d.bus_id, vg250_lan, geom """, diff --git a/src/egon/data/datasets/helpers.py b/src/egon/data/datasets/helpers.py new file mode 100644 index 000000000..898a30ebf --- /dev/null +++ b/src/egon/data/datasets/helpers.py @@ -0,0 +1,198 @@ +""" +Helpers: constants and functions for motorized individual travel +""" + +from pathlib import Path +import json + +import numpy as np +import pandas as pd + +import egon.data.config + +TESTMODE_OFF = ( + egon.data.config.settings()["egon-data"]["--dataset-boundary"] + == "Everything" +) +WORKING_DIR = Path(".", "emobility") +DATA_BUNDLE_DIR = Path( + ".", + "data_bundle_egon_data", + "emobility", +) +DATASET_CFG = egon.data.config.datasets()["emobility_mit"] +COLUMNS_KBA = [ + "reg_district", + "total", + "mini", + "medium", + "luxury", + "unknown", +] +CONFIG_EV = { + "bev_mini": { + "column": "mini", + "tech_share": "bev_mini_share", + "share": "mini_share", + "factor": "mini_factor", + }, + "bev_medium": { + "column": "medium", + "tech_share": "bev_medium_share", + "share": "medium_share", + "factor": "medium_factor", + }, + "bev_luxury": { + "column": "luxury", + "tech_share": "bev_luxury_share", + "share": "luxury_share", + "factor": "luxury_factor", + }, + "phev_mini": { + "column": "mini", + "tech_share": "phev_mini_share", + "share": "mini_share", + "factor": "mini_factor", + }, + "phev_medium": { + "column": "medium", + "tech_share": "phev_medium_share", + "share": "medium_share", + "factor": "medium_factor", + }, + "phev_luxury": { + "column": "luxury", + "tech_share": "phev_luxury_share", + "share": "luxury_share", + "factor": "luxury_factor", + }, +} +TRIP_COLUMN_MAPPING = { + "location": 
"location", + "use_case": "use_case", + "nominal_charging_capacity_kW": "charging_capacity_nominal", + "grid_charging_capacity_kW": "charging_capacity_grid", + "battery_charging_capacity_kW": "charging_capacity_battery", + "soc_start": "soc_start", + "soc_end": "soc_end", + "chargingdemand_kWh": "charging_demand", + "park_start_timesteps": "park_start", + "park_end_timesteps": "park_end", + "drive_start_timesteps": "drive_start", + "drive_end_timesteps": "drive_end", + "consumption_kWh": "consumption", +} +MVGD_MIN_COUNT = 3600 if TESTMODE_OFF else 150 + + +def read_kba_data(): + """Read KBA data from CSV""" + return pd.read_csv( + WORKING_DIR + / egon.data.config.datasets()["emobility_mit"]["original_data"][ + "sources" + ]["KBA"]["file_processed"] + ) + + +def read_rs7_data(): + """Read RegioStaR7 data from CSV""" + return pd.read_csv( + WORKING_DIR + / egon.data.config.datasets()["emobility_mit"]["original_data"][ + "sources" + ]["RS7"]["file_processed"] + ) + + +def read_simbev_metadata_file(scenario_name, section): + """Read metadata of simBEV run + + Parameters + ---------- + scenario_name : str + Scenario name + section : str + Metadata section to be returned, one of + * "tech_data" + * "charge_prob_slow" + * "charge_prob_fast" + + Returns + ------- + pd.DataFrame + Config data + """ + trips_cfg = DATASET_CFG["original_data"]["sources"]["trips"] + meta_file = DATA_BUNDLE_DIR / Path( + "mit_trip_data", + trips_cfg[scenario_name]["file"].split(".")[0], + trips_cfg[scenario_name]["file_metadata"], + ) + with open(meta_file) as f: + meta = json.loads(f.read()) + return pd.DataFrame.from_dict(meta.get(section, dict()), orient="index") + + +def reduce_mem_usage( + df: pd.DataFrame, show_reduction: bool = False +) -> pd.DataFrame: + """Function to automatically check if columns of a pandas DataFrame can + be reduced to a smaller data type. 
Source: + https://www.mikulskibartosz.name/how-to-reduce-memory-usage-in-pandas/ + + Parameters + ---------- + df: pd.DataFrame + DataFrame to reduce memory usage on + show_reduction : bool + If True, print amount of memory reduced + + Returns + ------- + pd.DataFrame + DataFrame with memory usage decreased + """ + start_mem = df.memory_usage().sum() / 1024 ** 2 + + for col in df.columns: + col_type = df[col].dtype + + if col_type != object and str(col_type) != "category": + c_min = df[col].min() + c_max = df[col].max() + + if str(col_type)[:3] == "int": + if ( + c_min > np.iinfo(np.int16).min + and c_max < np.iinfo(np.int16).max + ): + df[col] = df[col].astype("int16") + elif ( + c_min > np.iinfo(np.int32).min + and c_max < np.iinfo(np.int32).max + ): + df[col] = df[col].astype("int32") + else: + df[col] = df[col].astype("int64") + else: + if ( + c_min > np.finfo(np.float32).min + and c_max < np.finfo(np.float32).max + ): + df[col] = df[col].astype("float32") + else: + df[col] = df[col].astype("float64") + + else: + df[col] = df[col].astype("category") + + end_mem = df.memory_usage().sum() / 1024 ** 2 + + if show_reduction is True: + print( + "Reduced memory usage of DataFrame by " + f"{(1 - end_mem/start_mem) * 100:.2f} %." 
+ ) + + return df diff --git a/src/egon/data/datasets/hydrogen_etrago/__init__.py b/src/egon/data/datasets/hydrogen_etrago/__init__.py index ba32504d0..04ff3d24a 100755 --- a/src/egon/data/datasets/hydrogen_etrago/__init__.py +++ b/src/egon/data/datasets/hydrogen_etrago/__init__.py @@ -16,7 +16,7 @@ """ from egon.data import config -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.hydrogen_etrago.bus import insert_hydrogen_buses from egon.data.datasets.hydrogen_etrago.h2_grid import insert_h2_pipelines from egon.data.datasets.hydrogen_etrago.h2_to_ch4 import insert_h2_to_ch4_to_h2 @@ -53,7 +53,25 @@ class HydrogenBusEtrago(Dataset): #: name: str = "HydrogenBusEtrago" #: - version: str = "0.0.1" + version: str = "0.0.4" + + sources = DatasetSources( + tables={ + "saltcavern_data": {"schema": "grid", "table": "egon_saltstructures_storage_potential"}, + "buses": {"schema": "grid", "table": "egon_etrago_bus"}, + "H2_AC_map": {"schema": "grid", "table": "egon_etrago_ac_h2"}, + "vg250_federal_states": {"schema": "boundaries", "table": "vg250_lan"}, + "saltcaverns": {"schema": "boundaries", "table": "inspee_saltstructures"}, + }, + ) + + targets = DatasetTargets( + tables={ + "hydrogen_buses": {"schema": "grid", "table": "egon_etrago_bus"}, + "H2_AC_map": {"schema": "grid", "table": "egon_etrago_ac_h2"}, + "storage_potential": {"schema": "grid", "table": "egon_saltstructures_storage_potential"}, + }, + ) def __init__(self, dependencies): super().__init__( @@ -95,7 +113,20 @@ class HydrogenStoreEtrago(Dataset): #: name: str = "HydrogenStoreEtrago" #: - version: str = "0.0.3" + version: str = "0.0.6" + + sources = DatasetSources( + tables={ + "saltcavern_data": {"schema": "grid", "table": "egon_saltstructures_storage_potential"}, + "buses": {"schema": "grid", "table": "egon_etrago_bus"}, + "H2_AC_map": {"schema": "grid", "table": "egon_etrago_ac_h2"}, + }, + ) + targets = DatasetTargets( + 
tables={ + "hydrogen_stores": {"schema": "grid", "table": "egon_etrago_store"}, + }, + ) def __init__(self, dependencies): super().__init__( @@ -133,7 +164,69 @@ class HydrogenPowerLinkEtrago(Dataset): #: name: str = "HydrogenPowerLinkEtrago" #: - version: str = "0.0.4" + version: str = "0.0.6" + + sources = DatasetSources( + tables={ + "buses": {"schema": "grid", "table": "egon_etrago_bus"}, + "links": {"schema": "grid", "table": "egon_etrago_link"}, + "H2_AC_map": {"schema": "grid", "table": "egon_etrago_ac_h2"}, + "ehv_substation": { + "schema": "grid", + "table": "egon_ehv_substation", + }, + "hvmv_substation": { + "schema": "grid", + "table": "egon_hvmv_substation", + }, + "loads": { + "schema": "grid", + "table": "egon_etrago_load", + }, + "load_timeseries": { + "schema": "grid", + "table": "egon_etrago_load_timeseries", + }, + "mv_districts": { + "schema": "grid", + "table": "egon_mv_grid_district", + }, + "ehv_voronoi": { + "schema": "grid", + "table": "egon_ehv_substation_voronoi", + }, + "district_heating_area": { + "schema": "demand", + "table": "egon_district_heating_areas", + }, + "o2_load_profile": { + "schema": "demand", + "table": "egon_demandregio_timeseries_cts_ind", + }, + }, + ) + targets = DatasetTargets( + tables={ + "hydrogen_links": {"schema": "grid", "table": "egon_etrago_link"}, + "loads": { + "schema": "grid", + "table": "egon_etrago_load", + }, + "load_timeseries": { + "schema": "grid", + "table": "egon_etrago_load_timeseries", + }, + "generators": { + "schema": "grid", + "table": "egon_etrago_generator", + }, + "buses": { + "schema": "grid", + "table": "egon_etrago_bus", + }, + }, + + ) def __init__(self, dependencies): super().__init__( @@ -170,14 +263,26 @@ class HydrogenMethaneLinkEtrago(Dataset): #: name: str = "HydrogenMethaneLinkEtrago" #: - version: str = "0.0.5" + version: str = "0.0.6" + + sources = DatasetSources( + tables={ + "buses": {"schema": "grid", "table": "egon_etrago_bus"}, + "links": {"schema": "grid", "table": 
"egon_etrago_link"}, + }, + ) + targets = DatasetTargets( + tables={ + "hydrogen_links": {"schema": "grid", "table": "egon_etrago_link"}, + }, + ) def __init__(self, dependencies): super().__init__( name=self.name, version=self.version, dependencies=dependencies, - tasks=(insert_h2_to_ch4_to_h2), + tasks=(insert_h2_to_ch4_to_h2,), ) @@ -206,14 +311,37 @@ class HydrogenGridEtrago(Dataset): #: name: str = "HydrogenGridEtrago" #: - version: str = "0.0.2" + version: str = "0.0.3" + + sources = DatasetSources( + urls={ + "new_constructed_pipes": "https://fnb-gas.de/wp-content/uploads/2024/07/2024_07_22_Anlage3_FNB_Massnahmenliste_Neubau.xlsx", + "converted_ch4_pipes": "https://fnb-gas.de/wp-content/uploads/2024/07/2024_07_22_Anlage4_FNB_Massnahmenliste_Umstellung.xlsx", + "pipes_of_further_h2_grid_operators": "https://fnb-gas.de/wp-content/uploads/2024/07/2024_07_22_Anlage2_Leitungsmeldungen_weiterer_potenzieller_Wasserstoffnetzbetreiber.xlsx", + }, + files={ + "new_constructed_pipes": "Anlage_3_Wasserstoffkernnetz_Neubau.xlsx", + "converted_ch4_pipes": "Anlage_4_Wasserstoffkernnetz_Umstellung.xlsx", + "pipes_of_further_h2_grid_operators": "Anlage_2_Wasserstoffkernetz_weitere_Leitungen.xlsx", + }, + tables={ + "buses": {"schema": "grid", "table": "egon_etrago_bus"}, + "links": {"schema": "grid", "table": "egon_etrago_link"}, + }, + ) + + targets = DatasetTargets( + tables={ + "hydrogen_links": {"schema": "grid", "table": "egon_etrago_link"}, + }, + ) def __init__(self, dependencies): super().__init__( name=self.name, version=self.version, dependencies=dependencies, - tasks=insert_h2_pipelines_for_scn, + tasks=(insert_h2_pipelines_for_scn,), ) diff --git a/src/egon/data/datasets/hydrogen_etrago/bus.py b/src/egon/data/datasets/hydrogen_etrago/bus.py index 8cb8d8d3d..4e0073f18 100755 --- a/src/egon/data/datasets/hydrogen_etrago/bus.py +++ b/src/egon/data/datasets/hydrogen_etrago/bus.py @@ -23,11 +23,14 @@ import numpy as np import pandas as pd -from egon.data import 
config, db +from egon.data import db from egon.data.datasets.etrago_helpers import ( finalize_bus_insertion, initialise_bus_insertion, ) +from egon.data.datasets import load_sources_and_targets + + def insert_hydrogen_buses(scn_name): @@ -42,6 +45,7 @@ def insert_hydrogen_buses(scn_name): Name of scenario """ + sources, targets = load_sources_and_targets("HydrogenBusEtrago") h2_input = pd.read_csv( Path(".") @@ -53,10 +57,7 @@ def insert_hydrogen_buses(scn_name): lambda wkb_hex: loads(bytes.fromhex(wkb_hex)) ) - sources = config.datasets()["etrago_hydrogen"]["sources"] - target_buses = config.datasets()["etrago_hydrogen"]["targets"][ - "hydrogen_buses" - ] + target_buses = targets.tables["hydrogen_buses"] h2_buses = initialise_bus_insertion( "H2_grid", target_buses, scenario=scn_name ) @@ -142,11 +143,11 @@ def insert_hydrogen_buses(scn_name): "H2_saltcavern", target_buses, scenario=scn_name ) insert_H2_buses_from_saltcavern( - hydrogen_buses, "H2_saltcavern", sources, target_buses, scn_name + hydrogen_buses, "H2_saltcavern", sources, targets, scn_name ) -def insert_H2_buses_from_saltcavern(gdf, carrier, sources, target, scn_name): +def insert_H2_buses_from_saltcavern(gdf, carrier, sources, targets, scn_name): """ Insert the H2 buses based on saltcavern locations into the database. @@ -159,9 +160,9 @@ def insert_H2_buses_from_saltcavern(gdf, carrier, sources, target, scn_name): GeoDataFrame containing the empty bus data. carrier : str Name of the carrier. - sources : dict + sources : DatasetSources Sources schema and table information. - target : dict + targets : DatasetTargets Target schema and table information. scn_name : str Name of the scenario. 
@@ -171,20 +172,21 @@ def insert_H2_buses_from_saltcavern(gdf, carrier, sources, target, scn_name): None """ + target_buses = targets.tables["hydrogen_buses"] + target_map = targets.tables["H2_AC_map"] # electrical buses related to saltcavern storage el_buses = db.select_dataframe( f""" SELECT bus_id - FROM {sources['saltcavern_data']['schema']}. - {sources['saltcavern_data']['table']}""" + FROM {sources.tables['saltcavern_data']['schema']}.{sources.tables['saltcavern_data']['table']}""" )["bus_id"] # locations of electrical buses (filtering not necessarily required) locations = db.select_geodataframe( f""" SELECT bus_id, geom - FROM {sources['buses']['schema']}. - {sources['buses']['table']} WHERE scn_name = '{scn_name}' + FROM {sources.tables['buses']['schema']}.{sources.tables['buses']['table']} + WHERE scn_name = '{scn_name}' AND country = 'DE'""", index_col="bus_id", ).to_crs(epsg=4326) @@ -199,7 +201,7 @@ def insert_H2_buses_from_saltcavern(gdf, carrier, sources, target, scn_name): # create H2 bus data hydrogen_bus_ids = finalize_bus_insertion( - locations, carrier, target, scenario=scn_name + locations, carrier, target_buses, scenario=scn_name ) gdf_H2_cavern = hydrogen_bus_ids[["bus_id"]].rename( @@ -210,9 +212,9 @@ def insert_H2_buses_from_saltcavern(gdf, carrier, sources, target, scn_name): # Insert data to db gdf_H2_cavern.to_sql( - "egon_etrago_ac_h2", + target_map["table"], db.engine(), - schema="grid", + schema=target_map["schema"], index=False, if_exists="replace", ) diff --git a/src/egon/data/datasets/hydrogen_etrago/h2_grid.py b/src/egon/data/datasets/hydrogen_etrago/h2_grid.py index 3f8da061d..f9eb0dbbb 100755 --- a/src/egon/data/datasets/hydrogen_etrago/h2_grid.py +++ b/src/egon/data/datasets/hydrogen_etrago/h2_grid.py @@ -19,15 +19,22 @@ import numpy as np import pandas as pd -from egon.data import config, db +from egon.data import db, config from egon.data.datasets.scenario_parameters import get_sector_parameters from 
egon.data.datasets.scenario_parameters.parameters import ( annualize_capital_costs, ) +from egon.data.datasets import load_sources_and_targets + + + + def insert_h2_pipelines(scn_name): "Insert H2_grid based on Input Data from FNB-Gas" + sources, targets = load_sources_and_targets("HydrogenGridEtrago") + download_h2_grid_data() H2_grid_Neubau, H2_grid_Umstellung, H2_grid_Erweiterung = ( @@ -41,12 +48,12 @@ def insert_h2_pipelines(scn_name): ) con = db.engine() - sources = config.datasets()["etrago_hydrogen"]["sources"] - target = config.datasets()["etrago_hydrogen"]["targets"]["hydrogen_links"] + target = targets.tables["hydrogen_links"] + h2_buses_df = pd.read_sql( f""" - SELECT bus_id, x, y FROM {sources["buses"]["schema"]}.{sources["buses"]["table"]} + SELECT bus_id, x, y FROM {sources.tables['buses']['schema']}.{sources.tables['buses']['table']} WHERE carrier in ('H2_grid') AND scn_name = '{scn_name}' """, @@ -56,17 +63,17 @@ def insert_h2_pipelines(scn_name): # Delete old entries db.execute_sql( f""" - DELETE FROM {target["schema"]}.{target["table"]} + DELETE FROM {target['schema']}.{target['table']} WHERE "carrier" = 'H2_grid' AND scn_name = '{scn_name}' AND bus0 IN ( SELECT bus_id - FROM {sources["buses"]["schema"]}.{sources["buses"]["table"]} + FROM {sources.tables['buses']['schema']}.{sources.tables['buses']['table']} WHERE country = 'DE' ) """ ) - target = config.datasets()["etrago_hydrogen"]["targets"]["hydrogen_links"] + for df in [H2_grid_Neubau, H2_grid_Umstellung, H2_grid_Erweiterung]: @@ -478,17 +485,15 @@ def download_h2_grid_data(): None """ + download_config = config.datasets()["etrago_hydrogen"]["sources"]["H2_grid"] + path = Path("datasets/h2_data") os.makedirs(path, exist_ok=True) - - download_config = config.datasets()["etrago_hydrogen"]["sources"][ - "H2_grid" - ] + target_file_Um = path / download_config["converted_ch4_pipes"]["path"] target_file_Neu = path / download_config["new_constructed_pipes"]["path"] - target_file_Erw = ( - path / 
download_config["pipes_of_further_h2_grid_operators"]["path"] - ) + target_file_Erw = path / download_config["pipes_of_further_h2_grid_operators"]["path"] + for target_file in [target_file_Neu, target_file_Um, target_file_Erw]: if target_file is target_file_Um: @@ -514,11 +519,9 @@ def read_h2_excel_sheets(): """ - + download_config = config.datasets()["etrago_hydrogen"]["sources"]["H2_grid"] path = Path(".") / "datasets" / "h2_data" - download_config = config.datasets()["etrago_hydrogen"]["sources"][ - "H2_grid" - ] + excel_file_Um = pd.ExcelFile( f'{path}/{download_config["converted_ch4_pipes"]["path"]}' ) @@ -622,26 +625,26 @@ def connect_saltcavern_to_h2_grid(scn_name): None """ + sources, targets = load_sources_and_targets("HydrogenGridEtrago") + - targets = config.datasets()["etrago_hydrogen"]["targets"] - sources = config.datasets()["etrago_hydrogen"]["sources"] engine = db.engine() db.execute_sql( f""" - DELETE FROM {targets["hydrogen_links"]["schema"]}.{targets["hydrogen_links"]["table"]} + DELETE FROM {targets.tables['hydrogen_links']['schema']}.{targets.tables['hydrogen_links']['table']} WHERE "carrier" in ('H2_saltcavern') AND scn_name = '{scn_name}'; """ ) h2_buses_query = f"""SELECT bus_id, x, y,ST_Transform(geom, 32632) as geom - FROM {sources["buses"]["schema"]}.{sources["buses"]["table"]} + FROM {sources.tables['buses']['schema']}.{sources.tables['buses']['table']} WHERE carrier = 'H2_grid' AND scn_name = '{scn_name}' """ h2_buses = gpd.read_postgis(h2_buses_query, engine) salt_caverns_query = f"""SELECT bus_id, x, y, ST_Transform(geom, 32632) as geom - FROM {sources["buses"]["schema"]}.{sources["buses"]["table"]} + FROM {sources.tables['buses']['schema']}.{sources.tables['buses']['table']} WHERE carrier = 'H2_saltcavern' AND scn_name = '{scn_name}' """ salt_caverns = gpd.read_postgis(salt_caverns_query, engine) @@ -690,9 +693,9 @@ def connect_saltcavern_to_h2_grid(scn_name): links_df = gpd.GeoDataFrame(links, geometry="geom", crs=4326) 
links_df.to_postgis( - targets["hydrogen_links"]["table"], + targets.tables["hydrogen_links"]["table"], engine, - schema=targets["hydrogen_links"]["schema"], + schema=targets.tables["hydrogen_links"]["schema"], index=False, if_exists="append", dtype={"geom": Geometry()}, @@ -710,14 +713,15 @@ def connect_h2_grid_to_neighbour_countries(scn_name): None """ + sources, targets = load_sources_and_targets("HydrogenGridEtrago") + engine = db.engine() - targets = config.datasets()["etrago_hydrogen"]["targets"] - sources = config.datasets()["etrago_hydrogen"]["sources"] + h2_buses_df = gpd.read_postgis( f""" SELECT bus_id, x, y, geom - FROM {sources["buses"]["schema"]}.{sources["buses"]["table"]} + FROM {sources.tables['buses']['schema']}.{sources.tables['buses']['table']} WHERE carrier in ('H2_grid') AND scn_name = '{scn_name}' @@ -728,7 +732,7 @@ def connect_h2_grid_to_neighbour_countries(scn_name): h2_links_df = pd.read_sql( f""" SELECT link_id, bus0, bus1, p_nom - FROM {sources["links"]["schema"]}.{sources["links"]["table"]} + FROM {sources.tables['links']['schema']}.{sources.tables['links']['table']} WHERE carrier in ('H2_grid') AND scn_name = '{scn_name}' @@ -739,7 +743,7 @@ def connect_h2_grid_to_neighbour_countries(scn_name): abroad_buses_df = gpd.read_postgis( f""" SELECT bus_id, x, y, geom, country - FROM {sources["buses"]["schema"]}.{sources["buses"]["table"]} + FROM {sources.tables['buses']['schema']}.{sources.tables['buses']['table']} WHERE carrier = 'H2' AND scn_name = '{scn_name}' AND country != 'DE' """, engine, @@ -863,9 +867,9 @@ def connect_h2_grid_to_neighbour_countries(scn_name): ) connection_links_df.to_postgis( - name=targets["hydrogen_links"]["table"], + name=targets.tables["hydrogen_links"]["table"], con=engine, - schema=targets["hydrogen_links"]["schema"], + schema=targets.tables["hydrogen_links"]["schema"], if_exists="append", index=False, ) diff --git a/src/egon/data/datasets/hydrogen_etrago/h2_to_ch4.py 
b/src/egon/data/datasets/hydrogen_etrago/h2_to_ch4.py index 0101825a3..1be653905 100755 --- a/src/egon/data/datasets/hydrogen_etrago/h2_to_ch4.py +++ b/src/egon/data/datasets/hydrogen_etrago/h2_to_ch4.py @@ -21,6 +21,9 @@ from egon.data import config, db from egon.data.datasets.scenario_parameters import get_sector_parameters +from egon.data.datasets import load_sources_and_targets + + def insert_h2_to_ch4_to_h2(): @@ -36,15 +39,13 @@ def insert_h2_to_ch4_to_h2(): None """ + sources, targets = load_sources_and_targets("HydrogenMethaneLinkEtrago") scenarios = config.settings()["egon-data"]["--scenarios"] con = db.engine() - target_links = config.datasets()["etrago_hydrogen"]["targets"][ - "hydrogen_links" - ] - target_buses = config.datasets()["etrago_hydrogen"]["targets"][ - "hydrogen_buses" - ] + target_links = targets.tables["hydrogen_links"] + target_buses = sources.tables["buses"] + if "status2019" in scenarios: scenarios.remove("status2019") diff --git a/src/egon/data/datasets/hydrogen_etrago/power_to_h2.py b/src/egon/data/datasets/hydrogen_etrago/power_to_h2.py index d82cc12f6..8944d10f2 100755 --- a/src/egon/data/datasets/hydrogen_etrago/power_to_h2.py +++ b/src/egon/data/datasets/hydrogen_etrago/power_to_h2.py @@ -30,6 +30,8 @@ from egon.data import config, db from egon.data.datasets.scenario_parameters import get_sector_parameters +from egon.data.datasets import load_sources_and_targets + def insert_power_to_h2_to_power(): @@ -62,6 +64,8 @@ def insert_power_to_h2_to_power(): None """ + sources, targets = load_sources_and_targets("HydrogenPowerLinkEtrago") + scenarios = config.settings()["egon-data"]["--scenarios"] # General Constant Parameters @@ -120,10 +124,6 @@ def insert_power_to_h2_to_power(): # connet to PostgreSQL database (to localhost) engine = db.engine() - data_config = config.datasets() - sources = data_config["PtH2_waste_heat_O2"]["sources"] - targets = data_config["PtH2_waste_heat_O2"]["targets"] - for SCENARIO_NAME in scenarios: if 
SCENARIO_NAME not in ["eGon100RE", "eGon2035"]: @@ -179,8 +179,8 @@ def insert_power_to_h2_to_power(): def export_o2_buses_to_db(df): max_bus_id = db.next_etrago_id("bus") next_bus_id = count(start=max_bus_id, step=1) - schema = targets["buses"]["schema"] - table_name = targets["buses"]["table"] + schema = targets.tables["buses"]["schema"] + table_name = targets.tables["buses"]["table"] db.execute_sql( f"DELETE FROM {schema}.{table_name} WHERE carrier = 'O2' AND scn_name='{SCENARIO_NAME}'" @@ -231,47 +231,47 @@ def export_o2_buses_to_db(df): queries = { WWTP: f""" SELECT bus_id AS id, geom, type AS ka_id - FROM {sources["buses"]["schema"]}.{sources["buses"]["table"]} + FROM {sources.tables["buses"]["schema"]}.{sources.tables["buses"]["table"]} WHERE carrier in ('O2') AND scn_name = '{SCENARIO_NAME}' """, H2: f""" SELECT bus_id AS id, geom - FROM {sources["buses"]["schema"]}.{sources["buses"]["table"]} + FROM {sources.tables["buses"]["schema"]}.{sources.tables["buses"]["table"]} WHERE carrier in ('H2_grid', 'H2') AND scn_name = '{SCENARIO_NAME}' AND country = 'DE' """, H2GRID: f""" SELECT link_id, geom, bus0, bus1 - FROM {sources["links"]["schema"]}.{sources["links"]["table"]} + FROM {sources.tables["links"]["schema"]}.{sources.tables["links"]["table"]} WHERE carrier in ('H2_grid') AND scn_name = '{SCENARIO_NAME}' """, AC: f""" SELECT bus_id AS id, geom - FROM {sources["buses"]["schema"]}.{sources["buses"]["table"]} + FROM {sources.tables["buses"]["schema"]}.{sources.tables["buses"]["table"]} WHERE carrier in ('AC') AND scn_name = '{SCENARIO_NAME}' AND v_nom = '110' """, ACSUB_HVMV: f""" SELECT bus_id AS id, point AS geom - FROM {sources["hvmv_substation"]["schema"]}.{sources["hvmv_substation"]["table"]} + FROM {sources.tables["hvmv_substation"]["schema"]}.{sources.tables["hvmv_substation"]["table"]} """, ACSUB_EHV: f""" SELECT bus_id AS id, point AS geom - FROM {sources["ehv_substation"]["schema"]}.{sources["ehv_substation"]["table"]} + FROM 
{sources.tables["ehv_substation"]["schema"]}.{sources.tables["ehv_substation"]["table"]} """, ACZONE_HVMV: f""" SELECT bus_id AS id, ST_Transform(geom, 4326) as geom - FROM {sources["mv_districts"]["schema"]}.{sources["mv_districts"]["table"]} + FROM {sources.tables["mv_districts"]["schema"]}.{sources.tables["mv_districts"]["table"]} """, ACZONE_EHV: f""" SELECT bus_id AS id, ST_Transform(geom, 4326) as geom - FROM {sources["ehv_voronoi"]["schema"]}.{sources["ehv_voronoi"]["table"]} + FROM {sources.tables["ehv_voronoi"]["schema"]}.{sources.tables["ehv_voronoi"]["table"]} """, HEAT_BUS: f""" SELECT bus_id AS id, geom - FROM {sources["buses"]["schema"]}.{sources["buses"]["table"]} + FROM {sources.tables["buses"]["schema"]}.{sources.tables["buses"]["table"]} WHERE carrier in ('central_heat') AND scn_name = '{SCENARIO_NAME}' AND country = 'DE' @@ -288,11 +288,11 @@ def export_o2_buses_to_db(df): with engine.connect() as conn: conn.execute( text( - f"""DELETE FROM {targets["links"]["schema"]}.{targets["links"]["table"]} + f"""DELETE FROM {sources.tables["links"]["schema"]}.{sources.tables["links"]["table"]} WHERE carrier IN ('power_to_H2', 'H2_to_power', 'PtH2_waste_heat', 'PtH2_O2') AND scn_name = '{SCENARIO_NAME}' AND bus0 IN ( SELECT bus_id - FROM {targets["buses"]["schema"]}.{targets["buses"]["table"]} + FROM {sources.tables["buses"]["schema"]}.{sources.tables["buses"]["table"]} WHERE country = 'DE' ) """ @@ -336,7 +336,7 @@ def prepare_dataframes_for_spartial_queries(): HEAT_AREA ] = f""" SELECT area_id, geom_polygon as geom - FROM {sources["district_heating_area"]["schema"]}.{sources["district_heating_area"]["table"]} + FROM {sources.tables["district_heating_area"]["schema"]}.{sources.tables["district_heating_area"]["table"]} WHERE scenario = '{SCENARIO_NAME}' """ dfs[HEAT_AREA] = gpd.read_postgis( @@ -377,7 +377,7 @@ def prepare_dataframes_for_spartial_queries(): HEAT_LOAD ] = f""" SELECT bus, load_id - FROM 
{sources["loads"]["schema"]}.{sources["loads"]["table"]} + FROM {sources.tables["loads"]["schema"]}.{sources.tables["loads"]["table"]} WHERE carrier in ('central_heat') AND scn_name = '{SCENARIO_NAME}' """ @@ -388,7 +388,7 @@ def prepare_dataframes_for_spartial_queries(): HEAT_TIMESERIES ] = f""" SELECT load_id, p_set - FROM {sources["load_timeseries"]["schema"]}.{sources["load_timeseries"]["table"]} + FROM {sources.tables["load_timeseries"]["schema"]}.{sources.tables["load_timeseries"]["table"]} WHERE load_id IN {load_ids} AND scn_name = '{SCENARIO_NAME}' """ @@ -1061,8 +1061,8 @@ def create_link_dataframes(links_h2, links_heat, links_O2): return power_to_H2, H2_to_power, power_to_Heat, power_to_O2 def export_links_to_db(df, carrier): - schema = targets["links"]["schema"] - table_name = targets["links"]["table"] + schema = targets.tables["hydrogen_links"]["schema"] + table_name = targets.tables["hydrogen_links"]["table"] gdf = gpd.GeoDataFrame(df, geometry="geom").set_crs(METRIC_CRS) gdf = gdf.to_crs(epsg=DATA_CRS) @@ -1083,8 +1083,8 @@ def export_links_to_db(df, carrier): def insert_o2_load_points(df): new_id = db.next_etrago_id("load") next_load_id = count(start=new_id, step=1) - schema = targets["loads"]["schema"] - table_name = targets["loads"]["table"] + schema = targets.tables["loads"]["schema"] + table_name = targets.tables["loads"]["table"] with engine.connect() as conn: conn.execute( f"DELETE FROM {schema}.{table_name} WHERE carrier = 'O2' AND scn_name = '{SCENARIO_NAME}'" @@ -1117,7 +1117,7 @@ def insert_o2_load_points(df): def insert_o2_load_timeseries(df): query_o2_timeseries = f""" SELECT load_curve - FROM {sources["o2_load_profile"]["schema"]}.{sources["o2_load_profile"]["table"]} + FROM {sources.tables["o2_load_profile"]["schema"]}.{sources.tables["o2_load_profile"]["table"]} WHERE slp = 'G3' AND wz = 3 """ @@ -1129,7 +1129,7 @@ def insert_o2_load_timeseries(df): with engine.connect() as conn: conn.execute( f""" - DELETE FROM 
{targets["load_timeseries"]["schema"]}.{targets["load_timeseries"]["table"]} + DELETE FROM {targets.tables["load_timeseries"]["schema"]}.{targets.tables["load_timeseries"]["table"]} WHERE load_id IN {tuple(df.load_id.values)} AND scn_name = '{SCENARIO_NAME}' """ @@ -1160,9 +1160,9 @@ def insert_o2_load_timeseries(df): lambda x: x.tolist() if isinstance(x, np.ndarray) else x ) timeseries_df[["scn_name", "load_id", "temp_id", "p_set"]].to_sql( - targets["load_timeseries"]["table"], + targets.tables["load_timeseries"]["table"], engine, - schema=targets["load_timeseries"]["schema"], + schema=targets.tables["load_timeseries"]["schema"], if_exists="append", index=False, ) @@ -1173,8 +1173,8 @@ def insert_o2_generators(df): new_id = db.next_etrago_id("generator") next_generator_id = count(start=new_id, step=1) - grid = targets["generators"]["schema"] - table_name = targets["generators"]["table"] + grid = targets.tables["generators"]["schema"] + table_name = targets.tables["generators"]["table"] with engine.connect() as conn: conn.execute( f"DELETE FROM {grid}.{table_name} WHERE carrier = 'O2' AND scn_name = '{SCENARIO_NAME}'" @@ -1211,7 +1211,7 @@ def adjust_ac_load_timeseries(df, o2_timeseries): AC_LOAD ] = f""" SELECT bus, load_id - FROM {sources["loads"]["schema"]}.{sources["loads"]["table"]} + FROM {sources.tables["loads"]["schema"]}.{sources.tables["loads"]["table"]} WHERE scn_name = '{SCENARIO_NAME}' """ dfs[AC_LOAD] = pd.read_sql(queries[AC_LOAD], engine) @@ -1227,7 +1227,7 @@ def adjust_ac_load_timeseries(df, o2_timeseries): select_query = text( f""" SELECT p_set - FROM {sources["load_timeseries"]["schema"]}.{sources["load_timeseries"]["table"]} + FROM {sources.tables["load_timeseries"]["schema"]}.{sources.tables["load_timeseries"]["table"]} WHERE load_id = :load_id and scn_name= :SCENARIO_NAME """ ) @@ -1256,7 +1256,7 @@ def adjust_ac_load_timeseries(df, o2_timeseries): ).tolist() update_query = text( f""" - UPDATE 
{targets["load_timeseries"]["schema"]}.{targets["load_timeseries"]["table"]} + UPDATE {targets.tables["load_timeseries"]["schema"]}.{targets.tables["load_timeseries"]["table"]} SET p_set = :adjusted_p_set WHERE load_id = :load_id AND scn_name = :SCENARIO_NAME """ @@ -1282,9 +1282,9 @@ def delete_unconnected_o2_buses(): with engine.connect() as conn: conn.execute( f""" - DELETE FROM {targets['buses']['schema']}.{targets['buses']['table']} + DELETE FROM {targets.tables['buses']['schema']}.{targets.tables['buses']['table']} WHERE carrier = 'O2' AND scn_name = '{SCENARIO_NAME}' - AND bus_id NOT IN (SELECT bus1 FROM {targets['links']['schema']}.{targets['links']['table']} + AND bus_id NOT IN (SELECT bus1 FROM {targets.tables['hydrogen_links']['schema']}.{targets.tables['hydrogen_links']['table']} WHERE carrier = 'PtH2_O2') """ ) diff --git a/src/egon/data/datasets/hydrogen_etrago/storage.py b/src/egon/data/datasets/hydrogen_etrago/storage.py index 4cbb7a542..4a68e0796 100755 --- a/src/egon/data/datasets/hydrogen_etrago/storage.py +++ b/src/egon/data/datasets/hydrogen_etrago/storage.py @@ -19,6 +19,10 @@ from egon.data import config, db from egon.data.datasets.etrago_helpers import copy_and_modify_stores from egon.data.datasets.scenario_parameters import get_sector_parameters +from egon.data.datasets import load_sources_and_targets + + + def insert_H2_overground_storage(): @@ -33,10 +37,8 @@ def insert_H2_overground_storage(): None """ - # The targets of etrago_hydrogen also serve as source here ಠ_ಠ - sources = config.datasets()["etrago_hydrogen"]["sources"] - targets = config.datasets()["etrago_hydrogen"]["targets"] - + sources, targets = load_sources_and_targets("HydrogenStoreEtrago") + s = config.settings()["egon-data"]["--scenarios"] scn = [] if "eGon2035" in s: @@ -49,9 +51,10 @@ def insert_H2_overground_storage(): storages = db.select_geodataframe( f""" SELECT bus_id, scn_name, geom - FROM {sources['buses']['schema']}. 
- {sources['buses']['table']} WHERE carrier IN ('H2', 'H2_grid') - AND scn_name = '{scn_name}' AND country = 'DE'""", + FROM {sources.tables["buses"]["schema"]}.{sources.tables["buses"]["table"]} + WHERE carrier IN ('H2', 'H2_grid') + AND scn_name = '{scn_name}' AND country = 'DE' + """, index_col="bus_id", ) @@ -75,24 +78,27 @@ def insert_H2_overground_storage(): # Clean table db.execute_sql( f""" - DELETE FROM grid.egon_etrago_store WHERE carrier = '{carrier}' AND - scn_name = '{scn_name}' AND bus not IN ( - SELECT bus_id FROM grid.egon_etrago_bus + DELETE FROM {targets.tables["hydrogen_stores"]["schema"]}.{targets.tables["hydrogen_stores"]["table"]} + WHERE carrier = '{carrier}' + AND scn_name = '{scn_name}' + AND bus not IN ( + SELECT bus_id + FROM {sources.tables["buses"]["schema"]}.{sources.tables["buses"]["table"]} WHERE scn_name = '{scn_name}' AND country != 'DE' ); """ ) # Select next id value - new_id = db.next_etrago_id("store") - storages["store_id"] = range(new_id, new_id + len(storages)) + storages["store_id"] = db.next_etrago_id("store", len(storages)) storages = storages.reset_index(drop=True) + # Insert data to db storages.to_sql( - targets["hydrogen_stores"]["table"], + targets.tables["hydrogen_stores"]["table"], db.engine(), - schema=targets["hydrogen_stores"]["schema"], + schema=targets.tables["hydrogen_stores"]["schema"], index=False, if_exists="append", ) @@ -111,8 +117,8 @@ def insert_H2_saltcavern_storage(): """ # Data tables sources and targets - sources = config.datasets()["etrago_hydrogen"]["sources"] - targets = config.datasets()["etrago_hydrogen"]["targets"] + sources, targets = load_sources_and_targets("HydrogenStoreEtrago") + s = config.settings()["egon-data"]["--scenarios"] scn = [] @@ -125,8 +131,8 @@ def insert_H2_saltcavern_storage(): storage_potentials = db.select_geodataframe( f""" SELECT * - FROM {sources['saltcavern_data']['schema']}. 
- {sources['saltcavern_data']['table']}""", + FROM {sources.tables["saltcavern_data"]["schema"]}.{sources.tables["saltcavern_data"]["table"]} + """, geom_col="geometry", ) @@ -134,8 +140,8 @@ def insert_H2_saltcavern_storage(): H2_AC_bus_map = db.select_dataframe( f""" SELECT * - FROM {sources['H2_AC_map']['schema']}. - {sources['H2_AC_map']['table']}""", + FROM {sources.tables["H2_AC_map"]["schema"]}.{sources.tables["H2_AC_map"]["table"]} + """, ) storage_potentials["storage_potential"] = ( @@ -179,9 +185,12 @@ def insert_H2_saltcavern_storage(): # Clean table db.execute_sql( f""" - DELETE FROM grid.egon_etrago_store WHERE carrier = '{carrier}' AND - scn_name = '{scn_name}' AND bus not IN ( - SELECT bus_id FROM grid.egon_etrago_bus + DELETE FROM {targets.tables["hydrogen_stores"]["schema"]}.{targets.tables["hydrogen_stores"]["table"]} + WHERE carrier = '{carrier}' + AND scn_name = '{scn_name}' + AND bus not IN ( + SELECT bus_id + FROM {sources.tables["buses"]["schema"]}.{sources.tables["buses"]["table"]} WHERE scn_name = '{scn_name}' AND country != 'DE' ); """ @@ -194,9 +203,9 @@ def insert_H2_saltcavern_storage(): # # Insert data to db storages.to_sql( - targets["hydrogen_stores"]["table"], + targets.tables["hydrogen_stores"]["table"], db.engine(), - schema=targets["hydrogen_stores"]["schema"], + schema=targets.tables["hydrogen_stores"]["schema"], index=False, if_exists="append", ) @@ -211,23 +220,23 @@ def calculate_and_map_saltcavern_storage_potential(): """ # select onshore vg250 data - sources = config.datasets()["bgr"]["sources"] - targets = config.datasets()["bgr"]["targets"] + sources, targets = load_sources_and_targets("HydrogenBusEtrago") vg250_data = db.select_geodataframe( - f"""SELECT * FROM - {sources['vg250_federal_states']['schema']}. 
- {sources['vg250_federal_states']['table']} - WHERE gf = '4'""", + f""" + SELECT * + FROM {sources.tables['vg250_federal_states']['schema']}.{sources.tables['vg250_federal_states']['table']} + WHERE gf = '4' + """, index_col="id", geom_col="geometry", ) # get saltcavern shapes saltcavern_data = db.select_geodataframe( - f"""SELECT * FROM - {sources['saltcaverns']['schema']}. - {sources['saltcaverns']['table']} - """, + f""" + SELECT * + FROM {sources.tables['saltcaverns']['schema']}.{sources.tables['saltcaverns']['table']} + """, geom_col="geometry", ) @@ -407,13 +416,13 @@ def write_saltcavern_potential(): """ potential_areas = calculate_and_map_saltcavern_storage_potential() + _, targets = load_sources_and_targets("HydrogenBusEtrago") + - # write information to saltcavern data - targets = config.datasets()["bgr"]["targets"] potential_areas.to_crs(epsg=4326).to_postgis( - targets["storage_potential"]["table"], + targets.tables["storage_potential"]["table"], db.engine(), - schema=targets["storage_potential"]["schema"], + schema=targets.tables["storage_potential"]["schema"], index=True, if_exists="replace", dtype={"geometry": Geometry()}, diff --git a/src/egon/data/datasets/industrial_gas_demand.py b/src/egon/data/datasets/industrial_gas_demand.py index 7e2e7c330..2edcd1647 100755 --- a/src/egon/data/datasets/industrial_gas_demand.py +++ b/src/egon/data/datasets/industrial_gas_demand.py @@ -21,7 +21,6 @@ from egon.data import config, db from egon.data.config import settings -from egon.data.datasets import Dataset from egon.data.datasets.etrago_helpers import ( finalize_bus_insertion, initialise_bus_insertion, @@ -31,9 +30,13 @@ from egon.data.datasets.scenario_parameters import get_sector_parameters logger = logging.getLogger(__name__) +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets + + class IndustrialGasDemand(Dataset): + """ Download the industrial gas demands from the opendata.ffe database @@ -44,21 +47,44 @@ class 
IndustrialGasDemand(Dataset): * :py:class:`ScenarioParameters ` """ - - #: name: str = "IndustrialGasDemand" - #: - version: str = "0.0.6" + version: str = "0.0.8" + + sources = DatasetSources( + tables={ + "region_mapping_json": "datasets/gas_data/demand/region_corr.json", + "industrial_demand_folder": "datasets/gas_data/demand", + "boundaries_vg250_krs": "boundaries.vg250_krs", + "egon_etrago_bus": "grid.egon_etrago_bus", + }, + files={ + "industrial_gas_bundle_src": "data_bundle_egon_data/industrial_gas_demand" + } + ) + + targets = DatasetTargets( + tables={ + "etrago_load": { + "schema": "grid", + "table": "egon_etrago_load" + }, + "etrago_load_timeseries": { + "schema": "grid", + "table": "egon_etrago_load_timeseries" + } + } + ) def __init__(self, dependencies): super().__init__( name=self.name, version=self.version, dependencies=dependencies, - tasks=(download_industrial_gas_demand), + tasks=(download_industrial_gas_demand,), ) + class IndustrialGasDemandeGon2035(Dataset): """Insert the hourly resolved industrial gas demands into the database for eGon2035 @@ -88,7 +114,7 @@ def __init__(self, dependencies): name=self.name, version=self.version, dependencies=dependencies, - tasks=(insert_industrial_gas_demand_egon2035), + tasks=(insert_industrial_gas_demand_egon2035,), ) @@ -121,7 +147,7 @@ def __init__(self, dependencies): name=self.name, version=self.version, dependencies=dependencies, - tasks=(insert_industrial_gas_demand_egon100RE), + tasks=(insert_industrial_gas_demand_egon100RE,), ) @@ -145,15 +171,14 @@ def read_industrial_demand(scn_name, carrier): Dataframe containing the industrial gas demand time series """ - target_file = Path(".") / "datasets/gas_data/demand/region_corr.json" + target_file = Path(IndustrialGasDemand.sources.tables["region_mapping_json"]) df_corr = pd.read_json(target_file) df_corr = df_corr.loc[:, ["id_region", "name_short"]] df_corr.set_index("id_region", inplace=True) target_file = ( - Path(".") - / "datasets/gas_data/demand" 
- / (carrier + "_" + scn_name + ".json") + Path(IndustrialGasDemand.sources.tables["industrial_demand_folder"]) + / f"{carrier}_{scn_name}.json" ) industrial_loads = pd.read_json(target_file) industrial_loads = industrial_loads.loc[:, ["id_region", "values"]] @@ -205,11 +230,12 @@ def read_industrial_demand(scn_name, carrier): industrial_loads_list = industrial_loads_list.set_index("nuts3") # Add the centroid point to each NUTS3 area - sql_vg250 = """SELECT nuts as nuts3, geometry as geom - FROM boundaries.vg250_krs - WHERE gf = 4 ;""" + sql_vg250 = f"""SELECT nuts as nuts3, geometry as geom + FROM {IndustrialGasDemand.sources.tables['boundaries_vg250_krs']} + WHERE gf = 4;""" gdf_vg250 = db.select_geodataframe(sql_vg250, epsg=4326) + point = [] for index, row in gdf_vg250.iterrows(): point.append(wkt.loads(str(row["geom"])).centroid) @@ -304,12 +330,12 @@ def delete_old_entries(scn_name): # Clean tables db.execute_sql( f""" - DELETE FROM grid.egon_etrago_load_timeseries + DELETE FROM {IndustrialGasDemand.targets.tables['etrago_load_timeseries']['schema']}.{IndustrialGasDemand.targets.tables['etrago_load_timeseries']['table']} WHERE "load_id" IN ( - SELECT load_id FROM grid.egon_etrago_load + SELECT load_id FROM {IndustrialGasDemand.targets.tables['etrago_load']['schema']}.{IndustrialGasDemand.targets.tables['etrago_load']['table']} WHERE "carrier" IN ('CH4_for_industry', 'H2_for_industry') AND scn_name = '{scn_name}' AND bus not IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {IndustrialGasDemand.sources.tables['egon_etrago_bus']} WHERE scn_name = '{scn_name}' AND country != 'DE' ) ); @@ -318,12 +344,12 @@ def delete_old_entries(scn_name): db.execute_sql( f""" - DELETE FROM grid.egon_etrago_load + DELETE FROM {IndustrialGasDemand.targets.tables['etrago_load']['schema']}.{IndustrialGasDemand.targets.tables['etrago_load']['table']} WHERE "load_id" IN ( - SELECT load_id FROM grid.egon_etrago_load + SELECT load_id FROM 
{IndustrialGasDemand.targets.tables['etrago_load']['schema']}.{IndustrialGasDemand.targets.tables['etrago_load']['table']} WHERE "carrier" IN ('CH4_for_industry', 'H2_for_industry') AND scn_name = '{scn_name}' AND bus not IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {IndustrialGasDemand.sources.tables['egon_etrago_bus']} WHERE scn_name = '{scn_name}' AND country != 'DE' ) ); @@ -377,9 +403,9 @@ def insert_new_entries(industrial_gas_demand, scn_name): engine = db.engine() # Insert data to db egon_etrago_load_gas.to_sql( - "egon_etrago_load", + IndustrialGasDemand.targets.tables['etrago_load']['table'], engine, - schema="grid", + schema=IndustrialGasDemand.targets.tables['etrago_load']['schema'], index=False, if_exists="append", ) @@ -644,9 +670,9 @@ def insert_industrial_gas_demand_time_series(egon_etrago_load_gas): # Insert data to db egon_etrago_load_gas_timeseries.to_sql( - "egon_etrago_load_timeseries", + IndustrialGasDemand.targets.tables['etrago_load_timeseries']['table'], engine, - schema="grid", + schema=IndustrialGasDemand.targets.tables['etrago_load_timeseries']['schema'], index=False, if_exists="append", ) @@ -672,7 +698,7 @@ def download_industrial_gas_demand(): # Read and save data result_corr = requests.get(correspondance_url) - target_file = Path(".") / "datasets/gas_data/demand/region_corr.json" + target_file = Path(IndustrialGasDemand.sources.tables["region_mapping_json"]) os.makedirs(os.path.dirname(target_file), exist_ok=True) pd.read_json(result_corr.content).to_json(target_file) @@ -694,9 +720,8 @@ def download_industrial_gas_demand(): # Read and save data result = requests.get(request) target_file = ( - Path(".") - / "datasets/gas_data/demand" - / (carrier + "_" + scn_name + ".json") + Path(IndustrialGasDemand.sources.tables["industrial_demand_folder"]) + / f"{carrier}_{scn_name}.json" ) pd.read_json(result.content).to_json(target_file) except: @@ -708,7 +733,7 @@ def download_industrial_gas_demand(): """ ) 
shutil.copytree( - "data_bundle_egon_data/industrial_gas_demand", - "datasets/gas_data/demand", + IndustrialGasDemand.sources.files["industrial_gas_bundle_src"], + IndustrialGasDemand.sources.tables["industrial_demand_folder"], dirs_exist_ok=True, ) diff --git a/src/egon/data/datasets/industrial_sites/.spyproject/config/codestyle.ini b/src/egon/data/datasets/industrial_sites/.spyproject/config/codestyle.ini new file mode 100644 index 000000000..0f54b4c43 --- /dev/null +++ b/src/egon/data/datasets/industrial_sites/.spyproject/config/codestyle.ini @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/src/egon/data/datasets/industrial_sites/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini b/src/egon/data/datasets/industrial_sites/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini new file mode 100644 index 000000000..0b95e5cee --- /dev/null +++ b/src/egon/data/datasets/industrial_sites/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini @@ -0,0 +1,5 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + diff --git a/src/egon/data/datasets/industrial_sites/.spyproject/config/defaults/defaults-encoding-0.2.0.ini b/src/egon/data/datasets/industrial_sites/.spyproject/config/defaults/defaults-encoding-0.2.0.ini new file mode 100644 index 000000000..0ce193c1e --- /dev/null +++ b/src/egon/data/datasets/industrial_sites/.spyproject/config/defaults/defaults-encoding-0.2.0.ini @@ -0,0 +1,3 @@ +[encoding] +text_encoding = utf-8 + diff --git a/src/egon/data/datasets/industrial_sites/.spyproject/config/defaults/defaults-vcs-0.2.0.ini b/src/egon/data/datasets/industrial_sites/.spyproject/config/defaults/defaults-vcs-0.2.0.ini new file mode 100644 index 000000000..ee2548333 --- /dev/null +++ b/src/egon/data/datasets/industrial_sites/.spyproject/config/defaults/defaults-vcs-0.2.0.ini @@ -0,0 +1,4 @@ +[vcs] +use_version_control = False +version_control_system = + 
diff --git a/src/egon/data/datasets/industrial_sites/.spyproject/config/defaults/defaults-workspace-0.2.0.ini b/src/egon/data/datasets/industrial_sites/.spyproject/config/defaults/defaults-workspace-0.2.0.ini new file mode 100644 index 000000000..2a73ab7ad --- /dev/null +++ b/src/egon/data/datasets/industrial_sites/.spyproject/config/defaults/defaults-workspace-0.2.0.ini @@ -0,0 +1,6 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + diff --git a/src/egon/data/datasets/industrial_sites/.spyproject/config/encoding.ini b/src/egon/data/datasets/industrial_sites/.spyproject/config/encoding.ini new file mode 100644 index 000000000..a17acedd7 --- /dev/null +++ b/src/egon/data/datasets/industrial_sites/.spyproject/config/encoding.ini @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/src/egon/data/datasets/industrial_sites/.spyproject/config/vcs.ini b/src/egon/data/datasets/industrial_sites/.spyproject/config/vcs.ini new file mode 100644 index 000000000..fd66eae01 --- /dev/null +++ b/src/egon/data/datasets/industrial_sites/.spyproject/config/vcs.ini @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/src/egon/data/datasets/industrial_sites/.spyproject/config/workspace.ini b/src/egon/data/datasets/industrial_sites/.spyproject/config/workspace.ini new file mode 100644 index 000000000..4d9540ac2 --- /dev/null +++ b/src/egon/data/datasets/industrial_sites/.spyproject/config/workspace.ini @@ -0,0 +1,12 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False +project_type = 'empty-project-type' +recent_files = [] + +[main] +version = 0.2.0 +recent_files = [] + diff --git a/src/egon/data/datasets/industrial_sites/__init__.py b/src/egon/data/datasets/industrial_sites/__init__.py index e6064297b..9ae12d62f 100644 --- 
a/src/egon/data/datasets/industrial_sites/__init__.py +++ b/src/egon/data/datasets/industrial_sites/__init__.py @@ -17,7 +17,7 @@ import pandas as pd from egon.data import db, subprocess -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets import egon.data.config Base = declarative_base() @@ -123,57 +123,33 @@ def create_tables(): None. """ - # Get data config - targets_sites = egon.data.config.datasets()["industrial_sites"]["targets"] + # Create target schema db.execute_sql("CREATE SCHEMA IF NOT EXISTS demand;") - # Drop outdated tables if still present, might be removed later - db.execute_sql("""DROP TABLE IF EXISTS demand.industrial_sites CASCADE;""") - - db.execute_sql( - """DROP TABLE IF EXISTS demand.hotmaps_industrial_sites CASCADE;""" - ) - - db.execute_sql( - """DROP TABLE IF EXISTS demand.seenergies_industrial_sites CASCADE;""" - ) + db.execute_sql( - """DROP TABLE IF EXISTS demand.schmidt_industrial_sites CASCADE;""" + f"""DROP TABLE IF EXISTS {MergeIndustrialSites.targets.tables['hotmaps']} CASCADE;""" ) - # Drop tables and sequences before recreating them db.execute_sql( - f"""DROP TABLE IF EXISTS - {targets_sites['hotmaps']['schema']}. - {targets_sites['hotmaps']['table']} CASCADE;""" + f"""DROP TABLE IF EXISTS {MergeIndustrialSites.targets.tables['seenergies']} CASCADE;""" ) db.execute_sql( - f"""DROP TABLE IF EXISTS - {targets_sites['seenergies']['schema']}. - {targets_sites['seenergies']['table']} CASCADE;""" + f"""DROP TABLE IF EXISTS {MergeIndustrialSites.targets.tables['schmidt']} CASCADE;""" ) - db.execute_sql( - f"""DROP TABLE IF EXISTS - {targets_sites['schmidt']['schema']}. - {targets_sites['schmidt']['table']} CASCADE;""" - ) db.execute_sql( - f"""DROP TABLE IF EXISTS - {targets_sites['sites']['schema']}. 
- {targets_sites['sites']['table']} CASCADE;""" + f"""DROP TABLE IF EXISTS {MergeIndustrialSites.targets.tables['sites']} CASCADE;""" ) # Drop sequence db.execute_sql( - f"""DROP SEQUENCE IF EXISTS - {targets_sites['sites']['schema']}. - {targets_sites['sites']['table']}_id_seq CASCADE;""" + f"""DROP SEQUENCE IF EXISTS {MergeIndustrialSites.targets.tables['sites']}_id_seq CASCADE;""" ) engine = db.engine() @@ -188,59 +164,50 @@ def create_tables(): def download_hotmaps(): - """Download csv file on hotmap's industrial sites.""" - hotmaps_config = egon.data.config.datasets()["industrial_sites"][ - "sources" - ]["hotmaps"] + download_directory = "industrial_sites" - # Create the folder, if it does not exists already + if not os.path.exists(download_directory): os.mkdir(download_directory) - target_file = Path(".") / "industrial_sites" / hotmaps_config["path"] + target_file = Path(MergeIndustrialSites.targets.files["hotmaps_download"]) + url = MergeIndustrialSites.sources.urls["hotmaps"] if not os.path.isfile(target_file): subprocess.run( - f"curl {hotmaps_config['url']} > {target_file}", shell=True + f"curl {url} > {target_file}", shell=True ) def download_seenergies(): """Download csv file on s-eenergies' industrial sites.""" - see_config = egon.data.config.datasets()["industrial_sites"]["sources"][ - "seenergies" - ] + download_directory = "industrial_sites" # Create the folder, if it does not exists already if not os.path.exists(download_directory): os.mkdir(download_directory) - target_file = Path(".") / "industrial_sites" / see_config["path"] + # Use the new class attributes for the target file and source URL + target_file = Path(MergeIndustrialSites.targets.files["seenergies_download"]) + url = MergeIndustrialSites.sources.urls["seenergies"] if not os.path.isfile(target_file): - urlretrieve(see_config["url"], target_file) + urlretrieve(url, target_file) def hotmaps_to_postgres(): """Import hotmaps data to postgres database""" - # Get information from data 
configuration file - hotmaps_targets = egon.data.config.datasets()["industrial_sites"][ - "targets" - ]["hotmaps"] - hotmaps_sources = egon.data.config.datasets()["industrial_sites"][ - "sources" - ]["hotmaps"] - input_file = Path(".") / "industrial_sites" / hotmaps_sources["path"] + input_file = Path(MergeIndustrialSites.targets.files["hotmaps_download"]) engine = db.engine() db.execute_sql( - f"DELETE FROM {hotmaps_targets['schema']}.{hotmaps_targets['table']}" + f"DELETE FROM {MergeIndustrialSites.targets.tables['hotmaps']}" ) # Read csv to dataframe df = pd.read_csv(input_file, delimiter=";") @@ -325,29 +292,25 @@ def hotmaps_to_postgres(): # Write data to db gdf.to_postgis( - hotmaps_targets["table"], + MergeIndustrialSites.targets.get_table_name("hotmaps"), engine, - schema=hotmaps_targets["schema"], + schema=MergeIndustrialSites.targets.get_table_schema("hotmaps"), if_exists="append", index=df.index, ) + def seenergies_to_postgres(): """Import seenergies data to postgres database""" # Get information from data configuration file - see_targets = egon.data.config.datasets()["industrial_sites"]["targets"][ - "seenergies" - ] - see_sources = egon.data.config.datasets()["industrial_sites"]["sources"][ - "seenergies" - ] - - input_file = Path(".") / "industrial_sites" / see_sources["path"] + + + input_file = Path(MergeIndustrialSites.targets.files["seenergies_download"]) engine = db.engine() db.execute_sql( - f"DELETE FROM {see_targets['schema']}.{see_targets['table']}" + f"DELETE FROM {MergeIndustrialSites.targets.tables['seenergies']}" ) # Read csv to dataframe @@ -431,9 +394,9 @@ def seenergies_to_postgres(): # Write data to db gdf.to_postgis( - see_targets["table"], + MergeIndustrialSites.targets.get_table_name("seenergies"), engine, - schema=see_targets["schema"], + schema=MergeIndustrialSites.targets.get_table_schema("seenergies"), if_exists="append", index=df.index, ) @@ -442,24 +405,18 @@ def seenergies_to_postgres(): def schmidt_to_postgres(): """Import 
data from Thesis by Danielle Schmidt to postgres database""" # Get information from data configuration file - schmidt_targets = egon.data.config.datasets()["industrial_sites"][ - "targets" - ]["schmidt"] - schmidt_sources = egon.data.config.datasets()["industrial_sites"][ - "sources" - ]["schmidt"] + input_file = ( Path(".") / "data_bundle_egon_data" - / "industrial_sites" - / schmidt_sources["path"] + / MergeIndustrialSites.sources.files["schmidt"] ) engine = db.engine() db.execute_sql( - f"DELETE FROM {schmidt_targets['schema']}.{schmidt_targets['table']}" + f"DELETE FROM {MergeIndustrialSites.targets.tables['schmidt']}" ) # Read csv to dataframe @@ -516,9 +473,9 @@ def schmidt_to_postgres(): # Write data to db gdf.to_postgis( - schmidt_targets["table"], + MergeIndustrialSites.targets.get_table_name("schmidt"), engine, - schema=schmidt_targets["schema"], + schema=MergeIndustrialSites.targets.get_table_schema("schmidt"), if_exists="append", index=df.index, ) @@ -554,99 +511,75 @@ def merge_inputs(): (hotmaps, seenergies, Thesis Schmidt) """ - # Get information from data configuration file - - hotmaps_targets = egon.data.config.datasets()["industrial_sites"][ - "targets" - ]["hotmaps"] - see_targets = egon.data.config.datasets()["industrial_sites"]["targets"][ - "seenergies" - ] - schmidt_targets = egon.data.config.datasets()["industrial_sites"][ - "targets" - ]["schmidt"] - sites_targets = egon.data.config.datasets()["industrial_sites"]["targets"][ - "sites" - ] - - sites_table = f"{sites_targets['schema']}" f".{sites_targets['table']}" - - hotmaps_table = ( - f"{hotmaps_targets['schema']}" f".{hotmaps_targets['table']}" - ) - - seenergies_table = f"{see_targets['schema']}" f".{see_targets['table']}" - - schmidt_table = ( - f"{schmidt_targets['schema']}" f".{schmidt_targets['table']}" - ) + # Insert data from Schmidt's Master thesis db.execute_sql( - f"""INSERT INTO {sites_table} + f"""INSERT INTO {MergeIndustrialSites.targets.tables['sites']} (companyname, 
subsector, wz, geom) SELECT h.plant, h.application, h.wz, h.geom - FROM {schmidt_table} h + FROM {MergeIndustrialSites.sources.tables['schmidt_processed']} h WHERE geom IS NOT NULL;""" ) # Insert data from s-EEnergies db.execute_sql( - f"""INSERT INTO {sites_table} + f"""INSERT INTO {MergeIndustrialSites.targets.tables['sites']} (companyname, address, subsector, wz, geom) SELECT s.companyname, s.address, s.subsector, s.wz, s.geom - FROM {seenergies_table} s + FROM {MergeIndustrialSites.sources.tables['seenergies_processed']} s WHERE s.country = 'DE' AND geom IS NOT NULL AND LOWER(SUBSTRING(s.companyname, 1, 3)) NOT IN (SELECT LOWER(SUBSTRING(h.companyname, 1, 3)) - FROM {sites_table} h, - {seenergies_table} s + FROM {MergeIndustrialSites.targets.tables['sites']} h, + {MergeIndustrialSites.sources.tables['seenergies_processed']} s WHERE ST_DWithin (h.geom, s.geom, 0.01) - AND (h.wz = s.wz) - AND (LOWER (SUBSTRING(h.companyname, 1, 3)) = + AND (h.wz = s.wz) + AND (LOWER (SUBSTRING(h.companyname, 1, 3)) = LOWER (SUBSTRING(s.companyname, 1, 3))));""" ) # Insert data from Hotmaps + db.execute_sql( - f"""INSERT INTO {sites_table} + f"""INSERT INTO {MergeIndustrialSites.targets.tables['sites']} (companyname, address, subsector, wz, geom) SELECT h.companyname, h.address, h.subsector, h.wz, h.geom - FROM {hotmaps_table} h + FROM {MergeIndustrialSites.sources.tables['hotmaps_processed']} h WHERE h.country = 'Germany' AND h.geom IS NOT NULL AND h.siteid NOT IN (SELECT a.siteid - FROM {seenergies_table} a + FROM {MergeIndustrialSites.sources.tables['seenergies_processed']} a WHERE a.country = 'DE' AND a.geom IS NOT NULL) AND h.geom NOT IN (SELECT a.geom - FROM {seenergies_table} a + FROM {MergeIndustrialSites.sources.tables['seenergies_processed']} a WHERE a.country = 'DE' AND a.geom IS NOT NULL) AND LOWER(SUBSTRING(h.companyname, 1, 3)) NOT IN (SELECT LOWER(SUBSTRING(s.companyname, 1, 3)) - FROM {sites_table} s, - {hotmaps_table} h + FROM 
{MergeIndustrialSites.targets.tables['sites']} s, + {MergeIndustrialSites.sources.tables['hotmaps_processed']} h WHERE ST_DWithin (s.geom, h.geom, 0.01) AND (h.wz = s.wz) AND (LOWER (SUBSTRING(h.companyname, 1, 3)) = LOWER (SUBSTRING(s.companyname, 1, 3))))""" ) - # Replace geometry by spatial information from table 'demand.schmidt_industrial_sites' if possible + db.execute_sql( - f"""UPDATE {sites_table} s + f"""UPDATE {MergeIndustrialSites.targets.tables['sites']} s SET geom = g.geom - FROM {schmidt_table} g + FROM {MergeIndustrialSites.sources.tables['schmidt_processed']} g WHERE ST_DWithin (g.geom, s.geom, 0.01) AND (g.wz = s.wz) AND (LOWER (SUBSTRING(g.plant, 1, 3)) = @@ -659,31 +592,50 @@ def map_nuts3(): Match resulting industrial sites with nuts3 codes and fill column 'nuts3' - Returns - ------- - None. - """ - # Get information from data configuration file - sites_targets = egon.data.config.datasets()["industrial_sites"]["targets"][ - "sites" - ] - sites_table = f"{sites_targets['schema']}" f".{sites_targets['table']}" db.execute_sql( - f"""UPDATE {sites_table} s + f"""UPDATE {MergeIndustrialSites.targets.tables['sites']} s SET nuts3 = krs.nuts - FROM boundaries.vg250_krs krs + FROM {MergeIndustrialSites.sources.tables['vg250_krs']} krs WHERE ST_WITHIN(s.geom, ST_TRANSFORM(krs.geometry,4326));""" ) class MergeIndustrialSites(Dataset): + sources = DatasetSources( + urls={ + "hotmaps": "https://gitlab.com/hotmaps/industrial_sites/industrial_sites_Industrial_Database/-/raw/388278c6df35889b1447a959fc3759e3d78bf659/data/Industrial_Database.csv?inline=false", + "seenergies": "https://opendata.arcgis.com/datasets/5e36c0af918040ed936b4e4c101f611d_0.csv", + }, + files={ + "schmidt": "industrial_sites/MA_Schmidt_Industriestandorte_georef.csv" + }, + tables={ + # These tables are targets of earlier steps, but sources for the final merge + "hotmaps_processed": "demand.egon_hotmaps_industrial_sites", + "seenergies_processed": "demand.egon_seenergies_industrial_sites", 
+ "schmidt_processed": "demand.egon_schmidt_industrial_sites", + "vg250_krs": "boundaries.vg250_krs", + } + ) + targets = DatasetTargets( + files={ + "hotmaps_download": "industrial_sites/data_Industrial_Database.csv", + "seenergies_download": "industrial_sites/D5_1_Industry_Dataset_With_Demand_Data.csv", + }, + tables={ + "hotmaps": "demand.egon_hotmaps_industrial_sites", + "seenergies": "demand.egon_seenergies_industrial_sites", + "schmidt": "demand.egon_schmidt_industrial_sites", + "sites": "demand.egon_industrial_sites", + } + ) def __init__(self, dependencies): super().__init__( name="Merge_industrial_sites", - version="0.0.3", + version="0.0.4", dependencies=dependencies, tasks=(download_import_industrial_sites, merge_inputs, map_nuts3), ) diff --git a/src/egon/data/datasets/industry/__init__.py b/src/egon/data/datasets/industry/__init__.py index d0193ba9b..9b21b755c 100644 --- a/src/egon/data/datasets/industry/__init__.py +++ b/src/egon/data/datasets/industry/__init__.py @@ -1,8 +1,8 @@ """The central module containing all code dealing with the spatial - distribution of industrial electricity demands. - Industrial demands from DemandRegio are distributed from nuts3 level down - to osm landuse polygons and/or industrial sites also identified within this - processing step bringing three different inputs together. +distribution of industrial electricity demands. +Industrial demands from DemandRegio are distributed from nuts3 level down +to osm landuse polygons and/or industrial sites also identified within this +processing step bringing three different inputs together. 
""" @@ -12,12 +12,12 @@ import pandas as pd from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets +from egon.data.config import settings from egon.data.datasets.industry.temporal import ( insert_osm_ind_load, insert_sites_ind_load, ) -import egon.data.config Base = declarative_base() @@ -94,53 +94,45 @@ def create_tables(): None. """ - # Get data config - targets_spatial = egon.data.config.datasets()[ - "distributed_industrial_demand" - ]["targets"] - targets_temporal = egon.data.config.datasets()[ - "electrical_load_curves_industry" - ]["targets"] - # Create target schema db.execute_sql("CREATE SCHEMA IF NOT EXISTS demand;") # Drop tables and sequences before recreating them db.execute_sql( - f"""DROP TABLE IF EXISTS - {targets_spatial['sites']['schema']}. - {targets_spatial['sites']['table']} CASCADE;""" + f"""DROP TABLE IF EXISTS + {IndustrialDemandCurves.targets.tables["sites"]["schema"]}. + {IndustrialDemandCurves.targets.tables["sites"]["table"]} CASCADE;""" ) db.execute_sql( - f"""DROP TABLE IF EXISTS - {targets_spatial['osm']['schema']}. - {targets_spatial['osm']['table']} CASCADE;""" + f"""DROP TABLE IF EXISTS + {IndustrialDemandCurves.targets.tables["osm"]["schema"]}. + {IndustrialDemandCurves.targets.tables["osm"]["table"]} CASCADE;""" ) db.execute_sql( - f"""DROP TABLE IF EXISTS - {targets_temporal['osm_load']['schema']}. - {targets_temporal['osm_load']['table']} CASCADE;""" + f"""DROP TABLE IF EXISTS + {IndustrialDemandCurves.targets.tables["osm_load"]["schema"]}. + {IndustrialDemandCurves.targets.tables["osm_load"]["table"]} CASCADE;""" ) db.execute_sql( - f"""DROP TABLE IF EXISTS - {targets_temporal['osm_load_individual']['schema']}. - {targets_temporal['osm_load_individual']['table']} CASCADE;""" + f"""DROP TABLE IF EXISTS + {IndustrialDemandCurves.targets.tables["osm_load_individual"]["schema"]}. 
+ {IndustrialDemandCurves.targets.tables["osm_load_individual"]["table"]} CASCADE;""" ) db.execute_sql( - f"""DROP TABLE IF EXISTS - {targets_temporal['sites_load']['schema']}. - {targets_temporal['sites_load']['table']} CASCADE;""" + f"""DROP TABLE IF EXISTS + {IndustrialDemandCurves.targets.tables["sites_load"]["schema"]}. + {IndustrialDemandCurves.targets.tables["sites_load"]["table"]} CASCADE;""" ) db.execute_sql( - f"""DROP TABLE IF EXISTS - {targets_temporal['sites_load_individual']['schema']}. - {targets_temporal['sites_load_individual']['table']} CASCADE;""" + f"""DROP TABLE IF EXISTS + {IndustrialDemandCurves.targets.tables["sites_load_individual"]["schema"]}. + {IndustrialDemandCurves.targets.tables["sites_load_individual"]["table"]} CASCADE;""" ) engine = db.engine() @@ -179,33 +171,25 @@ def industrial_demand_distr(): """ # Read information from configuration file - sources = egon.data.config.datasets()["distributed_industrial_demand"][ - "sources" - ] + sources = IndustrialDemandCurves.sources.tables - target_sites = egon.data.config.datasets()[ - "distributed_industrial_demand" - ]["targets"]["sites"] - target_osm = egon.data.config.datasets()["distributed_industrial_demand"][ - "targets" - ]["osm"] + target_sites = IndustrialDemandCurves.targets.tables["sites"] + target_osm = IndustrialDemandCurves.targets.tables["osm"] # Delete data from table db.execute_sql( - f"""DELETE FROM {target_sites['schema']}.{target_sites['table']}""" + f"""DELETE FROM {target_sites['schema']}.{target_sites['table']}""" ) db.execute_sql( - f"""DELETE FROM {target_osm['schema']}.{target_osm['table']}""" + f"""DELETE FROM {target_osm['schema']}.{target_osm['table']}""" ) - for scn in egon.data.config.settings()["egon-data"]["--scenarios"]: - # Select spatial information from local database - # Select administrative districts (Landkreise) including its boundaries + for scn in settings()["egon-data"]["--scenarios"]: boundaries = db.select_geodataframe( f"""SELECT nuts, 
geometry FROM - {sources['vg250_krs']['schema']}. - {sources['vg250_krs']['table']}""", + {sources["vg250_krs"]["schema"]}. + {sources["vg250_krs"]["table"]}""", index_col="nuts", geom_col="geometry", epsg=3035, @@ -214,14 +198,13 @@ def industrial_demand_distr(): # Select industrial landuse polygons landuse = db.select_geodataframe( f"""SELECT id, area_ha, geom FROM - {sources['osm_landuse']['schema']}. - {sources['osm_landuse']['table']} + {sources["osm_landuse"]["schema"]}. + {sources["osm_landuse"]["table"]} WHERE sector = 3 AND NOT ST_Intersects( geom, (SELECT ST_UNION(ST_Transform(geom,3035)) FROM - {sources['industrial_sites']['schema']}. - {sources['industrial_sites']['table']})) + {sources["industrial_sites"]["schema"]}.{sources["industrial_sites"]["table"]})) AND name NOT LIKE '%%kraftwerk%%' AND name NOT LIKE '%%Stadtwerke%%' AND name NOT LIKE '%%Müllverbrennung%%' @@ -245,7 +228,10 @@ def industrial_demand_distr(): ) # Spatially join vg250_krs and industrial landuse areas - landuse = gpd.sjoin(landuse, boundaries, how="inner", op="intersects") + landuse = gpd.sjoin( + landuse, boundaries, how="inner", predicate="intersects" + ) + # Rename column landuse = landuse.rename({"index_right": "nuts3"}, axis=1) @@ -255,8 +241,7 @@ def industrial_demand_distr(): # Select data on industrial sites sites = db.select_dataframe( f"""SELECT id, wz, nuts3 FROM - {sources['industrial_sites']['schema']}. - {sources['industrial_sites']['table']}""", + {sources["industrial_sites"]["schema"]}.{sources["industrial_sites"]["table"]}""", index_col=None, ) # Count number of industrial sites per subsector (wz) and nuts3 @@ -268,12 +253,11 @@ def industrial_demand_distr(): # Select industrial demands on nuts3 level from local database demand_nuts3_import = db.select_dataframe( f"""SELECT nuts3, demand, wz FROM - {sources['demandregio']['schema']}. 
- {sources['demandregio']['table']} + {sources["demandregio"]["schema"]}.{sources["demandregio"]["table"]} WHERE scenario = '{scn}' AND demand > 0 AND wz IN - (SELECT wz FROM demand.egon_demandregio_wz + (SELECT wz FROM {sources["wz"]["schema"]}.{sources["wz"]["table"]} WHERE sector = 'industry')""" ) @@ -401,6 +385,7 @@ def industrial_demand_distr(): schema=target_sites["schema"], if_exists="append", ) + landuse[["osm_id", "scenario", "wz", "demand"]].to_sql( target_osm["table"], @@ -438,12 +423,41 @@ class IndustrialDemandCurves(Dataset): * :py:class:`demand.egon_sites_ind_load_curves_individual ` is created and filled """ - + #: name: str = "Industrial_demand_curves" #: - version: str = "0.0.5" - + version: str = "0.0.7" + + sources = DatasetSources( + tables={ + "demandregio": {"schema": "demand", "table": "egon_demandregio_cts_ind"}, + "wz": {"schema": "demand", "table": "egon_demandregio_wz"}, + "osm_landuse": {"schema": "openstreetmap", "table": "osm_landuse"}, + "industrial_sites": {"schema": "demand", "table": "egon_industrial_sites"}, + "vg250_krs": {"schema": "boundaries", "table": "vg250_krs"}, + "osm": {"schema": "demand", "table": "egon_demandregio_osm_ind_electricity"}, + "sites": {"schema": "demand", "table": "egon_demandregio_sites_ind_electricity"}, + "sites_geom": {"schema": "demand", "table": "egon_industrial_sites"}, + "demandregio_industry": {"schema": "demand", "table": "egon_demandregio_cts_ind"}, + "demandregio_wz": {"schema": "demand", "table": "egon_demandregio_wz"}, + "demandregio_timeseries": {"schema": "demand", "table": "egon_demandregio_timeseries_cts_ind"}, + "hvmv_substation": {"schema": "grid", "table": "egon_hvmv_substation"}, + "egon_mv_grid_district": {"schema": "grid", "table": "egon_mv_grid_district"}, + "egon_ehv_voronoi": {"schema": "grid", "table": "egon_ehv_substation_voronoi"}, + } + ) + targets = DatasetTargets( + tables={ + "osm": {"schema": "demand", "table": "egon_demandregio_osm_ind_electricity"}, + "sites": 
{"schema": "demand", "table": "egon_demandregio_sites_ind_electricity"}, + "osm_load": {"schema": "demand", "table": "egon_osm_ind_load_curves"}, + "osm_load_individual": {"schema": "demand", "table": "egon_osm_ind_load_curves_individual"}, + "sites_load": {"schema": "demand", "table": "egon_sites_ind_load_curves"}, + "sites_load_individual": {"schema": "demand", "table": "egon_sites_ind_load_curves_individual"}, + } + ) + def __init__(self, dependencies): super().__init__( name=self.name, @@ -455,4 +469,4 @@ def __init__(self, dependencies): insert_osm_ind_load, insert_sites_ind_load, ), - ) + ) \ No newline at end of file diff --git a/src/egon/data/datasets/loadarea/__init__.py b/src/egon/data/datasets/loadarea/__init__.py index e7e5ffbc3..e061a7fe4 100644 --- a/src/egon/data/datasets/loadarea/__init__.py +++ b/src/egon/data/datasets/loadarea/__init__.py @@ -13,7 +13,7 @@ import importlib_resources as resources from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets import egon.data.config # will be later imported from another file ### @@ -57,7 +57,33 @@ class OsmLanduse(Dataset): #: name: str = "OsmLanduse" #: - version: str = "0.0.0" + version: str = "0.0.2" + + sources = DatasetSources( + files={ + "osm_landuse_extraction": "osm_landuse_extraction.sql" + }, + tables={ + "osm_polygons": { + "schema": "openstreetmap", + "table": "osm_polygon", + }, + "vg250": { + "schema": "boundaries", + "table": "vg250_sta_union", + }, + } + ) + + targets = DatasetTargets( + tables={ + "osm_landuse": { + "schema": "openstreetmap", + "table": "osm_landuse", + } + } + ) + def __init__(self, dependencies): super().__init__( @@ -98,8 +124,45 @@ class LoadArea(Dataset): #: name: str = "LoadArea" #: - version: str = "0.0.1" + version: str = "0.0.3" + + sources = DatasetSources( + files={ + "osm_landuse_melt": "osm_landuse_melt.sql", + "census_cells_melt": "census_cells_melt.sql", + 
"osm_landuse_census_cells_melt": "osm_landuse_census_cells_melt.sql", + "loadareas_create": "loadareas_create.sql", + "loadareas_add_demand_hh": "loadareas_add_demand_hh.sql", + "loadareas_add_demand_cts": "loadareas_add_demand_cts.sql", + "loadareas_add_demand_ind": "loadareas_add_demand_ind.sql", + "drop_temp_tables": "drop_temp_tables.sql", + }, + tables={ + "osm_landuse": { + "schema": "openstreetmap", + "table": "osm_landuse", + }, + "zensus_population": { + "schema": "society", + "table": "destatis_zensus_population_per_ha_inside_germany", + }, + "vg250": { + "schema": "boundaries", + "table": "vg250_sta_union", + }, + } + ) + targets = DatasetTargets( + tables={ + "egon_loadarea": { + "schema": "demand", + "table": "egon_loadarea", + } + } + ) + + def __init__(self, dependencies): super().__init__( name=self.name, @@ -122,7 +185,9 @@ def __init__(self, dependencies): def extract_osm_landuse(): db.execute_sql_script( - os.path.dirname(__file__) + "/osm_landuse_extraction.sql" + os.path.dirname(__file__) + + "/" + + OsmLanduse.sources.files["osm_landuse_extraction"] ) @@ -132,15 +197,18 @@ def create_landuse_table(): ------- None. """ - cfg = egon.data.config.datasets()["landuse"]["target"] + #cfg = egon.data.config.datasets()["landuse"]["target"] # Create schema if not exists - db.execute_sql(f"""CREATE SCHEMA IF NOT EXISTS {cfg['schema']};""") + db.execute_sql( + f"CREATE SCHEMA IF NOT EXISTS {OsmLanduse.targets.tables['osm_landuse']['schema']};" + ) # Drop tables db.execute_sql( - f"""DROP TABLE IF EXISTS - {cfg['schema']}.{cfg['table']} CASCADE;""" + f"DROP TABLE IF EXISTS " + f"{OsmLanduse.targets.tables['osm_landuse']['schema']}." 
+ f"{OsmLanduse.targets.tables['osm_landuse']['table']} CASCADE;" ) engine = db.engine() @@ -161,7 +229,7 @@ def execute_sql_script(script): def osm_landuse_melt(): """Melt all OSM landuse areas by: buffer, union, unbuffer""" print("Melting OSM landuse areas from openstreetmap.osm_landuse...") - execute_sql_script("osm_landuse_melt.sql") + execute_sql_script(LoadArea.sources.files["osm_landuse_melt"]) def census_cells_melt(): @@ -170,7 +238,7 @@ def census_cells_melt(): "Melting census cells from " "society.destatis_zensus_population_per_ha_inside_germany..." ) - execute_sql_script("census_cells_melt.sql") + execute_sql_script(LoadArea.sources.files["census_cells_melt"]) def osm_landuse_census_cells_melt(): @@ -180,7 +248,7 @@ def osm_landuse_census_cells_melt(): "census cells from " "society.egon_destatis_zensus_cells_melted_cluster..." ) - execute_sql_script("osm_landuse_census_cells_melt.sql") + execute_sql_script(LoadArea.sources.files["osm_landuse_census_cells_melt"]) def loadareas_create(): @@ -195,27 +263,27 @@ def loadareas_create(): * Check for Loadareas without AGS code. 
""" print("Create initial load areas and add some sector stats...") - execute_sql_script("loadareas_create.sql") + execute_sql_script(LoadArea.sources.files["loadareas_create"]) def loadareas_add_demand_hh(): """Adds consumption and peak load to load areas for households""" print("Add consumption and peak loads to load areas for households...") - execute_sql_script("loadareas_add_demand_hh.sql") + execute_sql_script(LoadArea.sources.files["loadareas_add_demand_hh"]) def loadareas_add_demand_cts(): """Adds consumption and peak load to load areas for CTS""" print("Add consumption and peak loads to load areas for CTS...") - execute_sql_script("loadareas_add_demand_cts.sql") + execute_sql_script(LoadArea.sources.files["loadareas_add_demand_cts"]) def loadareas_add_demand_ind(): """Adds consumption and peak load to load areas for industry""" print("Add consumption and peak loads to load areas for industry...") - execute_sql_script("loadareas_add_demand_ind.sql") + execute_sql_script(LoadArea.sources.files["loadareas_add_demand_ind"]) def drop_temp_tables(): print("Dropping temp tables, views and sequences...") - execute_sql_script("drop_temp_tables.sql") + execute_sql_script(LoadArea.sources.files["drop_temp_tables"]) diff --git a/src/egon/data/datasets/low_flex_scenario/__init__.py b/src/egon/data/datasets/low_flex_scenario/__init__.py index f9e0440b6..e9b1e79b9 100644 --- a/src/egon/data/datasets/low_flex_scenario/__init__.py +++ b/src/egon/data/datasets/low_flex_scenario/__init__.py @@ -6,16 +6,26 @@ from importlib_resources import files from sqlalchemy.ext.declarative import declarative_base -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets Base = declarative_base() class LowFlexScenario(Dataset): + + sources = DatasetSources( + files={ + "low_flex_sql": "low_flex_eGon2035.sql" + } + ) + + targets = DatasetTargets() + + def __init__(self, dependencies): super().__init__( name="low_flex_scenario", - 
version="0.0.1", + version="0.0.4", dependencies=dependencies, tasks=( { @@ -29,4 +39,4 @@ def __init__(self, dependencies): ), }, ), - ) + ) \ No newline at end of file diff --git a/src/egon/data/datasets/mastr.py b/src/egon/data/datasets/mastr.py index f66e9a4ed..6a5a2e24d 100644 --- a/src/egon/data/datasets/mastr.py +++ b/src/egon/data/datasets/mastr.py @@ -7,8 +7,7 @@ from urllib.request import urlretrieve import os -from egon.data.datasets import Dataset -import egon.data.config +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets WORKING_DIR_MASTR_OLD = Path(".", "bnetza_mastr", "dump_2021-05-03") WORKING_DIR_MASTR_NEW = Path(".", "bnetza_mastr", "dump_2024-01-08") @@ -20,7 +19,7 @@ def download_mastr_data(): def download(dataset_name, download_dir): print(f"Downloading dataset {dataset_name} to {download_dir} ...") # Get parameters from config and set download URL - data_config = egon.data.config.datasets()[dataset_name] + data_config = mastr_data_setup.sources.tables[dataset_name]["zenodo"] zenodo_files_url = ( f"https://zenodo.org/record/" f"{data_config['deposit_id']}/files/" ) @@ -39,13 +38,27 @@ def download(dataset_name, download_dir): zenodo_files_url + filename, download_dir / filename ) - if not os.path.exists(WORKING_DIR_MASTR_OLD): - WORKING_DIR_MASTR_OLD.mkdir(exist_ok=True, parents=True) - if not os.path.exists(WORKING_DIR_MASTR_NEW): - WORKING_DIR_MASTR_NEW.mkdir(exist_ok=True, parents=True) + if not os.path.exists( + Path(mastr_data_setup.targets.tables["mastr"]["download_dir"]["path"]) + ): + Path(mastr_data_setup.targets.tables["mastr"]["download_dir"]["path"]).mkdir( + exist_ok=True, parents=True + ) + if not os.path.exists( + Path(mastr_data_setup.targets.tables["mastr_new"]["download_dir"]["path"]) + ): + Path(mastr_data_setup.targets.tables["mastr_new"]["download_dir"]["path"]).mkdir( + exist_ok=True, parents=True + ) - download(dataset_name="mastr", download_dir=WORKING_DIR_MASTR_OLD) - 
download(dataset_name="mastr_new", download_dir=WORKING_DIR_MASTR_NEW) + download( + dataset_name="mastr", + download_dir=Path(mastr_data_setup.targets.tables["mastr"]["download_dir"]["path"]) + ) + download( + dataset_name="mastr_new", + download_dir=Path(mastr_data_setup.targets.tables["mastr_new"]["download_dir"]["path"]) + ) class mastr_data_setup(Dataset): @@ -78,10 +91,61 @@ class mastr_data_setup(Dataset): #: name: str = "MastrData" #: - version: str = "0.0.2" + version: str = "0.0.4" #: tasks = (download_mastr_data,) + + sources = DatasetSources( + tables={ + "mastr": { + "zenodo": { + "deposit_id": "10480930", + "file_basename": "bnetza_mastr", + "technologies": [ + "wind", + "hydro", + "solar", + "biomass", + "combustion", + "nuclear", + "gsgk", + "storage", + ], + } + }, + "mastr_new": { + "zenodo": { + "deposit_id": "10491882", + "file_basename": "bnetza_mastr", + "technologies": [ + "biomass", + "combustion", + "gsgk", + "hydro", + "nuclear", + "solar", + "storage", + "wind", + ], + + } + }, + } + ) + + targets = DatasetTargets( + tables={ + "mastr": { + "download_dir": {"path": "./bnetza_mastr/dump_2021-05-03"}, + }, + "mastr_new": { + "download_dir": {"path": "./bnetza_mastr/dump_2024-01-08"}, + }, + } + ) + + def __init__(self, dependencies): super().__init__( name=self.name, diff --git a/src/egon/data/datasets/mv_grid_districts.py b/src/egon/data/datasets/mv_grid_districts.py index c968e6b5b..9a518b45b 100644 --- a/src/egon/data/datasets/mv_grid_districts.py +++ b/src/egon/data/datasets/mv_grid_districts.py @@ -21,10 +21,11 @@ from sqlalchemy.ext.declarative import declarative_base from egon.data import db -from egon.data.datasets import Dataset from egon.data.datasets.osmtgmod.substation import EgonHvmvSubstation from egon.data.datasets.substation_voronoi import EgonHvmvSubstationVoronoi from egon.data.db import session_scope +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets + Base = declarative_base() metadata = 
Base.metadata @@ -816,7 +817,23 @@ class mv_grid_districts_setup(Dataset): #: name: str = "MvGridDistricts" #: - version: str = "0.0.2" + version: str = "0.0.3" + + sources = DatasetSources( + tables={ + "vg250_gem_clean": "boundaries.vg250_gem_clean", + "hvmv_substation": "grid.egon_hvmv_substation", + "hvmv_substation_voronoi": "grid.egon_hvmv_substation_voronoi", + } + + ) + + targets = DatasetTargets( + tables={ + "egon_mv_grid_district": "grid.egon_mv_grid_district", + } + ) + def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/osm/__init__.py b/src/egon/data/datasets/osm/__init__.py index a63227f7c..53635958a 100644 --- a/src/egon/data/datasets/osm/__init__.py +++ b/src/egon/data/datasets/osm/__init__.py @@ -22,7 +22,7 @@ from egon.data import db, logger from egon.data.config import settings -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.metadata import ( context, generate_resource_fields_from_db_table, @@ -35,26 +35,24 @@ def download(): """Download OpenStreetMap `.pbf` file.""" - data_config = egon.data.config.datasets() - osm_config = data_config["openstreetmap"]["original_data"] - + download_directory = Path(".") / "openstreetmap" # Create the folder, if it does not exists already if not os.path.exists(download_directory): os.mkdir(download_directory) if settings()["egon-data"]["--dataset-boundary"] == "Everything": - source_url = osm_config["source"]["url"] - target_filename = osm_config["target"]["file"] + source_url = OpenStreetMap.sources.urls["germany"] + target_filename = Path(OpenStreetMap.targets.files["germany"]) else: - source_url = osm_config["source"]["url_testmode"] - target_filename = osm_config["target"]["file_testmode"] - + source_url = OpenStreetMap.sources.urls["schleswig-holstein"] + target_filename = Path(OpenStreetMap.targets.files["schleswig-holstein"]) + target_file = download_directory / target_filename if not 
os.path.isfile(target_file): urlretrieve(source_url, target_file) - + def to_postgres(cache_size=4096): """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL database. @@ -71,26 +69,26 @@ def to_postgres(cache_size=4096): # Read database configuration from docker-compose.yml docker_db_config = db.credentials() - # Get dataset config - data_config = egon.data.config.datasets() - osm_config = data_config["openstreetmap"]["original_data"] - + # Drop old target tables (the list is in OpenStreetMap.targets.tables) + for table in OpenStreetMap.targets.tables: + db.execute_sql(f"DROP TABLE IF EXISTS {OpenStreetMap.schema}.{table} CASCADE;") + if settings()["egon-data"]["--dataset-boundary"] == "Everything": - input_filename = osm_config["target"]["file"] + input_filename = Path(OpenStreetMap.targets.files["germany"]) logger.info("Using Everything DE dataset.") else: - input_filename = osm_config["target"]["file_testmode"] + input_filename = Path(OpenStreetMap.targets.files["schleswig-holstein"]) logger.info("Using testmode SH dataset.") input_file = Path(".") / "openstreetmap" / input_filename style_file = ( - Path(".") / "openstreetmap" / osm_config["source"]["stylefile"] + Path(".") / "openstreetmap" / OpenStreetMap.sources.files["stylefile"] ) with resources.path( - "egon.data.datasets.osm", osm_config["source"]["stylefile"] + "egon.data.datasets.osm", OpenStreetMap.sources.files["stylefile"] ) as p: shutil.copy(p, style_file) - + # Prepare osm2pgsql command cmd = [ "osm2pgsql", @@ -110,7 +108,7 @@ def to_postgres(cache_size=4096): "-U", f"{docker_db_config['POSTGRES_USER']}", "-p", - f"{osm_config['target']['table_prefix']}", + f"{OpenStreetMap.table_prefix}", # This line is updated "-S", f"{style_file.absolute()}", f"{input_file.absolute()}", @@ -122,21 +120,21 @@ def to_postgres(cache_size=4096): env={"PGPASSWORD": docker_db_config["POSTGRES_PASSWORD"]}, cwd=Path(__file__).parent, ) + def add_metadata(): """Writes metadata JSON string into table 
comment.""" - # Prepare variables - osm_config = egon.data.config.datasets()["openstreetmap"] + + if settings()["egon-data"]["--dataset-boundary"] == "Everything": - osm_url = osm_config["original_data"]["source"]["url"] - input_filename = osm_config["original_data"]["target"]["file"] + osm_url = OpenStreetMap.sources.urls["germany"] + input_filename = OpenStreetMap.targets.files["germany"] else: - osm_url = osm_config["original_data"]["source"]["url_testmode"] - input_filename = osm_config["original_data"]["target"]["file_testmode"] + osm_url = OpenStreetMap.sources.urls["schleswig-holstein"] + input_filename = OpenStreetMap.targets.files["schleswig-holstein"] - # Extract spatial extend and date (spatial_extend, osm_data_date) = re.compile( "^([\\w-]*).*-(\\d+)$" ).findall(Path(input_filename).name.split(".")[0])[0] @@ -144,11 +142,11 @@ def add_metadata(): osm_data_date, "%y%m%d" ).strftime("%y-%m-%d") - # Insert metadata for each table licenses = [license_odbl(attribution="© OpenStreetMap contributors")] - for table in osm_config["processed"]["tables"]: - schema_table = ".".join([osm_config["processed"]["schema"], table]) + + for table in OpenStreetMap.targets.tables: + schema_table = ".".join([OpenStreetMap.schema, table]) table_suffix = table.split("_")[1] meta = { "name": schema_table, @@ -217,7 +215,7 @@ def add_metadata(): "encoding": "UTF-8", "schema": { "fields": generate_resource_fields_from_db_table( - osm_config["processed"]["schema"], table + OpenStreetMap.schema, table ), "primaryKey": ["id"], "foreignKeys": [], @@ -227,10 +225,8 @@ def add_metadata(): ], "metaMetadata": meta_metadata(), } - meta_json = "'" + json.dumps(meta) + "'" - - db.submit_comment(meta_json, "openstreetmap", table) + db.submit_comment(meta_json, OpenStreetMap.schema, table) def modify_tables(): @@ -240,12 +236,10 @@ def modify_tables(): * Indices (GIST, GIN) are reset * The tables are moved to the schema configured as the "output_schema". 
""" - # Get dataset config - data_config = egon.data.config.datasets()["openstreetmap"] # Replace indices and primary keys for table in [ - f"{data_config['original_data']['target']['table_prefix']}_" + suffix + f"{OpenStreetMap.table_prefix}_" + suffix for suffix in ["line", "point", "polygon", "roads"] ]: @@ -278,26 +272,55 @@ def modify_tables(): for statement in sql_statements: db.execute_sql(statement) - # Move table to schema "openstreetmap" db.execute_sql( - f"CREATE SCHEMA IF NOT EXISTS {data_config['processed']['schema']};" + f"CREATE SCHEMA IF NOT EXISTS {OpenStreetMap.schema};" ) - for out_table in data_config["processed"]["tables"]: + for out_table in OpenStreetMap.targets.tables: db.execute_sql( f"DROP TABLE IF EXISTS " - f"{data_config['processed']['schema']}.{out_table};" + f"{OpenStreetMap.schema}.{out_table};" ) sql_statement = ( f"ALTER TABLE public.{out_table} " - f"SET SCHEMA {data_config['processed']['schema']};" + f"SET SCHEMA {OpenStreetMap.schema};" ) db.execute_sql(sql_statement) - - + class OpenStreetMap(Dataset): + + #: + name: str = "OpenStreetMap" + #: + version: str = "0.0.7" + + table_prefix: str = "osm" + schema: str = "openstreetmap" + + sources = DatasetSources( + files={"stylefile": "oedb.style"}, + urls={ + "germany": "https://download.geofabrik.de/europe/germany-240101.osm.pbf", + "schleswig-holstein": "https://download.geofabrik.de/europe/germany/schleswig-holstein-240101.osm.pbf", + }, + ) + targets = DatasetTargets( + files={ + "germany": "germany-240101.osm.pbf", + "schleswig-holstein": "schleswig-holstein-240101.osm.pbf", + }, + tables=[ + "osm_line", + "osm_nodes", + "osm_point", + "osm_polygon", + "osm_rels", + "osm_roads", + "osm_ways", + ], + ) """ Downloads OpenStreetMap data from Geofabrik and writes it to database. @@ -316,11 +339,7 @@ class OpenStreetMap(Dataset): See documentation section :ref:`osm-ref` for more information. 
""" - - #: - name: str = "OpenStreetMap" - #: - version: str = "0.0.4" + def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/osm_buildings_streets/__init__.py b/src/egon/data/datasets/osm_buildings_streets/__init__.py index 5677cf224..3e4511028 100644 --- a/src/egon/data/datasets/osm_buildings_streets/__init__.py +++ b/src/egon/data/datasets/osm_buildings_streets/__init__.py @@ -6,7 +6,7 @@ import os from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets def execute_sql_script(script): @@ -184,7 +184,89 @@ class OsmBuildingsStreets(Dataset): #: name: str = "OsmBuildingsStreets" #: - version: str = "0.0.7" + version: str = "0.0.8" + + sources = DatasetSources( + tables={ + "osm_polygon": { + "schema": "openstreetmap", + "table": "osm_polygon", + }, + "osm_point": { + "schema": "openstreetmap", + "table": "osm_point", + }, + "osm_line": { + "schema": "openstreetmap", + "table": "osm_line", + }, + "osm_ways": { + "schema": "openstreetmap", + "table": "osm_ways", + }, + "zensus_apartments": { + "schema": "society", + "table": "egon_destatis_zensus_apartment_building_population_per_ha", + }, + "zensus_population": { + "schema": "society", + "table": "destatis_zensus_population_per_ha", + }, + } + ) + + targets = DatasetTargets( + tables={ + "osm_buildings": { + "schema": "openstreetmap", + "table": "osm_buildings", + }, + "osm_buildings_filtered": { + "schema": "openstreetmap", + "table": "osm_buildings_filtered", + }, + "osm_buildings_residential": { + "schema": "openstreetmap", + "table": "osm_buildings_residential", + }, + "osm_amenities_shops_filtered": { + "schema": "openstreetmap", + "table": "osm_amenities_shops_filtered", + }, + "osm_buildings_with_amenities": { + "schema": "openstreetmap", + "table": "osm_buildings_with_amenities", + }, + "osm_buildings_without_amenities": { + "schema": "openstreetmap", + "table": 
"osm_buildings_without_amenities", + }, + "osm_amenities_not_in_buildings": { + "schema": "openstreetmap", + "table": "osm_amenities_not_in_buildings", + }, + "osm_ways_preprocessed": { + "schema": "openstreetmap", + "table": "osm_ways_preprocessed", + }, + "osm_ways_with_segments": { + "schema": "openstreetmap", + "table": "osm_ways_with_segments", + }, + "map_buildings_filtered": { + "schema": "boundaries", + "table": "egon_map_zensus_buildings_filtered", + }, + "map_buildings_filtered_all": { + "schema": "boundaries", + "table": "egon_map_zensus_buildings_filtered_all", + }, + "map_buildings_residential": { + "schema": "boundaries", + "table": "egon_map_zensus_buildings_residential", + }, + } + ) def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/osmtgmod/__init__.py b/src/egon/data/datasets/osmtgmod/__init__.py index 458cfa1ba..0ef23b7dd 100644 --- a/src/egon/data/datasets/osmtgmod/__init__.py +++ b/src/egon/data/datasets/osmtgmod/__init__.py @@ -13,7 +13,7 @@ from egon.data import db, logger from egon.data.config import settings -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.osmtgmod.substation import extract from egon.data.datasets.scenario_parameters import get_sector_parameters import egon.data.config @@ -24,13 +24,11 @@ def run(): sys.setrecursionlimit(5000) # execute osmTGmod - data_config = egon.data.config.datasets() - osm_config = data_config["openstreetmap"]["original_data"] if settings()["egon-data"]["--dataset-boundary"] == "Everything": - target_path = osm_config["target"]["file"] + target_path = "germany-240101.osm.pbf" else: - target_path = osm_config["target"]["file_testmode"] + target_path = "schleswig-holstein-240101.osm.pbf" filtered_osm_pbf_path_to_file = os.path.join( egon.data.__path__[0], "datasets", "osm", target_path @@ -94,13 +92,12 @@ def import_osm_data(): ] ) - data_config = egon.data.config.datasets() - 
osm_config = data_config["openstreetmap"]["original_data"] + if settings()["egon-data"]["--dataset-boundary"] == "Everything": - target_path = osm_config["target"]["file"] + target_path = "germany-240101.osm.pbf" else: - target_path = osm_config["target"]["file_testmode"] + target_path = "schleswig-holstein-240101.osm.pbf" filtered_osm_pbf_path_to_file = Path(".") / "openstreetmap" / target_path @@ -543,13 +540,14 @@ def osmtgmod( def to_pypsa(): db.execute_sql( + f""" + -- CLEAN UP OF TABLES + DELETE FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE carrier = 'AC'; + DELETE FROM {Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']}; + DELETE FROM {Osmtgmod.targets.tables['etrago_transformer']['schema']}.{Osmtgmod.targets.tables['etrago_transformer']['table']}; """ - -- CLEAN UP OF TABLES - DELETE FROM grid.egon_etrago_bus - WHERE carrier = 'AC'; - DELETE FROM grid.egon_etrago_line; - DELETE FROM grid.egon_etrago_transformer; - """ + ) # for scenario_name in ["'eGon2035'", "'eGon100RE'", "'status2019'"]: @@ -566,11 +564,12 @@ def to_pypsa(): lifetime = get_sector_parameters( "electricity", scenario_name.replace("'", "") )["lifetime"] + db.execute_sql( f""" -- BUS DATA - INSERT INTO grid.egon_etrago_bus (scn_name, bus_id, v_nom, - geom, x, y, carrier, country) + INSERT INTO {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + (scn_name, bus_id, v_nom, geom, x, y, carrier, country) SELECT {scenario_name}, bus_i AS bus_id, @@ -580,21 +579,20 @@ def to_pypsa(): ST_Y(geom) as y, 'AC' as carrier, cntr_id - FROM osmtgmod_results.bus_data - WHERE result_id = 1; + FROM {Osmtgmod.sources.tables['osmtgmod_bus']['schema']}.{Osmtgmod.sources.tables['osmtgmod_bus']['table']} + WHERE result_id = 1; -- BRANCH DATA - INSERT INTO grid.egon_etrago_line (scn_name, line_id, bus0, - bus1, x, r, b, s_nom, s_nom_min, s_nom_extendable, 
- cables, v_nom, - geom, topo, carrier) + INSERT INTO {Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']} + (scn_name, line_id, bus0, bus1, x, r, b, s_nom, s_nom_min, s_nom_extendable, + cables, v_nom, geom, topo, carrier) SELECT {scenario_name}, branch_id AS line_id, f_bus AS bus0, t_bus AS bus1, - br_x AS x, + br_x AS x, --- change base from 100MVA (osmtgmod) to the its individual s_nom (pypsa) br_r AS r, br_b as b, rate_a as s_nom, @@ -605,22 +603,19 @@ def to_pypsa(): geom, topo, 'AC' as carrier - FROM osmtgmod_results.branch_data - WHERE result_id = 1 and (link_type = 'line' or - link_type = 'cable'); + FROM {Osmtgmod.sources.tables['osmtgmod_branch']['schema']}.{Osmtgmod.sources.tables['osmtgmod_branch']['table']} + WHERE result_id = 1 AND (link_type = 'line' OR link_type = 'cable'); -- TRANSFORMER DATA - INSERT INTO grid.egon_etrago_transformer (scn_name, - trafo_id, bus0, bus1, x, - s_nom, s_nom_min, s_nom_extendable, tap_ratio, - phase_shift, geom, topo) + INSERT INTO {Osmtgmod.targets.tables['etrago_transformer']['schema']}.{Osmtgmod.targets.tables['etrago_transformer']['table']} + (scn_name, trafo_id, bus0, bus1, x, s_nom, s_nom_min, s_nom_extendable, tap_ratio, phase_shift, geom, topo) SELECT {scenario_name}, branch_id AS trafo_id, f_bus AS bus0, t_bus AS bus1, - br_x/(100 * rate_a) AS x, --- change base from 100MVA (osmtgmod) to the its individual s_nom (pypsa) + br_x/(100 * rate_a) AS x, rate_a as s_nom, rate_a as s_nom_min, TRUE, @@ -628,205 +623,200 @@ def to_pypsa(): shift AS phase_shift, geom, topo - FROM osmtgmod_results.branch_data - WHERE result_id = 1 and link_type = 'transformer'; - + FROM {Osmtgmod.sources.tables['osmtgmod_branch']['schema']}.{Osmtgmod.sources.tables['osmtgmod_branch']['table']} + WHERE result_id = 1 AND link_type = 'transformer'; -- per unit to absolute values - UPDATE grid.egon_etrago_line a + UPDATE 
{Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']} a SET - r = r * (((SELECT v_nom - FROM grid.egon_etrago_bus b - WHERE bus_id=bus1 - AND a.scn_name = b.scn_name - )*1000)^2 / (100 * 10^6)), - x = x * (((SELECT v_nom - FROM grid.egon_etrago_bus b - WHERE bus_id=bus1 - AND a.scn_name = b.scn_name - )*1000)^2 / (100 * 10^6)), - b = b * (((SELECT v_nom - FROM grid.egon_etrago_bus b - WHERE bus_id=bus1 - AND a.scn_name = b.scn_name - )*1000)^2 / (100 * 10^6)) + r = r * (((SELECT v_nom + FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} b + WHERE bus_id = bus1 + AND a.scn_name = b.scn_name)*1000)^2 / (100 * 10^6)), + x = x * (((SELECT v_nom FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} b + WHERE bus_id = bus1 + AND a.scn_name = b.scn_name)*1000)^2 / (100 * 10^6)), + b = b * (((SELECT v_nom FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} b + WHERE bus_id = bus1 + AND a.scn_name = b.scn_name)*1000)^2 / (100 * 10^6)) WHERE scn_name = {scenario_name}; - -- calculate line length (in km) from geoms - - UPDATE grid.egon_etrago_line a - SET - length = result.length - FROM - (SELECT b.line_id, st_length(b.geom,false)/1000 as length - from grid.egon_etrago_line b) - as result - WHERE a.line_id = result.line_id + -- calculate line length in (km) from geoms + + UPDATE {Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']} a + SET + length = result.length + FROM ( + SELECT b.line_id, ST_Length(b.geom,false)/1000 as length + FROM {Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']} b) + as result + WHERE a.line_id = result.line_id AND scn_name = {scenario_name}; -- set capital costs for eHV-lines - UPDATE grid.egon_etrago_line + UPDATE 
{Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']} SET capital_cost = {capital_cost['ac_ehv_overhead_line']} * length - WHERE v_nom > 110 + WHERE v_nom > 110 AND scn_name = {scenario_name}; - + -- set capital costs for HV-lines - UPDATE grid.egon_etrago_line + + UPDATE {Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']} SET capital_cost = {capital_cost['ac_hv_overhead_line']} * length - WHERE v_nom = 110 + WHERE v_nom = 110 AND scn_name = {scenario_name}; -- set capital costs for transformers - UPDATE grid.egon_etrago_transformer a + UPDATE {Osmtgmod.targets.tables['etrago_transformer']['schema']}.{Osmtgmod.targets.tables['etrago_transformer']['table']} a SET capital_cost = {capital_cost['transformer_380_220']} - WHERE (a.bus0 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 380) + WHERE (a.bus0 IN + (SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 380) AND a.bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 220)) + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 220)) OR (a.bus0 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 220) + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 220) AND a.bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 380)) + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 380)) AND scn_name = {scenario_name}; - UPDATE grid.egon_etrago_transformer a + UPDATE {Osmtgmod.targets.tables['etrago_transformer']['schema']}.{Osmtgmod.targets.tables['etrago_transformer']['table']} a SET capital_cost = {capital_cost['transformer_380_110']} WHERE (a.bus0 
IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} WHERE v_nom = 380) AND a.bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} WHERE v_nom = 110)) OR (a.bus0 IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} WHERE v_nom = 110) AND a.bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} WHERE v_nom = 380)) AND scn_name = {scenario_name}; - UPDATE grid.egon_etrago_transformer a + + UPDATE {Osmtgmod.targets.tables['etrago_transformer']['schema']}.{Osmtgmod.targets.tables['etrago_transformer']['table']} a SET capital_cost = {capital_cost['transformer_220_110']} WHERE (a.bus0 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 220) + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 220) AND a.bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 110)) + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 110)) OR (a.bus0 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 110) - AND a.bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 220)) + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 110) + AND a.bus1 IN (SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 220)) AND scn_name = {scenario_name}; -- set lifetime for 
eHV-lines - UPDATE grid.egon_etrago_line + UPDATE {Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']} SET lifetime = {lifetime['ac_ehv_overhead_line']} - WHERE v_nom > 110 - AND scn_name = {scenario_name}; + WHERE v_nom > 110 AND scn_name = {scenario_name}; + -- set capital costs for HV-lines - UPDATE grid.egon_etrago_line + + UPDATE {Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']} SET lifetime = {lifetime['ac_hv_overhead_line']} - WHERE v_nom = 110 - AND scn_name = {scenario_name}; + WHERE v_nom = 110 AND scn_name = {scenario_name}; -- set capital costs for transformers - UPDATE grid.egon_etrago_transformer a + UPDATE {Osmtgmod.targets.tables['etrago_transformer']['schema']}.{Osmtgmod.targets.tables['etrago_transformer']['table']} a SET lifetime = {lifetime['transformer_380_220']} WHERE (a.bus0 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 380) + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 380) + AND a.bus1 IN ( + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 220)) + OR (a.bus0 IN ( + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 220) AND a.bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 220)) - OR (a.bus0 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 220) - AND a.bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 380)) + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 380)) AND scn_name = {scenario_name}; - - UPDATE grid.egon_etrago_transformer a + + UPDATE 
{Osmtgmod.targets.tables['etrago_transformer']['schema']}.{Osmtgmod.targets.tables['etrago_transformer']['table']} a SET lifetime = {lifetime['transformer_380_110']} WHERE (a.bus0 IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} WHERE v_nom = 380) AND a.bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} WHERE v_nom = 110)) OR (a.bus0 IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} WHERE v_nom = 110) AND a.bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} WHERE v_nom = 380)) AND scn_name = {scenario_name}; - UPDATE grid.egon_etrago_transformer a + UPDATE {Osmtgmod.targets.tables['etrago_transformer']['schema']}.{Osmtgmod.targets.tables['etrago_transformer']['table']} a SET lifetime = {lifetime['transformer_220_110']} WHERE (a.bus0 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 220) + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 220) AND a.bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 110)) + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 110)) OR (a.bus0 IN ( - SELECT bus_id FROM grid.egon_etrago_bus - WHERE v_nom = 110) + SELECT bus_id FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE v_nom = 110) AND a.bus1 IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM 
{Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} WHERE v_nom = 220)) AND scn_name = {scenario_name}; + -- delete buses without connection to AC grid and generation or -- load assigned - DELETE FROM grid.egon_etrago_bus - WHERE scn_name={scenario_name} - AND carrier = 'AC' + DELETE FROM {Osmtgmod.targets.tables['etrago_bus']['schema']}.{Osmtgmod.targets.tables['etrago_bus']['table']} + WHERE scn_name = {scenario_name} + AND carrier = 'AC' AND bus_id NOT IN - (SELECT bus0 FROM grid.egon_etrago_line WHERE - scn_name={scenario_name}) + (SELECT bus0 FROM {Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']} + WHERE scn_name = {scenario_name}) AND bus_id NOT IN - (SELECT bus1 FROM grid.egon_etrago_line WHERE - scn_name={scenario_name}) + (SELECT bus1 FROM {Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']} + WHERE scn_name = {scenario_name}) AND bus_id NOT IN - (SELECT bus0 FROM grid.egon_etrago_transformer - WHERE scn_name={scenario_name}) + (SELECT bus0 FROM {Osmtgmod.targets.tables['etrago_transformer']['schema']}.{Osmtgmod.targets.tables['etrago_transformer']['table']} + WHERE scn_name = {scenario_name}) AND bus_id NOT IN - (SELECT bus1 FROM grid.egon_etrago_transformer - WHERE scn_name={scenario_name}); - """ + (SELECT bus1 FROM {Osmtgmod.targets.tables['etrago_transformer']['schema']}.{Osmtgmod.targets.tables['etrago_transformer']['table']} + WHERE scn_name = {scenario_name}); + """ ) def fix_transformer_snom(): db.execute_sql( - """ - UPDATE grid.egon_etrago_transformer AS t - SET s_nom = CAST( - LEAST( - (SELECT SUM(COALESCE(l.s_nom,0)) - FROM grid.egon_etrago_line AS l - WHERE (l.bus0 = t.bus0 OR l.bus1 = t.bus0) - AND l.scn_name = t.scn_name), - (SELECT SUM(COALESCE(l.s_nom,0)) - FROM grid.egon_etrago_line AS l - WHERE (l.bus0 = t.bus1 OR l.bus1 = t.bus1) - AND l.scn_name = t.scn_name) - ) AS smallint - ); - """ - ) + 
f""" + UPDATE {Osmtgmod.targets.tables['etrago_transformer']['schema']}.{Osmtgmod.targets.tables['etrago_transformer']['table']} AS t + SET s_nom = CAST( + LEAST( + (SELECT SUM(COALESCE(l.s_nom,0)) + FROM {Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']} AS l + WHERE (l.bus0 = t.bus0 OR l.bus1 = t.bus0) + AND l.scn_name = t.scn_name), + (SELECT SUM(COALESCE(l.s_nom,0)) + FROM {Osmtgmod.targets.tables['etrago_line']['schema']}.{Osmtgmod.targets.tables['etrago_line']['table']} AS l + WHERE (l.bus0 = t.bus1 OR l.bus1 = t.bus1) + AND l.scn_name = t.scn_name) + ) AS smallint + ); + """) class Osmtgmod(Dataset): @@ -856,7 +846,61 @@ class Osmtgmod(Dataset): #: name: str = "Osmtgmod" #: - version: str = "0.0.7" + version: str = "0.0.10" + + sources = DatasetSources( + tables={ + "osmtgmod_bus": { + "schema": "osmtgmod_results", + "table": "bus_data", + }, + "osmtgmod_branch": { + "schema": "osmtgmod_results", + "table": "branch_data", + }, + "osmtgmod_dcline_data": { + "schema": "osmtgmod_results", + "table": "dcline_data", + }, + "osmtgmod_results_meta": { + "schema": "osmtgmod_results", + "table": "results_metadata", + }, + "ehv_transfer_buses": { + "schema": "grid", + "table": "egon_ehv_transfer_buses", + }, + "hvmv_transfer_buses": { + "schema": "grid", + "table": "egon_hvmv_transfer_buses", + }, + } + ) + + targets = DatasetTargets( + tables={ + "etrago_bus": { + "schema": "grid", + "table": "egon_etrago_bus", + }, + "etrago_line": { + "schema": "grid", + "table": "egon_etrago_line", + }, + "etrago_transformer": { + "schema": "grid", + "table": "egon_etrago_transformer", + }, + "ehv_substation": { + "schema": "grid", + "table": "egon_ehv_substation", + }, + "hvmv_substation": { + "schema": "grid", + "table": "egon_hvmv_substation", + }, + } + ) def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/osmtgmod/substation.py b/src/egon/data/datasets/osmtgmod/substation.py index 
dea2f3d36..f85041194 100644 --- a/src/egon/data/datasets/osmtgmod/substation.py +++ b/src/egon/data/datasets/osmtgmod/substation.py @@ -7,6 +7,7 @@ from sqlalchemy.ext.declarative import declarative_base from egon.data import db +from egon.data.datasets import load_sources_and_targets Base = declarative_base() @@ -97,6 +98,7 @@ def extract(): None. """ + sources, targets = load_sources_and_targets("Osmtgmod") # Create tables for substations create_tables() @@ -105,7 +107,7 @@ def extract(): f""" INSERT INTO {EgonEhvSubstation.__table__.schema}.{EgonEhvSubstation.__table__.name} - SELECT * FROM grid.egon_ehv_transfer_buses; + SELECT * FROM {sources.tables['ehv_transfer_buses']['schema']}.{sources.tables['ehv_transfer_buses']['table']}; -- update ehv_substation table with new column of respective osmtgmod bus_i @@ -114,9 +116,9 @@ def extract(): -- fill table with bus_i from osmtgmod UPDATE {EgonEhvSubstation.__table__.schema}.{EgonEhvSubstation.__table__.name} - SET otg_id = osmtgmod_results.bus_data.bus_i - FROM osmtgmod_results.bus_data - WHERE osmtgmod_results.bus_data.base_kv > 110 AND(SELECT TRIM(leading 'n' FROM TRIM(leading 'w' FROM TRIM(leading 'r' FROM grid.egon_ehv_substation.osm_id)))::BIGINT)=osmtgmod_results.bus_data.osm_substation_id; + SET otg_id = {sources.tables['osmtgmod_bus']['schema']}.{sources.tables['osmtgmod_bus']['table']}.bus_i + FROM {sources.tables['osmtgmod_bus']['schema']}.{sources.tables['osmtgmod_bus']['table']} + WHERE {sources.tables['osmtgmod_bus']['schema']}.{sources.tables['osmtgmod_bus']['table']}.base_kv > 110 AND (SELECT TRIM(leading 'n' FROM TRIM(leading 'w' FROM TRIM(leading 'r' FROM {targets.tables['ehv_substation']['schema']}.{targets.tables['ehv_substation']['table']}.osm_id)))::BIGINT) = {sources.tables['osmtgmod_bus']['schema']}.{sources.tables['osmtgmod_bus']['table']}.osm_substation_id; DELETE FROM {EgonEhvSubstation.__table__.schema}.{EgonEhvSubstation.__table__.name} WHERE otg_id IS NULL; @@ -133,7 +135,7 @@ def 
extract(): f""" INSERT INTO {EgonHvmvSubstation.__table__.schema}.{EgonHvmvSubstation.__table__.name} - SELECT * FROM grid.egon_hvmv_transfer_buses; + SELECT * FROM {sources.tables['hvmv_transfer_buses']['schema']}.{sources.tables['hvmv_transfer_buses']['table']}; ALTER TABLE {EgonHvmvSubstation.__table__.schema}.{EgonHvmvSubstation.__table__.name} @@ -141,9 +143,9 @@ def extract(): -- fill table with bus_i from osmtgmod UPDATE {EgonHvmvSubstation.__table__.schema}.{EgonHvmvSubstation.__table__.name} - SET otg_id = osmtgmod_results.bus_data.bus_i - FROM osmtgmod_results.bus_data - WHERE osmtgmod_results.bus_data.base_kv <= 110 AND (SELECT TRIM(leading 'n' FROM TRIM(leading 'w' FROM grid.egon_hvmv_substation.osm_id))::BIGINT)=osmtgmod_results.bus_data.osm_substation_id; + SET otg_id = {sources.tables['osmtgmod_bus']['schema']}.{sources.tables['osmtgmod_bus']['table']}.bus_i + FROM {sources.tables['osmtgmod_bus']['schema']}.{sources.tables['osmtgmod_bus']['table']} + WHERE {sources.tables['osmtgmod_bus']['schema']}.{sources.tables['osmtgmod_bus']['table']}.base_kv <= 110 AND (SELECT TRIM(leading 'n' FROM TRIM(leading 'w' FROM {targets.tables['hvmv_substation']['schema']}.{targets.tables['hvmv_substation']['table']}.osm_id))::BIGINT) = {sources.tables['osmtgmod_bus']['schema']}.{sources.tables['osmtgmod_bus']['table']}.osm_substation_id; DELETE FROM {EgonHvmvSubstation.__table__.schema}.{EgonHvmvSubstation.__table__.name} WHERE otg_id IS NULL; diff --git a/src/egon/data/datasets/power_etrago/__init__.py b/src/egon/data/datasets/power_etrago/__init__.py index e5ab9e083..7305cc778 100755 --- a/src/egon/data/datasets/power_etrago/__init__.py +++ b/src/egon/data/datasets/power_etrago/__init__.py @@ -2,7 +2,7 @@ The central module containing all code dealing with open cycle gas turbine """ -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.power_etrago.match_ocgt import ( 
insert_open_cycle_gas_turbines, ) @@ -29,7 +29,34 @@ class OpenCycleGasTurbineEtrago(Dataset): #: name: str = "OpenCycleGasTurbineEtrago" #: - version: str = "0.0.2" + version: str = "0.0.3" + + sources = DatasetSources( + tables={ + "power_plants": { + "schema": "supply", + "table": "egon_power_plants", + }, + "etrago_bus": { + "schema": "grid", + "table": "egon_etrago_bus", + }, + "etrago_link": { + "schema": "grid", + "table": "egon_etrago_link", + }, + } + ) + + targets = DatasetTargets( + tables={ + "etrago_link": { + "schema": "grid", + "table": "egon_etrago_link", + }, + } + ) + def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/power_etrago/match_ocgt.py b/src/egon/data/datasets/power_etrago/match_ocgt.py index 1f2ee059c..fd568c220 100755 --- a/src/egon/data/datasets/power_etrago/match_ocgt.py +++ b/src/egon/data/datasets/power_etrago/match_ocgt.py @@ -10,6 +10,7 @@ from egon.data import config, db from egon.data.datasets.etrago_setup import link_geom_from_buses from egon.data.datasets.scenario_parameters import get_sector_parameters +from egon.data.datasets import load_sources_and_targets def insert_open_cycle_gas_turbines(): @@ -30,7 +31,7 @@ def insert_open_cycle_gas_turbines_per_scenario(scn_name): None """ - + sources, targets = load_sources_and_targets("OpenCycleGasTurbineEtrago") # Connect to local database engine = db.engine() @@ -48,7 +49,8 @@ def insert_open_cycle_gas_turbines_per_scenario(scn_name): buses = tuple( db.select_dataframe( - f"""SELECT bus_id FROM grid.egon_etrago_bus + f"""SELECT bus_id FROM {sources["etrago_bus"]["schema"]}. 
+ {sources["etrago_bus"]["table"]} WHERE scn_name = '{scn_name}' AND country = 'DE'; """ )["bus_id"] @@ -57,7 +59,8 @@ def insert_open_cycle_gas_turbines_per_scenario(scn_name): # Delete old entries db.execute_sql( f""" - DELETE FROM grid.egon_etrago_link WHERE "carrier" = '{carrier}' + DELETE FROM {targets["etrago_link"]["schema"]}.{targets["etrago_link"]["table"]} + WHERE "carrier" = '{carrier}' AND scn_name = '{scn_name}' AND bus0 IN {buses} AND bus1 IN {buses}; """ @@ -80,9 +83,9 @@ def insert_open_cycle_gas_turbines_per_scenario(scn_name): # Insert data to db gdf.to_postgis( - "egon_etrago_link", + targets["etrago_link"]["table"], engine, - schema="grid", + schema=targets["etrago_link"]["schema"], index=False, if_exists="append", dtype={"topo": Geometry()}, @@ -104,13 +107,14 @@ def map_buses(scn_name): GeoDataFrame with connected buses. """ + sources, _ = load_sources_and_targets("OpenCycleGasTurbineEtrago") # Create dataframes containing all gas buses and all the HV power buses sql_AC = f"""SELECT bus_id, el_capacity as p_nom, geom - FROM supply.egon_power_plants + FROM {sources["power_plants"]["schema"]}.{sources["power_plants"]["table"]} WHERE carrier = 'gas' AND scenario = '{scn_name}'; """ sql_gas = f"""SELECT bus_id, scn_name, geom - FROM grid.egon_etrago_bus + FROM {sources["etrago_bus"]["schema"]}.{sources["etrago_bus"]["table"]} WHERE carrier = 'CH4' AND scn_name = '{scn_name}' AND country = 'DE';""" diff --git a/src/egon/data/datasets/power_plants/__init__.py b/src/egon/data/datasets/power_plants/__init__.py index 1b7e9780b..f9d16fed7 100755 --- a/src/egon/data/datasets/power_plants/__init__.py +++ b/src/egon/data/datasets/power_plants/__init__.py @@ -17,6 +17,9 @@ from egon.data import db, logger from egon.data.datasets import Dataset, wrapped_partial + +from egon.data.datasets import DatasetSources, DatasetTargets + from egon.data.datasets.mastr import ( WORKING_DIR_MASTR_NEW, WORKING_DIR_MASTR_OLD, @@ -44,6 +47,7 @@ import 
egon.data.datasets.power_plants.wind_farms as wind_onshore import egon.data.datasets.power_plants.wind_offshore as wind_offshore + Base = declarative_base() @@ -68,15 +72,14 @@ def create_tables(): ------- None. """ - + + target_string = PowerPlants.targets.tables['power_plants'] + schema, table = target_string.split('.') + # Tables for future scenarios - cfg = egon.data.config.datasets()["power_plants"] - db.execute_sql(f"CREATE SCHEMA IF NOT EXISTS {cfg['target']['schema']};") + db.execute_sql(f"CREATE SCHEMA IF NOT EXISTS {schema};") engine = db.engine() - db.execute_sql( - f"""DROP TABLE IF EXISTS - {cfg['target']['schema']}.{cfg['target']['table']}""" - ) + db.execute_sql(f"DROP TABLE IF EXISTS {schema}.{table}") db.execute_sql("""DROP SEQUENCE IF EXISTS pp_seq""") EgonPowerPlants.__table__.create(bind=engine, checkfirst=True) @@ -118,18 +121,15 @@ def scale_prox2now(df, target, level="federal_state"): """ if level == "federal_state": df.loc[:, "Nettonennleistung"] = ( - ( - df.groupby(df.Bundesland) - .Nettonennleistung.apply(lambda grp: grp / grp.sum()) - .mul(target[df.Bundesland.values].values) - ) - .reset_index(level=[0]) - .Nettonennleistung + df.groupby(df.Bundesland) + .Nettonennleistung.apply(lambda grp: grp / grp.sum()) + .mul(target[df.Bundesland.values].values) + .values ) else: df.loc[:, "Nettonennleistung"] = df.Nettonennleistung * ( target / df.Nettonennleistung.sum() - ) + ).values df = df[df.Nettonennleistung > 0] @@ -152,15 +152,15 @@ def select_target(carrier, scenario): Target values for carrier and scenario """ - cfg = egon.data.config.datasets()["power_plants"] + return ( pd.read_sql( f"""SELECT DISTINCT ON (b.gen) REPLACE(REPLACE(b.gen, '-', ''), 'ü', 'ue') as state, a.capacity - FROM {cfg['sources']['capacities']} a, - {cfg['sources']['geom_federal_states']} b + FROM {PowerPlants.sources.tables['capacities']} a, + {PowerPlants.sources.tables['geom_federal_states']} b WHERE a.nuts = b.nuts AND scenario_name = '{scenario}' AND carrier = 
'{carrier}' @@ -190,7 +190,6 @@ def filter_mastr_geometry(mastr, federal_state=None): Power plants listed in MaStR with geometry inside German boundaries """ - cfg = egon.data.config.datasets()["power_plants"] if type(mastr) == pd.core.frame.DataFrame: # Drop entries without geometry for insert @@ -210,7 +209,7 @@ def filter_mastr_geometry(mastr, federal_state=None): # Drop entries outside of germany or federal state if not federal_state: - sql = f"SELECT geometry as geom FROM {cfg['sources']['geom_germany']}" + sql = f"SELECT geometry as geom FROM {PowerPlants.sources.tables['geom_germany']}" else: sql = f""" SELECT geometry as geom @@ -243,14 +242,13 @@ def insert_biomass_plants(scenario): None. """ - cfg = egon.data.config.datasets()["power_plants"] # import target values target = select_target("biomass", scenario) # import data for MaStR mastr = pd.read_csv( - WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_biomass"] + WORKING_DIR_MASTR_OLD / PowerPlants.sources.files["mastr_biomass"] ).query("EinheitBetriebsstatus=='InBetrieb'") # Drop entries without federal state or 'AusschließlichWirtschaftszone' @@ -259,7 +257,7 @@ def insert_biomass_plants(scenario): pd.read_sql( f"""SELECT DISTINCT ON (gen) REPLACE(REPLACE(gen, '-', ''), 'ü', 'ue') as states - FROM {cfg['sources']['geom_federal_states']}""", + FROM {PowerPlants.sources.tables['geom_federal_states']}""", con=db.engine(), ).states.values ) @@ -280,9 +278,9 @@ def insert_biomass_plants(scenario): # Assign bus_id if len(mastr_loc) > 0: mastr_loc["voltage_level"] = assign_voltage_level( - mastr_loc, cfg, WORKING_DIR_MASTR_OLD + mastr_loc, PowerPlants.sources.files, WORKING_DIR_MASTR_OLD ) - mastr_loc = assign_bus_id(mastr_loc, cfg) + mastr_loc = assign_bus_id(mastr_loc, PowerPlants.sources.tables) # Insert entries with location session = sessionmaker(bind=db.engine())() @@ -322,8 +320,6 @@ def insert_hydro_plants(scenario): None. 
""" - cfg = egon.data.config.datasets()["power_plants"] - # Map MaStR carriers to eGon carriers map_carrier = { "run_of_river": ["Laufwasseranlage"], @@ -352,7 +348,7 @@ def insert_hydro_plants(scenario): # import data for MaStR mastr = pd.read_csv( - WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_hydro"] + WORKING_DIR_MASTR_NEW / PowerPlants.sources.files["mastr_hydro"] ).query("EinheitBetriebsstatus=='InBetrieb'") # Choose only plants with specific carriers @@ -364,7 +360,7 @@ def insert_hydro_plants(scenario): pd.read_sql( f"""SELECT DISTINCT ON (gen) REPLACE(REPLACE(gen, '-', ''), 'ü', 'ue') as states - FROM {cfg['sources']['geom_federal_states']}""", + FROM {PowerPlants.sources.tables['geom_federal_states']}""", con=db.engine(), ).states.values ) @@ -386,9 +382,9 @@ def insert_hydro_plants(scenario): # Assign bus_id and voltage level if len(mastr_loc) > 0: mastr_loc["voltage_level"] = assign_voltage_level( - mastr_loc, cfg, WORKING_DIR_MASTR_NEW + mastr_loc, PowerPlants.sources.files, WORKING_DIR_MASTR_NEW ) - mastr_loc = assign_bus_id(mastr_loc, cfg) + mastr_loc = assign_bus_id(mastr_loc, PowerPlants.sources.tables) # Insert entries with location session = sessionmaker(bind=db.engine())() @@ -408,7 +404,7 @@ def insert_hydro_plants(scenario): session.commit() -def assign_voltage_level(mastr_loc, cfg, mastr_working_dir): +def assign_voltage_level(mastr_loc, sources, mastr_working_dir): """Assigns voltage level to power plants. 
If location data inluding voltage level is available from @@ -437,10 +433,11 @@ def assign_voltage_level(mastr_loc, cfg, mastr_working_dir): cols = ["MaStRNummer", "Spannungsebene"] else: raise ValueError("Invalid MaStR working directory!") + location = ( pd.read_csv( - mastr_working_dir / cfg["sources"]["mastr_location"], + mastr_working_dir / PowerPlants.sources.files["mastr_location"], usecols=cols, ) .rename(columns={"MaStRNummer": "LokationMastrNummer"}) @@ -522,7 +519,7 @@ def assign_voltage_level_by_capacity(mastr_loc): return mastr_loc.voltage_level -def assign_bus_id(power_plants, cfg, drop_missing=False): +def assign_bus_id(power_plants,sources, drop_missing=False): """Assigns bus_ids to power plants according to location and voltage level Parameters @@ -539,14 +536,14 @@ def assign_bus_id(power_plants, cfg, drop_missing=False): mv_grid_districts = db.select_geodataframe( f""" - SELECT * FROM {cfg['sources']['egon_mv_grid_district']} + SELECT * FROM {PowerPlants.sources.tables['egon_mv_grid_district']} """, epsg=4326, ) ehv_grid_districts = db.select_geodataframe( f""" - SELECT * FROM {cfg['sources']['ehv_voronoi']} + SELECT * FROM {PowerPlants.sources.tables['ehv_voronoi']} """, epsg=4326, ) @@ -598,10 +595,12 @@ def insert_hydro_biomass(): None. 
""" - cfg = egon.data.config.datasets()["power_plants"] + target_string = PowerPlants.targets.tables['power_plants'] + schema, table = target_string.split('.') + db.execute_sql( f""" - DELETE FROM {cfg['target']['schema']}.{cfg['target']['table']} + DELETE FROM {schema}.{table} WHERE carrier IN ('biomass', 'reservoir', 'run_of_river') AND scenario IN ('eGon2035', 'eGon100RE') """ @@ -638,12 +637,14 @@ def allocate_conventional_non_chp_power_plants(): carrier = ["oil", "gas"] - cfg = egon.data.config.datasets()["power_plants"] + + target_string = PowerPlants.targets.tables['power_plants'] + schema, table = target_string.split('.') # Delete existing plants in the target table db.execute_sql( f""" - DELETE FROM {cfg ['target']['schema']}.{cfg ['target']['table']} + DELETE FROM {schema}.{table} WHERE carrier IN ('gas', 'oil') AND scenario='eGon2035'; """ @@ -663,7 +664,7 @@ def allocate_conventional_non_chp_power_plants(): # Assign voltage level to MaStR mastr["voltage_level"] = assign_voltage_level( mastr.rename({"el_capacity": "Nettonennleistung"}, axis=1), - cfg, + PowerPlants.sources.files, # <--- Use .files WORKING_DIR_MASTR_OLD, ) @@ -751,14 +752,14 @@ def allocate_conventional_non_chp_power_plants(): # Load grid district polygons mv_grid_districts = db.select_geodataframe( f""" - SELECT * FROM {cfg['sources']['egon_mv_grid_district']} + SELECT * FROM {PowerPlants.sources.tables['egon_mv_grid_district']} """, epsg=4326, ) ehv_grid_districts = db.select_geodataframe( f""" - SELECT * FROM {cfg['sources']['ehv_voronoi']} + SELECT * FROM {PowerPlants.sources.tables['ehv_voronoi']} """, epsg=4326, ) @@ -803,13 +804,14 @@ def allocate_other_power_plants(): ): return - # Get configuration - cfg = egon.data.config.datasets()["power_plants"] boundary = egon.data.config.settings()["egon-data"]["--dataset-boundary"] + target_string = PowerPlants.targets.tables['power_plants'] + schema, table = target_string.split('.') + db.execute_sql( f""" - DELETE FROM 
{cfg['target']['schema']}.{cfg['target']['table']} + DELETE FROM {schema}.{table} WHERE carrier ='others' """ ) @@ -821,7 +823,7 @@ def allocate_other_power_plants(): target = db.select_dataframe( f""" SELECT sum(capacity) as capacity, carrier, scenario_name, nuts - FROM {cfg['sources']['capacities']} + FROM {PowerPlants.sources.tables['capacities']} WHERE scenario_name = '{scenario}' AND carrier = 'others' GROUP BY carrier, nuts, scenario_name; @@ -872,12 +874,13 @@ def allocate_other_power_plants(): # Select power plants representing carrier 'others' from MaStR files mastr_sludge = pd.read_csv( - WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_gsgk"] + WORKING_DIR_MASTR_OLD / PowerPlants.sources.files["mastr_gsgk"] ).query( - """EinheitBetriebsstatus=='InBetrieb'and Energietraeger=='Klärschlamm'""" # noqa: E501 + """EinheitBetriebsstatus=='InBetrieb' and Energietraeger=='Klärschlamm'""" ) + mastr_geothermal = pd.read_csv( - WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_gsgk"] + WORKING_DIR_MASTR_OLD / PowerPlants.sources.files["mastr_gsgk"] ).query( "EinheitBetriebsstatus=='InBetrieb' and Energietraeger=='Geothermie' " "and Technologie == 'ORCOrganicRankineCycleAnlage'" @@ -944,7 +947,7 @@ def allocate_other_power_plants(): ) # Assign bus_id - mastr_prox = assign_bus_id(mastr_prox, cfg) + mastr_prox = assign_bus_id(mastr_prox, PowerPlants.sources.tables) mastr_prox = mastr_prox.set_crs(4326, allow_override=True) # Insert into target table @@ -965,14 +968,18 @@ def allocate_other_power_plants(): def discard_not_available_generators(gen, max_date): - gen["decommissioning_date"] = pd.to_datetime(gen["decommissioning_date"]) + gen["decommissioning_date"] = pd.to_datetime( + gen["decommissioning_date"] + ) gen["commissioning_date"] = pd.to_datetime(gen["commissioning_date"]) # drop plants that are commissioned after the max date gen = gen[gen["commissioning_date"] < max_date] # drop decommissioned plants while keeping the ones decommissioned # after the max date - 
gen.loc[(gen["decommissioning_date"] > max_date), "status"] = "InBetrieb" + gen.loc[(gen["decommissioning_date"] > max_date), "status"] = ( + "InBetrieb" + ) gen = gen.loc[ gen["status"].isin(["InBetrieb", "VoruebergehendStillgelegt"]) @@ -1043,11 +1050,13 @@ def log_insert_capacity(df, tech): ) con = db.engine() - cfg = egon.data.config.datasets()["power_plants"] + + target_string = PowerPlants.targets.tables['power_plants'] + schema, table = target_string.split('.') db.execute_sql( f""" - DELETE FROM {cfg['target']['schema']}.{cfg['target']['table']} + DELETE FROM {schema}.{table} WHERE carrier IN ('wind_onshore', 'solar', 'biomass', 'run_of_river', 'reservoir', 'solar_rooftop', 'wind_offshore', 'nuclear', 'coal', 'lignite', 'oil', @@ -1070,7 +1079,7 @@ def log_insert_capacity(df, tech): mv_grid_districts = gpd.GeoDataFrame.from_postgis( f""" - SELECT * FROM {cfg['sources']['egon_mv_grid_district']} + SELECT * FROM {PowerPlants.sources.tables['egon_mv_grid_district']} """, con, ) @@ -1098,7 +1107,7 @@ def log_insert_capacity(df, tech): # Hydro Power Plants # ################### hydro = gpd.GeoDataFrame.from_postgis( - f"""SELECT *, city AS location FROM {cfg['sources']['hydro']} + f"""SELECT *, city AS location FROM {PowerPlants.sources.tables['hydro']} WHERE plant_type IN ('Laufwasseranlage', 'Speicherwasseranlage')""", con, geom_col="geom", @@ -1131,7 +1140,7 @@ def log_insert_capacity(df, tech): # Biomass # ################### biomass = gpd.GeoDataFrame.from_postgis( - f"""SELECT *, city AS location FROM {cfg['sources']['biomass']}""", + f"""SELECT *, city AS location FROM {PowerPlants.sources.tables['biomass']}""", con, geom_col="geom", ) @@ -1162,7 +1171,7 @@ def log_insert_capacity(df, tech): # Solar # ################### solar = gpd.GeoDataFrame.from_postgis( - f"""SELECT *, city AS location FROM {cfg['sources']['pv']} + f"""SELECT *, city AS location FROM {PowerPlants.sources.tables['pv']} WHERE site_type IN ('Freifläche', 'Bauliche Anlagen (Hausdach, 
Gebäude und Fassade)') """, con, @@ -1195,7 +1204,7 @@ def log_insert_capacity(df, tech): # Wind # ################### wind_onshore = gpd.GeoDataFrame.from_postgis( - f"""SELECT *, city AS location FROM {cfg['sources']['wind']}""", + f"""SELECT *, city AS location FROM {PowerPlants.sources.tables['wind']}""", con, geom_col="geom", ) @@ -1222,7 +1231,6 @@ def log_insert_capacity(df, tech): def get_conventional_power_plants_non_chp(scn_name): - cfg = egon.data.config.datasets()["power_plants"] # Write conventional power plants in supply.egon_power_plants common_columns = [ "EinheitMastrNummer", @@ -1237,12 +1245,12 @@ def get_conventional_power_plants_non_chp(scn_name): ] # import nuclear power plants nuclear = pd.read_csv( - WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_nuclear"], + WORKING_DIR_MASTR_OLD / PowerPlants.sources.files["mastr_nuclear"], usecols=common_columns, ) # import combustion power plants comb = pd.read_csv( - WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_combustion"], + WORKING_DIR_MASTR_OLD / PowerPlants.sources.files["mastr_combustion"], usecols=common_columns + ["ThermischeNutzleistung"], ) @@ -1353,12 +1361,14 @@ def import_gas_gen_egon100(): return con = db.engine() session = sessionmaker(bind=db.engine())() - cfg = egon.data.config.datasets()["power_plants"] scenario_date_max = "2045-12-31 23:59:00" + target_string = PowerPlants.targets.tables['power_plants'] + schema, table = target_string.split('.') + db.execute_sql( f""" - DELETE FROM {cfg['target']['schema']}.{cfg['target']['table']} + DELETE FROM {schema}.{table} WHERE carrier = 'gas' AND bus_id IN (SELECT bus_id from grid.egon_etrago_bus WHERE scn_name = '{scn_name}' @@ -1381,7 +1391,7 @@ def import_gas_gen_egon100(): mv_grid_districts = gpd.GeoDataFrame.from_postgis( f""" - SELECT * FROM {cfg['sources']['egon_mv_grid_district']} + SELECT * FROM {PowerPlants.sources.tables['egon_mv_grid_district']} """, con, ) @@ -1396,7 +1406,7 @@ def import_gas_gen_egon100(): ).iat[0, 0] conv = 
pd.read_csv( - WORKING_DIR_MASTR_OLD / cfg["sources"]["mastr_combustion"], + WORKING_DIR_MASTR_OLD / PowerPlants.sources.files["mastr_combustion"], usecols=[ "EinheitMastrNummer", "Energietraeger", @@ -1532,18 +1542,83 @@ def import_gas_gen_egon100(): ) for scn_name in egon.data.config.settings()["egon-data"]["--scenarios"]: - tasks += ( - wrapped_partial( - assign_weather_data.weatherId_and_busId, - scn_name=scn_name, - postfix=f"_{scn_name}", - ), - ) + tasks += (wrapped_partial(assign_weather_data.weatherId_and_busId, + scn_name=scn_name, + postfix=f"_{scn_name}" + ),) tasks += (pp_metadata.metadata,) class PowerPlants(Dataset): + sources = DatasetSources( + tables={ + 'geom_federal_states': "boundaries.vg250_lan", + 'geom_germany': "boundaries.vg250_sta_union", + 'egon_mv_grid_district': "grid.egon_mv_grid_district", + 'ehv_voronoi': "grid.egon_ehv_substation_voronoi", + 'capacities': "supply.egon_scenario_capacities", + 'hydro': 'supply.egon_power_plants_hydro', + 'biomass': 'supply.egon_power_plants_biomass', + 'pv': 'supply.egon_power_plants_pv', + 'wind': 'supply.egon_power_plants_wind', + "mastr_combustion_without_chp": "supply.egon_mastr_conventional_without_chp", + "nep_conv": "supply.egon_nep_2021_conventional_powerplants", + "buses_data": "osmtgmod_results.bus_data", + "storages": "supply.egon_storages", + "wind_potential_areas": "supply.egon_re_potential_area_wind", + "hvmv_substation": "grid.egon_hvmv_substation", + "electricity_demand": "demand.egon_demandregio_zensus_electricity", + "map_zensus_grid_districts": "boundaries.egon_map_zensus_grid_districts", + "map_grid_boundaries": "boundaries.egon_map_mvgriddistrict_vg250", + "federal_states": "boundaries.vg250_lan", # Alias + "scenario_capacities": "supply.egon_scenario_capacities", # Alias + "weather_cells": "supply.egon_era5_weather_cells", + "solar_feedin": "supply.egon_era5_renewable_feedin", + "potential_area_pv_road_railway": "supply.egon_re_potential_area_pv_road_railway", + 
"potential_area_pv_agriculture": "supply.egon_re_potential_area_pv_agriculture", + }, + files={ + 'mastr_biomass': "bnetza_mastr_biomass_cleaned.csv", + 'mastr_combustion': "bnetza_mastr_combustion_cleaned.csv", + 'mastr_gsgk': "bnetza_mastr_gsgk_cleaned.csv", + 'mastr_hydro': "bnetza_mastr_hydro_cleaned.csv", + 'mastr_location': "location_elec_generation_raw.csv", + 'mastr_nuclear': "bnetza_mastr_nuclear_cleaned.csv", + 'mastr_pv': "bnetza_mastr_solar_cleaned.csv", + 'mastr_storage': "bnetza_mastr_storage_cleaned.csv", + 'mastr_wind': "bnetza_mastr_wind_cleaned.csv", + # --- Config/Meta values --- + "osm_config": "https://download.geofabrik.de/europe/germany-240101.osm.pbf", + "nep_2035": "NEP2035_V2021_scnC2035.xlsx", + "mastr_deposit_id": "10491882", + "data_bundle_deposit_id": "16576506", + "status2023_date_max": "2023-12-31 23:59:00", + "status2019_date_max": "2019-12-31 23:59:00", + "egon2021_date_max": "2021-12-31 23:59:00", + "eGon2035_date_max": "2035-01-01", + "eGon100RE_date_max": "2050-01-01", + "mastr_geocoding_path": "data_bundle_egon_data/mastr_geocoding", + } + ) + + targets = DatasetTargets( + tables={ + 'power_plants': 'supply.egon_power_plants', + "generators": "grid.egon_etrago_generator", + "generator_timeseries": "grid.egon_etrago_generator_timeseries", + "mastr_geocoded": "supply.egon_mastr_geocoded", + "power_plants_pv": "supply.egon_power_plants_pv", + "power_plants_wind": "supply.egon_power_plants_wind", + "power_plants_biomass": "supply.egon_power_plants_biomass", + "power_plants_hydro": "supply.egon_power_plants_hydro", + "power_plants_combustion": "supply.egon_power_plants_combustion", + "power_plants_gsgk": "supply.egon_power_plants_gsgk", + "power_plants_nuclear": "supply.egon_power_plants_nuclear", + "power_plants_storage": "supply.egon_power_plants_storage", + } + ) + """ This dataset deals with the distribution and allocation of power plants @@ -1616,7 +1691,7 @@ class PowerPlants(Dataset): #: name: str = "PowerPlants" #: - 
version: str = "0.0.28" + version: str = "0.0.36" def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/power_plants/assign_weather_data.py b/src/egon/data/datasets/power_plants/assign_weather_data.py index e79b1e32c..ff264bd8f 100644 --- a/src/egon/data/datasets/power_plants/assign_weather_data.py +++ b/src/egon/data/datasets/power_plants/assign_weather_data.py @@ -3,9 +3,10 @@ from egon.data import db from egon.data.datasets.power_plants.pv_rooftop_buildings import timer_func import egon.data.config +from egon.data.datasets import load_sources_and_targets -def assign_bus_id(power_plants, cfg): +def assign_bus_id(power_plants): """Assigns bus_ids to power plants according to location and voltage level Parameters @@ -19,17 +20,18 @@ def assign_bus_id(power_plants, cfg): Power plants including voltage level and bus_id """ + sources, targets = load_sources_and_targets("PowerPlants") mv_grid_districts = db.select_geodataframe( f""" - SELECT * FROM {cfg['sources']['egon_mv_grid_district']} + SELECT * FROM {sources.tables['egon_mv_grid_district']} """, epsg=4326, ) ehv_grid_districts = db.select_geodataframe( f""" - SELECT * FROM {cfg['sources']['ehv_voronoi']} + SELECT * FROM {sources.tables['ehv_voronoi']} """, epsg=4326, ) @@ -70,16 +72,18 @@ def assign_bus_id(power_plants, cfg): return power_plants +@timer_func @timer_func def add_missing_bus_ids(scn_name): """Assign busses by spatal intersection of mvgrid districts or ehv voronois.""" + sources, targets = load_sources_and_targets("PowerPlants") sql = f""" -- Assign missing buses to mv grid district buses for HV and below - UPDATE supply.egon_power_plants AS epp + UPDATE {targets.tables['power_plants']} AS epp SET bus_id = ( SELECT emgd.bus_id - FROM grid.egon_mv_grid_district AS emgd + FROM {sources.tables['egon_mv_grid_district']} AS emgd WHERE ST_Intersects(ST_Transform(epp.geom, 4326), ST_Transform(emgd.geom, 4326)) ORDER BY ST_Transform(emgd.geom, 4326) <-> 
ST_Transform(epp.geom, 4326) LIMIT 1 @@ -94,10 +98,10 @@ def add_missing_bus_ids(scn_name): -- Assign missing buses to EHV buses for EHV - UPDATE supply.egon_power_plants AS epp + UPDATE {targets.tables['power_plants']} AS epp SET bus_id = ( SELECT eesv.bus_id - FROM grid.egon_ehv_substation_voronoi AS eesv + FROM {sources.tables['ehv_voronoi']} AS eesv WHERE ST_Intersects(ST_Transform(epp.geom, 4326), ST_Transform(eesv.geom, 4326)) ORDER BY ST_Transform(eesv.geom, 4326) <-> ST_Transform(epp.geom, 4326) LIMIT 1 @@ -116,13 +120,15 @@ def add_missing_bus_ids(scn_name): db.execute_sql(sql) +@timer_func @timer_func def find_weather_id(scn_name): + sources, targets = load_sources_and_targets("PowerPlants") - sql = f"""UPDATE supply.egon_power_plants AS epp + sql = f"""UPDATE {targets.tables['power_plants']} AS epp SET weather_cell_id = ( SELECT eewc.w_id - FROM supply.egon_era5_weather_cells AS eewc + FROM {sources.tables['weather_cells']} AS eewc WHERE ST_Intersects(epp.geom, eewc.geom) ORDER BY eewc.geom <-> epp.geom LIMIT 1 diff --git a/src/egon/data/datasets/power_plants/conventional.py b/src/egon/data/datasets/power_plants/conventional.py index 41226730f..b266216b1 100644 --- a/src/egon/data/datasets/power_plants/conventional.py +++ b/src/egon/data/datasets/power_plants/conventional.py @@ -8,7 +8,7 @@ from egon.data import db import egon.data.config - +from egon.data.datasets import load_sources_and_targets def select_nep_power_plants(carrier): """Select power plants with location from NEP's list of power plants @@ -24,14 +24,14 @@ def select_nep_power_plants(carrier): Waste power plants from NEP list """ - cfg = egon.data.config.datasets()["power_plants"] + sources, targets = load_sources_and_targets("PowerPlants") # Select plants with geolocation from list of conventional power plants nep = db.select_dataframe( f""" SELECT bnetza_id, name, carrier, capacity, postcode, city, federal_state, c2035_capacity - FROM {cfg['sources']['nep_conv']} + FROM 
{sources.tables['nep_conv']} WHERE carrier = '{carrier}' AND chp = 'Nein' AND c2035_chp = 'Nein' @@ -40,13 +40,11 @@ def select_nep_power_plants(carrier): """ ) - # Removing plants out of Germany nep["postcode"] = nep["postcode"].astype(str) nep = nep[~nep["postcode"].str.contains("A")] nep = nep[~nep["postcode"].str.contains("L")] nep = nep[~nep["postcode"].str.contains("nan")] - # Remove the subunits from the bnetza_id nep["bnetza_id"] = nep["bnetza_id"].str[0:7] return nep @@ -67,7 +65,8 @@ def select_no_chp_combustion_mastr(carrier): Power plants from NEP list """ - cfg = egon.data.config.datasets()["power_plants"] + sources, targets = load_sources_and_targets("PowerPlants") + # import data for MaStR mastr = db.select_geodataframe( f""" @@ -78,7 +77,7 @@ def select_no_chp_combustion_mastr(carrier): plz, city, federal_state - FROM {cfg['sources']['mastr_combustion_without_chp']} + FROM {sources.tables['mastr_combustion_without_chp']} WHERE carrier = '{carrier}'; """, index_col=None, diff --git a/src/egon/data/datasets/power_plants/mastr.py b/src/egon/data/datasets/power_plants/mastr.py index 804457f6f..39541125e 100644 --- a/src/egon/data/datasets/power_plants/mastr.py +++ b/src/egon/data/datasets/power_plants/mastr.py @@ -49,6 +49,7 @@ from egon.data.datasets.power_plants.pv_rooftop_buildings import ( federal_state_data, ) +from egon.data.datasets import load_sources_and_targets TESTMODE_OFF = ( config.settings()["egon-data"]["--dataset-boundary"] == "Everything" @@ -161,16 +162,20 @@ def voltage_levels(p: float) -> int: def import_mastr() -> None: """Import MaStR data into database""" + sources, targets = load_sources_and_targets("PowerPlants") + engine = db.engine() # import geocoded data - cfg = config.datasets()["mastr_new"] - path_parts = cfg["geocoding_path"] - path = Path(*["."] + path_parts).resolve() + path_parts = sources.files["mastr_geocoding_path"] + # Handle path if it's a string (from files dict) or list (if keeping original structure) + # 
Assuming "data_bundle_egon_data/mastr_geocoding" is a string path relative to root: + path = Path(path_parts).resolve() path = list(path.iterdir())[0] deposit_id_geocoding = int(path.parts[-1].split(".")[0].split("_")[-1]) - deposit_id_mastr = cfg["deposit_id"] + + deposit_id_mastr = int(sources.files["mastr_deposit_id"]) if deposit_id_geocoding != deposit_id_mastr: raise AssertionError( @@ -191,15 +196,13 @@ def import_mastr() -> None: EgonMastrGeocoded.__table__.create(bind=engine, checkfirst=True) geocoding_gdf.to_postgis( - name=EgonMastrGeocoded.__tablename__, + name=targets.get_table_name("mastr_geocoded"), con=engine, if_exists="append", - schema=EgonMastrGeocoded.__table_args__["schema"], + schema=targets.get_table_schema("mastr_geocoded"), index=True, ) - cfg = config.datasets()["power_plants"] - cols_mapping = { "all": { "EinheitMastrNummer": "gens_id", @@ -266,26 +269,26 @@ def import_mastr() -> None: } source_files = { - "pv": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_pv"], - "wind": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"], - "biomass": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_biomass"], - "hydro": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_hydro"], + "pv": WORKING_DIR_MASTR_NEW / sources.files["mastr_pv"], + "wind": WORKING_DIR_MASTR_NEW / sources.files["mastr_wind"], + "biomass": WORKING_DIR_MASTR_NEW / sources.files["mastr_biomass"], + "hydro": WORKING_DIR_MASTR_NEW / sources.files["mastr_hydro"], "combustion": WORKING_DIR_MASTR_NEW - / cfg["sources"]["mastr_combustion"], - "gsgk": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_gsgk"], - "nuclear": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_nuclear"], - "storage": WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_storage"], + / sources.files["mastr_combustion"], + "gsgk": WORKING_DIR_MASTR_NEW / sources.files["mastr_gsgk"], + "nuclear": WORKING_DIR_MASTR_NEW / sources.files["mastr_nuclear"], + "storage": WORKING_DIR_MASTR_NEW / sources.files["mastr_storage"], } - target_tables 
= { - "pv": EgonPowerPlantsPv, - "wind": EgonPowerPlantsWind, - "biomass": EgonPowerPlantsBiomass, - "hydro": EgonPowerPlantsHydro, - "combustion": EgonPowerPlantsCombustion, - "gsgk": EgonPowerPlantsGsgk, - "nuclear": EgonPowerPlantsNuclear, - "storage": EgonPowerPlantsStorage, + target_table_keys = { + "pv": "power_plants_pv", + "wind": "power_plants_wind", + "biomass": "power_plants_biomass", + "hydro": "power_plants_hydro", + "combustion": "power_plants_combustion", + "gsgk": "power_plants_gsgk", + "nuclear": "power_plants_nuclear", + "storage": "power_plants_storage", } vlevel_mapping = { @@ -300,14 +303,14 @@ def import_mastr() -> None: # import locations locations = pd.read_csv( - WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"], + WORKING_DIR_MASTR_NEW / sources.files["mastr_location"], index_col=None, ) # import grid districts mv_grid_districts = db.select_geodataframe( f""" - SELECT * FROM {cfg['sources']['egon_mv_grid_district']} + SELECT * FROM {sources.tables['egon_mv_grid_district']} """, epsg=4326, ) @@ -356,7 +359,7 @@ def import_mastr() -> None: # (eGon2021 scenario) len_old = len(units) ts = pd.Timestamp( - config.datasets()["mastr_new"]["status2023_date_max"] + sources.files["status2023_date_max"] ) units = units.loc[pd.to_datetime(units.Inbetriebnahmedatum) <= ts] logger.debug( @@ -530,11 +533,13 @@ def import_mastr() -> None: # write to DB logger.info(f"Writing {len(units)} units to DB...") + target_key = target_table_keys[tech] + units.to_postgis( - name=target_tables[tech].__tablename__, + name=targets.get_table_name(target_key), con=engine, if_exists="append", - schema=target_tables[tech].__table_args__["schema"], + schema=targets.get_table_schema(target_key), ) add_metadata() diff --git a/src/egon/data/datasets/power_plants/mastr_db_classes.py b/src/egon/data/datasets/power_plants/mastr_db_classes.py index ed3a2c8aa..bb2fd8662 100644 --- a/src/egon/data/datasets/power_plants/mastr_db_classes.py +++ 
b/src/egon/data/datasets/power_plants/mastr_db_classes.py @@ -23,6 +23,7 @@ meta_metadata, sources, ) +from egon.data.datasets import load_sources_and_targets Base = declarative_base() @@ -309,7 +310,7 @@ class EgonPowerPlantsStorage(Base): def add_metadata(): - technologies = config.datasets()["mastr_new"]["technologies"] + dataset_sources, targets = load_sources_and_targets("PowerPlants") target_tables = { "solar": EgonPowerPlantsPv, @@ -321,11 +322,11 @@ def add_metadata(): "nuclear": EgonPowerPlantsNuclear, "storage": EgonPowerPlantsStorage, } + + technologies = list(target_tables.keys()) - deposit_id_data_bundle = config.datasets()["data-bundle"]["sources"][ - "zenodo" - ]["deposit_id"] - deposit_id_mastr = config.datasets()["mastr_new"]["deposit_id"] + deposit_id_data_bundle = dataset_sources.files["data_bundle_deposit_id"] + deposit_id_mastr = dataset_sources.files["mastr_deposit_id"] contris = contributors(["kh", "kh"]) @@ -361,7 +362,8 @@ def add_metadata(): }, "temporal": { "referenceDate": ( - config.datasets()["mastr_new"]["egon2021_date_max"].split( + # <--- REFACTORING: Use sources.files + dataset_sources.files["egon2021_date_max"].split( " " )[0] ), diff --git a/src/egon/data/datasets/power_plants/metadata.py b/src/egon/data/datasets/power_plants/metadata.py index 7ab7e376c..926f42ef4 100644 --- a/src/egon/data/datasets/power_plants/metadata.py +++ b/src/egon/data/datasets/power_plants/metadata.py @@ -10,6 +10,7 @@ license_ccby, licenses_datenlizenz_deutschland, ) +from egon.data.datasets import load_sources_and_targets def metadata(): @@ -20,9 +21,15 @@ def metadata(): None. 
""" + sources, targets = load_sources_and_targets("PowerPlants") + + schema = targets.get_table_schema("power_plants") + table = targets.get_table_name("power_plants") + full_name = f"{schema}.{table}" + meta = { - "name": "supply.egon_power_plants", - "title": "supply.egon_power_plants", + "name": full_name, + "title": full_name, "id": "", "description": "Database of powerplants ", "language": "en-GB", @@ -164,7 +171,7 @@ def metadata(): "resources": [ { "profile": "tabular-data-resource", - "name": "supply.egon_power_plants", + "name": full_name, # <--- Updated "path": "", "format": "PostgreSQL", "encoding": "UTF-8", @@ -263,5 +270,5 @@ def metadata(): } db.submit_comment( - "'" + json.dumps(meta) + "'", "supply", "egon_power_plants" - ) + "'" + json.dumps(meta) + "'", schema, table + ) \ No newline at end of file diff --git a/src/egon/data/datasets/power_plants/pv_ground_mounted.py b/src/egon/data/datasets/power_plants/pv_ground_mounted.py index 8eb37ab4f..264c2a187 100644 --- a/src/egon/data/datasets/power_plants/pv_ground_mounted.py +++ b/src/egon/data/datasets/power_plants/pv_ground_mounted.py @@ -7,9 +7,11 @@ from egon.data import db from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW import egon.data.config +from egon.data.datasets import load_sources_and_targets def insert(): + sources, targets = load_sources_and_targets("PowerPlants") def mastr_existing_pv(pow_per_area): """Import MaStR data from csv-files. 
@@ -20,14 +22,12 @@ def mastr_existing_pv(pow_per_area): pv farms depending on area in kW/m² """ - # get config - cfg = egon.data.config.datasets()["power_plants"] # import MaStR data: locations, grid levels and installed capacities # get relevant pv plants: ground mounted df = pd.read_csv( - WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_pv"], + WORKING_DIR_MASTR_NEW / sources.files["mastr_pv"], usecols=[ "Lage", "Laengengrad", @@ -78,8 +78,9 @@ def mastr_existing_pv(pow_per_area): # derive voltage level mastr["voltage_level"] = pd.Series(dtype=int) + lvl = pd.read_csv( - WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"], + WORKING_DIR_MASTR_NEW / sources.files["mastr_location"], usecols=["Spannungsebene", "MaStRNummer"], ) @@ -160,7 +161,7 @@ def potential_areas(con, join_buffer): # roads and railway sql = ( "SELECT id, geom FROM " - "supply.egon_re_potential_area_pv_road_railway" + f"{sources.tables['potential_area_pv_road_railway']}" ) potentials_rora = gpd.GeoDataFrame.from_postgis(sql, con) potentials_rora = potentials_rora.set_index("id") @@ -168,7 +169,7 @@ def potential_areas(con, join_buffer): # agriculture sql = ( "SELECT id, geom FROM " - "supply.egon_re_potential_area_pv_agriculture" + f"{sources.tables['potential_area_pv_agriculture']}" ) potentials_agri = gpd.GeoDataFrame.from_postgis(sql, con) potentials_agri = potentials_agri.set_index("id") @@ -403,8 +404,8 @@ def adapt_grid_level(pv_pot, max_dist_hv, con): if len(pv_pot_mv_to_hv) > 0: # import data for HV substations - - sql = "SELECT point, voltage FROM grid.egon_hvmv_substation" + + sql = f"SELECT point, voltage FROM {sources.tables['hvmv_substation']}" hvmv_substation = gpd.GeoDataFrame.from_postgis( sql, con, geom_col="point" ) @@ -450,7 +451,7 @@ def adapt_grid_level(pv_pot, max_dist_hv, con): pv_pot = pd.concat([pv_pot_mv, pv_pot_hv]) return pv_pot - + def build_additional_pv(potentials, pv, pow_per_area, con): """Build additional pv parks if pv parks on selected potential areas do 
not hit the target value. @@ -470,7 +471,7 @@ def build_additional_pv(potentials, pv, pow_per_area, con): """ # get MV grid districts - sql = "SELECT bus_id, geom FROM grid.egon_mv_grid_district" + sql = f"SELECT bus_id, geom FROM {sources.tables['egon_mv_grid_district']}" distr = gpd.GeoDataFrame.from_postgis(sql, con) distr = distr.set_index("bus_id") @@ -754,7 +755,7 @@ def keep_existing_pv(mastr, con): pv_exist = gpd.GeoDataFrame(pv_exist, geometry="centroid", crs=3035) # German states - sql = "SELECT geometry as geom, gf FROM boundaries.vg250_lan" + sql = f"SELECT geometry as geom, gf FROM {sources.tables['geom_federal_states']}" land = gpd.GeoDataFrame.from_postgis(sql, con).to_crs(3035) land = land[(land["gf"] != 1) & (land["gf"] != 2)] land = land.unary_union @@ -1188,7 +1189,7 @@ def insert_pv_parks( con = db.engine() # maximum ID in egon_power_plants - sql = "SELECT MAX(id) FROM supply.egon_power_plants" + sql = f"SELECT MAX(id) FROM {targets.tables['power_plants']}" max_id = pd.read_sql(sql, con) max_id = max_id["max"].iat[0] if max_id is None: @@ -1224,8 +1225,8 @@ def insert_pv_parks( # insert into database insert_pv_parks.reset_index().to_postgis( - "egon_power_plants", - schema="supply", + targets.get_table_name("power_plants"), + schema=targets.get_table_schema("power_plants"), con=db.engine(), if_exists="append", ) @@ -1337,4 +1338,4 @@ def insert_pv_parks( else: pv_parks_100RE = gpd.GeoDataFrame() - return pv_parks, pv_parks_100RE + return pv_parks, pv_parks_100RE \ No newline at end of file diff --git a/src/egon/data/datasets/power_plants/pv_rooftop.py b/src/egon/data/datasets/power_plants/pv_rooftop.py index 109659bbe..25a340832 100644 --- a/src/egon/data/datasets/power_plants/pv_rooftop.py +++ b/src/egon/data/datasets/power_plants/pv_rooftop.py @@ -9,6 +9,7 @@ import pandas as pd from egon.data import config, db +from egon.data.datasets import load_sources_and_targets from egon.data.datasets.power_plants.pv_rooftop_buildings import ( 
PV_CAP_PER_SQ_M, ROOF_FACTOR, @@ -61,26 +62,22 @@ def pv_rooftop_per_mv_grid_and_scenario(scenario, level): """ # Select sources and targets from dataset configuration - sources = config.datasets()["solar_rooftop"]["sources"] - targets = config.datasets()["solar_rooftop"]["targets"] + sources, targets = load_sources_and_targets("PowerPlants") # Delete existing rows db.execute_sql( f""" - DELETE FROM {targets['generators']['schema']}. - {targets['generators']['table']} + DELETE FROM {targets.tables['generators']} WHERE carrier IN ('solar_rooftop') AND scn_name = '{scenario}' AND bus IN (SELECT bus_id FROM - {sources['egon_mv_grid_district']['schema']}. - {sources['egon_mv_grid_district']['table']} ) + {sources.tables['egon_mv_grid_district']}) """ ) db.execute_sql( f""" - DELETE FROM {targets['generator_timeseries']['schema']}. - {targets['generator_timeseries']['table']} + DELETE FROM {targets.tables['generator_timeseries']} WHERE scn_name = '{scenario}' AND generator_id NOT IN ( SELECT generator_id FROM @@ -94,13 +91,10 @@ def pv_rooftop_per_mv_grid_and_scenario(scenario, level): f""" SELECT SUM(demand) as demand, b.bus_id, vg250_lan - FROM {sources['electricity_demand']['schema']}. - {sources['electricity_demand']['table']} a - JOIN {sources['map_zensus_grid_districts']['schema']}. - {sources['map_zensus_grid_districts']['table']} b + FROM {sources.tables['electricity_demand']} a + JOIN {sources.tables['map_zensus_grid_districts']} b ON a.zensus_population_id = b.zensus_population_id - JOIN {sources['map_grid_boundaries']['schema']}. - {sources['map_grid_boundaries']['table']} c + JOIN {sources.tables['map_grid_boundaries']} c ON c.bus_id = b.bus_id WHERE scenario = '{scenario}' GROUP BY (b.bus_id, vg250_lan) @@ -126,10 +120,8 @@ def pv_rooftop_per_mv_grid_and_scenario(scenario, level): targets_per_federal_state = db.select_dataframe( f""" SELECT DISTINCT ON (gen) capacity, gen - FROM {sources['scenario_capacities']['schema']}. 
- {sources['scenario_capacities']['table']} a - JOIN {sources['federal_states']['schema']}. - {sources['federal_states']['table']} b + FROM {sources.tables['scenario_capacities']} a + JOIN {sources.tables['federal_states']} b ON a.nuts = b.nuts WHERE carrier = 'solar_rooftop' AND scenario_name = '{scenario}' @@ -156,8 +148,7 @@ def pv_rooftop_per_mv_grid_and_scenario(scenario, level): target = db.select_dataframe( f""" SELECT capacity - FROM {sources['scenario_capacities']['schema']}. - {sources['scenario_capacities']['table']} a + FROM {sources.tables['scenario_capacities']} a WHERE carrier = 'solar_rooftop' AND scenario_name = '{scenario}' """ @@ -172,11 +163,11 @@ def pv_rooftop_per_mv_grid_and_scenario(scenario, level): dataset = config.settings()["egon-data"]["--dataset-boundary"] if dataset == "Schleswig-Holstein": - sources_scn = config.datasets()["scenario_input"]["sources"] - + # <--- REFACTORING: Use sources.files lookup instead of config.datasets() + path = Path( f"./data_bundle_egon_data/nep2035_version2021/" - f"{sources_scn['eGon2035']['capacities']}" + f"{sources.files['nep_2035_capacities']}" ).resolve() total_2035 = ( @@ -258,8 +249,7 @@ def pv_rooftop_per_mv_grid_and_scenario(scenario, level): weather_cells = db.select_geodataframe( f""" SELECT w_id, geom - FROM {sources['weather_cells']['schema']}. - {sources['weather_cells']['table']} + FROM {sources.tables['weather_cells']} """, index_col="w_id", ) @@ -267,8 +257,7 @@ def pv_rooftop_per_mv_grid_and_scenario(scenario, level): mv_grid_districts = db.select_geodataframe( f""" SELECT bus_id as bus_id, ST_Centroid(geom) as geom - FROM {sources['egon_mv_grid_district']['schema']}. - {sources['egon_mv_grid_district']['table']} + FROM {sources.tables['egon_mv_grid_district']} """, index_col="bus_id", ) @@ -279,8 +268,7 @@ def pv_rooftop_per_mv_grid_and_scenario(scenario, level): feedin = db.select_dataframe( f""" SELECT w_id, feedin - FROM {sources['solar_feedin']['schema']}. 
- {sources['solar_feedin']['table']} + FROM {sources.tables['solar_feedin']} WHERE carrier = 'pv' AND weather_year = 2011 """, @@ -306,15 +294,15 @@ def pv_rooftop_per_mv_grid_and_scenario(scenario, level): # Insert data to database pv_rooftop.to_sql( - targets["generators"]["table"], - schema=targets["generators"]["schema"], + targets.get_table_name("generators"), + schema=targets.get_table_schema("generators"), if_exists="append", con=db.engine(), ) timeseries.to_sql( - targets["generator_timeseries"]["table"], - schema=targets["generator_timeseries"]["schema"], + targets.get_table_name("generator_timeseries"), + schema=targets.get_table_schema("generator_timeseries"), if_exists="append", con=db.engine(), - ) + ) \ No newline at end of file diff --git a/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py b/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py index d7f07f0cc..5c5050e71 100644 --- a/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py +++ b/src/egon/data/datasets/power_plants/pv_rooftop_buildings.py @@ -27,6 +27,7 @@ import pandas as pd from egon.data import config, db +from egon.data.datasets import load_sources_and_targets from egon.data.datasets.electricity_demand_timeseries.hh_buildings import ( OsmBuildingsSynthetic, ) @@ -1161,12 +1162,14 @@ def cap_per_bus_id( pandas.DataFrame DataFrame with total rooftop capacity per mv grid. 
""" - if "status" in scenario: - sources = config.datasets()["solar_rooftop"]["sources"] + # <--- REFACTORING: Load sources/targets + sources, targets = load_sources_and_targets("PowerPlants") + if "status" in scenario: + # <--- REFACTORING: Use sources.tables sql = f""" SELECT bus_id, SUM(el_capacity) as capacity - FROM {sources['power_plants']['schema']}.{sources['power_plants']['table']} + FROM {sources.tables['power_plants']} WHERE carrier = 'solar_rooftop' AND scenario = '{scenario}' GROUP BY bus_id @@ -1175,11 +1178,10 @@ def cap_per_bus_id( df = db.select_dataframe(sql, index_col="bus_id") else: - targets = config.datasets()["solar_rooftop"]["targets"] - + # <--- REFACTORING: Use targets.tables sql = f""" SELECT bus as bus_id, control, p_nom as capacity - FROM {targets['generators']['schema']}.{targets['generators']['table']} + FROM {targets.tables['generators']} WHERE carrier = 'solar_rooftop' AND scn_name = '{scenario}' """ @@ -2151,13 +2153,16 @@ class EgonPowerPlantPvRoofBuilding(Base): def add_metadata(): + # <--- REFACTORING: Load sources (renamed to avoid conflict with imports) + dataset_sources, dataset_targets = load_sources_and_targets("PowerPlants") + schema = "supply" table = "egon_power_plants_pv_roof_building" name = f"{schema}.{table}" - deposit_id_mastr = config.datasets()["mastr_new"]["deposit_id"] - deposit_id_data_bundle = config.datasets()["data-bundle"]["sources"][ - "zenodo" - ]["deposit_id"] + + # <--- REFACTORING: Retrieve IDs from __init__.py files mapping + deposit_id_mastr = dataset_sources.files["mastr_deposit_id"] + deposit_id_data_bundle = dataset_sources.files["data_bundle_deposit_id"] contris = contributors(["kh", "kh"]) @@ -2187,9 +2192,8 @@ def add_metadata(): }, "temporal": { "referenceDate": ( - config.datasets()["mastr_new"]["egon2021_date_max"].split(" ")[ - 0 - ] + # <--- REFACTORING: Retrieve date from __init__.py + dataset_sources.files["egon2021_date_max"].split(" ")[0] ), "timeseries": {}, }, @@ -2404,13 +2408,16 @@ 
def voltage_levels(p: float) -> int: def pv_rooftop_to_buildings(): """Main script, executed as task""" + # <--- REFACTORING: Load sources + sources, targets = load_sources_and_targets("PowerPlants") mastr_gdf = load_mastr_data() status_quo = "status2023" # FIXME: Hard coded + # <--- REFACTORING: Use sources.files ts = pd.Timestamp( - config.datasets()["mastr_new"][f"{status_quo}_date_max"], tz="UTC" + sources.files[f"{status_quo}_date_max"], tz="UTC" ) mastr_gdf = mastr_gdf.loc[mastr_gdf.commissioning_date <= ts] @@ -2436,8 +2443,9 @@ def pv_rooftop_to_buildings(): if scenario == status_quo: scenario_buildings_gdf = scenario_buildings_gdf_sq.copy() elif "status" in scenario: + # <--- REFACTORING: Use sources.files ts = pd.Timestamp( - config.datasets()["mastr_new"][f"{scenario}_date_max"], + sources.files[f"{scenario}_date_max"], tz="UTC", ) @@ -2477,4 +2485,4 @@ def pv_rooftop_to_buildings(): all_buildings_gdf = add_bus_ids_sq(all_buildings_gdf) # export scenario - create_scenario_table(infer_voltage_level(all_buildings_gdf)) + create_scenario_table(infer_voltage_level(all_buildings_gdf)) \ No newline at end of file diff --git a/src/egon/data/datasets/power_plants/wind_farms.py b/src/egon/data/datasets/power_plants/wind_farms.py index 2e1a47717..7db2adb94 100644 --- a/src/egon/data/datasets/power_plants/wind_farms.py +++ b/src/egon/data/datasets/power_plants/wind_farms.py @@ -9,6 +9,7 @@ from egon.data import db from egon.data.datasets.mastr import WORKING_DIR_MASTR_NEW import egon.data.config +from egon.data.datasets import load_sources_and_targets def insert(): @@ -20,11 +21,12 @@ def insert(): *No parameters required """ + sources, targets = load_sources_and_targets("PowerPlants") con = db.engine() # federal_std has the shapes of the German states - sql = "SELECT gen, gf, nuts, geometry FROM boundaries.vg250_lan" + sql = f"SELECT gen, gf, nuts, geometry FROM {sources.tables['geom_federal_states']}" federal_std = gpd.GeoDataFrame.from_postgis( sql, con, 
geom_col="geometry", crs=4326 ) @@ -32,12 +34,12 @@ def insert(): # target_power_df has the expected capacity of each federal state sql = ( "SELECT carrier, capacity, nuts, scenario_name FROM " - "supply.egon_scenario_capacities" + f"{sources.tables['capacities']}" ) target_power_df = pd.read_sql(sql, con) # mv_districts has geographic info of medium voltage districts in Germany - sql = "SELECT geom FROM grid.egon_mv_grid_district" + sql = f"SELECT geom FROM {sources.tables['egon_mv_grid_district']}" mv_districts = gpd.GeoDataFrame.from_postgis(sql, con) # Delete all the water bodies from the federal states shapes @@ -70,8 +72,10 @@ def insert(): # Create the shape for full Germany target_power_df.at["DE", "geom"] = target_power_df["geom"].unary_union target_power_df.at["DE", "name"] = "Germany" + # Generate WFs for Germany based on potential areas and existing WFs - wf_areas, wf_areas_ni = generate_wind_farms() + # Passing sources to helper function + wf_areas, wf_areas_ni = generate_wind_farms(sources) # Change the columns "geometry" of this GeoDataFrames wf_areas.set_geometry("centroid", inplace=True) @@ -93,6 +97,7 @@ def insert(): "eGon100RE", "wind_onshore", "DE", + sources, targets # <--- Pass sources and targets ) target_power_df = target_power_df[ target_power_df["scenario_name"] != "eGon100RE" @@ -122,25 +127,24 @@ def insert(): scenario_year, source, fed_state, + sources, targets # <--- Pass sources and targets ) summary_t = pd.concat([summary_t, summary_state]) farms = pd.concat([farms, wind_farms_state]) - generate_map() + generate_map(sources, targets) # <--- Pass sources and targets return - -def generate_wind_farms(): +def generate_wind_farms(sources): """Generate wind farms based on existing wind farms. 
Parameters ---------- - *No parameters required + sources : DatasetSources + Contains information about database tables and file paths """ - # get config - cfg = egon.data.config.datasets()["power_plants"] # Due to typos in some inputs, some areas of existing wind farms # should be discarded using perimeter and area filters @@ -172,18 +176,20 @@ def voltage(x): # Connect to the data base con = db.engine() - sql = "SELECT geom FROM supply.egon_re_potential_area_wind" + sql = f"SELECT geom FROM {sources.tables['wind_potential_areas']}" + # wf_areas has all the potential areas geometries for wind farms wf_areas = gpd.GeoDataFrame.from_postgis(sql, con) # bus has the connection points of the wind farms bus = pd.read_csv( - WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_location"], + WORKING_DIR_MASTR_NEW / sources.files["mastr_location"], index_col="MaStRNummer", ) # Drop all the rows without connection point bus.dropna(subset=["NetzanschlusspunktMastrNummer"], inplace=True) # wea has info of each wind turbine in Germany. - wea = pd.read_csv(WORKING_DIR_MASTR_NEW / cfg["sources"]["mastr_wind"]) + # <--- REFACTORING: Use sources.files['mastr_wind'] + wea = pd.read_csv(WORKING_DIR_MASTR_NEW / sources.files["mastr_wind"]) # Delete all the rows without information about geographical location wea = wea[(pd.notna(wea["Laengengrad"])) & (pd.notna(wea["Breitengrad"]))] @@ -273,6 +279,7 @@ def wind_power_states( scenario_year, source, fed_state, + sources, targets ): """Import OSM data from a Geofabrik `.pbf` file into a PostgreSQL database. @@ -293,6 +300,8 @@ def wind_power_states( Type of energy genetor. Always "Wind_onshore" for this script. 
fed_state: str, mandatory Name of the state where the wind farms will be allocated + sources: DatasetSources, mandatory + targets: DatasetTargets, mandatory """ @@ -302,7 +311,8 @@ def match_district_se(x): return hvmv_substation.at[sub, "point"] con = db.engine() - sql = "SELECT point, voltage FROM grid.egon_hvmv_substation" + # <--- REFACTORING: Use sources.tables['hvmv_substation'] + sql = f"SELECT point, voltage FROM {sources.tables['hvmv_substation']}" # hvmv_substation has the information about HV transmission lines in # Germany hvmv_substation = gpd.GeoDataFrame.from_postgis(sql, con, geom_col="point") @@ -331,7 +341,7 @@ def match_district_se(x): ] if fed_state == "DE": - sql = f"""SELECT * FROM boundaries.vg250_lan + sql = f"""SELECT * FROM {sources.tables['geom_federal_states']} WHERE gen in {tuple(north)} """ north_states = gpd.GeoDataFrame.from_postgis( @@ -493,7 +503,7 @@ def match_district_se(x): print(i) # Look for the maximum id in the table egon_power_plants - sql = "SELECT MAX(id) FROM supply.egon_power_plants" + sql = f"SELECT MAX(id) FROM {targets.tables['power_plants']}" max_id = pd.read_sql(sql, con) max_id = max_id["max"].iat[0] if max_id is None: @@ -530,7 +540,7 @@ def match_district_se(x): # Delete old wind_onshore generators db.execute_sql( - f"""DELETE FROM supply.egon_power_plants + f"""DELETE FROM {targets.tables['power_plants']} WHERE carrier = 'wind_onshore' AND scenario = '{scenario_year}' """ @@ -538,20 +548,21 @@ def match_district_se(x): # Insert into database insert_wind_farms.reset_index().to_postgis( - "egon_power_plants", - schema="supply", + targets.get_table_name("power_plants"), + schema=targets.get_table_schema("power_plants"), con=db.engine(), if_exists="append", ) return wind_farms, summary -def generate_map(): +def generate_map(sources, targets): """Generates a map with the position of all the wind farms Parameters ---------- - *No parameters required + sources: DatasetSources + targets: DatasetTargets """ con = 
db.engine() @@ -559,7 +570,7 @@ def generate_map(): # Import wind farms from egon-data sql = ( "SELECT carrier, el_capacity, geom, scenario FROM " - "supply.egon_power_plants WHERE carrier = 'wind_onshore'" + f"{targets.tables['power_plants']} WHERE carrier = 'wind_onshore'" ) wind_farms_t = gpd.GeoDataFrame.from_postgis( sql, con, geom_col="geom", crs=4326 @@ -570,7 +581,7 @@ def generate_map(): wind_farms = wind_farms_t[wind_farms_t["scenario"] == scenario] # mv_districts has geographic info of medium voltage districts in # Germany - sql = "SELECT geom FROM grid.egon_mv_grid_district" + sql = f"SELECT geom FROM {sources.tables['egon_mv_grid_district']}" mv_districts = gpd.GeoDataFrame.from_postgis(sql, con) mv_districts = mv_districts.to_crs(3035) @@ -596,4 +607,4 @@ def generate_map(): }, ) plt.savefig(f"wind_farms_{scenario}.png", dpi=300) - return 0 + return 0 \ No newline at end of file diff --git a/src/egon/data/datasets/power_plants/wind_offshore.py b/src/egon/data/datasets/power_plants/wind_offshore.py index afc8892c2..1a3cdd73d 100644 --- a/src/egon/data/datasets/power_plants/wind_offshore.py +++ b/src/egon/data/datasets/power_plants/wind_offshore.py @@ -7,14 +7,14 @@ from egon.data import db import egon.data.config +from egon.data.datasets import load_sources_and_targets -def map_id_bus(scenario): +def map_id_bus(scenario, sources): # Import manually generated list of wind offshore farms with their # connection points (OSM_id) - osm_year = egon.data.config.datasets()["openstreetmap"]["original_data"][ - "source" - ]["url"] + + osm_year = sources.files["osm_config"] if scenario in ["eGon2035", "eGon100RE"]: id_bus = { @@ -160,16 +160,16 @@ def insert(): ---------- *No parameters required """ - # Read file with all required input/output tables' names - cfg = egon.data.config.datasets()["power_plants"] + sources, targets = load_sources_and_targets("PowerPlants") + scenarios = egon.data.config.settings()["egon-data"]["--scenarios"] for scenario in scenarios: 
- # Delete previous generators + db.execute_sql( f""" - DELETE FROM {cfg['target']['schema']}.{cfg['target']['table']} + DELETE FROM {targets.tables['power_plants']} WHERE carrier = 'wind_offshore' AND scenario = '{scenario}' """ @@ -177,13 +177,14 @@ def insert(): # load file if scenario == "eGon2035": + filename = "NEP2035_V2021_scnC2035.xlsx" + offshore_path = ( Path(".") / "data_bundle_egon_data" / "nep2035_version2021" - / cfg["sources"]["nep_2035"] + / filename ) - offshore = pd.read_excel( offshore_path, sheet_name="WInd_Offshore_NEP", @@ -202,7 +203,7 @@ def insert(): Path(".") / "data_bundle_egon_data" / "nep2035_version2021" - / cfg["sources"]["nep_2035"] + / sources.files["nep_2035"] ) offshore = pd.read_excel( @@ -225,7 +226,7 @@ def insert(): Path(".") / "data_bundle_egon_data" / "wind_offshore_status2019" - / cfg["sources"]["wind_offshore_status2019"] + / sources.files["wind_offshore_status2019"] ) offshore = pd.read_excel( offshore_path, @@ -252,7 +253,7 @@ def insert(): else: raise ValueError(f"{scenario=} is not valid.") - id_bus = map_id_bus(scenario) + id_bus = map_id_bus(scenario, sources) # Match wind offshore table with the corresponding OSM_id offshore["osm_id"] = offshore["Netzverknuepfungspunkt"].map(id_bus) @@ -260,7 +261,7 @@ def insert(): buses = db.select_geodataframe( f""" SELECT bus_i as bus_id, base_kv, geom as point, CAST(osm_substation_id AS text) - as osm_id FROM {cfg["sources"]["buses_data"]} + as osm_id FROM {sources.tables['buses_data']} """, epsg=4326, geom_col="point", @@ -308,7 +309,7 @@ def insert(): cap_100RE = db.select_dataframe( f""" SELECT SUM(capacity) - FROM {cfg["sources"]["capacities"]} + FROM {sources.tables['capacities']} WHERE scenario_name = 'eGon100RE' AND carrier = 'wind_offshore' """ @@ -348,10 +349,7 @@ def insert(): # Look for the maximum id in the table egon_power_plants next_id = db.select_dataframe( - "SELECT MAX(id) FROM " - + cfg["target"]["schema"] - + "." 
- + cfg["target"]["table"] + f"SELECT MAX(id) FROM {targets.tables['power_plants']}" ).iloc[0, 0] if next_id: @@ -366,8 +364,8 @@ def insert(): # Insert into database offshore.reset_index().to_postgis( - cfg["target"]["table"], - schema=cfg["target"]["schema"], + targets.get_table_name("power_plants"), + schema=targets.get_table_schema("power_plants"), con=db.engine(), if_exists="append", ) @@ -377,4 +375,4 @@ def insert(): {len(offshore)} wind_offshore generators with a total installed capacity of {offshore['el_capacity'].sum()}MW were inserted into the db """ - ) + ) \ No newline at end of file diff --git a/src/egon/data/datasets/pypsaeur/__init__.py b/src/egon/data/datasets/pypsaeur/__init__.py index 6ec597422..11d5b44f1 100755 --- a/src/egon/data/datasets/pypsaeur/__init__.py +++ b/src/egon/data/datasets/pypsaeur/__init__.py @@ -17,7 +17,7 @@ import yaml from egon.data import __path__, config, db, logger -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.scenario_parameters import get_sector_parameters from egon.data.datasets.scenario_parameters.parameters import ( annualize_capital_costs, @@ -27,6 +27,10 @@ class PreparePypsaEur(Dataset): + sources = DatasetSources( + files={"era5_weather_data": "cutouts"} + ) + targets = DatasetTargets() def __init__(self, dependencies): super().__init__( name="PreparePypsaEur", @@ -40,6 +44,12 @@ def __init__(self, dependencies): class RunPypsaEur(Dataset): + sources = DatasetSources( + tables={"scenario_parameters": "scenario.egon_scenario_parameters"} + ) + targets = DatasetTargets( + tables={"scenario_parameters": "scenario.egon_scenario_parameters"} + ) def __init__(self, dependencies): super().__init__( name="SolvePypsaEur", @@ -238,17 +248,15 @@ def download(): ) # Copy era5 weather data to folder for pypsaeur - era5_pypsaeur_path = filepath / "pypsa-eur" / "cutouts" + era5_pypsa_eur_path = filepath / "pypsa-eur" / "cutouts" - if not 
era5_pypsaeur_path.exists(): - era5_pypsaeur_path.mkdir(parents=True, exist_ok=True) - copy_from = config.datasets()["era5_weather_data"]["targets"][ - "weather_data" - ]["path"] + if not era5_pypsa_eur_path.exists(): + era5_pypsa_eur_path.mkdir(parents=True, exist_ok=True) + copy_from = PreparePypsaEur.sources.files["era5_weather_data"] filename = "europe-2011-era5.nc" shutil.copy( - copy_from + "/" + filename, era5_pypsaeur_path / filename - ) + Path(copy_from) / filename, era5_pypsa_eur_path / filename + ) # Workaround to download natura, shipdensity and globalenergymonitor # data, which is not working in the regular snakemake workflow. @@ -1782,9 +1790,10 @@ def overwrite_H2_pipeline_share(): """ scn_name = "eGon100RE" # Select source and target from dataset configuration - target = egon.data.config.datasets()["pypsa-eur-sec"]["target"] - n = read_network(planning_horizon=2045) + + + n = read_network() H2_pipelines = n.links[n.links["carrier"] == "H2 pipeline retrofitted"] CH4_pipelines = n.links[n.links["carrier"] == "gas pipeline"] @@ -1803,7 +1812,7 @@ def overwrite_H2_pipeline_share(): parameters = db.select_dataframe( f""" SELECT * - FROM {target['scenario_parameters']['schema']}.{target['scenario_parameters']['table']} + FROM {RunPypsaEur.sources.tables['scenario_parameters']} WHERE name = '{scn_name}' """ ) @@ -1815,7 +1824,7 @@ def overwrite_H2_pipeline_share(): # Update data in db db.execute_sql( f""" - UPDATE {target['scenario_parameters']['schema']}.{target['scenario_parameters']['table']} + UPDATE {RunPypsaEur.targets.tables['scenario_parameters']} SET gas_parameters = '{gas_param}' WHERE name = '{scn_name}'; """ diff --git a/src/egon/data/datasets/re_potential_areas/__init__.py b/src/egon/data/datasets/re_potential_areas/__init__.py index bcb34af86..1ba85319d 100644 --- a/src/egon/data/datasets/re_potential_areas/__init__.py +++ b/src/egon/data/datasets/re_potential_areas/__init__.py @@ -10,7 +10,7 @@ import geopandas as gpd from egon.data import 
db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets import egon.data.config Base = declarative_base() @@ -55,10 +55,8 @@ class EgonRePotentialAreaWind(Base): def create_tables(): """Create tables for RE potential areas""" - data_config = egon.data.config.datasets() - schema = data_config["re_potential_areas"]["target"].get( - "schema", "supply" - ) + schema = re_potential_area_setup.targets.tables["egon_re_potential_area_wind"]["schema"] + db.execute_sql(f"CREATE SCHEMA IF NOT EXISTS {schema};") engine = db.engine() @@ -110,7 +108,7 @@ def insert_data(): data.rename(columns={"geometry": "geom"}, inplace=True) data.set_geometry("geom", inplace=True) - schema = pa_config["target"].get("schema", "supply") + schema = re_potential_area_setup.targets.tables["egon_re_potential_area_wind"]["schema"] # create database table from geopandas dataframe data[["id", "geom"]].to_postgis( @@ -142,10 +140,39 @@ class re_potential_area_setup(Dataset): #: name: str = "RePotentialAreas" #: - version: str = "0.0.1" + version: str = "0.0.3" #: tasks = (create_tables, insert_data) + sources = DatasetSources( + files={ + "potentialarea_pv_agriculture": "data_bundle_egon_data/re_potential_areas/potentialarea_pv_agriculture.gpkg", + "potentialarea_pv_road_railway": "data_bundle_egon_data/re_potential_areas/potentialarea_pv_road_railway.gpkg", + "potentialarea_wind": "data_bundle_egon_data/re_potential_areas/potentialarea_wind.gpkg", + "potentialarea_pv_agriculture_SH": "data_bundle_egon_data/re_potential_areas/potentialarea_pv_agriculture_SH.gpkg", + "potentialarea_pv_road_railway_SH": "data_bundle_egon_data/re_potential_areas/potentialarea_pv_road_railway_SH.gpkg", + "potentialarea_wind_SH": "data_bundle_egon_data/re_potential_areas/potentialarea_wind_SH.gpkg", + } + ) + + + targets = DatasetTargets( + tables={ + "egon_re_potential_area_pv_agriculture": { + "schema": "supply", + "table": "egon_re_potential_area_pv_agriculture" + 
}, + "egon_re_potential_area_pv_road_railway": { + "schema": "supply", + "table": "egon_re_potential_area_pv_road_railway" + }, + "egon_re_potential_area_wind": { + "schema": "supply", + "table": "egon_re_potential_area_wind" + } + } + ) + def __init__(self, dependencies): super().__init__( name=self.name, diff --git a/src/egon/data/datasets/renewable_feedin.py b/src/egon/data/datasets/renewable_feedin.py index 0a56b1ee6..b194dc5de 100644 --- a/src/egon/data/datasets/renewable_feedin.py +++ b/src/egon/data/datasets/renewable_feedin.py @@ -13,13 +13,15 @@ import pandas as pd from egon.data import db -from egon.data.datasets import Dataset -from egon.data.datasets.era5 import ( - EgonEra5Cells, - EgonRenewableFeedIn, - import_cutout, -) +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets +from egon.data.datasets.era5 import EgonEra5Cells, EgonRenewableFeedIn, import_cutout from egon.data.datasets.scenario_parameters import get_sector_parameters +from egon.data.metadata import ( + context, + license_ccby, + meta_metadata, + sources, +) from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa from egon.data.metadata import context, license_ccby, meta_metadata, sources import egon.data.config @@ -49,7 +51,33 @@ class RenewableFeedin(Dataset): #: name: str = "RenewableFeedin" #: - version: str = "0.0.7" + version: str = "0.0.9" + + sources = DatasetSources( + tables={ + "weather_cells": { + "schema": "supply", + "table": "egon_era5_weather_cells", + }, + "vg250_lan_union": { + "schema": "boundaries", + "table": "vg250_lan_union", + }, + } + ) + + targets = DatasetTargets( + tables={ + "feedin_table": { + "schema": "supply", + "table": "egon_era5_renewable_feedin", + }, + "map_zensus_weather_cell": { + "schema": "boundaries", + "table": "egon_map_zensus_weather_cell", + }, + } + ) def __init__(self, dependencies): super().__init__( @@ -72,8 +100,10 @@ def __init__(self, dependencies): class MapZensusWeatherCell(Base): - __tablename__ = 
"egon_map_zensus_weather_cell" - __table_args__ = {"schema": "boundaries"} + __tablename__ = RenewableFeedin.targets.tables["map_zensus_weather_cell"]["table"] + __table_args__ = { + "schema": RenewableFeedin.targets.tables["map_zensus_weather_cell"]["schema"] + } zensus_population_id = Column( Integer, @@ -94,12 +124,12 @@ def weather_cells_in_germany(geom_column="geom"): """ - cfg = egon.data.config.datasets()["renewable_feedin"]["sources"] + + cfg = RenewableFeedin.sources.tables return db.select_geodataframe( f"""SELECT w_id, geom_point, geom - FROM {cfg['weather_cells']['schema']}. - {cfg['weather_cells']['table']} + FROM {cfg['weather_cells']['schema']}.{cfg['weather_cells']['table']} WHERE ST_Intersects('SRID=4326; POLYGON((5 56, 15.5 56, 15.5 47, 5 47, 5 56))', geom)""", geom_col=geom_column, @@ -117,12 +147,11 @@ def offshore_weather_cells(geom_column="geom"): """ - cfg = egon.data.config.datasets()["renewable_feedin"]["sources"] + cfg = RenewableFeedin.sources.tables return db.select_geodataframe( f"""SELECT w_id, geom_point, geom - FROM {cfg['weather_cells']['schema']}. - {cfg['weather_cells']['table']} + FROM {cfg['weather_cells']['schema']}.{cfg['weather_cells']['table']} WHERE ST_Intersects('SRID=4326; POLYGON((5.5 55.5, 14.5 55.5, 14.5 53.5, 5.5 53.5, 5.5 55.5))', geom)""", @@ -145,15 +174,14 @@ def federal_states_per_weather_cell(): """ - cfg = egon.data.config.datasets()["renewable_feedin"]["sources"] + cfg = RenewableFeedin.sources.tables # Select weather cells and ferear states from database weather_cells = weather_cells_in_germany(geom_column="geom_point") federal_states = db.select_geodataframe( f"""SELECT gen, geometry - FROM {cfg['vg250_lan_union']['schema']}. 
- {cfg['vg250_lan_union']['table']}""", + FROM {cfg['vg250_lan_union']['schema']}.{cfg['vg250_lan_union']['table']}""", geom_col="geometry", index_col="gen", ) @@ -346,7 +374,8 @@ def wind(): """ - cfg = egon.data.config.datasets()["renewable_feedin"]["targets"] + + cfg = RenewableFeedin.targets.tables # Get weather cells with turbine type weather_cells = turbine_per_weather_cell() @@ -500,7 +529,8 @@ def heat_pump_cop(): carrier = "heat_pump_cop" # Load configuration - cfg = egon.data.config.datasets()["renewable_feedin"] + + cfg = RenewableFeedin.targets.tables # Get weather cells in Germany weather_cells = weather_cells_in_germany() @@ -535,15 +565,15 @@ def heat_pump_cop(): # Delete existing rows for carrier db.execute_sql( f""" - DELETE FROM {cfg['targets']['feedin_table']['schema']}. - {cfg['targets']['feedin_table']['table']} - WHERE carrier = '{carrier}'""" + DELETE FROM {cfg['feedin_table']['schema']}. + {cfg['feedin_table']['table']} + WHERE carrier = '{carrier}'""" ) # Insert values into database df.to_sql( - cfg["targets"]["feedin_table"]["table"], - schema=cfg["targets"]["feedin_table"]["schema"], + cfg["feedin_table"]["table"], + schema=cfg["feedin_table"]["schema"], con=db.engine(), if_exists="append", ) @@ -570,7 +600,7 @@ def insert_feedin(data, carrier, weather_year): data = data.transpose().to_pandas() # Load configuration - cfg = egon.data.config.datasets()["renewable_feedin"] + cfg = RenewableFeedin.targets.tables # Initialize DataFrame df = pd.DataFrame( @@ -589,15 +619,15 @@ def insert_feedin(data, carrier, weather_year): # Delete existing rows for carrier db.execute_sql( f""" - DELETE FROM {cfg['targets']['feedin_table']['schema']}. - {cfg['targets']['feedin_table']['table']} - WHERE carrier = '{carrier}'""" + DELETE FROM {cfg['feedin_table']['schema']}. 
+ {cfg['feedin_table']['table']} + WHERE carrier = '{carrier}'""" ) # Insert values into database df.to_sql( - cfg["targets"]["feedin_table"]["table"], - schema=cfg["targets"]["feedin_table"]["schema"], + cfg["feedin_table"]["table"], + schema=cfg["feedin_table"]["schema"], con=db.engine(), if_exists="append", ) @@ -743,6 +773,6 @@ def add_metadata(): # Add metadata as a comment to the table db.submit_comment( meta_json, - EgonRenewableFeedIn.__table__.schema, - EgonRenewableFeedIn.__table__.name, + RenewableFeedin.targets.tables["feedin_table"]["schema"], + RenewableFeedin.targets.tables["feedin_table"]["table"], ) diff --git a/src/egon/data/datasets/saltcavern/__init__.py b/src/egon/data/datasets/saltcavern/__init__.py index 186a449be..ae045b747 100755 --- a/src/egon/data/datasets/saltcavern/__init__.py +++ b/src/egon/data/datasets/saltcavern/__init__.py @@ -13,7 +13,7 @@ import geopandas as gpd from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets import egon.data.config @@ -25,24 +25,17 @@ def to_postgres(): """ - # Get information from data configuraiton file - data_config = egon.data.config.datasets() - bgr_processed = data_config["bgr"]["processed"] - + schema = SaltcavernData.targets.tables["saltcaverns"]["schema"] + table = SaltcavernData.targets.tables["saltcaverns"]["table"] # Create target schema - db.execute_sql(f"CREATE SCHEMA IF NOT EXISTS {bgr_processed['schema']};") + db.execute_sql(f"CREATE SCHEMA IF NOT EXISTS {schema};") engine_local_db = db.engine() # Extract shapefiles from zip archive and send it to postgres db - for filename, table in bgr_processed["file_table_map"].items(): + for filename, path in SaltcavernData.sources.files.items(): # Open files and read .shp (within .zip) with geopandas - shp_file_path = ( - Path(".") - / "data_bundle_egon_data" - / "hydrogen_storage_potential_saltstructures" - / filename - ) + shp_file_path = Path(path) data = 
gpd.read_file(shp_file_path).to_crs(epsg=4326) data = ( data[ @@ -64,15 +57,13 @@ def to_postgres(): # Drop table before inserting data db.execute_sql( - f"DROP TABLE IF EXISTS " - f"{bgr_processed['schema']}.{table} CASCADE;" - ) + f"DROP TABLE IF EXISTS {schema}.{table} CASCADE;") # create database table from geopandas dataframe data.to_postgis( table, engine_local_db, - schema=bgr_processed["schema"], + schema= schema, index=True, if_exists="replace", dtype={"geometry": Geometry()}, @@ -80,14 +71,12 @@ def to_postgres(): # add primary key db.execute_sql( - f"ALTER TABLE {bgr_processed['schema']}.{table} " - f"ADD PRIMARY KEY (saltstructure_id);" + f"ALTER TABLE {schema}.{table} ADD PRIMARY KEY (saltstructure_id);" ) # Add index on geometry column db.execute_sql( - f"CREATE INDEX {table}_geometry_idx ON " - f"{bgr_processed['schema']}.{table} USING gist (geometry);" + f"CREATE INDEX {table}_geometry_idx ON {schema}.{table} USING gist (geometry);" ) @@ -110,7 +99,23 @@ class SaltcavernData(Dataset): #: name: str = "SaltcavernData" #: - version: str = "0.0.1" + version: str = "0.0.2" + + + sources = DatasetSources( + files={ + "inspee_saltstructures": "data_bundle_egon_data/hydrogen_storage_potential_saltstructures/Potenzialbewertung_InSpEE_InSpEE_DS.shp" + } + ) + + targets = DatasetTargets( + tables={ + "saltcaverns": { + "schema": "boundaries", + "table": "inspee_saltstructures", + }, + } + ) def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/sanity_checks.py b/src/egon/data/datasets/sanity_checks.py index 400822162..3d62f6838 100755 --- a/src/egon/data/datasets/sanity_checks.py +++ b/src/egon/data/datasets/sanity_checks.py @@ -18,7 +18,7 @@ import seaborn as sns from egon.data import config, db, logger -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.electricity_demand_timeseries.cts_buildings import ( 
EgonCtsElectricityDemandBuildingShare, EgonCtsHeatDemandBuildingShare, @@ -116,10 +116,10 @@ def etrago_eGon2035_electricity(): if carrier == "biomass": sum_output = db.select_dataframe( - """SELECT scn_name, SUM(p_nom::numeric) as output_capacity_mw - FROM grid.egon_etrago_generator + f"""SELECT scn_name, SUM(p_nom::numeric) as output_capacity_mw + FROM {SanityChecks.sources.tables["etrago"]["generator"]["schema"]}.{SanityChecks.sources.tables["etrago"]["generator"]["table"]} WHERE bus IN ( - SELECT bus_id FROM grid.egon_etrago_bus + SELECT bus_id FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = 'eGon2035' AND country = 'DE') AND carrier IN ('biomass', 'industrial_biomass_CHP', @@ -133,14 +133,14 @@ def etrago_eGon2035_electricity(): sum_output = db.select_dataframe( f"""SELECT scn_name, SUM(p_nom::numeric) as output_capacity_mw - FROM grid.egon_etrago_generator + FROM {SanityChecks.sources.tables["etrago"]["generator"]["schema"]}.{SanityChecks.sources.tables["etrago"]["generator"]["table"]} WHERE scn_name = '{scn}' AND carrier IN ('{carrier}') - AND bus IN - (SELECT bus_id - FROM grid.egon_etrago_bus - WHERE scn_name = 'eGon2035' - AND country = 'DE') + AND bus IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} + WHERE scn_name = 'eGon2035' + AND country = 'DE') GROUP BY (scn_name); """, warning=False, @@ -148,7 +148,7 @@ def etrago_eGon2035_electricity(): sum_input = db.select_dataframe( f"""SELECT carrier, SUM(capacity::numeric) as input_capacity_mw - FROM supply.egon_scenario_capacities + FROM {SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["schema"]}.{SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["table"]} WHERE carrier= '{carrier}' AND scenario_name ='{scn}' GROUP BY (carrier); @@ -206,14 +206,14 @@ def etrago_eGon2035_electricity(): 
sum_output = db.select_dataframe( f"""SELECT scn_name, SUM(p_nom::numeric) as output_capacity_mw - FROM grid.egon_etrago_storage + FROM {SanityChecks.sources.tables["etrago"]["storage"]["schema"]}.{SanityChecks.sources.tables["etrago"]["storage"]["table"]} WHERE scn_name = '{scn}' AND carrier IN ('{carrier}') - AND bus IN - (SELECT bus_id - FROM grid.egon_etrago_bus - WHERE scn_name = 'eGon2035' - AND country = 'DE') + AND bus IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} + WHERE scn_name = 'eGon2035' + AND country = 'DE') GROUP BY (scn_name); """, warning=False, @@ -221,7 +221,7 @@ def etrago_eGon2035_electricity(): sum_input = db.select_dataframe( f"""SELECT carrier, SUM(capacity::numeric) as input_capacity_mw - FROM supply.egon_scenario_capacities + FROM {SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["schema"]}.{SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["table"]} WHERE carrier= '{carrier}' AND scenario_name ='{scn}' GROUP BY (carrier); @@ -273,13 +273,13 @@ def etrago_eGon2035_electricity(): ) output_demand = db.select_dataframe( - """SELECT a.scn_name, a.carrier, SUM((SELECT SUM(p) - FROM UNNEST(b.p_set) p))/1000000::numeric as load_twh - FROM grid.egon_etrago_load a - JOIN grid.egon_etrago_load_timeseries b - ON (a.load_id = b.load_id) - JOIN grid.egon_etrago_bus c - ON (a.bus=c.bus_id) + f"""SELECT a.scn_name, a.carrier, + SUM((SELECT SUM(p) FROM UNNEST(b.p_set) p))/1000000::numeric as load_twh + FROM {SanityChecks.sources.tables["etrago"]["load"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load"]["table"]} a + JOIN {SanityChecks.sources.tables["etrago"]["load_timeseries"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load_timeseries"]["table"]} b + ON (a.load_id = b.load_id) + JOIN {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} c + ON 
(a.bus=c.bus_id) AND b.scn_name = 'eGon2035' AND a.scn_name = 'eGon2035' AND a.carrier = 'AC' @@ -292,9 +292,9 @@ def etrago_eGon2035_electricity(): )["load_twh"].values[0] input_cts_ind = db.select_dataframe( - """SELECT scenario, - SUM(demand::numeric/1000000) as demand_mw_regio_cts_ind - FROM demand.egon_demandregio_cts_ind + f"""SELECT scenario, + SUM(demand::numeric/1000000) as demand_mw_regio_cts_ind + FROM {SanityChecks.sources.tables["demand"]["demandregio_cts_ind"]["schema"]}.{SanityChecks.sources.tables["demand"]["demandregio_cts_ind"]["table"]} WHERE scenario= 'eGon2035' AND year IN ('2035') GROUP BY (scenario); @@ -304,8 +304,9 @@ def etrago_eGon2035_electricity(): )["demand_mw_regio_cts_ind"].values[0] input_hh = db.select_dataframe( - """SELECT scenario, SUM(demand::numeric/1000000) as demand_mw_regio_hh - FROM demand.egon_demandregio_hh + f"""SELECT scenario, + SUM(demand::numeric/1000000) as demand_mw_regio_hh + FROM {SanityChecks.sources.tables["demand"]["demandregio_hh"]["schema"]}.{SanityChecks.sources.tables["demand"]["demandregio_hh"]["table"]} WHERE scenario= 'eGon2035' AND year IN ('2035') GROUP BY (scenario); @@ -350,14 +351,13 @@ def etrago_eGon2035_heat(): # Sanity checks for heat demand output_heat_demand = db.select_dataframe( - """SELECT a.scn_name, - (SUM( - (SELECT SUM(p) FROM UNNEST(b.p_set) p))/1000000)::numeric as load_twh - FROM grid.egon_etrago_load a - JOIN grid.egon_etrago_load_timeseries b - ON (a.load_id = b.load_id) - JOIN grid.egon_etrago_bus c - ON (a.bus=c.bus_id) + f"""SELECT a.scn_name, + (SUM((SELECT SUM(p) FROM UNNEST(b.p_set) p))/1000000)::numeric as load_twh + FROM {SanityChecks.sources.tables["etrago"]["load"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load"]["table"]} a + JOIN {SanityChecks.sources.tables["etrago"]["load_timeseries"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load_timeseries"]["table"]} b + ON (a.load_id = b.load_id) + JOIN 
{SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} c + ON (a.bus=c.bus_id) AND b.scn_name = 'eGon2035' AND a.scn_name = 'eGon2035' AND c.scn_name= 'eGon2035' @@ -369,8 +369,9 @@ def etrago_eGon2035_heat(): )["load_twh"].values[0] input_heat_demand = db.select_dataframe( - """SELECT scenario, SUM(demand::numeric/1000000) as demand_mw_peta_heat - FROM demand.egon_peta_heat + f"""SELECT scenario, + SUM(demand::numeric/1000000) as demand_mw_peta_heat + FROM {SanityChecks.sources.tables["demand"]["peta_heat"]["schema"]}.{SanityChecks.sources.tables["demand"]["peta_heat"]["table"]} WHERE scenario= 'eGon2035' GROUP BY (scenario); """, @@ -393,8 +394,8 @@ def etrago_eGon2035_heat(): # Comparison for central heat pumps heat_pump_input = db.select_dataframe( - """SELECT carrier, SUM(capacity::numeric) as Urban_central_heat_pump_mw - FROM supply.egon_scenario_capacities + f"""SELECT carrier, SUM(capacity::numeric) as Urban_central_heat_pump_mw + FROM {SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["schema"]}.{SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["table"]} WHERE carrier= 'urban_central_heat_pump' AND scenario_name IN ('eGon2035') GROUP BY (carrier); @@ -403,8 +404,8 @@ def etrago_eGon2035_heat(): )["urban_central_heat_pump_mw"].values[0] heat_pump_output = db.select_dataframe( - """SELECT carrier, SUM(p_nom::numeric) as Central_heat_pump_mw - FROM grid.egon_etrago_link + f"""SELECT carrier, SUM(p_nom::numeric) as Central_heat_pump_mw + FROM {SanityChecks.sources.tables["etrago"]["link"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link"]["table"]} WHERE carrier= 'central_heat_pump' AND scn_name IN ('eGon2035') GROUP BY (carrier); @@ -421,8 +422,8 @@ def etrago_eGon2035_heat(): # Comparison for residential heat pumps input_residential_heat_pump = db.select_dataframe( - """SELECT carrier, SUM(capacity::numeric) as residential_heat_pump_mw - FROM 
supply.egon_scenario_capacities + f"""SELECT carrier, SUM(capacity::numeric) as residential_heat_pump_mw + FROM {SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["schema"]}.{SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["table"]} WHERE carrier= 'residential_rural_heat_pump' AND scenario_name IN ('eGon2035') GROUP BY (carrier); @@ -431,8 +432,8 @@ def etrago_eGon2035_heat(): )["residential_heat_pump_mw"].values[0] output_residential_heat_pump = db.select_dataframe( - """SELECT carrier, SUM(p_nom::numeric) as rural_heat_pump_mw - FROM grid.egon_etrago_link + f"""SELECT carrier, SUM(p_nom::numeric) as rural_heat_pump_mw + FROM {SanityChecks.sources.tables["etrago"]["link"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link"]["table"]} WHERE carrier= 'rural_heat_pump' AND scn_name IN ('eGon2035') GROUP BY (carrier); @@ -452,9 +453,8 @@ def etrago_eGon2035_heat(): # Comparison for resistive heater resistive_heater_input = db.select_dataframe( - """SELECT carrier, - SUM(capacity::numeric) as Urban_central_resistive_heater_MW - FROM supply.egon_scenario_capacities + f"""SELECT carrier, SUM(capacity::numeric) as Urban_central_resistive_heater_MW + FROM {SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["schema"]}.{SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["table"]} WHERE carrier= 'urban_central_resistive_heater' AND scenario_name IN ('eGon2035') GROUP BY (carrier); @@ -463,8 +463,8 @@ def etrago_eGon2035_heat(): )["urban_central_resistive_heater_mw"].values[0] resistive_heater_output = db.select_dataframe( - """SELECT carrier, SUM(p_nom::numeric) as central_resistive_heater_MW - FROM grid.egon_etrago_link + f"""SELECT carrier, SUM(p_nom::numeric) as central_resistive_heater_MW + FROM {SanityChecks.sources.tables["etrago"]["link"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link"]["table"]} WHERE carrier= 'central_resistive_heater' AND scn_name IN ('eGon2035') GROUP BY 
(carrier); @@ -486,8 +486,8 @@ def etrago_eGon2035_heat(): # Comparison for solar thermal collectors input_solar_thermal = db.select_dataframe( - """SELECT carrier, SUM(capacity::numeric) as solar_thermal_collector_mw - FROM supply.egon_scenario_capacities + f"""SELECT carrier, SUM(capacity::numeric) as solar_thermal_collector_mw + FROM {SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["schema"]}.{SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["table"]} WHERE carrier= 'urban_central_solar_thermal_collector' AND scenario_name IN ('eGon2035') GROUP BY (carrier); @@ -496,8 +496,8 @@ def etrago_eGon2035_heat(): )["solar_thermal_collector_mw"].values[0] output_solar_thermal = db.select_dataframe( - """SELECT carrier, SUM(p_nom::numeric) as solar_thermal_collector_mw - FROM grid.egon_etrago_generator + f"""SELECT carrier, SUM(p_nom::numeric) as solar_thermal_collector_mw + FROM {SanityChecks.sources.tables["etrago"]["generator"]["schema"]}.{SanityChecks.sources.tables["etrago"]["generator"]["table"]} WHERE carrier= 'solar_thermal_collector' AND scn_name IN ('eGon2035') GROUP BY (carrier); @@ -517,9 +517,8 @@ def etrago_eGon2035_heat(): # Comparison for geothermal input_geo_thermal = db.select_dataframe( - """SELECT carrier, - SUM(capacity::numeric) as Urban_central_geo_thermal_MW - FROM supply.egon_scenario_capacities + f"""SELECT carrier, SUM(capacity::numeric) as Urban_central_geo_thermal_MW + FROM {SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["schema"]}.{SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["table"]} WHERE carrier= 'urban_central_geo_thermal' AND scenario_name IN ('eGon2035') GROUP BY (carrier); @@ -528,8 +527,8 @@ def etrago_eGon2035_heat(): )["urban_central_geo_thermal_mw"].values[0] output_geo_thermal = db.select_dataframe( - """SELECT carrier, SUM(p_nom::numeric) as geo_thermal_MW - FROM grid.egon_etrago_generator + f"""SELECT carrier, SUM(p_nom::numeric) as 
geo_thermal_MW + FROM {SanityChecks.sources.tables["etrago"]["generator"]["schema"]}.{SanityChecks.sources.tables["etrago"]["generator"]["table"]} WHERE carrier= 'geo_thermal' AND scn_name IN ('eGon2035') GROUP BY (carrier); @@ -553,19 +552,19 @@ def residential_electricity_annual_sum(rtol=1e-5): """ df_nuts3_annual_sum = db.select_dataframe( - sql=""" + sql=f""" SELECT dr.nuts3, dr.scenario, dr.demand_regio_sum, profiles.profile_sum FROM ( SELECT scenario, SUM(demand) AS profile_sum, vg250_nuts3 - FROM demand.egon_demandregio_zensus_electricity AS egon, - boundaries.egon_map_zensus_vg250 AS boundaries + FROM {SanityChecks.sources.tables["demand"]["demandregio_zensus_electricity"]["schema"]}.{SanityChecks.sources.tables["demand"]["demandregio_zensus_electricity"]["table"]} AS egon, + {SanityChecks.sources.tables["boundaries"]["zensus_vg250"]["schema"]}.{SanityChecks.sources.tables["boundaries"]["zensus_vg250"]["table"]} AS boundaries Where egon.zensus_population_id = boundaries.zensus_population_id AND sector = 'residential' GROUP BY vg250_nuts3, scenario ) AS profiles JOIN ( SELECT nuts3, scenario, sum(demand) AS demand_regio_sum - FROM demand.egon_demandregio_hh + FROM {SanityChecks.sources.tables["demand"]["demandregio_hh"]["schema"]}.{SanityChecks.sources.tables["demand"]["demandregio_hh"]["table"]} GROUP BY year, scenario, nuts3 ) AS dr ON profiles.vg250_nuts3 = dr.nuts3 and profiles.scenario = dr.scenario @@ -593,12 +592,12 @@ def residential_electricity_hh_refinement(rtol=1e-5): was applied and compare it to the original census values.""" df_refinement = db.select_dataframe( - sql=""" + sql=f""" SELECT refined.nuts3, refined.characteristics_code, refined.sum_refined::int, census.sum_census::int FROM( SELECT nuts3, characteristics_code, SUM(hh_10types) as sum_refined - FROM society.egon_destatis_zensus_household_per_ha_refined + FROM 
{SanityChecks.sources.tables["zensus_households"]["households_per_ha_refined"]["schema"]}.{SanityChecks.sources.tables["zensus_households"]["households_per_ha_refined"]["table"]} GROUP BY nuts3, characteristics_code) AS refined JOIN( @@ -606,7 +605,7 @@ def residential_electricity_hh_refinement(rtol=1e-5): FROM( SELECT nuts3, cell_id, characteristics_code, sum(DISTINCT(hh_5types))as orig - FROM society.egon_destatis_zensus_household_per_ha_refined + FROM {SanityChecks.sources.tables["zensus_households"]["households_per_ha_refined"]["schema"]}.{SanityChecks.sources.tables["zensus_households"]["households_per_ha_refined"]["table"]} GROUP BY cell_id, characteristics_code, nuts3) AS t GROUP BY t.nuts3, t.characteristics_code ) AS census ON refined.nuts3 = census.nuts3 @@ -680,9 +679,9 @@ def cts_heat_demand_share(rtol=1e-5): def sanitycheck_pv_rooftop_buildings(): def egon_power_plants_pv_roof_building(): - sql = """ + sql = f""" SELECT * - FROM supply.egon_power_plants_pv_roof_building + FROM {SanityChecks.sources.tables["pv_rooftop_buildings"]["pv_roof_building"]["schema"]}.{SanityChecks.sources.tables["pv_rooftop_buildings"]["pv_roof_building"]["table"]} """ return db.select_dataframe(sql, index_col="index") @@ -755,13 +754,13 @@ def egon_power_plants_pv_roof_building(): f"{merge_df.loc[merge_df.scenario == scenario].capacity.sum()}" ) elif scenario == "eGon100RE": - sources = config.datasets()["solar_rooftop"]["sources"] + sources = SanityChecks.sources.tables["solar_rooftop"] + target = db.select_dataframe( f""" SELECT capacity - FROM {sources['scenario_capacities']['schema']}. 
- {sources['scenario_capacities']['table']} a + FROM {sources['scenario_capacities']['schema']}.{sources['scenario_capacities']['table']} a WHERE carrier = 'solar_rooftop' AND scenario_name = '{scenario}' """ @@ -770,12 +769,8 @@ def egon_power_plants_pv_roof_building(): dataset = config.settings()["egon-data"]["--dataset-boundary"] if dataset == "Schleswig-Holstein": - sources = config.datasets()["scenario_input"]["sources"] - path = Path( - f"./data_bundle_egon_data/nep2035_version2021/" - f"{sources['eGon2035']['capacities']}" - ).resolve() + path = Path(SanityChecks.sources.files["nep2035_capacities"]).resolve() total_2035 = ( pd.read_excel( @@ -1367,17 +1362,17 @@ def sanitycheck_home_batteries(): scenarios = constants["scenarios"] cbat_pbat_ratio = get_cbat_pbat_ratio() - sources = config.datasets()["home_batteries"]["sources"] - targets = config.datasets()["home_batteries"]["targets"] + sources = SanityChecks.sources.tables["home_batteries"] + targets = SanityChecks.targets.tables["home_batteries"] + for scenario in scenarios: # get home battery capacity per mv grid id sql = f""" - SELECT el_capacity as p_nom, bus_id FROM - {sources["storage"]["schema"]} - .{sources["storage"]["table"]} + SELECT el_capacity as p_nom, bus_id + FROM {sources["storage"]["schema"]}.{sources["storage"]["table"]} WHERE carrier = 'home_battery' - AND scenario = '{scenario}' + AND scenario = '{scenario}' """ home_batteries_df = db.select_dataframe(sql, index_col="bus_id") @@ -1387,9 +1382,8 @@ def sanitycheck_home_batteries(): ) sql = f""" - SELECT * FROM - {targets["home_batteries"]["schema"]} - .{targets["home_batteries"]["table"]} + SELECT * + FROM {targets["home_batteries"]["schema"]}.{targets["home_batteries"]["table"]} WHERE scenario = '{scenario}' """ @@ -1442,18 +1436,18 @@ def sanity_check_gas_buses(scn): isolated_gas_buses = db.select_dataframe( f""" SELECT bus_id, carrier, country - FROM grid.egon_etrago_bus + FROM 
{SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND carrier = '{key}' AND country = 'DE' - AND bus_id NOT IN - (SELECT bus0 - FROM grid.egon_etrago_link + AND bus_id NOT IN ( + SELECT bus0 + FROM {SanityChecks.sources.tables["etrago"]["link"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link"]["table"]} WHERE scn_name = '{scn}' AND carrier = '{corresponding_carriers[scn][key]}') - AND bus_id NOT IN - (SELECT bus1 - FROM grid.egon_etrago_link + AND bus_id NOT IN ( + SELECT bus1 + FROM {SanityChecks.sources.tables["etrago"]["link"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link"]["table"]} WHERE scn_name = '{scn}' AND carrier = '{corresponding_carriers[scn][key]}') ; @@ -1465,9 +1459,8 @@ def sanity_check_gas_buses(scn): logger.info(isolated_gas_buses) # Deviation of the gas grid buses number - target_file = ( - Path(".") / "datasets" / "gas_data" / "data" / "IGGIELGN_Nodes.csv" - ) + target_file = Path(SanityChecks.sources.files["gas_nodes"]).resolve() + Grid_buses_list = pd.read_csv( target_file, @@ -1485,7 +1478,7 @@ def sanity_check_gas_buses(scn): output_grid_buses_df = db.select_dataframe( f""" SELECT bus_id - FROM grid.egon_etrago_bus + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE' AND carrier = '{carrier}'; @@ -1527,12 +1520,12 @@ def sanity_check_CH4_stores(scn): """ output_CH4_stores = db.select_dataframe( f"""SELECT SUM(e_nom::numeric) as e_nom_germany - FROM grid.egon_etrago_store + FROM {SanityChecks.sources.tables["etrago"]["store"]["schema"]}.{SanityChecks.sources.tables["etrago"]["store"]["table"]} WHERE scn_name = '{scn}' AND carrier = 'CH4' - AND bus IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus IN ( + SELECT bus_id + FROM 
{SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE' AND carrier = 'CH4'); @@ -1588,12 +1581,12 @@ def sanity_check_H2_saltcavern_stores(scn): """ output_H2_stores = db.select_dataframe( f"""SELECT SUM(e_nom_max::numeric) as e_nom_max_germany - FROM grid.egon_etrago_store + FROM {SanityChecks.sources.tables["etrago"]["store"]["schema"]}.{SanityChecks.sources.tables["etrago"]["store"]["table"]} WHERE scn_name = '{scn}' AND carrier = 'H2_underground' - AND bus IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE' AND carrier = 'H2_saltcavern'); @@ -1638,12 +1631,12 @@ def sanity_check_gas_one_port(scn): isolated_one_port_c = db.select_dataframe( f""" SELECT load_id, bus, carrier, scn_name - FROM grid.egon_etrago_load + FROM {SanityChecks.sources.tables["etrago"]["load"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load"]["table"]} WHERE scn_name = '{scn}' AND carrier = 'CH4_for_industry' - AND bus NOT IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus NOT IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE' AND carrier = 'CH4') @@ -1659,12 +1652,12 @@ def sanity_check_gas_one_port(scn): isolated_one_port_c = db.select_dataframe( f""" SELECT load_id, bus, carrier, scn_name - FROM grid.egon_etrago_load + FROM {SanityChecks.sources.tables["etrago"]["load"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load"]["table"]} WHERE scn_name = '{scn}' AND carrier = 'CH4' - AND bus NOT IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus NOT IN ( + SELECT bus_id + FROM 
{SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country != 'DE' AND carrier = 'CH4') @@ -1680,18 +1673,19 @@ def sanity_check_gas_one_port(scn): isolated_one_port_c = db.select_dataframe( f""" SELECT load_id, bus, carrier, scn_name - FROM grid.egon_etrago_load + FROM {SanityChecks.sources.tables["etrago"]["load"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load"]["table"]} WHERE scn_name = '{scn}' AND carrier = 'H2_for_industry' - AND (bus NOT IN - (SELECT bus_id - FROM grid.egon_etrago_bus - WHERE scn_name = '{scn}' - AND country = 'DE' - AND carrier = 'H2_grid') - AND bus NOT IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND ( + bus NOT IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} + WHERE scn_name = '{scn}' + AND country = 'DE' + AND carrier = 'H2_grid') + AND bus NOT IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country != 'DE' AND carrier = 'AC')) @@ -1707,12 +1701,12 @@ def sanity_check_gas_one_port(scn): isolated_one_port_c = db.select_dataframe( f""" SELECT generator_id, bus, carrier, scn_name - FROM grid.egon_etrago_generator + FROM {SanityChecks.sources.tables["etrago"]["generator"]["schema"]}.{SanityChecks.sources.tables["etrago"]["generator"]["table"]} WHERE scn_name = '{scn}' AND carrier = 'CH4' - AND bus NOT IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus NOT IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND carrier = 'CH4'); ; @@ -1733,12 +1727,12 @@ def sanity_check_gas_one_port(scn): isolated_one_port_c = db.select_dataframe( f""" SELECT store_id, bus, carrier, scn_name - FROM grid.egon_etrago_store + 
FROM {SanityChecks.sources.tables["etrago"]["store"]["schema"]}.{SanityChecks.sources.tables["etrago"]["store"]["table"]} WHERE scn_name = '{scn}' AND carrier = '{corresponding_carriers[key]}' - AND bus NOT IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus NOT IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND carrier = '{key}') ; @@ -1753,18 +1747,18 @@ def sanity_check_gas_one_port(scn): isolated_one_port_c = db.select_dataframe( f""" SELECT store_id, bus, carrier, scn_name - FROM grid.egon_etrago_store + FROM {SanityChecks.sources.tables["etrago"]["store"]["schema"]}.{SanityChecks.sources.tables["etrago"]["store"]["table"]} WHERE scn_name = '{scn}' AND carrier = 'H2_overground' - AND bus NOT IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus NOT IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE' AND carrier = 'H2_saltcavern') - AND bus NOT IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus NOT IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE' AND carrier = 'H2_grid') @@ -1805,18 +1799,18 @@ def sanity_check_CH4_grid(scn): grid_carrier = "CH4" output_gas_grid = db.select_dataframe( f"""SELECT SUM(p_nom::numeric) as p_nom_germany - FROM grid.egon_etrago_link + FROM {SanityChecks.sources.tables["etrago"]["link"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link"]["table"]} WHERE scn_name = '{scn}' AND carrier = '{grid_carrier}' - AND bus0 IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus0 IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = 
'{scn}' AND country = 'DE' AND carrier = '{grid_carrier}') - AND bus1 IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus1 IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE' AND carrier = '{grid_carrier}') @@ -1886,16 +1880,17 @@ def sanity_check_gas_links(scn): link_with_missing_bus = db.select_dataframe( f""" SELECT link_id, bus0, bus1, carrier, scn_name - FROM grid.egon_etrago_link + FROM {SanityChecks.sources.tables["etrago"]["link"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link"]["table"]} WHERE scn_name = '{scn}' AND carrier = '{c}' - AND (bus0 NOT IN - (SELECT bus_id - FROM grid.egon_etrago_bus - WHERE scn_name = '{scn}') - OR bus1 NOT IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND ( + bus0 NOT IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} + WHERE scn_name = '{scn}') + OR bus1 NOT IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}')) ; """, @@ -1949,8 +1944,8 @@ def etrago_eGon2035_gas_DE(): # Loads logger.info("LOADS") - path = Path(".") / "datasets" / "gas_data" / "demand" - corr_file = path / "region_corr.json" + corr_file = Path(SanityChecks.sources.files["gas_region_corr"]).resolve() + #path = corr_file.parent df_corr = pd.read_json(corr_file) df_corr = df_corr.loc[:, ["id_region", "name_short"]] df_corr.set_index("id_region", inplace=True) @@ -1958,25 +1953,29 @@ def etrago_eGon2035_gas_DE(): for carrier in ["CH4_for_industry", "H2_for_industry"]: output_gas_demand = db.select_dataframe( - f"""SELECT (SUM( - (SELECT SUM(p) - FROM UNNEST(b.p_set) p))/1000000)::numeric as load_twh - FROM grid.egon_etrago_load a - JOIN grid.egon_etrago_load_timeseries b + f""" + SELECT ( + SUM( + (SELECT SUM(p) + 
FROM UNNEST(b.p_set) p) + )/1000000 + )::numeric as load_twh + FROM {SanityChecks.sources.tables["etrago"]["load"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load"]["table"]} a + JOIN {SanityChecks.sources.tables["etrago"]["load_timeseries"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load_timeseries"]["table"]} b ON (a.load_id = b.load_id) - JOIN grid.egon_etrago_bus c + JOIN {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} c ON (a.bus=c.bus_id) - AND b.scn_name = '{scn}' - AND a.scn_name = '{scn}' - AND c.scn_name = '{scn}' - AND c.country = 'DE' - AND a.carrier = '{carrier}'; + AND b.scn_name = '{scn}' + AND a.scn_name = '{scn}' + AND c.scn_name = '{scn}' + AND c.country = 'DE' + AND a.carrier = '{carrier}'; """, warning=False, )["load_twh"].values[0] input_gas_demand = pd.read_json( - path / (carrier + "_eGon2035.json") + Path(SanityChecks.sources.files[f"gas_{carrier}_eGon2035"]) ) input_gas_demand = input_gas_demand.loc[:, ["id_region", "value"]] input_gas_demand.set_index("id_region", inplace=True) @@ -2006,12 +2005,12 @@ def etrago_eGon2035_gas_DE(): output_gas_generation = db.select_dataframe( f"""SELECT SUM(p_nom::numeric) as p_nom_germany - FROM grid.egon_etrago_generator + FROM {SanityChecks.sources.tables["etrago"]["generator"]["schema"]}.{SanityChecks.sources.tables["etrago"]["generator"]["table"]} WHERE scn_name = '{scn}' AND carrier = '{carrier_generator}' - AND bus IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE' AND carrier = '{carrier_generator}'); @@ -2019,13 +2018,8 @@ def etrago_eGon2035_gas_DE(): warning=False, )["p_nom_germany"].values[0] - target_file = ( - Path(".") - / "datasets" - / "gas_data" - / "data" - / "IGGIELGN_Productions.csv" - ) + target_file = 
Path(SanityChecks.sources.files["gas_productions"]).resolve() + NG_generators_list = pd.read_csv( target_file, @@ -2045,10 +2039,10 @@ def etrago_eGon2035_gas_DE(): conversion_factor = 437.5 # MCM/day to MWh/h p_NG = p_NG * conversion_factor - basename = "Biogaspartner_Einspeiseatlas_Deutschland_2021.xlsx" - target_file = ( - Path(".") / "data_bundle_egon_data" / "gas_data" / basename - ) + target_file = Path( + SanityChecks.sources.files["gas_biogaspartner_einspeiseatlas"] + ).resolve() + conversion_factor_b = 0.01083 # m^3/h to MWh/h p_biogas = ( @@ -2131,18 +2125,18 @@ def etrago_eGon2035_gas_abroad(): isolated_gas_buses_abroad = db.select_dataframe( f""" SELECT bus_id, carrier, country - FROM grid.egon_etrago_bus + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND carrier = '{key}' AND country != 'DE' - AND bus_id NOT IN - (SELECT bus0 - FROM grid.egon_etrago_link + AND bus_id NOT IN ( + SELECT bus0 + FROM {SanityChecks.sources.tables["etrago"]["link"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link"]["table"]} WHERE scn_name = '{scn}' AND carrier = '{corresponding_carriers[scn][key]}') - AND bus_id NOT IN - (SELECT bus1 - FROM grid.egon_etrago_link + AND bus_id NOT IN ( + SELECT bus1 + FROM {SanityChecks.sources.tables["etrago"]["link"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link"]["table"]} WHERE scn_name = '{scn}' AND carrier = '{corresponding_carriers[scn][key]}') ; @@ -2170,10 +2164,10 @@ def etrago_eGon2035_gas_abroad(): f"""SELECT (SUM( (SELECT SUM(p) FROM UNNEST(b.p_set) p)))::numeric as load_mwh - FROM grid.egon_etrago_load a - JOIN grid.egon_etrago_load_timeseries b + FROM {SanityChecks.sources.tables["etrago"]["load"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load"]["table"]} a + JOIN {SanityChecks.sources.tables["etrago"]["load_timeseries"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load_timeseries"]["table"]} b ON 
(a.load_id = b.load_id) - JOIN grid.egon_etrago_bus c + JOIN {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} c ON (a.bus=c.bus_id) AND b.scn_name = '{scn}' AND a.scn_name = '{scn}' @@ -2199,12 +2193,12 @@ def etrago_eGon2035_gas_abroad(): output_H2_demand = db.select_dataframe( f"""SELECT SUM(p_set::numeric) as p_set_abroad - FROM grid.egon_etrago_load + FROM {SanityChecks.sources.tables["etrago"]["load"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load"]["table"]} WHERE scn_name = '{scn}' AND carrier = 'H2_for_industry' - AND bus IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country != 'DE' AND carrier = 'AC'); @@ -2228,12 +2222,12 @@ def etrago_eGon2035_gas_abroad(): output_CH4_gen = db.select_dataframe( f"""SELECT SUM(p_nom::numeric) as p_nom_abroad - FROM grid.egon_etrago_generator + FROM {SanityChecks.sources.tables["etrago"]["generator"]["schema"]}.{SanityChecks.sources.tables["etrago"]["generator"]["table"]} WHERE scn_name = '{scn}' AND carrier = 'CH4' - AND bus IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus IN ( + SELECT bus_id + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country != 'DE' AND carrier = 'CH4'); @@ -2257,12 +2251,12 @@ def etrago_eGon2035_gas_abroad(): output_CH4_stores = db.select_dataframe( f"""SELECT SUM(e_nom::numeric) as e_nom_abroad - FROM grid.egon_etrago_store + FROM {SanityChecks.sources.tables["etrago"]["store"]["schema"]}.{SanityChecks.sources.tables["etrago"]["store"]["table"]} WHERE scn_name = '{scn}' AND carrier = 'CH4' - AND bus IN - (SELECT bus_id - FROM grid.egon_etrago_bus + AND bus IN ( + SELECT bus_id + FROM 
{SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country != 'DE' AND carrier = 'CH4'); @@ -2287,18 +2281,18 @@ def etrago_eGon2035_gas_abroad(): grid_carrier = "CH4" output_gas_grid = db.select_dataframe( f"""SELECT SUM(p_nom::numeric) as p_nom - FROM grid.egon_etrago_link + FROM {SanityChecks.sources.tables["etrago"]["link"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link"]["table"]} WHERE scn_name = '{scn}' AND carrier = '{grid_carrier}' AND (bus0 IN (SELECT bus_id - FROM grid.egon_etrago_bus + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country != 'DE' AND carrier = '{grid_carrier}') OR bus1 IN (SELECT bus_id - FROM grid.egon_etrago_bus + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country != 'DE' AND carrier = '{grid_carrier}')) @@ -2329,7 +2323,8 @@ def df_from_series(s: pd.Series): for scenario in ["eGon2035", "eGon100RE"]: # p_min and p_max sql = f""" - SELECT link_id, bus0 as bus, p_nom FROM grid.egon_etrago_link + SELECT link_id, bus0 as bus, p_nom + FROM {SanityChecks.sources.tables["etrago"]["link"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link"]["table"]} WHERE carrier = 'dsm' AND scn_name = '{scenario}' ORDER BY link_id @@ -2340,7 +2335,7 @@ def df_from_series(s: pd.Series): sql = f""" SELECT link_id, p_min_pu, p_max_pu - FROM grid.egon_etrago_link_timeseries + FROM {SanityChecks.sources.tables["etrago"]["link_timeseries"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link_timeseries"]["table"]} WHERE scn_name = '{scenario}' AND link_id IN ({link_ids}) ORDER BY link_id @@ -2354,7 +2349,8 @@ def df_from_series(s: pd.Series): p_max_df.columns = meta_df.bus.tolist() p_min_df.columns = meta_df.bus.tolist() - targets = 
config.datasets()["DSM_CTS_industry"]["targets"] + targets = SanityChecks.targets.tables["DSM_CTS_industry"] + tables = [ "cts_loadcurves_dsm", @@ -2411,7 +2407,8 @@ def df_from_series(s: pd.Series): # e_min and e_max sql = f""" - SELECT store_id, bus, e_nom FROM grid.egon_etrago_store + SELECT store_id, bus, e_nom + FROM {SanityChecks.sources.tables["etrago"]["store"]["schema"]}.{SanityChecks.sources.tables["etrago"]["store"]["table"]} WHERE carrier = 'dsm' AND scn_name = '{scenario}' ORDER BY store_id @@ -2422,7 +2419,7 @@ def df_from_series(s: pd.Series): sql = f""" SELECT store_id, e_min_pu, e_max_pu - FROM grid.egon_etrago_store_timeseries + FROM {SanityChecks.sources.tables["etrago"]["store_timeseries"]["schema"]}.{SanityChecks.sources.tables["etrago"]["store_timeseries"]["table"]} WHERE scn_name = '{scenario}' AND store_id IN ({store_ids}) ORDER BY store_id @@ -2495,7 +2492,7 @@ def generators_links_storages_stores_100RE(scn="eGon100RE"): # Generators scn_capacities = db.select_dataframe( f""" - SELECT * FROM supply.egon_scenario_capacities + SELECT * FROM {SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["schema"]}.{SanityChecks.sources.tables["solar_rooftop"]["scenario_capacities"]["table"]} WHERE scenario_name = '{scn}' """, index_col="index", @@ -2532,9 +2529,9 @@ def generators_links_storages_stores_100RE(scn="eGon100RE"): gen_etrago = db.select_dataframe( f""" - SELECT * FROM grid.egon_etrago_generator + SELECT * FROM {SanityChecks.sources.tables["etrago"]["generator"]["schema"]}.{SanityChecks.sources.tables["etrago"]["generator"]["table"]} WHERE scn_name = '{scn}' - AND bus IN (SELECT bus_id from grid.egon_etrago_bus + AND bus IN (SELECT bus_id from {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE') """, @@ -2574,13 +2571,13 @@ def generators_links_storages_stores_100RE(scn="eGon100RE"): link_etrago = db.select_dataframe( f""" 
- SELECT * FROM grid.egon_etrago_link + SELECT * FROM {SanityChecks.sources.tables["etrago"]["link"]["schema"]}.{SanityChecks.sources.tables["etrago"]["link"]["table"]} WHERE scn_name = '{scn}' - AND (bus0 IN (SELECT bus_id from grid.egon_etrago_bus + AND (bus0 IN (SELECT bus_id from {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE') OR - bus1 IN (SELECT bus_id from grid.egon_etrago_bus + bus1 IN (SELECT bus_id from {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE') ) @@ -2610,9 +2607,9 @@ def generators_links_storages_stores_100RE(scn="eGon100RE"): # storage storage_etrago = db.select_dataframe( f""" - SELECT * FROM grid.egon_etrago_storage + SELECT * FROM {SanityChecks.sources.tables["etrago"]["storage"]["schema"]}.{SanityChecks.sources.tables["etrago"]["storage"]["table"]} WHERE scn_name = '{scn}' - AND bus IN (SELECT bus_id from grid.egon_etrago_bus + AND bus IN (SELECT bus_id from {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE') """, @@ -2640,9 +2637,9 @@ def generators_links_storages_stores_100RE(scn="eGon100RE"): # stores stores_etrago = db.select_dataframe( f""" - SELECT * FROM grid.egon_etrago_store + SELECT * FROM {SanityChecks.sources.tables["etrago"]["store"]["schema"]}.{SanityChecks.sources.tables["etrago"]["store"]["table"]} WHERE scn_name = '{scn}' - AND bus IN (SELECT bus_id from grid.egon_etrago_bus + AND bus IN (SELECT bus_id from {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE') """, @@ -2696,13 +2693,18 @@ def electrical_load_100RE(scn="eGon100RE"): ) load_summary.loc["total", "eGon100RE"] = db.select_dataframe( - 
"""SELECT a.scn_name, a.carrier, SUM((SELECT SUM(p) - FROM UNNEST(b.p_set) p))/1000000::numeric as load_twh - FROM grid.egon_etrago_load a - JOIN grid.egon_etrago_load_timeseries b - ON (a.load_id = b.load_id) - JOIN grid.egon_etrago_bus c - ON (a.bus=c.bus_id) + f""" + SELECT a.scn_name, + a.carrier, + SUM( + (SELECT SUM(p) + FROM UNNEST(b.p_set) p) + )/1000000::numeric as load_twh + FROM {SanityChecks.sources.tables["etrago"]["load"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load"]["table"]} a + JOIN {SanityChecks.sources.tables["etrago"]["load_timeseries"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load_timeseries"]["table"]} b + ON (a.load_id = b.load_id) + JOIN {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} c + ON (a.bus=c.bus_id) AND b.scn_name = 'eGon100RE' AND a.scn_name = 'eGon100RE' AND a.carrier = 'AC' @@ -2713,11 +2715,10 @@ def electrical_load_100RE(scn="eGon100RE"): warning=False, )["load_twh"].values[0] - sources = egon.data.config.datasets()["etrago_electricity"]["sources"] + sources = SanityChecks.sources.tables["etrago_electricity"] cts_curves = db.select_dataframe( - f"""SELECT bus_id AS bus, p_set FROM - {sources['cts_curves']['schema']}. - {sources['cts_curves']['table']} + f"""SELECT bus_id AS bus, p_set + FROM {sources['cts_curves']['schema']}.{sources['cts_curves']['table']} WHERE scn_name = '{scn}'""", ) sum_cts_curves = ( @@ -2727,10 +2728,10 @@ def electrical_load_100RE(scn="eGon100RE"): # Select data on industrial demands assigned to osm landuse areas ind_curves_osm = db.select_dataframe( - f"""SELECT bus, p_set FROM - {sources['osm_curves']['schema']}. 
- {sources['osm_curves']['table']} - WHERE scn_name = '{scn}'""", + f""" + SELECT bus, p_set + FROM {sources['osm_curves']['schema']}.{sources['osm_curves']['table']} + WHERE scn_name = '{scn}'""", ) sum_ind_curves_osm = ( ind_curves_osm.apply(lambda x: sum(x["p_set"]), axis=1).sum() / 1000000 @@ -2739,10 +2740,10 @@ def electrical_load_100RE(scn="eGon100RE"): # Select data on industrial demands assigned to industrial sites ind_curves_sites = db.select_dataframe( - f"""SELECT bus, p_set FROM - {sources['sites_curves']['schema']}. - {sources['sites_curves']['table']} - WHERE scn_name = '{scn}'""", + f""" + SELECT bus, p_set + FROM {sources['sites_curves']['schema']}.{sources['sites_curves']['table']} + WHERE scn_name = '{scn}'""", ) sum_ind_curves_sites = ( ind_curves_sites.apply(lambda x: sum(x["p_set"]), axis=1).sum() @@ -2755,10 +2756,10 @@ def electrical_load_100RE(scn="eGon100RE"): # Select data on household electricity demands per bus hh_curves = db.select_dataframe( - f"""SELECT bus_id AS bus, p_set FROM - {sources['household_curves']['schema']}. 
- {sources['household_curves']['table']} - WHERE scn_name = '{scn}'""", + f""" + SELECT bus_id AS bus, p_set + FROM {sources['household_curves']['schema']}.{sources['household_curves']['table']} + WHERE scn_name = '{scn}'""", ) sum_hh_curves = ( hh_curves.apply(lambda x: sum(x["p_set"]), axis=1).sum() / 1000000 @@ -2799,13 +2800,17 @@ def heat_gas_load_egon100RE(scn="eGon100RE"): # filter out NaN values central_heat timeseries NaN_load_ids = db.select_dataframe( - """ - SELECT load_id from grid.egon_etrago_load_timeseries - WHERE load_id IN (Select load_id - FROM grid.egon_etrago_load - WHERE carrier = 'central_heat') AND (SELECT - bool_or(value::double precision::text = 'NaN') - FROM unnest(p_set) AS value + f""" + SELECT load_id + from {SanityChecks.sources.tables["etrago"]["load_timeseries"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load_timeseries"]["table"]} + WHERE load_id IN ( + Select load_id + FROM {SanityChecks.sources.tables["etrago"]["load"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load"]["table"]} + WHERE carrier = 'central_heat' + ) + AND ( + SELECT bool_or(value::double precision::text = 'NaN') + FROM unnest(p_set) AS value ) """ ) @@ -2822,15 +2827,16 @@ def heat_gas_load_egon100RE(scn="eGon100RE"): FROM UNNEST(t.p_set) p) ) AS total_p_set_timeseries FROM - grid.egon_etrago_load l + {SanityChecks.sources.tables["etrago"]["load"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load"]["table"]} l LEFT JOIN - grid.egon_etrago_load_timeseries t ON l.load_id = t.load_id + {SanityChecks.sources.tables["etrago"]["load_timeseries"]["schema"]}.{SanityChecks.sources.tables["etrago"]["load_timeseries"]["table"]} t + ON l.load_id = t.load_id WHERE l.scn_name = '{scn}' AND l.carrier != 'AC' AND l.bus IN ( SELECT bus_id - FROM grid.egon_etrago_bus + FROM {SanityChecks.sources.tables["etrago"]["bus"]["schema"]}.{SanityChecks.sources.tables["etrago"]["bus"]["table"]} WHERE scn_name = '{scn}' AND country = 'DE' ) @@ -2974,7 +2980,176 @@ class 
SanityChecks(Dataset): #: name: str = "SanityChecks" #: - version: str = "0.0.8" + version: str = "0.0.10" + + sources = DatasetSources( + tables={ + "etrago": { + "generator": {"schema": "grid", "table": "egon_etrago_generator"}, + "bus": {"schema": "grid", "table": "egon_etrago_bus"}, + "storage": {"schema": "grid", "table": "egon_etrago_storage"}, + "load": {"schema": "grid", "table": "egon_etrago_load"}, + "load_timeseries": { + "schema": "grid", + "table": "egon_etrago_load_timeseries", + }, + "link": {"schema": "grid", "table": "egon_etrago_link"}, + "store": {"schema": "grid", "table": "egon_etrago_store"}, + "generator_timeseries": { + "schema": "grid", + "table": "egon_etrago_generator_timeseries", + }, + "link_timeseries": { + "schema": "grid", + "table": "egon_etrago_link_timeseries", + }, + "store_timeseries": { + "schema": "grid", + "table": "egon_etrago_store_timeseries", + }, + "storage_timeseries": { + "schema": "grid", + "table": "egon_etrago_storage_timeseries", + }, + }, + + "etrago_electricity": { + "cts_curves": { + "schema": "demand", + "table": "egon_etrago_electricity_cts", + }, + "osm_curves": { + "schema": "demand", + "table": "egon_osm_ind_load_curves", + }, + "sites_curves": { + "schema": "demand", + "table": "egon_sites_ind_load_curves", + }, + "household_curves": { + "schema": "demand", + "table": "egon_etrago_electricity_households", + }, + }, + + "home_batteries": { + "storage": {"schema": "supply", "table": "egon_storages"}, + }, + + "solar_rooftop": { + "scenario_capacities": { + "schema": "supply", + "table": "egon_scenario_capacities", + }, + }, + + "DSM_CTS_industry": { + "cts_loadcurves_dsm": { + "schema": "demand", + "table": "egon_etrago_electricity_cts_dsm_timeseries", + }, + "ind_osm_loadcurves_individual_dsm": { + "schema": "demand", + "table": "egon_osm_ind_load_curves_individual_dsm_timeseries", + }, + "demandregio_ind_sites_dsm": { + "schema": "demand", + "table": "egon_demandregio_sites_ind_electricity_dsm_timeseries", 
+ }, + "ind_sites_loadcurves_individual": { + "schema": "demand", + "table": "egon_sites_ind_load_curves_individual_dsm_timeseries", + }, + }, + + "demand": { + "demandregio_cts_ind": { + "schema": "demand", + "table": "egon_demandregio_cts_ind", + }, + "demandregio_hh": { + "schema": "demand", + "table": "egon_demandregio_hh", + }, + "peta_heat": { + "schema": "demand", + "table": "egon_peta_heat", + }, + "demandregio_zensus_electricity": { + "schema": "demand", + "table": "egon_demandregio_zensus_electricity", + }, + }, + + "boundaries": { + "zensus_vg250": { + "schema": "boundaries", + "table": "egon_map_zensus_vg250", + }, + }, + + "zensus_households": { + "households_per_ha_refined": { + "schema": "society", + "table": "egon_destatis_zensus_household_per_ha_refined", + }, + }, + + "pv_rooftop_buildings": { + "pv_roof_building": { + "schema": "supply", + "table": "egon_power_plants_pv_roof_building", + }, + }, + }, + files={ + + "nep2035_capacities": ( + "data_bundle_egon_data/nep2035_version2021/" + "NEP2035_V2021_scnC2035.xlsx" + ), + + "gas_nodes": "datasets/gas_data/data/IGGIELGN_Nodes.csv", + "gas_productions": "datasets/gas_data/data/IGGIELGN_Productions.csv", + + "gas_region_corr": "datasets/gas_data/demand/region_corr.json", + "gas_CH4_for_industry_eGon2035": ( + "datasets/gas_data/demand/CH4_for_industry_eGon2035.json" + ), + "gas_H2_for_industry_eGon2035": ( + "datasets/gas_data/demand/H2_for_industry_eGon2035.json" + ), + + "gas_biogaspartner_einspeiseatlas": ( + "data_bundle_egon_data/gas_data/" + "Biogaspartner_Einspeiseatlas_Deutschland_2021.xlsx" + ), + }, + ) + + + targets = DatasetTargets( + tables={ + "home_batteries": { + "home_batteries": {"schema": "supply", "table": "egon_home_batteries"}, + }, + "DSM_CTS_industry": { + "cts_loadcurves_dsm": {"schema": "demand", "table": "egon_etrago_electricity_cts_dsm_timeseries"}, + "ind_osm_loadcurves_individual_dsm": { + "schema": "demand", + "table": 
"egon_osm_ind_load_curves_individual_dsm_timeseries", + }, + "demandregio_ind_sites_dsm": { + "schema": "demand", + "table": "egon_demandregio_sites_ind_electricity_dsm_timeseries", + }, + "ind_sites_loadcurves_individual": { + "schema": "demand", + "table": "egon_sites_ind_load_curves_individual_dsm_timeseries", + }, + }, + } + ) def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/scenario_capacities.py b/src/egon/data/datasets/scenario_capacities.py index c810fc2ab..d7b333a65 100755 --- a/src/egon/data/datasets/scenario_capacities.py +++ b/src/egon/data/datasets/scenario_capacities.py @@ -15,7 +15,7 @@ import yaml from egon.data import config, db -from egon.data.datasets import Dataset, wrapped_partial +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets, wrapped_partial from egon.data.metadata import ( context, generate_resource_fields_from_sqla_model, @@ -113,7 +113,7 @@ def insert_capacities_status_quo(scenario: str) -> None: """ - targets = config.datasets()["scenario_input"]["targets"] + targets = ScenarioCapacities.targets.tables # Delete rows if already exist db.execute_sql( @@ -201,8 +201,8 @@ def insert_capacities_per_federal_state_nep(): """ - sources = config.datasets()["scenario_input"]["sources"] - targets = config.datasets()["scenario_input"]["targets"] + sources = ScenarioCapacities.sources + targets = ScenarioCapacities.targets.tables # Connect to local database engine = db.engine() @@ -219,12 +219,7 @@ def insert_capacities_per_federal_state_nep(): ) # read-in installed capacities per federal state of germany - target_file = ( - Path(".") - / "data_bundle_egon_data" - / "nep2035_version2021" - / sources["eGon2035"]["capacities"] - ) + target_file = Path(".") / sources.files["eGon2035_capacities"] df = pd.read_excel( target_file, @@ -288,7 +283,7 @@ def insert_capacities_per_federal_state_nep(): map_nuts = pd.read_sql( f""" SELECT DISTINCT ON (nuts) gen, nuts - FROM 
{sources['boundaries']['schema']}.{sources['boundaries']['table']} + FROM {sources.tables['boundaries']['schema']}.{sources.tables['boundaries']['table']} """, engine, index_col="gen", @@ -391,14 +386,13 @@ def population_share(): """ - sources = config.datasets()["scenario_input"]["sources"] + sources = ScenarioCapacities.sources return ( pd.read_sql( f""" SELECT SUM(population) - FROM {sources['zensus_population']['schema']}. - {sources['zensus_population']['table']} + FROM {sources.tables['zensus_population']['schema']}.{sources.tables['zensus_population']['table']} WHERE population>0 """, con=db.engine(), @@ -495,19 +489,14 @@ def insert_nep_list_powerplants(export=True): List of conventional power plants from nep if export=False """ - sources = config.datasets()["scenario_input"]["sources"] - targets = config.datasets()["scenario_input"]["targets"] + sources = ScenarioCapacities.sources + targets = ScenarioCapacities.targets.tables # Connect to local database engine = db.engine() # Read-in data from csv-file - target_file = ( - Path(".") - / "data_bundle_egon_data" - / "nep2035_version2021" - / sources["eGon2035"]["list_conv_pp"] - ) + target_file = Path(".") / sources.files["eGon2035_list_conv_pp"] kw_liste_nep = pd.read_csv(target_file, delimiter=";", decimal=",") @@ -596,15 +585,10 @@ def district_heating_input(): """ - sources = config.datasets()["scenario_input"]["sources"] + sources = ScenarioCapacities.sources # import data to dataframe - file = ( - Path(".") - / "data_bundle_egon_data" - / "nep2035_version2021" - / sources["eGon2035"]["capacities"] - ) + file = Path(".") / sources.files["eGon2035_capacities"] df = pd.read_excel( file, sheet_name="Kurzstudie_KWK", dtype={"Wert": float} ) @@ -681,8 +665,8 @@ def eGon100_capacities(): """ - sources = config.datasets()["scenario_input"]["sources"] - targets = config.datasets()["scenario_input"]["targets"] + sources = ScenarioCapacities.sources + targets = ScenarioCapacities.targets.tables # read-in 
installed capacities cwd = Path(".") @@ -700,18 +684,12 @@ def eGon100_capacities(): / "results" / data_config["run"]["name"] / "csvs" - / sources["eGon100RE"]["capacities"] + / Path(sources.files["eGon100RE_capacities"]).name ) - else: - target_file = ( - cwd - / "data_bundle_egon_data" - / "pypsa_eur" - / "csvs" - / sources["eGon100RE"]["capacities"] - ) + else: + target_file = cwd / sources.files["eGon100RE_capacities"] df = pd.read_csv(target_file, delimiter=",", skiprows=3) df.columns = [ "component", @@ -1043,8 +1021,37 @@ class ScenarioCapacities(Dataset): #: name: str = "ScenarioCapacities" #: - version: str = "0.0.19" + version: str = "0.0.20" + sources = DatasetSources( + files={ + "eGon2035_capacities": "data_bundle_egon_data/nep2035_version2021/NEP2035_V2021_scnC2035.xlsx", + "eGon2035_list_conv_pp": "data_bundle_egon_data/nep2035_version2021/Kraftwerksliste_NEP_2021_konv.csv", + "eGon100RE_capacities": "data_bundle_egon_data/pypsa_eur/csvs/nodal_capacities.csv", + }, + tables={ + "boundaries": { + "schema": "boundaries", + "table": "vg250_lan", + }, + "zensus_population": { + "schema": "society", + "table": "destatis_zensus_population_per_ha", + }, + }, + ) + targets = DatasetTargets( + tables={ + "scenario_capacities": { + "schema": "supply", + "table": "egon_scenario_capacities", + }, + "nep_conventional_powerplants": { + "schema": "supply", + "table": "egon_nep_2021_conventional_powerplants", + }, + } + ) def __init__(self, dependencies): super().__init__( name=self.name, diff --git a/src/egon/data/datasets/scenario_parameters/__init__.py b/src/egon/data/datasets/scenario_parameters/__init__.py index c5276841e..c70335eea 100755 --- a/src/egon/data/datasets/scenario_parameters/__init__.py +++ b/src/egon/data/datasets/scenario_parameters/__init__.py @@ -13,7 +13,7 @@ import pandas as pd from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets import egon.data.config import 
egon.data.datasets.scenario_parameters.parameters as parameters @@ -39,9 +39,13 @@ def create_table(): None. """ engine = db.engine() - db.execute_sql("CREATE SCHEMA IF NOT EXISTS scenario;") db.execute_sql( - "DROP TABLE IF EXISTS scenario.egon_scenario_parameters CASCADE;" + f"CREATE SCHEMA IF NOT EXISTS {ScenarioParameters.targets.tables['egon_scenario_parameters']['schema']};" + ) + db.execute_sql( + f"DROP TABLE IF EXISTS " + f"{ScenarioParameters.targets.tables['egon_scenario_parameters']['schema']}." + f"{ScenarioParameters.targets.tables['egon_scenario_parameters']['table']} CASCADE;" ) EgonScenario.__table__.create(bind=engine, checkfirst=True) @@ -70,7 +74,10 @@ def insert_scenarios(): """ - db.execute_sql("DELETE FROM scenario.egon_scenario_parameters CASCADE;") + db.execute_sql( + f"DELETE FROM {ScenarioParameters.targets.tables['egon_scenario_parameters']['schema']}." + f"{ScenarioParameters.targets.tables['egon_scenario_parameters']['table']} CASCADE;" + ) session = sessionmaker(bind=db.engine())() @@ -209,13 +216,16 @@ def get_sector_parameters(sector, scenario=None): if ( scenario in db.select_dataframe( - "SELECT name FROM scenario.egon_scenario_parameters" + f"SELECT name FROM " + f"{ScenarioParameters.targets.tables['egon_scenario_parameters']['schema']}." + f"{ScenarioParameters.targets.tables['egon_scenario_parameters']['table']}" ).name.values ): values = db.select_dataframe( f""" SELECT {sector}_parameters as val - FROM scenario.egon_scenario_parameters + FROM {ScenarioParameters.targets.tables['egon_scenario_parameters']['schema']}. 
+ {ScenarioParameters.targets.tables['egon_scenario_parameters']['table']} WHERE name = '{scenario}';""" ).val[0] else: @@ -226,9 +236,10 @@ def get_sector_parameters(sector, scenario=None): pd.DataFrame( db.select_dataframe( f""" - SELECT {sector}_parameters as val - FROM scenario.egon_scenario_parameters - WHERE name='eGon2035'""" + SELECT {sector}_parameters as val + FROM {ScenarioParameters.targets.tables['egon_scenario_parameters']['schema']}. + {ScenarioParameters.targets.tables['egon_scenario_parameters']['table']} + WHERE name='eGon2035'""" ).val[0], index=["eGon2035"], ), @@ -236,7 +247,8 @@ def get_sector_parameters(sector, scenario=None): db.select_dataframe( f""" SELECT {sector}_parameters as val - FROM scenario.egon_scenario_parameters + FROM {ScenarioParameters.targets.tables['egon_scenario_parameters']['schema']}. + {ScenarioParameters.targets.tables['egon_scenario_parameters']['table']} WHERE name='eGon100RE'""" ).val[0], index=["eGon100RE"], @@ -245,7 +257,8 @@ def get_sector_parameters(sector, scenario=None): db.select_dataframe( f""" SELECT {sector}_parameters as val - FROM scenario.egon_scenario_parameters + FROM {ScenarioParameters.targets.tables['egon_scenario_parameters']['schema']}. 
+ {ScenarioParameters.targets.tables['egon_scenario_parameters']['table']} WHERE name='eGon2021'""" ).val[0], index=["eGon2021"], @@ -259,23 +272,17 @@ def get_sector_parameters(sector, scenario=None): def download_pypsa_technology_data(): """Downlad PyPSA technology data results.""" - data_path = Path(".") / "pypsa_technology_data" + data_path = Path(ScenarioParameters.targets.files["technology_data"]).parent # Delete folder if it already exists if data_path.exists() and data_path.is_dir(): shutil.rmtree(data_path) - # Get parameters from config and set download URL - sources = egon.data.config.datasets()["pypsa-technology-data"]["sources"][ - "zenodo" - ] - url = f"""https://zenodo.org/record/{sources['deposit_id']}/files/{sources['file']}""" - target_file = egon.data.config.datasets()["pypsa-technology-data"][ - "targets" - ]["file"] - # Retrieve files - urlretrieve(url, target_file) + urlretrieve( + ScenarioParameters.sources.urls["pypsa_technology_data"]["url"], + ScenarioParameters.targets.files["pypsa_zip"], + ) - with zipfile.ZipFile(target_file, "r") as zip_ref: + with zipfile.ZipFile(ScenarioParameters.targets.files["pypsa_zip"], "r") as zip_ref: zip_ref.extractall(".") @@ -302,7 +309,30 @@ class ScenarioParameters(Dataset): #: name: str = "ScenarioParameters" #: - version: str = "0.0.18" + version: str = "0.0.20" + + + sources = DatasetSources( + urls={ + "pypsa_technology_data": { + "url": "https://zenodo.org/record/5544025/files/PyPSA/technology-data-v0.3.0.zip", + } + } + ) + + targets = DatasetTargets( + tables={ + "egon_scenario_parameters": { + "schema": "scenario", + "table": "egon_scenario_parameters", + } + }, + files={ + "pypsa_zip": "pypsa_technology_data_egon_data.zip", + "data_dir": "PyPSA-technology-data-94085a8/outputs/", + "technology_data": "pypsa_technology_data/technology_data.xlsx", + } + ) def __init__(self, dependencies): super().__init__( diff --git a/src/egon/data/datasets/scenario_parameters/parameters.py 
b/src/egon/data/datasets/scenario_parameters/parameters.py index f44e0ba25..f94ee6e4d 100755 --- a/src/egon/data/datasets/scenario_parameters/parameters.py +++ b/src/egon/data/datasets/scenario_parameters/parameters.py @@ -2,18 +2,18 @@ """ import pandas as pd +from egon.data.datasets import load_sources_and_targets -import egon.data.config -def read_csv(year): - source = egon.data.config.datasets()["pypsa-technology-data"]["targets"][ - "data_dir" - ] +def read_csv(year): + _, targets = load_sources_and_targets("ScenarioParameters") + source = targets.files["data_dir"] return pd.read_csv(f"{source}costs_{year}.csv") + def read_costs(df, technology, parameter, value_only=True): result = df.loc[ (df.technology == technology) & (df.parameter == parameter) diff --git a/src/egon/data/datasets/society_prognosis.py b/src/egon/data/datasets/society_prognosis.py index b0a42e96f..cf8969d3e 100755 --- a/src/egon/data/datasets/society_prognosis.py +++ b/src/egon/data/datasets/society_prognosis.py @@ -1,25 +1,49 @@ """The central module containing all code dealing with processing and forecast Zensus data. 
""" - -from sqlalchemy import Column, Float, Integer -from sqlalchemy.ext.declarative import declarative_base import numpy as np +import egon.data.config import pandas as pd - from egon.data import db -from egon.data.datasets import Dataset -import egon.data.config +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets +from sqlalchemy import Column, Float, Integer +from sqlalchemy.ext.declarative import declarative_base # will be later imported from another file ### Base = declarative_base() - +# ############################################################ class SocietyPrognosis(Dataset): + name: str = "SocietyPrognosis" + version: str = "0.0.4" + + sources = DatasetSources( + tables={ + "map_zensus_vg250": "boundaries.egon_map_zensus_vg250", + "zensus_population": "society.destatis_zensus_population_per_ha", + "zensus_households": "society.egon_destatis_zensus_household_per_ha", + "demandregio_population": "society.egon_demandregio_population", + "demandregio_households": "society.egon_demandregio_household", + } + ) + + targets = DatasetTargets( + tables={ + "population_prognosis": { + "schema": "society", + "table": "egon_population_prognosis" + }, + "household_prognosis": { + "schema": "society", + "table": "egon_household_prognosis" + } + } + ) + def __init__(self, dependencies): super().__init__( - name="SocietyPrognosis", - version="0.0.1", + name=self.name, + version=self.version, dependencies=dependencies, tasks=(create_tables, {zensus_population, zensus_household}), ) @@ -52,27 +76,23 @@ def create_tables(): def zensus_population(): """Bring population prognosis from DemandRegio to Zensus grid""" - cfg = egon.data.config.datasets()["society_prognosis"] - + local_engine = db.engine() # Input: Zensus2011 population data including the NUTS3-Code zensus_district = db.select_dataframe( f"""SELECT zensus_population_id, vg250_nuts3 - FROM {cfg['soucres']['map_zensus_vg250']['schema']}. 
- {cfg['soucres']['map_zensus_vg250']['table']} + FROM {SocietyPrognosis.sources.tables['map_zensus_vg250']} WHERE zensus_population_id IN ( SELECT id - FROM {cfg['soucres']['zensus_population']['schema']}. - {cfg['soucres']['zensus_population']['table']})""", + FROM {SocietyPrognosis.sources.tables['zensus_population']})""", index_col="zensus_population_id", - ) + ) zensus = db.select_dataframe( f"""SELECT id, population - FROM {cfg['soucres']['zensus_population']['schema']}. - {cfg['soucres']['zensus_population']['table']} - WHERE population > 0""", + FROM {SocietyPrognosis.sources.tables['zensus_population']} + WHERE population > 0""", index_col="id", ) @@ -92,16 +112,15 @@ def zensus_population(): ).values db.execute_sql( - f"""DELETE FROM {cfg['target']['population_prognosis']['schema']}. - {cfg['target']['population_prognosis']['table']}""" + f"DELETE FROM {SocietyPrognosis.targets.tables['population_prognosis']['schema']}." + f"{SocietyPrognosis.targets.tables['population_prognosis']['table']}" ) # Scale to pogosis values from demandregio for year in [2035, 2050]: # Input: dataset on population prognosis on district-level (NUTS3) prognosis = db.select_dataframe( f"""SELECT nuts3, population - FROM {cfg['soucres']['demandregio_population']['schema']}. 
- {cfg['soucres']['demandregio_population']['table']} + FROM {SocietyPrognosis.sources.tables['demandregio_population']} WHERE year={year}""", index_col="nuts3", ) @@ -116,10 +135,10 @@ def zensus_population(): # Insert to database df.to_sql( - cfg["target"]["population_prognosis"]["table"], - schema=cfg["target"]["population_prognosis"]["schema"], - con=local_engine, - if_exists="append", + SocietyPrognosis.targets.tables["population_prognosis"]["table"], + schema=SocietyPrognosis.targets.tables["population_prognosis"]["schema"], + con=local_engine, + if_exists="append", ) @@ -145,14 +164,14 @@ def household_prognosis_per_year(prognosis_nuts3, zensus, year): # Rounding process to meet exact values from demandregio on nuts3-level for name, group in prognosis.groupby(prognosis.nuts3): - print(f"start progosis nuts3 {name}") + print(f"start prognosis nuts3 {name}") while prognosis_total[name] > group["rounded"].sum(): index = np.random.choice( group["rest"].index.values[group["rest"] == max(group["rest"])] ) group.at[index, "rounded"] += 1 group.at[index, "rest"] = 0 - print(f"finished progosis nuts3 {name}") + print(f"finished prognosis nuts3 {name}") prognosis[prognosis.index.isin(group.index)] = group prognosis = prognosis.drop(["nuts3", "quantity", "rest"], axis=1).rename( @@ -165,22 +184,20 @@ def household_prognosis_per_year(prognosis_nuts3, zensus, year): def zensus_household(): """Bring household prognosis from DemandRegio to Zensus grid""" - cfg = egon.data.config.datasets()["society_prognosis"] + local_engine = db.engine() # Input: Zensus2011 household data including the NUTS3-Code district = db.select_dataframe( f"""SELECT zensus_population_id, vg250_nuts3 - FROM {cfg['soucres']['map_zensus_vg250']['schema']}. 
- {cfg['soucres']['map_zensus_vg250']['table']}""", + FROM {SocietyPrognosis.sources.tables['map_zensus_vg250']}""", index_col="zensus_population_id", ) zensus = db.select_dataframe( f"""SELECT zensus_population_id, quantity - FROM {cfg['soucres']['zensus_households']['schema']}. - {cfg['soucres']['zensus_households']['table']}""", + FROM {SocietyPrognosis.sources.tables['zensus_households']}""", index_col="zensus_population_id", ) @@ -198,8 +215,8 @@ def zensus_household(): ) db.execute_sql( - f"""DELETE FROM {cfg['target']['household_prognosis']['schema']}. - {cfg['target']['household_prognosis']['table']}""" + f"DELETE FROM {SocietyPrognosis.targets.tables['household_prognosis']['schema']}." + f"{SocietyPrognosis.targets.tables['household_prognosis']['table']}" ) # Apply prognosis function @@ -208,16 +225,15 @@ def zensus_household(): # Input: dataset on household prognosis on district-level (NUTS3) prognosis_nuts3 = db.select_dataframe( f"""SELECT nuts3, hh_size, households - FROM {cfg['soucres']['demandregio_households']['schema']}. 
- {cfg['soucres']['demandregio_households']['table']} + FROM {SocietyPrognosis.sources.tables['demandregio_households']} WHERE year={year}""", index_col="nuts3", ) # Insert into database household_prognosis_per_year(prognosis_nuts3, zensus, year).to_sql( - cfg["target"]["household_prognosis"]["table"], - schema=cfg["target"]["household_prognosis"]["schema"], + SocietyPrognosis.targets.tables["household_prognosis"]["table"], + schema=SocietyPrognosis.targets.tables["household_prognosis"]["schema"], con=local_engine, if_exists="append", ) diff --git a/src/egon/data/datasets/storages/__init__.py b/src/egon/data/datasets/storages/__init__.py index 6ecda8b2c..db00300dd 100755 --- a/src/egon/data/datasets/storages/__init__.py +++ b/src/egon/data/datasets/storages/__init__.py @@ -12,7 +12,8 @@ import pandas as pd from egon.data import config, db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets +from egon.data.datasets.scenario_parameters import get_sector_parameters from egon.data.datasets.electrical_neighbours import entsoe_to_bus_etrago from egon.data.datasets.mastr import ( WORKING_DIR_MASTR_NEW, @@ -51,6 +52,33 @@ class EgonStorages(Base): class Storages(Dataset): + + sources = DatasetSources( + files={ + "mastr_storage": "bnetza_mastr_storage_cleaned.csv", + "nep_capacities": "NEP2035_V2021_scnC2035.xlsx", + "mastr_location": "location_elec_generation_raw.csv", + }, + tables={ + "capacities": "supply.egon_scenario_capacities", + "generators": "grid.egon_etrago_generator", + "bus": "grid.egon_etrago_bus", + "egon_mv_grid_district": "grid.egon_mv_grid_district", + "ehv_voronoi": "grid.egon_ehv_substation_voronoi", + # Added for pumped_hydro.py + "nep_conv": "supply.egon_nep_2021_conventional_powerplants", + # Added for home_batteries.py + "etrago_storage": "grid.egon_etrago_storage", + }, + ) + targets = DatasetTargets( + tables={ + "storages": "supply.egon_storages", + # Added for home_batteries.py + 
"home_batteries": "supply.egon_home_batteries", + } + ) + """ Allocates storage units such as pumped hydro and home batteries @@ -85,7 +113,7 @@ class Storages(Dataset): #: name: str = "Storages" #: - version: str = "0.0.8" + version: str = "0.0.10" def __init__(self, dependencies): super().__init__( @@ -108,13 +136,11 @@ def create_tables(): ------- None. """ - - cfg = config.datasets()["storages"] - db.execute_sql(f"CREATE SCHEMA IF NOT EXISTS {cfg['target']['schema']};") + db.execute_sql(f"CREATE SCHEMA IF NOT EXISTS supply;") engine = db.engine() db.execute_sql( f"""DROP TABLE IF EXISTS - {cfg['target']['schema']}.{cfg['target']['table']}""" + {Storages.targets.tables['storages']}""" ) db.execute_sql("""DROP SEQUENCE IF EXISTS pp_seq""") @@ -140,7 +166,6 @@ def allocate_pumped_hydro(scn, export=True): carrier = "pumped_hydro" - cfg = config.datasets()["power_plants"] nep = select_nep_pumped_hydro(scn=scn) mastr = select_mastr_pumped_hydro() @@ -148,7 +173,7 @@ def allocate_pumped_hydro(scn, export=True): # Assign voltage level to MaStR mastr["voltage_level"] = assign_voltage_level( mastr.rename({"el_capacity": "Nettonennleistung"}, axis=1), - cfg, + Storages.sources, WORKING_DIR_MASTR_OLD, ) @@ -257,14 +282,14 @@ def allocate_pumped_hydro(scn, export=True): # Load grid district polygons mv_grid_districts = db.select_geodataframe( f""" - SELECT * FROM {cfg['sources']['egon_mv_grid_district']} + SELECT * FROM {Storages.sources.tables['egon_mv_grid_district']} """, epsg=4326, ) ehv_grid_districts = db.select_geodataframe( f""" - SELECT * FROM {cfg['sources']['ehv_voronoi']} + SELECT * FROM {Storages.sources.tables['ehv_voronoi']} """, epsg=4326, ) @@ -286,7 +311,7 @@ def allocate_pumped_hydro(scn, export=True): # Delete existing units in the target table db.execute_sql( - f""" DELETE FROM {cfg ['sources']['storages']} + f""" DELETE FROM {Storages.targets.tables['storages']} WHERE carrier IN ('pumped_hydro') AND scenario='{scn}';""" ) @@ -330,7 +355,6 @@ def 
allocate_storage_units_sq(scn_name, storage_types): ------- """ - sources = config.datasets()["power_plants"]["sources"] scn_parameters = get_sector_parameters("global", scn_name) scenario_date_max = str(scn_parameters["weather_year"]) + "-12-31 23:59:00" @@ -345,7 +369,7 @@ def allocate_storage_units_sq(scn_name, storage_types): for storage_type in storage_types: # Read-in data from MaStR mastr_ph = pd.read_csv( - WORKING_DIR_MASTR_NEW / sources["mastr_storage"], + WORKING_DIR_MASTR_NEW / Storages.sources.files["mastr_storage"], delimiter=",", usecols=[ "Nettonennleistung", @@ -493,7 +517,7 @@ def allocate_storage_units_sq(scn_name, storage_types): # Asign buses within germany mastr_ph = assign_bus_id( - mastr_ph, cfg=config.datasets()["power_plants"], drop_missing=True + mastr_ph, sources=Storages.sources, drop_missing=True ) mastr_ph["bus_id"] = mastr_ph["bus_id"].astype(int) @@ -597,7 +621,6 @@ def allocate_pumped_hydro_eGon100RE(): """ carrier = "pumped_hydro" - cfg = config.datasets()["power_plants"] boundary = config.settings()["egon-data"]["--dataset-boundary"] # Select installed capacity for pumped_hydro in eGon100RE scenario from @@ -605,7 +628,7 @@ def allocate_pumped_hydro_eGon100RE(): capacity = db.select_dataframe( f""" SELECT capacity - FROM {cfg['sources']['capacities']} + FROM {Storages.sources.tables['capacities']} WHERE carrier = '{carrier}' AND scenario_name = 'eGon100RE'; """ @@ -664,7 +687,6 @@ def home_batteries_per_scenario(scenario): None """ - cfg = config.datasets()["storages"] dataset = config.settings()["egon-data"]["--dataset-boundary"] if scenario == "eGon2035": @@ -672,7 +694,7 @@ def home_batteries_per_scenario(scenario): Path(".") / "data_bundle_egon_data" / "nep2035_version2021" - / cfg["sources"]["nep_capacities"] + /Storages.sources.files["nep_capacities"] ) capacities_nep = pd.read_excel( @@ -688,7 +710,7 @@ def home_batteries_per_scenario(scenario): target = db.select_dataframe( f""" SELECT capacity - FROM 
{cfg['sources']['capacities']} + FROM {Storages.sources.tables['capacities']} WHERE scenario_name = '{scenario}' AND carrier = 'battery'; """ @@ -697,11 +719,11 @@ def home_batteries_per_scenario(scenario): pv_rooftop = db.select_dataframe( f""" SELECT bus, p_nom, generator_id - FROM {cfg['sources']['generators']} + FROM {Storages.sources.tables['generators']} WHERE scn_name = '{scenario}' AND carrier = 'solar_rooftop' AND bus IN - (SELECT bus_id FROM {cfg['sources']['bus']} + (SELECT bus_id FROM {Storages.sources.tables['bus']} WHERE scn_name = '{scenario}' AND country = 'DE' ); """ ) @@ -761,4 +783,4 @@ def allocate_pumped_hydro_scn(): def allocate_other_storage_units(): for scn in config.settings()["egon-data"]["--scenarios"]: if "status" in scn: - allocate_storage_units_sq(scn_name=scn, storage_types=["battery"]) + allocate_storage_units_sq(scn_name=scn, storage_types=["battery"]) \ No newline at end of file diff --git a/src/egon/data/datasets/storages/home_batteries.py b/src/egon/data/datasets/storages/home_batteries.py index d8a69df3e..b516ba2a4 100644 --- a/src/egon/data/datasets/storages/home_batteries.py +++ b/src/egon/data/datasets/storages/home_batteries.py @@ -44,7 +44,7 @@ from sqlalchemy.ext.declarative import declarative_base import numpy as np import pandas as pd - +from egon.data.datasets import load_sources_and_targets from egon.data import config, db from egon.data.datasets.scenario_parameters import get_sector_parameters from egon.data.metadata import ( @@ -59,6 +59,15 @@ Base = declarative_base() +# This block is added because they are constant and needs to be independant from config.dataset +CONSTANTS = { + "cbat_ppv_ratio": 1, + "rtol": 0.05, + "max_it": 100, + "deposit_id_mastr": 10491882, + "deposit_id_data_bundle": 16576506 +} + def get_cbat_pbat_ratio(): """ @@ -71,12 +80,11 @@ def get_cbat_pbat_ratio(): Mean ratio between the storage capacity and the power of the pv rooftop system """ - sources = 
config.datasets()["home_batteries"]["sources"] + sources, targets = load_sources_and_targets("Storages") sql = f""" SELECT max_hours - FROM {sources["etrago_storage"]["schema"]} - .{sources["etrago_storage"]["table"]} + FROM {sources.tables["etrago_storage"]} WHERE carrier = 'home_battery' """ @@ -87,16 +95,15 @@ def allocate_home_batteries_to_buildings(): """ Allocate home battery storage systems to buildings with pv rooftop systems """ - # get constants - constants = config.datasets()["home_batteries"]["constants"] + sources, targets = load_sources_and_targets("Storages") + scenarios = config.settings()["egon-data"]["--scenarios"] if "status2019" in scenarios: scenarios.remove("status2019") - cbat_ppv_ratio = constants["cbat_ppv_ratio"] - rtol = constants["rtol"] - max_it = constants["max_it"] - - sources = config.datasets()["home_batteries"]["sources"] + + cbat_ppv_ratio = CONSTANTS["cbat_ppv_ratio"] + rtol = CONSTANTS["rtol"] + max_it = CONSTANTS["max_it"] df_list = [] @@ -104,8 +111,7 @@ def allocate_home_batteries_to_buildings(): # get home battery capacity per mv grid id sql = f""" SELECT el_capacity as p_nom_min, bus_id as bus FROM - {sources["storage"]["schema"]} - .{sources["storage"]["table"]} + {targets.tables["storages"]} WHERE carrier = 'home_battery' AND scenario = '{scenario}'; """ @@ -197,10 +203,8 @@ def allocate_home_batteries_to_buildings(): class EgonHomeBatteries(Base): - targets = config.datasets()["home_batteries"]["targets"] - - __tablename__ = targets["home_batteries"]["table"] - __table_args__ = {"schema": targets["home_batteries"]["schema"]} + __tablename__ = "egon_home_batteries" + __table_args__ = {"schema": "supply"} index = Column(Integer, primary_key=True, index=True) scenario = Column(String) @@ -214,11 +218,10 @@ def add_metadata(): """ Add metadata to table supply.egon_home_batteries """ - targets = config.datasets()["home_batteries"]["targets"] - deposit_id_mastr = config.datasets()["mastr_new"]["deposit_id"] - 
deposit_id_data_bundle = config.datasets()["data-bundle"]["sources"][ - "zenodo" - ]["deposit_id"] + _, targets = load_sources_and_targets("Storages") + + deposit_id_mastr = CONSTANTS["deposit_id_mastr"] + deposit_id_data_bundle = CONSTANTS["deposit_id_data_bundle"] contris = contributors(["kh", "kh"]) @@ -231,10 +234,7 @@ def add_metadata(): contris[1]["comment"] = "Add workflow to generate dataset." meta = { - "name": ( - f"{targets['home_batteries']['schema']}." - f"{targets['home_batteries']['table']}" - ), + "name": targets.get_table_name("home_batteries"), "title": "eGon Home Batteries", "id": "WILL_BE_SET_AT_PUBLICATION", "description": "Home storage systems allocated to buildings", @@ -274,6 +274,7 @@ def add_metadata(): "path": (f"https://zenodo.org/record/{deposit_id_mastr}"), "licenses": [license_dedl(attribution="© Amme, Jonathan")], }, + # 'sources()' correctly refers to the function from metadata sources()["openstreetmap"], sources()["era5"], sources()["vg250"], @@ -287,17 +288,15 @@ def add_metadata(): "resources": [ { "profile": "tabular-data-resource", - "name": ( - f"{targets['home_batteries']['schema']}." - f"{targets['home_batteries']['table']}" - ), + "name": targets.get_table_name("home_batteries"), "path": "None", "format": "PostgreSQL", "encoding": "UTF-8", "schema": { "fields": generate_resource_fields_from_db_table( - targets["home_batteries"]["schema"], - targets["home_batteries"]["table"], + targets.get_table_schema("home_batteries"), + # FIX: Use [-1] to get the table name safely (works with or without 'schema.' 
prefix) + targets.get_table_name("home_batteries").split('.')[-1], ), "primaryKey": "index", }, @@ -339,8 +338,8 @@ def add_metadata(): db.submit_comment( f"'{json.dumps(meta)}'", - targets["home_batteries"]["schema"], - targets["home_batteries"]["table"], + targets.get_table_schema("home_batteries"), + targets.get_table_name("home_batteries").split('.')[-1], ) @@ -358,3 +357,19 @@ def create_table(df): if_exists="append", index=False, ) + + +def create_table(df): + """Create mapping table home battery <-> building id""" + engine = db.engine() + + EgonHomeBatteries.__table__.drop(bind=engine, checkfirst=True) + EgonHomeBatteries.__table__.create(bind=engine, checkfirst=True) + + df.reset_index().to_sql( + name=EgonHomeBatteries.__table__.name, + schema=EgonHomeBatteries.__table__.schema, + con=engine, + if_exists="append", + index=False, + ) \ No newline at end of file diff --git a/src/egon/data/datasets/storages/pumped_hydro.py b/src/egon/data/datasets/storages/pumped_hydro.py index 2f1ceaffe..96eb5a51e 100755 --- a/src/egon/data/datasets/storages/pumped_hydro.py +++ b/src/egon/data/datasets/storages/pumped_hydro.py @@ -19,6 +19,7 @@ select_target, ) import egon.data.config +from egon.data.datasets import load_sources_and_targets def select_nep_pumped_hydro(scn): @@ -30,7 +31,7 @@ def select_nep_pumped_hydro(scn): pandas.DataFrame Pumped hydro plants from NEP list """ - cfg = egon.data.config.datasets()["power_plants"] + sources, targets = load_sources_and_targets("Storages") carrier = "pumped_hydro" @@ -40,7 +41,7 @@ def select_nep_pumped_hydro(scn): f""" SELECT bnetza_id, name, carrier, postcode, capacity, city, federal_state, c2035_capacity - FROM {cfg['sources']['nep_conv']} + FROM {sources.tables['nep_conv']} WHERE carrier = '{carrier}' AND c2035_capacity > 0 AND postcode != 'None'; @@ -57,7 +58,7 @@ def select_nep_pumped_hydro(scn): f""" SELECT bnetza_id, name, carrier, postcode, capacity, city, federal_state - FROM {cfg['sources']['nep_conv']} + FROM 
{sources.tables['nep_conv']} WHERE carrier = '{carrier}' AND capacity > 0 AND postcode != 'None' @@ -89,11 +90,11 @@ def select_mastr_pumped_hydro(): pandas.DataFrame Pumped hydro plants from MaStR """ - sources = egon.data.config.datasets()["power_plants"]["sources"] + sources, targets = load_sources_and_targets("Storages") # Read-in data from MaStR mastr_ph = pd.read_csv( - WORKING_DIR_MASTR_NEW / sources["mastr_storage"], + WORKING_DIR_MASTR_NEW / sources.files["mastr_storage"], delimiter=",", usecols=[ "Nettonennleistung", @@ -375,4 +376,4 @@ def apply_voltage_level_thresholds(power_plants): power_plants.loc[power_plants["el_capacity"] > 20, "voltage_level"] = 3 power_plants.loc[power_plants["el_capacity"] > 120, "voltage_level"] = 1 - return power_plants + return power_plants \ No newline at end of file diff --git a/src/egon/data/datasets/storages_etrago/__init__.py b/src/egon/data/datasets/storages_etrago/__init__.py index cb932a905..e4268421d 100644 --- a/src/egon/data/datasets/storages_etrago/__init__.py +++ b/src/egon/data/datasets/storages_etrago/__init__.py @@ -5,10 +5,11 @@ import geopandas as gpd import pandas as pd - -from egon.data import config, db -from egon.data.datasets import Dataset -from egon.data.datasets.scenario_parameters import get_sector_parameters +from egon.data import db, config +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets +from egon.data.datasets.scenario_parameters import ( + get_sector_parameters, +) class StorageEtrago(Dataset): @@ -37,11 +38,26 @@ class StorageEtrago(Dataset): * :py:class:`grid.egon_etrago_storage ` is extended """ + sources = DatasetSources( + tables={ + "storage": "supply.egon_storages", + "scenario_parameters": "scenario.egon_scenario_parameters", + "bus": "grid.egon_etrago_bus", + "ehv-substation": "grid.egon_ehv_substation", + "hv-substation": "grid.egon_hvmv_substation", + } + ) + targets = DatasetTargets( + tables={ + "storage": "grid.egon_etrago_storage" + } + ) #: name: str = 
"StorageEtrago" #: - version: str = "0.0.9" + version: str = "0.0.12" + def __init__(self, dependencies): super().__init__( @@ -50,12 +66,8 @@ def __init__(self, dependencies): dependencies=dependencies, tasks=(insert_PHES, extendable_batteries), ) - - def insert_PHES(): # Get datasets configuration - sources = config.datasets()["storage_etrago"]["sources"] - targets = config.datasets()["storage_etrago"]["targets"] engine = db.engine() @@ -64,10 +76,10 @@ def insert_PHES(): # Delete outdated data on pumped hydro units (PHES) inside Germany from database db.execute_sql( f""" - DELETE FROM {targets['storage']['schema']}.{targets['storage']['table']} + DELETE FROM {StorageEtrago.targets.tables['storage']} WHERE carrier = 'pumped_hydro' AND scn_name = '{scn}' - AND bus IN (SELECT bus_id FROM {sources['bus']['schema']}.{sources['bus']['table']} + AND bus IN (SELECT bus_id FROM {StorageEtrago.sources.tables['bus']} WHERE scn_name = '{scn}' AND country = 'DE'); """ @@ -76,7 +88,7 @@ def insert_PHES(): # Select data on PSH units from database phes = db.select_dataframe( f"""SELECT scenario as scn_name, bus_id as bus, carrier, el_capacity as p_nom - FROM {sources['storage']['schema']}.{sources['storage']['table']} + FROM {StorageEtrago.sources.tables['storage']} WHERE carrier = 'pumped_hydro' AND scenario= '{scn}' """ @@ -98,9 +110,9 @@ def insert_PHES(): # Write data to db phes.to_sql( - targets["storage"]["table"], + StorageEtrago.targets.get_table_name("storage"), engine, - schema=targets["storage"]["schema"], + schema=StorageEtrago.targets.get_table_schema("storage"), if_exists="append", index=phes.index, ) @@ -108,18 +120,18 @@ def insert_PHES(): def extendable_batteries_per_scenario(scenario): # Get datasets configuration - sources = config.datasets()["storage_etrago"]["sources"] - targets = config.datasets()["storage_etrago"]["targets"] + + engine = db.engine() # Delete outdated data on extendable battetries inside Germany from database db.execute_sql( f""" - DELETE 
FROM {targets['storage']['schema']}.{targets['storage']['table']} + DELETE FROM {StorageEtrago.targets.tables['storage']} WHERE carrier = 'battery' AND scn_name = '{scenario}' - AND bus IN (SELECT bus_id FROM {sources['bus']['schema']}.{sources['bus']['table']} + AND bus IN (SELECT bus_id FROM {StorageEtrago.sources.tables['bus']} WHERE scn_name = '{scenario}' AND country = 'DE'); """ @@ -128,14 +140,13 @@ def extendable_batteries_per_scenario(scenario): extendable_batteries = db.select_dataframe( f""" SELECT bus_id as bus, scn_name FROM - {sources['bus']['schema']}. - {sources['bus']['table']} + {StorageEtrago.sources.tables['bus']} WHERE carrier = 'AC' AND scn_name = '{scenario}' AND (bus_id IN (SELECT bus_id - FROM {sources['ehv-substation']['schema']}.{sources['ehv-substation']['table']}) + FROM {StorageEtrago.sources.tables['ehv-substation']}) OR bus_id IN (SELECT bus_id - FROM {sources['hv-substation']['schema']}.{sources['hv-substation']['table']} + FROM {StorageEtrago.sources.tables['hv-substation']} )) """ ) @@ -144,8 +155,7 @@ def extendable_batteries_per_scenario(scenario): home_batteries = db.select_dataframe( f""" SELECT el_capacity as p_nom_min, bus_id as bus FROM - {sources['storage']['schema']}. 
- {sources['storage']['table']} + {StorageEtrago.sources.tables['storage']} WHERE carrier = 'home_battery' AND scenario = '{scenario}'; """ @@ -199,9 +209,9 @@ def extendable_batteries_per_scenario(scenario): # Write data to db extendable_batteries.to_sql( - targets["storage"]["table"], + StorageEtrago.targets.get_table_name("storage"), engine, - schema=targets["storage"]["schema"], + schema=StorageEtrago.targets.get_table_schema("storage"), if_exists="append", index=False, ) @@ -209,4 +219,4 @@ def extendable_batteries_per_scenario(scenario): def extendable_batteries(): for scn in config.settings()["egon-data"]["--scenarios"]: - extendable_batteries_per_scenario(scn) + extendable_batteries_per_scenario(scn) \ No newline at end of file diff --git a/src/egon/data/datasets/substation/.spyproject/config/codestyle.ini b/src/egon/data/datasets/substation/.spyproject/config/codestyle.ini new file mode 100644 index 000000000..0f54b4c43 --- /dev/null +++ b/src/egon/data/datasets/substation/.spyproject/config/codestyle.ini @@ -0,0 +1,8 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + +[main] +version = 0.2.0 + diff --git a/src/egon/data/datasets/substation/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini b/src/egon/data/datasets/substation/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini new file mode 100644 index 000000000..0b95e5cee --- /dev/null +++ b/src/egon/data/datasets/substation/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini @@ -0,0 +1,5 @@ +[codestyle] +indentation = True +edge_line = True +edge_line_columns = 79 + diff --git a/src/egon/data/datasets/substation/.spyproject/config/defaults/defaults-encoding-0.2.0.ini b/src/egon/data/datasets/substation/.spyproject/config/defaults/defaults-encoding-0.2.0.ini new file mode 100644 index 000000000..0ce193c1e --- /dev/null +++ b/src/egon/data/datasets/substation/.spyproject/config/defaults/defaults-encoding-0.2.0.ini @@ -0,0 +1,3 @@ +[encoding] +text_encoding = 
utf-8 + diff --git a/src/egon/data/datasets/substation/.spyproject/config/defaults/defaults-vcs-0.2.0.ini b/src/egon/data/datasets/substation/.spyproject/config/defaults/defaults-vcs-0.2.0.ini new file mode 100644 index 000000000..ee2548333 --- /dev/null +++ b/src/egon/data/datasets/substation/.spyproject/config/defaults/defaults-vcs-0.2.0.ini @@ -0,0 +1,4 @@ +[vcs] +use_version_control = False +version_control_system = + diff --git a/src/egon/data/datasets/substation/.spyproject/config/defaults/defaults-workspace-0.2.0.ini b/src/egon/data/datasets/substation/.spyproject/config/defaults/defaults-workspace-0.2.0.ini new file mode 100644 index 000000000..2a73ab7ad --- /dev/null +++ b/src/egon/data/datasets/substation/.spyproject/config/defaults/defaults-workspace-0.2.0.ini @@ -0,0 +1,6 @@ +[workspace] +restore_data_on_startup = True +save_data_on_exit = True +save_history = True +save_non_project_files = False + diff --git a/src/egon/data/datasets/substation/.spyproject/config/encoding.ini b/src/egon/data/datasets/substation/.spyproject/config/encoding.ini new file mode 100644 index 000000000..a17acedd7 --- /dev/null +++ b/src/egon/data/datasets/substation/.spyproject/config/encoding.ini @@ -0,0 +1,6 @@ +[encoding] +text_encoding = utf-8 + +[main] +version = 0.2.0 + diff --git a/src/egon/data/datasets/substation/.spyproject/config/vcs.ini b/src/egon/data/datasets/substation/.spyproject/config/vcs.ini new file mode 100644 index 000000000..fd66eae01 --- /dev/null +++ b/src/egon/data/datasets/substation/.spyproject/config/vcs.ini @@ -0,0 +1,7 @@ +[vcs] +use_version_control = False +version_control_system = + +[main] +version = 0.2.0 + diff --git a/src/egon/data/datasets/substation/.spyproject/config/workspace.ini b/src/egon/data/datasets/substation/.spyproject/config/workspace.ini new file mode 100644 index 000000000..b8bea683f --- /dev/null +++ b/src/egon/data/datasets/substation/.spyproject/config/workspace.ini @@ -0,0 +1,12 @@ +[workspace] +restore_data_on_startup = 
True +save_data_on_exit = True +save_history = True +save_non_project_files = False +project_type = 'empty-project-type' +recent_files = ['..\\..\\..\\..\\..\\..\\.spyder-py3\\temp.py'] + +[main] +version = 0.2.0 +recent_files = [] + diff --git a/src/egon/data/datasets/substation/__init__.py b/src/egon/data/datasets/substation/__init__.py index 7e792eee7..c48603512 100644 --- a/src/egon/data/datasets/substation/__init__.py +++ b/src/egon/data/datasets/substation/__init__.py @@ -1,17 +1,15 @@ """The central module containing code to create substation tables """ - -import os - from geoalchemy2.types import Geometry from sqlalchemy import Column, Float, Integer, Sequence, Text from sqlalchemy.ext.declarative import declarative_base - +import os from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets import egon.data.config + Base = declarative_base() @@ -72,10 +70,29 @@ class EgonHvmvTransferBuses(Base): class SubstationExtraction(Dataset): + + sources = DatasetSources( + tables={ + "osm_ways": "openstreetmap.osm_ways", + "osm_nodes": "openstreetmap.osm_nodes", + "osm_points": "openstreetmap.osm_point", + "osm_lines": "openstreetmap.osm_line", + } + ) + + + targets = DatasetTargets( + tables={ + "hvmv_substation": "grid.egon_hvmv_transfer_buses", + "ehv_substation": "grid.egon_ehv_transfer_buses", + "transfer_busses": "public.transfer_busses_complete", # Assuming public schema + } + ) + def __init__(self, dependencies): super().__init__( name="substation_extraction", - version="0.0.2", + version="0.0.3", dependencies=dependencies, tasks=( create_tables, @@ -95,38 +112,26 @@ def create_tables(): ------- None. 
""" - cfg_targets = egon.data.config.datasets()["substation_extraction"][ - "targets" - ] + + db.execute_sql("CREATE SCHEMA IF NOT EXISTS grid;") + db.execute_sql( - f"CREATE SCHEMA IF NOT EXISTS {cfg_targets['hvmv_substation']['schema']};" + f"""DROP TABLE IF EXISTS {SubstationExtraction.targets.tables['ehv_substation']} CASCADE;""" ) - # Drop tables db.execute_sql( - f"""DROP TABLE IF EXISTS - {cfg_targets['ehv_substation']['schema']}. - {cfg_targets['ehv_substation']['table']} CASCADE;""" + f"""DROP TABLE IF EXISTS {SubstationExtraction.targets.tables['hvmv_substation']} CASCADE;""" ) db.execute_sql( - f"""DROP TABLE IF EXISTS - {cfg_targets['hvmv_substation']['schema']}. - {cfg_targets['hvmv_substation']['table']} CASCADE;""" + f"""DROP SEQUENCE IF EXISTS {SubstationExtraction.targets.tables['hvmv_substation']}_bus_id_seq CASCADE;""" ) db.execute_sql( - f"""DROP SEQUENCE IF EXISTS - {cfg_targets['hvmv_substation']['schema']}. - {cfg_targets['hvmv_substation']['table']}_bus_id_seq CASCADE;""" + f"""DROP SEQUENCE IF EXISTS {SubstationExtraction.targets.tables['ehv_substation']}_bus_id_seq CASCADE;""" ) - db.execute_sql( - f"""DROP SEQUENCE IF EXISTS - {cfg_targets['ehv_substation']['schema']}. - {cfg_targets['ehv_substation']['table']}_bus_id_seq CASCADE;""" - ) engine = db.engine() EgonEhvTransferBuses.__table__.create(bind=engine, checkfirst=True) @@ -224,20 +229,18 @@ def create_sql_functions(): def transfer_busses(): - targets = egon.data.config.datasets()["substation_extraction"]["targets"] + db.execute_sql( f""" - DROP TABLE IF EXISTS {targets['transfer_busses']['table']}; - CREATE TABLE {targets['transfer_busses']['table']} AS + DROP TABLE IF EXISTS {SubstationExtraction.targets.tables['transfer_busses']}; + CREATE TABLE {SubstationExtraction.targets.tables['transfer_busses']} AS SELECT DISTINCT ON (osm_id) * FROM - (SELECT * FROM {targets['ehv_substation']['schema']}. 
- {targets['ehv_substation']['table']} + (SELECT * FROM {SubstationExtraction.targets.tables['ehv_substation']} UNION SELECT bus_id, lon, lat, point, polygon, voltage, power_type, substation, osm_id, osm_www, frequency, subst_name, ref, operator, dbahn, status - FROM {targets['hvmv_substation']['schema']}. - {targets['hvmv_substation']['table']} ORDER BY osm_id) as foo; + FROM {SubstationExtraction.targets.tables['hvmv_substation']} ORDER BY osm_id) as foo; """ ) diff --git a/src/egon/data/datasets/substation_voronoi.py b/src/egon/data/datasets/substation_voronoi.py index 722257b56..c8d02d1c5 100644 --- a/src/egon/data/datasets/substation_voronoi.py +++ b/src/egon/data/datasets/substation_voronoi.py @@ -2,22 +2,46 @@ """ -from geoalchemy2.types import Geometry +import egon.data.config +from egon.data import db +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from sqlalchemy import Column, Integer, Sequence from sqlalchemy.ext.declarative import declarative_base - -from egon.data import db -from egon.data.datasets import Dataset -import egon.data.config +from geoalchemy2.types import Geometry Base = declarative_base() class SubstationVoronoi(Dataset): + name: str = "substation_voronoi" + version: str = "0.0.2" + + # Defined sources and targets for the file + sources = DatasetSources( + tables={ + "boundaries": {"schema": "boundaries", "table": "vg250_sta_union"}, + "hvmv_substation": {"schema": "grid", "table": "egon_hvmv_substation"}, + "ehv_substation": {"schema": "grid", "table": "egon_ehv_substation"}, + } + ) + + targets = DatasetTargets( + tables={ + "ehv_substation_voronoi": { + "schema": "grid", + "table": "egon_ehv_substation_voronoi", + }, + "hvmv_substation_voronoi": { + "schema": "grid", + "table": "egon_hvmv_substation_voronoi", + }, + } + ) + def __init__(self, dependencies): super().__init__( - name="substation_voronoi", - version="0.0.0", + name=self.name, + version=self.version, dependencies=dependencies, tasks=( 
create_tables, @@ -63,31 +87,27 @@ def create_tables(): None. """ - cfg_voronoi = egon.data.config.datasets()["substation_voronoi"]["targets"] db.execute_sql( - f"""DROP TABLE IF EXISTS - {cfg_voronoi['ehv_substation_voronoi']['schema']}. - {cfg_voronoi['ehv_substation_voronoi']['table']} CASCADE;""" + f"DROP TABLE IF EXISTS {SubstationVoronoi.targets.tables['ehv_substation_voronoi']['schema']}." + f"{SubstationVoronoi.targets.tables['ehv_substation_voronoi']['table']} CASCADE;" ) + db.execute_sql( - f"""DROP TABLE IF EXISTS - {cfg_voronoi['hvmv_substation_voronoi']['schema']}. - {cfg_voronoi['hvmv_substation_voronoi']['table']} CASCADE;""" + f"DROP TABLE IF EXISTS {SubstationVoronoi.targets.tables['hvmv_substation_voronoi']['schema']}." + f"{SubstationVoronoi.targets.tables['hvmv_substation_voronoi']['table']} CASCADE;" ) # Drop sequences db.execute_sql( - f"""DROP SEQUENCE IF EXISTS - {cfg_voronoi['ehv_substation_voronoi']['schema']}. - {cfg_voronoi['ehv_substation_voronoi']['table']}_id_seq CASCADE;""" + f"DROP SEQUENCE IF EXISTS {SubstationVoronoi.targets.tables['ehv_substation_voronoi']['schema']}." + f"{SubstationVoronoi.targets.tables['ehv_substation_voronoi']['table']}_id_seq CASCADE;" ) db.execute_sql( - f"""DROP SEQUENCE IF EXISTS - {cfg_voronoi['hvmv_substation_voronoi']['schema']}. - {cfg_voronoi['hvmv_substation_voronoi']['table']}_id_seq CASCADE;""" + f"DROP SEQUENCE IF EXISTS {SubstationVoronoi.targets.tables['hvmv_substation_voronoi']['schema']}." 
+ f"{SubstationVoronoi.targets.tables['hvmv_substation_voronoi']['table']}_id_seq CASCADE;" ) engine = db.engine() @@ -108,20 +128,17 @@ def substation_voronoi(): substation_list = ["hvmv_substation", "ehv_substation"] for substation in substation_list: - cfg_boundaries = egon.data.config.datasets()["substation_voronoi"][ - "sources" - ]["boundaries"] - cfg_substation = egon.data.config.datasets()["substation_voronoi"][ - "sources" - ][substation] - cfg_voronoi = egon.data.config.datasets()["substation_voronoi"][ - "targets" - ][substation + "_voronoi"] + + cfg_boundaries = SubstationVoronoi.sources.tables["boundaries"] + cfg_substation = SubstationVoronoi.sources.tables[substation] + cfg_voronoi = SubstationVoronoi.targets.tables[substation + "_voronoi"] view = "grid.egon_voronoi_no_borders" # Create view for Voronoi polygons without taking borders into account - db.execute_sql(f"DROP VIEW IF EXISTS {view} CASCADE;") + db.execute_sql( + f"DROP VIEW IF EXISTS {view} CASCADE;" + ) db.execute_sql( f""" diff --git a/src/egon/data/datasets/tyndp.py b/src/egon/data/datasets/tyndp.py index f6ad69a5b..f345fd03e 100644 --- a/src/egon/data/datasets/tyndp.py +++ b/src/egon/data/datasets/tyndp.py @@ -1,11 +1,11 @@ """The central module containing all code dealing with downloading tyndp data """ -from urllib.request import urlretrieve import os from egon.data import config -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets +from urllib.request import urlretrieve class Tyndp(Dataset): @@ -28,7 +28,24 @@ class Tyndp(Dataset): #: name: str = "Tyndp" #: - version: str = "0.0.1" + version: str = "0.0.3" + + sources = DatasetSources( + files={ + "capacities": "https://2020.entsos-tyndp-scenarios.eu/wp-content/uploads/2020/06/TYNDP-2020-Scenario-Datafile.xlsx.zip", + "demand_2030": "https://eepublicdownloads.entsoe.eu/tyndp-documents/2020-data/Demand_TimeSeries_2030_DistributedEnergy.xlsx", + "demand_2040": 
"https://eepublicdownloads.entsoe.eu/tyndp-documents/2020-data/Demand_TimeSeries_2040_DistributedEnergy.xlsx", + } + ) + + targets = DatasetTargets( + files={ + "capacities": "TYNDP-2020-Scenario-Datafile.xlsx.zip", + "demand_2030": "Demand_TimeSeries_2030_DistributedEnergy.xlsx", + "demand_2040": "Demand_TimeSeries_2040_DistributedEnergy.xlsx", + } + ) + def __init__(self, dependencies): super().__init__( @@ -45,13 +62,12 @@ def download(): ------- None. """ - sources = config.datasets()["tyndp"]["sources"] - targets = config.datasets()["tyndp"]["targets"] if not os.path.exists("tyndp"): os.mkdir("tyndp") for dataset in ["capacities", "demand_2030", "demand_2040"]: - target_file = targets[dataset] + source_url = Tyndp.sources.files[dataset] + target_file = Tyndp.targets.files[dataset] - urlretrieve(sources[dataset], f"tyndp/{target_file}") + urlretrieve(source_url, f"tyndp/{target_file}") diff --git a/src/egon/data/datasets/vg250/__init__.py b/src/egon/data/datasets/vg250/__init__.py index 378f86895..7c4a919af 100644 --- a/src/egon/data/datasets/vg250/__init__.py +++ b/src/egon/data/datasets/vg250/__init__.py @@ -11,8 +11,8 @@ from pathlib import Path from urllib.request import urlretrieve -import codecs import datetime +import codecs import json import os import time @@ -22,11 +22,13 @@ from egon.data import db from egon.data.config import settings -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets +import egon.data.config from egon.data.metadata import ( context, - licenses_datenlizenz_deutschland, + meta_metadata, + licenses_datenlizenz_deutschland, ) import egon.data.config @@ -40,18 +42,18 @@ def download_files(): *vg250/original_data/target/file*. 
""" - data_config = egon.data.config.datasets() - vg250_config = data_config["vg250"]["original_data"] + + download_directory = Path(".") / "vg250" # Create the folder, if it does not exist already if not os.path.exists(download_directory): os.mkdir(download_directory) - target_file = download_directory / vg250_config["target"]["file"] + target_file = download_directory / Path(Vg250.targets.files['vg250_zip']).name if not os.path.isfile(target_file): - urlretrieve(vg250_config["source"]["url"], target_file) + urlretrieve(Vg250.sources.urls['vg250_zip'], target_file) def to_postgres(): @@ -64,19 +66,16 @@ def to_postgres(): """ - # Get information from data configuration file - data_config = egon.data.config.datasets() - vg250_orig = data_config["vg250"]["original_data"] - vg250_processed = data_config["vg250"]["processed"] + # Create target schema - db.execute_sql(f"CREATE SCHEMA IF NOT EXISTS {vg250_processed['schema']};") + db.execute_sql(f"CREATE SCHEMA IF NOT EXISTS boundaries;") - zip_file = Path(".") / "vg250" / vg250_orig["target"]["file"] + zip_file = Path(Vg250.sources.files['vg250_zip']) engine_local_db = db.engine() # Extract shapefiles from zip archive and send it to postgres db - for filename, table in vg250_processed["file_table_map"].items(): + for filename, table in Vg250.file_table_map.items(): # Open files and read .shp (within .zip) with geopandas data = gpd.read_file( f"zip://{zip_file}!vg250_01-01.geo84.shape.ebenen/" @@ -107,36 +106,34 @@ def to_postgres(): # Drop table before inserting data db.execute_sql( - f"DROP TABLE IF EXISTS " - f"{vg250_processed['schema']}.{table} CASCADE;" + f"DROP TABLE IF EXISTS {Vg250.targets.tables[table]} CASCADE;" ) # create database table from geopandas dataframe data.to_postgis( - table, + Vg250.targets.get_table_name(table), engine_local_db, - schema=vg250_processed["schema"], + schema=Vg250.targets.get_table_schema(table), index=True, if_exists="replace", dtype={"geometry": Geometry()}, ) db.execute_sql( - 
f"ALTER TABLE {vg250_processed['schema']}.{table} " + f"ALTER TABLE {Vg250.targets.tables[table]} " f"ADD PRIMARY KEY (id);" - ) + ) # Add index on geometry column db.execute_sql( f"CREATE INDEX {table}_geometry_idx ON " - f"{vg250_processed['schema']}.{table} USING gist (geometry);" - ) + f"{Vg250.targets.tables[table]} USING gist (geometry);" + ) def add_metadata(): """Writes metadata JSON string into table comment.""" - # Prepare variables - vg250_config = egon.data.config.datasets()["vg250"] + title_and_description = { "vg250_sta": { @@ -186,12 +183,12 @@ def add_metadata(): "mit ihren Grenzen, statistischen Schlüsselzahlen, Namen der " "Verwaltungseinheit sowie die spezifische Bezeichnung der " "Verwaltungsebene des jeweiligen Landes.", - "path": vg250_config["original_data"]["source"]["url"], + "path": Vg250.sources.urls["vg250_zip"], "licenses": licenses, } - for table in vg250_config["processed"]["file_table_map"].values(): - schema_table = ".".join([vg250_config["processed"]["schema"], table]) + for table in Vg250.file_table_map.values(): + schema_table = Vg250.targets.tables[table] meta = { "name": schema_table, "title": title_and_description[table]["title"], @@ -254,7 +251,7 @@ def add_metadata(): meta_json = "'" + json.dumps(meta) + "'" db.submit_comment( - meta_json, vg250_config["processed"]["schema"], table + meta_json, Vg250.targets.get_table_schema(table), table ) @@ -472,6 +469,40 @@ def vg250_metadata_resources_fields(): class Vg250(Dataset): + + sources = DatasetSources( + urls={ + "vg250_zip": "https://daten.gdz.bkg.bund.de/produkte/vg/vg250_ebenen_0101/2020/vg250_01-01.geo84.shape.ebenen.zip" + }, + files={ + # The downloaded file is a source for the 'to_postgres' step + "vg250_zip": "vg250/vg250_01-01.geo84.shape.ebenen.zip" + } + ) + targets = DatasetTargets( + files={ + # The downloaded file is a target of the 'download' step + "vg250_zip": "vg250/vg250_01-01.geo84.shape.ebenen.zip" + }, + tables={ + "vg250_sta": "boundaries.vg250_sta", + 
"vg250_lan": "boundaries.vg250_lan", + "vg250_rbz": "boundaries.vg250_rbz", + "vg250_krs": "boundaries.vg250_krs", + "vg250_vwg": "boundaries.vg250_vwg", + "vg250_gem": "boundaries.vg250_gem", + } + ) + + file_table_map = { + "VG250_STA.shp": "vg250_sta", + "VG250_LAN.shp": "vg250_lan", + "VG250_RBZ.shp": "vg250_rbz", + "VG250_KRS.shp": "vg250_krs", + "VG250_VWG.shp": "vg250_vwg", + "VG250_GEM.shp": "vg250_gem", + } + """ Obtains and processes VG250 data and writes it to database. @@ -508,14 +539,11 @@ class Vg250(Dataset): """ - filename = egon.data.config.datasets()["vg250"]["original_data"]["source"][ - "url" - ] #: name: str = "VG250" - #: - version: str = filename + "-0.0.4" + version: str = "0.0.8" + def __init__(self, dependencies): super().__init__( @@ -529,4 +557,4 @@ def __init__(self, dependencies): add_metadata, cleaning_and_preperation, ), - ) + ) \ No newline at end of file diff --git a/src/egon/data/datasets/vg250_mv_grid_districts.py b/src/egon/data/datasets/vg250_mv_grid_districts.py index 237420263..d2023a4db 100644 --- a/src/egon/data/datasets/vg250_mv_grid_districts.py +++ b/src/egon/data/datasets/vg250_mv_grid_districts.py @@ -6,10 +6,10 @@ import geopandas as gpd import pandas as pd -from egon.data import config, db +from egon.data import db Base = declarative_base() -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets class Vg250MvGridDistricts(Dataset): @@ -29,7 +29,19 @@ class Vg250MvGridDistricts(Dataset): #: name: str = "Vg250MvGridDistricts" #: - version: str = "0.0.1" + version: str = "0.0.2" + sources = DatasetSources( + tables={ + "egon_mv_grid_district": {"schema": "grid", "table": "egon_mv_grid_district"}, + "federal_states": {"schema": "boundaries", "table": "vg250_lan_union"}, + } + ) + + targets = DatasetTargets( + tables={ + "map": {"schema": "boundaries", "table": "egon_map_mvgriddistrict_vg250"} + } + ) def __init__(self, dependencies): super().__init__( @@ -57,7 +69,9 
@@ def create_tables(): """ - db.execute_sql("CREATE SCHEMA IF NOT EXISTS boundaries;") + db.execute_sql( + f"CREATE SCHEMA IF NOT EXISTS {Vg250MvGridDistricts.targets.tables['map']['schema']};" + ) engine = db.engine() MapMvgriddistrictsVg250.__table__.drop(bind=engine, checkfirst=True) MapMvgriddistrictsVg250.__table__.create(bind=engine, checkfirst=True) @@ -73,9 +87,10 @@ def mapping(): # Create table create_tables() - # Select sources and targets from dataset configuration - sources = config.datasets()["map_mvgrid_vg250"]["sources"] - target = config.datasets()["map_mvgrid_vg250"]["targets"]["map"] + # Select sources and targets from dataset definition + sources = Vg250MvGridDistricts.sources.tables + target = Vg250MvGridDistricts.targets.tables["map"] + # Delete existing data db.execute_sql(f"DELETE FROM {target['schema']}.{target['table']}") diff --git a/src/egon/data/datasets/zensus/__init__.py b/src/egon/data/datasets/zensus/__init__.py index 3d498a12b..597e931e9 100755 --- a/src/egon/data/datasets/zensus/__init__.py +++ b/src/egon/data/datasets/zensus/__init__.py @@ -5,6 +5,7 @@ import csv import json import os + import zipfile from shapely.geometry import Point, shape @@ -14,17 +15,39 @@ from egon.data import db, subprocess from egon.data.config import settings -from egon.data.datasets import Dataset -import egon.data.config +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets class ZensusPopulation(Dataset): + sources = DatasetSources( + urls={ + "original_data": ( + "https://www.zensus2011.de/SharedDocs/Downloads/DE/" + "Pressemitteilung/DemografischeGrunddaten/" + "csv_Bevoelkerung_100m_Gitter.zip?__blob=publicationFile&v=3" + ), + }, + tables={ + "boundaries_vg250_lan": "boundaries.vg250_lan", + }, + ) + targets = DatasetTargets( + files={ + + "zensus_population": "data_bundle_egon_data/zensus_population/csv_Bevoelkerung_100m_Gitter.zip" + }, + tables={ + "zensus_population": "society.destatis_zensus_population_per_ha" + }, + ) + 
def __init__(self, dependencies): super().__init__( name="ZensusPopulation", - version="0.0.2", + version="0.0.4", dependencies=dependencies, tasks=( + download_zensus_pop, create_zensus_pop_table, population_to_postgres, ), @@ -32,12 +55,44 @@ def __init__(self, dependencies): class ZensusMiscellaneous(Dataset): + sources = DatasetSources( + urls={ + "zensus_households": ( + "https://www.zensus2011.de/SharedDocs/Downloads/DE/" + "Pressemitteilung/DemografischeGrunddaten/" + "csv_Haushalte_100m_Gitter.zip?__blob=publicationFile&v=2" + ), + "zensus_buildings": ( + "https://www.zensus2011.de/SharedDocs/Downloads/DE/" + "Pressemitteilung/DemografischeGrunddaten/" + "csv_Gebaeude_100m_Gitter.zip?__blob=publicationFile&v=2" + ), + "zensus_apartments": ( + "https://www.zensus2011.de/SharedDocs/Downloads/DE/" + "Pressemitteilung/DemografischeGrunddaten/" + "csv_Wohnungen_100m_Gitter.zip?__blob=publicationFile&v=5" + ), + } + ) + targets = DatasetTargets( + files={ + "zensus_households": "data_bundle_egon_data/zensus_population/csv_Haushalte_100m_Gitter.zip", + "zensus_buildings": "data_bundle_egon_data/zensus_population/csv_Gebaeude_100m_Gitter.zip", + "zensus_apartments": "data_bundle_egon_data/zensus_population/csv_Wohnungen_100m_Gitter.zip", + }, + tables={ + "zensus_households": "society.egon_destatis_zensus_household_per_ha", + "zensus_buildings": "society.egon_destatis_zensus_building_per_ha", + "zensus_apartments": "society.egon_destatis_zensus_apartment_per_ha", + }, + ) def __init__(self, dependencies): super().__init__( name="ZensusMiscellaneous", - version="0.0.1", + version="0.0.2", dependencies=dependencies, tasks=( + download_zensus_misc, create_zensus_misc_tables, zensus_misc_to_postgres, ), @@ -79,64 +134,35 @@ def download_and_check(url, target_file, max_iteration=5): def download_zensus_pop(): - """Download Zensus csv file on population per hectare grid cell.""" - - data_config = egon.data.config.datasets() - zensus_population_config = 
data_config["zensus_population"][ - "original_data" - ] - download_directory = Path(".") / "zensus_population" - # Create the folder, if it does not exist already - if not os.path.exists(download_directory): - os.mkdir(download_directory) - - target_file = ( - download_directory / zensus_population_config["target"]["file"] - ) - - url = zensus_population_config["source"]["url"] - download_and_check(url, target_file, max_iteration=5) + """Download the Zensus population ZIP to the path defined in + ZensusPopulation.targets.files using the URL from + ZensusPopulation.sources.urls (no global config.datasets() usage).""" + target_file = Path(ZensusPopulation.targets.files["zensus_population"]) + target_file.parent.mkdir(parents=True, exist_ok=True) + download_and_check(ZensusPopulation.sources.urls["original_data"], target_file, max_iteration=5) def download_zensus_misc(): - """Download Zensus csv files on data per hectare grid cell.""" - - # Get data config - data_config = egon.data.config.datasets() - download_directory = Path(".") / "zensus_population" - # Create the folder, if it does not exist already - if not os.path.exists(download_directory): - os.mkdir(download_directory) - # Download remaining zensus data set on households, buildings, apartments - - zensus_config = data_config["zensus_misc"]["original_data"] - zensus_misc_processed = data_config["zensus_misc"]["processed"] - zensus_url = zensus_config["source"]["url"] - zensus_files = zensus_misc_processed["file_table_map"].keys() - url_path_map = list(zip(zensus_url, zensus_files)) - - for url, path in url_path_map: - target_file_misc = download_directory / path - - download_and_check(url, target_file_misc, max_iteration=5) + """Download the Zensus miscellaneous ZIP files (households, buildings, + apartments) using the URL/file mappings from + ZensusMiscellaneous.sources.urls and .targets.files for each key.""" + for key, url in ZensusMiscellaneous.sources.urls.items(): + target_file = 
Path(ZensusMiscellaneous.targets.files[key]) + target_file.parent.mkdir(parents=True, exist_ok=True) + download_and_check(url, target_file, max_iteration=5) def create_zensus_pop_table(): """Create tables for zensus data in postgres database""" - # Get information from data configuration file - data_config = egon.data.config.datasets() - zensus_population_processed = data_config["zensus_population"]["processed"] + # Create table for population data + population_table = ZensusPopulation.targets.tables["zensus_population"] - # Create target schema db.execute_sql( - f"CREATE SCHEMA IF NOT EXISTS {zensus_population_processed['schema']};" - ) - - # Create table for population data - population_table = ( - f"{zensus_population_processed['schema']}" - f".{zensus_population_processed['table']}" + f""" + CREATE SCHEMA IF NOT EXISTS + {ZensusPopulation.targets.get_table_schema("zensus_population")}; + """ ) db.execute_sql(f"DROP TABLE IF EXISTS {population_table} CASCADE;") @@ -150,32 +176,27 @@ def create_zensus_pop_table(): population smallint, geom_point geometry(Point,3035), geom geometry (Polygon, 3035), - CONSTRAINT {zensus_population_processed['table']}_pkey + CONSTRAINT {population_table.split('.')[1]}_pkey PRIMARY KEY (id) + ); """ ) - def create_zensus_misc_tables(): """Create tables for zensus data in postgres database""" - # Get information from data configuration file - data_config = egon.data.config.datasets() - zensus_misc_processed = data_config["zensus_misc"]["processed"] - - # Create target schema - db.execute_sql( - f"CREATE SCHEMA IF NOT EXISTS {zensus_misc_processed['schema']};" - ) # Create tables for household, apartment and building - for table in zensus_misc_processed["file_table_map"].values(): - misc_table = f"{zensus_misc_processed['schema']}.{table}" - - db.execute_sql(f"DROP TABLE IF EXISTS {misc_table} CASCADE;") + for table in ZensusMiscellaneous.targets.tables: + table_name = ZensusMiscellaneous.targets.tables[table] + # Create target schema 
+ db.execute_sql( + f"CREATE SCHEMA IF NOT EXISTS {table_name.split('.')[0]};" + ) + db.execute_sql(f"DROP TABLE IF EXISTS {table_name} CASCADE;") db.execute_sql( - f"CREATE TABLE {misc_table}" + f"CREATE TABLE {table_name}" f""" (id SERIAL, grid_id VARCHAR(50), grid_id_new VARCHAR (50), @@ -185,37 +206,12 @@ def create_zensus_misc_tables(): quantity smallint, quantity_q smallint, zensus_population_id int, - CONSTRAINT {table}_pkey PRIMARY KEY (id) + CONSTRAINT {table_name.split('.')[1]}_pkey PRIMARY KEY (id) ); """ ) -def target(source, dataset): - """Generate the target path corresponding to a source path. - - Parameters - ---------- - dataset: str - Toggles between production (`dataset='Everything'`) and test mode e.g. - (`dataset='Schleswig-Holstein'`). - In production mode, data covering entire Germany - is used. In the test mode a subset of this data is used for testing the - workflow. - Returns - ------- - Path - Path to target csv-file - - """ - return Path( - os.path.join(Path("."), "data_bundle_egon_data", source.stem) - + "zensus_population" - + "." - + dataset - + source.suffix - ) - def select_geom(): """Select the union of the geometries of Schleswig-Holstein from the @@ -225,7 +221,6 @@ def select_geom(): """ docker_db_config = db.credentials() - geojson = subprocess.run( ["ogr2ogr"] + ["-s_srs", "epsg:4326"] @@ -239,14 +234,16 @@ def select_geom(): f" port={docker_db_config['PORT']}" f" dbname='{docker_db_config['POSTGRES_DB']}'" ] - + ["-sql", "SELECT ST_Union(geometry) FROM boundaries.vg250_lan"], + + [ + "-sql", + f"SELECT ST_Union(geometry) FROM {ZensusPopulation.sources.tables['boundaries_vg250_lan']}", + ], text=True, ) features = json.loads(geojson.stdout)["features"] assert ( len(features) == 1 ), f"Found {len(features)} geometry features, expected exactly one." 
- return prep(shape(features[0]["geometry"])) @@ -276,13 +273,16 @@ def filter_zensus_population(filename, dataset): schleswig_holstein = select_geom() - if not os.path.isfile(target(csv_file, dataset)): + # compute the filtered file path inline + filtered_target = csv_file.parent / f"{csv_file.stem}.{dataset}{csv_file.suffix}" + filtered_target.parent.mkdir(parents=True, exist_ok=True) + if not filtered_target.exists(): with open(csv_file, mode="r", newline="") as input_lines: rows = csv.DictReader(input_lines, delimiter=";") gitter_ids = set() with open( - target(csv_file, dataset), mode="w", newline="" + filtered_target, mode="w", newline="" ) as destination: output = csv.DictWriter( destination, delimiter=";", fieldnames=rows.fieldnames @@ -295,7 +295,7 @@ def filter_zensus_population(filename, dataset): Point(float(row["x_mp_100m"]), float(row["y_mp_100m"])) ) ) - return target(csv_file, dataset) + return filtered_target def filter_zensus_misc(filename, dataset): @@ -323,18 +323,20 @@ def filter_zensus_misc(filename, dataset): gitter_ids = set( pd.read_sql( - "SELECT grid_id from society.destatis_zensus_population_per_ha", + f"SELECT grid_id FROM {ZensusPopulation.targets.tables['zensus_population']}", con=db.engine(), ).grid_id.values ) - if not os.path.isfile(target(csv_file, dataset)): - with open( - csv_file, mode="r", newline="", encoding="iso-8859-1" - ) as inputs: + # inline target path (no helper) + filtered_target = csv_file.parent / f"{csv_file.stem}.{dataset}{csv_file.suffix}" + filtered_target.parent.mkdir(parents=True, exist_ok=True) + + if not filtered_target.exists(): + with open(csv_file, mode="r", newline="", encoding="iso-8859-1") as inputs: rows = csv.DictReader(inputs, delimiter=",") with open( - target(csv_file, dataset), + filtered_target, mode="w", newline="", encoding="iso-8859-1", @@ -346,62 +348,43 @@ def filter_zensus_misc(filename, dataset): output.writerows( row for row in rows if row["Gitter_ID_100m"] in gitter_ids ) - return 
target(csv_file, dataset) + return filtered_target def population_to_postgres(): """Import Zensus population data to postgres database""" - # Get information from data configuration file - data_config = egon.data.config.datasets() - zensus_population_orig = data_config["zensus_population"]["original_data"] - zensus_population_processed = data_config["zensus_population"]["processed"] - input_file = ( - Path(".") - / "data_bundle_egon_data" - / "zensus_population" - / zensus_population_orig["target"]["file"] - ) + input_file = Path(ZensusPopulation.targets.files["zensus_population"]).resolve() dataset = settings()["egon-data"]["--dataset-boundary"] - - # Read database configuration from docker-compose.yml docker_db_config = db.credentials() - - population_table = ( - f"{zensus_population_processed['schema']}" - f".{zensus_population_processed['table']}" - ) + population_table = ZensusPopulation.targets.tables["zensus_population"] with zipfile.ZipFile(input_file) as zf: for filename in zf.namelist(): - + if not filename.lower().endswith(".csv"): + continue zf.extract(filename) - - if dataset == "Everything": - filename_insert = filename - else: - filename_insert = filter_zensus_population(filename, dataset) + filename_insert = filename if dataset == "Everything" else filter_zensus_population(filename, dataset) host = ["-h", f"{docker_db_config['HOST']}"] port = ["-p", f"{docker_db_config['PORT']}"] pgdb = ["-d", f"{docker_db_config['POSTGRES_DB']}"] user = ["-U", f"{docker_db_config['POSTGRES_USER']}"] - command = [ + command= [ "-c", - rf"\copy {population_table} (grid_id, x_mp, y_mp, population)" - rf" FROM '{filename_insert}' DELIMITER ';' CSV HEADER;", + rf"\copy {population_table} (grid_id, x_mp, y_mp, population) " + rf"FROM '{filename_insert}' DELIMITER ';' CSV HEADER;", ] subprocess.run( ["psql"] + host + port + pgdb + user + command, env={"PGPASSWORD": docker_db_config["POSTGRES_PASSWORD"]}, ) - os.remove(filename) + os.remove(filename) db.execute_sql( 
f"UPDATE {population_table} zs" - " SET geom_point=ST_SetSRID(ST_MakePoint(zs.x_mp, zs.y_mp), 3035);" + " SET geom_point=ST_SetSRID(ST_MakePoint(zs.x_mp, zs.y_mp), 3035);" ) - db.execute_sql( f"UPDATE {population_table} zs" """ SET geom=ST_SetSRID( @@ -410,15 +393,13 @@ def population_to_postgres(): ); """ ) - db.execute_sql( - f"CREATE INDEX {zensus_population_processed['table']}_geom_idx ON" + f"CREATE INDEX {population_table.split('.')[1]}_geom_idx ON" f" {population_table} USING gist (geom);" ) - db.execute_sql( f"CREATE INDEX" - f" {zensus_population_processed['table']}_geom_point_idx" + f" {population_table.split('.')[1]}_geom_point_idx" f" ON {population_table} USING gist (geom_point);" ) @@ -426,31 +407,18 @@ def population_to_postgres(): def zensus_misc_to_postgres(): """Import data on buildings, households and apartments to postgres db""" - # Get information from data configuration file - data_config = egon.data.config.datasets() - zensus_misc_processed = data_config["zensus_misc"]["processed"] - zensus_population_processed = data_config["zensus_population"]["processed"] - file_path = Path(".") / "data_bundle_egon_data" / "zensus_population" - dataset = settings()["egon-data"]["--dataset-boundary"] - - population_table = ( - f"{zensus_population_processed['schema']}" - f".{zensus_population_processed['table']}" - ) - # Read database configuration from docker-compose.yml + dataset = settings()["egon-data"]["--dataset-boundary"] docker_db_config = db.credentials() - for input_file, table in zensus_misc_processed["file_table_map"].items(): - with zipfile.ZipFile(file_path / input_file) as zf: - csvfiles = [n for n in zf.namelist() if n.lower()[-3:] == "csv"] + for key, file_path in ZensusMiscellaneous.targets.files.items(): + zip_path = Path(file_path).resolve() + + with zipfile.ZipFile(zip_path) as zf: + csvfiles = [n for n in zf.namelist() if n.lower().endswith(".csv")] for filename in csvfiles: zf.extract(filename) - - if dataset == "Everything": - 
filename_insert = filename - else: - filename_insert = filter_zensus_misc(filename, dataset) + filename_insert = filename if dataset == "Everything" else filter_zensus_misc(filename, dataset) host = ["-h", f"{docker_db_config['HOST']}"] port = ["-p", f"{docker_db_config['PORT']}"] @@ -458,7 +426,7 @@ def zensus_misc_to_postgres(): user = ["-U", f"{docker_db_config['POSTGRES_USER']}"] command = [ "-c", - rf"\copy {zensus_population_processed['schema']}.{table}" + rf"\copy {ZensusMiscellaneous.targets.tables[key]}" f"""(grid_id, grid_id_new, attribute, @@ -475,29 +443,28 @@ def zensus_misc_to_postgres(): env={"PGPASSWORD": docker_db_config["POSTGRES_PASSWORD"]}, ) - os.remove(filename) + os.remove(filename) + db.execute_sql( - f"""UPDATE {zensus_population_processed['schema']}.{table} as b + f"""UPDATE {ZensusMiscellaneous.targets.tables[key]} as b SET zensus_population_id = zs.id - FROM {population_table} zs + FROM {ZensusPopulation.targets.tables["zensus_population"]} zs WHERE b.grid_id = zs.grid_id;""" ) db.execute_sql( - f"""ALTER TABLE {zensus_population_processed['schema']}.{table} - ADD CONSTRAINT {table}_fkey + f"""ALTER TABLE {ZensusMiscellaneous.targets.tables[key]} + ADD CONSTRAINT + {ZensusMiscellaneous.targets.get_table_name(key)}_fkey FOREIGN KEY (zensus_population_id) - REFERENCES {population_table}(id);""" + REFERENCES {ZensusPopulation.targets.tables["zensus_population"]}(id);""" ) - # Create combined table + # combined table create_combined_zensus_table() - - # Delete entries for unpopulated cells adjust_zensus_misc() - def create_combined_zensus_table(): """Create combined table with buildings, apartments and population per cell @@ -533,21 +500,14 @@ def adjust_zensus_misc(): None. 
""" - # Get information from data configuration file - data_config = egon.data.config.datasets() - zensus_population_processed = data_config["zensus_population"]["processed"] - zensus_misc_processed = data_config["zensus_misc"]["processed"] - - population_table = ( - f"{zensus_population_processed['schema']}" - f".{zensus_population_processed['table']}" - ) - for input_file, table in zensus_misc_processed["file_table_map"].items(): + + for table in ZensusMiscellaneous.targets.tables: db.execute_sql( f""" - DELETE FROM {zensus_population_processed['schema']}.{table} as b + DELETE FROM {ZensusMiscellaneous.targets.tables[table]} as b WHERE b.zensus_population_id IN ( - SELECT id FROM {population_table} + SELECT id FROM { + ZensusPopulation.targets.tables["zensus_population"]} WHERE population < 0);""" ) diff --git a/src/egon/data/datasets/zensus_mv_grid_districts.py b/src/egon/data/datasets/zensus_mv_grid_districts.py index 22923fb60..e6aa3ea5c 100644 --- a/src/egon/data/datasets/zensus_mv_grid_districts.py +++ b/src/egon/data/datasets/zensus_mv_grid_districts.py @@ -7,10 +7,10 @@ import geopandas as gpd from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.mv_grid_districts import MvGridDistricts from egon.data.datasets.zensus_vg250 import DestatisZensusPopulationPerHa -import egon.data.config + class ZensusMvGridDistricts(Dataset): @@ -30,7 +30,29 @@ class ZensusMvGridDistricts(Dataset): #: name: str = "ZensusMvGridDistricts" #: - version: str = "0.0.1" + version: str = "0.0.2" + + sources = DatasetSources( + tables={ + "zensus_population": { + "schema": "society", + "table": "destatis_zensus_population_per_ha", + }, + "egon_mv_grid_district": { + "schema": "grid", + "table": "egon_mv_grid_district", + }, + } + ) + + targets = DatasetTargets( + tables={ + "map": { + "schema": "boundaries", + "table": "egon_map_zensus_grid_districts", + } + } + ) def 
__init__(self, dependencies): super().__init__( @@ -73,27 +95,24 @@ def mapping(): MapZensusGridDistricts.__table__.drop(bind=db.engine(), checkfirst=True) MapZensusGridDistricts.__table__.create(bind=db.engine(), checkfirst=True) - # Get information from data configuration file - cfg = egon.data.config.datasets()["map_zensus_grid_districts"] - + sources = ZensusMvGridDistricts.sources.tables + target = ZensusMvGridDistricts.targets.tables["map"] + # Delete existsing data - db.execute_sql( - f"""DELETE FROM - {cfg['targets']['map']['schema']}.{cfg['targets']['map']['table']}""" - ) + db.execute_sql(f"DELETE FROM {target['schema']}.{target['table']}") # Select zensus cells zensus = db.select_geodataframe( f"""SELECT id as zensus_population_id, geom_point FROM - {cfg['sources']['zensus_population']['schema']}. - {cfg['sources']['zensus_population']['table']}""", + {sources['zensus_population']['schema']}. + {sources['zensus_population']['table']}""", geom_col="geom_point", ) grid_districts = db.select_geodataframe( f"""SELECT bus_id, geom - FROM {cfg['sources']['egon_mv_grid_district']['schema']}. - {cfg['sources']['egon_mv_grid_district']['table']}""", + FROM {sources['egon_mv_grid_district']['schema']}. 
+ {sources['egon_mv_grid_district']['table']}""", geom_col="geom", epsg=3035, ) @@ -103,8 +122,8 @@ def mapping(): # Insert results to database join[["zensus_population_id", "bus_id"]].to_sql( - cfg["targets"]["map"]["table"], - schema=cfg["targets"]["map"]["schema"], + target["table"], + schema=target["schema"], con=db.engine(), if_exists="replace", ) diff --git a/src/egon/data/datasets/zensus_vg250.py b/src/egon/data/datasets/zensus_vg250.py index e5d39906e..b2278c1e8 100755 --- a/src/egon/data/datasets/zensus_vg250.py +++ b/src/egon/data/datasets/zensus_vg250.py @@ -18,7 +18,7 @@ import pandas as pd from egon.data import db -from egon.data.datasets import Dataset +from egon.data.datasets import Dataset, DatasetSources, DatasetTargets from egon.data.datasets.vg250 import vg250_metadata_resources_fields from egon.data.metadata import ( context, @@ -28,16 +28,56 @@ meta_metadata, sources, ) -import egon.data.config + Base = declarative_base() class ZensusVg250(Dataset): + + name: str = "ZensusVg250" + version: str = "0.0.4" + + sources = DatasetSources( + tables={ + "zensus_population": { + "schema": "society", + "table": "destatis_zensus_population_per_ha", + }, + "vg250_municipalities": { + "schema": "boundaries", + "table": "vg250_gem", + }, + "map_zensus_vg250": { + "schema": "boundaries", + "table": "egon_map_zensus_vg250", + }, + }, + urls={ + "vg250_original_data": "https://daten.gdz.bkg.bund.de/produkte/vg/vg250_ebenen_0101/2020/vg250_01-01.geo84.shape.ebenen.zip" + }, + ) + targets = DatasetTargets( + tables={ + "map": { + "schema": "boundaries", + "table": "egon_map_zensus_vg250", + }, + "zensus_inside_germany": { + "schema": "society", + "table": "destatis_zensus_population_per_ha_inside_germany", + }, + "vg250_gem_population": { + "schema": "boundaries", + "table": "vg250_gem_population", + }, + } + ) + def __init__(self, dependencies): super().__init__( - name="ZensusVg250", - version="0.0.3", + name=ZensusVg250.name, + version=ZensusVg250.version, 
dependencies=dependencies, tasks=( map_zensus_vg250, @@ -177,26 +217,23 @@ def map_zensus_vg250(): MapZensusVg250.__table__.drop(bind=db.engine(), checkfirst=True) MapZensusVg250.__table__.create(bind=db.engine(), checkfirst=True) - # Get information from data configuration file - cfg = egon.data.config.datasets()["map_zensus_vg250"] + sources = ZensusVg250.sources.tables + target = ZensusVg250.targets.tables["map"] local_engine = db.engine() - db.execute_sql( - f"""DELETE FROM - {cfg['targets']['map']['schema']}.{cfg['targets']['map']['table']}""" - ) - + db.execute_sql(f"DELETE FROM {target['schema']}.{target['table']}") + gdf = db.select_geodataframe( f"""SELECT * FROM - {cfg['sources']['zensus_population']['schema']}. - {cfg['sources']['zensus_population']['table']}""", + {sources['zensus_population']['schema']}. + {sources['zensus_population']['table']}""", geom_col="geom_point", ) gdf_boundaries = db.select_geodataframe( - f"""SELECT * FROM {cfg['sources']['vg250_municipalities']['schema']}. - {cfg['sources']['vg250_municipalities']['table']}""", + f"""SELECT * FROM {sources['vg250_municipalities']['schema']}. + {sources['vg250_municipalities']['table']}""", geom_col="geometry", epsg=3035, ) @@ -246,8 +283,8 @@ def map_zensus_vg250(): ].set_geometry( "zensus_geom" ).to_postgis( - cfg["targets"]["map"]["table"], - schema=cfg["targets"]["map"]["schema"], + target["table"], + schema=target["schema"], con=local_engine, if_exists="replace", ) @@ -318,7 +355,8 @@ def population_in_municipalities(): srid = 3035 gem = db.select_geodataframe( - "SELECT * FROM boundaries.vg250_gem", + f"SELECT * FROM {ZensusVg250.sources.tables['vg250_municipalities']['schema']}." 
+ f"{ZensusVg250.sources.tables['vg250_municipalities']['table']}", geom_col="geometry", epsg=srid, index_col="id", @@ -329,11 +367,15 @@ def population_in_municipalities(): gem["area_km2"] = gem.area / 1000000 population = db.select_dataframe( - """SELECT id, population, vg250_municipality_id - FROM society.destatis_zensus_population_per_ha - INNER JOIN boundaries.egon_map_zensus_vg250 ON ( - society.destatis_zensus_population_per_ha.id = - boundaries.egon_map_zensus_vg250.zensus_population_id) + f"""SELECT id, population, vg250_municipality_id + FROM {ZensusVg250.sources.tables['zensus_population']['schema']}. + {ZensusVg250.sources.tables['zensus_population']['table']} + INNER JOIN {ZensusVg250.sources.tables['map_zensus_vg250']['schema']}. + {ZensusVg250.sources.tables['map_zensus_vg250']['table']} ON ( + {ZensusVg250.sources.tables['zensus_population']['schema']}. + {ZensusVg250.sources.tables['zensus_population']['table']}.id = + {ZensusVg250.sources.tables['map_zensus_vg250']['schema']}. 
+ {ZensusVg250.sources.tables['map_zensus_vg250']['table']}.zensus_population_id) WHERE population > 0""" ) @@ -348,8 +390,8 @@ def population_in_municipalities(): gem["population_density"] = gem["population_total"] / gem["area_km2"] gem.reset_index().to_postgis( - "vg250_gem_population", - schema="boundaries", + ZensusVg250.targets.tables["vg250_gem_population"]["table"], + schema=ZensusVg250.targets.tables["vg250_gem_population"]["schema"], con=db.engine(), if_exists="replace", ) @@ -527,7 +569,7 @@ def add_metadata_vg250_gem_pop(): Creates a metdadata JSON string and writes it to the database table comment """ - vg250_config = egon.data.config.datasets()["vg250"] + schema_table = ".".join( [ Vg250GemPopulation.__table__.schema, @@ -549,7 +591,7 @@ def add_metadata_vg250_gem_pop(): "mit ihren Grenzen, statistischen Schlüsselzahlen, Namen der " "Verwaltungseinheit sowie die spezifische Bezeichnung der " "Verwaltungsebene des jeweiligen Landes.", - "path": vg250_config["original_data"]["source"]["url"], + "path": ZensusVg250.sources.urls["vg250_original_data"], "licenses": licenses, }