|
1 | 1 | """ |
2 | | -App configuration: This will likely change when we have a common location for data files |
| 2 | +App configuration and static reference data loading. |
| 3 | +
|
| 4 | +Data files are static reference data that never change during runtime. |
| 5 | +They are loaded once at module import to avoid repeated file I/O on the hot path |
| 6 | +(start_task/stop_task calls for instance). |
3 | 7 | """ |
4 | 8 |
|
5 | 9 | import atexit |
6 | 10 | import json |
7 | 11 | import sys |
8 | 12 | from contextlib import ExitStack |
9 | | -from typing import Dict |
| 13 | +from typing import Any, Dict |
10 | 14 |
|
11 | 15 | import pandas as pd |
12 | 16 |
|
|
18 | 22 | from importlib_resources import files as importlib_resources_files |
19 | 23 |
|
20 | 24 |
|
# Module-level store for parsed reference data, keyed by string.
# Filled by _load_static_data() and, per country, by the DataSource getters.
_CACHE: Dict[str, Any] = {}
# Package name handed to importlib_resources_files when resolving data files.
_MODULE_NAME = "codecarbon"
| 27 | + |
| 28 | + |
def _get_resource_path(filepath: str):
    """
    Resolve a file bundled with the package to a filesystem path.

    :param filepath: resource location relative to the ``_MODULE_NAME`` package
    :return: the value produced by entering the ``importlib_resources_as_file``
        context for that resource
    """
    # The context is deliberately never exited here: closing is deferred to
    # interpreter shutdown so the returned path stays usable after this call.
    # NOTE(review): presumably as_file may extract to a temp file — confirm.
    cleanup = ExitStack()
    atexit.register(cleanup.close)
    resource = importlib_resources_files(_MODULE_NAME).joinpath(filepath)
    return cleanup.enter_context(importlib_resources_as_file(resource))
| 36 | + |
| 37 | + |
def _load_static_data() -> None:
    """
    Load all static reference data into ``_CACHE``.

    Reads two JSON files and two CSV files shipped with the package and
    stores the parsed results under fixed keys:

    - ``"global_energy_mix"``: parsed JSON
    - ``"cloud_emissions"``: ``pandas.DataFrame``
    - ``"carbon_intensity_per_source"``: parsed JSON
    - ``"cpu_power"``: ``pandas.DataFrame``

    JSON files are opened explicitly as UTF-8 so that parsing does not depend
    on the platform's default locale encoding.
    """
    # (cache key, resource path) in the original load order.
    sources = (
        # Global energy mix - used for emissions calculations
        ("global_energy_mix", "data/private_infra/global_energy_mix.json"),
        # Cloud emissions data
        ("cloud_emissions", "data/cloud/impact.csv"),
        # Carbon intensity per source
        (
            "carbon_intensity_per_source",
            "data/private_infra/carbon_intensity_per_source.json",
        ),
        # CPU power data
        ("cpu_power", "data/hardware/cpu_power.csv"),
    )
    for cache_key, resource in sources:
        path = _get_resource_path(resource)
        if resource.endswith(".json"):
            with open(path, encoding="utf-8") as f:
                _CACHE[cache_key] = json.load(f)
        else:
            _CACHE[cache_key] = pd.read_csv(path)
| 62 | + |
| 63 | + |
# Load static data at module import (import-time side effect: fills _CACHE)
_load_static_data()
| 66 | + |
| 67 | + |
21 | 68 | class DataSource: |
22 | 69 | def __init__(self): |
23 | 70 | self.config = { |
@@ -84,56 +131,63 @@ def cpu_power_path(self): |
84 | 131 |
|
def get_global_energy_mix_data(self) -> Dict:
    """
    Return the global energy mix data.

    The value is looked up in the module-level cache instead of being read
    from disk; every call returns the same cached object.
    """
    global_energy_mix: Dict = _CACHE["global_energy_mix"]
    return global_energy_mix
92 | 138 |
|
def get_cloud_emissions_data(self) -> pd.DataFrame:
    """
    Return the cloud regions impact data.

    The frame is looked up in the module-level cache instead of being parsed
    from CSV; every call returns the same cached object.
    """
    cloud_emissions: pd.DataFrame = _CACHE["cloud_emissions"]
    return cloud_emissions
98 | 145 |
|
def get_country_emissions_data(self, country_iso_code: str) -> Dict:
    """
    Returns Emissions Across Regions in a country.
    Data is cached on first access per country.

    :param country_iso_code: ISO code similar to one used in file names
    :return: emissions in lbs/MWh and region code
    :raises DataSourceException: when the path lookup raises ``KeyError``,
        i.e. no data path is specified for the country
    """
    cache_key = f"country_emissions_{country_iso_code}"
    if cache_key not in _CACHE:
        # Only the path lookup is guarded: it is the step that raises
        # KeyError for an unknown country.
        try:
            path = self.country_emissions_data_path(country_iso_code)
        except KeyError as err:
            raise DataSourceException from err
        # Explicit UTF-8 keeps JSON decoding independent of the locale.
        with open(path, encoding="utf-8") as f:
            _CACHE[cache_key] = json.load(f)
    return _CACHE[cache_key]
113 | 163 |
|
def get_country_energy_mix_data(self, country_iso_code: str) -> Dict:
    """
    Returns Energy Mix Across Regions in a country.
    Data is cached on first access per country.

    :param country_iso_code: ISO code similar to one used in file names
    :return: energy mix by region code
    """
    cache_key = f"country_energy_mix_{country_iso_code}"
    if cache_key not in _CACHE:
        path = self.country_energy_mix_data_path(country_iso_code)
        # Explicit UTF-8 keeps JSON decoding independent of the locale.
        with open(path, encoding="utf-8") as f:
            _CACHE[cache_key] = json.load(f)
    return _CACHE[cache_key]
123 | 177 |
|
def get_carbon_intensity_per_source_data(self) -> Dict:
    """
    Return the carbon intensity per source, in gCO2.eq/kWh.

    The value is looked up in the module-level cache instead of being read
    from disk; every call returns the same cached object.
    """
    carbon_intensity_per_source: Dict = _CACHE["carbon_intensity_per_source"]
    return carbon_intensity_per_source
131 | 184 |
|
def get_cpu_power_data(self) -> pd.DataFrame:
    """
    Return the CPU power data.

    The frame is looked up in the module-level cache instead of being parsed
    from CSV; every call returns the same cached object.
    """
    cpu_power: pd.DataFrame = _CACHE["cpu_power"]
    return cpu_power
137 | 191 |
|
138 | 192 |
|
139 | 193 | class DataSourceException(Exception): |
|
0 commit comments