From cdfeac25a6ef928f79707136ebdae3a100ff5bc2 Mon Sep 17 00:00:00 2001 From: Nicolas Colombi Date: Thu, 19 Dec 2024 12:01:20 +0100 Subject: [PATCH 01/13] optimize concatenation of centroids --- climada/hazard/centroids/centr.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/climada/hazard/centroids/centr.py b/climada/hazard/centroids/centr.py index b0c6365c7e..1e1b1fa3a7 100644 --- a/climada/hazard/centroids/centr.py +++ b/climada/hazard/centroids/centr.py @@ -332,7 +332,12 @@ def from_pnt_bounds(cls, points_bounds, res, crs=DEF_CRS): ) def append(self, centr): - """Append Centroids + """Append Centroids to the current object for batch concatenation. + + This method adds the `centr.gdf` to the list of DataFrames to be concatenated + later with `finalize_append()`. Instead of concatenating immediately, it accumulates + the centroids in `_batch_gdf` to perform the concatenation all at once, which is more + efficient for multiple appends. Note that the result might contain duplicate points if the object to append has an overlap with the current object. @@ -351,12 +356,31 @@ def append(self, centr): union : Union of Centroid objects. remove_duplicate_points : Remove duplicate points in a Centroids object. """ + + if not hasattr(self, "_batch_gdf"): + self._batch_gdf = [] # Initialize the batch + if not u_coord.equal_crs(self.crs, centr.crs): raise ValueError( f"The given centroids use different CRS: {self.crs}, {centr.crs}. " "The centroids are incompatible and cannot be concatenated." ) - self.gdf = pd.concat([self.gdf, centr.gdf]) + self._batch_gdf.append(centr.gdf) + + def finalize_append(self): + """Concatenate all batch-appended centroids into the main GeoDataFrame (gdf). + + This method should be called after all `append` operations have been performed on the + Centroids object. It concatenates all the accumulated GeoDataFrames stored in the + `_batch_gdf` list into the `gdf` attribute of the Centroids object. By doing this in one + step, it avoids the performance overhead associated with repeated concatenations. + + Once concatenation is complete, the `_batch_gdf` list is cleared to prepare for future + append operations. + """ + + self.gdf = pd.concat([self.gdf] + self._batch_gdf, ignore_index=True) + self._batch_gdf = [] # clear the batch after concatenation def union(self, *others): """Create the union of Centroids objects @@ -377,6 +401,7 @@ def union(self, *others): centroids = copy.deepcopy(self) for cent in others: centroids.append(cent) + centroids.finalize_append() return centroids.remove_duplicate_points() def remove_duplicate_points(self): From b237166934f1bf260a58ae4bd9afb84250182cff Mon Sep 17 00:00:00 2001 From: Nicolas Colombi Date: Thu, 19 Dec 2024 15:10:14 +0100 Subject: [PATCH 02/13] Optimize append and union methods to minimize repeated calls to append --- climada/hazard/centroids/centr.py | 55 +++++++++++-------------------- 1 file changed, 19 insertions(+), 36 deletions(-) diff --git a/climada/hazard/centroids/centr.py b/climada/hazard/centroids/centr.py index 1e1b1fa3a7..9ccdac2405 100644 --- a/climada/hazard/centroids/centr.py +++ b/climada/hazard/centroids/centr.py @@ -331,16 +331,16 @@ def from_pnt_bounds(cls, points_bounds, res, crs=DEF_CRS): } ) - def append(self, centr): - """Append Centroids to the current object for batch concatenation. + def append(self, *centr): + """Append Centroids to the current centroid object for concatenation. - This method adds the `centr.gdf` to the list of DataFrames to be concatenated - later with `finalize_append()`. Instead of concatenating immediately, it accumulates - the centroids in `_batch_gdf` to perform the concatenation all at once, which is more - efficient for multiple appends. + This method check that all centroids use the same CRS, append the list of centroids to + the initial Centroid object and eventually concatenate them to create a single centroid + object with the union of all centroids. Note that the result might contain duplicate points if the object to append has an overlap - with the current object. + with the current object. Duplicates points will be removed in `union` + by calling `remove_duplicate_points`. Parameters ---------- @@ -356,34 +356,18 @@ def append(self, centr): union : Union of Centroid objects. remove_duplicate_points : Remove duplicate points in a Centroids object. """ - - if not hasattr(self, "_batch_gdf"): - self._batch_gdf = [] # Initialize the batch - - if not u_coord.equal_crs(self.crs, centr.crs): - raise ValueError( - f"The given centroids use different CRS: {self.crs}, {centr.crs}. " - "The centroids are incompatible and cannot be concatenated." - ) - self._batch_gdf.append(centr.gdf) - - def finalize_append(self): - """Concatenate all batch-appended centroids into the main GeoDataFrame (gdf). - - This method should be called after all `append` operations have been performed on the - Centroids object. It concatenates all the accumulated GeoDataFrames stored in the - `_batch_gdf` list into the `gdf` attribute of the Centroids object. By doing this in one - step, it avoids the performance overhead associated with repeated concatenations. - - Once concatenation is complete, the `_batch_gdf` list is cleared to prepare for future - append operations. - """ - - self.gdf = pd.concat([self.gdf] + self._batch_gdf, ignore_index=True) - self._batch_gdf = [] # clear the batch after concatenation + for cc in centr: + if not u_coord.equal_crs(self.crs, cc.crs): + raise ValueError( + f"The given centroids use different CRS: {self.crs}, {cc.crs}. " + "The centroids are incompatible and cannot be concatenated." + ) + self.gdf = pd.concat([self.gdf] + [cc.gdf for cc in centr]) def union(self, *others): - """Create the union of Centroids objects + """Create the union of the current Centroids object with one or more other centroids + objects by passing the list of centroids to `append` for concatenation and then + removes duplicates. All centroids must have the same CRS. Points that are contained in more than one of the Centroids objects will only be contained once (i.e. duplicates are removed). @@ -399,9 +383,8 @@ def union(self, *others): Centroids object containing the union of all Centroids. """ centroids = copy.deepcopy(self) - for cent in others: - centroids.append(cent) - centroids.finalize_append() + centroids.append(*others) + return centroids.remove_duplicate_points() def remove_duplicate_points(self): From 76989eac8955875add0ba996215ed80bab048416 Mon Sep 17 00:00:00 2001 From: Nicolas Colombi Date: Thu, 19 Dec 2024 15:30:02 +0100 Subject: [PATCH 03/13] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dc6793de0..53ad8cc84d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,9 @@ Code freeze date: YYYY-MM-DD ### Changed +- `climada.hazard.centroids.centr.union` and `climada.hazard.centroids.centr.append`[#989](https://github.com/CLIMADA-project/climada_python/pull/989). Results: computing windfields is 10 times faster. + - `union` does not iterate anymore over `append` but calls directly `append` with the entire list of centroids objects. + - `append` concatenate centroids only once, when all centroids have been appended, instead of concatenating after every centroid is appended. - `climada.util.coordinates.get_country_geometries` function: Now throwing a ValueError if unregognized ISO country code is given (before, the invalid ISO code was ignored) [#980](https://github.com/CLIMADA-project/climada_python/pull/980) - Improved scaling factors implemented in `climada.hazard.trop_cyclone.apply_climate_scenario_knu` to model the impact of climate changes to tropical cyclones [#734](https://github.com/CLIMADA-project/climada_python/pull/734) - In `climada.util.plot.geo_im_from_array`, NaNs are plotted in gray while cells with no centroid are not plotted [#929](https://github.com/CLIMADA-project/climada_python/pull/929) From f9121d766ea219bab304b0e800c6377f48623e4f Mon Sep 17 00:00:00 2001 From: Nicolas Colombi <115944312+NicolasColombi@users.noreply.github.com> Date: Fri, 20 Dec 2024 11:52:36 +0100 Subject: [PATCH 04/13] Update CHANGELOG.md Co-authored-by: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> --- CHANGELOG.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53ad8cc84d..0d8bcc677e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,9 +31,7 @@ Code freeze date: YYYY-MM-DD ### Changed -- `climada.hazard.centroids.centr.union` and `climada.hazard.centroids.centr.append`[#989](https://github.com/CLIMADA-project/climada_python/pull/989). Results: computing windfields is 10 times faster. - - `union` does not iterate anymore over `append` but calls directly `append` with the entire list of centroids objects. - - `append` concatenate centroids only once, when all centroids have been appended, instead of concatenating after every centroid is appended. +- `Centroids.append` now takes multiple arguments and provides a performance boost when doing so [#989](https://github.com/CLIMADA-project/climada_python/pull/989) - `climada.util.coordinates.get_country_geometries` function: Now throwing a ValueError if unregognized ISO country code is given (before, the invalid ISO code was ignored) [#980](https://github.com/CLIMADA-project/climada_python/pull/980) - Improved scaling factors implemented in `climada.hazard.trop_cyclone.apply_climate_scenario_knu` to model the impact of climate changes to tropical cyclones [#734](https://github.com/CLIMADA-project/climada_python/pull/734) - In `climada.util.plot.geo_im_from_array`, NaNs are plotted in gray while cells with no centroid are not plotted [#929](https://github.com/CLIMADA-project/climada_python/pull/929) From 3d1f279424e57b05248cf3f93755d5ce081ebfa3 Mon Sep 17 00:00:00 2001 From: Nicolas Colombi <115944312+NicolasColombi@users.noreply.github.com> Date: Fri, 20 Dec 2024 11:53:50 +0100 Subject: [PATCH 05/13] Update climada/hazard/centroids/centr.py Co-authored-by: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> --- climada/hazard/centroids/centr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/climada/hazard/centroids/centr.py b/climada/hazard/centroids/centr.py index 9ccdac2405..c80aeadcdd 100644 --- a/climada/hazard/centroids/centr.py +++ b/climada/hazard/centroids/centr.py @@ -339,8 +339,8 @@ def append(self, *centr): object with the union of all centroids. Note that the result might contain duplicate points if the object to append has an overlap - with the current object. Duplicates points will be removed in `union` - by calling `remove_duplicate_points`. + with the current object. Remove duplicates by either using :py:meth:`union` + or calling :py:meth:`remove_duplicate_points` after appending. Parameters ---------- From 565d42f89bce8ae3251d4587d0b72c8e0cfa9167 Mon Sep 17 00:00:00 2001 From: Nicolas Colombi Date: Fri, 20 Dec 2024 11:56:58 +0100 Subject: [PATCH 06/13] fix linter issue --- climada/hazard/centroids/centr.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/climada/hazard/centroids/centr.py b/climada/hazard/centroids/centr.py index c80aeadcdd..aff7b2539f 100644 --- a/climada/hazard/centroids/centr.py +++ b/climada/hazard/centroids/centr.py @@ -356,13 +356,13 @@ def append(self, *centr): union : Union of Centroid objects. remove_duplicate_points : Remove duplicate points in a Centroids object. """ - for cc in centr: - if not u_coord.equal_crs(self.crs, cc.crs): + for other in centr: + if not u_coord.equal_crs(self.crs, other.crs): raise ValueError( - f"The given centroids use different CRS: {self.crs}, {cc.crs}. " + f"The given centroids use different CRS: {self.crs}, {other.crs}. " "The centroids are incompatible and cannot be concatenated." ) - self.gdf = pd.concat([self.gdf] + [cc.gdf for cc in centr]) + self.gdf = pd.concat([self.gdf] + [other.gdf for other in centr]) def union(self, *others): """Create the union of the current Centroids object with one or more other centroids From 8e08be5547cbca26ec42d2bde331c2cc4f1357b6 Mon Sep 17 00:00:00 2001 From: Nicolas Colombi Date: Fri, 20 Dec 2024 14:12:29 +0100 Subject: [PATCH 07/13] add test for append with multiple arguments --- climada/hazard/centroids/centr.py | 10 +++++----- climada/hazard/centroids/test/test_centr.py | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/climada/hazard/centroids/centr.py b/climada/hazard/centroids/centr.py index aff7b2539f..4cb6e86dfa 100644 --- a/climada/hazard/centroids/centr.py +++ b/climada/hazard/centroids/centr.py @@ -344,8 +344,8 @@ def append(self, *centr): Parameters ---------- - centr : Centroids - Centroids to append. The centroids need to have the same CRS. + centr : list + List of Centroids to append. The centroids need to have the same CRS. Raises ------ @@ -366,7 +366,7 @@ def append(self, *centr): def union(self, *others): """Create the union of the current Centroids object with one or more other centroids - objects by passing the list of centroids to `append` for concatenation and then + objects by passing the list of centroids to :py:meth:`append` for concatenation and then removes duplicates. All centroids must have the same CRS. Points that are contained in more than one of the @@ -374,8 +374,8 @@ def union(self, *others): Parameters ---------- - others : list of Centroids - Centroids contributing to the union. + others : list + List of Centroids contributing to the union. Returns ------- diff --git a/climada/hazard/centroids/test/test_centr.py b/climada/hazard/centroids/test/test_centr.py index 778d9383ef..34f73d3129 100644 --- a/climada/hazard/centroids/test/test_centr.py +++ b/climada/hazard/centroids/test/test_centr.py @@ -816,6 +816,26 @@ def test_append_dif_crs(self): with self.assertRaises(ValueError): self.centr.append(centr2) + def test_append_multiple_arguments(self): + """Test passing append multiple arguments in the form of a list of Centroids.""" + # create a single centroid + lat, lon = np.array([1, 2]), np.array([1, 2]) + centr = Centroids(lat=lat, lon=lon) + # create a list of centroids + coords = [(np.array([3, 4]), np.array([3, 4]))] + centroids_list = [Centroids(lat=lat, lon=lon) for lat, lon in coords] + + centr.append(*centroids_list) + + self.assertEqual(centr.lat[0], 1) + self.assertEqual(centr.lat[1], 2) + self.assertEqual(centr.lat[2], 3) + self.assertEqual(centr.lat[3], 4) + self.assertEqual(centr.lon[0], 1) + self.assertEqual(centr.lon[1], 2) + self.assertEqual(centr.lon[2], 3) + self.assertEqual(centr.lon[3], 4) + def test_remove_duplicate_pass(self): """Test remove_duplicate_points""" centr = Centroids( From 68b006e5be2dd9f0785ec1bc7f1ca1622c0a373e Mon Sep 17 00:00:00 2001 From: Nicolas Colombi Date: Fri, 20 Dec 2024 14:18:54 +0100 Subject: [PATCH 08/13] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d8bcc677e..8c1401339d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ Code freeze date: YYYY-MM-DD ### Fixed +- Resolved an issue where windspeed computation was much slower than in Climada v3 [#989](https://github.com/CLIMADA-project/climada_python/pull/989) - File handles are being closed after reading netcdf files with `climada.hazard` modules [#953](https://github.com/CLIMADA-project/climada_python/pull/953) - Avoids a ValueError in the impact calculation for cases with a single exposure point and MDR values of 0, by explicitly removing zeros in `climada.hazard.Hazard.get_mdr` [#933](https://github.com/CLIMADA-project/climada_python/pull/948) From b0003f35180202073dba21983a5988ea4ecd66c0 Mon Sep 17 00:00:00 2001 From: Nicolas Colombi <115944312+NicolasColombi@users.noreply.github.com> Date: Fri, 20 Dec 2024 14:51:32 +0100 Subject: [PATCH 09/13] use numpy for testing Co-authored-by: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> --- climada/hazard/centroids/test/test_centr.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/climada/hazard/centroids/test/test_centr.py b/climada/hazard/centroids/test/test_centr.py index 34f73d3129..b7747268df 100644 --- a/climada/hazard/centroids/test/test_centr.py +++ b/climada/hazard/centroids/test/test_centr.py @@ -827,14 +827,8 @@ def test_append_multiple_arguments(self): centr.append(*centroids_list) - self.assertEqual(centr.lat[0], 1) - self.assertEqual(centr.lat[1], 2) - self.assertEqual(centr.lat[2], 3) - self.assertEqual(centr.lat[3], 4) - self.assertEqual(centr.lon[0], 1) - self.assertEqual(centr.lon[1], 2) - self.assertEqual(centr.lon[2], 3) - self.assertEqual(centr.lon[3], 4) + np.testing.assert_array_equal(centr.lat, [1, 2, 3, 4]) + np.testing.assert_array_equal(centr.lon, [1, 2, 3, 4]) def test_remove_duplicate_pass(self): """Test remove_duplicate_points""" From c56eb8b19c3a23ea883a0393e63b940d33fb5aa2 Mon Sep 17 00:00:00 2001 From: Nicolas Colombi <115944312+NicolasColombi@users.noreply.github.com> Date: Mon, 23 Dec 2024 11:10:09 +0100 Subject: [PATCH 10/13] Update centr.py Update docstrings to clarify the type of argument of union and append --- climada/hazard/centroids/centr.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/climada/hazard/centroids/centr.py b/climada/hazard/centroids/centr.py index 4cb6e86dfa..8a90f85096 100644 --- a/climada/hazard/centroids/centr.py +++ b/climada/hazard/centroids/centr.py @@ -344,8 +344,8 @@ def append(self, *centr): Parameters ---------- - centr : list - List of Centroids to append. The centroids need to have the same CRS. + centr : Centroids + Centroids to append. The centroids need to have the same CRS. Raises ------ @@ -374,8 +374,8 @@ def union(self, *others): Parameters ---------- - others : list - List of Centroids contributing to the union. + others : Centroids + Centroids contributing to the union. Returns ------- From d9d1b94549cf2eabc4faa62a0f8724dc95019b28 Mon Sep 17 00:00:00 2001 From: Nicolas Colombi <115944312+NicolasColombi@users.noreply.github.com> Date: Mon, 23 Dec 2024 15:25:19 +0100 Subject: [PATCH 11/13] Update climada/hazard/centroids/test/test_centr.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sarah Hülsen <49907095+sarah-hlsn@users.noreply.github.com> --- climada/hazard/centroids/test/test_centr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/climada/hazard/centroids/test/test_centr.py b/climada/hazard/centroids/test/test_centr.py index b7747268df..0e36693719 100644 --- a/climada/hazard/centroids/test/test_centr.py +++ b/climada/hazard/centroids/test/test_centr.py @@ -817,7 +817,7 @@ def test_append_dif_crs(self): self.centr.append(centr2) def test_append_multiple_arguments(self): - """Test passing append multiple arguments in the form of a list of Centroids.""" + """Test passing append() multiple arguments in the form of a list of Centroids.""" # create a single centroid lat, lon = np.array([1, 2]), np.array([1, 2]) centr = Centroids(lat=lat, lon=lon) From 59acc603209475cae465b277003496cec846be47 Mon Sep 17 00:00:00 2001 From: Nicolas Colombi <115944312+NicolasColombi@users.noreply.github.com> Date: Mon, 23 Dec 2024 15:25:41 +0100 Subject: [PATCH 12/13] Update climada/hazard/centroids/centr.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sarah Hülsen <49907095+sarah-hlsn@users.noreply.github.com> --- climada/hazard/centroids/centr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/climada/hazard/centroids/centr.py b/climada/hazard/centroids/centr.py index 8a90f85096..7d05261184 100644 --- a/climada/hazard/centroids/centr.py +++ b/climada/hazard/centroids/centr.py @@ -335,7 +335,7 @@ def append(self, *centr): """Append Centroids to the current centroid object for concatenation. This method check that all centroids use the same CRS, append the list of centroids to - the initial Centroid object and eventually concatenate them to create a single centroid + the initial Centroid object and eventually concatenates them to create a single centroid object with the union of all centroids. Note that the result might contain duplicate points if the object to append has an overlap From d27ae7a5ab13c3da6327902b32769f827db91b9f Mon Sep 17 00:00:00 2001 From: Nicolas Colombi <115944312+NicolasColombi@users.noreply.github.com> Date: Mon, 23 Dec 2024 15:26:29 +0100 Subject: [PATCH 13/13] Update climada/hazard/centroids/centr.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sarah Hülsen <49907095+sarah-hlsn@users.noreply.github.com> --- climada/hazard/centroids/centr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/climada/hazard/centroids/centr.py b/climada/hazard/centroids/centr.py index 7d05261184..c4044f7dd5 100644 --- a/climada/hazard/centroids/centr.py +++ b/climada/hazard/centroids/centr.py @@ -334,7 +334,7 @@ def from_pnt_bounds(cls, points_bounds, res, crs=DEF_CRS): def append(self, *centr): """Append Centroids to the current centroid object for concatenation. - This method check that all centroids use the same CRS, append the list of centroids to + This method checks that all centroids use the same CRS, appends the list of centroids to the initial Centroid object and eventually concatenates them to create a single centroid object with the union of all centroids.