From b0b5c48f783e25107f0d8291efdb6cdfb9e014c2 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Fri, 13 Feb 2026 12:12:14 -0800 Subject: [PATCH 01/12] refactor and add comments to subwatershed visit order conditionals --- src/pygeoprocessing/routing/routing.pyx | 59 +++++++++++++++++-------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/src/pygeoprocessing/routing/routing.pyx b/src/pygeoprocessing/routing/routing.pyx index 4f12528f..96be0109 100644 --- a/src/pygeoprocessing/routing/routing.pyx +++ b/src/pygeoprocessing/routing/routing.pyx @@ -3848,6 +3848,18 @@ def calculate_subwatershed_boundary( stream_layer = stream_vector.GetLayer() # construct linkage data structure for upstream streams + # + # each stream feature starts and ends at a confluence point. + # there are no confluence points midway along a stream. + # a stream may have multiple tributaries only if they share a confluence point. + # + # therefore if stream B is a tributary of stream A, then + # stream A's upstream coordinate (its starting point) equals + # stream B's downstream coordinate (its end point) + # + # map each stream's downstream endpoint to its FID + # we will use this to look up a stream's tributaries + # given its upstream coordinate. upstream_fid_map = collections.defaultdict(list) for stream_feature in stream_layer: ds_x = int(stream_feature.GetField('ds_x')) @@ -3864,6 +3876,11 @@ def calculate_subwatershed_boundary( # first stream_layer.SetAttributeFilter(f'"outlet"=1') # these are done last + # + # outlet streams are the lowest/final streams, they are not a tributary + # to any other stream. + # + # visit outlet streams in strahler stream order from largest to smallest for _, outlet_fid in sorted([ (x.GetField('order'), x.GetFID()) for x in stream_layer], reverse=True): @@ -3876,12 +3893,17 @@ def calculate_subwatershed_boundary( us_x = int(working_feature.GetField('us_x')) us_y = int(working_feature.GetField('us_y')) + ds_x = int(working_feature.GetField('ds_x')) + ds_y = int(working_feature.GetField('ds_y')) ds_x_1 = int(working_feature.GetField('ds_x_1')) ds_y_1 = int(working_feature.GetField('ds_y_1')) - upstream_coord = (us_x, us_y) + # if this stream has any tributaries that have not yet been + # processed, add them to the stack. + # otherwise, if all tributaries have been processed, we can + # proceed to calculate the subwatershed. upstream_fids = [ - fid for fid in upstream_fid_map[upstream_coord] + fid for fid in upstream_fid_map[(us_x, us_y)] if fid not in processed_nodes] if upstream_fids: working_stack.extend(upstream_fids) @@ -3890,26 +3912,27 @@ def calculate_subwatershed_boundary( # the `not outlet_at_confluence` bit allows us to seed # even if the order is 1, otherwise confluences fill # the order 1 streams - if (working_feature.GetField('order') > 1 or - not outlet_at_confluence): - if outlet_at_confluence: - # seed the upstream point - visit_order_stack.append((working_fid, us_x, us_y)) - else: - # seed the downstream but +1 step point - visit_order_stack.append( - (working_fid, ds_x_1, ds_y_1)) + # + # if order == 1: is a headwater stream (has no tributaries) + # if order > 1: has tributaries + + # if this stream has tributaries, and we are using the confluence + # point as the outlet, then we can seed the upstream point + # (which is the downstream point of the tributaries) + if working_feature.GetField('order') > 1 and outlet_at_confluence: + visit_order_stack.append((working_fid, us_x, us_y)) + # if this stream is an outlet, we need to push its downstream point + # onto the stack, because there are no further streams to do so if working_feature.GetField('outlet') == 1: # an outlet is a special case where the outlet itself # should be a subwatershed done last. - ds_x = int(working_feature.GetField('ds_x')) - ds_y = int(working_feature.GetField('ds_y')) - if not outlet_at_confluence: - # undo the previous visit because it will be at - # one pixel up and we want the pixel right at - # the outlet - visit_order_stack.pop() visit_order_stack.append((working_fid, ds_x, ds_y)) + # if not an outlet and we are using 1 pixel up from the confluence + # point, seed the downstream+1 point + # TODO: why does this add DS+1 point of this stream and not of its tributaries? + elif not outlet_at_confluence: + visit_order_stack.append((working_fid, ds_x_1, ds_y_1)) + cdef int edge_side, edge_dir, cell_to_test, out_dir_increase=-1 cdef int left, right, n_steps, terminated_early From 6311db11f20974c00a0a34852abc0885c737b383 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Fri, 13 Feb 2026 13:38:55 -0800 Subject: [PATCH 02/12] add ManagedRaster.is_out_of_bounds function --- src/pygeoprocessing/extensions.pxd | 1 + .../extensions/ManagedRaster.h | 7 ++ src/pygeoprocessing/routing/routing.pyx | 91 ++++++++++--------- 3 files changed, 55 insertions(+), 44 deletions(-) diff --git a/src/pygeoprocessing/extensions.pxd b/src/pygeoprocessing/extensions.pxd index 564621ce..e568d201 100644 --- a/src/pygeoprocessing/extensions.pxd +++ b/src/pygeoprocessing/extensions.pxd @@ -53,6 +53,7 @@ cdef extern from "extensions/ManagedRaster.h": double get(long xi, long yi) void _load_block(int block_index) except * void close() + bint is_out_of_bounds(int x, int y) cdef cppclass ManagedFlowDirRaster[T]: LRUCache[int, double*]* lru_cache diff --git a/src/pygeoprocessing/extensions/ManagedRaster.h b/src/pygeoprocessing/extensions/ManagedRaster.h index 17382847..f8204df2 100644 --- a/src/pygeoprocessing/extensions/ManagedRaster.h +++ b/src/pygeoprocessing/extensions/ManagedRaster.h @@ -380,6 +380,13 @@ class ManagedRaster { delete lru_cache; free(actualBlockWidths); } + + bool is_out_of_bounds(long x, long y) { + if (x < 0 or x >= raster_x_size or y < 0 or y >= raster_y_size) { + return true; + } + return false; + } }; // Represents a flow direction raster, which may be of type MFD or D8 diff --git a/src/pygeoprocessing/routing/routing.pyx b/src/pygeoprocessing/routing/routing.pyx index 96be0109..ee06330b 100644 --- a/src/pygeoprocessing/routing/routing.pyx +++ b/src/pygeoprocessing/routing/routing.pyx @@ -3026,13 +3026,12 @@ def extract_strahler_streams_d8( # 321 # 4x0 # 567 - cdef unsigned int xoff, yoff, i, j, d, d_n, n_cols, n_rows + cdef unsigned int xoff, yoff, i, j, d, d_n cdef unsigned int win_xsize, win_ysize - n_cols, n_rows = flow_dir_info['raster_size'] - LOGGER.info('(extract_strahler_streams_d8): seed the drains') - cdef unsigned long n_pixels = n_cols * n_rows + cdef unsigned long n_pixels = ( + flow_dir_managed_raster.raster_x_size * flow_dir_managed_raster.raster_y_size) cdef long n_processed = 0 cdef time_t last_log_time last_log_time = ctime(NULL) @@ -3093,7 +3092,7 @@ def extract_strahler_streams_d8( d_n = flow_dir_managed_raster.get(x_l, y_l) x_n = x_l + COL_OFFSETS[d_n] y_n = y_l + ROW_OFFSETS[d_n] - if (x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_rows or + if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or flow_dir_managed_raster.get( x_n, y_n) == flow_nodata): is_drain = 1 @@ -3109,7 +3108,7 @@ def extract_strahler_streams_d8( x_n = x_l + COL_OFFSETS[d] y_n = y_l + ROW_OFFSETS[d] # check if on border - if x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_rows: + if flow_dir_managed_raster.is_out_of_bounds(x_n, y_n): continue d_n = flow_dir_managed_raster.get(x_n, y_n) if d_n == flow_nodata: @@ -3160,8 +3159,8 @@ def extract_strahler_streams_d8( payload = _calculate_stream_geometry( source_stream_point.xi, source_stream_point.yi, source_stream_point.upstream_d8_dir, - flow_dir_info['geotransform'], n_cols, n_rows, - flow_accum_managed_raster, flow_dir_managed_raster, flow_nodata, + flow_dir_info['geotransform'], flow_accum_managed_raster, + flow_dir_managed_raster, flow_nodata, min_flow_accum_threshold, coord_to_stream_ids) if payload is None: LOGGER.debug( @@ -3401,7 +3400,7 @@ def extract_strahler_streams_d8( 'upstream_d8_dir') payload = _calculate_stream_geometry( ds_x, ds_y, upstream_d8_dir, - flow_dir_info['geotransform'], n_cols, n_rows, + flow_dir_info['geotransform'], flow_accum_managed_raster, flow_dir_managed_raster, flow_nodata, working_flow_accum_threshold, @@ -3593,6 +3592,10 @@ def _build_discovery_finish_rasters( target_finish_raster_path): """Generates a discovery and finish time raster for a given d8 flow path. + The discovery raster records the order in which each pixel is visited, from + 0 (visited first) to the total number of pixels (visited last). Pixels are + visited by starting from drain points and working upslope. + Args: flow_dir_d8_raster_path_band (tuple): a D8 flow raster path band tuple. Paths may use any GDAL-supported scheme, including virtual file @@ -3607,8 +3610,6 @@ def _build_discovery_finish_rasters( """ flow_dir_info = pygeoprocessing.get_raster_info( flow_dir_d8_raster_path_band[0]) - cdef int n_cols, n_rows - n_cols, n_rows = flow_dir_info['raster_size'] cdef int flow_dir_nodata = ( flow_dir_info['nodata'][flow_dir_d8_raster_path_band[1]-1]) @@ -3633,12 +3634,14 @@ def _build_discovery_finish_rasters( cdef long discovery_count = 0 cdef int n_processed, n_pixels - n_pixels = n_rows * n_cols + n_pixels = ( + flow_dir_managed_raster.raster_x_size * + flow_dir_managed_raster.raster_x_size) n_processed = 0 cdef time_t last_log_time = ctime(NULL) cdef int n_pushed - cdef int i, j, xoff, yoff, win_xsize, win_ysize, x_l, y_l, x_n, y_n + cdef int i, j, xoff, yoff, win_xsize, win_ysize, x, y, x_n, y_n cdef int n_dir, test_dir for offset_dict in pygeoprocessing.iterblocks( @@ -3655,24 +3658,26 @@ def _build_discovery_finish_rasters( win_ysize = offset_dict['win_ysize'] n_processed += win_xsize * win_ysize - for i in range(win_xsize): - for j in range(win_ysize): - x_l = xoff + i - y_l = yoff + j + for xi in range(win_xsize): + for yi in range(win_ysize): + x = xoff + xi + y = yoff + yi # check to see if this pixel is a drain - d_n = flow_dir_managed_raster.get(x_l, y_l) - if d_n == flow_dir_nodata: + flow_dir = flow_dir_managed_raster.get(x, y) + if flow_dir == flow_dir_nodata: continue - # check if downstream neighbor runs off raster or is nodata - x_n = x_l + COL_OFFSETS[d_n] - y_n = y_l + ROW_OFFSETS[d_n] + # find the downstream neighbor (the pixel that this pixel drains to) + x_n = x + COL_OFFSETS[flow_dir] + y_n = y + ROW_OFFSETS[flow_dir] - if (x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_rows or + # if the downstream neighbor runs off raster or is nodata, then + # this pixel is a drain point, so we can push it to the stack + if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or flow_dir_managed_raster.get( x_n, y_n) == flow_dir_nodata): - discovery_stack.push(CoordinateType(x_l, y_l)) - finish_stack.push(FinishType(x_l, y_l, 1)) + discovery_stack.push(CoordinateType(x, y)) + finish_stack.push(FinishType(x, y, 1)) while not discovery_stack.empty(): # This coordinate is the downstream end of the stream @@ -3689,15 +3694,20 @@ def _build_discovery_finish_rasters( for test_dir in range(8): x_n = raster_coord.xi + COL_OFFSETS[test_dir % 8] y_n = raster_coord.yi + ROW_OFFSETS[test_dir % 8] - if x_n < 0 or y_n < 0 or \ - x_n >= n_cols or y_n >= n_rows: + + # skip neighbors that are outside of the raster bounds, + # or that have nodata + if flow_dir_managed_raster.is_out_of_bounds(x_n, y_n): continue n_dir = flow_dir_managed_raster.get(x_n, y_n) if n_dir == flow_dir_nodata: continue + + # if the neighbor drains to this pixel, push it to the stack if INFLOW_OFFSETS[test_dir] == n_dir: discovery_stack.push(CoordinateType(x_n, y_n)) n_pushed += 1 + # this reference is for the previous top and represents # how many elements must be processed before finish # time can be defined @@ -3804,9 +3814,6 @@ def calculate_subwatershed_boundary( discovery_time_raster_path) cdef long discovery_nodata = discovery_info['nodata'][0] - cdef unsigned int n_cols, n_rows - n_cols, n_rows = discovery_info['raster_size'] - geotransform = discovery_info['geotransform'] cdef double g0, g1, g2, g3, g4, g5 g0, g1, g2, g3, g4, g5 = geotransform @@ -3988,7 +3995,6 @@ def calculate_subwatershed_boundary( edge_dir = (cell_to_test+2) % 8 if _in_watershed( x_l, y_l, cell_to_test, discovery, finish, - n_cols, n_rows, discovery_managed_raster, discovery_nodata): edge_side = (edge_side-2) % 8 edge_dir = (edge_dir-2) % 8 @@ -4015,12 +4021,12 @@ def calculate_subwatershed_boundary( LOGGER.warning('quitting, too many steps') terminated_early = 1 break - if x_l < 0 or y_l < 0 or x_l >= n_cols or y_l >= n_rows: + if discovery_managed_raster.is_out_of_bounds(x_l, y_l): # This is unexpected but worth checking since missing this # error would be very difficult to debug. raise RuntimeError( - f'{x_l}, {y_l} out of bounds for ' - f'{n_cols}x{n_rows} raster.') + f'{x_l}, {y_l} out of bounds for {discovery_managed_raster.raster_x_size}' + f'x{discovery_managed_raster.raster_y_size} raster.') if edge_side - ((edge_dir-2) % 8) == 0: # counterclockwise configuration left = edge_dir @@ -4032,10 +4038,10 @@ def calculate_subwatershed_boundary( left = (edge_side+1) out_dir_increase = -2 left_in = _in_watershed( - x_l, y_l, left, discovery, finish, n_cols, n_rows, + x_l, y_l, left, discovery, finish, discovery_managed_raster, discovery_nodata) right_in = _in_watershed( - x_l, y_l, right, discovery, finish, n_cols, n_rows, + x_l, y_l, right, discovery, finish, discovery_managed_raster, discovery_nodata) if right_in: # turn right @@ -4501,7 +4507,6 @@ cdef void _diagonal_fill_step( cdef int _in_watershed( int x_l, int y_l, int direction_to_test, int discovery, int finish, - int n_cols, int n_rows, ManagedRaster discovery_managed_raster, long discovery_nodata): """Test if pixel in direction is in the watershed. @@ -4513,8 +4518,6 @@ cdef int _in_watershed( came from discovery/finish (long): the discovery and finish time that defines whether a pixel discovery time is inside a watershed or not. - n_cols/n_rows (int): number of columns/rows in the discovery raster, - used to ensure step does not go out of bounds. discovery_managed_raster (ManagedRaster): discovery time raster x/y gives the discovery time for that pixel. discovery_nodata (long): nodata value for discovery raster @@ -4524,7 +4527,8 @@ cdef int _in_watershed( """ cdef int x_n = x_l + COL_OFFSETS[direction_to_test] cdef int y_n = y_l + ROW_OFFSETS[direction_to_test] - if x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_rows: + if (x_n < 0 or y_n < 0 or x_n >= discovery_managed_raster.raster_x_size or + y_n >= discovery_managed_raster.raster_y_size): return 0 cdef long point_discovery = discovery_managed_raster.get(x_n, y_n) return (point_discovery != discovery_nodata and @@ -4533,8 +4537,8 @@ cdef int _in_watershed( cdef _calculate_stream_geometry( - int x_l, int y_l, int upstream_d8_dir, geotransform, int n_cols, - int n_rows, ManagedRaster flow_accum_managed_raster, + int x_l, int y_l, int upstream_d8_dir, geotransform, + ManagedRaster flow_accum_managed_raster, ManagedRaster flow_dir_managed_raster, int flow_dir_nodata, int flow_accum_threshold, coord_to_stream_ids): """Calculate the upstream geometry from the given point. @@ -4548,7 +4552,6 @@ cdef _calculate_stream_geometry( upstream_d8_dir (int): upstream D8 direction to search geotransform (list): 6 element list representing the geotransform used to convert to georeferenced coordinates. - n_cols/n_rows (int): number of columns and rows in raster. flow_accum_managed_raster (ManagedRaster): flow accumulation raster flow_dir_managed_raster (ManagedRaster): d8 flow direction raster flow_dir_nodata (int): nodata for flow direction @@ -4622,7 +4625,7 @@ cdef _calculate_stream_geometry( y_n = y_l + ROW_OFFSETS[d] # check out of bounds - if x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_rows: + if flow_dir_managed_raster.is_out_of_bounds(x_n, y_n): continue # check for nodata From 8508f2c286ce81fdac6f020ddc3a13d483371c58 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Fri, 13 Feb 2026 13:49:18 -0800 Subject: [PATCH 03/12] add ManagedRaster.is_out_of_bounds_or_nodata function --- src/pygeoprocessing/extensions.pxd | 1 + .../extensions/ManagedRaster.h | 29 ++++++++++++------- src/pygeoprocessing/routing/routing.pyx | 28 +++++------------- 3 files changed, 27 insertions(+), 31 deletions(-) diff --git a/src/pygeoprocessing/extensions.pxd b/src/pygeoprocessing/extensions.pxd index e568d201..4ee61b1d 100644 --- a/src/pygeoprocessing/extensions.pxd +++ b/src/pygeoprocessing/extensions.pxd @@ -54,6 +54,7 @@ cdef extern from "extensions/ManagedRaster.h": void _load_block(int block_index) except * void close() bint is_out_of_bounds(int x, int y) + bint is_out_of_bounds_or_nodata(int x, int y) cdef cppclass ManagedFlowDirRaster[T]: LRUCache[int, double*]* lru_cache diff --git a/src/pygeoprocessing/extensions/ManagedRaster.h b/src/pygeoprocessing/extensions/ManagedRaster.h index f8204df2..eb73fa0e 100644 --- a/src/pygeoprocessing/extensions/ManagedRaster.h +++ b/src/pygeoprocessing/extensions/ManagedRaster.h @@ -73,6 +73,17 @@ static void log_msg(LogLevel level, string msg) Py_DECREF(pyString); } +// Note: I was concerned that checking each value for nan would be too slow, but +// I compared its performance against another implementation where we first reclassify +// nans to a regular float, and then skip the nan check, and that was much slower: +// https://github.com/natcap/invest/issues/1714#issuecomment-2762134419 +inline bool is_close(double x, double y) { + if (isnan(x) and isnan(y)) { + return true; + } + return abs(x - y) <= (pow(10, -8) + pow(10, -05) * abs(y)); +} + class NeighborTuple { public: int direction, x, y; @@ -387,6 +398,13 @@ class ManagedRaster { } return false; } + + bool is_out_of_bounds_or_nodata(long x, long y) { + if (is_out_of_bounds(x, y) or is_close(get(x, y), nodata)) { + return true; + } + return false; + } }; // Represents a flow direction raster, which may be of type MFD or D8 @@ -858,15 +876,4 @@ class UpslopeNeighborsNoDivide: public Neighbors { UpslopeNeighborNoDivideIterator end() { return UpslopeNeighborNoDivideIterator(&endVal); } }; -// Note: I was concerned that checking each value for nan would be too slow, but -// I compared its performance against another implementation where we first reclassify -// nans to a regular float, and then skip the nan check, and that was much slower: -// https://github.com/natcap/invest/issues/1714#issuecomment-2762134419 -inline bool is_close(double x, double y) { - if (isnan(x) and isnan(y)) { - return true; - } - return abs(x - y) <= (pow(10, -8) + pow(10, -05) * abs(y)); -} - #endif // NATCAP_INVEST_MANAGEDRASTER_H_ diff --git a/src/pygeoprocessing/routing/routing.pyx b/src/pygeoprocessing/routing/routing.pyx index ee06330b..dfa99c31 100644 --- a/src/pygeoprocessing/routing/routing.pyx +++ b/src/pygeoprocessing/routing/routing.pyx @@ -3092,9 +3092,7 @@ def extract_strahler_streams_d8( d_n = flow_dir_managed_raster.get(x_l, y_l) x_n = x_l + COL_OFFSETS[d_n] y_n = y_l + ROW_OFFSETS[d_n] - if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or - flow_dir_managed_raster.get( - x_n, y_n) == flow_nodata): + if flow_dir_managed_raster.is_out_of_bounds_or_nodata(x_n, y_n): is_drain = 1 if not is_drain and ( @@ -3108,11 +3106,9 @@ def extract_strahler_streams_d8( x_n = x_l + COL_OFFSETS[d] y_n = y_l + ROW_OFFSETS[d] # check if on border - if flow_dir_managed_raster.is_out_of_bounds(x_n, y_n): + if flow_dir_managed_raster.is_out_of_bounds_or_nodata(x_n, y_n): continue d_n = flow_dir_managed_raster.get(x_n, y_n) - if d_n == flow_nodata: - continue if (INFLOW_OFFSETS[d] == d_n and flow_accum_managed_raster.get( x_n, y_n) >= min_flow_accum_threshold): @@ -3673,9 +3669,7 @@ def _build_discovery_finish_rasters( # if the downstream neighbor runs off raster or is nodata, then # this pixel is a drain point, so we can push it to the stack - if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or - flow_dir_managed_raster.get( - x_n, y_n) == flow_dir_nodata): + if flow_dir_managed_raster.is_out_of_bounds_or_nodata(x_n, y_n): discovery_stack.push(CoordinateType(x, y)) finish_stack.push(FinishType(x, y, 1)) @@ -3697,13 +3691,11 @@ def _build_discovery_finish_rasters( # skip neighbors that are outside of the raster bounds, # or that have nodata - if flow_dir_managed_raster.is_out_of_bounds(x_n, y_n): - continue - n_dir = flow_dir_managed_raster.get(x_n, y_n) - if n_dir == flow_dir_nodata: + if flow_dir_managed_raster.is_out_of_bounds_or_nodata(x_n, y_n): continue # if the neighbor drains to this pixel, push it to the stack + n_dir = flow_dir_managed_raster.get(x_n, y_n) if INFLOW_OFFSETS[test_dir] == n_dir: discovery_stack.push(CoordinateType(x_n, y_n)) n_pushed += 1 @@ -4624,17 +4616,13 @@ cdef _calculate_stream_geometry( x_n = x_l + COL_OFFSETS[d] y_n = y_l + ROW_OFFSETS[d] - # check out of bounds - if flow_dir_managed_raster.is_out_of_bounds(x_n, y_n): - continue - - # check for nodata - d_n = flow_dir_managed_raster.get(x_n, y_n) - if d_n == flow_dir_nodata: + # check out of bounds or nodata + if flow_dir_managed_raster.is_out_of_bounds_or_nodata(x_n, y_n): continue # check if there's an upstream inflow pixel with flow accum # greater than the threshold + d_n = flow_dir_managed_raster.get(x_n, y_n) if INFLOW_OFFSETS[d] == d_n and ( flow_accum_managed_raster.get( x_n, y_n) > flow_accum_threshold): From a3a558bf13bae10fb07c50c79595001dd97d8a67 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Fri, 13 Feb 2026 13:56:40 -0800 Subject: [PATCH 04/12] add ManagedRaster.is_nodata function; remove unused param --- src/pygeoprocessing/extensions.pxd | 5 +++-- src/pygeoprocessing/extensions/ManagedRaster.h | 6 +++++- src/pygeoprocessing/routing/routing.pyx | 7 +++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/pygeoprocessing/extensions.pxd b/src/pygeoprocessing/extensions.pxd index 4ee61b1d..971eae6b 100644 --- a/src/pygeoprocessing/extensions.pxd +++ b/src/pygeoprocessing/extensions.pxd @@ -53,8 +53,9 @@ cdef extern from "extensions/ManagedRaster.h": double get(long xi, long yi) void _load_block(int block_index) except * void close() - bint is_out_of_bounds(int x, int y) - bint is_out_of_bounds_or_nodata(int x, int y) + bint is_out_of_bounds(long x, long y) + bint is_out_of_bounds_or_nodata(long x, long y) + bint is_nodata(long x, long y) cdef cppclass ManagedFlowDirRaster[T]: LRUCache[int, double*]* lru_cache diff --git a/src/pygeoprocessing/extensions/ManagedRaster.h b/src/pygeoprocessing/extensions/ManagedRaster.h index eb73fa0e..6c3091bb 100644 --- a/src/pygeoprocessing/extensions/ManagedRaster.h +++ b/src/pygeoprocessing/extensions/ManagedRaster.h @@ -400,11 +400,15 @@ class ManagedRaster { } bool is_out_of_bounds_or_nodata(long x, long y) { - if (is_out_of_bounds(x, y) or is_close(get(x, y), nodata)) { + if (is_out_of_bounds(x, y) or is_nodata(x, y)) { return true; } return false; } + + bool is_nodata(long x, long y) { + return is_close(get(x, y), nodata); + } }; // Represents a flow direction raster, which may be of type MFD or D8 diff --git a/src/pygeoprocessing/routing/routing.pyx b/src/pygeoprocessing/routing/routing.pyx index dfa99c31..fce6e85b 100644 --- a/src/pygeoprocessing/routing/routing.pyx +++ b/src/pygeoprocessing/routing/routing.pyx @@ -3156,7 +3156,7 @@ def extract_strahler_streams_d8( source_stream_point.xi, source_stream_point.yi, source_stream_point.upstream_d8_dir, flow_dir_info['geotransform'], flow_accum_managed_raster, - flow_dir_managed_raster, flow_nodata, + flow_dir_managed_raster, min_flow_accum_threshold, coord_to_stream_ids) if payload is None: LOGGER.debug( @@ -3398,7 +3398,7 @@ def extract_strahler_streams_d8( ds_x, ds_y, upstream_d8_dir, flow_dir_info['geotransform'], flow_accum_managed_raster, - flow_dir_managed_raster, flow_nodata, + flow_dir_managed_raster, working_flow_accum_threshold, coord_to_stream_ids) if payload is None: @@ -4531,7 +4531,7 @@ cdef int _in_watershed( cdef _calculate_stream_geometry( int x_l, int y_l, int upstream_d8_dir, geotransform, ManagedRaster flow_accum_managed_raster, - ManagedRaster flow_dir_managed_raster, int flow_dir_nodata, + ManagedRaster flow_dir_managed_raster, int flow_accum_threshold, coord_to_stream_ids): """Calculate the upstream geometry from the given point. @@ -4546,7 +4546,6 @@ cdef _calculate_stream_geometry( used to convert to georeferenced coordinates. flow_accum_managed_raster (ManagedRaster): flow accumulation raster flow_dir_managed_raster (ManagedRaster): d8 flow direction raster - flow_dir_nodata (int): nodata for flow direction flow_accum_threshold (int): minimum flow accumulation value to define string. coord_to_stream_ids (dict): map raster space coordinate tuple to From e5fda0559751d009c2d25192a98649803979b395 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Fri, 13 Feb 2026 14:13:24 -0800 Subject: [PATCH 05/12] add ManagedRaster.n_pixels attribute; use class attributes where possible --- src/pygeoprocessing/extensions.pxd | 1 + .../extensions/ManagedRaster.h | 2 + src/pygeoprocessing/routing/routing.pyx | 95 ++++++++----------- 3 files changed, 43 insertions(+), 55 deletions(-) diff --git a/src/pygeoprocessing/extensions.pxd b/src/pygeoprocessing/extensions.pxd index 971eae6b..c1ee1407 100644 --- a/src/pygeoprocessing/extensions.pxd +++ b/src/pygeoprocessing/extensions.pxd @@ -39,6 +39,7 @@ cdef extern from "extensions/ManagedRaster.h": int block_ybits long raster_x_size long raster_y_size + long n_pixels int block_nx int block_ny int write_mode diff --git a/src/pygeoprocessing/extensions/ManagedRaster.h b/src/pygeoprocessing/extensions/ManagedRaster.h index 6c3091bb..91082916 100644 --- a/src/pygeoprocessing/extensions/ManagedRaster.h +++ b/src/pygeoprocessing/extensions/ManagedRaster.h @@ -115,6 +115,7 @@ class ManagedRaster { long raster_y_size; int block_nx; int block_ny; + long n_pixels; char* raster_path; int band_id; GDALDataset* dataset; @@ -149,6 +150,7 @@ class ManagedRaster { raster_x_size = dataset->GetRasterXSize(); raster_y_size = dataset->GetRasterYSize(); + n_pixels = raster_x_size * raster_y_size; if (band_id < 1 or band_id > dataset->GetRasterCount()) { throw std::invalid_argument( diff --git a/src/pygeoprocessing/routing/routing.pyx b/src/pygeoprocessing/routing/routing.pyx index fce6e85b..7956e3af 100644 --- a/src/pygeoprocessing/routing/routing.pyx +++ b/src/pygeoprocessing/routing/routing.pyx @@ -852,10 +852,10 @@ def flow_dir_d8( if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * raster_x_size + current_pixel = xoff + yoff * dem_managed_raster.raster_x_size LOGGER.info( '(flow dir d8): ' - f'{current_pixel} of {raster_x_size*raster_y_size} ' + f'{current_pixel} of {dem_managed_raster.n_pixels} ' f'pixels complete') # make a buffer big enough to capture block and boundaries around it @@ -866,7 +866,8 @@ def flow_dir_d8( # attempt to expand read block by a pixel boundary (xa, xb, ya, yb), modified_offset_dict = _generate_read_bounds( - offset_dict, raster_x_size, raster_y_size) + offset_dict, dem_managed_raster.raster_x_size, + dem_managed_raster.raster_y_size) dem_buffer_array[ya:yb, xa:xb] = dem_band.ReadAsArray( **modified_offset_dict).astype(numpy.float64) @@ -939,8 +940,7 @@ def flow_dir_d8( xi_n = xi_q+COL_OFFSETS[i_n] yi_n = yi_q+ROW_OFFSETS[i_n] - if (xi_n < 0 or xi_n >= raster_x_size or - yi_n < 0 or yi_n >= raster_y_size): + if dem_managed_raster.is_out_of_bounds(xi_n, yi_n): n_height = dem_nodata else: n_height = dem_managed_raster.get(xi_n, yi_n) @@ -1013,8 +1013,7 @@ def flow_dir_d8( for i_n in range(8): xi_n = xi_q+COL_OFFSETS[i_n] yi_n = yi_q+ROW_OFFSETS[i_n] - if (xi_n < 0 or xi_n >= raster_x_size or - yi_n < 0 or yi_n >= raster_y_size): + if dem_managed_raster.is_out_of_bounds(xi_n, yi_n): continue n_drain_distance = drain_distance + ( @@ -1285,8 +1284,7 @@ def flow_accumulation_d8( for i_n in range(flow_pixel.last_flow_dir, 8): xi_n = flow_pixel.xi+COL_OFFSETS[i_n] yi_n = flow_pixel.yi+ROW_OFFSETS[i_n] - if (xi_n < 0 or xi_n >= raster_x_size or - yi_n < 0 or yi_n >= raster_y_size): + if flow_dir_managed_raster.is_out_of_bounds(xi_n, yi_n): # neighbor not upstream: off edges of the raster continue upstream_flow_dir = flow_dir_managed_raster.get( @@ -1576,9 +1574,9 @@ def flow_dir_mfd( if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * raster_x_size + current_pixel = xoff + yoff * dem_managed_raster.raster_x_size LOGGER.info('%.1f%% complete', 100.0 * current_pixel / ( - raster_x_size * raster_y_size)) + dem_managed_raster.n_pixels)) # make a buffer big enough to capture block and boundaries around it dem_buffer_array = numpy.empty( @@ -1589,7 +1587,8 @@ def flow_dir_mfd( # check if we can widen the border to include real data from the # raster (xa, xb, ya, yb), modified_offset_dict = _generate_read_bounds( - offset_dict, raster_x_size, raster_y_size) + offset_dict, dem_managed_raster.raster_x_size, + dem_managed_raster.raster_y_size) dem_buffer_array[ya:yb, xa:xb] = dem_band.ReadAsArray( **modified_offset_dict).astype(numpy.float64) @@ -1673,8 +1672,7 @@ def flow_dir_mfd( xi_n = xi_q+COL_OFFSETS[i_n] yi_n = yi_q+ROW_OFFSETS[i_n] - if (xi_n < 0 or xi_n >= raster_x_size or - yi_n < 0 or yi_n >= raster_y_size): + if dem_managed_raster.is_out_of_bounds(xi_n, yi_n): n_height = dem_nodata else: n_height = dem_managed_raster.get(xi_n, yi_n) @@ -1780,8 +1778,7 @@ def flow_dir_mfd( for i_n in range(8): xi_n = xi_q+COL_OFFSETS[i_n] yi_n = yi_q+ROW_OFFSETS[i_n] - if (xi_n < 0 or xi_n >= raster_x_size or - yi_n < 0 or yi_n >= raster_y_size): + if dem_managed_raster.is_out_of_bounds(xi_n, yi_n): continue n_drain_distance = drain_distance + ( @@ -1814,8 +1811,7 @@ def flow_dir_mfd( yi_n = yi_q+ROW_OFFSETS[i_n] downhill_slope_array[i_n] = 0.0 - if (xi_n < 0 or xi_n >= raster_x_size or - yi_n < 0 or yi_n >= raster_y_size): + if dem_managed_raster.is_out_of_bounds(xi_n, yi_n): continue if dem_managed_raster.get(xi_n, yi_n) != root_height: @@ -1906,7 +1902,7 @@ def flow_accumulation_mfd( cdef unsigned long win_ysize, win_xsize, xoff, yoff # These are used to estimate % complete - cdef unsigned long long visit_count, pixel_count + cdef unsigned long long visit_count # the _root variables remembers the pixel index where the plateau/pit # region was first detected when iterating over the DEM. @@ -2001,7 +1997,6 @@ def flow_accumulation_mfd( flow_dir_raster_info = pygeoprocessing.get_raster_info( flow_dir_mfd_raster_path_band[0]) raster_x_size, raster_y_size = flow_dir_raster_info['raster_size'] - pixel_count = raster_x_size * raster_y_size visit_count = 0 LOGGER.debug('starting search') @@ -2016,9 +2011,9 @@ def flow_accumulation_mfd( if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * raster_x_size + current_pixel = xoff + yoff * flow_dir_managed_raster.raster_x_size LOGGER.info('Flow accum MFD %.1f%% complete', 100.0 * current_pixel / ( - raster_x_size * raster_y_size)) + flow_dir_managed_raster.n_pixels)) # make a buffer big enough to capture block and boundaries around it flow_dir_mfd_buffer_array = numpy.empty( @@ -2029,7 +2024,8 @@ def flow_accumulation_mfd( # check if we can widen the border to include real data from the # raster (xa, xb, ya, yb), modified_offset_dict = _generate_read_bounds( - offset_dict, raster_x_size, raster_y_size) + offset_dict, flow_dir_managed_raster.raster_x_size, + flow_dir_managed_raster.raster_y_size) flow_dir_mfd_buffer_array[ya:yb, xa:xb] = flow_dir_band.ReadAsArray( **modified_offset_dict).astype(numpy.int32) @@ -2079,14 +2075,13 @@ def flow_accumulation_mfd( last_log_time = ctime(NULL) LOGGER.info( 'Flow accum MFD %.1f%% complete', - 100.0 * visit_count / float(pixel_count)) + 100.0 * visit_count / float(flow_dir_managed_raster.n_pixels)) preempted = 0 for i_n in range(flow_pixel.last_flow_dir, 8): xi_n = flow_pixel.xi+COL_OFFSETS[i_n] yi_n = flow_pixel.yi+ROW_OFFSETS[i_n] - if (xi_n < 0 or xi_n >= raster_x_size or - yi_n < 0 or yi_n >= raster_y_size): + if flow_dir_managed_raster.is_out_of_bounds(xi_n, yi_n): # no upstream here continue compressed_upstream_flow_dir = ( @@ -2265,9 +2260,9 @@ def distance_to_channel_d8( if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * raster_x_size + current_pixel = xoff + yoff * flow_dir_d8_managed_raster.raster_x_size LOGGER.info('Dist to channel D8 %.1f%% complete', 100.0 * current_pixel / ( - raster_x_size * raster_y_size)) + flow_dir_d8_managed_raster.n_pixels)) # make a buffer big enough to capture block and boundaries around it channel_buffer_array = numpy.empty( @@ -2278,7 +2273,8 @@ def distance_to_channel_d8( # check if we can widen the border to include real data from the # raster (xa, xb, ya, yb), modified_offset_dict = _generate_read_bounds( - offset_dict, raster_x_size, raster_y_size) + offset_dict, flow_dir_d8_managed_raster.raster_x_size, + flow_dir_d8_managed_raster.raster_y_size) channel_buffer_array[ya:yb, xa:xb] = channel_band.ReadAsArray( **modified_offset_dict).astype(numpy.int8) @@ -2309,8 +2305,7 @@ def distance_to_channel_d8( xi_n = xi_q+COL_OFFSETS[i_n] yi_n = yi_q+ROW_OFFSETS[i_n] - if (xi_n < 0 or xi_n >= raster_x_size or - yi_n < 0 or yi_n >= raster_y_size): + if flow_dir_d8_managed_raster.is_out_of_bounds(xi_n, yi_n): continue if channel_managed_raster.get(xi_n, yi_n) == 1: @@ -2517,9 +2512,9 @@ def distance_to_channel_mfd( if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * raster_x_size + current_pixel = xoff + yoff * flow_dir_mfd_managed_raster.raster_x_size LOGGER.info('Dist to channel MFD %.1f%% complete', 100.0 * current_pixel / ( - raster_x_size * raster_y_size)) + flow_dir_mfd_managed_raster.n_pixels)) # make a buffer big enough to capture block and boundaries around it channel_buffer_array = numpy.empty( @@ -2534,7 +2529,8 @@ def distance_to_channel_mfd( # check if we can widen the border to include real data from the # raster (xa, xb, ya, yb), modified_offset_dict = _generate_read_bounds( - offset_dict, raster_x_size, raster_y_size) + offset_dict, flow_dir_mfd_managed_raster.raster_x_size, + flow_dir_mfd_managed_raster.raster_y_size) channel_buffer_array[ya:yb, xa:xb] = channel_band.ReadAsArray( **modified_offset_dict).astype(numpy.int8) @@ -2601,8 +2597,7 @@ def distance_to_channel_mfd( xi_n = pixel.xi+COL_OFFSETS[i_n] yi_n = pixel.yi+ROW_OFFSETS[i_n] - if (xi_n < 0 or xi_n >= raster_x_size or - yi_n < 0 or yi_n >= raster_y_size): + if flow_dir_mfd_managed_raster.is_out_of_bounds(xi_n, yi_n): continue # if the pixel has a neighbor that hasn't been visited yet, @@ -2778,9 +2773,9 @@ def extract_streams_mfd( yi_root = yi+yoff if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * raster_x_size + current_pixel = xoff + yoff * flow_accum_mr.raster_x_size LOGGER.info('Extract streams MFD %.1f%% complete', 100.0 * current_pixel / ( - raster_x_size * raster_y_size)) + flow_accum_mr.n_pixels)) for xi in range(win_xsize): xi_root = xi+xoff flow_accum = flow_accum_mr.get(xi_root, yi_root) @@ -2800,8 +2795,7 @@ def extract_streams_mfd( continue xi_n = xi_root+COL_OFFSETS[i_n] yi_n = yi_root+ROW_OFFSETS[i_n] - if (xi_n < 0 or xi_n >= raster_x_size or - yi_n < 0 or yi_n >= raster_y_size): + if flow_accum_mr.is_out_of_bounds(xi_n, yi_n): # it'll drain off the edge of the raster is_outlet = 1 break @@ -2821,8 +2815,7 @@ def extract_streams_mfd( for i_sn in range(8): xi_sn = xi_n+COL_OFFSETS[i_sn] yi_sn = yi_n+ROW_OFFSETS[i_sn] - if (xi_sn < 0 or xi_sn >= raster_x_size or - yi_sn < 0 or yi_sn >= raster_y_size): + if flow_accum_mr.is_out_of_bounds(xi_sn, yi_sn): continue flow_dir_mfd = flow_dir_mfd_mr.get(xi_sn, yi_sn) if flow_dir_mfd == flow_dir_nodata: @@ -2852,8 +2845,7 @@ def extract_streams_mfd( if (flow_dir_mfd >> (i_sn*4)) & 0xF > 0: xi_sn = xi_bn+COL_OFFSETS[i_sn] yi_sn = yi_bn+ROW_OFFSETS[i_sn] - if (xi_sn < 0 or xi_sn >= raster_x_size or - yi_sn < 0 or yi_sn >= raster_y_size): + if flow_accum_mr.is_out_of_bounds(xi_sn, yi_sn): continue if stream_mr.get(xi_sn, yi_sn) == 2: stream_mr.set(xi_sn, yi_sn, 1) @@ -3030,8 +3022,6 @@ def extract_strahler_streams_d8( cdef unsigned int win_xsize, win_ysize LOGGER.info('(extract_strahler_streams_d8): seed the drains') - cdef unsigned long n_pixels = ( - flow_dir_managed_raster.raster_x_size * flow_dir_managed_raster.raster_y_size) cdef long n_processed = 0 cdef time_t last_log_time last_log_time = ctime(NULL) @@ -3070,7 +3060,7 @@ def extract_strahler_streams_d8( if ctime(NULL)-last_log_time > _LOGGING_PERIOD: LOGGER.info( '(extract_strahler_streams_d8): drain seeding ' - f'{n_processed} of {n_pixels} pixels complete') + f'{n_processed} of {flow_dir_managed_raster.n_pixels} pixels complete') last_log_time = ctime(NULL) xoff = offset_dict['xoff'] yoff = offset_dict['yoff'] @@ -3629,11 +3619,7 @@ def _build_discovery_finish_rasters( cdef FinishType finish_coordinate cdef long discovery_count = 0 - cdef int n_processed, n_pixels - n_pixels = ( - flow_dir_managed_raster.raster_x_size * - flow_dir_managed_raster.raster_x_size) - n_processed = 0 + cdef int n_processed = 0 cdef time_t last_log_time = ctime(NULL) cdef int n_pushed @@ -3646,7 +3632,7 @@ def _build_discovery_finish_rasters( if ctime(NULL)-last_log_time > _LOGGING_PERIOD: LOGGER.info( f'(discovery time processing): ' - f'{n_processed/n_pixels*100:.1f}% complete') + f'{n_processed/flow_dir_managed_raster.n_pixels*100:.1f}% complete') last_log_time = ctime(NULL) xoff = offset_dict['xoff'] yoff = offset_dict['yoff'] @@ -4519,8 +4505,7 @@ cdef int _in_watershed( """ cdef int x_n = x_l + COL_OFFSETS[direction_to_test] cdef int y_n = y_l + ROW_OFFSETS[direction_to_test] - if (x_n < 0 or y_n < 0 or x_n >= discovery_managed_raster.raster_x_size or - y_n >= discovery_managed_raster.raster_y_size): + if discovery_managed_raster.is_out_of_bounds(x_n, y_n): return 0 cdef long point_discovery = discovery_managed_raster.get(x_n, y_n) return (point_discovery != discovery_nodata and From 4bae8d03ef089450b79c68a4b6653a2808673596 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Tue, 17 Feb 2026 09:09:21 -0800 Subject: [PATCH 06/12] use ManagedRaster raster_x_size, raster_y_size properties --- src/pygeoprocessing/routing/routing.pyx | 111 ++++++++---------------- 1 file changed, 35 insertions(+), 76 deletions(-) diff --git a/src/pygeoprocessing/routing/routing.pyx b/src/pygeoprocessing/routing/routing.pyx index 7956e3af..d71d75fb 100644 --- a/src/pygeoprocessing/routing/routing.pyx +++ b/src/pygeoprocessing/routing/routing.pyx @@ -744,9 +744,6 @@ def flow_dir_d8( # a plateau in case no other valid drain was found cdef queue[int] nodata_flow_dir_queue - # properties of the parallel rasters - cdef unsigned int raster_x_size, raster_y_size - cdef unsigned long current_pixel # used for time-delayed logging @@ -764,9 +761,6 @@ def flow_dir_d8( # pick some very improbable value since it's hard to deal with NaNs dem_nodata = IMPROBABLE_FLOAT_NODATA - # these are used to determine if a sample is within the raster - raster_x_size, raster_y_size = dem_raster_info['raster_size'] - # this is the nodata value for all the flat region and pit masks mask_nodata = 0 @@ -800,19 +794,6 @@ def flow_dir_d8( flow_dir_managed_raster = ManagedRaster( target_flow_dir_path.encode('utf-8'), 1, True) - # this creates a raster that's used for a dynamic programming solution to - # shortest path to the drain for plateaus. the raster is filled with - # raster_x_size * raster_y_size as a distance that's greater than the - # longest plateau drain distance possible for this raster. - plateau_distance_path = os.path.join( - working_dir_path, 'plateau_distance.tif') - pygeoprocessing.new_raster_from_base( - dem_raster_path_band[0], plateau_distance_path, gdal.GDT_Float64, - [-1], fill_value_list=[raster_x_size * raster_y_size], - raster_driver_creation_tuple=raster_driver_creation_tuple) - plateau_distance_managed_raster = ManagedRaster( - plateau_distance_path.encode('utf-8'), 1, True) - # this raster is for random access of the DEM compatable_dem_raster_path_band = None @@ -836,6 +817,19 @@ def flow_dir_d8( compatable_dem_raster_path_band[0].encode('utf-8'), compatable_dem_raster_path_band[1], False) + # this creates a raster that's used for a dynamic programming solution to + # shortest path to the drain for plateaus. the raster is filled with + # the number of pixels as a distance that's greater than the + # longest plateau drain distance possible for this raster. + plateau_distance_path = os.path.join( + working_dir_path, 'plateau_distance.tif') + pygeoprocessing.new_raster_from_base( + dem_raster_path_band[0], plateau_distance_path, gdal.GDT_Float64, + [-1], fill_value_list=[dem_managed_raster.n_pixels], + raster_driver_creation_tuple=raster_driver_creation_tuple) + plateau_distance_managed_raster = ManagedRaster( + plateau_distance_path.encode('utf-8'), 1, True) + # and this raster is for efficient block-by-block reading of the dem dem_raster = gdal.OpenEx( compatable_dem_raster_path_band[0], gdal.OF_RASTER) @@ -1124,9 +1118,6 @@ def flow_accumulation_d8( cdef stack[WeightedFlowPixelType] search_stack cdef WeightedFlowPixelType flow_pixel - # properties of the parallel rasters - cdef unsigned int raster_x_size, raster_y_size - cdef unsigned long current_pixel # used for time-delayed logging @@ -1196,7 +1187,6 @@ def flow_accumulation_d8( flow_dir_raster_info = pygeoprocessing.get_raster_info( flow_dir_raster_path_band[0]) - raster_x_size, raster_y_size = flow_dir_raster_info['raster_size'] tmp_flow_dir_nodata = flow_dir_raster_info['nodata'][ flow_dir_raster_path_band[1]-1] @@ -1217,9 +1207,9 @@ def flow_accumulation_d8( if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * raster_x_size + current_pixel = xoff + yoff * flow_dir_managed_raster.raster_x_size LOGGER.info('Flow accumulation D8 %.1f%% complete', 100.0 * current_pixel / ( - raster_x_size * raster_y_size)) + flow_dir_managed_raster.n_pixels)) # make a buffer big enough to capture block and boundaries around it flow_dir_buffer_array = numpy.empty( @@ -1229,7 +1219,8 @@ def flow_accumulation_d8( # attempt to expand read block by a pixel boundary (xa, xb, ya, yb), modified_offset_dict = _generate_read_bounds( - offset_dict, raster_x_size, raster_y_size) + offset_dict, flow_dir_managed_raster.raster_x_size, + flow_dir_managed_raster.raster_y_size) flow_dir_buffer_array[ya:yb, xa:xb] = flow_dir_band.ReadAsArray( **modified_offset_dict).astype(numpy.uint8) @@ -1452,9 +1443,6 @@ def flow_dir_mfd( # a plateau in case no other valid drain was found cdef queue[int] nodata_flow_dir_queue - # properties of the parallel rasters - cdef unsigned long raster_x_size, raster_y_size - # used for time-delayed logging cdef time_t last_log_time last_log_time = ctime(NULL) @@ -1474,9 +1462,6 @@ def flow_dir_mfd( # pick some very improbable value since it's hard to deal with NaNs dem_nodata = IMPROBABLE_FLOAT_NODATA - # these are used to determine if a sample is within the raster - raster_x_size, raster_y_size = dem_raster_info['raster_size'] - # this is the nodata value for all the flat region and pit masks mask_nodata = 0 @@ -1520,21 +1505,6 @@ def flow_dir_mfd( plateau_drain_mask_managed_raster = ManagedRaster( plateu_drain_mask_path.encode('utf-8'), 1, True) - # this creates a raster that's used for a dynamic programming solution to - # shortest path to the drain for plateaus. the raster is filled with - # raster_x_size * raster_y_size as a distance that's greater than the - # longest plateau drain distance possible for this raster. - plateau_distance_path = os.path.join( - working_dir_path, 'plateau_distance.tif') - cdef unsigned long plateau_distance_nodata = raster_x_size * raster_y_size - pygeoprocessing.new_raster_from_base( - dem_raster_path_band[0], plateau_distance_path, gdal.GDT_Float64, - [plateau_distance_nodata], - fill_value_list=[plateau_distance_nodata], - raster_driver_creation_tuple=raster_driver_creation_tuple) - plateau_distance_managed_raster = ManagedRaster( - plateau_distance_path.encode('utf-8'), 1, True) - # this raster is for random access of the DEM compatable_dem_raster_path_band = None dem_block_xsize, dem_block_ysize = dem_raster_info['block_size'] @@ -1557,6 +1527,21 @@ def flow_dir_mfd( compatable_dem_raster_path_band[0].encode('utf-8'), compatable_dem_raster_path_band[1], False) + # this creates a raster that's used for a dynamic programming solution to + # shortest path to the drain for plateaus. the raster is filled with + # the number of pixels as a distance that's greater than the + # longest plateau drain distance possible for this raster. + plateau_distance_path = os.path.join( + working_dir_path, 'plateau_distance.tif') + cdef unsigned long plateau_distance_nodata = dem_managed_raster.n_pixels + pygeoprocessing.new_raster_from_base( + dem_raster_path_band[0], plateau_distance_path, gdal.GDT_Float64, + [plateau_distance_nodata], + fill_value_list=[plateau_distance_nodata], + raster_driver_creation_tuple=raster_driver_creation_tuple) + plateau_distance_managed_raster = ManagedRaster( + plateau_distance_path.encode('utf-8'), 1, True) + # and this raster is for efficient block-by-block reading of the dem dem_raster = gdal.OpenEx( compatable_dem_raster_path_band[0], gdal.OF_RASTER) @@ -1935,9 +1920,6 @@ def flow_accumulation_mfd( cdef stack[FlowPixelType] search_stack cdef FlowPixelType flow_pixel - # properties of the parallel rasters - cdef unsigned long raster_x_size, raster_y_size - # used for time-delayed logging cdef time_t last_log_time last_log_time = ctime(NULL) @@ -1994,9 +1976,6 @@ def flow_accumulation_mfd( if raw_weight_nodata is not None: weight_nodata = raw_weight_nodata - flow_dir_raster_info = pygeoprocessing.get_raster_info( - flow_dir_mfd_raster_path_band[0]) - raster_x_size, raster_y_size = flow_dir_raster_info['raster_size'] visit_count = 0 LOGGER.debug('starting search') @@ -2194,9 +2173,6 @@ def distance_to_channel_d8( # from a defined flow distance pixel cdef stack[PixelType] distance_to_channel_stack - # properties of the parallel rasters - cdef unsigned int raster_x_size, raster_y_size - # these area used to store custom per-pixel weights and per-pixel values # for distance updates cdef double weight_val, pixel_val @@ -2246,10 +2222,6 @@ def distance_to_channel_d8( channel_raster = gdal.OpenEx(channel_raster_path_band[0], gdal.OF_RASTER) channel_band = channel_raster.GetRasterBand(channel_raster_path_band[1]) - flow_dir_raster_info = pygeoprocessing.get_raster_info( - flow_dir_d8_raster_path_band[0]) - raster_x_size, raster_y_size = flow_dir_raster_info['raster_size'] - # this outer loop searches for undefined channels for offset_dict in pygeoprocessing.iterblocks( channel_raster_path_band, offset_only=True, largest_block=0): @@ -2428,9 +2400,6 @@ def distance_to_channel_mfd( # from a defined flow distance pixel cdef stack[MFDFlowPixelType] distance_to_channel_stack - # properties of the parallel rasters - cdef unsigned int raster_x_size, raster_y_size - # this value is used to store the current weight which might be 1 or # come from a predefined flow accumulation weight raster cdef double weight_val @@ -2498,10 +2467,6 @@ def distance_to_channel_mfd( else: weight_nodata = IMPROBABLE_FLOAT_NODATA - flow_dir_raster_info = pygeoprocessing.get_raster_info( - flow_dir_mfd_raster_path_band[0]) - raster_x_size, raster_y_size = flow_dir_raster_info['raster_size'] - # this outer loop searches for undefined channels for offset_dict in pygeoprocessing.iterblocks( channel_raster_path_band, offset_only=True, largest_block=0): @@ -2726,9 +2691,6 @@ def extract_streams_mfd( flow_accum_raster_path_band[1]-1] stream_nodata = 255 - cdef int raster_x_size, raster_y_size - raster_x_size, raster_y_size = flow_accum_info['raster_size'] - pygeoprocessing.new_raster_from_base( flow_accum_raster_path_band[0], target_stream_raster_path, gdal.GDT_Byte, [stream_nodata], @@ -4127,8 +4089,6 @@ def detect_lowest_drain_and_sink(dem_raster_path_band): else: dem_nodata = IMPROBABLE_FLOAT_NODATA - raster_x_size, raster_y_size = dem_raster_info['raster_size'] - cdef ManagedRaster dem_managed_raster = ManagedRaster( dem_raster_path_band[0].encode('utf-8'), dem_raster_path_band[1], False) @@ -4143,10 +4103,10 @@ def detect_lowest_drain_and_sink(dem_raster_path_band): if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * raster_x_size + current_pixel = xoff + yoff * dem_managed_raster.raster_x_size LOGGER.info( '(infer_sinks): ' - f'{current_pixel} of {raster_x_size * raster_y_size} ' + f'{current_pixel} of {dem_managed_raster.n_pixels} ' 'pixels complete') # search block for local sinks @@ -4169,8 +4129,7 @@ def detect_lowest_drain_and_sink(dem_raster_path_band): xi_n = xi_root+COL_OFFSETS[i_n] yi_n = yi_root+ROW_OFFSETS[i_n] - if (xi_n < 0 or xi_n >= raster_x_size or - yi_n < 0 or yi_n >= raster_y_size): + if dem_managed_raster.is_out_of_bounds(xi_n, yi_n): # it'll drain off the edge of the raster if center_val < lowest_drain_height: # found a new lower edge height From d136ea6bde517a2ce27c8f44ca72f39a647b3c41 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Tue, 17 Feb 2026 15:30:47 -0800 Subject: [PATCH 07/12] add lots of notes in comments to understand subwatershed delineation --- src/pygeoprocessing/routing/routing.pyx | 282 +++++++++++++++++++----- 1 file changed, 225 insertions(+), 57 deletions(-) diff --git a/src/pygeoprocessing/routing/routing.pyx b/src/pygeoprocessing/routing/routing.pyx index d71d75fb..57ee602d 100644 --- a/src/pygeoprocessing/routing/routing.pyx +++ b/src/pygeoprocessing/routing/routing.pyx @@ -3544,6 +3544,60 @@ def _build_discovery_finish_rasters( 0 (visited first) to the total number of pixels (visited last). Pixels are visited by starting from drain points and working upslope. + + Considering an example watershed, where A is the drain point: + + A + / \ + B C + / / \ + E F G + / \ | / \ + J K L N O + + the discovery order would be + + 0 + / \ + 7 1 + / / \ + 8 5 2 + / \ | / \ + 10 9 6 4 3 + + and the finish order would be + + 9 + / \ + 9 5 + / / \ + 9 5 3 + / \ | / \ + 9 8 5 3 2 + + + thus given a pixel P and its neighbor Q, + + if Q's discovery time is greater than or equal to P's discovery time, + (ie. Q is either upslope of P or it is in a different branch) + + and Q's discovery time is less than or equal to P's finish time, + (ie. Q is first visited before P's branch is complete) + then Q is in the same watershed as P. ??? + + + P Q Pd Qd Pf (Qd >= Pd and Qd <= Pf) + ----------------------------- + G O 2 3 3 True + G N 2 4 3 False + C G 1 2 5 True + C F 1 5 5 True + F L 5 6 5 False + C L 1 6 5 False + + + + Args: flow_dir_d8_raster_path_band (tuple): a D8 flow raster path band tuple. Paths may use any GDAL-supported scheme, including virtual file @@ -3588,6 +3642,22 @@ def _build_discovery_finish_rasters( cdef int i, j, xoff, yoff, win_xsize, win_ysize, x, y, x_n, y_n cdef int n_dir, test_dir + # this essentially does a depth-first traversal of the watershed, + # starting from drain points and working upslope. + # + # iterate over each pixel in the flow dir raster, block by block + # if a pixel is a drain point, push it onto the stack + # while the stack isn't empty, + # pop a pixel from the stack + # check each neighbor to see if it drains to the pixel + # for each neighbor pixel, if it drains to i, push it to the stack + # + # after checking all neighbors, push an item to the finish stack + # that consists of (x_i, y_i, number of neighbors pushed) + # + # if no neighbors were pushed, (no pixels drain to i, so i is a local high point), + # + # for offset_dict in pygeoprocessing.iterblocks( flow_dir_d8_raster_path_band, offset_only=True): # search raster block by raster block @@ -3608,7 +3678,7 @@ def _build_discovery_finish_rasters( y = yoff + yi # check to see if this pixel is a drain flow_dir = flow_dir_managed_raster.get(x, y) - if flow_dir == flow_dir_nodata: + if flow_dir_managed_raster.is_nodata(flow_dir): continue # find the downstream neighbor (the pixel that this pixel drains to) @@ -3621,6 +3691,38 @@ def _build_discovery_finish_rasters( discovery_stack.push(CoordinateType(x, y)) finish_stack.push(FinishType(x, y, 1)) + + + # [A] [(A,1)] + # [B C] A 0 [(A,1), (A,2)] + # [B F G] C 1 [(A,1), (A,2), (C,2)] + # [B F N O] G 2 [(A,1), (A,2), (C,2), (G,2)] + # [B F N] O 3 [(A,1), (A,2), (C,2), (G,2), (O,0)] + # [(A,1), (A,2), (C,2), (G,2)] -> set finish raster at O to (3-1) = 2 + # [(A,1), (A,2), (C,2), (G,1)] + # [B F] N 4 [(A,1), (A,2), (C,2), (G,1), (N,0)] + # [(A,1), (A,2), (C,2), (G,1)] -> set finish raster at N to (4-1) = 3 + # [(A,1), (A,2), (C,2)] -> set finish raster at G to (4-1) = 3 + # [(A,1), (A,2), (C,1)] + # [B L] F 5 [(A,1), (A,2), (C,1), (F,1)] + # [B] L 6 [(A,1), (A,2), (C,1), (F,1), (L,0)] + # [(A,1), (A,2), (C,1), (F,1)] -> set finish raster at L to (6-1) = 5 + # [(A,1), (A,2), (C,1)] -> set finish raster at F to (6-1) = 5 + # [(A,1), (A,2)] -> set finish raster at C to (6-1) = 5 + # [(A,1), (A,1)] + # [E] B 7 [(A,1), (A,1), (B,1)] + # [J K] E 8 [(A,1), (A,1), (B,1), (E,2)] + # [J] K 9 [(A,1), (A,1), (B,1), (E,2), (K,0)] + # [(A,1), (A,1), (B,1), (E,2)] -> set finish raster at K to 8 + # [(A,1), (A,1), (B,1), (E,1)] + # [] J 10 [(A,1), (A,1), (B,1), (E,1), (J,0)] + # [(A,1), (A,1), (B,1), (E,1)] -> set finish raster at J to 9 + # [(A,1), (A,1), (B,1)] -> set finish raster at E to 9 + # [(A,1), (A,1)] -> set finish raster at B to 9 + # [(A,1)] -> set finish raster at A to 9 + # [] -> set finish raster at A to 9 + + while not discovery_stack.empty(): # This coordinate is the downstream end of the stream raster_coord = discovery_stack.top() @@ -3655,7 +3757,15 @@ def _build_discovery_finish_rasters( FinishType( raster_coord.xi, raster_coord.yi, n_pushed)) - # pop the finish stack until n_pushed > 1 + # if n_pushed is 0, that means no neighbors drain to pixel i, + # and so i is a local high point. + # + # pop the finish stack until n_pushed > 1. + # the top item will be (xi, yi, 0) + # set the finish order raster at (xi, yi) to discovery order - 1 (why?) + # + # + # if n_pushed == 0: while (not finish_stack.empty() and finish_stack.top().n_pushed <= 1): @@ -3663,7 +3773,7 @@ def _build_discovery_finish_rasters( finish_stack.pop() finish_managed_raster.set( finish_coordinate.xi, finish_coordinate.yi, - discovery_count-1) + discovery_count - 1) if not finish_stack.empty(): # then take one more because one branch is done finish_coordinate = finish_stack.top() @@ -3786,7 +3896,6 @@ def calculate_subwatershed_boundary( cdef unsigned int x_l, y_l cdef int outflow_dir cdef double x_f, y_f - cdef double x_p, y_p cdef long discovery, finish cdef time_t last_log_time = ctime(NULL) @@ -3893,7 +4002,7 @@ def calculate_subwatershed_boundary( f'{(index/len(visit_order_stack))*100:.1f}% complete') last_log_time = ctime(NULL) discovery = discovery_managed_raster.get(x_l, y_l) - if discovery == -1: + if discovery_managed_raster.is_nodata(discovery): continue boundary_list = [(x_l, y_l)] finish = finish_managed_raster.get(x_l, y_l) @@ -3903,86 +4012,143 @@ def calculate_subwatershed_boundary( watershed_boundary = ogr.Geometry(ogr.wkbLinearRing) outflow_dir = d8_flow_dir_managed_raster.get(x_l, y_l) - # this is the center point of the pixel that will be offset to - # make the edge - x_f = x_l+0.5 - y_f = y_l+0.5 + # x_offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] + # + [0.5, 0.5, 0, -0.5, -0.5, -0.5, 0, 0.5] + + # = [1, 1, 0.5, 0, 0, 0, 0.5, 1] + # - [0, 0, -0.5, 0, 0, 0, 0.5, 0] + + # = [1, 1, 1, 0, 0, 0, 0, 1] - x_f += COL_OFFSETS[outflow_dir]*0.5 - y_f += ROW_OFFSETS[outflow_dir]*0.5 - if outflow_dir % 2 == 0: - # need to back up the point a bit - x_f -= ROW_OFFSETS[outflow_dir]*0.5 - y_f += COL_OFFSETS[outflow_dir]*0.5 - x_p, y_p = gdal.ApplyGeoTransform(geotransform, x_f, y_f) - watershed_boundary.AddPoint(x_p, y_p) + # y_offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] + # + [0, -0.5, -0.5, -0.5, 0, 0.5, 0.5, 0.5] - # keep track of how many steps x/y and when we get back to 0 we've - # made a loop + # = [0.5, 0, 0, 0, 0.5, 1, 1, 1] + # + [0.5, 0, 0, 0, -0.5, 0, 0, 0] + + # = [1, 0, 0, 0, 0, 1, 1, 1] + + + # offsets: + # given a pixel coordinate (x, y) representing the top left corner of the pixel, + # and that pixel's flow direction (0-7), + # (x + x_corner_offsets[flow_dir], y + y_corner_offsets[flow_dir]) + # is the coordinate of the pixel corner to start delineating from. + # + # for diagonal outflow, this is the corner point shared with the neighbor. + # for orthogonal outflow, there are two corner points shared with the neighbor. + # this chooses the corner that is farther "clockwise". + x_corner_offsets = [1, 1, 1, 0, 0, 0, 0, 1] + y_corner_offsets = [1, 0, 0, 0, 0, 1, 1, 1] + + + # move from the centerpoint to the point along the pixel border + # corresponding to the pixel's outflow direction. + x_f = x_l + x_corner_offsets[outflow_dir] + y_f = y_l + y_corner_offsets[outflow_dir] + watershed_boundary.AddPoint( + *gdal.ApplyGeoTransform(geotransform, x_f, y_f)) + + # keep track of how many steps we've taken in x and y directions + # when both of them get back to 0, we've made a loop delta_x, delta_y = 0, 0 # determine the first edge if outflow_dir % 2 == 0: # outflow through a straight side, so trivial edge detection - edge_side = outflow_dir + edge_side = outflow_dir # the outflow direction 0 - 7 edge_dir = (2+edge_side) % 8 + + # diagonal outflow requires testing neighboring cells to determine first edge + # + # if i drains to a diagonal neighbor, + # test the neighbor counterclockwise from that pixel + # so that we are always testing a pixel that shares a side with i + # + # a b c + # d x e + # f g h + # + # if x's outflow direction is 1 (it drains to c), + # cell_to_test is 2 (b) + # edge_side is 2 (b) + # edge_dir is 4 (d) + # + # if b is in watershed, + # edge_side is 0 (e) + # edge_dir is 2 (b) + # else: - # diagonal outflow requires testing neighboring cells to - # determine first edge - cell_to_test = (outflow_dir+1) % 8 + # get the next neighbor going counterclockwise from outflow_dir + cell_to_test = (outflow_dir + 1) % 8 edge_side = cell_to_test - edge_dir = (cell_to_test+2) % 8 + # get the neighbor 90 degrees counterclockwise from cell_to_test + edge_dir = (cell_to_test + 2) % 8 if _in_watershed( x_l, y_l, cell_to_test, discovery, finish, - discovery_managed_raster, discovery_nodata): - edge_side = (edge_side-2) % 8 - edge_dir = (edge_dir-2) % 8 + discovery_managed_raster): + edge_side = (edge_side - 2 ) % 8 + edge_dir = (edge_dir - 2) % 8 x_l += COL_OFFSETS[edge_dir] y_l += ROW_OFFSETS[edge_dir] # note the pixel moved boundary_list.append((x_l, y_l)) + # walk the watershed boundary until we come back to the starting point + # this loop repeats until it has outlined a complete watershed, + # or it reaches the maximum allowed number of steps. n_steps = 0 terminated_early = 0 while True: - # step the edge then determine the projected coordinates - x_f += COL_OFFSETS[edge_dir] - y_f += ROW_OFFSETS[edge_dir] - delta_x += COL_OFFSETS[edge_dir] - delta_y += ROW_OFFSETS[edge_dir] - # equivalent to gdal.ApplyGeoTransform(geotransform, x_f, y_f) - # to eliminate python function call overhead - x_p = g0 + g1*x_f + g2*y_f - y_p = g3 + g4*x_f + g5*y_f - watershed_boundary.AddPoint(x_p, y_p) + # stop delineating this watershed if we've exceeded the step limit. + # this prevents hanging in loops or excessively large watersheds n_steps += 1 if n_steps > _int_max_steps_per_watershed: LOGGER.warning('quitting, too many steps') terminated_early = 1 break + # Exit with an error if we've reached a pixel outside the raster + # bounds. This is unexpected but worth checking since missing this + # error would be very difficult to debug. if discovery_managed_raster.is_out_of_bounds(x_l, y_l): - # This is unexpected but worth checking since missing this - # error would be very difficult to debug. raise RuntimeError( f'{x_l}, {y_l} out of bounds for {discovery_managed_raster.raster_x_size}' f'x{discovery_managed_raster.raster_y_size} raster.') + + # step the edge then determine the projected coordinates + x_f += COL_OFFSETS[edge_dir] + y_f += ROW_OFFSETS[edge_dir] + + # increment/decrement the deltas, tracking how many pixels + # we've moved from the starting point in the x and y directions + delta_x += COL_OFFSETS[edge_dir] + delta_y += ROW_OFFSETS[edge_dir] + + # convert the pixel-space coordinates to the gis projection and + # add the point to the watershed geometry + # this math is equivalent to gdal.ApplyGeoTransform(geotransform, x_f, y_f) + # to eliminate python function call overhead + watershed_boundary.AddPoint( + g0 + g1 * x_f + g2 * y_f, + g3 + g4 * x_f + g5 * y_f) + + # counterclockwise configuration if edge_side - ((edge_dir-2) % 8) == 0: - # counterclockwise configuration left = edge_dir right = (left-1) % 8 out_dir_increase = 2 + # clockwise configuration (swapping "left" and "right") else: - # clockwise configuration (swapping "left" and "right") right = edge_dir left = (edge_side+1) out_dir_increase = -2 left_in = _in_watershed( x_l, y_l, left, discovery, finish, - discovery_managed_raster, discovery_nodata) + discovery_managed_raster) right_in = _in_watershed( x_l, y_l, right, discovery, finish, - discovery_managed_raster, discovery_nodata) + discovery_managed_raster) if right_in: # turn right out_dir = edge_side @@ -3994,7 +4160,6 @@ def calculate_subwatershed_boundary( _diagonal_fill_step( x_l, y_l, right, discovery, finish, discovery_managed_raster, - discovery_nodata, boundary_list) elif left_in: # step forward @@ -4007,8 +4172,9 @@ def calculate_subwatershed_boundary( edge_side = edge_dir edge_dir = (edge_side + out_dir_increase) % 8 + # when the deltas become 0, we've returned to the starting point, + # meaning that we made a loop and the watershed is complete if delta_x == 0 and delta_y == 0: - # met the start point so we completed the watershed loop break watershed_feature = ogr.Feature(watershed_layer.GetLayerDefn()) @@ -4383,8 +4549,7 @@ def detect_outlets( cdef void _diagonal_fill_step( int x_l, int y_l, int edge_dir, long discovery, long finish, - ManagedRaster discovery_managed_raster, - long discovery_nodata, boundary_list): + ManagedRaster discovery_managed_raster, boundary_list): """Fill diagonal that are in the watershed behind the new edge. Used as a helper function to mark pixels as part of the watershed @@ -4410,7 +4575,6 @@ cdef void _diagonal_fill_step( whether a pixel discovery time is inside a watershed or not. discovery_managed_raster (ManagedRaster): discovery time raster x/y gives the discovery time for that pixel. - discovery_nodata (long): nodata value for discovery raster boundary_list (list): this list is appended to for new pixels that should be neighbors in the fill. @@ -4430,7 +4594,7 @@ cdef void _diagonal_fill_step( for x_t, y_t in test_list: point_discovery = discovery_managed_raster.get( x_t, y_t) - if (point_discovery != discovery_nodata and + if (not discovery_managed_raster.is_nodata(point_discovery) and point_discovery >= discovery and point_discovery <= finish): boundary_list.append((int(x_t), int(y_t))) @@ -4444,20 +4608,18 @@ cdef void _diagonal_fill_step( cdef int _in_watershed( int x_l, int y_l, int direction_to_test, int discovery, int finish, - ManagedRaster discovery_managed_raster, - long discovery_nodata): + ManagedRaster discovery_managed_raster): """Test if pixel in direction is in the watershed. Args: - x_l/y_l (int): leading coordinate of the watershed boundary - edge. + x_l (int): x coordinate of the watershed boundary edge pixel to test + y_l (int): y coordinate of the watershed boundary edge pixel to test direction_to_test (int): D8 direction that points which direction the edge came from - discovery/finish (long): the discovery and finish time that defines - whether a pixel discovery time is inside a watershed or not. + discovery (int): the discovery time of the watershed boundary edge pixel + finish (int): the finish time of the watershed boundary edge pixel discovery_managed_raster (ManagedRaster): discovery time raster x/y gives the discovery time for that pixel. - discovery_nodata (long): nodata value for discovery raster Return: 1 if in, 0 if out. @@ -4467,7 +4629,13 @@ cdef int _in_watershed( if discovery_managed_raster.is_out_of_bounds(x_n, y_n): return 0 cdef long point_discovery = discovery_managed_raster.get(x_n, y_n) - return (point_discovery != discovery_nodata and + # discovery time: the order in which a pixel is first visited + # finish time: the order in which a pixel is last visited + # + # if pixel n's discovery time is greater than or equal to pixel l's discovery time, and + # the pixel n's discovery time is less than or equal to pixel l's finish time, + # then pixel n is in the watershed + return (not discovery_managed_raster.is_nodata(point_discovery) and point_discovery >= discovery and point_discovery <= finish) From 405012345a0cd98c83c7118f55cb982eb8c6f024 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Tue, 17 Feb 2026 17:00:38 -0800 Subject: [PATCH 08/12] add more docstring notes on subwatershed algorithm --- src/pygeoprocessing/routing/routing.pyx | 86 +++++++++++++------------ 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/src/pygeoprocessing/routing/routing.pyx b/src/pygeoprocessing/routing/routing.pyx index 57ee602d..a856c97a 100644 --- a/src/pygeoprocessing/routing/routing.pyx +++ b/src/pygeoprocessing/routing/routing.pyx @@ -3538,12 +3538,19 @@ def extract_strahler_streams_d8( def _build_discovery_finish_rasters( flow_dir_d8_raster_path_band, target_discovery_raster_path, target_finish_raster_path): - """Generates a discovery and finish time raster for a given d8 flow path. + """Generate discovery and finish time rasters from a D8 flow dir raster. + + This algorithm traverses the flow path starting from drain points and + working upslope using a depth-first search. The terms "discovery time" and + "finish time" come from graph theory and refer to the order in which a + pixel is first visited vs. marked completed by the algorithm. The discovery raster records the order in which each pixel is visited, from - 0 (visited first) to the total number of pixels (visited last). Pixels are - visited by starting from drain points and working upslope. + 0 (visited first) to the total number of pixels minus 1 (visited last). + The finish raster records when the "branch" of the watershed originating + from each pixel is complete. A pixel's finish time is equal to the largest + discovery time of any pixel upslope of it. Considering an example watershed, where A is the drain point: @@ -3567,36 +3574,13 @@ def _build_discovery_finish_rasters( and the finish order would be - 9 + 10 / \ - 9 5 + 10 6 / / \ - 9 5 3 + 10 6 4 / \ | / \ - 9 8 5 3 2 - - - thus given a pixel P and its neighbor Q, - - if Q's discovery time is greater than or equal to P's discovery time, - (ie. Q is either upslope of P or it is in a different branch) - - and Q's discovery time is less than or equal to P's finish time, - (ie. Q is first visited before P's branch is complete) - then Q is in the same watershed as P. ??? - - - P Q Pd Qd Pf (Qd >= Pd and Qd <= Pf) - ----------------------------- - G O 2 3 3 True - G N 2 4 3 False - C G 1 2 5 True - C F 1 5 5 True - F L 5 6 5 False - C L 1 6 5 False - - - + 10 9 6 4 3 Args: flow_dir_d8_raster_path_band (tuple): a D8 flow raster path band tuple. @@ -4036,13 +4020,13 @@ def calculate_subwatershed_boundary( # (x + x_corner_offsets[flow_dir], y + y_corner_offsets[flow_dir]) # is the coordinate of the pixel corner to start delineating from. # - # for diagonal outflow, this is the corner point shared with the neighbor. - # for orthogonal outflow, there are two corner points shared with the neighbor. - # this chooses the corner that is farther "clockwise". + # for diagonal outflow, this is the corner point shared + # with the downslope neighbor. + # for orthogonal outflow, there are two corner points shared with the + # downslope neighbor. this chooses the corner that is farther "clockwise". x_corner_offsets = [1, 1, 1, 0, 0, 0, 0, 1] y_corner_offsets = [1, 0, 0, 0, 0, 1, 1, 1] - # move from the centerpoint to the point along the pixel border # corresponding to the pixel's outflow direction. x_f = x_l + x_corner_offsets[outflow_dir] @@ -4055,10 +4039,16 @@ def calculate_subwatershed_boundary( delta_x, delta_y = 0, 0 # determine the first edge + # + # outflow through a straight side, so trivial edge detection if outflow_dir % 2 == 0: - # outflow through a straight side, so trivial edge detection + edge_side = outflow_dir # the outflow direction 0 - 7 - edge_dir = (2+edge_side) % 8 + + # get the direction in which to step from the current vertex + # we don't move diagonally across pixels, so this is + # one of 0 (right), 2 (up), 4 (left), or 6 (down) + edge_dir = (2 + edge_side) % 8 # diagonal outflow requires testing neighboring cells to determine first edge # @@ -4083,7 +4073,10 @@ def calculate_subwatershed_boundary( # get the next neighbor going counterclockwise from outflow_dir cell_to_test = (outflow_dir + 1) % 8 edge_side = cell_to_test - # get the neighbor 90 degrees counterclockwise from cell_to_test + + # get the direction in which to step from the current vertex + # we don't move diagonally across pixels, so this is + # one of 0 (right), 2 (up), 4 (left), or 6 (down) edge_dir = (cell_to_test + 2) % 8 if _in_watershed( x_l, y_l, cell_to_test, discovery, finish, @@ -4116,7 +4109,7 @@ def calculate_subwatershed_boundary( f'{x_l}, {y_l} out of bounds for {discovery_managed_raster.raster_x_size}' f'x{discovery_managed_raster.raster_y_size} raster.') - # step the edge then determine the projected coordinates + # step to the next vertex x_f += COL_OFFSETS[edge_dir] y_f += ROW_OFFSETS[edge_dir] @@ -4134,9 +4127,9 @@ def calculate_subwatershed_boundary( g3 + g4 * x_f + g5 * y_f) # counterclockwise configuration - if edge_side - ((edge_dir-2) % 8) == 0: + if edge_side - ((edge_dir - 2) % 8) == 0: left = edge_dir - right = (left-1) % 8 + right = (left - 1) % 8 out_dir_increase = 2 # clockwise configuration (swapping "left" and "right") else: @@ -4611,6 +4604,19 @@ cdef int _in_watershed( ManagedRaster discovery_managed_raster): """Test if pixel in direction is in the watershed. + Given a pixel P and its neighbor Q, + + if Q's discovery time >= P's discovery time, + then either Q is in P's branch, or it is in a later branch. + + if Q's discovery time <= P's finish time, + then either Q is in P's branch, or it is in an earlier branch. + Because this is a depth-first traversal, we are not discovering pixels + in other branches until the current branch is finished. + + If both these conditions are met, Q must be in P's branch + (i.e. Q is upslope of P in the same watershed). + Args: x_l (int): x coordinate of the watershed boundary edge pixel to test y_l (int): y coordinate of the watershed boundary edge pixel to test From 7af10a1514027146af74de461db4d8124fde4260 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Mon, 9 Mar 2026 16:12:47 -0700 Subject: [PATCH 09/12] remove _diagonal_fill_step from subwatersheds algorithm and add comments --- src/pygeoprocessing/routing/routing.pyx | 161 ++++++++++-------------- 1 file changed, 69 insertions(+), 92 deletions(-) diff --git a/src/pygeoprocessing/routing/routing.pyx b/src/pygeoprocessing/routing/routing.pyx index a856c97a..30e84c46 100644 --- a/src/pygeoprocessing/routing/routing.pyx +++ b/src/pygeoprocessing/routing/routing.pyx @@ -3985,9 +3985,17 @@ def calculate_subwatershed_boundary( f'(calculate_subwatershed_boundary): watershed building ' f'{(index/len(visit_order_stack))*100:.1f}% complete') last_log_time = ctime(NULL) + + discovery = discovery_managed_raster.get(x_l, y_l) + + # We reach this case if the current pixel is a boundary pixel of + # a previously calculated watershed. if discovery_managed_raster.is_nodata(discovery): continue + + # keep a list of pixels that are within the watershed along the boundary + # these will later be filled in with -1 in the discovery raster boundary_list = [(x_l, y_l)] finish = finish_managed_raster.get(x_l, y_l) outlet_x = x_l @@ -3996,24 +4004,6 @@ def calculate_subwatershed_boundary( watershed_boundary = ogr.Geometry(ogr.wkbLinearRing) outflow_dir = d8_flow_dir_managed_raster.get(x_l, y_l) - # x_offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] - # + [0.5, 0.5, 0, -0.5, -0.5, -0.5, 0, 0.5] - - # = [1, 1, 0.5, 0, 0, 0, 0.5, 1] - # - [0, 0, -0.5, 0, 0, 0, 0.5, 0] - - # = [1, 1, 1, 0, 0, 0, 0, 1] - - - # y_offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] - # + [0, -0.5, -0.5, -0.5, 0, 0.5, 0.5, 0.5] - - # = [0.5, 0, 0, 0, 0.5, 1, 1, 1] - # + [0.5, 0, 0, 0, -0.5, 0, 0, 0] - - # = [1, 0, 0, 0, 0, 1, 1, 1] - - # offsets: # given a pixel coordinate (x, y) representing the top left corner of the pixel, # and that pixel's flow direction (0-7), @@ -4041,8 +4031,13 @@ def calculate_subwatershed_boundary( # determine the first edge # # outflow through a straight side, so trivial edge detection + # + # if outflow is in an orthogonal direction (0, 2, 4, or 6), + # draw the first edge perpendicular to the outflow + # because this is a drain point if outflow_dir % 2 == 0: - + # the edge side indicates which side of the pixel we're drawing + # the edge on: one of 0 (right), 2 (top), 4 (left), or 6 (bottom) edge_side = outflow_dir # the outflow direction 0 - 7 # get the direction in which to step from the current vertex @@ -4050,6 +4045,8 @@ def calculate_subwatershed_boundary( # one of 0 (right), 2 (up), 4 (left), or 6 (down) edge_dir = (2 + edge_side) % 8 + # if outflow is in a diagonal direction (1, 3, 5, or 7), + # # diagonal outflow requires testing neighboring cells to determine first edge # # if i drains to a diagonal neighbor, @@ -4072,12 +4069,19 @@ def calculate_subwatershed_boundary( else: # get the next neighbor going counterclockwise from outflow_dir cell_to_test = (outflow_dir + 1) % 8 + + # the edge side indicates which side of the pixel we're drawing + # the edge on: one of 0 (right), 2 (top), 4 (left), or 6 (bottom) edge_side = cell_to_test # get the direction in which to step from the current vertex # we don't move diagonally across pixels, so this is # one of 0 (right), 2 (up), 4 (left), or 6 (down) edge_dir = (cell_to_test + 2) % 8 + + # if the neighbor being tested is within the same watershed as + # pixel l, then the edge between them is not the watershed boundary. + # move to the next edge over in a clockwise direction. if _in_watershed( x_l, y_l, cell_to_test, discovery, finish, discovery_managed_raster): @@ -4091,6 +4095,24 @@ def calculate_subwatershed_boundary( # walk the watershed boundary until we come back to the starting point # this loop repeats until it has outlined a complete watershed, # or it reaches the maximum allowed number of steps. + # + # This algorithm tracks a current pixel (x_l, y_l) and a + # current vertex (x_f, y_f). Pixels are always referenced by the coordinate of + # their top-left corner, but the watershed boundary may trace around any edge + # of a pixel. So the current vertex (f) is the leading point of the watershed + # boundary linestring. + # + # We know that we're on the watershed boundary when the current edge is between + # a pixel that's in the watershed and a pixel that's not. The algorithm walks + # counterclockwise around the watershed boundary, so that the "left" side of the + # boundary is the watershed interior, and the right side is the exterior. + # + # The algorithm proceeds by considering adjacent pairs of neighbors of the + # current pixel. If the "left" neighbor is in the watershed and the "right" + # neighbor isn't, we know we're on the boundary. If both are in the watershed, + # we turn right. If neither are in the watershed, we turn left. + # + # n_steps = 0 terminated_early = 0 while True: @@ -4127,39 +4149,46 @@ def calculate_subwatershed_boundary( g3 + g4 * x_f + g5 * y_f) # counterclockwise configuration + # edge_dir is two "directions" counterclockwise of edge_side + # this is always true ? + # + # the "left" pixel to test is the neighbor in the edge direction + # the "right" pixel to test is the next neighbor clockwise from that if edge_side - ((edge_dir - 2) % 8) == 0: left = edge_dir right = (left - 1) % 8 out_dir_increase = 2 # clockwise configuration (swapping "left" and "right") + # When is this case ever reached? else: + print('Reached unexpected case!!!') right = edge_dir left = (edge_side+1) out_dir_increase = -2 - left_in = _in_watershed( - x_l, y_l, left, discovery, finish, - discovery_managed_raster) - right_in = _in_watershed( - x_l, y_l, right, discovery, finish, - discovery_managed_raster) - if right_in: + + # if the "right" pixel to test is in the watershed, + # turn right so that we can draw the boundary around the right pixel + if _in_watershed(x_l, y_l, right, discovery, finish, discovery_managed_raster): # turn right out_dir = edge_side - edge_side = (edge_side-out_dir_increase) % 8 + edge_side = (edge_side - out_dir_increase) % 8 edge_dir = out_dir # pixel moves to be the right cell x_l += COL_OFFSETS[right] y_l += ROW_OFFSETS[right] - _diagonal_fill_step( - x_l, y_l, right, - discovery, finish, discovery_managed_raster, - boundary_list) - elif left_in: + # add the current pixel to the boundary list + boundary_list.append((x_l, y_l)) + + # otherwise if the "left" pixel to test is in the watershed, + # continue straight - we are on a boundary edge + elif _in_watershed(x_l, y_l, left, discovery, finish, discovery_managed_raster): # step forward x_l += COL_OFFSETS[edge_dir] y_l += ROW_OFFSETS[edge_dir] # the pixel moves forward boundary_list.append((x_l, y_l)) + # if neither the "left" or "right" pixel is in the watershed, + # turn left else: # turn left edge_side = edge_dir @@ -4183,7 +4212,15 @@ def calculate_subwatershed_boundary( # this loop fills in the raster at the boundary, done at end so it # doesn't interfere with the loop return to think the cells are no # longer in the watershed + # + # setting the discovery value to -1 guaranteeds that the _in_watershed + # test will always return False - if we reach one of these boundary pixels + # from another watershed, it won't be added to that watershed. + # + # for boundary_x, boundary_y in boundary_list: + if (boundary_x, boundary_y) == (3203, 8357): + print('marking boundary of watershed with stream_fid', stream_fid) discovery_managed_raster.set(boundary_x, boundary_y, -1) watershed_layer.CommitTransaction() watershed_layer = None @@ -4539,66 +4576,6 @@ def detect_outlets( LOGGER.info('outlet detection: done') -cdef void _diagonal_fill_step( - int x_l, int y_l, int edge_dir, - long discovery, long finish, - ManagedRaster discovery_managed_raster, boundary_list): - """Fill diagonal that are in the watershed behind the new edge. - - Used as a helper function to mark pixels as part of the watershed - boundary in one step if they are diagonal and also contained within the - watershed. Prevents a case like this: - - iii - ii1 - i1o - - Instead would fill the diagonal like this: - - iii - i11 - i1o - - Args: - x_l/y_l (int): leading coordinate of the watershed boundary - edge. - edge_dir (int): D8 direction that points which direction the edge - came from - discovery/finish (long): the discovery and finish time that defines - whether a pixel discovery time is inside a watershed or not. - discovery_managed_raster (ManagedRaster): discovery time raster - x/y gives the discovery time for that pixel. - boundary_list (list): this list is appended to for new pixels that - should be neighbors in the fill. - - Return: - None. - """ - # always add the current pixel - boundary_list.append((x_l, y_l)) - - # this section determines which back diagonal was in the watershed and - # fills it. if none are we pick one so there's no degenerate case - cdef int xdelta = COL_OFFSETS[edge_dir] - cdef int ydelta = ROW_OFFSETS[edge_dir] - test_list = [ - (x_l - xdelta, y_l), - (x_l, y_l - ydelta)] - for x_t, y_t in test_list: - point_discovery = discovery_managed_raster.get( - x_t, y_t) - if (not discovery_managed_raster.is_nodata(point_discovery) and - point_discovery >= discovery and - point_discovery <= finish): - boundary_list.append((int(x_t), int(y_t))) - # there's only one diagonal to fill in so it's done here - return - - # if there's a degenerate case then just add the xdelta, - # it doesn't matter - boundary_list.append(test_list[0]) - - cdef int _in_watershed( int x_l, int y_l, int direction_to_test, int discovery, int finish, ManagedRaster discovery_managed_raster): From c7a3be926b251fd809f1e6bdd12419e4a9bd07cb Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Mon, 9 Mar 2026 16:43:01 -0700 Subject: [PATCH 10/12] is_nodata method for ManagedRaster --- src/pygeoprocessing/extensions.pxd | 5 +- .../extensions/ManagedRaster.h | 7 +-- src/pygeoprocessing/routing/routing.pyx | 49 ++++--------------- 3 files changed, 16 insertions(+), 45 deletions(-) diff --git a/src/pygeoprocessing/extensions.pxd b/src/pygeoprocessing/extensions.pxd index c1ee1407..8b5ff86b 100644 --- a/src/pygeoprocessing/extensions.pxd +++ b/src/pygeoprocessing/extensions.pxd @@ -55,7 +55,7 @@ cdef extern from "extensions/ManagedRaster.h": void _load_block(int block_index) except * void close() bint is_out_of_bounds(long x, long y) - bint is_out_of_bounds_or_nodata(long x, long y) + bint is_nodata(double val) bint is_nodata(long x, long y) cdef cppclass ManagedFlowDirRaster[T]: @@ -84,6 +84,9 @@ cdef extern from "extensions/ManagedRaster.h": void set(long xi, long yi, double value) double get(long xi, long yi) void close() + bint is_out_of_bounds(long x, long y) + bint is_nodata(double val) + bint is_nodata(long x, long y) cdef cppclass D8 cdef cppclass MFD diff --git a/src/pygeoprocessing/extensions/ManagedRaster.h b/src/pygeoprocessing/extensions/ManagedRaster.h index 91082916..54593cea 100644 --- a/src/pygeoprocessing/extensions/ManagedRaster.h +++ b/src/pygeoprocessing/extensions/ManagedRaster.h @@ -401,11 +401,8 @@ class ManagedRaster { return false; } - bool is_out_of_bounds_or_nodata(long x, long y) { - if (is_out_of_bounds(x, y) or is_nodata(x, y)) { - return true; - } - return false; + bool is_nodata(double val) { + return is_close(val, nodata); } bool is_nodata(long x, long y) { diff --git a/src/pygeoprocessing/routing/routing.pyx b/src/pygeoprocessing/routing/routing.pyx index 30e84c46..ac52bc97 100644 --- a/src/pygeoprocessing/routing/routing.pyx +++ b/src/pygeoprocessing/routing/routing.pyx @@ -3044,7 +3044,8 @@ def extract_strahler_streams_d8( d_n = flow_dir_managed_raster.get(x_l, y_l) x_n = x_l + COL_OFFSETS[d_n] y_n = y_l + ROW_OFFSETS[d_n] - if flow_dir_managed_raster.is_out_of_bounds_or_nodata(x_n, y_n): + if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or + flow_dir_managed_raster.is_nodata(x_n, y_n)): is_drain = 1 if not is_drain and ( @@ -3058,7 +3059,8 @@ def extract_strahler_streams_d8( x_n = x_l + COL_OFFSETS[d] y_n = y_l + ROW_OFFSETS[d] # check if on border - if flow_dir_managed_raster.is_out_of_bounds_or_nodata(x_n, y_n): + if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or + flow_dir_managed_raster.is_nodata(x_n, y_n)): continue d_n = flow_dir_managed_raster.get(x_n, y_n) if (INFLOW_OFFSETS[d] == d_n and @@ -3671,42 +3673,11 @@ def _build_discovery_finish_rasters( # if the downstream neighbor runs off raster or is nodata, then # this pixel is a drain point, so we can push it to the stack - if flow_dir_managed_raster.is_out_of_bounds_or_nodata(x_n, y_n): + if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or + flow_dir_managed_raster.is_nodata(x_n, y_n)): discovery_stack.push(CoordinateType(x, y)) finish_stack.push(FinishType(x, y, 1)) - - - # [A] [(A,1)] - # [B C] A 0 [(A,1), (A,2)] - # [B F G] C 1 [(A,1), (A,2), (C,2)] - # [B F N O] G 2 [(A,1), (A,2), (C,2), (G,2)] - # [B F N] O 3 [(A,1), (A,2), (C,2), (G,2), (O,0)] - # [(A,1), (A,2), (C,2), (G,2)] -> set finish raster at O to (3-1) = 2 - # [(A,1), (A,2), (C,2), (G,1)] - # [B F] N 4 [(A,1), (A,2), (C,2), (G,1), (N,0)] - # [(A,1), (A,2), (C,2), (G,1)] -> set finish raster at N to (4-1) = 3 - # [(A,1), (A,2), (C,2)] -> set finish raster at G to (4-1) = 3 - # [(A,1), (A,2), (C,1)] - # [B L] F 5 [(A,1), (A,2), (C,1), (F,1)] - # [B] L 6 [(A,1), (A,2), (C,1), (F,1), (L,0)] - # [(A,1), (A,2), (C,1), (F,1)] -> set finish raster at L to (6-1) = 5 - # [(A,1), (A,2), (C,1)] -> set finish raster at F to (6-1) = 5 - # [(A,1), (A,2)] -> set finish raster at C to (6-1) = 5 - # [(A,1), (A,1)] - # [E] B 7 [(A,1), (A,1), (B,1)] - # [J K] E 8 [(A,1), (A,1), (B,1), (E,2)] - # [J] K 9 [(A,1), (A,1), (B,1), (E,2), (K,0)] - # [(A,1), (A,1), (B,1), (E,2)] -> set finish raster at K to 8 - # [(A,1), (A,1), (B,1), (E,1)] - # [] J 10 [(A,1), (A,1), (B,1), (E,1), (J,0)] - # [(A,1), (A,1), (B,1), (E,1)] -> set finish raster at J to 9 - # [(A,1), (A,1), (B,1)] -> set finish raster at E to 9 - # [(A,1), (A,1)] -> set finish raster at B to 9 - # [(A,1)] -> set finish raster at A to 9 - # [] -> set finish raster at A to 9 - - while not discovery_stack.empty(): # This coordinate is the downstream end of the stream raster_coord = discovery_stack.top() @@ -3725,7 +3696,8 @@ def _build_discovery_finish_rasters( # skip neighbors that are outside of the raster bounds, # or that have nodata - if flow_dir_managed_raster.is_out_of_bounds_or_nodata(x_n, y_n): + if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or + flow_dir_managed_raster.is_nodata(x_n, y_n)): continue # if the neighbor drains to this pixel, push it to the stack @@ -4219,8 +4191,6 @@ def calculate_subwatershed_boundary( # # for boundary_x, boundary_y in boundary_list: - if (boundary_x, boundary_y) == (3203, 8357): - print('marking boundary of watershed with stream_fid', stream_fid) discovery_managed_raster.set(boundary_x, boundary_y, -1) watershed_layer.CommitTransaction() watershed_layer = None @@ -4711,7 +4681,8 @@ cdef _calculate_stream_geometry( y_n = y_l + ROW_OFFSETS[d] # check out of bounds or nodata - if flow_dir_managed_raster.is_out_of_bounds_or_nodata(x_n, y_n): + if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or + flow_dir_managed_raster.is_nodata(x_n, y_n)): continue # check if there's an upstream inflow pixel with flow accum From f82ec6190a7f418fdcd2b39816901bd2a237c582 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Wed, 11 Mar 2026 18:00:52 -0700 Subject: [PATCH 11/12] revert some unrelated changes to save for another PR --- src/pygeoprocessing/routing/routing.pyx | 612 ++++++++++++------------ 1 file changed, 315 insertions(+), 297 deletions(-) diff --git a/src/pygeoprocessing/routing/routing.pyx b/src/pygeoprocessing/routing/routing.pyx index ac52bc97..1d34da6f 100644 --- a/src/pygeoprocessing/routing/routing.pyx +++ b/src/pygeoprocessing/routing/routing.pyx @@ -744,6 +744,9 @@ def flow_dir_d8( # a plateau in case no other valid drain was found cdef queue[int] nodata_flow_dir_queue + # properties of the parallel rasters + cdef unsigned int raster_x_size, raster_y_size + cdef unsigned long current_pixel # used for time-delayed logging @@ -761,6 +764,9 @@ def flow_dir_d8( # pick some very improbable value since it's hard to deal with NaNs dem_nodata = IMPROBABLE_FLOAT_NODATA + # these are used to determine if a sample is within the raster + raster_x_size, raster_y_size = dem_raster_info['raster_size'] + # this is the nodata value for all the flat region and pit masks mask_nodata = 0 @@ -794,6 +800,19 @@ def flow_dir_d8( flow_dir_managed_raster = ManagedRaster( target_flow_dir_path.encode('utf-8'), 1, True) + # this creates a raster that's used for a dynamic programming solution to + # shortest path to the drain for plateaus. the raster is filled with + # raster_x_size * raster_y_size as a distance that's greater than the + # longest plateau drain distance possible for this raster. + plateau_distance_path = os.path.join( + working_dir_path, 'plateau_distance.tif') + pygeoprocessing.new_raster_from_base( + dem_raster_path_band[0], plateau_distance_path, gdal.GDT_Float64, + [-1], fill_value_list=[raster_x_size * raster_y_size], + raster_driver_creation_tuple=raster_driver_creation_tuple) + plateau_distance_managed_raster = ManagedRaster( + plateau_distance_path.encode('utf-8'), 1, True) + # this raster is for random access of the DEM compatable_dem_raster_path_band = None @@ -817,19 +836,6 @@ def flow_dir_d8( compatable_dem_raster_path_band[0].encode('utf-8'), compatable_dem_raster_path_band[1], False) - # this creates a raster that's used for a dynamic programming solution to - # shortest path to the drain for plateaus. the raster is filled with - # the number of pixels as a distance that's greater than the - # longest plateau drain distance possible for this raster. - plateau_distance_path = os.path.join( - working_dir_path, 'plateau_distance.tif') - pygeoprocessing.new_raster_from_base( - dem_raster_path_band[0], plateau_distance_path, gdal.GDT_Float64, - [-1], fill_value_list=[dem_managed_raster.n_pixels], - raster_driver_creation_tuple=raster_driver_creation_tuple) - plateau_distance_managed_raster = ManagedRaster( - plateau_distance_path.encode('utf-8'), 1, True) - # and this raster is for efficient block-by-block reading of the dem dem_raster = gdal.OpenEx( compatable_dem_raster_path_band[0], gdal.OF_RASTER) @@ -846,10 +852,10 @@ def flow_dir_d8( if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * dem_managed_raster.raster_x_size + current_pixel = xoff + yoff * raster_x_size LOGGER.info( '(flow dir d8): ' - f'{current_pixel} of {dem_managed_raster.n_pixels} ' + f'{current_pixel} of {raster_x_size*raster_y_size} ' f'pixels complete') # make a buffer big enough to capture block and boundaries around it @@ -860,8 +866,7 @@ def flow_dir_d8( # attempt to expand read block by a pixel boundary (xa, xb, ya, yb), modified_offset_dict = _generate_read_bounds( - offset_dict, dem_managed_raster.raster_x_size, - dem_managed_raster.raster_y_size) + offset_dict, raster_x_size, raster_y_size) dem_buffer_array[ya:yb, xa:xb] = dem_band.ReadAsArray( **modified_offset_dict).astype(numpy.float64) @@ -934,7 +939,8 @@ def flow_dir_d8( xi_n = xi_q+COL_OFFSETS[i_n] yi_n = yi_q+ROW_OFFSETS[i_n] - if dem_managed_raster.is_out_of_bounds(xi_n, yi_n): + if (xi_n < 0 or xi_n >= raster_x_size or + yi_n < 0 or yi_n >= raster_y_size): n_height = dem_nodata else: n_height = dem_managed_raster.get(xi_n, yi_n) @@ -1007,7 +1013,8 @@ def flow_dir_d8( for i_n in range(8): xi_n = xi_q+COL_OFFSETS[i_n] yi_n = yi_q+ROW_OFFSETS[i_n] - if dem_managed_raster.is_out_of_bounds(xi_n, yi_n): + if (xi_n < 0 or xi_n >= raster_x_size or + yi_n < 0 or yi_n >= raster_y_size): continue n_drain_distance = drain_distance + ( @@ -1118,6 +1125,9 @@ def flow_accumulation_d8( cdef stack[WeightedFlowPixelType] search_stack cdef WeightedFlowPixelType flow_pixel + # properties of the parallel rasters + cdef unsigned int raster_x_size, raster_y_size + cdef unsigned long current_pixel # used for time-delayed logging @@ -1187,6 +1197,7 @@ def flow_accumulation_d8( flow_dir_raster_info = pygeoprocessing.get_raster_info( flow_dir_raster_path_band[0]) + raster_x_size, raster_y_size = flow_dir_raster_info['raster_size'] tmp_flow_dir_nodata = flow_dir_raster_info['nodata'][ flow_dir_raster_path_band[1]-1] @@ -1207,9 +1218,9 @@ def flow_accumulation_d8( if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * flow_dir_managed_raster.raster_x_size + current_pixel = xoff + yoff * raster_x_size LOGGER.info('Flow accumulation D8 %.1f%% complete', 100.0 * current_pixel / ( - flow_dir_managed_raster.n_pixels)) + raster_x_size * raster_y_size)) # make a buffer big enough to capture block and boundaries around it flow_dir_buffer_array = numpy.empty( @@ -1219,8 +1230,7 @@ def flow_accumulation_d8( # attempt to expand read block by a pixel boundary (xa, xb, ya, yb), modified_offset_dict = _generate_read_bounds( - offset_dict, flow_dir_managed_raster.raster_x_size, - flow_dir_managed_raster.raster_y_size) + offset_dict, raster_x_size, raster_y_size) flow_dir_buffer_array[ya:yb, xa:xb] = flow_dir_band.ReadAsArray( **modified_offset_dict).astype(numpy.uint8) @@ -1275,7 +1285,8 @@ def flow_accumulation_d8( for i_n in range(flow_pixel.last_flow_dir, 8): xi_n = flow_pixel.xi+COL_OFFSETS[i_n] yi_n = flow_pixel.yi+ROW_OFFSETS[i_n] - if flow_dir_managed_raster.is_out_of_bounds(xi_n, yi_n): + if (xi_n < 0 or xi_n >= raster_x_size or + yi_n < 0 or yi_n >= raster_y_size): # neighbor not upstream: off edges of the raster continue upstream_flow_dir = flow_dir_managed_raster.get( @@ -1443,6 +1454,9 @@ def flow_dir_mfd( # a plateau in case no other valid drain was found cdef queue[int] nodata_flow_dir_queue + # properties of the parallel rasters + cdef unsigned long raster_x_size, raster_y_size + # used for time-delayed logging cdef time_t last_log_time last_log_time = ctime(NULL) @@ -1462,6 +1476,9 @@ def flow_dir_mfd( # pick some very improbable value since it's hard to deal with NaNs dem_nodata = IMPROBABLE_FLOAT_NODATA + # these are used to determine if a sample is within the raster + raster_x_size, raster_y_size = dem_raster_info['raster_size'] + # this is the nodata value for all the flat region and pit masks mask_nodata = 0 @@ -1505,6 +1522,21 @@ def flow_dir_mfd( plateau_drain_mask_managed_raster = ManagedRaster( plateu_drain_mask_path.encode('utf-8'), 1, True) + # this creates a raster that's used for a dynamic programming solution to + # shortest path to the drain for plateaus. the raster is filled with + # raster_x_size * raster_y_size as a distance that's greater than the + # longest plateau drain distance possible for this raster. + plateau_distance_path = os.path.join( + working_dir_path, 'plateau_distance.tif') + cdef unsigned long plateau_distance_nodata = raster_x_size * raster_y_size + pygeoprocessing.new_raster_from_base( + dem_raster_path_band[0], plateau_distance_path, gdal.GDT_Float64, + [plateau_distance_nodata], + fill_value_list=[plateau_distance_nodata], + raster_driver_creation_tuple=raster_driver_creation_tuple) + plateau_distance_managed_raster = ManagedRaster( + plateau_distance_path.encode('utf-8'), 1, True) + # this raster is for random access of the DEM compatable_dem_raster_path_band = None dem_block_xsize, dem_block_ysize = dem_raster_info['block_size'] @@ -1527,21 +1559,6 @@ def flow_dir_mfd( compatable_dem_raster_path_band[0].encode('utf-8'), compatable_dem_raster_path_band[1], False) - # this creates a raster that's used for a dynamic programming solution to - # shortest path to the drain for plateaus. the raster is filled with - # the number of pixels as a distance that's greater than the - # longest plateau drain distance possible for this raster. - plateau_distance_path = os.path.join( - working_dir_path, 'plateau_distance.tif') - cdef unsigned long plateau_distance_nodata = dem_managed_raster.n_pixels - pygeoprocessing.new_raster_from_base( - dem_raster_path_band[0], plateau_distance_path, gdal.GDT_Float64, - [plateau_distance_nodata], - fill_value_list=[plateau_distance_nodata], - raster_driver_creation_tuple=raster_driver_creation_tuple) - plateau_distance_managed_raster = ManagedRaster( - plateau_distance_path.encode('utf-8'), 1, True) - # and this raster is for efficient block-by-block reading of the dem dem_raster = gdal.OpenEx( compatable_dem_raster_path_band[0], gdal.OF_RASTER) @@ -1559,9 +1576,9 @@ def flow_dir_mfd( if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * dem_managed_raster.raster_x_size + current_pixel = xoff + yoff * raster_x_size LOGGER.info('%.1f%% complete', 100.0 * current_pixel / ( - dem_managed_raster.n_pixels)) + raster_x_size * raster_y_size)) # make a buffer big enough to capture block and boundaries around it dem_buffer_array = numpy.empty( @@ -1572,8 +1589,7 @@ def flow_dir_mfd( # check if we can widen the border to include real data from the # raster (xa, xb, ya, yb), modified_offset_dict = _generate_read_bounds( - offset_dict, dem_managed_raster.raster_x_size, - dem_managed_raster.raster_y_size) + offset_dict, raster_x_size, raster_y_size) dem_buffer_array[ya:yb, xa:xb] = dem_band.ReadAsArray( **modified_offset_dict).astype(numpy.float64) @@ -1657,7 +1673,8 @@ def flow_dir_mfd( xi_n = xi_q+COL_OFFSETS[i_n] yi_n = yi_q+ROW_OFFSETS[i_n] - if dem_managed_raster.is_out_of_bounds(xi_n, yi_n): + if (xi_n < 0 or xi_n >= raster_x_size or + yi_n < 0 or yi_n >= raster_y_size): n_height = dem_nodata else: n_height = dem_managed_raster.get(xi_n, yi_n) @@ -1763,7 +1780,8 @@ def flow_dir_mfd( for i_n in range(8): xi_n = xi_q+COL_OFFSETS[i_n] yi_n = yi_q+ROW_OFFSETS[i_n] - if dem_managed_raster.is_out_of_bounds(xi_n, yi_n): + if (xi_n < 0 or xi_n >= raster_x_size or + yi_n < 0 or yi_n >= raster_y_size): continue n_drain_distance = drain_distance + ( @@ -1796,7 +1814,8 @@ def flow_dir_mfd( yi_n = yi_q+ROW_OFFSETS[i_n] downhill_slope_array[i_n] = 0.0 - if dem_managed_raster.is_out_of_bounds(xi_n, yi_n): + if (xi_n < 0 or xi_n >= raster_x_size or + yi_n < 0 or yi_n >= raster_y_size): continue if dem_managed_raster.get(xi_n, yi_n) != root_height: @@ -1887,7 +1906,7 @@ def flow_accumulation_mfd( cdef unsigned long win_ysize, win_xsize, xoff, yoff # These are used to estimate % complete - cdef unsigned long long visit_count + cdef unsigned long long visit_count, pixel_count # the _root variables remembers the pixel index where the plateau/pit # region was first detected when iterating over the DEM. @@ -1920,6 +1939,9 @@ def flow_accumulation_mfd( cdef stack[FlowPixelType] search_stack cdef FlowPixelType flow_pixel + # properties of the parallel rasters + cdef unsigned long raster_x_size, raster_y_size + # used for time-delayed logging cdef time_t last_log_time last_log_time = ctime(NULL) @@ -1976,6 +1998,10 @@ def flow_accumulation_mfd( if raw_weight_nodata is not None: weight_nodata = raw_weight_nodata + flow_dir_raster_info = pygeoprocessing.get_raster_info( + flow_dir_mfd_raster_path_band[0]) + raster_x_size, raster_y_size = flow_dir_raster_info['raster_size'] + pixel_count = raster_x_size * raster_y_size visit_count = 0 LOGGER.debug('starting search') @@ -1990,9 +2016,9 @@ def flow_accumulation_mfd( if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * flow_dir_managed_raster.raster_x_size + current_pixel = xoff + yoff * raster_x_size LOGGER.info('Flow accum MFD %.1f%% complete', 100.0 * current_pixel / ( - flow_dir_managed_raster.n_pixels)) + raster_x_size * raster_y_size)) # make a buffer big enough to capture block and boundaries around it flow_dir_mfd_buffer_array = numpy.empty( @@ -2003,8 +2029,7 @@ def flow_accumulation_mfd( # check if we can widen the border to include real data from the # raster (xa, xb, ya, yb), modified_offset_dict = _generate_read_bounds( - offset_dict, flow_dir_managed_raster.raster_x_size, - flow_dir_managed_raster.raster_y_size) + offset_dict, raster_x_size, raster_y_size) flow_dir_mfd_buffer_array[ya:yb, xa:xb] = flow_dir_band.ReadAsArray( **modified_offset_dict).astype(numpy.int32) @@ -2054,13 +2079,14 @@ def flow_accumulation_mfd( last_log_time = ctime(NULL) LOGGER.info( 'Flow accum MFD %.1f%% complete', - 100.0 * visit_count / float(flow_dir_managed_raster.n_pixels)) + 100.0 * visit_count / float(pixel_count)) preempted = 0 for i_n in range(flow_pixel.last_flow_dir, 8): xi_n = flow_pixel.xi+COL_OFFSETS[i_n] yi_n = flow_pixel.yi+ROW_OFFSETS[i_n] - if flow_dir_managed_raster.is_out_of_bounds(xi_n, yi_n): + if (xi_n < 0 or xi_n >= raster_x_size or + yi_n < 0 or yi_n >= raster_y_size): # no upstream here continue compressed_upstream_flow_dir = ( @@ -2173,6 +2199,9 @@ def distance_to_channel_d8( # from a defined flow distance pixel cdef stack[PixelType] distance_to_channel_stack + # properties of the parallel rasters + cdef unsigned int raster_x_size, raster_y_size + # these area used to store custom per-pixel weights and per-pixel values # for distance updates cdef double weight_val, pixel_val @@ -2222,6 +2251,10 @@ def distance_to_channel_d8( channel_raster = gdal.OpenEx(channel_raster_path_band[0], gdal.OF_RASTER) channel_band = channel_raster.GetRasterBand(channel_raster_path_band[1]) + flow_dir_raster_info = pygeoprocessing.get_raster_info( + flow_dir_d8_raster_path_band[0]) + raster_x_size, raster_y_size = flow_dir_raster_info['raster_size'] + # this outer loop searches for undefined channels for offset_dict in pygeoprocessing.iterblocks( channel_raster_path_band, offset_only=True, largest_block=0): @@ -2232,9 +2265,9 @@ def distance_to_channel_d8( if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * flow_dir_d8_managed_raster.raster_x_size + current_pixel = xoff + yoff * raster_x_size LOGGER.info('Dist to channel D8 %.1f%% complete', 100.0 * current_pixel / ( - flow_dir_d8_managed_raster.n_pixels)) + raster_x_size * raster_y_size)) # make a buffer big enough to capture block and boundaries around it channel_buffer_array = numpy.empty( @@ -2245,8 +2278,7 @@ def distance_to_channel_d8( # check if we can widen the border to include real data from the # raster (xa, xb, ya, yb), modified_offset_dict = _generate_read_bounds( - offset_dict, flow_dir_d8_managed_raster.raster_x_size, - flow_dir_d8_managed_raster.raster_y_size) + offset_dict, raster_x_size, raster_y_size) channel_buffer_array[ya:yb, xa:xb] = channel_band.ReadAsArray( **modified_offset_dict).astype(numpy.int8) @@ -2277,7 +2309,8 @@ def distance_to_channel_d8( xi_n = xi_q+COL_OFFSETS[i_n] yi_n = yi_q+ROW_OFFSETS[i_n] - if flow_dir_d8_managed_raster.is_out_of_bounds(xi_n, yi_n): + if (xi_n < 0 or xi_n >= raster_x_size or + yi_n < 0 or yi_n >= raster_y_size): continue if channel_managed_raster.get(xi_n, yi_n) == 1: @@ -2400,6 +2433,9 @@ def distance_to_channel_mfd( # from a defined flow distance pixel cdef stack[MFDFlowPixelType] distance_to_channel_stack + # properties of the parallel rasters + cdef unsigned int raster_x_size, raster_y_size + # this value is used to store the current weight which might be 1 or # come from a predefined flow accumulation weight raster cdef double weight_val @@ -2467,6 +2503,10 @@ def distance_to_channel_mfd( else: weight_nodata = IMPROBABLE_FLOAT_NODATA + flow_dir_raster_info = pygeoprocessing.get_raster_info( + flow_dir_mfd_raster_path_band[0]) + raster_x_size, raster_y_size = flow_dir_raster_info['raster_size'] + # this outer loop searches for undefined channels for offset_dict in pygeoprocessing.iterblocks( channel_raster_path_band, offset_only=True, largest_block=0): @@ -2477,9 +2517,9 @@ def distance_to_channel_mfd( if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * flow_dir_mfd_managed_raster.raster_x_size + current_pixel = xoff + yoff * raster_x_size LOGGER.info('Dist to channel MFD %.1f%% complete', 100.0 * current_pixel / ( - flow_dir_mfd_managed_raster.n_pixels)) + raster_x_size * raster_y_size)) # make a buffer big enough to capture block and boundaries around it channel_buffer_array = numpy.empty( @@ -2494,8 +2534,7 @@ def distance_to_channel_mfd( # check if we can widen the border to include real data from the # raster (xa, xb, ya, yb), modified_offset_dict = _generate_read_bounds( - offset_dict, flow_dir_mfd_managed_raster.raster_x_size, - flow_dir_mfd_managed_raster.raster_y_size) + offset_dict, raster_x_size, raster_y_size) channel_buffer_array[ya:yb, xa:xb] = channel_band.ReadAsArray( **modified_offset_dict).astype(numpy.int8) @@ -2562,7 +2601,8 @@ def distance_to_channel_mfd( xi_n = pixel.xi+COL_OFFSETS[i_n] yi_n = pixel.yi+ROW_OFFSETS[i_n] - if flow_dir_mfd_managed_raster.is_out_of_bounds(xi_n, yi_n): + if (xi_n < 0 or xi_n >= raster_x_size or + yi_n < 0 or yi_n >= raster_y_size): continue # if the pixel has a neighbor that hasn't been visited yet, @@ -2691,6 +2731,9 @@ def extract_streams_mfd( flow_accum_raster_path_band[1]-1] stream_nodata = 255 + cdef int raster_x_size, raster_y_size + raster_x_size, raster_y_size = flow_accum_info['raster_size'] + pygeoprocessing.new_raster_from_base( flow_accum_raster_path_band[0], target_stream_raster_path, gdal.GDT_Byte, [stream_nodata], @@ -2735,9 +2778,9 @@ def extract_streams_mfd( yi_root = yi+yoff if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * flow_accum_mr.raster_x_size + current_pixel = xoff + yoff * raster_x_size LOGGER.info('Extract streams MFD %.1f%% complete', 100.0 * current_pixel / ( - flow_accum_mr.n_pixels)) + raster_x_size * raster_y_size)) for xi in range(win_xsize): xi_root = xi+xoff flow_accum = flow_accum_mr.get(xi_root, yi_root) @@ -2757,7 +2800,8 @@ def extract_streams_mfd( continue xi_n = xi_root+COL_OFFSETS[i_n] yi_n = yi_root+ROW_OFFSETS[i_n] - if flow_accum_mr.is_out_of_bounds(xi_n, yi_n): + if (xi_n < 0 or xi_n >= raster_x_size or + yi_n < 0 or yi_n >= raster_y_size): # it'll drain off the edge of the raster is_outlet = 1 break @@ -2777,7 +2821,8 @@ def extract_streams_mfd( for i_sn in range(8): xi_sn = xi_n+COL_OFFSETS[i_sn] yi_sn = yi_n+ROW_OFFSETS[i_sn] - if flow_accum_mr.is_out_of_bounds(xi_sn, yi_sn): + if (xi_sn < 0 or xi_sn >= raster_x_size or + yi_sn < 0 or yi_sn >= raster_y_size): continue flow_dir_mfd = flow_dir_mfd_mr.get(xi_sn, yi_sn) if flow_dir_mfd == flow_dir_nodata: @@ -2807,7 +2852,8 @@ def extract_streams_mfd( if (flow_dir_mfd >> (i_sn*4)) & 0xF > 0: xi_sn = xi_bn+COL_OFFSETS[i_sn] yi_sn = yi_bn+ROW_OFFSETS[i_sn] - if flow_accum_mr.is_out_of_bounds(xi_sn, yi_sn): + if (xi_sn < 0 or xi_sn >= raster_x_size or + yi_sn < 0 or yi_sn >= raster_y_size): continue if stream_mr.get(xi_sn, yi_sn) == 2: stream_mr.set(xi_sn, yi_sn, 1) @@ -2980,10 +3026,13 @@ def extract_strahler_streams_d8( # 321 # 4x0 # 567 - cdef unsigned int xoff, yoff, i, j, d, d_n + cdef unsigned int xoff, yoff, i, j, d, d_n, n_cols, n_rows cdef unsigned int win_xsize, win_ysize + n_cols, n_rows = flow_dir_info['raster_size'] + LOGGER.info('(extract_strahler_streams_d8): seed the drains') + cdef unsigned long n_pixels = n_cols * n_rows cdef long n_processed = 0 cdef time_t last_log_time last_log_time = ctime(NULL) @@ -3022,7 +3071,7 @@ def extract_strahler_streams_d8( if ctime(NULL)-last_log_time > _LOGGING_PERIOD: LOGGER.info( '(extract_strahler_streams_d8): drain seeding ' - f'{n_processed} of {flow_dir_managed_raster.n_pixels} pixels complete') + f'{n_processed} of {n_pixels} pixels complete') last_log_time = ctime(NULL) xoff = offset_dict['xoff'] yoff = offset_dict['yoff'] @@ -3044,8 +3093,9 @@ def extract_strahler_streams_d8( d_n = flow_dir_managed_raster.get(x_l, y_l) x_n = x_l + COL_OFFSETS[d_n] y_n = y_l + ROW_OFFSETS[d_n] - if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or - flow_dir_managed_raster.is_nodata(x_n, y_n)): + if (x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_rows or + flow_dir_managed_raster.get( + x_n, y_n) == flow_nodata): is_drain = 1 if not is_drain and ( @@ -3059,10 +3109,11 @@ def extract_strahler_streams_d8( x_n = x_l + COL_OFFSETS[d] y_n = y_l + ROW_OFFSETS[d] # check if on border - if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or - flow_dir_managed_raster.is_nodata(x_n, y_n)): + if x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_rows: continue d_n = flow_dir_managed_raster.get(x_n, y_n) + if d_n == flow_nodata: + continue if (INFLOW_OFFSETS[d] == d_n and flow_accum_managed_raster.get( x_n, y_n) >= min_flow_accum_threshold): @@ -3109,8 +3160,8 @@ def extract_strahler_streams_d8( payload = _calculate_stream_geometry( source_stream_point.xi, source_stream_point.yi, source_stream_point.upstream_d8_dir, - flow_dir_info['geotransform'], flow_accum_managed_raster, - flow_dir_managed_raster, + flow_dir_info['geotransform'], n_cols, n_rows, + flow_accum_managed_raster, flow_dir_managed_raster, flow_nodata, min_flow_accum_threshold, coord_to_stream_ids) if payload is None: LOGGER.debug( @@ -3350,9 +3401,9 @@ def extract_strahler_streams_d8( 'upstream_d8_dir') payload = _calculate_stream_geometry( ds_x, ds_y, upstream_d8_dir, - flow_dir_info['geotransform'], + flow_dir_info['geotransform'], n_cols, n_rows, flow_accum_managed_raster, - flow_dir_managed_raster, + flow_dir_managed_raster, flow_nodata, working_flow_accum_threshold, coord_to_stream_ids) if payload is None: @@ -3598,6 +3649,8 @@ def _build_discovery_finish_rasters( """ flow_dir_info = pygeoprocessing.get_raster_info( flow_dir_d8_raster_path_band[0]) + cdef int n_cols, n_rows + n_cols, n_rows = flow_dir_info['raster_size'] cdef int flow_dir_nodata = ( flow_dir_info['nodata'][flow_dir_d8_raster_path_band[1]-1]) @@ -3621,11 +3674,13 @@ def _build_discovery_finish_rasters( cdef FinishType finish_coordinate cdef long discovery_count = 0 - cdef int n_processed = 0 + cdef int n_processed, n_pixels + n_pixels = n_rows * n_cols + n_processed = 0 cdef time_t last_log_time = ctime(NULL) cdef int n_pushed - cdef int i, j, xoff, yoff, win_xsize, win_ysize, x, y, x_n, y_n + cdef int i, j, xoff, yoff, win_xsize, win_ysize, x_l, y_l, x_n, y_n cdef int n_dir, test_dir # this essentially does a depth-first traversal of the watershed, @@ -3650,7 +3705,7 @@ def _build_discovery_finish_rasters( if ctime(NULL)-last_log_time > _LOGGING_PERIOD: LOGGER.info( f'(discovery time processing): ' - f'{n_processed/flow_dir_managed_raster.n_pixels*100:.1f}% complete') + f'{n_processed/n_pixels*100:.1f}% complete') last_log_time = ctime(NULL) xoff = offset_dict['xoff'] yoff = offset_dict['yoff'] @@ -3658,25 +3713,26 @@ def _build_discovery_finish_rasters( win_ysize = offset_dict['win_ysize'] n_processed += win_xsize * win_ysize - for xi in range(win_xsize): - for yi in range(win_ysize): - x = xoff + xi - y = yoff + yi + for i in range(win_xsize): + for j in range(win_ysize): + x_l = xoff + i + y_l = yoff + j # check to see if this pixel is a drain - flow_dir = flow_dir_managed_raster.get(x, y) - if flow_dir_managed_raster.is_nodata(flow_dir): + d_n = flow_dir_managed_raster.get(x_l, y_l) + if d_n == flow_dir_nodata: continue # find the downstream neighbor (the pixel that this pixel drains to) - x_n = x + COL_OFFSETS[flow_dir] - y_n = y + ROW_OFFSETS[flow_dir] + x_n = x_l + COL_OFFSETS[d_n] + y_n = y_l + ROW_OFFSETS[d_n] # if the downstream neighbor runs off raster or is nodata, then # this pixel is a drain point, so we can push it to the stack - if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or - flow_dir_managed_raster.is_nodata(x_n, y_n)): - discovery_stack.push(CoordinateType(x, y)) - finish_stack.push(FinishType(x, y, 1)) + if (x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_rows or + flow_dir_managed_raster.get( + x_n, y_n) == flow_dir_nodata): + discovery_stack.push(CoordinateType(x_l, y_l)) + finish_stack.push(FinishType(x_l, y_l, 1)) while not discovery_stack.empty(): # This coordinate is the downstream end of the stream @@ -3696,16 +3752,17 @@ def _build_discovery_finish_rasters( # skip neighbors that are outside of the raster bounds, # or that have nodata - if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or - flow_dir_managed_raster.is_nodata(x_n, y_n)): + if x_n < 0 or y_n < 0 or \ + x_n >= n_cols or y_n >= n_rows: continue # if the neighbor drains to this pixel, push it to the stack n_dir = flow_dir_managed_raster.get(x_n, y_n) + if n_dir == flow_dir_nodata: + continue if INFLOW_OFFSETS[test_dir] == n_dir: discovery_stack.push(CoordinateType(x_n, y_n)) n_pushed += 1 - # this reference is for the previous top and represents # how many elements must be processed before finish # time can be defined @@ -3720,8 +3777,6 @@ def _build_discovery_finish_rasters( # the top item will be (xi, yi, 0) # set the finish order raster at (xi, yi) to discovery order - 1 (why?) # - # - # if n_pushed == 0: while (not finish_stack.empty() and finish_stack.top().n_pushed <= 1): @@ -3729,7 +3784,7 @@ def _build_discovery_finish_rasters( finish_stack.pop() finish_managed_raster.set( finish_coordinate.xi, finish_coordinate.yi, - discovery_count - 1) + discovery_count-1) if not finish_stack.empty(): # then take one more because one branch is done finish_coordinate = finish_stack.top() @@ -3820,6 +3875,9 @@ def calculate_subwatershed_boundary( discovery_time_raster_path) cdef long discovery_nodata = discovery_info['nodata'][0] + cdef unsigned int n_cols, n_rows + n_cols, n_rows = discovery_info['raster_size'] + geotransform = discovery_info['geotransform'] cdef double g0, g1, g2, g3, g4, g5 g0, g1, g2, g3, g4, g5 = geotransform @@ -3852,6 +3910,7 @@ def calculate_subwatershed_boundary( cdef unsigned int x_l, y_l cdef int outflow_dir cdef double x_f, y_f + cdef double x_p, y_p cdef long discovery, finish cdef time_t last_log_time = ctime(NULL) @@ -3905,8 +3964,6 @@ def calculate_subwatershed_boundary( us_x = int(working_feature.GetField('us_x')) us_y = int(working_feature.GetField('us_y')) - ds_x = int(working_feature.GetField('ds_x')) - ds_y = int(working_feature.GetField('ds_y')) ds_x_1 = int(working_feature.GetField('ds_x_1')) ds_y_1 = int(working_feature.GetField('ds_y_1')) @@ -3914,8 +3971,9 @@ def calculate_subwatershed_boundary( # processed, add them to the stack. # otherwise, if all tributaries have been processed, we can # proceed to calculate the subwatershed. + upstream_coord = (us_x, us_y) upstream_fids = [ - fid for fid in upstream_fid_map[(us_x, us_y)] + fid for fid in upstream_fid_map[upstream_coord] if fid not in processed_nodes] if upstream_fids: working_stack.extend(upstream_fids) @@ -3945,7 +4003,6 @@ def calculate_subwatershed_boundary( elif not outlet_at_confluence: visit_order_stack.append((working_fid, ds_x_1, ds_y_1)) - cdef int edge_side, edge_dir, cell_to_test, out_dir_increase=-1 cdef int left, right, n_steps, terminated_early cdef int delta_x, delta_y @@ -3957,15 +4014,12 @@ def calculate_subwatershed_boundary( f'(calculate_subwatershed_boundary): watershed building ' f'{(index/len(visit_order_stack))*100:.1f}% complete') last_log_time = ctime(NULL) - - discovery = discovery_managed_raster.get(x_l, y_l) - # We reach this case if the current pixel is a boundary pixel of # a previously calculated watershed. - if discovery_managed_raster.is_nodata(discovery): + if discovery == -1: continue - + # keep a list of pixels that are within the watershed along the boundary # these will later be filled in with -1 in the discovery raster boundary_list = [(x_l, y_l)] @@ -3976,199 +4030,113 @@ def calculate_subwatershed_boundary( watershed_boundary = ogr.Geometry(ogr.wkbLinearRing) outflow_dir = d8_flow_dir_managed_raster.get(x_l, y_l) - # offsets: - # given a pixel coordinate (x, y) representing the top left corner of the pixel, - # and that pixel's flow direction (0-7), - # (x + x_corner_offsets[flow_dir], y + y_corner_offsets[flow_dir]) - # is the coordinate of the pixel corner to start delineating from. - # - # for diagonal outflow, this is the corner point shared - # with the downslope neighbor. - # for orthogonal outflow, there are two corner points shared with the - # downslope neighbor. this chooses the corner that is farther "clockwise". - x_corner_offsets = [1, 1, 1, 0, 0, 0, 0, 1] - y_corner_offsets = [1, 0, 0, 0, 0, 1, 1, 1] - - # move from the centerpoint to the point along the pixel border - # corresponding to the pixel's outflow direction. - x_f = x_l + x_corner_offsets[outflow_dir] - y_f = y_l + y_corner_offsets[outflow_dir] - watershed_boundary.AddPoint( - *gdal.ApplyGeoTransform(geotransform, x_f, y_f)) - - # keep track of how many steps we've taken in x and y directions - # when both of them get back to 0, we've made a loop + # this is the center point of the pixel that will be offset to + # make the edge + x_f = x_l+0.5 + y_f = y_l+0.5 + + x_f += COL_OFFSETS[outflow_dir]*0.5 + y_f += ROW_OFFSETS[outflow_dir]*0.5 + if outflow_dir % 2 == 0: + # need to back up the point a bit + x_f -= ROW_OFFSETS[outflow_dir]*0.5 + y_f += COL_OFFSETS[outflow_dir]*0.5 + + x_p, y_p = gdal.ApplyGeoTransform(geotransform, x_f, y_f) + watershed_boundary.AddPoint(x_p, y_p) + + # keep track of how many steps x/y and when we get back to 0 we've + # made a loop delta_x, delta_y = 0, 0 # determine the first edge - # - # outflow through a straight side, so trivial edge detection - # - # if outflow is in an orthogonal direction (0, 2, 4, or 6), - # draw the first edge perpendicular to the outflow - # because this is a drain point if outflow_dir % 2 == 0: - # the edge side indicates which side of the pixel we're drawing - # the edge on: one of 0 (right), 2 (top), 4 (left), or 6 (bottom) - edge_side = outflow_dir # the outflow direction 0 - 7 - - # get the direction in which to step from the current vertex - # we don't move diagonally across pixels, so this is - # one of 0 (right), 2 (up), 4 (left), or 6 (down) - edge_dir = (2 + edge_side) % 8 - - # if outflow is in a diagonal direction (1, 3, 5, or 7), - # - # diagonal outflow requires testing neighboring cells to determine first edge - # - # if i drains to a diagonal neighbor, - # test the neighbor counterclockwise from that pixel - # so that we are always testing a pixel that shares a side with i - # - # a b c - # d x e - # f g h - # - # if x's outflow direction is 1 (it drains to c), - # cell_to_test is 2 (b) - # edge_side is 2 (b) - # edge_dir is 4 (d) - # - # if b is in watershed, - # edge_side is 0 (e) - # edge_dir is 2 (b) - # + # outflow through a straight side, so trivial edge detection + edge_side = outflow_dir + edge_dir = (2+edge_side) % 8 else: - # get the next neighbor going counterclockwise from outflow_dir - cell_to_test = (outflow_dir + 1) % 8 - - # the edge side indicates which side of the pixel we're drawing - # the edge on: one of 0 (right), 2 (top), 4 (left), or 6 (bottom) + # diagonal outflow requires testing neighboring cells to + # determine first edge + cell_to_test = (outflow_dir+1) % 8 edge_side = cell_to_test - - # get the direction in which to step from the current vertex - # we don't move diagonally across pixels, so this is - # one of 0 (right), 2 (up), 4 (left), or 6 (down) - edge_dir = (cell_to_test + 2) % 8 - - # if the neighbor being tested is within the same watershed as - # pixel l, then the edge between them is not the watershed boundary. - # move to the next edge over in a clockwise direction. + edge_dir = (cell_to_test+2) % 8 if _in_watershed( x_l, y_l, cell_to_test, discovery, finish, - discovery_managed_raster): - edge_side = (edge_side - 2 ) % 8 - edge_dir = (edge_dir - 2) % 8 + n_cols, n_rows, + discovery_managed_raster, discovery_nodata): + edge_side = (edge_side-2) % 8 + edge_dir = (edge_dir-2) % 8 x_l += COL_OFFSETS[edge_dir] y_l += ROW_OFFSETS[edge_dir] # note the pixel moved boundary_list.append((x_l, y_l)) - # walk the watershed boundary until we come back to the starting point - # this loop repeats until it has outlined a complete watershed, - # or it reaches the maximum allowed number of steps. - # - # This algorithm tracks a current pixel (x_l, y_l) and a - # current vertex (x_f, y_f). Pixels are always referenced by the coordinate of - # their top-left corner, but the watershed boundary may trace around any edge - # of a pixel. So the current vertex (f) is the leading point of the watershed - # boundary linestring. - # - # We know that we're on the watershed boundary when the current edge is between - # a pixel that's in the watershed and a pixel that's not. The algorithm walks - # counterclockwise around the watershed boundary, so that the "left" side of the - # boundary is the watershed interior, and the right side is the exterior. - # - # The algorithm proceeds by considering adjacent pairs of neighbors of the - # current pixel. If the "left" neighbor is in the watershed and the "right" - # neighbor isn't, we know we're on the boundary. If both are in the watershed, - # we turn right. If neither are in the watershed, we turn left. - # - # n_steps = 0 terminated_early = 0 while True: - # stop delineating this watershed if we've exceeded the step limit. - # this prevents hanging in loops or excessively large watersheds + # step the edge then determine the projected coordinates + x_f += COL_OFFSETS[edge_dir] + y_f += ROW_OFFSETS[edge_dir] + delta_x += COL_OFFSETS[edge_dir] + delta_y += ROW_OFFSETS[edge_dir] + # equivalent to gdal.ApplyGeoTransform(geotransform, x_f, y_f) + # to eliminate python function call overhead + x_p = g0 + g1*x_f + g2*y_f + y_p = g3 + g4*x_f + g5*y_f + watershed_boundary.AddPoint(x_p, y_p) n_steps += 1 if n_steps > _int_max_steps_per_watershed: LOGGER.warning('quitting, too many steps') terminated_early = 1 break - # Exit with an error if we've reached a pixel outside the raster - # bounds. This is unexpected but worth checking since missing this - # error would be very difficult to debug. - if discovery_managed_raster.is_out_of_bounds(x_l, y_l): + if x_l < 0 or y_l < 0 or x_l >= n_cols or y_l >= n_rows: + # This is unexpected but worth checking since missing this + # error would be very difficult to debug. raise RuntimeError( - f'{x_l}, {y_l} out of bounds for {discovery_managed_raster.raster_x_size}' - f'x{discovery_managed_raster.raster_y_size} raster.') - - # step to the next vertex - x_f += COL_OFFSETS[edge_dir] - y_f += ROW_OFFSETS[edge_dir] - - # increment/decrement the deltas, tracking how many pixels - # we've moved from the starting point in the x and y directions - delta_x += COL_OFFSETS[edge_dir] - delta_y += ROW_OFFSETS[edge_dir] - - # convert the pixel-space coordinates to the gis projection and - # add the point to the watershed geometry - # this math is equivalent to gdal.ApplyGeoTransform(geotransform, x_f, y_f) - # to eliminate python function call overhead - watershed_boundary.AddPoint( - g0 + g1 * x_f + g2 * y_f, - g3 + g4 * x_f + g5 * y_f) - - # counterclockwise configuration - # edge_dir is two "directions" counterclockwise of edge_side - # this is always true ? - # - # the "left" pixel to test is the neighbor in the edge direction - # the "right" pixel to test is the next neighbor clockwise from that - if edge_side - ((edge_dir - 2) % 8) == 0: + f'{x_l}, {y_l} out of bounds for ' + f'{n_cols}x{n_rows} raster.') + if edge_side - ((edge_dir-2) % 8) == 0: + # counterclockwise configuration left = edge_dir - right = (left - 1) % 8 + right = (left-1) % 8 out_dir_increase = 2 - # clockwise configuration (swapping "left" and "right") - # When is this case ever reached? else: - print('Reached unexpected case!!!') + # clockwise configuration (swapping "left" and "right") right = edge_dir left = (edge_side+1) out_dir_increase = -2 - - # if the "right" pixel to test is in the watershed, - # turn right so that we can draw the boundary around the right pixel - if _in_watershed(x_l, y_l, right, discovery, finish, discovery_managed_raster): + left_in = _in_watershed( + x_l, y_l, left, discovery, finish, n_cols, n_rows, + discovery_managed_raster, discovery_nodata) + right_in = _in_watershed( + x_l, y_l, right, discovery, finish, n_cols, n_rows, + discovery_managed_raster, discovery_nodata) + if right_in: # turn right out_dir = edge_side - edge_side = (edge_side - out_dir_increase) % 8 + edge_side = (edge_side-out_dir_increase) % 8 edge_dir = out_dir # pixel moves to be the right cell x_l += COL_OFFSETS[right] y_l += ROW_OFFSETS[right] - # add the current pixel to the boundary list - boundary_list.append((x_l, y_l)) - - # otherwise if the "left" pixel to test is in the watershed, - # continue straight - we are on a boundary edge - elif _in_watershed(x_l, y_l, left, discovery, finish, discovery_managed_raster): + _diagonal_fill_step( + x_l, y_l, right, + discovery, finish, discovery_managed_raster, + discovery_nodata, + boundary_list) + elif left_in: # step forward x_l += COL_OFFSETS[edge_dir] y_l += ROW_OFFSETS[edge_dir] # the pixel moves forward boundary_list.append((x_l, y_l)) - # if neither the "left" or "right" pixel is in the watershed, - # turn left else: # turn left edge_side = edge_dir edge_dir = (edge_side + out_dir_increase) % 8 - # when the deltas become 0, we've returned to the starting point, - # meaning that we made a loop and the watershed is complete if delta_x == 0 and delta_y == 0: + # met the start point so we completed the watershed loop break watershed_feature = ogr.Feature(watershed_layer.GetLayerDefn()) @@ -4184,12 +4152,6 @@ def calculate_subwatershed_boundary( # this loop fills in the raster at the boundary, done at end so it # doesn't interfere with the loop return to think the cells are no # longer in the watershed - # - # setting the discovery value to -1 guaranteeds that the _in_watershed - # test will always return False - if we reach one of these boundary pixels - # from another watershed, it won't be added to that watershed. - # - # for boundary_x, boundary_y in boundary_list: discovery_managed_raster.set(boundary_x, boundary_y, -1) watershed_layer.CommitTransaction() @@ -4255,6 +4217,8 @@ def detect_lowest_drain_and_sink(dem_raster_path_band): else: dem_nodata = IMPROBABLE_FLOAT_NODATA + raster_x_size, raster_y_size = dem_raster_info['raster_size'] + cdef ManagedRaster dem_managed_raster = ManagedRaster( dem_raster_path_band[0].encode('utf-8'), dem_raster_path_band[1], False) @@ -4269,10 +4233,10 @@ def detect_lowest_drain_and_sink(dem_raster_path_band): if ctime(NULL) - last_log_time > _LOGGING_PERIOD: last_log_time = ctime(NULL) - current_pixel = xoff + yoff * dem_managed_raster.raster_x_size + current_pixel = xoff + yoff * raster_x_size LOGGER.info( '(infer_sinks): ' - f'{current_pixel} of {dem_managed_raster.n_pixels} ' + f'{current_pixel} of {raster_x_size * raster_y_size} ' 'pixels complete') # search block for local sinks @@ -4295,7 +4259,8 @@ def detect_lowest_drain_and_sink(dem_raster_path_band): xi_n = xi_root+COL_OFFSETS[i_n] yi_n = yi_root+ROW_OFFSETS[i_n] - if dem_managed_raster.is_out_of_bounds(xi_n, yi_n): + if (xi_n < 0 or xi_n >= raster_x_size or + yi_n < 0 or yi_n >= raster_y_size): # it'll drain off the edge of the raster if center_val < lowest_drain_height: # found a new lower edge height @@ -4546,57 +4511,105 @@ def detect_outlets( LOGGER.info('outlet detection: done') -cdef int _in_watershed( - int x_l, int y_l, int direction_to_test, int discovery, int finish, - ManagedRaster discovery_managed_raster): - """Test if pixel in direction is in the watershed. +cdef void _diagonal_fill_step( + int x_l, int y_l, int edge_dir, + long discovery, long finish, + ManagedRaster discovery_managed_raster, + long discovery_nodata, boundary_list): + """Fill diagonal that are in the watershed behind the new edge. - Given a pixel P and its neighbor Q, + Used as a helper function to mark pixels as part of the watershed + boundary in one step if they are diagonal and also contained within the + watershed. Prevents a case like this: - if Q's discovery time >= P's discovery time, - then either Q is in P's branch, or it is in a later branch. + iii + ii1 + i1o - if Q's discovery time <= P's finish time, - then either Q is in P's branch, or it is in an earlier branch. - Because this is a depth-first traversal, we are not discovering pixels - in other branches until the current branch is finished. + Instead would fill the diagonal like this: - If both these conditions are met, Q must be in P's branch - (i.e. Q is upslope of P in the same watershed). + iii + i11 + i1o Args: - x_l (int): x coordinate of the watershed boundary edge pixel to test - y_l (int): y coordinate of the watershed boundary edge pixel to test + x_l/y_l (int): leading coordinate of the watershed boundary + edge. + edge_dir (int): D8 direction that points which direction the edge + came from + discovery/finish (long): the discovery and finish time that defines + whether a pixel discovery time is inside a watershed or not. + discovery_managed_raster (ManagedRaster): discovery time raster + x/y gives the discovery time for that pixel. + discovery_nodata (long): nodata value for discovery raster + boundary_list (list): this list is appended to for new pixels that + should be neighbors in the fill. + + Return: + None. + """ + # always add the current pixel + boundary_list.append((x_l, y_l)) + + # this section determines which back diagonal was in the watershed and + # fills it. if none are we pick one so there's no degenerate case + cdef int xdelta = COL_OFFSETS[edge_dir] + cdef int ydelta = ROW_OFFSETS[edge_dir] + test_list = [ + (x_l - xdelta, y_l), + (x_l, y_l - ydelta)] + for x_t, y_t in test_list: + point_discovery = discovery_managed_raster.get( + x_t, y_t) + if (point_discovery != discovery_nodata and + point_discovery >= discovery and + point_discovery <= finish): + boundary_list.append((int(x_t), int(y_t))) + # there's only one diagonal to fill in so it's done here + return + + # if there's a degenerate case then just add the xdelta, + # it doesn't matter + boundary_list.append(test_list[0]) + + +cdef int _in_watershed( + int x_l, int y_l, int direction_to_test, int discovery, int finish, + int n_cols, int n_rows, + ManagedRaster discovery_managed_raster, + long discovery_nodata): + """Test if pixel in direction is in the watershed. + + Args: + x_l/y_l (int): leading coordinate of the watershed boundary + edge. direction_to_test (int): D8 direction that points which direction the edge came from - discovery (int): the discovery time of the watershed boundary edge pixel - finish (int): the finish time of the watershed boundary edge pixel + discovery/finish (long): the discovery and finish time that defines + whether a pixel discovery time is inside a watershed or not. + n_cols/n_rows (int): number of columns/rows in the discovery raster, + used to ensure step does not go out of bounds. discovery_managed_raster (ManagedRaster): discovery time raster x/y gives the discovery time for that pixel. + discovery_nodata (long): nodata value for discovery raster Return: 1 if in, 0 if out. """ cdef int x_n = x_l + COL_OFFSETS[direction_to_test] cdef int y_n = y_l + ROW_OFFSETS[direction_to_test] - if discovery_managed_raster.is_out_of_bounds(x_n, y_n): + if x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_rows: return 0 cdef long point_discovery = discovery_managed_raster.get(x_n, y_n) - # discovery time: the order in which a pixel is first visited - # finish time: the order in which a pixel is last visited - # - # if pixel n's discovery time is greater than or equal to pixel l's discovery time, and - # the pixel n's discovery time is less than or equal to pixel l's finish time, - # then pixel n is in the watershed - return (not discovery_managed_raster.is_nodata(point_discovery) and + return (point_discovery != discovery_nodata and point_discovery >= discovery and point_discovery <= finish) cdef _calculate_stream_geometry( - int x_l, int y_l, int upstream_d8_dir, geotransform, - ManagedRaster flow_accum_managed_raster, - ManagedRaster flow_dir_managed_raster, + int x_l, int y_l, int upstream_d8_dir, geotransform, int n_cols, + int n_rows, ManagedRaster flow_accum_managed_raster, + ManagedRaster flow_dir_managed_raster, int flow_dir_nodata, int flow_accum_threshold, coord_to_stream_ids): """Calculate the upstream geometry from the given point. @@ -4609,8 +4622,10 @@ cdef _calculate_stream_geometry( upstream_d8_dir (int): upstream D8 direction to search geotransform (list): 6 element list representing the geotransform used to convert to georeferenced coordinates. + n_cols/n_rows (int): number of columns and rows in raster. flow_accum_managed_raster (ManagedRaster): flow accumulation raster flow_dir_managed_raster (ManagedRaster): d8 flow direction raster + flow_dir_nodata (int): nodata for flow direction flow_accum_threshold (int): minimum flow accumulation value to define string. coord_to_stream_ids (dict): map raster space coordinate tuple to @@ -4680,14 +4695,17 @@ cdef _calculate_stream_geometry( x_n = x_l + COL_OFFSETS[d] y_n = y_l + ROW_OFFSETS[d] - # check out of bounds or nodata - if (flow_dir_managed_raster.is_out_of_bounds(x_n, y_n) or - flow_dir_managed_raster.is_nodata(x_n, y_n)): + # check out of bounds + if x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_rows: + continue + + # check for nodata + d_n = flow_dir_managed_raster.get(x_n, y_n) + if d_n == flow_dir_nodata: continue # check if there's an upstream inflow pixel with flow accum # greater than the threshold - d_n = flow_dir_managed_raster.get(x_n, y_n) if INFLOW_OFFSETS[d] == d_n and ( flow_accum_managed_raster.get( x_n, y_n) > flow_accum_threshold): From 3c4241f771effa2b0d874ae05a64d8c6839b95f4 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Wed, 18 Mar 2026 16:47:18 -0700 Subject: [PATCH 12/12] implement fix to subwatershed algorithm to account for diagonal pixels --- src/pygeoprocessing/routing/routing.pyx | 237 ++++++++++++++++-------- 1 file changed, 156 insertions(+), 81 deletions(-) diff --git a/src/pygeoprocessing/routing/routing.pyx b/src/pygeoprocessing/routing/routing.pyx index 1d34da6f..a62a36cc 100644 --- a/src/pygeoprocessing/routing/routing.pyx +++ b/src/pygeoprocessing/routing/routing.pyx @@ -3851,6 +3851,13 @@ def calculate_subwatershed_boundary( os.path.dirname(target_watershed_boundary_vector_path))) discovery_time_raster_path = os.path.join(workspace_dir, 'discovery.tif') finish_time_raster_path = os.path.join(workspace_dir, 'finish.tif') + boundary_raster_path = os.path.join(workspace_dir, 'boundary.tif') + + pygeoprocessing.new_raster_from_base( + d8_flow_dir_raster_path_band[0], boundary_raster_path, + gdal.GDT_Float64, [-1]) + boundary_managed_raster = ManagedRaster( + boundary_raster_path.encode('utf-8'), 1, True) # construct the discovery/finish time rasters for fast individual cell # watershed detection @@ -4009,6 +4016,7 @@ def calculate_subwatershed_boundary( cdef int _int_max_steps_per_watershed = max_steps_per_watershed for index, (stream_fid, x_l, y_l) in enumerate(visit_order_stack): + print('FID', stream_fid) if ctime(NULL) - last_log_time > _LOGGING_PERIOD: LOGGER.info( f'(calculate_subwatershed_boundary): watershed building ' @@ -4019,7 +4027,7 @@ def calculate_subwatershed_boundary( # a previously calculated watershed. if discovery == -1: continue - + # keep a list of pixels that are within the watershed along the boundary # these will later be filled in with -1 in the discovery raster boundary_list = [(x_l, y_l)] @@ -4069,79 +4077,148 @@ def calculate_subwatershed_boundary( x_l += COL_OFFSETS[edge_dir] y_l += ROW_OFFSETS[edge_dir] # note the pixel moved + print(f'adding current pixel {x_l}, {y_l} to boundary') boundary_list.append((x_l, y_l)) + print(f'starting from pixel {x_l}, {y_l}, vertex {x_f}, {y_f}, edge side {edge_side}, edge dir {edge_dir}') + n_steps = 0 terminated_early = 0 - while True: - # step the edge then determine the projected coordinates - x_f += COL_OFFSETS[edge_dir] - y_f += ROW_OFFSETS[edge_dir] - delta_x += COL_OFFSETS[edge_dir] - delta_y += ROW_OFFSETS[edge_dir] - # equivalent to gdal.ApplyGeoTransform(geotransform, x_f, y_f) - # to eliminate python function call overhead - x_p = g0 + g1*x_f + g2*y_f - y_p = g3 + g4*x_f + g5*y_f - watershed_boundary.AddPoint(x_p, y_p) - n_steps += 1 - if n_steps > _int_max_steps_per_watershed: - LOGGER.warning('quitting, too many steps') - terminated_early = 1 - break - if x_l < 0 or y_l < 0 or x_l >= n_cols or y_l >= n_rows: - # This is unexpected but worth checking since missing this - # error would be very difficult to debug. - raise RuntimeError( - f'{x_l}, {y_l} out of bounds for ' - f'{n_cols}x{n_rows} raster.') - if edge_side - ((edge_dir-2) % 8) == 0: - # counterclockwise configuration - left = edge_dir - right = (left-1) % 8 - out_dir_increase = 2 - else: - # clockwise configuration (swapping "left" and "right") - right = edge_dir - left = (edge_side+1) - out_dir_increase = -2 - left_in = _in_watershed( - x_l, y_l, left, discovery, finish, n_cols, n_rows, - discovery_managed_raster, discovery_nodata) - right_in = _in_watershed( - x_l, y_l, right, discovery, finish, n_cols, n_rows, - discovery_managed_raster, discovery_nodata) - if right_in: - # turn right - out_dir = edge_side - edge_side = (edge_side-out_dir_increase) % 8 - edge_dir = out_dir - # pixel moves to be the right cell - x_l += COL_OFFSETS[right] - y_l += ROW_OFFSETS[right] - _diagonal_fill_step( - x_l, y_l, right, - discovery, finish, discovery_managed_raster, - discovery_nodata, - boundary_list) - elif left_in: - # step forward - x_l += COL_OFFSETS[edge_dir] - y_l += ROW_OFFSETS[edge_dir] - # the pixel moves forward - boundary_list.append((x_l, y_l)) - else: - # turn left - edge_side = edge_dir - edge_dir = (edge_side + out_dir_increase) % 8 + geoms = [] + + starting_points = [((x_l, y_l), (x_f, y_f), (x_p, y_p), edge_side, edge_dir, 'left')] + all_starting_points = set((x_f, y_f)) + + while starting_points: + (x_l, y_l), (x_f, y_f), (x_p, y_p), edge_side, edge_dir, orientation = starting_points.pop() + starting_point = (x_f, y_f) + starting_dir = edge_dir + vertices = [(x_p, y_p)] + print('starting from', (x_l, y_l), (x_f, y_f), edge_side, edge_dir, orientation) + + while True: + # step the edge then determine the projected coordinates + x_f += COL_OFFSETS[edge_dir] + y_f += ROW_OFFSETS[edge_dir] + + delta_x += COL_OFFSETS[edge_dir] + delta_y += ROW_OFFSETS[edge_dir] + + # equivalent to gdal.ApplyGeoTransform(geotransform, x_f, y_f) + # to eliminate python function call overhead + x_p = g0 + g1*x_f + g2*y_f + y_p = g3 + g4*x_f + g5*y_f + watershed_boundary.AddPoint(x_p, y_p) + vertices.append((x_p, y_p)) + + # boundary_list.append((x_l, y_l)) + + n_steps += 1 + if n_steps > _int_max_steps_per_watershed: + LOGGER.warning('quitting, too many steps') + terminated_early = 1 + break + if x_l < 0 or y_l < 0 or x_l >= n_cols or y_l >= n_rows: + # This is unexpected but worth checking since missing this + # error would be very difficult to debug. + raise RuntimeError( + f'{x_l}, {y_l} out of bounds for ' + f'{n_cols}x{n_rows} raster.') + + + if edge_side - ((edge_dir-2) % 8) == 0: + # counterclockwise configuration + left = edge_dir + right = (left - 1) % 8 + out_dir_increase = 2 + else: + # clockwise configuration (swapping "left" and "right") + right = edge_dir + left = (right + 1) % 8 + out_dir_increase = -2 + left_in = _in_watershed( + x_l, y_l, left, discovery, finish, n_cols, n_rows, + discovery_managed_raster, discovery_nodata) + right_in = _in_watershed( + x_l, y_l, right, discovery, finish, n_cols, n_rows, + discovery_managed_raster, discovery_nodata) + print('left', left, left_in) + print('right', right, right_in) + + if orientation == 'left': + if right_in and left_in: + print(f'turn right, move pixel to {right}') + # turn right + out_dir = edge_side + edge_side = (edge_side - out_dir_increase) % 8 + edge_dir = out_dir + + # add both left and right to the boundary + # so that the boundary is a continuous border of pixels that share a side + boundary_list.append((x_l + COL_OFFSETS[left], y_l + ROW_OFFSETS[left])) + boundary_list.append((x_l + COL_OFFSETS[right], y_l + ROW_OFFSETS[right])) + + # pixel moves to be the right cell + x_l += COL_OFFSETS[right] + y_l += ROW_OFFSETS[right] + + elif left_in and not right_in: + print(f'go straight, move pixel to {edge_dir}') + # step forward + x_l += COL_OFFSETS[edge_dir] + y_l += ROW_OFFSETS[edge_dir] + # the pixel moves forward + boundary_list.append((x_l, y_l)) + + elif right_in and not left_in: # continue straight and swap orientation + print(f'turn left and push point to stack') + # turn left + edge_side = edge_dir + edge_dir = (edge_side + out_dir_increase) % 8 + + point = ( + (x_l + COL_OFFSETS[right], y_l + ROW_OFFSETS[right]), + (x_f, y_f), + (x_p, y_p), + (edge_side + 4) % 8, + (edge_dir + 4) % 8, + 'left' + ) + print('pushing', point) + + if (x_l + COL_OFFSETS[right], y_l + ROW_OFFSETS[right]) not in all_starting_points: + starting_points.append(point) + all_starting_points.add((x_l + COL_OFFSETS[right], y_l + ROW_OFFSETS[right])) + boundary_list.append((x_l + COL_OFFSETS[right], y_l + ROW_OFFSETS[right])) - if delta_x == 0 and delta_y == 0: - # met the start point so we completed the watershed loop - break + else: + print('turn left') + # turn left + edge_side = edge_dir + edge_dir = (edge_side + out_dir_increase) % 8 + + + if (x_f, y_f) == starting_point and edge_dir == starting_dir: + geoms.append(shapely.Polygon(vertices)) + break + + if delta_x == 0 and delta_y == 0: + break + + + print(f'pixel: ({x_l}, {y_l}), vertex: ({x_f}, {y_f}), edge side: {edge_side}, edge dir: {edge_dir}') + + + + print(stream_fid, 'geoms:', len(geoms)) + + watershed_polygon = shapely.Polygon() + for geom in geoms: + watershed_polygon = shapely.union(watershed_polygon, geom) watershed_feature = ogr.Feature(watershed_layer.GetLayerDefn()) - watershed_polygon = ogr.Geometry(ogr.wkbPolygon) - watershed_polygon.AddGeometry(watershed_boundary) + watershed_polygon = ogr.CreateGeometryFromWkb(watershed_polygon.wkb) + print(watershed_polygon) watershed_feature.SetGeometry(watershed_polygon) watershed_feature.SetField('stream_fid', stream_fid) watershed_feature.SetField('terminated_early', terminated_early) @@ -4153,6 +4230,7 @@ def calculate_subwatershed_boundary( # doesn't interfere with the loop return to think the cells are no # longer in the watershed for boundary_x, boundary_y in boundary_list: + boundary_managed_raster.set(boundary_x, boundary_y, stream_fid) discovery_managed_raster.set(boundary_x, boundary_y, -1) watershed_layer.CommitTransaction() watershed_layer = None @@ -4160,7 +4238,8 @@ def calculate_subwatershed_boundary( discovery_managed_raster.close() finish_managed_raster.close() d8_flow_dir_managed_raster.close() - shutil.rmtree(workspace_dir) + boundary_managed_raster.close() + # shutil.rmtree(workspace_dir) LOGGER.info( '(calculate_subwatershed_boundary): watershed building 100% complete') @@ -4553,24 +4632,19 @@ cdef void _diagonal_fill_step( # this section determines which back diagonal was in the watershed and # fills it. if none are we pick one so there's no degenerate case - cdef int xdelta = COL_OFFSETS[edge_dir] - cdef int ydelta = ROW_OFFSETS[edge_dir] - test_list = [ - (x_l - xdelta, y_l), - (x_l, y_l - ydelta)] - for x_t, y_t in test_list: - point_discovery = discovery_managed_raster.get( - x_t, y_t) + for _xt, _yt in [(x_l - COL_OFFSETS[edge_dir], y_l), (x_l, y_l - ROW_OFFSETS[edge_dir])]: + point_discovery = discovery_managed_raster.get(_xt, _yt) if (point_discovery != discovery_nodata and point_discovery >= discovery and point_discovery <= finish): - boundary_list.append((int(x_t), int(y_t))) + boundary_list.append((_xt, _yt)) # there's only one diagonal to fill in so it's done here - return + break - # if there's a degenerate case then just add the xdelta, - # it doesn't matter - boundary_list.append(test_list[0]) + # # if there's a degenerate case then just add the xdelta, + # # it doesn't matter + # print('wrongly marking', test_list[0], 'as boundary') + # boundary_list.append(test_list[0]) cdef int _in_watershed( @@ -4601,6 +4675,7 @@ cdef int _in_watershed( if x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_rows: return 0 cdef long point_discovery = discovery_managed_raster.get(x_n, y_n) + print('in watershed?', point_discovery, discovery, finish) return (point_discovery != discovery_nodata and point_discovery >= discovery and point_discovery <= finish)