From aca4e06f8c3f11b6079bc635e98b3d030338baff Mon Sep 17 00:00:00 2001 From: Simran S Sangha Date: Wed, 14 Jan 2026 15:47:10 -0800 Subject: [PATCH 1/8] Add support to subset files in time for stats analysis --- tools/RAiDER/cli/raider.py | 5 +++ tools/RAiDER/gnss/processDelayFiles.py | 52 +++++++++++++++++++++++++- tools/RAiDER/gnss/types.py | 12 ++++-- 3 files changed, 63 insertions(+), 6 deletions(-) diff --git a/tools/RAiDER/cli/raider.py b/tools/RAiDER/cli/raider.py index 9d6fd05f8..0622c4b4d 100644 --- a/tools/RAiDER/cli/raider.py +++ b/tools/RAiDER/cli/raider.py @@ -710,6 +710,10 @@ def combineZTDFiles() -> None: print(f"Observation error threshold: {args.obs_errlimit}") print(f"Nan for negative σ_wm² values: {args.allow_nan_for_negative}") print(f"Min% timespan overlap to keep station: {args.min_pct_days}") + print( + "Subset in time by specified earliest to latest " + f"YYYY-MM-DD dates: {args.timeinterval}" + ) if not args.raider_file.exists(): combineDelayFiles(args.raider_file, loc=args.raider_folder) @@ -733,6 +737,7 @@ def combineZTDFiles() -> None: obs_errlimit=args.obs_errlimit, allow_nan_for_negative=args.allow_nan_for_negative, min_pct_days=args.min_pct_days, + timeinterval=args.timeinterval, ) diff --git a/tools/RAiDER/gnss/processDelayFiles.py b/tools/RAiDER/gnss/processDelayFiles.py index f694ed154..f30a1b528 100644 --- a/tools/RAiDER/gnss/processDelayFiles.py +++ b/tools/RAiDER/gnss/processDelayFiles.py @@ -4,6 +4,7 @@ import glob import math import re +import shutil from pathlib import Path from textwrap import dedent from typing import Optional @@ -14,7 +15,7 @@ from tqdm import tqdm # Local -from RAiDER.cli.parser import add_verbose, add_allow_nan_options +from RAiDER.cli.parser import add_allow_nan_options, add_verbose from RAiDER.logger import logger @@ -38,7 +39,6 @@ def combineDelayFiles( # If single file, just copy source if len(file_paths) == 1: if source == 'model': - import shutil shutil.copy(file_paths[0], out_path) else: file_paths = readZTDFile(file_paths[0], col_name=col_name) @@ -643,6 +643,18 @@ def create_parser() -> argparse.ArgumentParser: default=0.0, ) + p.add_argument( + '--timeinterval', + '-ti', + dest='timeinterval', + type=str, + help=dedent("""\ + Subset in time by specifying earliest YYYY-MM-DD date + followed by latest date YYYY-MM-DD. + -- Example : '2016-01-01 2019-01-01'."""), + default=None, + ) + # add other args to parser add_allow_nan_options(p) add_verbose(p) @@ -660,6 +672,7 @@ def main( obs_errlimit: float=float('inf'), allow_nan_for_negative: bool=True, min_pct_days: float=0.0, + timeinterval: str=None, ): """Merge a combined RAiDER delays file with a GPS ZTD delay file.""" print(f'Merging delay files {raider_file} and {ztd_file}') @@ -668,6 +681,29 @@ def main( dfz = pd.read_csv(ztd_file, parse_dates=['Datetime']) dfr = pd.read_csv(raider_file, parse_dates=['Datetime']) + # time-interval filter + # need to add a day buffer to account for time changes + if timeinterval: + # Parse the time interval string + start_str, end_str = timeinterval.split() + + # Convert to datetime objects and apply the 1-day buffer + # Subtract 1 day from start, Add 1 day to end + start_date = pd.to_datetime(start_str) + end_date = pd.to_datetime(end_str) + start_date_buffer = start_date - pd.Timedelta(days=1) + end_date_buffer = end_date + pd.Timedelta(days=1) + + # apply time filter + dfz = dfz[ + (dfz['Datetime'] >= start_date_buffer) & + (dfz['Datetime'] <= end_date_buffer) + ].reset_index(drop=True) + dfr = dfr[ + (dfr['Datetime'] >= start_date_buffer) & + (dfr['Datetime'] <= end_date_buffer) + ].reset_index(drop=True) + # drop extra columns from tropo delay file expected_data_columns = ['ID', 'Lat', 'Lon', 'Hgt_m', 'Datetime', 'wetDelay', 'hydroDelay', raider_delay] dfr = dfr.drop(columns=[col for col in dfr if col not in expected_data_columns]) @@ -715,6 +751,18 @@ def main( dfz = pass_common_obs(dfr, dfz, localtime='Localtime') dfr = pass_common_obs(dfz, dfr, localtime='Localtime') + # use time-interval again to filter based on 'Localtime' + # to remove straggling observations outside of specified span + if timeinterval: + dfz = dfz[ + (dfz['Datetime'] >= start_date) & + (dfz['Datetime'] <= end_date) + ].reset_index(drop=True) + dfr = dfr[ + (dfr['Datetime'] >= start_date) & + (dfr['Datetime'] <= end_date) + ].reset_index(drop=True) + # drop all lines with nans dfr.dropna(how='any', inplace=True) dfz.dropna(how='any', inplace=True) diff --git a/tools/RAiDER/gnss/types.py b/tools/RAiDER/gnss/types.py index bf537fa81..2d21a9523 100644 --- a/tools/RAiDER/gnss/types.py +++ b/tools/RAiDER/gnss/types.py @@ -1,14 +1,18 @@ import argparse from pathlib import Path -from typing import Optional - +from typing import Optional, List class RAiDERCombineArgs(argparse.Namespace): raider_file: Path - raider_folder: Path - gnss_folder: Path + raider_folder: List[Path] + gnss_folder: List[Path] gnss_file: Optional[Path] raider_column_name: str column_name: str out_name: Path local_time: Optional[str] + obs_errlimit: float + min_pct_days: float + timeinterval: Optional[str] + allow_nan_for_negative: bool + verbose: bool From 6057ce5a340b7f4f17337cdead42471acba47307 Mon Sep 17 00:00:00 2001 From: Simran S Sangha Date: Wed, 14 Jan 2026 16:03:06 -0800 Subject: [PATCH 2/8] Change date filtering to use 'Localtime' instead of 'Datetime' Updated filtering conditions to use 'Localtime' for date range checks. --- tools/RAiDER/gnss/processDelayFiles.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/RAiDER/gnss/processDelayFiles.py b/tools/RAiDER/gnss/processDelayFiles.py index f30a1b528..03896009c 100644 --- a/tools/RAiDER/gnss/processDelayFiles.py +++ b/tools/RAiDER/gnss/processDelayFiles.py @@ -755,12 +755,12 @@ def main( # to remove straggling observations outside of specified span if timeinterval: dfz = dfz[ - (dfz['Datetime'] >= start_date) & - (dfz['Datetime'] <= end_date) + (dfz['Localtime'] >= start_date) & + (dfz['Localtime'] <= end_date) ].reset_index(drop=True) dfr = dfr[ - (dfr['Datetime'] >= start_date) & - (dfr['Datetime'] <= end_date) + (dfr['Localtime'] >= start_date) & + (dfr['Localtime'] <= end_date) ].reset_index(drop=True) # drop all lines with nans From bf78825ad48ddba07d66afc975c94a63259516bc Mon Sep 17 00:00:00 2001 From: Simran S Sangha Date: Wed, 14 Jan 2026 17:06:33 -0800 Subject: [PATCH 3/8] Update CHANGELOG with recent changes and additions Added entry for temporal subsetting in raiderCombine.py and other updates. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8064f78d4..34cfa6a1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). * [731](https://github.com/dbekaert/RAiDER/pull/731) - Fixed fetch routine for GMAO. ### Added +* [792](https://github.com/dbekaert/RAiDER/pull/792) - Added temporal subsetting to `raiderCombine.py` workflow to more seamlessly support annual statistical analyses. * [790](https://github.com/dbekaert/RAiDER/pull/790) - Added a test in `test_interpolator.py` to put test coverage to 100% and linted file. * [789](https://github.com/dbekaert/RAiDER/pull/789) - Introduce `min_pct_days` option to filter stations based on global days percentage. * [788](https://github.com/dbekaert/RAiDER/pull/788) - Updated `variance_analysis` function to include global date tracking parameters and modified datetime handling for station start and end dates. From 12a51783b8b03d40ae0476bc238a3d4c8ccb8e35 Mon Sep 17 00:00:00 2001 From: Simran S Sangha Date: Thu, 15 Jan 2026 11:59:00 -0800 Subject: [PATCH 4/8] Fix warning message formatting in getStationDelays.py --- tools/RAiDER/getStationDelays.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/RAiDER/getStationDelays.py b/tools/RAiDER/getStationDelays.py index d8a2ae05c..cfa0b9ab7 100644 --- a/tools/RAiDER/getStationDelays.py +++ b/tools/RAiDER/getStationDelays.py @@ -274,7 +274,7 @@ def get_station_data(inFile, dateList, gps_repo=None, numCPUs=8, outDir=None, re df.to_csv(name, index=False) else: logger.warning( - f"Station file {name} not found likely" + f"Station file {name} not found likely " "no available data in specified time span" ) From eff05e6de882afe9390ec1134fbf30505f1e82c7 Mon Sep 17 00:00:00 2001 From: Simran S Sangha Date: Thu, 15 Jan 2026 13:09:09 -0800 Subject: [PATCH 5/8] Make folder input types in raiderCombine consistent --- tools/RAiDER/gnss/processDelayFiles.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/RAiDER/gnss/processDelayFiles.py b/tools/RAiDER/gnss/processDelayFiles.py index 03896009c..bc218b5c1 100644 --- a/tools/RAiDER/gnss/processDelayFiles.py +++ b/tools/RAiDER/gnss/processDelayFiles.py @@ -24,12 +24,16 @@ def combineDelayFiles( out_path: Path, - loc: Path=Path.cwd(), + loc: Union[List[Path], Path] = Path.cwd(), source: str='model', ext: str='.csv', ref: Optional[Path]=None, col_name: str='ZTD' ) -> None: + # Normalize input: Protects against Python API users passing single Paths + if isinstance(loc, Path): + loc = [loc] + file_paths = [f for folder in loc for f in folder.glob(f"*{ext}")] if source == 'model': @@ -548,6 +552,7 @@ def create_parser() -> argparse.ArgumentParser: """), type=parse_dir, default=[Path.cwd()], + nargs='+' # Forces input into a list [Path, Path...] ) p.add_argument( '--gnssDir', @@ -560,6 +565,7 @@ def create_parser() -> argparse.ArgumentParser: """), type=parse_dir, default=[Path.cwd()], + nargs='+' # Forces input into a list [Path, Path...] ) p.add_argument( From 3de59e9a0d53bc85def78d98e644fc4041c7db2f Mon Sep 17 00:00:00 2001 From: Simran S Sangha Date: Thu, 15 Jan 2026 13:21:49 -0800 Subject: [PATCH 6/8] Fix typing import bug --- tools/RAiDER/gnss/processDelayFiles.py | 2 +- tools/RAiDER/gnss/types.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/RAiDER/gnss/processDelayFiles.py b/tools/RAiDER/gnss/processDelayFiles.py index bc218b5c1..ce0cff3c3 100644 --- a/tools/RAiDER/gnss/processDelayFiles.py +++ b/tools/RAiDER/gnss/processDelayFiles.py @@ -7,7 +7,7 @@ import shutil from pathlib import Path from textwrap import dedent -from typing import Optional +from typing import List, Optional, Union # Third-party import numpy as np diff --git a/tools/RAiDER/gnss/types.py b/tools/RAiDER/gnss/types.py index 2d21a9523..101ee4f47 100644 --- a/tools/RAiDER/gnss/types.py +++ b/tools/RAiDER/gnss/types.py @@ -1,6 +1,6 @@ import argparse from pathlib import Path -from typing import Optional, List +from typing import List, Optional class RAiDERCombineArgs(argparse.Namespace): raider_file: Path From 6461cc1c096620f73e89d29ab8a5d1e158e1728f Mon Sep 17 00:00:00 2001 From: Simran S Sangha Date: Thu, 15 Jan 2026 13:35:41 -0800 Subject: [PATCH 7/8] Fix variable assignment --- tools/RAiDER/gnss/processDelayFiles.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/RAiDER/gnss/processDelayFiles.py b/tools/RAiDER/gnss/processDelayFiles.py index ce0cff3c3..bae56ddba 100644 --- a/tools/RAiDER/gnss/processDelayFiles.py +++ b/tools/RAiDER/gnss/processDelayFiles.py @@ -5,6 +5,7 @@ import math import re import shutil +from itertools import chain from pathlib import Path from textwrap import dedent from typing import List, Optional, Union @@ -30,10 +31,19 @@ def combineDelayFiles( ref: Optional[Path]=None, col_name: str='ZTD' ) -> None: - # Normalize input: Protects against Python API users passing single Paths + + # Normalize single Path to List + # e.g. Path('folder') -> [Path('folder')] if isinstance(loc, Path): loc = [loc] + # Flatten nested lists if they exist + # e.g. [[Path('A')], [Path('B')]] -> [Path('A'), Path('B')] + # This checks if the list is not empty AND the first item is a list + if loc and isinstance(loc[0], list): + loc = list(chain.from_iterable(loc)) + + # Now 'loc' is guaranteed to be flat: [Path, Path, ...] file_paths = [f for folder in loc for f in folder.glob(f"*{ext}")] if source == 'model': From c349554a9b8e7a54d44bacf2882d61043a4083fc Mon Sep 17 00:00:00 2001 From: Simran S Sangha Date: Fri, 16 Jan 2026 16:27:18 -0800 Subject: [PATCH 8/8] Modify environment.yml to update dependencies Updated the environment.yml file to include 'geopandas' and 'pyogrio' as dependencies, and modified the update command to use the '--prune' option. --- environment.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 1e39ddd85..dcb70cc4b 100644 --- a/environment.yml +++ b/environment.yml @@ -1,5 +1,5 @@ # create environment : conda env create -f environment.yml -# update dependencies: conda env update -f environment.yml +# update dependencies: conda env update -f environment.yml --prune # remove environment : conda env remove -n RAiDER # enter environment : conda activate RAiDER # exit environment : conda deactivate @@ -19,6 +19,7 @@ dependencies: - dask - dem_stitcher>=2.5.8 - ecmwf-api-client + - geopandas - h5netcdf - h5py - herbie-data<2025.2.1 @@ -30,6 +31,7 @@ dependencies: - pandas - progressbar - pydap>3.2.2 + - pyogrio - pyproj>=2.2.0 - pyyaml - rasterio>=1.3.0