diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8064f78d..34cfa6a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 * [731](https://github.com/dbekaert/RAiDER/pull/731) - Fixed fetch routine for GMAO.

 ### Added
+* [792](https://github.com/dbekaert/RAiDER/pull/792) - Added temporal subsetting to `raiderCombine.py` workflow to more seamlessly support annual statistical analyses.
 * [790](https://github.com/dbekaert/RAiDER/pull/790) - Added a test in `test_interpolator.py` to put test coverage to 100% and linted file.
 * [789](https://github.com/dbekaert/RAiDER/pull/789) - Introduce `min_pct_days` option to filter stations based on global days percentage.
 * [788](https://github.com/dbekaert/RAiDER/pull/788) - Updated `variance_analysis` function to include global date tracking parameters and modified datetime handling for station start and end dates.
diff --git a/environment.yml b/environment.yml
index 1e39ddd8..dcb70cc4 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,5 +1,5 @@
 # create environment : conda env create -f environment.yml
-# update dependencies: conda env update -f environment.yml
+# update dependencies: conda env update -f environment.yml --prune
 # remove environment : conda env remove -n RAiDER
 # enter environment : conda activate RAiDER
 # exit environment : conda deactivate
@@ -19,6 +19,7 @@ dependencies:
   - dask
   - dem_stitcher>=2.5.8
   - ecmwf-api-client
+  - geopandas
   - h5netcdf
   - h5py
   - herbie-data<2025.2.1
@@ -30,6 +31,7 @@
   - pandas
   - progressbar
   - pydap>3.2.2
+  - pyogrio
   - pyproj>=2.2.0
   - pyyaml
   - rasterio>=1.3.0
diff --git a/tools/RAiDER/cli/raider.py b/tools/RAiDER/cli/raider.py
index 9d6fd05f..0622c4b4 100644
--- a/tools/RAiDER/cli/raider.py
+++ b/tools/RAiDER/cli/raider.py
@@ -710,6 +710,10 @@ def combineZTDFiles() -> None:
     print(f"Observation error threshold: {args.obs_errlimit}")
     print(f"Nan for negative σ_wm² values: {args.allow_nan_for_negative}")
     print(f"Min% timespan overlap to keep station: {args.min_pct_days}")
+    print(
+        "Subset in time by specified earliest to latest "
+        f"YYYY-MM-DD dates: {args.timeinterval}"
+    )

     if not args.raider_file.exists():
         combineDelayFiles(args.raider_file, loc=args.raider_folder)
@@ -733,6 +737,7 @@ def combineZTDFiles() -> None:
         obs_errlimit=args.obs_errlimit,
         allow_nan_for_negative=args.allow_nan_for_negative,
         min_pct_days=args.min_pct_days,
+        timeinterval=args.timeinterval,
     )

diff --git a/tools/RAiDER/getStationDelays.py b/tools/RAiDER/getStationDelays.py
index d8a2ae05..cfa0b9ab 100644
--- a/tools/RAiDER/getStationDelays.py
+++ b/tools/RAiDER/getStationDelays.py
@@ -274,7 +274,7 @@ def get_station_data(inFile, dateList, gps_repo=None, numCPUs=8, outDir=None, re
             df.to_csv(name, index=False)
         else:
             logger.warning(
-                f"Station file {name} not found likely"
+                f"Station file {name} not found likely "
                 "no available data in specified time span"
             )
diff --git a/tools/RAiDER/gnss/processDelayFiles.py b/tools/RAiDER/gnss/processDelayFiles.py
index f694ed15..bae56ddb 100644
--- a/tools/RAiDER/gnss/processDelayFiles.py
+++ b/tools/RAiDER/gnss/processDelayFiles.py
@@ -4,9 +4,11 @@
 import glob
 import math
 import re
+import shutil
+from itertools import chain
 from pathlib import Path
 from textwrap import dedent
-from typing import Optional
+from typing import List, Optional, Union

 # Third-party
 import numpy as np
@@ -14,7 +16,7 @@
 from tqdm import tqdm

 # Local
-from RAiDER.cli.parser import add_verbose, add_allow_nan_options
+from RAiDER.cli.parser import add_allow_nan_options, add_verbose
 from RAiDER.logger import logger

@@ -23,12 +25,25 @@ def combineDelayFiles(
     out_path: Path,
-    loc: Path=Path.cwd(),
+    loc: Union[List[Path], Path] = Path.cwd(),
     source: str='model',
     ext: str='.csv',
     ref: Optional[Path]=None,
     col_name: str='ZTD'
 ) -> None:
+
+    # Normalize single Path to List
+    # e.g. Path('folder') -> [Path('folder')]
+    if isinstance(loc, Path):
+        loc = [loc]
+
+    # Flatten nested lists if they exist
+    # e.g. [[Path('A')], [Path('B')]] -> [Path('A'), Path('B')]
+    # This checks if the list is not empty AND the first item is a list
+    if loc and isinstance(loc[0], list):
+        loc = list(chain.from_iterable(loc))
+
+    # Now 'loc' is guaranteed to be flat: [Path, Path, ...]
     file_paths = [f for folder in loc for f in folder.glob(f"*{ext}")]

     if source == 'model':
@@ -38,7 +53,6 @@ def combineDelayFiles(
     # If single file, just copy source
     if len(file_paths) == 1:
         if source == 'model':
-            import shutil
             shutil.copy(file_paths[0], out_path)
         else:
             file_paths = readZTDFile(file_paths[0], col_name=col_name)
@@ -548,6 +562,7 @@ def create_parser() -> argparse.ArgumentParser:
         """),
         type=parse_dir,
         default=[Path.cwd()],
+        nargs='+'  # Forces input into a list [Path, Path...]
     )
     p.add_argument(
         '--gnssDir',
@@ -560,6 +575,7 @@ def create_parser() -> argparse.ArgumentParser:
         """),
         type=parse_dir,
         default=[Path.cwd()],
+        nargs='+'  # Forces input into a list [Path, Path...]
     )

     p.add_argument(
@@ -643,6 +659,18 @@ def create_parser() -> argparse.ArgumentParser:
         default=0.0,
     )

+    p.add_argument(
+        '--timeinterval',
+        '-ti',
+        dest='timeinterval',
+        type=str,
+        help=dedent("""\
+            Subset in time by specifying earliest YYYY-MM-DD date
+            followed by latest date YYYY-MM-DD.
+            -- Example : '2016-01-01 2019-01-01'."""),
+        default=None,
+    )
+
     # add other args to parser
     add_allow_nan_options(p)
     add_verbose(p)
@@ -660,6 +688,7 @@ def main(
     obs_errlimit: float=float('inf'),
     allow_nan_for_negative: bool=True,
     min_pct_days: float=0.0,
+    timeinterval: str=None,
 ):
     """Merge a combined RAiDER delays file with a GPS ZTD delay file."""
     print(f'Merging delay files {raider_file} and {ztd_file}')
@@ -668,6 +697,29 @@
     dfz = pd.read_csv(ztd_file, parse_dates=['Datetime'])
     dfr = pd.read_csv(raider_file, parse_dates=['Datetime'])

+    # time-interval filter
+    # need to add a day buffer to account for time changes
+    if timeinterval:
+        # Parse the time interval string
+        start_str, end_str = timeinterval.split()
+
+        # Convert to datetime objects and apply the 1-day buffer
+        # Subtract 1 day from start, Add 1 day to end
+        start_date = pd.to_datetime(start_str)
+        end_date = pd.to_datetime(end_str)
+        start_date_buffer = start_date - pd.Timedelta(days=1)
+        end_date_buffer = end_date + pd.Timedelta(days=1)
+
+        # apply time filter
+        dfz = dfz[
+            (dfz['Datetime'] >= start_date_buffer) &
+            (dfz['Datetime'] <= end_date_buffer)
+        ].reset_index(drop=True)
+        dfr = dfr[
+            (dfr['Datetime'] >= start_date_buffer) &
+            (dfr['Datetime'] <= end_date_buffer)
+        ].reset_index(drop=True)
+
     # drop extra columns from tropo delay file
     expected_data_columns = ['ID', 'Lat', 'Lon', 'Hgt_m', 'Datetime', 'wetDelay', 'hydroDelay', raider_delay]
     dfr = dfr.drop(columns=[col for col in dfr if col not in expected_data_columns])
@@ -715,6 +767,18 @@ def main(
     dfz = pass_common_obs(dfr, dfz, localtime='Localtime')
     dfr = pass_common_obs(dfz, dfr, localtime='Localtime')

+    # use time-interval again to filter based on 'Localtime'
+    # to remove straggling observations outside of specified span
+    if timeinterval:
+        dfz = dfz[
+            (dfz['Localtime'] >= start_date) &
+            (dfz['Localtime'] <= end_date)
+        ].reset_index(drop=True)
+        dfr = dfr[
+            (dfr['Localtime'] >= start_date) &
+            (dfr['Localtime'] <= end_date)
+        ].reset_index(drop=True)
+
     # drop all lines with nans
     dfr.dropna(how='any', inplace=True)
     dfz.dropna(how='any', inplace=True)
diff --git a/tools/RAiDER/gnss/types.py b/tools/RAiDER/gnss/types.py
index bf537fa8..101ee4f4 100644
--- a/tools/RAiDER/gnss/types.py
+++ b/tools/RAiDER/gnss/types.py
@@ -1,14 +1,18 @@
 import argparse
 from pathlib import Path
-from typing import Optional
-
+from typing import List, Optional

 class RAiDERCombineArgs(argparse.Namespace):
     raider_file: Path
-    raider_folder: Path
-    gnss_folder: Path
+    raider_folder: List[Path]
+    gnss_folder: List[Path]
     gnss_file: Optional[Path]
     raider_column_name: str
     column_name: str
     out_name: Path
     local_time: Optional[str]
+    obs_errlimit: float
+    min_pct_days: float
+    timeinterval: Optional[str]
+    allow_nan_for_negative: bool
+    verbose: bool
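
For review purposes, the temporal-subsetting logic added to `main()` above can be exercised in isolation. The snippet below is a minimal, self-contained sketch of the same steps (split the 'YYYY-MM-DD YYYY-MM-DD' string, pad the interval by one day on each side to absorb local-time shifts, then mask on the `Datetime` column) against a small synthetic DataFrame; the station IDs and values are purely illustrative and not part of the patch.

import pandas as pd

# Synthetic stand-in for the ZTD/RAiDER delay tables read in main()
df = pd.DataFrame({
    'ID': ['STN1', 'STN2', 'STN3'],
    'Datetime': pd.to_datetime(['2015-12-30', '2017-06-15', '2019-01-01']),
    'ZTD': [2.31, 2.28, 2.35],
})

timeinterval = '2016-01-01 2019-01-01'

# Same steps as the new block in main(): split the interval string,
# buffer by one day on each side, then mask on the Datetime column.
start_str, end_str = timeinterval.split()
start_date = pd.to_datetime(start_str) - pd.Timedelta(days=1)
end_date = pd.to_datetime(end_str) + pd.Timedelta(days=1)

subset = df[
    (df['Datetime'] >= start_date) &
    (df['Datetime'] <= end_date)
].reset_index(drop=True)

print(subset)  # keeps STN2 and STN3; the 2015 observation is dropped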
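
Likewise, the `loc` normalization added to `combineDelayFiles` (a single `Path`, a flat list, or the nested list that repeated `nargs='+'` parsing can produce all collapse to a flat list of folders) can be checked on its own. The sketch below mirrors that logic outside of RAiDER; the folder names are hypothetical.

from itertools import chain
from pathlib import Path
from typing import List, Union

def normalize_loc(loc: Union[List[Path], Path]) -> List[Path]:
    """Mirror of the loc handling added to combineDelayFiles."""
    # Single Path -> one-element list
    if isinstance(loc, Path):
        loc = [loc]
    # Nested lists (e.g. from repeated argument parsing) -> flat list
    if loc and isinstance(loc[0], list):
        loc = list(chain.from_iterable(loc))
    return loc

# Hypothetical inputs; all three normalize to a flat list of Paths
print(normalize_loc(Path('gnss_2016')))
print(normalize_loc([Path('gnss_2016'), Path('gnss_2017')]))
print(normalize_loc([[Path('gnss_2016')], [Path('gnss_2017')]]))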