1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -35,6 +35,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
* [731](https://github.com/dbekaert/RAiDER/pull/731) - Fixed fetch routine for GMAO.

### Added
* [792](https://github.com/dbekaert/RAiDER/pull/792) - Added temporal subsetting to `raiderCombine.py` workflow to more seamlessly support annual statistical analyses.
* [790](https://github.com/dbekaert/RAiDER/pull/790) - Added a test in `test_interpolator.py` to bring test coverage to 100% and linted the file.
* [789](https://github.com/dbekaert/RAiDER/pull/789) - Introduce `min_pct_days` option to filter stations based on global days percentage.
* [788](https://github.com/dbekaert/RAiDER/pull/788) - Updated `variance_analysis` function to include global date tracking parameters and modified datetime handling for station start and end dates.
4 changes: 3 additions & 1 deletion environment.yml
@@ -1,5 +1,5 @@
# create environment : conda env create -f environment.yml
# update dependencies: conda env update -f environment.yml
# update dependencies: conda env update -f environment.yml --prune
# remove environment : conda env remove -n RAiDER
# enter environment : conda activate RAiDER
# exit environment : conda deactivate
@@ -19,6 +19,7 @@ dependencies:
- dask
- dem_stitcher>=2.5.8
- ecmwf-api-client
- geopandas
- h5netcdf
- h5py
- herbie-data<2025.2.1
@@ -30,6 +31,7 @@ dependencies:
- pandas
- progressbar
- pydap>3.2.2
- pyogrio
- pyproj>=2.2.0
- pyyaml
- rasterio>=1.3.0
5 changes: 5 additions & 0 deletions tools/RAiDER/cli/raider.py
@@ -710,6 +710,10 @@ def combineZTDFiles() -> None:
print(f"Observation error threshold: {args.obs_errlimit}")
print(f"Nan for negative σ_wm² values: {args.allow_nan_for_negative}")
print(f"Min% timespan overlap to keep station: {args.min_pct_days}")
print(
"Subset in time by specified earliest to latest "
f"YYYY-MM-DD dates: {args.timeinterval}"
)

if not args.raider_file.exists():
combineDelayFiles(args.raider_file, loc=args.raider_folder)
@@ -733,6 +737,7 @@ def combineZTDFiles() -> None:
obs_errlimit=args.obs_errlimit,
allow_nan_for_negative=args.allow_nan_for_negative,
min_pct_days=args.min_pct_days,
timeinterval=args.timeinterval,
)


2 changes: 1 addition & 1 deletion tools/RAiDER/getStationDelays.py
@@ -274,7 +274,7 @@ def get_station_data(inFile, dateList, gps_repo=None, numCPUs=8, outDir=None, re
df.to_csv(name, index=False)
else:
logger.warning(
f"Station file {name} not found likely"
f"Station file {name} not found likely "
"no available data in specified time span"
)

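The fix above relies on Python's implicit concatenation of adjacent string literals: without the trailing space, the two fragments run together in the logged message. A minimal sketch of the behavior (the `name` value is hypothetical):

```python
name = "UFS_ztd.csv"  # hypothetical station file name

# Before the fix: no trailing space, so the fragments run together.
before = (
    f"Station file {name} not found likely"
    "no available data in specified time span"
)
# After the fix: the trailing space restores the word boundary.
after = (
    f"Station file {name} not found likely "
    "no available data in specified time span"
)

print(before)  # ...not found likelyno available data...
print(after)   # ...not found likely no available data...
```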
72 changes: 68 additions & 4 deletions tools/RAiDER/gnss/processDelayFiles.py
@@ -4,17 +4,19 @@
import glob
import math
import re
import shutil
from itertools import chain
from pathlib import Path
from textwrap import dedent
from typing import Optional
from typing import List, Optional, Union

# Third-party
import numpy as np
import pandas as pd
from tqdm import tqdm

# Local
from RAiDER.cli.parser import add_verbose, add_allow_nan_options
from RAiDER.cli.parser import add_allow_nan_options, add_verbose
from RAiDER.logger import logger


@@ -23,12 +25,25 @@

def combineDelayFiles(
out_path: Path,
loc: Path=Path.cwd(),
loc: Union[List[Path], Path] = Path.cwd(),
source: str='model',
ext: str='.csv',
ref: Optional[Path]=None,
col_name: str='ZTD'
) -> None:

# Normalize single Path to List
# e.g. Path('folder') -> [Path('folder')]
if isinstance(loc, Path):
loc = [loc]

# Flatten nested lists if they exist
# e.g. [[Path('A')], [Path('B')]] -> [Path('A'), Path('B')]
# This checks if the list is not empty AND the first item is a list
if loc and isinstance(loc[0], list):
loc = list(chain.from_iterable(loc))

# Now 'loc' is guaranteed to be flat: [Path, Path, ...]
file_paths = [f for folder in loc for f in folder.glob(f"*{ext}")]

if source == 'model':
@@ -38,7 +53,6 @@ def combineDelayFiles(
# If single file, just copy source
if len(file_paths) == 1:
if source == 'model':
import shutil
shutil.copy(file_paths[0], out_path)
else:
file_paths = readZTDFile(file_paths[0], col_name=col_name)
@@ -548,6 +562,7 @@ def create_parser() -> argparse.ArgumentParser:
"""),
type=parse_dir,
default=[Path.cwd()],
nargs='+' # Forces input into a list [Path, Path...]
)
p.add_argument(
'--gnssDir',
@@ -560,6 +575,7 @@ def create_parser() -> argparse.ArgumentParser:
"""),
type=parse_dir,
default=[Path.cwd()],
nargs='+' # Forces input into a list [Path, Path...]
)

p.add_argument(
@@ -643,6 +659,18 @@ def create_parser() -> argparse.ArgumentParser:
default=0.0,
)

p.add_argument(
'--timeinterval',
'-ti',
dest='timeinterval',
type=str,
help=dedent("""\
Subset in time by specifying earliest YYYY-MM-DD date
followed by latest date YYYY-MM-DD.
-- Example : '2016-01-01 2019-01-01'."""),
default=None,
)

# add other args to parser
add_allow_nan_options(p)
add_verbose(p)
@@ -660,6 +688,7 @@ def main(
obs_errlimit: float=float('inf'),
allow_nan_for_negative: bool=True,
min_pct_days: float=0.0,
timeinterval: Optional[str]=None,
):
"""Merge a combined RAiDER delays file with a GPS ZTD delay file."""
print(f'Merging delay files {raider_file} and {ztd_file}')
@@ -668,6 +697,29 @@ def main(
dfz = pd.read_csv(ztd_file, parse_dates=['Datetime'])
dfr = pd.read_csv(raider_file, parse_dates=['Datetime'])

# time-interval filter
# add a one-day buffer to account for local-time shifts
if timeinterval:
# Parse the time interval string
start_str, end_str = timeinterval.split()

# Convert to datetime objects and apply the 1-day buffer
# Subtract 1 day from start, Add 1 day to end
start_date = pd.to_datetime(start_str)
end_date = pd.to_datetime(end_str)
start_date_buffer = start_date - pd.Timedelta(days=1)
end_date_buffer = end_date + pd.Timedelta(days=1)

# apply time filter
dfz = dfz[
(dfz['Datetime'] >= start_date_buffer) &
(dfz['Datetime'] <= end_date_buffer)
].reset_index(drop=True)
dfr = dfr[
(dfr['Datetime'] >= start_date_buffer) &
(dfr['Datetime'] <= end_date_buffer)
].reset_index(drop=True)

# drop extra columns from tropo delay file
expected_data_columns = ['ID', 'Lat', 'Lon', 'Hgt_m', 'Datetime', 'wetDelay', 'hydroDelay', raider_delay]
dfr = dfr.drop(columns=[col for col in dfr if col not in expected_data_columns])
@@ -715,6 +767,18 @@ def main(
dfz = pass_common_obs(dfr, dfz, localtime='Localtime')
dfr = pass_common_obs(dfz, dfr, localtime='Localtime')

# use time-interval again to filter based on 'Localtime'
# to remove straggling observations outside of specified span
if timeinterval:
dfz = dfz[
(dfz['Localtime'] >= start_date) &
(dfz['Localtime'] <= end_date)
].reset_index(drop=True)
dfr = dfr[
(dfr['Localtime'] >= start_date) &
(dfr['Localtime'] <= end_date)
].reset_index(drop=True)

# drop all lines with nans
dfr.dropna(how='any', inplace=True)
dfz.dropna(how='any', inplace=True)
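To make the new filtering logic easier to follow outside the diff, here is a minimal, self-contained sketch of the two-stage time filter, assuming the `Datetime` and `Localtime` columns used above; in the real workflow the second pass only runs after local times have been computed and common observations matched:

```python
import pandas as pd

def filter_by_interval(df: pd.DataFrame, timeinterval: str) -> pd.DataFrame:
    """Sketch of the buffered two-stage time filter added in this PR."""
    start_str, end_str = timeinterval.split()
    start_date = pd.to_datetime(start_str)
    end_date = pd.to_datetime(end_str)

    # Pass 1: filter on 'Datetime' with a one-day buffer on each side,
    # so observations shifted during local-time conversion are not
    # discarded prematurely.
    lo = start_date - pd.Timedelta(days=1)
    hi = end_date + pd.Timedelta(days=1)
    df = df[(df['Datetime'] >= lo) & (df['Datetime'] <= hi)].reset_index(drop=True)

    # Pass 2: trim straggling observations to the exact requested span
    # using 'Localtime'.
    df = df[(df['Localtime'] >= start_date) & (df['Localtime'] <= end_date)]
    return df.reset_index(drop=True)

# Hypothetical toy data
df = pd.DataFrame({
    'Datetime': pd.to_datetime(['2015-12-31 23:00', '2017-06-01 12:00', '2019-01-01 02:00']),
    'Localtime': pd.to_datetime(['2016-01-01 08:00', '2017-06-01 03:00', '2018-12-31 21:00']),
})
print(filter_by_interval(df, '2016-01-01 2019-01-01'))
```

The one-day buffer in the first pass is what keeps an observation stamped 2015-12-31 23:00 alive long enough for its 2016-01-01 local time to qualify in the second pass.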
12 changes: 8 additions & 4 deletions tools/RAiDER/gnss/types.py
@@ -1,14 +1,18 @@
import argparse
from pathlib import Path
from typing import Optional

from typing import List, Optional

class RAiDERCombineArgs(argparse.Namespace):
raider_file: Path
raider_folder: Path
gnss_folder: Path
raider_folder: List[Path]
gnss_folder: List[Path]
Collaborator:

@sssangha could this be Union[List[Path], Path]?

Collaborator Author:

I made the logic more consistent overall in the code; it now works as follows:

  • Support wildcard/multiple path input in the parser.
  • Split the input into a list of discrete individual paths and push each one through the downstream function.

I verified that the outputs I get now are consistent with those from before this little clean-up, so the files are being pushed through as we'd expect.

Collaborator Author:

Hello @jlmaurer, following up. I worked this out and enforced consistency in the workflow. Please let me know what you think. Thanks

gnss_file: Optional[Path]
raider_column_name: str
column_name: str
out_name: Path
local_time: Optional[str]
obs_errlimit: float
min_pct_days: float
timeinterval: Optional[str]
allow_nan_for_negative: bool
verbose: bool
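To close the loop on the review thread above, a minimal sketch of the `Union[List[Path], Path]` normalization now performed at the top of `combineDelayFiles` (folder names are hypothetical):

```python
from itertools import chain
from pathlib import Path
from typing import List, Union

def normalize_locations(loc: Union[List[Path], Path]) -> List[Path]:
    """Sketch of the input normalization at the top of combineDelayFiles."""
    # A bare Path becomes a one-element list.
    if isinstance(loc, Path):
        loc = [loc]
    # One level of nesting is flattened (the diff guards for this case),
    # e.g. [[Path('A')], [Path('B')]] -> [Path('A'), Path('B')].
    if loc and isinstance(loc[0], list):
        loc = list(chain.from_iterable(loc))
    return loc

# Hypothetical inputs
print(normalize_locations(Path('delays')))              # [PosixPath('delays')]
print(normalize_locations([Path('a'), Path('b')]))      # [PosixPath('a'), PosixPath('b')]
print(normalize_locations([[Path('a')], [Path('b')]]))  # [PosixPath('a'), PosixPath('b')]
```

With `loc` guaranteed flat, downstream code can simply glob each folder, which is what the `file_paths` comprehension in the diff does next.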