diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 23eeac3..ebdbaa1 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -14,7 +14,7 @@ on:
 jobs:
   call-version-info-workflow:
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-version-info.yml@v0.20.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-version-info.yml@v0.21.0
     permissions:
       contents: read
     with:
@@ -23,7 +23,7 @@ jobs:
   call-docker-ghcr-workflow:
     needs: call-version-info-workflow
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@v0.20.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@v0.21.0
     permissions:
       contents: read
       packages: write
diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml
index 3c8e12f..2b7431d 100644
--- a/.github/workflows/changelog.yml
+++ b/.github/workflows/changelog.yml
@@ -14,6 +14,6 @@ on:
 jobs:
   call-changelog-check-workflow:
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.20.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.21.0
     permissions:
       contents: read
diff --git a/.github/workflows/labeled-pr.yml b/.github/workflows/labeled-pr.yml
index e702f10..b9e6dcd 100644
--- a/.github/workflows/labeled-pr.yml
+++ b/.github/workflows/labeled-pr.yml
@@ -13,6 +13,6 @@ on:
 jobs:
   call-labeled-pr-check-workflow:
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.20.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.21.0
     permissions:
       pull-requests: read
diff --git a/.github/workflows/release-checklist-comment.yml b/.github/workflows/release-checklist-comment.yml
index 9893ea5..fa98dc3 100644
--- a/.github/workflows/release-checklist-comment.yml
+++ b/.github/workflows/release-checklist-comment.yml
@@ -10,7 +10,7 @@ on:
 jobs:
   call-release-checklist-workflow:
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-release-checklist-comment.yml@v0.20.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-release-checklist-comment.yml@v0.21.0
     permissions:
       pull-requests: write
     secrets:
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 80d7ef7..dd334d8 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -8,7 +8,7 @@ on:
 jobs:
   call-release-workflow:
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.20.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.21.0
     permissions: {}
     with:
       release_prefix: HyP3 mintpy
diff --git a/.github/workflows/static-analysis.yml b/.github/workflows/static-analysis.yml
index 906f3da..f06a092 100644
--- a/.github/workflows/static-analysis.yml
+++ b/.github/workflows/static-analysis.yml
@@ -5,18 +5,18 @@ on: push
 jobs:
   call-secrets-analysis-workflow:
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-secrets-analysis.yml@v0.20.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-secrets-analysis.yml@v0.21.0
     permissions:
       contents: read

   call-ruff-workflow:
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-ruff.yml@v0.20.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-ruff.yml@v0.21.0
     permissions:
       contents: read

   call-mypy-workflow:
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-mypy.yml@v0.20.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-mypy.yml@v0.21.0
     permissions:
       contents: read
diff --git a/.github/workflows/tag-version.yml b/.github/workflows/tag-version.yml
index e03a1e6..5eecc67 100644
--- a/.github/workflows/tag-version.yml
+++ b/.github/workflows/tag-version.yml
@@ -9,7 +9,7 @@ jobs:
   call-bump-version-workflow:
     # For first-time setup, create a v0.0.0 tag as shown here:
     # https://github.com/ASFHyP3/actions#reusable-bump-versionyml
-    uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.20.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.21.0
     permissions: {}
     secrets:
       USER_TOKEN: ${{ secrets.TOOLS_BOT_PAK }}
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c71d7ab..367d285 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -13,7 +13,7 @@ on:
 jobs:
   call-pytest-workflow:
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-pytest.yml@v0.20.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-pytest.yml@v0.21.0
     permissions:
       contents: read
     with:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1e9828a..4fcd2eb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
 and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [1.1.0]
+
+### Added
+- Added a new `--prefix` parameter to pull products from the `volcsarvatory-data-test` bucket.
+- Added time interval parameters `--start-date` and `--end-date` that discard products outside the given interval.
+
+### Changed
+- Updated the `rename_products` function to handle recent changes in multiburst product names.
+
 ## [1.0.0]

 ### Added
diff --git a/README.md b/README.md
index 1a5fed4..5af4a33 100644
--- a/README.md
+++ b/README.md
@@ -8,11 +8,15 @@ The `hyp3_mintpy` command line tool can be run using the following structure:
 python -m hyp3_mintpy \
   --job-name Okmok_44 \
   --min-coherence 0.1 \
+  --start-date 2019-01-01 \
+  --end-date 2021-01-01
 ```

 Where:
 * `--job-name` is the multiburst project name name in HyP3
 * `--min-coherence` is the minimum coherence for the timeseries inversion
+* `--start-date` is the start date for the timeseries (products before this date are discarded)
+* `--end-date` is the end date for the timeseries (products after this date are discarded)

 > [!IMPORTANT]
 > Earthdata credentials are necessary to access HyP3 data. See the Credentials section for more information.
diff --git a/requirements-static.txt b/requirements-static.txt
index 98faa83..51ae32d 100644
--- a/requirements-static.txt
+++ b/requirements-static.txt
@@ -1,3 +1,3 @@
-ruff==0.14.2
+ruff==0.14.5
 mypy==1.18.2
 opensarlab_lib
diff --git a/src/hyp3_mintpy/__main__.py b/src/hyp3_mintpy/__main__.py
index 01d6a41..d19140c 100644
--- a/src/hyp3_mintpy/__main__.py
+++ b/src/hyp3_mintpy/__main__.py
@@ -19,10 +19,15 @@ def main() -> None:
     parser.add_argument('--bucket-prefix', default='', help='Add a bucket prefix to product(s)')

     # TODO: Your arguments here
-    parser.add_argument('--job-name', help='The name of the HyP3 job', required=True)
+    parser.add_argument('--job-name', help='The name of the HyP3 job', required=False)
+    parser.add_argument(
+        '--prefix', help='Folder that contains multiburst products in the volcsarvatory bucket', required=False
+    )
     parser.add_argument(
         '--min-coherence', default=0.01, type=float, help='The minimum coherence to process', required=False
     )
+    parser.add_argument('--start-date', type=str, help='Start date for the timeseries (YYYY-MM-DD)')
+    parser.add_argument('--end-date', type=str, help='End date for the timeseries (YYYY-MM-DD)')

     args = parser.parse_args()

@@ -41,7 +46,13 @@ def main() -> None:
             UserWarning,
         )

-    product_file = process_mintpy(job_name=args.job_name, min_coherence=args.min_coherence)
+    product_file = process_mintpy(
+        job_name=args.job_name,
+        prefix=args.prefix,
+        min_coherence=args.min_coherence,
+        start=args.start_date,
+        end=args.end_date,
+    )

     if args.bucket:
         upload_file_to_s3(product_file, args.bucket, args.bucket_prefix)
diff --git a/src/hyp3_mintpy/process.py b/src/hyp3_mintpy/process.py
index ba2a08a..f4e8ff4 100644
--- a/src/hyp3_mintpy/process.py
+++ b/src/hyp3_mintpy/process.py
@@ -1,11 +1,15 @@
 """mintpy processing."""

+import datetime as dt
 import logging
 import os
 import shutil
 import subprocess
+import warnings
 from pathlib import Path

+import boto3
+import botocore
 import geopandas as gpd
 import hyp3_sdk as sdk
 import opensarlab_lib as osl
@@ -32,7 +36,7 @@ def rename_products(folder: str) -> None:
     folders = [fol for fol in folders if Path(fol).is_dir()]
     for fol in folders:
         new = True
-        if str(fol).count('_') > 7:
+        if str(fol).count('_') > 8:
             new = False
         os.chdir(str(fol))
         fs = list(Path('./').glob('*'))
@@ -44,10 +48,9 @@
         for f in fs:
             name = f.name
             if new:
-                newname = 'S1_' + burst + '_' + '_'.join([n for n in name.split('_')[3:]])
+                newname = 'S1_' + burst + '_' + '_'.join([n for n in name.split('_')[4:]])
             else:
                 newname = 'S1_' + burst + '_' + '_'.join([n for n in name.split('_')[10:]])
-            print(newname)
             if '.txt' in newname and 'README' not in newname:
                 foldername = newname.split('.')[0]
             subprocess.call('mv ' + name + ' ' + newname, shell=True)
@@ -57,11 +60,15 @@
     os.chdir(cwd)


-def download_pairs(job_name: str, folder: str | None = None) -> None:
+def download_job_pairs(
+    job_name: str, start: str | None = None, end: str | None = None, folder: str | None = None
+) -> str:
     """Downloads HyP3 products and renames files to meet MintPy standards.

     Args:
         job_name: Name of the HyP3 project.
+        start: Start date for the timeseries; products with a date before this will not be downloaded.
+        end: End date for the timeseries; products with a date after this will not be downloaded.
         folder: Folder name that will contain the downloaded products. If None it will create a folder with the project name.
     """
     hyp3 = sdk.HyP3()
@@ -74,11 +81,69 @@
     file_list = jobs.download_files(Path(folder))

     for z in file_list:
-        shutil.unpack_archive(str(z), folder)
+        if check_product(z.name, start, end):
+            shutil.unpack_archive(str(z), folder)
         z.unlink()

     rename_products(folder)

+    return folder
+
+
+def download_bucket_pairs(
+    key: str | None = None,
+    start: str | None = None,
+    end: str | None = None,
+    path: str = 'multiburst_products/',
+    bucket: str = 'volcsarvatory-data-test',
+) -> str:
+    """Downloads multiburst products from bucket and renames files to meet MintPy standards.
+
+    Args:
+        key: Folder name that contains the multiburst product.
+        start: Start date for the timeseries; products with a date before this will not be downloaded.
+        end: End date for the timeseries; products with a date after this will not be downloaded.
+        path: Additional prefix to the products.
+        bucket: Name of the bucket.
+    """
+    s3 = boto3.resource('s3', config=boto3.session.Config(signature_version=botocore.UNSIGNED))
+    buck = s3.Bucket(bucket)
+    folder = str(key).split('/')[-1]
+    Path.mkdir(Path(folder))
+    for s3_object in tqdm(buck.objects.filter(Prefix=f'{path}{key}')):
+        path, filename = os.path.split(s3_object.key)
+        if check_product(filename, start, end):
+            buck.download_file(s3_object.key, f'{folder}/{filename}')
+            z = Path(f'{folder}/{filename}')
+            shutil.unpack_archive(str(z), folder)
+            z.unlink()
+    rename_products(folder)
+
+    return folder
+
+
+def check_product(filename: str, start: str | None = None, end: str | None = None) -> bool:
+    """Check if products are within a given time interval.
+
+    Args:
+        filename: Product name.
+        start: Start date for the timeseries; products with a date before this will not be downloaded.
+        end: End date for the timeseries; products with a date after this will not be downloaded.
+    """
+    date1 = dt.datetime.strptime(filename.split('_')[4], '%Y%m%d')
+    date2 = dt.datetime.strptime(filename.split('_')[5], '%Y%m%d')
+    cond1 = True
+    cond2 = True
+    if start is not None:
+        start_date = dt.datetime.strptime(start, '%Y-%m-%d')
+        cond1 = date1 >= start_date and date2 >= start_date
+
+    if end is not None:
+        end_date = dt.datetime.strptime(end, '%Y-%m-%d')
+        cond2 = date1 <= end_date and date2 <= end_date
+
+    return cond1 and cond2
+

 def set_same_epsg(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     """Checks if the EPSG is the same to all files if not it reprojects them.
@@ -194,11 +259,11 @@ def set_same_frame(folder: str, wgs84: bool = False) -> None:
             gdal.Warp(str(pth), str(pth), dstSRS='EPSG:4326')


-def write_cfg(job_name: str, min_coherence: str) -> None:
+def write_cfg(output_name: str, min_coherence: str) -> None:
     """Creates a basic config file from a template.

     Args:
-        job_name: Name of the HyP3 project.
+        output_name: Name of the output folder.
         min_coherence: Minimum coherence for timeseries processing.
     """
     cfg_folder = Path(hyp3_mintpy.__file__).parent / 'schemas'
@@ -206,9 +271,9 @@
     with Path(f'{cfg_folder}/config.txt').open() as cfg:
         lines = cfg.readlines()

-    abspath = Path(job_name).resolve()
-    Path(f'{job_name}/MintPy').mkdir(parents=True)
-    with Path(f'{job_name}/MintPy/{job_name}.txt').open('w') as cfg:
+    abspath = Path(output_name).resolve()
+    Path(f'{output_name}/MintPy').mkdir(parents=True)
+    with Path(f'{output_name}/MintPy/{output_name}.txt').open('w') as cfg:
         for line in lines:
             newstring = ''
             if 'folder' in line:
@@ -220,39 +285,54 @@
             cfg.write(newstring)


-def run_mintpy(job_name: str) -> Path:
+def run_mintpy(output_name: str) -> Path:
     """Calls mintpy and prepares a zip file with the outputs.

     Args:
-        job_name: Name of the HyP3 project.
+        output_name: Name of the output folder.

     Returns:
         Path for the output zip file.
     """
-    subprocess.call(f'smallbaselineApp.py {job_name}/MintPy/{job_name}.txt --work-dir {job_name}/MintPy', shell=True)
-    subprocess.call(f'mv {job_name}/MintPy/*.h5 {job_name}/', shell=True)
-    subprocess.call(f'mv {job_name}/MintPy/inputs/geometry*.h5 {job_name}/', shell=True)
-    subprocess.call(f'mv {job_name}/MintPy/*.txt {job_name}/', shell=True)
-    subprocess.call(f'rm -rf {job_name}/MintPy {job_name}/S1_* {job_name}/shape_*', shell=True)
-    output_zip = shutil.make_archive(base_name=job_name, format='zip', base_dir=job_name)
+    subprocess.call(
+        f'smallbaselineApp.py {output_name}/MintPy/{output_name}.txt --work-dir {output_name}/MintPy', shell=True
+    )
+    subprocess.call(f'mv {output_name}/MintPy/*.h5 {output_name}/', shell=True)
+    subprocess.call(f'mv {output_name}/MintPy/inputs/geometry*.h5 {output_name}/', shell=True)
+    subprocess.call(f'mv {output_name}/MintPy/*.txt {output_name}/', shell=True)
+    subprocess.call(f'rm -rf {output_name}/MintPy {output_name}/S1_* {output_name}/shape_*', shell=True)
+    output_zip = shutil.make_archive(base_name=output_name, format='zip', base_dir=output_name)
     return Path(output_zip)


-def process_mintpy(job_name: str, min_coherence: float) -> Path:
+def process_mintpy(
+    job_name: str | None, prefix: str | None, min_coherence: float, start: str | None = None, end: str | None = None
+) -> Path:
     """Create a greeting product.

     Args:
         job_name: Name of the HyP3 project.
+        prefix: Folder that contains multiburst products.
         min_coherence: Minimum coherence for timeseries processing.
+        start: Start date for the timeseries.
+        end: End date for the timeseries.

     Returns:
         Path for the output zip file.
     """
-    download_pairs(job_name)
-    set_same_frame(job_name, wgs84=True)
+    if job_name is None and prefix is None:
+        raise ValueError('A job name or a bucket prefix must be given to pull the data from')
+    elif job_name is not None and prefix is not None:
+        warnings.warn('Both a job name and a prefix were given; only one should be provided. Using the job name...')
+
+    if job_name is not None:
+        output_name = download_job_pairs(job_name, start, end)
+    else:
+        output_name = download_bucket_pairs(prefix, start, end)
+    set_same_frame(output_name, wgs84=True)

-    write_cfg(job_name, str(min_coherence))
+    write_cfg(output_name, str(min_coherence))

-    product_file = run_mintpy(job_name)
+    product_file = run_mintpy(output_name)

     return product_file
diff --git a/tests/test_process.py b/tests/test_process.py
index 6baa2fc..0cbf273 100644
--- a/tests/test_process.py
+++ b/tests/test_process.py
@@ -6,13 +6,13 @@
 import pytest

 from hyp3_mintpy import util
-from hyp3_mintpy.process import check_extent, rename_products, set_same_epsg, set_same_frame, write_cfg
+from hyp3_mintpy.process import check_extent, check_product, rename_products, set_same_epsg, set_same_frame, write_cfg


 def test_rename_products_new():
-    Path('test/S1_000-000000s0n00-000000s0n00-000000s0n00_IW_00000000_00000000_VV_INT80_0000').mkdir(parents=True)
+    Path('test/S1_000_000000s0n00-000000s0n00-000000s0n00_IW_00000000_00000000_VV_INT80_0000').mkdir(parents=True)
     with Path(
-        'test/S1_000-000000s0n00-000000s0n00-000000s0n00_IW_00000000_00000000_VV_INT80_0000/S1_000-000000s0n00-000000s0n00-000000s0n00_IW_00000000_00000000_VV_INT80_0000.txt'
+        'test/S1_000_000000s0n00-000000s0n00-000000s0n00_IW_00000000_00000000_VV_INT80_0000/S1_000_000000s0n00-000000s0n00-000000s0n00_IW_00000000_00000000_VV_INT80_0000.txt'
     ).open('w') as test:
         test.write('S1_000000_IW1_00000000T000000_VV_AAAA-BURST')

@@ -113,3 +113,16 @@
     assert minCoh == float(min_coherence)

     subprocess.call(f'rm -rf {job_name}', shell=True)
+
+
+def test_check_product():
+    filename = 'S1_064_000000s1n00-136231s2n02-000000s3n00_IW_20200604_20200616_VV_INT80_0000.zip'
+
+    assert check_product(filename, None, None)
+    assert not check_product(filename, '2021-01-01', None)
+    assert check_product(filename, '2019-01-01', None)
+    assert check_product(filename, None, '2021-01-01')
+    assert not check_product(filename, None, '2019-01-01')
+    assert check_product(filename, '2019-01-01', '2021-01-01')
+    assert not check_product(filename, '2019-01-01', '2020-06-10')
+    assert not check_product(filename, '2020-06-10', '2021-01-01')
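A quick sanity check of the new date filtering, not part of the patch: `check_product` parses the two acquisition dates from the underscore-separated product name (indices 4 and 5) and keeps a product only when both dates fall inside the optional start/end window, which is what the new `--start-date`/`--end-date` flags feed into. A minimal sketch of exercising it directly, assuming the `hyp3_mintpy` package from this diff is installed:

    from hyp3_mintpy.process import check_product

    # Product spanning 2020-06-04 to 2020-06-16 (name fields at indices 4 and 5).
    name = 'S1_064_000000s1n00-136231s2n02-000000s3n00_IW_20200604_20200616_VV_INT80_0000.zip'

    print(check_product(name, '2019-01-01', '2021-01-01'))  # True: both dates inside the window
    print(check_product(name, '2020-06-10', None))          # False: the first date is before the start
    print(check_product(name, None, '2020-06-10'))          # False: the second date is after the end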