From 0e9f81d7f96789ae0104153493a12f04ca0883c6 Mon Sep 17 00:00:00 2001
From: William Horn
Date: Fri, 30 Jan 2026 15:41:02 -0900
Subject: [PATCH] Download OPERA RTC data and mosaic using GDAL

---
 severe-weather/notebooks/environment.yml      |   1 +
 ..._wind_damage_detection_terramind_hls.ipynb | 169 +++++++++++++++---
 2 files changed, 141 insertions(+), 29 deletions(-)

diff --git a/severe-weather/notebooks/environment.yml b/severe-weather/notebooks/environment.yml
index dadb56c..9289b84 100644
--- a/severe-weather/notebooks/environment.yml
+++ b/severe-weather/notebooks/environment.yml
@@ -14,6 +14,7 @@ dependencies:
   # Data manipulation
   - geopandas
   - numpy
+  - gdal
 
   # Visualization
   - matplotlib
diff --git a/severe-weather/notebooks/hail_wind_damage_detection_terramind_hls.ipynb b/severe-weather/notebooks/hail_wind_damage_detection_terramind_hls.ipynb
index c7c50ce..4ec28fd 100644
--- a/severe-weather/notebooks/hail_wind_damage_detection_terramind_hls.ipynb
+++ b/severe-weather/notebooks/hail_wind_damage_detection_terramind_hls.ipynb
@@ -251,10 +251,12 @@
     "swath_gdf = gdf[gdf['swathID'] == swath_id]\n",
     "swath = swath_gdf.iloc[0]\n",
     "\n",
-    "best_post_date = datetime.fromisoformat(swath['ls5hlsDate']) # Get the day with the best hls data\n",
+    "hls_post_date = datetime.fromisoformat(swath['ls5hlsDate']) # Get the day with the best HLS data\n",
+    "s1_post_date = datetime.fromisoformat(swath['s1Date']) # Get the day with the best S1 RTC data\n",
     "\n",
-    "print(f'Damage Event Occured: {swath[\"swathDate\"]}')\n",
-    "print(f'Date with best HLS data: {best_post_date}')"
+    "print(f'Damage Event Occurred: {swath[\"swathDate\"]}')\n",
+    "print(f'Date with best HLS data: {hls_post_date}')\n",
+    "print(f'Date with best S1-RTC data: {s1_post_date}')"
    ]
   },
   {
@@ -280,10 +282,11 @@
    },
    "outputs": [],
    "source": [
     "data_path = Path.cwd() / 'data' / f'swath_{swath_id}'\n",
     "hls_path = data_path / 'HLS'\n",
+    "rtc_path = data_path / 'RTC'\n",
     "label_path = data_path / 'LABEL'\n",
     "\n",
-    "for data_path in (hls_path, label_path):\n",
-    "    data_path.mkdir(parents=True, exist_ok=True)"
+    "for data_folder in (hls_path, rtc_path, label_path):\n",
+    "    data_folder.mkdir(parents=True, exist_ok=True)"
    ]
@@ -295,6 +298,25 @@ "The swath geometry is currently defined in degrees. To define `pixel_size` and `padding` in meters, we need to reproject the swath geometry to a coordinate reference system (CRS) that uses meter-based units."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "utm_crs = gdf.estimate_utm_crs()\n",
+    "gdf_crs = gdf.to_crs(utm_crs)\n",
+    "\n",
+    "swath_crs_gdf = gdf_crs[gdf_crs['swathID'] == swath_id].copy()\n",
+    "\n",
+    "swath_crs_gdf['sample_buffer'] = swath_crs_gdf.buffer(3000)\n",
+    "swath_crs_gdf['search_buffer'] = swath_crs_gdf.buffer(10000)\n",
+    "\n",
+    "print(utm_crs)\n",
+    "print(f\"Degrees: {str(gdf[gdf['swathID'] == swath_id].geometry.iloc[0])[:20]}...\")\n",
+    "print(f\"Meters: {str(swath_crs_gdf.geometry.iloc[0])[:20]}...\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
@@ -307,16 +329,8 @@
    },
    "outputs": [],
    "source": [
-    "utm_crs = gdf.estimate_utm_crs()\n",
-    "gdf_crs = gdf.to_crs(utm_crs)\n",
-    "\n",
-    "swath_crs_gdf = gdf_crs[gdf_crs['swathID'] == swath_id]\n",
-    "\n",
-    "print(f\"Degrees: {str(gdf[gdf['swathID'] == swath_id].geometry.iloc[0])[:20]}...\")\n",
-    "print(f\"Meters: {str(swath_crs_gdf.geometry.iloc[0])[:20]}...\")\n",
-    "\n",
     "pixel_size_meters = 30\n",
-    "padding_meters = 1000\n",
+    "padding_meters = 4000\n",
     "\n",
     "\n",
     "def add_padding_to_bounds(bounds):\n",
@@ -333,6 +347,7 @@
     "    selected = selected_swath_gdf.iloc[0]\n",
     "    geom = selected.geometry\n",
     "    minx, miny, maxx, maxy = add_padding_to_bounds(geom.bounds)\n",
+    "    print(minx, miny, maxx, maxy)\n",
     "\n",
     "    width = int(np.ceil((maxx - minx) / pixel_size_meters))\n",
     "    height = int(np.ceil((maxy - miny) / pixel_size_meters))\n",
@@ -469,6 +484,18 @@ "We will search for HLS data covering the damage polygon **after the damage event** to use as input for training and analysis."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import shapely\n",
+    "\n",
+    "buffered = shapely.set_precision(swath_crs_gdf['search_buffer'].to_crs('EPSG:4326').iloc[0], grid_size=0.001)\n",
+    "geometry = polygon.orient(buffered, sign=1.0)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
@@ -477,12 +504,16 @@
    },
    "outputs": [],
    "source": [
-    "geometry = polygon.orient(swath.geometry, sign=1.0)\n",
-    "\n",
-    "results = earthaccess.search_data(\n",
+    "hls_results = earthaccess.search_data(\n",
     "    short_name='HLSS30',\n",
     "    polygon=list(geometry.exterior.coords),\n",
-    "    temporal=(best_post_date, best_post_date + timedelta(days=1))\n",
+    "    temporal=(hls_post_date, hls_post_date + timedelta(days=1))\n",
+    ")\n",
+    "\n",
+    "rtc_results = earthaccess.search_data(\n",
+    "    short_name='OPERA_L2_RTC-S1_V1',\n",
+    "    polygon=list(geometry.exterior.coords),\n",
+    "    temporal=(s1_post_date, s1_post_date + timedelta(days=1)),\n",
     ")"
    ]
   },
@@ -507,15 +538,15 @@
    },
    "outputs": [],
    "source": [
-    "geotiff_list = results\n",
-    "\n",
-    "def plot_geotiff_extents(results_list, selected_gdf):\n",
+    "def plot_geotiff_extents(hls_results, rtc_results, selected_gdf):\n",
     "    all_lats, all_lons = [], []\n",
     "\n",
     "    m = folium.Map(tiles=\"OpenStreetMap\")\n",
-    "    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']\n",
     "\n",
-    "    for i, metadata in enumerate(results_list):\n",
+    "    rtc = [(result, '#1f77b4') for result in rtc_results]\n",
+    "    hls = [(result, '#2ca02c') for result in hls_results]\n",
+    "\n",
+    "    for metadata, tile_color in hls + rtc:\n",
     "        points = metadata['umm']['SpatialExtent']['HorizontalSpatialDomain']['Geometry']['GPolygons'][0]['Boundary']['Points']\n",
     "\n",
     "        footprint_coords = []\n",
     "        for point in points:\n",
     "            lat, lon = point['Latitude'], point['Longitude']\n",
     "            footprint_coords.append([lat, lon])\n",
     "            all_lats.append(lat)\n",
     "            all_lons.append(lon)\n",
     "\n",
-    "        tile_color = colors[i % len(colors)]\n",
-    "\n",
     "        folium.Polygon(\n",
     "            locations=footprint_coords,\n",
-    "            tooltip=f\"HLS Tile: {metadata['umm']['GranuleUR']}\",\n",
+    "            tooltip=f\"Tile: {metadata['umm']['GranuleUR']}\",\n",
@@ -554,6 +583,7 @@
     "\n",
     "    folium.LayerControl().add_to(m)\n",
     "\n",
+    "\n",
     "    return m"
    ]
   },
@@ -570,7 +600,7 @@
    },
    "outputs": [],
    "source": [
-    "final_map = plot_geotiff_extents(results, swath_gdf)\n",
+    "final_map = plot_geotiff_extents(hls_results, rtc_results, swath_gdf)\n",
     "final_map"
    ]
   },
@@ -633,9 +663,14 @@
    },
    "outputs": [],
    "source": [
-    "files = earthaccess.download(\n",
-    "    results,\n",
+    "hls_files = earthaccess.download(\n",
+    "    hls_results,\n",
     "    local_path=hls_path\n",
+    ")\n",
+    "\n",
+    "rtc_files = earthaccess.download(\n",
+    "    rtc_results,\n",
+    "    local_path=rtc_path\n",
     ")"
    ]
   },
@@ -729,6 +764,29 @@
     "\n",
     "    return fig\n",
     "\n",
+    "class RTCDataset(RasterDataset):\n",
+    "    filename_glob = \"OPERA_L2_RTC-S1*.tif\"\n",
+    "\n",
+    "    # https://hyp3-docs.asf.alaska.edu/guides/opera_rtc_product_guide/#naming-convention\n",
+    "    filename_regex = (\n",
+    "        r\"^OPERA_L2_RTC-S1_\"\n",
+    "        r\"(?P<burst_id>T\d{3}-\d{6}-IW\d)_\"\n",
+    "        r\"(?P<date>\d{8}T\d{6})Z_\"\n",
+    "        r\"\d{8}T\d{6}Z_\"\n",
+    "        r\"(?P<sensor>S1[AB])_\"\n",
+    "        r\"(?P<spacing>\d+)_\"\n",
+    "        r\"v(?P<version>\d+\.\d+)_\"\n",
+    "        r\"(?P<band>VV|VH|mask)\"\n",
+    "        r\"\.tif$\"\n",
+    "    )\n",
+    "\n",
+    "    date_format = \"%Y%m%dT%H%M%S\"\n",
+    "\n",
+    "    separate_files = True\n",
+    "    is_image = True\n",
+    "\n",
+    "    all_bands = (\"VV\", \"VH\", \"mask\")\n",
+    "\n",
     "\n",
     "class LabelDataset(RasterDataset):\n",
     "    filename_glob = \"swathID_*.tif\"\n",
@@ -777,15 +835,59 @@
    "source": [
     "# B, G, R, N, SW1, SW2\n",
     "hls_bands = ('B02', 'B03', 'B04', 'B08', 'B11', 'B12')\n",
+    "# VV, VH, layover/shadow mask\n",
+    "rtc_bands = ('VV', 'VH', 'mask')\n",
     "\n",
     "hls_dataset = HLSDataset(\n",
     "    paths=hls_path,\n",
     "    bands=hls_bands\n",
     ")\n",
     "\n",
+    "rtc_dataset = RTCDataset(\n",
+    "    paths=rtc_path,\n",
+    "    bands=rtc_bands,\n",
+    ")\n",
+    "\n",
     "label_dataset = LabelDataset(paths=label_path)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Merge Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from osgeo import gdal\n",
+    "\n",
+    "def mosaic(dataset, dataset_name):\n",
+    "    for band in dataset.bands:\n",
+    "        band_files = [f for f in dataset.files if f'{band}.tif' in f]\n",
+    "\n",
+    "        vrt_path = data_path / f'{swath_id}_{band}.vrt'\n",
+    "        vrt = gdal.BuildVRT(str(vrt_path), band_files)\n",
+    "\n",
+    "        tif_path = data_path / 'mosaic' / f'{swath_id}_{dataset_name}_{band}.tif'\n",
+    "        tif_path.parent.mkdir(parents=True, exist_ok=True)\n",
+    "        gdal.Translate(str(tif_path), vrt, format='GTiff', creationOptions=['TILED=YES'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mosaic(rtc_dataset, 'rtc')\n",
+    "mosaic(hls_dataset, 'hls')"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -816,7 +918,7 @@
     "\n",
     "    for band_tif in band_tifs:\n",
     "        with rasterio.open(band_tif) as src:\n",
-    "            band_data = src.read(1).flatten()\n",
+    "            band_data = src.read(1)\n",
     "\n",
-    "            batch_count = len(band_data)\n",
+    "            batch_count = band_data.size\n",
     "            batch_mean = np.mean(band_data)\n",
@@ -838,10 +940,18 @@
     "\n",
     "print('Band, Mean, Std')\n",
     "for band in hls_dataset.bands:\n",
     "    mean, std = calculate_mean_and_std([f for f in hls_dataset.files if f'{band}.tif' in f])\n",
     "    print(band, f'{mean:.2f}', f'{std:.2f}')\n",
     "    HLS_MEAN.append(mean)\n",
-    "    HLS_STD.append(std)"
+    "    HLS_STD.append(std)\n",
+    "\n",
+    "RTC_MEAN, RTC_STD = [], []\n",
+    "for band in rtc_dataset.bands:\n",
+    "    tif_files = [f for f in rtc_dataset.files if f'{band}.tif' in f]\n",
+    "    mean, std = calculate_mean_and_std(tif_files)\n",
+    "    print(band, f'{mean:.2f}', f'{std:.2f}')\n",
+    "    RTC_MEAN.append(mean)\n",
+    "    RTC_STD.append(std)"
    ]
   },
   {
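
Note (not part of the applied diff): the mosaic() helper above is the standard GDAL BuildVRT + Translate pattern — build a lightweight virtual mosaic over the per-band tiles, then materialize it as a tiled GeoTIFF. A minimal standalone sketch of that pattern follows, under the assumption that all inputs are single-band GeoTIFFs sharing one CRS; mosaic_band, band_files, and out_dir are illustrative names, not identifiers from the notebook:

    from pathlib import Path
    from osgeo import gdal

    gdal.UseExceptions()  # raise Python exceptions instead of returning None on failure

    def mosaic_band(band_files: list[str], out_dir: Path, band: str) -> Path:
        """Stitch single-band tiles into one tiled GeoTIFF via an intermediate VRT."""
        out_dir.mkdir(parents=True, exist_ok=True)
        vrt_path = out_dir / f'{band}.vrt'
        # BuildVRT writes a small XML mosaic that references the source tiles
        # without copying any pixel data.
        vrt = gdal.BuildVRT(str(vrt_path), band_files)
        tif_path = out_dir / f'mosaic_{band}.tif'
        # Translate reads through the virtual mosaic and writes a real GeoTIFF.
        gdal.Translate(str(tif_path), vrt, format='GTiff', creationOptions=['TILED=YES'])
        vrt = None  # dereference to flush and close the VRT dataset
        return tif_path

Because the VRT only references its sources, it costs almost nothing to build, and the expensive pixel copy happens once, in Translate.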