Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
298 changes: 298 additions & 0 deletions kerchunk/test_time_kerchunk_singleForecast_nativeNWM.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "82a662bf-4aad-437d-984b-cb54ed8abbcc",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import shutil\n",
"import fsspec\n",
"import ujson\n",
"from kerchunk.hdf import SingleHdf5ToZarr\n",
"from kerchunk.combine import MultiZarrToZarr\n",
"import xarray as xr\n",
"import dask\n",
"import hvplot.xarray\n",
"from datetime import datetime, timedelta"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55bdc833-39d8-41e3-a956-54c0d60c55b5",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import subprocess\n",
"\n",
"try:\n",
" import google.colab\n",
"\n",
" ENV_IS_CL = True\n",
" subprocess.run(\n",
" [\n",
" \"git\",\n",
" \"clone\",\n",
" \"https://github.com/AlabamaWaterInstitute/data_access_examples\",\n",
" ]\n",
" )\n",
" sys.path.append(\"/content/data_access_examples\")\n",
"except:\n",
" ENV_IS_CL = False\n",
" sys.path.append(r\"..\")\n",
" sys.path.append(r\"../data_access_examples\")\n",
" sys.path.append(r\"git\")\n",
"\n",
"print(sys.path[0])\n",
"import nwm_filenames.listofnwmfilenames as lnf\n",
"from nwm_network.NWM_2_1_outlets import outlets_sorted"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb423afb-67e8-4fe3-adc8-888f15303edb",
"metadata": {},
"outputs": [],
"source": [
"def gen_json(files):\n",
" open_files = fsspec.open_files(files)\n",
" out = []\n",
" for file in open_files:\n",
" with file as f:\n",
" out.append(SingleHdf5ToZarr(f, file.path).translate())\n",
"\n",
" mzz = MultiZarrToZarr(\n",
" out,\n",
" remote_protocol=\"gcs\",\n",
" concat_dims=[\"time\", \"reference_time\"],\n",
" )\n",
"\n",
" tot = mzz.translate()\n",
" return tot"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70027348-78a1-497f-9089-7b8948d63d72",
"metadata": {},
"outputs": [],
"source": [
"import nwm_filenames.listofnwmfilenames as lnf\n",
"\n",
"configs = [\n",
" (1, 1, 1, -1), # Short_range\n",
" (2, 1, 1, 1), # Medium range mem_1\n",
" (2, 1, 1, 2), # Medium range mem_2\n",
" (2, 1, 1, 3), # Medium range mem_3\n",
" (2, 1, 1, 4), # Medium range mem_4\n",
" (2, 1, 1, 5), # Medium range mem_5\n",
" (2, 1, 1, 6), # Medium range mem_6\n",
" (2, 1, 1, 7), # Medium range mem_7\n",
" (3, 1, 1, -1), # Medium range no_da\n",
"]\n",
"\n",
"prefix = \"\"\n",
"start_date = \"20221201\"\n",
"end_date = \"20221201\"\n",
"configuration_list = []\n",
"configuration_list.extend([(*_c, start_date, end_date, [0], 5) for _c in configs])\n",
"print(configuration_list)\n",
"\n",
"file_collections = [lnf.create_file_list(*_c) for _c in configuration_list]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23dc327e-8bf1-4c0d-8d86-3da9bedd6e09",
"metadata": {},
"outputs": [],
"source": [
"file_collections[0]"
]
},
{
"cell_type": "markdown",
"id": "6f0a7a64-cdaf-44ca-ab1c-8ba3174d72c8",
"metadata": {},
"source": [
"### Generate plot data for a single stream segment"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "306f25c4-0efe-4084-9bbc-7099df31d8b9",
"metadata": {},
"outputs": [],
"source": [
"from time import time"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6c8c919d-8a49-42c5-9393-19cf8130b989",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"# id_list = 101\n",
"id_list = [22811611] # Mississippi River outlet\n",
"# id_list = [22811611, 20427622] # Mississippi River outlet and a small interior outlet in Arizona\n",
"# id_list = 20427622 # Random small interior outlet somewhere in Arizona; see https://github.com/AlabamaWaterInstitute/data_access_examples/blob/main/nwm_network/route_link_fsspec.ipynb\n",
"# id_list = outlets_sorted\n",
"ds_list = []\n",
"df_list = []\n",
"tot_list = []\n",
"for _i, files in enumerate(file_collections[0:8]):\n",
" st = time()\n",
" print(f\"generating jsons for {_i}\", end=\"\\t\")\n",
" tot_list.append(gen_json(files))\n",
" print(f\"{time()-st} elapsed\")\n",
"\n",
" print(f\"creating xarray dataset for {_i}\", end=\"\\t\")\n",
" backend_args_1 = {\n",
" \"consolidated\": False,\n",
" \"storage_options\": {\n",
" \"fo\": tot_list[_i],\n",
" \"remote_protocol\": \"gcs\",\n",
" \"remote_options\": {\"anon\": True},\n",
" },\n",
" }\n",
" ds_1 = xr.open_dataset(\"reference://\", engine=\"zarr\", backend_kwargs=backend_args_1)\n",
" print(f\"{time()-st} elapsed\")\n",
"\n",
" print(f\"slicing dataset to feature for {_i}\", end=\"\\t\")\n",
" ds_select_1 = ds_1.sel(feature_id=id_list)\n",
" ds_list.append(ds_select_1)\n",
" print(f\"{time()-st} elapsed\")\n",
"\n",
" print(f\"querying/retrieving data and creating dataframe for {_i}\", end=\"\\t\")\n",
" df_select_1 = ds_select_1[\"streamflow\"].to_dataframe()\n",
" df_list.append(df_select_1)\n",
" print(f\"{time()-st} elapsed\")\n",
"\n",
" # print(f\"selecting feature for {_i}\", end=\"\\t\")\n",
" # ds_select_1.plot.scatter(\"time\",\"streamflow\")\n",
" # print(f\"{time()-st} elapsed\")\n",
"\n",
" print(f\"finishing {_i}\", end=\"\\t\")\n",
" print(f\"{time()-st} total time elapsed\")\n",
" print(f\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "02c03af6-042c-4de6-84af-645af2dc3f2c",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"# id_list = 101\n",
"id_list = [22811611] # Mississippi River outlet\n",
"# id_list = [22811611, 20427622] # Mississippi River outlet and a small interior outlet in Arizona\n",
"# id_list = 20427622 # Random small interior outlet somewhere in Arizona; see https://github.com/AlabamaWaterInstitute/data_access_examples/blob/main/nwm_network/route_link_fsspec.ipynb\n",
"ds_list = []\n",
"df_list = []\n",
"tot_list = []\n",
"for _i, files in enumerate(file_collections[0:8]):\n",
" st = time()\n",
" print(f\"generating jsons for {_i}\", end=\"\\t\")\n",
" tot_list.append(gen_json(files))\n",
" print(f\"{time()-st} elapsed\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e05dbc5e-59e7-4801-aef5-4481aee994fa",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"id_list = outlets_sorted\n",
"id_list = [22811611] # Mississippi River outlet\n",
"\n",
"for _i, files in enumerate(file_collections[0:8]):\n",
" st = time()\n",
" print(f\"creating xarray dataset for {_i}\", end=\"\\t\")\n",
" backend_args_1 = {\n",
" \"consolidated\": False,\n",
" \"storage_options\": {\n",
" \"fo\": tot_list[_i],\n",
" \"remote_protocol\": \"gcs\",\n",
" \"remote_options\": {\"anon\": True},\n",
" },\n",
" }\n",
" ds_1 = xr.open_dataset(\"reference://\", engine=\"zarr\", backend_kwargs=backend_args_1)\n",
" print(f\"{time()-st} elapsed\")\n",
"\n",
" print(f\"slicing dataset to feature for {_i}\", end=\"\\t\")\n",
" ds_select_1 = ds_1.sel(feature_id=id_list)\n",
" ds_list.append(ds_select_1)\n",
" print(f\"{time()-st} elapsed\")\n",
"\n",
" print(f\"querying/retrieving data and creating dataframe for {_i}\", end=\"\\t\")\n",
" df_select_1 = ds_select_1[\"streamflow\"].to_dataframe()\n",
" df_list.append(df_select_1)\n",
" print(f\"{time()-st} elapsed\")\n",
"\n",
" # print(f\"selecting feature for {_i}\", end=\"\\t\")\n",
" # ds_select_1.plot.scatter(\"time\",\"streamflow\")\n",
" # print(f\"{time()-st} elapsed\")\n",
"\n",
" print(f\"finishing {_i}\", end=\"\\t\")\n",
" print(f\"{time()-st} total time elapsed\")\n",
" print(f\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40d432ca-97eb-4509-9ede-d4cdb69c3e3a",
"metadata": {},
"outputs": [],
"source": [
"df_list[1].xs(22811611, axis=0, level=2, drop_level=False)\n",
"# For more on selecting rows from a pandas MultiIndex DataFrame (e.g. with .xs), see https://stackoverflow.com/questions/53927460/select-rows-in-pandas-multiindex-dataframe"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e149afcf-1530-4489-9d6f-5b2361202538",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading