Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 222 additions & 0 deletions statvar_imports/noaa_gfs/custom_script_final.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
import csv
import io
import re
import time
from google.cloud import storage

# --- CONFIGURATION ---
# Name of the Cloud Storage bucket that receives the generated CSV.
BUCKET_NAME = "unresolved_mcf"
# Local input CSV that process_and_upload_true_stream() reads row by row.
# NOTE(review): the directory here is spelled "noa_gfs" while the output blob
# below uses "noaa_gfs" - confirm the input path is intentional.
INPUT_LOCAL = "../noa_gfs/input_files/gfs.t00z.pgrb2.0p25.f000.csv"
# Object name (path inside the bucket) the output CSV is written to.
OUTPUT_BLOB_NAME = "noaa_gfs/noaa_gfs_output.csv"

# 1. Parameter Mapping
# Keyed by the upper-case GFS parameter abbreviation. Each value is a
# (base name, unit) pair: the base name is the stem construct_dcid() builds
# the variable DCID from, and the unit is written verbatim to the "unit"
# output column (an empty string means no unit is emitted).
param_map = {
    # Pressure, temperature, height and humidity
    'PRMSL': ('Pressure_Place', 'Pascal'),
    'MSLET': ('MSLPEtaReduction_Pressure_Atmosphere', 'Pascal'),
    'TMP': ('Temperature_Place', 'Kelvin'),
    'DPT': ('DewPointTemperature_Atmosphere', 'Kelvin'),
    'APTMP': ('Apparent_Temperature_Place', 'Kelvin'),
    'HGT': ('GeopotentialHeight_Place', 'GeopotentialMeters'),
    'RH': ('Humidity_Place', 'Percent'),
    'SPFH': ('Humidity_Place', ''),
    # Wind and visibility (U/V components share one base name; the
    # component suffix is appended by construct_dcid)
    'UGRD': ('WindSpeed_Place', 'MeterPerSecond'),
    'VGRD': ('WindSpeed_Place', 'MeterPerSecond'),
    'VIS': ('Visibility_Place', 'Meter'),
    'GUST': ('Max_WindSpeed_Place', 'MeterPerSecond'),
    'PRES': ('Pressure_Atmosphere', 'Pascal'),
    # Mixing ratios and reflectivity
    'CLMR': ('MixingRatio_Cloud', ''),
    'ICMR': ('MixingRatio_Ice', ''),
    'RWMR': ('MixingRatio_Rainwater', ''),
    'SNMR': ('MixingRatio_Snow', ''),
    'GRLE': ('Count_Graupel', ''),
    'REFD': ('Reflectivity_Place', 'Decibel'),
    'REFC': ('Max_CompositeReflectivity_Place', 'Decibel'),
    # Vertical motion, vorticity, ozone and ventilation
    'VVEL': ('PressureVerticalVelocity_Velocity_Place', 'PascalPerSecond'),
    'DZDT': ('GeometricVerticalVelocity_Velocity_Place', 'MeterPerSecond'),
    'ABSV': ('AbsoluteVorticity_Place', 'InverseSecond'),
    'O3MR': ('Ozone_MixingRatio_Atmosphere', ''),
    'VRATE': ('VentilationRate_Place', 'SquareMeterPerSecond'),
    # Soil, cloud cover, snow and ice
    'TSOIL': ('Temperature_Soil', 'Kelvin'),
    'SOILW': ('VolumetricSoilMoisture_Soil', ''),
    'SOILL': ('LiquidWaterContent_Soil', ''),
    'TCDC': ('CloudCover_Place', 'Percent'),
    'HINDEX': ('HainesIndex_Place', ''),
    'CNWAT': ('CloudWaterContent_Atmosphere', 'KilogramPerMeterSquared'),
    'WEASD': ('SnowWaterEquivalent_Place', 'KilogramPerMeterSquared'),
    'SNOD': ('Depth_Snow', 'Meter'),
    'ICETK': ('Thickness_Ice', 'Meter'),
    'ICEG': ('GrowthRate_Count_Ice', 'MeterPerSecond'),
    # Precipitation
    'CPOFP': ('FrozenPrecipitation_Place', 'Percent'),
    'PRATE': ('PrecipitationRate_Place', ''),
    'CSNOW': ('Occurrence_Place_SurfaceLevel_Snow', ''),
    'CICEP': ('Occurrence_Place_SurfaceLevel_IcePellets', ''),
    'CFRZR': ('Occurrence_Place_SurfaceLevel_FreezingRain', ''),
    'CRAIN': ('Occurrence_Place_SurfaceLevel_Rain', ''),
    # Surface characteristics
    'VEG': ('Area_Place_SurfaceLevel_Vegetation', 'Percent'),
    'SFCR': ('SurfaceRoughness_Place', 'Meter'),
    'FRICV': ('FrictionalVelocity_Place', 'MeterPerSecond'),
    'SOTYP': ('SoilType_Soil', ''),
    'WILT': ('WiltingPoint_Soil', ''),
    'FLDCP': ('FieldCapacity_Soil', ''),
    'SUNSD': ('SunshineDuration_Place', 'Second'),
    # Stability indices and column totals
    'LFTX': ('SurfaceLiftedIndex_Atmosphere', 'Kelvin'),
    '4LFTX': ('BestLiftedIndex_Atmosphere', 'Kelvin'),
    'CAPE': ('ConvectiveAvailablePotentialEnergy_Atmosphere', 'JoulePerKilogram'),
    'CIN': ('ConvectiveInhibition_Atmosphere', 'JoulePerKilogram'),
    'PWAT': ('PrecipitableWater_Place', 'KilogramPerMeterSquared'),
    'CWAT': ('CloudWater_Place', 'KilogramPerMeterSquared'),
    'TOZNE': ('Concentration_Atmosphere_Ozone', ''),
    # Layered cloud cover (the layer is already part of the base name)
    'LCDC': ('CloudCover_Place_LowCloudLayer', 'Percent'),
    'MCDC': ('CloudCover_Place_MiddleCloudLayer', 'Percent'),
    'HCDC': ('CloudCover_Place_HighCloudLayer', 'Percent'),
    # Storm motion, shear and boundary layer
    'HLCY': ('StormRelativeHelicity_Atmosphere', 'MetersSquaredPerSecondSquared'),
    'USTM': ('StormMotion_Atmosphere', 'MeterPerSecond'),
    'VSTM': ('StormMotion_Atmosphere', 'MeterPerSecond'),
    'ICAHT': ('ICAOStandardAtmosphere_Altitude_Atmosphere', 'Meter'),
    'VWSH': ('WindShear_Atmosphere', 'InverseSecond'),
    'POT': ('PotentialTemperature_Atmosphere', 'Kelvin'),
    'HPBL': ('PlanetaryBoundaryLayer_Altitude_Atmosphere', 'Meter'),
    'PLPL': ('LiftedParcelLevel_Pressure_Atmosphere', 'Pascal'),
    # Land / ice cover
    'LAND': ('Area_LandCover', 'SquareDegree'),
    'ICEC': ('Area_IceCover', 'SquareDegree'),
    'ICETMP': ('Temperature_SeaIce', 'Kelvin'),
}

# 2. Helper Function to Clean Level for DCID

# Matches "mb" only as a standalone unit token ("500 mb", "30-0 mb above
# ground", "1000mb"), never as letters inside a word such as "cumulonimbus".
_MILLIBAR_TOKEN_RE = re.compile(r'(?<![a-z])mb(?![a-z])')
# Captures "<start>" or "<start>-<end>" from a below-ground depth description.
_DEPTH_RANGE_RE = re.compile(r'([0-9.]+)-?([0-9.]*)')


def format_level_dcid(level):
    """Convert a level description into the token used inside a DCID.

    The input is lower-cased and stripped, then matched against the known
    level families in a fixed order (the order matters: e.g. the mean-sea-level
    checks must run before the generic "m above ground" check).

    Args:
        level: Level description from the input row, e.g. "surface",
            "500 mb", "0-0.1 m below ground". Any value is accepted; it is
            converted with ``str()``.

    Returns:
        The DCID token for the level, e.g. "SurfaceLevel", "500Millibar",
        "0To0.1Meter". An empty string is returned for "entire atmosphere"
        levels (and for empty input), meaning no level suffix is needed.
        Unrecognised levels are returned as their words capitalised and
        concatenated, with hyphens treated as word separators.
    """
    text = str(level).lower().strip()
    # Leading token of the description; for numeric levels this is the value
    # or "start-end" range. Hyphenated ranges are spelled with "To".
    leading = text.split(" ")[0]
    leading_range = leading.replace("-", "To")

    if text == "mean sea level":
        return "0MetersAboveMeanSeaLevel"
    if "m above mean sea level" in text:
        return f"{leading_range}MetersAboveMeanSeaLevel"

    if text == "surface":
        return "SurfaceLevel"
    if "entire atmosphere" in text:
        return ""
    if text == "planetary boundary layer":
        return "PlanetaryBoundaryLayer"
    if "low cloud layer" in text:
        return "LowCloudLayer"
    if "middle cloud layer" in text:
        return "MiddleCloudLayer"
    if "high cloud layer" in text:
        return "HighCloudLayer"
    if text == "0c isotherm":
        return "Isotherm0C"
    if text == "highest tropospheric freezing level":
        return "HighestTroposphericFreezingLevel"

    if "hybrid level" in text:
        # Hybrid level 1 gets a dedicated name; others keep their number.
        return "LowestHybridLevel" if leading == "1" else f"{leading}HybridLevel"

    if "m below ground" in text:
        match = _DEPTH_RANGE_RE.search(text)
        if match:
            start, end = match.group(1), match.group(2)
            return f"{start}To{end}Meter" if end else f"{start}Meter"
        # No number found: fall through to the remaining checks, as before.

    if "m above ground" in text:
        return f"{leading_range}Meter"

    if _MILLIBAR_TOKEN_RE.search(text):
        # "30-0 mb above ground" -> "30To0Millibar". The token match (rather
        # than a bare substring test) keeps names that merely contain the
        # letters "mb" from being mislabelled as pressure levels.
        return f"{leading_range}Millibar"

    if "sigma" in text:
        suffix = "SigmaLayer" if "layer" in text else "SigmaLevel"
        return f"{leading_range}{suffix}"

    if "pv=" in text:
        is_negative = "neg" in text or "-2" in text
        return "PotentialVorticityNeg2PVU" if is_negative else "PotentialVorticity2PVU"

    # Fallback: CamelCase the remaining words ("max wind" -> "MaxWind").
    return "".join(word.capitalize() for word in text.replace("-", " ").split() if word)

# 3. DCID Constructor Logic
def construct_dcid(param_raw, level_raw):
    """Build the "dcid:..." variable identifier for a parameter at a level.

    The parameter is upper-cased and looked up in ``param_map``; unmapped
    parameters use their own upper-cased name as the base. The level is
    normalised with ``format_level_dcid`` and appended to the base unless it
    is empty or already contained in the base name. Wind/storm components and
    humidity parameters then receive an extra suffix.

    Args:
        param_raw: Parameter abbreviation from the input row (e.g. "TMP").
        level_raw: Level description from the input row (e.g. "2 m above ground").

    Returns:
        The identifier string, always prefixed with "dcid:".
    """
    code = str(param_raw).upper()
    level_token = format_level_dcid(level_raw)

    entry = param_map.get(code)
    stem = code if not entry else entry[0]

    # Relative humidity without a level has a fixed identifier.
    if code == 'RH' and not level_token:
        return "dcid:Humidity_RelativeHumidity"

    # Append the level only when there is one and the base does not
    # already spell it out.
    if level_token and level_token not in stem:
        dcid = f"dcid:{stem}_{level_token}"
    else:
        dcid = f"dcid:{stem}"

    component_by_param = {
        'UGRD': "UComponent",
        'USTM': "UComponent",
        'VGRD': "VComponent",
        'VSTM': "VComponent",
    }
    component = component_by_param.get(code)
    if component is not None:
        # 10 m wind components use a dedicated identifier.
        if code in ('UGRD', 'VGRD') and level_token == "10Meter":
            return f"dcid:WindSpeed_{component}_Height10Meters"
        return f"{dcid}_{component}"

    humidity_suffix = {'RH': "RelativeHumidity", 'SPFH': "SpecificHumidity"}.get(code)
    if humidity_suffix is not None:
        return f"{dcid}_{humidity_suffix}"

    # Composite reflectivity never carries a level suffix.
    if code == 'REFC':
        return f"dcid:{stem}"

    return dcid

def process_and_upload_true_stream():
    """Convert the local input CSV and stream the result to Cloud Storage.

    Reads ``INPUT_LOCAL`` row by row, derives the variable DCID, measurement
    method, place name and unit for each row, and writes the output CSV to
    ``OUTPUT_BLOB_NAME`` in ``BUCKET_NAME``. Rows are staged in an in-memory
    buffer that is flushed to the blob every 1000 rows, so the whole output
    is never held in memory at once.

    The input must provide the columns ``Parameter``, ``Level``,
    ``Valid_Time``, ``Value``, ``Latitude`` and ``Longitude``.

    Raises:
        KeyError: If a required column is missing from an input row.
        OSError: If the input file cannot be opened.
        Errors raised by the Cloud Storage client are propagated unchanged.
    """
    client = storage.Client()
    bucket = client.bucket(BUCKET_NAME)
    blob = bucket.blob(OUTPUT_BLOB_NAME)
    blob.chunk_size = 64 * 1024 * 1024  # 64 MiB upload chunks

    # newline='' is what the csv module requires for files handed to a
    # reader; without it, newlines inside quoted fields are misinterpreted.
    with open(INPUT_LOCAL, mode='r', newline='') as f_in:
        reader = csv.DictReader(f_in)
        output_buffer = io.StringIO()
        writer = csv.writer(output_buffer)
        writer.writerow(['observationDate', 'value', 'variableMeasured', 'measurementMethod', 'latitude', 'longitude', 'placeName', 'unit'])

        def flush_buffer(cloud_file):
            """Write the staged rows to the blob and empty the buffer."""
            cloud_file.write(output_buffer.getvalue())
            output_buffer.seek(0)
            output_buffer.truncate(0)

        with blob.open("w", content_type='text/csv') as cloud_file:
            # Send the header row first.
            flush_buffer(cloud_file)

            for i, row in enumerate(reader):
                param = row['Parameter']
                level = row['Level']
                # "YYYY-MM-DD HH:MM:SS" -> "YYYY-MM-DDTHH:MM:SS"
                obs_date = row['Valid_Time'].replace(' ', 'T')
                dcid = construct_dcid(param, level)

                l_low = level.lower()

                # measurementMethod is "GroundLevel" only for levels measured
                # relative to the ground; millibar and mean-sea-level levels
                # must leave it empty even if they mention "ground".
                is_pressure_or_msl = "mb" in l_low or "mean sea level" in l_low
                if "ground" in l_low and not is_pressure_or_msl:
                    method = "GroundLevel"
                else:
                    method = ""

                writer.writerow([
                    obs_date,
                    row['Value'],
                    dcid,
                    method,
                    row['Latitude'],
                    row['Longitude'],
                    f"latLong/{row['Latitude']}_{row['Longitude']}",
                    # Unit comes from the parameter table; unknown
                    # parameters get an empty unit.
                    param_map.get(param.upper(), ('', ''))[1]
                ])

                if i % 1000 == 0:
                    flush_buffer(cloud_file)

            # Flush whatever is left after the last full batch.
            cloud_file.write(output_buffer.getvalue())

if __name__ == "__main__":
    # Script entry point: run the conversion/upload, report the outcome and
    # the elapsed wall-clock time, and signal failure through the exit status.
    start_time = time.perf_counter()
    print(f"Process started: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    failed = False
    try:
        process_and_upload_true_stream()
        print("Upload complete.")
    except Exception as e:
        # Top-level boundary: report the error, but remember it so the
        # process does not exit with a success status after a failed run.
        print(f"Error: {e}")
        failed = True
    duration = time.perf_counter() - start_time
    mins, secs = divmod(duration, 60)
    print(f"Total Execution Time: {int(mins)}m {secs:.2f}s")
    if failed:
        # Non-zero exit so schedulers and shell callers can detect the failure.
        raise SystemExit(1)
5 changes: 5 additions & 0 deletions statvar_imports/noaa_gfs/noaa_gfs_metadata.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
parameter,value
dc_api_root,https://api.datacommons.org/
output_columns,"observationDate,value,variableMeasured,measurementMethod,latitude,longitude,placeName,unit"
observation_date_format,%Y-%m-%dT%H:%M:%S
#sourceUrl,https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/prod/gfs.20251224/00/atmos/
155 changes: 155 additions & 0 deletions statvar_imports/noaa_gfs/noaa_gfs_pvmap.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
key,p1,v1,p2,v2,p3,v3,p4,v4
Value,value,{Number},observationAbout,country/USA,,,,
Longitude,Longitude,{Data},longitude,{Longitude},,,,
Latitude,Latitude,{Data},latitude,{Latitude},placeName,latLong/{Latitude}_{Longitude},,
Valid_Time,observationDate,{ValidTime},#Eval,"ValidTime=format_date(Data, '%Y-%m-%dT%H:%M:%S')",,,,
PRMSL,measuredProperty,pressure,populationType,Place,height,MeanSeaLevel,unit,Pascal
CLMR,measuredProperty,mixingRatio,populationType,Cloud,,,,
ICMR,measuredProperty,mixingRatio,populationType,Ice,,,,
RWMR,measuredProperty,mixingRatio,populationType,Rainwater,,,,
SNMR,measuredProperty,mixingRatio,populationType,Snow,,,,
GRLE,measuredProperty,count,populationType,Graupel,,,,
REFD,measuredProperty,reflectivity,populationType,Place,unit,Decibel,,
REFC,measuredProperty,compositeReflectivity,populationType,Place,statType,maxValue,unit,Decibel
VIS,measuredProperty,visibility,populationType,Place,unit,Meter,,
UGRD,measuredProperty,windSpeed,populationType,Place,windComponent,UComponent,unit,MeterPerSecond
VGRD,measuredProperty,windSpeed,populationType,Place,windComponent,VComponent,unit,MeterPerSecond
VRATE,measuredProperty,ventilationRate,populationType,Place,unit,SquareMeterPerSecond,,
GUST,measuredProperty,windSpeed,populationType,Place,statType,maxValue,unit,MeterPerSecond
HGT,measuredProperty,geopotentialHeight,populationType,Place,unit,GeopotentialMeters,,
TMP,measuredProperty,temperature,populationType,Place,unit,Kelvin,,
RH,measuredProperty,humidity,populationType,Place,humidityParameter,RelativeHumidity,unit,Percent
SPFH,measuredProperty,humidity,populationType,Place,humidityParameter,SpecificHumidity,,
VVEL,measuredProperty,velocity,populationType,Place,measurementQualifier,PressureVerticalVelocity,unit,PascalPerSecond
DZDT,measuredProperty,velocity,populationType,Place,measurementQualifier,GeometricVerticalVelocity,unit,MeterPerSecond
ABSV,measuredProperty,absoluteVorticity,populationType,Place,unit,InverseSecond,,
O3MR,measuredProperty,mixingRatio,populationType,Atmosphere,measurementQualifier,Ozone,,
TCDC,measuredProperty,cloudCover,populationType,Place,unit,Percent,,
HINDEX,measuredProperty,hainesIndex,populationType,Place,,,,
MSLET,measuredProperty,pressure,populationType,Atmosphere,measurementQualifier,MSLPEtaReduction,unit,Pascal
PRES,measuredProperty,pressure,populationType,Atmosphere,unit,Pascal,,
TSOIL,measuredProperty,temperature,populationType,Soil,unit,Kelvin,,
SOILW,measuredProperty,volumetricSoilMoisture,populationType,Soil,,,,
SOILL,measuredProperty,liquidWaterContent,populationType,Soil,,,,
CNWAT,measuredProperty,cloudWaterContent,populationType,Atmosphere,unit,KilogramPerMeterSquared,,
WEASD,measuredProperty,snowWaterEquivalent,populationType,Place,unit,KilogramPerMeterSquared,,
SNOD,measuredProperty,depth,populationType,Snow,unit,Meter,,
ICETK,measuredProperty,thickness,populationType,Ice,unit,Meter,,
DPT,measuredProperty,dewPointTemperature,populationType,Atmosphere,unit,Kelvin,,
APTMP,measuredProperty,temperature,populationType,Place,unit,Kelvin,measurementQualifier,Apparent
ICEG,measuredProperty,count,populationType,Ice,statType,growthRate,unit,MeterPerSecond
CPOFP,measuredProperty,frozenPrecipitation,populationType,Place,unit,Percent,,
PRATE,measuredProperty,precipitationRate,populationType,Place,,,,
CSNOW,measuredProperty,occurrence,populationType,Place,precipitationType,Snow,,
CICEP,measuredProperty,occurrence,populationType,Place,precipitationType,IcePellets,,
CFRZR,measuredProperty,occurrence,populationType,Place,precipitationType,FreezingRain,,
CRAIN,measuredProperty,occurrence,populationType,Place,precipitationType,Rain,,
SFCR,measuredProperty,surfaceRoughness,populationType,Place,unit,Meter,,
FRICV,measuredProperty,frictionalVelocity,populationType,Place,unit,MeterPerSecond,,
VEG,measuredProperty,area,populationType,Place,landCoverType,Vegetation,unit,Percent
SOTYP,measuredProperty,soilType,populationType,Soil,,,,
WILT,measuredProperty,wiltingPoint,populationType,Soil,,,,
FLDCP,measuredProperty,fieldCapacity,populationType,Soil,,,,
SUNSD,measuredProperty,sunshineDuration,populationType,Place,unit,Second,,
LFTX,measuredProperty,surfaceLiftedIndex,populationType,Atmosphere,unit,Kelvin,,
CAPE,measuredProperty,convectiveAvailablePotentialEnergy,populationType,Atmosphere,unit,JoulePerKilogram,,
CIN,measuredProperty,convectiveInhibition,populationType,Atmosphere,unit,JoulePerKilogram,,
PWAT,measuredProperty,precipitableWater,populationType,Place,unit,KilogramPerMeterSquared,,
CWAT,measuredProperty,cloudWater,populationType,Place,unit,KilogramPerMeterSquared,,
TOZNE,measuredProperty,concentration,populationType,Atmosphere,pollutant,Ozone,,
LCDC,measuredProperty,cloudCover,populationType,Place,unit,Percent,,
MCDC,measuredProperty,cloudCover,populationType,Place,unit,Percent,,
HCDC,measuredProperty,cloudCover,populationType,Place,unit,Percent,,
HLCY,measuredProperty,stormRelativeHelicity,populationType,Atmosphere,unit,MetersSquaredPerSecondSquared,,
USTM,measuredProperty,stormMotion,populationType,Atmosphere,windComponent,UComponent,unit,MeterPerSecond
VSTM,measuredProperty,stormMotion,populationType,Atmosphere,windComponent,VComponent,unit,MeterPerSecond
ICAHT,measuredProperty,altitude,populationType,Atmosphere,measurementQualifier,ICAOStandardAtmosphere,unit,Meter
VWSH,measuredProperty,windShear,populationType,Atmosphere,unit,InverseSecond,,
4LFTX,measuredProperty,bestLiftedIndex,populationType,Atmosphere,unit,Kelvin,,
HPBL,measuredProperty,altitude,populationType,Atmosphere,measurementQualifier,PlanetaryBoundaryLayer,unit,Meter
POT,measuredProperty,potentialTemperature,populationType,Atmosphere,unit,Kelvin,,
PLPL,measuredProperty,pressure,populationType,Atmosphere,measurementQualifier,LiftedParcelLevel,unit,Pascal
LAND,measuredProperty,area,populationType,LandCover,unit,SquareDegree,#Multiply,0.0625
ICEC,measuredProperty,area,populationType,IceCover,unit,SquareDegree,#Multiply,0.0625
ICETMP,measuredProperty,temperature,populationType,SeaIce,unit,Kelvin,,
mean sea level,height,[0 MetersAboveMeanSeaLevel],,,,,,
1 hybrid level,height,LowestHybridLevel,,,,,,
entire atmosphere,,,,,,,,
surface,height,SurfaceLevel,,,,,,
planetary boundary layer,height,PlanetaryBoundaryLayer,,,,,,
0.01 mb,height,[0.01 Millibar],,,,,,
0.02 mb,height,[0.02 Millibar],,,,,,
0.04 mb,height,[0.04 Millibar],,,,,,
0.07 mb,height,[0.07 Millibar],,,,,,
0.1 mb,height,[0.1 Millibar],,,,,,
0.2 mb,height,[0.2 Millibar],,,,,,
0.4 mb,height,[0.4 Millibar],,,,,,
0.7 mb,height,[0.7 Millibar],,,,,,
1 mb,height,[1 Millibar],,,,,,
2 mb,height,[2 Millibar],,,,,,
3 mb,height,[3 Millibar],,,,,,
5 mb,height,[5 Millibar],,,,,,
7 mb,height,[7 Millibar],,,,,,
10 mb,height,[10 Millibar],,,,,,
15 mb,height,[15 Millibar],,,,,,
20 mb,height,[20 Millibar],,,,,,
30 mb,height,[30 Millibar],,,,,,
40 mb,height,[40 Millibar],,,,,,
50 mb,height,[50 Millibar],,,,,,
70 mb,height,[70 Millibar],,,,,,
100 mb,height,[100 Millibar],,,,,,
150 mb,height,[150 Millibar],,,,,,
200 mb,height,[200 Millibar],,,,,,
250 mb,height,[250 Millibar],,,,,,
300 mb,height,[300 Millibar],,,,,,
350 mb,height,[350 Millibar],,,,,,
400 mb,height,[400 Millibar],,,,,,
450 mb,height,[450 Millibar],,,,,,
500 mb,height,[500 Millibar],,,,,,
550 mb,height,[550 Millibar],,,,,,
600 mb,height,[600 Millibar],,,,,,
650 mb,height,[650 Millibar],,,,,,
700 mb,height,[700 Millibar],,,,,,
750 mb,height,[750 Millibar],,,,,,
800 mb,height,[800 Millibar],,,,,,
850 mb,height,[850 Millibar],,,,,,
900 mb,height,[900 Millibar],,,,,,
925 mb,height,[925 Millibar],,,,,,
950 mb,height,[950 Millibar],,,,,,
975 mb,height,[975 Millibar],,,,,,
1000 mb,height,[1000 Millibar],,,,,,
4000 m above ground,height,[4000 Meter],measurementMethod,GroundLevel,,,,
0-0.1 m below ground,depth,[0 0.1 Meter],measurementMethod,GroundLevel,,,,
0.1-0.4 m below ground,depth,[0.1 0.4 Meter],measurementMethod,GroundLevel,,,,
0.4-1 m below ground,depth,[0.4 1 Meter],measurementMethod,GroundLevel,,,,
1-2 m below ground,depth,[1 2 Meter],measurementMethod,GroundLevel,,,,
2 m above ground,height,[2 Meter],measurementMethod,GroundLevel,,,,
10 m above ground,height,[10 Meter],measurementMethod,GroundLevel,,,,
10 m above mean sea level,height,[10 MetersAboveMeanSeaLevel],,,,,,
low cloud layer,height,LowCloudLayer,,,,,,
middle cloud layer,height,MiddleCloudLayer,,,,,,
high cloud layer,height,HighCloudLayer,,,,,,
cloud ceiling,height,CloudCeiling,,,,,,
3000-0 m above ground,height,[3000 0 Meter],measurementMethod,GroundLevel,,,,
6000-0 m above ground,height,[6000 0 Meter],measurementMethod,GroundLevel,,,,
tropopause,height,Tropopause,,,,,,
max wind,height,MaxWind,,,,,,
20 m above ground,height,[20 Meter],measurementMethod,GroundLevel,,,,
30 m above ground,height,[30 Meter],measurementMethod,GroundLevel,,,,
40 m above ground,height,[40 Meter],measurementMethod,GroundLevel,,,,
50 m above ground,height,[50 Meter],measurementMethod,GroundLevel,,,,
80 m above ground,height,[80 Meter],measurementMethod,GroundLevel,,,,
100 m above ground,height,[100 Meter],measurementMethod,GroundLevel,,,,
1829 m above mean sea level,height,[1829 MetersAboveMeanSeaLevel],,,,,,
2743 m above mean sea level,height,[2743 MetersAboveMeanSeaLevel],,,,,,
3658 m above mean sea level,height,[3658 MetersAboveMeanSeaLevel],,,,,,
0C isotherm,height,Isotherm0C,,,,,,
highest tropospheric freezing level,height,HighestTroposphericFreezingLevel,,,,,,
30-0 mb above ground,height,[30 0 Millibar],,,,,,
180-0 mb above ground,height,[180 0 Millibar],,,,,,
0.33-1 sigma layer,height,0.33To1SigmaLayer,,,,,,
0.995 sigma level,height,0.995SigmaLevel,,,,,,
90-0 mb above ground,height,[90 0 Millibar],,,,,,
255-0 mb above ground,height,[255 0 Millibar],,,,,,
PV=2e-06 (Km^2/kg/s) surface,height,PotentialVorticity2PVU,,,,,,
PV=-2e-06 (Km^2/kg/s) surface,height,PotentialVorticityNeg2PVU,,,,,,
Loading
Loading