From 7c4321cb1a3cb461bec2b0d08bc9be86fa0a0e60 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Tue, 23 Dec 2025 23:31:26 +0100 Subject: [PATCH 01/26] almost there --- docs/Project.toml | 4 + .../tutorials/example_synthetic_lstm.jl | 225 ++++++++++++++++++ src/models/GenericHybridModel.jl | 4 +- src/models/NNModels.jl | 65 ++++- src/train.jl | 91 ++++++- src/utils/compute_loss.jl | 13 +- src/utils/tools.jl | 12 +- 7 files changed, 392 insertions(+), 22 deletions(-) create mode 100644 docs/literate/tutorials/example_synthetic_lstm.jl diff --git a/docs/Project.toml b/docs/Project.toml index 52df2908..d06cca83 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,11 +1,15 @@ [deps] +AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" +DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterVitepress = "4710194d-e776-4893-9690-8d956a29c365" EasyHybrid = "61bb816a-e6af-4913-ab9e-91bff2e122e3" +GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +Lux = "b2108857-7c20-44ae-9111-449ecde12c47" OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" [sources] diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl new file mode 100644 index 00000000..632025fb --- /dev/null +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -0,0 +1,225 @@ +# CC BY-SA 4.0 +# ============================================================================= +# EasyHybrid Example: Synthetic Data Analysis +# ============================================================================= +# This example demonstrates how to use EasyHybrid to train a hybrid model +# on synthetic data for respiration modeling with Q10 temperature sensitivity. 
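# It extends the basic synthetic Q10 example with an LSTM-based memory network
# (Recurrence over an LSTMCell) and sequence windowing via split_into_sequences,
# so that the basal respiration rate rb is predicted from short input histories.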
+# ============================================================================= + +# ============================================================================= +# Project Setup and Environment +# ============================================================================= +using Pkg + +# Set project path and activate environment +project_path = "docs" +Pkg.activate(project_path) +EasyHybrid_path = joinpath(pwd()) +Pkg.develop(path = EasyHybrid_path) +#Pkg.resolve() +#Pkg.instantiate() + +using EasyHybrid +using AxisKeys +using DimensionalData +using Lux + +# ============================================================================= +# Data Loading and Preprocessing +# ============================================================================= +# Load synthetic dataset from GitHub into DataFrame +df = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/Synthetic4BookChap.nc") + +# Select a subset of data for faster execution +df = df[1:20000, :] + +# KeyedArray from AxisKeys.jl works, but cannot handle DateTime type +dfnot = Float32.(df[!, Not(:time)]) + +ka = to_keyedArray(dfnot) + +# DimensionalData +mat = Array(Matrix(dfnot)') +da = DimArray(mat, (Dim{:col}(Symbol.(names(dfnot))), Dim{:row}(1:size(dfnot, 1)))) + +# ============================================================================= +# Split into sequences +# ============================================================================= + +# ============================================================================= +# neural network model +# ============================================================================= +# Define neural network +NN = Chain(Dense(15, 15, Lux.sigmoid), Dense(15, 15, Lux.sigmoid), Dense(15, 1)) + +broadcast_layer4 = @compact(; layer = Dense(6=>6)) do x::Union{NTuple{<:AbstractArray}, AbstractVector{<:AbstractArray}} + y = map(layer, x) + @return permutedims(stack(y; dims=3), (1, 3, 2)) + #@return y +end + +NN_Memory = Chain( + Recurrence(LSTMCell(6 => 6), return_sequence=true), + broadcast_layer4 +) + +# ============================================================================= +# Define the Physical Model +# ============================================================================= +# RbQ10 model: Respiration model with Q10 temperature sensitivity +# Parameters: +# - ta: air temperature [°C] +# - Q10: temperature sensitivity factor [-] +# - rb: basal respiration rate [μmol/m²/s] +# - tref: reference temperature [°C] (default: 15.0) +function RbQ10(; ta, Q10, rb, tref = 15.0f0) + reco = rb .* Q10 .^ (0.1f0 .* (ta .- tref)) + return (; reco, Q10, rb) +end + +# ============================================================================= +# Define Model Parameters +# ============================================================================= +# Parameter specification: (default, lower_bound, upper_bound) +parameters = ( + # Parameter name | Default | Lower | Upper | Description + rb = (3.0f0, 0.0f0, 13.0f0), # Basal respiration [μmol/m²/s] + Q10 = (2.0f0, 1.0f0, 4.0f0), # Temperature sensitivity factor [-] +) + +# ============================================================================= +# Configure Hybrid Model Components +# ============================================================================= +# Define input variables +forcing = [:ta] # Forcing variables (temperature) + +# Target variable +target = [:reco] # Target variable (respiration) + +# Parameter classification +global_param_names = [:Q10] # Global parameters (same for all samples) 
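# Global parameters take a single value shared across all samples, while the neural
# parameters listed below are predicted per sample by the neural network from the predictors.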
+neural_param_names = [:rb] # Neural network predicted parameters + +# ============================================================================= +# Single NN Hybrid Model Training +# ============================================================================= +using GLMakie +# Create single NN hybrid model using the unified constructor +predictors_single_nn = [:sw_pot, :dsw_pot] # Predictor variables (solar radiation, and its derivative) + +hlstm = constructHybridModel( + predictors_single_nn, # Input features + forcing, # Forcing variables + target, # Target variables + RbQ10, # Process-based model function + parameters, # Parameter definitions + neural_param_names, # NN-predicted parameters + global_param_names, # Global parameters + hidden_layers = NN_Memory, # Neural network architecture + scale_nn_outputs = true, # Scale neural network outputs + input_batchnorm = false # Apply batch normalization to inputs +) + + +x, y = prepare_data(hlstm, df) + +xs, ys = split_into_sequences(x, y; input_window = 20, output_window = 2, shift=1, lead_time=0) + +ys_nan = .!isnan.(ys) + +#? this sets up initial ps for the hybrid model version +rng = Random.default_rng(1234) +ps, st = Lux.setup(rng, hlstm) + +train_dl = EasyHybrid.DataLoader((xs, ys); batchsize=5) + +reco_obs_3D = first(train_dl)[2] +reco_obs = dropdims(reco_obs_3D, dims = 1) +x_obs = first(train_dl)[1] + +sdf = hlstm(x_obs, ps, st) + +reco_mod = sdf[1].reco +reco_mod(window = axiskeys(reco_obs, :window)) .- reco_obs + +# ============================================================================= +# train on DataFrame +# ============================================================================= + +out_lstm = train( + hlstm, + df, + (); + nepochs = 10, # Number of training epochs + batchsize = 512, # Batch size for training + opt = AdamW(0.1), # Optimizer and learning rate + monitor_names = [:rb, :Q10], # Parameters to monitor during training + yscale = identity, # Scaling for outputs + shuffleobs = false, + loss_types = [:mse, :nse], + sequence_kwargs = (;input_window = 10, output_window = 4), +) + +# Train the hybrid model +single_nn_out = train( + hm, + df, + (); + nepochs = 10, # Number of training epochs + batchsize = 512, # Batch size for training + opt = AdamW(0.1), # Optimizer and learning rate + monitor_names = [:rb, :Q10], # Parameters to monitor during training + yscale = identity, # Scaling for outputs + shuffleobs = false, + loss_types = [:mse, :nse], +) + +split_data(df, hm) + +split_data(df, hm, sequence_kwargs = (;input_window = 10, output_window = 4)) + +#? 
this sets up initial ps for the hybrid model version +rng = Random.default_rng(1234) +ps, st = Lux.setup(rng, hm) + +xdl = EasyHybrid.DataLoader(xs; batchsize=512) +ydl = EasyHybrid.DataLoader(ys; batchsize=512) + + + +hm = constructHybridModel( + predictors_single_nn, # Input features + forcing, # Forcing variables + target, # Target variables + RbQ10, # Process-based model function + parameters, # Parameter definitions + neural_param_names, # NN-predicted parameters + global_param_names, # Global parameters + hidden_layers = NN_Memory, # Neural network architecture + scale_nn_outputs = true, # Scale neural network outputs + input_batchnorm = false # Apply batch normalization to inputs +) + +sdf = hm(first(xdl), ps, st) + +mid1 =first(xdl)(RbQ10_Memory.forcing) +mid1[:,end,:] + + +o1, st1 = LuxCore.apply(RbQ10_Memory.NN, first(xdl)(RbQ10_Memory.predictors), ps.ps, st.st) + + +# Train the hybrid model +out_lstm = tune( + hm, + df; + nepochs = 10, # Number of training epochs + batchsize = 512, # Batch size for training + opt = AdamW(0.1), # Optimizer and learning rate + monitor_names = [:rb, :Q10], # Parameters to monitor during training + yscale = identity, # Scaling for outputs + shuffleobs = false, + loss_types = [:mse, :nse], + sequence_kwargs = (;input_window = 10, output_window = 4), + hidden_layers = NN_Memory, +) \ No newline at end of file diff --git a/src/models/GenericHybridModel.jl b/src/models/GenericHybridModel.jl index 5c74aa96..38d6299a 100644 --- a/src/models/GenericHybridModel.jl +++ b/src/models/GenericHybridModel.jl @@ -362,7 +362,7 @@ end # Forward pass for SingleNNHybridModel (optimized, no branching) function (m::SingleNNHybridModel)(ds_k::Union{KeyedArray, AbstractDimArray}, ps, st) # 1) get features - predictors = toArray(ds_k, m.predictors) + predictors = ds_k(m.predictors)#toArray(ds_k, m.predictors) parameters = m.parameters @@ -380,7 +380,7 @@ function (m::SingleNNHybridModel)(ds_k::Union{KeyedArray, AbstractDimArray}, ps, # 3) scale NN parameters (handle empty case) if !isempty(m.neural_param_names) nn_out, st_nn = LuxCore.apply(m.NN, predictors, ps.ps, st.st_nn) - nn_cols = eachrow(nn_out) + nn_cols = eachslice(nn_out, dims=1) nn_params = NamedTuple(zip(m.neural_param_names, nn_cols)) # Use appropriate scaling based on setting diff --git a/src/models/NNModels.jl b/src/models/NNModels.jl index fddfec29..3d6b99c0 100644 --- a/src/models/NNModels.jl +++ b/src/models/NNModels.jl @@ -1,4 +1,4 @@ -export SingleNNModel, MultiNNModel, constructNNModel, prepare_hidden_chain +export SingleNNModel, MultiNNModel, constructNNModel, prepare_hidden_chain, BroadcastLayer using Lux, LuxCore using ..EasyHybrid: hard_sigmoid @@ -49,8 +49,44 @@ function prepare_hidden_chain( ) if hidden_layers isa Chain # user gave a chain of hidden layers only - first_h = hidden_layers[1].out_dims - last_h = hidden_layers[end].out_dims + + # Helper to safely extract dimensions from layers + function get_layer_dim(l, type) + if type == :input + hasproperty(l, :in_dims) && return l.in_dims + (l isa BatchNorm && hasproperty(l, :dims)) && return l.dims + (l isa Recurrence && hasproperty(l.cell, :in_dims)) && return l.cell.in_dims + (l isa CompactLuxLayer && hasproperty(l.layers, :in_dims)) && return l.layers.in_dims + elseif type == :output + hasproperty(l, :out_dims) && return l.out_dims + (l isa BatchNorm && hasproperty(l, :dims)) && return l.dims + (l isa Recurrence && hasproperty(l.cell, :out_dims)) && return l.cell.out_dims + (l isa CompactLuxLayer && hasproperty(l.layers, :out_dims)) && return 
l.layers.out_dims + end + return nothing + end + + # Determine first_h by searching forward + first_h = nothing + for i in 1:length(hidden_layers) + d = get_layer_dim(hidden_layers[i], :input) + if !isnothing(d) + first_h = d + break + end + end + isnothing(first_h) && error("Could not determine input dimension of hidden_layers Chain (found no Dense or BatchNorm layer).") + + # Determine last_h by searching backward + last_h = nothing + for i in length(hidden_layers):-1:1 + d = get_layer_dim(hidden_layers[i], :output) + if !isnothing(d) + last_h = d + break + end + end + isnothing(last_h) && error("Could not determine output dimension of hidden_layers Chain.") return Chain( input_batchnorm ? BatchNorm(in_dim, affine = false) : identity, @@ -236,3 +272,26 @@ function Base.show(io::IO, ::MIME"text/plain", m::MultiNNModel) end return println("scale NN outputs: ", m.scale_nn_outputs) end + +struct BroadcastLayer{T <: NamedTuple} <: LuxCore.AbstractLuxContainerLayer{(:layers,)} + layers::T +end + +function BroadcastLayer(layers...) + for l in layers + if !iszero(LuxCore.statelength(l)) + throw(ArgumentError("Stateful layer `$l` are not supported for `BroadcastLayer`.")) + end + end + names = ntuple(i -> Symbol("layer_$i"), length(layers)) + return BroadcastLayer(NamedTuple{names}(layers)) +end + +BroadcastLayer(; kwargs...) = BroadcastLayer(connection, (; kwargs...)) + +function (m::BroadcastLayer)(x, ps, st::NamedTuple{names}) where {names} + results = (first ∘ Lux.apply).(values(m.layers), x, values(ps), values(st)) + return results, st +end + +Base.keys(m::BroadcastLayer) = Base.keys(getfield(m, :layers)) \ No newline at end of file diff --git a/src/train.jl b/src/train.jl index 282f389c..6ad07228 100644 --- a/src/train.jl +++ b/src/train.jl @@ -1,4 +1,4 @@ -export train, TrainResults, prepare_data, split_data +export train, TrainResults, prepare_data, split_data, split_into_sequences, windowed_dataset # beneficial for plotting based on type TrainResults? struct TrainResults train_history @@ -411,10 +411,22 @@ function split_data( val_fold::Union{Nothing, Int} = nothing, shuffleobs::Bool = false, split_data_at::Real = 0.8, + sequence_kwargs::Union{Nothing, NamedTuple} = nothing, kwargs... ) data_ = prepare_data(hybridModel, data) + if sequence_kwargs !== nothing + x_keyed, y_keyed = data_ + sis_default = (;input_window = 10, output_window = 1, shift = 1, lead_time = 1) + sis = merge(sis_default, sequence_kwargs) + @info "Using split_into_sequences: $sis" + x_all, y_all = split_into_sequences(x_keyed, y_keyed; sis.input_window, sis.output_window, sis.shift, sis.lead_time) + else + x_all, y_all = data_ + end + + if split_by_id !== nothing && folds !== nothing throw(ArgumentError("split_by_id and folds are not supported together; do the split when constructing folds")) @@ -431,9 +443,8 @@ function split_data( @info "Number of unique $(split_by_id): $(length(unique_ids))" @info "Train IDs: $(length(train_ids)) | Val IDs: $(length(val_ids))" - x_all, y_all = data_ - x_train, y_train = view(x_all, :, train_idx), view(y_all, :, train_idx) - x_val, y_val = view(x_all, :, val_idx), view(y_all, :, val_idx) + x_train, y_train = view_end_dim(x_all, train_idx), view_end_dim(y_all, train_idx) + x_val, y_val = view_end_dim(x_all, val_idx), view_end_dim(y_all, val_idx) return (x_train, y_train), (x_val, y_val) elseif folds !== nothing || val_fold !== nothing @@ -441,7 +452,6 @@ function split_data( @assert val_fold !== nothing "Provide val_fold when using folds." 
@assert folds !== nothing "Provide folds when using val_fold." @warn "shuffleobs is not supported when using folds and val_fold, this will be ignored and should be done during fold constructions" - x_all, y_all = data_ f = isa(folds, Symbol) ? getbyname(data, folds) : folds n = size(x_all, 2) @assert length(f) == n "length(folds) ($(length(f))) must equal number of samples/columns ($n)." @@ -453,13 +463,13 @@ function split_data( @info "K-fold via external assignments: val_fold=$val_fold → train=$(length(train_idx)) val=$(length(val_idx))" - x_train, y_train = view(x_all, :, train_idx), view(y_all, :, train_idx) - x_val, y_val = view(x_all, :, val_idx), view(y_all, :, val_idx) + x_train, y_train = view_end_dim(x_all, train_idx), view_end_dim(y_all, train_idx) + x_val, y_val = view_end_dim(x_all, val_idx), view_end_dim(y_all, val_idx) return (x_train, y_train), (x_val, y_val) - + else # --- Fallback: simple random/chronological split of prepared data --- - (x_train, y_train), (x_val, y_val) = splitobs(data_; at = split_data_at, shuffle = shuffleobs) + (x_train, y_train), (x_val, y_val) = splitobs((x_all, y_all); at = split_data_at, shuffle = shuffleobs) return (x_train, y_train), (x_val, y_val) end end @@ -613,3 +623,66 @@ end function getbyname(ka::AxisKeys.KeyedArray, name::Symbol) return ka(name) end + +function split_into_sequences(x, y; input_window=5, output_window=1, shift=1, lead_time=1) + ndims(x) == 2 || throw(ArgumentError("expected x to be (feature, time); got ndims(x) = $(ndims(x))")) + ndims(y) == 2 || throw(ArgumentError("expected y to be (target, time); got ndims(y) = $(ndims(y))")) + + Lx, Ly = size(x,2), size(y,2) + Lx == Ly || throw(ArgumentError("x and y must have same time length; got $Lx vs $Ly")) + lead_time ≥ 0 || throw(ArgumentError("lead_time must be ≥ 0 (0 = instantaneous end)")) + + nfeat, ntarget = size(x,1), size(y,1) + L = Lx + + featkeys = axiskeys(x, 1) + timekeys = axiskeys(x, 2) + targetkeys = axiskeys(y, 1) + + # X window keys are lags ending at 0 + lag_keys = (-input_window + 1):0 + + # Y window keys end at lead_time and go backwards if output_window>1 + lead_start = lead_time - output_window + 1 + lead_keys = lead_start:lead_time + + # Choose valid sample starts sx so that: + # sy = ex + lead_start >= 1 and ey = ex + lead_time <= L + sx_min = max(1, 1 - (input_window + lead_time - output_window)) # ensures sy>=1 + sx_max = L - input_window - lead_time + 1 # ensures ey<=L + sx_min <= sx_max || throw(ArgumentError("windows too long for series length")) + + sx_vals = collect(sx_min:shift:sx_max) + num_samples = length(sx_vals) + num_samples ≥ 1 || throw(ArgumentError("no samples with given shift/windows")) + + samplekeys = timekeys[sx_vals] + + Xd = zeros(Float32, nfeat, input_window, num_samples) + Yd = zeros(Float32, ntarget, output_window, num_samples) + + @inbounds @views for (ii, sx) in enumerate(sx_vals) + ex = sx + input_window - 1 + + sy = ex + lead_start + ey = ex + lead_time + + Xd[:, :, ii] .= x[:, sx:ex] + Yd[:, :, ii] .= y[:, sy:ey] + end + + Xk = KeyedArray(Xd; row=featkeys, window=lag_keys, col=samplekeys) + Yk = KeyedArray(Yd; row=targetkeys, window=lead_keys, col=samplekeys) + + Ydrop = dropdims(Yk, dims = 1) + + return Xk, Yk +end + +view_end_dim = function(x_all::Array{Float32, 2}, idx) + return view(x_all, :, idx) +end + +view_end_dim = function(x_all::Array{Float32, 3}, idx) + return view(x_all, :, :, idx) +end \ No newline at end of file diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index a1e63151..1cb7cadf 100644 
--- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -25,7 +25,10 @@ function compute_loss( targets = HM.targets ext_loss = extra_loss(logging) if logging.train_mode - ŷ, st = HM(x, ps, st) + ŷall, st = HM(x, ps, st) + + ŷ = ŷall(window = axiskeys(y_t, :window)) + loss_value = _compute_loss(ŷ, y_t, y_nan, targets, training_loss(logging), logging.agg) # Add extra_loss if provided if ext_loss !== nothing @@ -34,7 +37,10 @@ function compute_loss( end stats = NamedTuple() else - ŷ, _ = HM(x, ps, LuxCore.testmode(st)) + ŷall, _ = HM(x, ps, LuxCore.testmode(st)) + + ŷ = ŷall(window = axiskeys(y_t, :window)) + loss_value = _compute_loss(ŷ, y_t, y_nan, targets, loss_types(logging), logging.agg) # Add extra_loss entries if provided if ext_loss !== nothing @@ -53,6 +59,9 @@ function _compute_loss(ŷ, y, y_nan, targets, loss_spec, agg::Function) end function _compute_loss(ŷ, y, y_nan, targets, loss_types::Vector, agg::Function) + + @show typeof(ŷ) + @show typeof(y) out_loss_types = [ begin losses = assemble_loss(ŷ, y, y_nan, targets, loss_type) diff --git a/src/utils/tools.jl b/src/utils/tools.jl index ea99f66d..7abe8139 100644 --- a/src/utils/tools.jl +++ b/src/utils/tools.jl @@ -171,22 +171,22 @@ ta = data.TA ``` """ function toNamedTuple(ka::KeyedArray, variables::Vector{Symbol}) - vals = [vec(ka([var])) for var in variables] + vals = [dropdims(ka([var]), dims = 1) for var in variables] return (; zip(variables, vals)...) end function toNamedTuple(ka::AbstractDimArray, variables::Vector{Symbol}) - vals = [vec(ka[col = At(var)]) for var in variables] + vals = [ka[col = At([var])] for var in variables] return (; zip(variables, vals)...) end function toNamedTuple(ka::KeyedArray, variables::NTuple{N, Symbol}) where {N} - vals = ntuple(i -> vec(ka([variables[i]])), N) + vals = ntuple(i -> ka([variables[i]]), N) return NamedTuple{variables}(vals) end function toNamedTuple(ka::AbstractDimArray, variables::NTuple{N, Symbol}) where {N} - ntuple(i -> vec(ka[col = At([variables[i]])]), N) + ntuple(i -> ka[col = At([variables[i]])], N) return NamedTuple{variables}(vals) end @@ -238,11 +238,11 @@ sw_in = toNamedTuple(ds_keyed, :SW_IN) ``` """ function toNamedTuple(ka::KeyedArray, variable::Symbol) - return vec(ka[variable]) + return ka(variable) end function toNamedTuple(ka::AbstractDimArray, variable::Symbol) - return vec(ka[col = At(variable)]) + return ka[col = At(variable)] end function toArray(ka::KeyedArray, variable) From ce5d23c4c845ac5462fb6712c5a4d6fdd7532b35 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Tue, 23 Dec 2025 23:52:24 +0100 Subject: [PATCH 02/26] up to compute_loss --- docs/literate/tutorials/example_synthetic_lstm.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index 632025fb..9c281f90 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -134,7 +134,9 @@ ps, st = Lux.setup(rng, hlstm) train_dl = EasyHybrid.DataLoader((xs, ys); batchsize=5) reco_obs_3D = first(train_dl)[2] +reco_nan_3D = .!isnan.(reco_obs_3D) reco_obs = dropdims(reco_obs_3D, dims = 1) +reco_nan = .!isnan.(reco_obs) x_obs = first(train_dl)[1] sdf = hlstm(x_obs, ps, st) @@ -142,6 +144,8 @@ sdf = hlstm(x_obs, ps, st) reco_mod = sdf[1].reco reco_mod(window = axiskeys(reco_obs, :window)) .- reco_obs +EasyHybrid.compute_loss(hlstm, ps, st, (x_obs, (reco_obs, reco_nan)), logging = LoggingLoss(train_mode = true)) + # 
============================================================================= # train on DataFrame # ============================================================================= From 35b91ca6943a1d0e616e84bb8ec4b0e243ad63bf Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Thu, 25 Dec 2025 22:52:00 +0100 Subject: [PATCH 03/26] align by name --- .../tutorials/example_synthetic_lstm.jl | 12 +++++++- src/train.jl | 28 ++++++++----------- src/utils/compute_loss.jl | 20 +++++++------ 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index 9c281f90..79835c68 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -144,7 +144,16 @@ sdf = hlstm(x_obs, ps, st) reco_mod = sdf[1].reco reco_mod(window = axiskeys(reco_obs, :window)) .- reco_obs -EasyHybrid.compute_loss(hlstm, ps, st, (x_obs, (reco_obs, reco_nan)), logging = LoggingLoss(train_mode = true)) +sdf = split_data(df, hlstm, sequence_kwargs = (;input_window = 10, output_window = 1, shift = 1, lead_time = 2)); + +typeof(sdf) +(x_train, y_train), (x_val, y_val) = sdf; +x_train +y_train + +ytrain_nan = .!isnan.(y_train) + +EasyHybrid.compute_loss(hlstm, ps, st, (x_train, (y_train, ytrain_nan)), logging = LoggingLoss(train_mode = true)) # ============================================================================= # train on DataFrame @@ -162,6 +171,7 @@ out_lstm = train( shuffleobs = false, loss_types = [:mse, :nse], sequence_kwargs = (;input_window = 10, output_window = 4), + plotting = false ) # Train the hybrid model diff --git a/src/train.jl b/src/train.jl index 6ad07228..82a3ec4f 100644 --- a/src/train.jl +++ b/src/train.jl @@ -421,7 +421,7 @@ function split_data( sis_default = (;input_window = 10, output_window = 1, shift = 1, lead_time = 1) sis = merge(sis_default, sequence_kwargs) @info "Using split_into_sequences: $sis" - x_all, y_all = split_into_sequences(x_keyed, y_keyed; sis.input_window, sis.output_window, sis.shift, sis.lead_time) + x_all, y_all = split_into_sequences(x_keyed, y_keyed; sis.input_window, sis.output_window, sis.shift, sis.lead_time); else x_all, y_all = data_ end @@ -639,17 +639,15 @@ function split_into_sequences(x, y; input_window=5, output_window=1, shift=1, le timekeys = axiskeys(x, 2) targetkeys = axiskeys(y, 1) - # X window keys are lags ending at 0 - lag_keys = (-input_window + 1):0 - - # Y window keys end at lead_time and go backwards if output_window>1 lead_start = lead_time - output_window + 1 - lead_keys = lead_start:lead_time + + lag_keys = Symbol.(["x$(lag)" for lag in (input_window+lead_time-1):-1:lead_time]) + lead_keys = Symbol.(["_y$(lead)" for lead in ((output_window-1):-1:0)]) + lead_keys = Symbol.(lag_keys[end-length(lead_keys)+1:end], lead_keys) + lag_keys[end-length(lead_keys)+1:end] .= lead_keys - # Choose valid sample starts sx so that: - # sy = ex + lead_start >= 1 and ey = ex + lead_time <= L - sx_min = max(1, 1 - (input_window + lead_time - output_window)) # ensures sy>=1 - sx_max = L - input_window - lead_time + 1 # ensures ey<=L + sx_min = max(1, 1 - (input_window + lead_time - output_window)) + sx_max = L - input_window - lead_time + 1 sx_min <= sx_max || throw(ArgumentError("windows too long for series length")) sx_vals = collect(sx_min:shift:sx_max) @@ -663,22 +661,18 @@ function split_into_sequences(x, y; input_window=5, output_window=1, shift=1, le @inbounds @views for (ii, sx) in enumerate(sx_vals) 
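        # For each sample, the input window is x[:, sx:ex] with ex = sx + input_window - 1;
        # the target window y[:, sy:ey] ends lead_time steps after ex and spans output_window steps.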
ex = sx + input_window - 1 - sy = ex + lead_start ey = ex + lead_time - Xd[:, :, ii] .= x[:, sx:ex] Yd[:, :, ii] .= y[:, sy:ey] end - Xk = KeyedArray(Xd; row=featkeys, window=lag_keys, col=samplekeys) - Yk = KeyedArray(Yd; row=targetkeys, window=lead_keys, col=samplekeys) - - Ydrop = dropdims(Yk, dims = 1) - + Xk = KeyedArray(Xd; row=featkeys, window=lag_keys, col=samplekeys) + Yk = KeyedArray(Yd; row=targetkeys, window=lead_keys, col=samplekeys) return Xk, Yk end + view_end_dim = function(x_all::Array{Float32, 2}, idx) return view(x_all, :, idx) end diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index 1cb7cadf..7ddcba8a 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -25,10 +25,7 @@ function compute_loss( targets = HM.targets ext_loss = extra_loss(logging) if logging.train_mode - ŷall, st = HM(x, ps, st) - - ŷ = ŷall(window = axiskeys(y_t, :window)) - + ŷ, st = HM(x, ps, st) loss_value = _compute_loss(ŷ, y_t, y_nan, targets, training_loss(logging), logging.agg) # Add extra_loss if provided if ext_loss !== nothing @@ -37,9 +34,7 @@ function compute_loss( end stats = NamedTuple() else - ŷall, _ = HM(x, ps, LuxCore.testmode(st)) - - ŷ = ŷall(window = axiskeys(y_t, :window)) + ŷ, _ = HM(x, ps, LuxCore.testmode(st)) loss_value = _compute_loss(ŷ, y_t, y_nan, targets, loss_types(logging), logging.agg) # Add extra_loss entries if provided @@ -95,7 +90,14 @@ function _compute_loss end function assemble_loss(ŷ, y, y_nan, targets, loss_spec) return [ - _apply_loss(ŷ[target], _get_target_y(y, target), _get_target_nan(y_nan, target), loss_spec) + begin + y_t = _get_target_y(y, target) + y_nan = _get_target_nan(y_nan, target) + @show typeof(ŷ) + ŷ_t = ŷ[target] + ŷ_tsub = ŷ_t(window = axiskeys(y_t, :window)) + _apply_loss(ŷ_tsub, y_t, y_nan, loss_spec) + end for target in targets ] end @@ -187,4 +189,4 @@ end function _loss_name(loss_spec::Tuple) return _loss_name(loss_spec[1]) -end +end \ No newline at end of file From 49731c0ef9421bd1f89ff845e2b3e4a885b6c567 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Thu, 25 Dec 2025 23:14:00 +0100 Subject: [PATCH 04/26] row vs col conflict --- src/models/GenericHybridModel.jl | 2 +- src/train.jl | 2 +- src/utils/compute_loss.jl | 4 ++-- src/utils/tools.jl | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/models/GenericHybridModel.jl b/src/models/GenericHybridModel.jl index 38d6299a..1a93ff7b 100644 --- a/src/models/GenericHybridModel.jl +++ b/src/models/GenericHybridModel.jl @@ -362,7 +362,7 @@ end # Forward pass for SingleNNHybridModel (optimized, no branching) function (m::SingleNNHybridModel)(ds_k::Union{KeyedArray, AbstractDimArray}, ps, st) # 1) get features - predictors = ds_k(m.predictors)#toArray(ds_k, m.predictors) + predictors = ds_k(row = m.predictors) #toArray(ds_k, m.predictors) parameters = m.parameters diff --git a/src/train.jl b/src/train.jl index 82a3ec4f..9a5ebbb7 100644 --- a/src/train.jl +++ b/src/train.jl @@ -668,7 +668,7 @@ function split_into_sequences(x, y; input_window=5, output_window=1, shift=1, le end Xk = KeyedArray(Xd; row=featkeys, window=lag_keys, col=samplekeys) - Yk = KeyedArray(Yd; row=targetkeys, window=lead_keys, col=samplekeys) + Yk = KeyedArray(Yd; row=targetkeys, window=lead_keys, col=samplekeys) return Xk, Yk end diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index 7ddcba8a..c734c644 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -147,7 +147,7 @@ Helper function to apply the appropriate loss function 
based on the specificatio """ function _apply_loss end -_get_target_y(y, target) = y(target) +_get_target_y(y, target) = y(row = target) _get_target_y(y::AbstractDimArray, target) = y[col = At(target)] # assumes the DimArray uses :col indexing _get_target_y(y::AbstractDimArray, targets::Vector) = y[col = At(targets)] # for multiple targets @@ -165,7 +165,7 @@ Helper function to extract target-specific values from `y`, handling cases where """ function _get_target_y end -_get_target_nan(y_nan, target) = y_nan(target) +_get_target_nan(y_nan, target) = y_nan(row = target) _get_target_nan(y_nan::AbstractDimArray, target) = y_nan[col = At(target)] # assumes the DimArray uses :col indexing _get_target_nan(y_nan::AbstractDimArray, targets::Vector) = y_nan[col = At(targets)] # for multiple targets diff --git a/src/utils/tools.jl b/src/utils/tools.jl index 7abe8139..cce636f3 100644 --- a/src/utils/tools.jl +++ b/src/utils/tools.jl @@ -171,7 +171,7 @@ ta = data.TA ``` """ function toNamedTuple(ka::KeyedArray, variables::Vector{Symbol}) - vals = [dropdims(ka([var]), dims = 1) for var in variables] + vals = [dropdims(ka(row = [var]), dims = 1) for var in variables] return (; zip(variables, vals)...) end @@ -181,7 +181,7 @@ function toNamedTuple(ka::AbstractDimArray, variables::Vector{Symbol}) end function toNamedTuple(ka::KeyedArray, variables::NTuple{N, Symbol}) where {N} - vals = ntuple(i -> ka([variables[i]]), N) + vals = ntuple(i -> ka(row = [variables[i]]), N) return NamedTuple{variables}(vals) end From a3ffec0559e81e818d1b5113c54449abc54fe50c Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Fri, 26 Dec 2025 13:52:43 +0100 Subject: [PATCH 05/26] works train_board to be done --- Project.toml | 4 +- .../tutorials/example_synthetic_lstm.jl | 4 +- src/plotrecipes.jl | 2 +- src/utils/compute_loss.jl | 26 ++++++----- src/utils/io.jl | 2 +- src/utils/tools.jl | 46 +++++++++++++++++-- 6 files changed, 64 insertions(+), 20 deletions(-) diff --git a/Project.toml b/Project.toml index ececaefa..51ae98ed 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "EasyHybrid" uuid = "61bb816a-e6af-4913-ab9e-91bff2e122e3" -authors = ["Lazaro Alonso", "Bernhard Ahrens", "Markus Reichstein"] version = "0.1.7" +authors = ["Lazaro Alonso", "Bernhard Ahrens", "Markus Reichstein"] [deps] AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5" @@ -21,6 +21,7 @@ LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623" MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" +NamedDims = "356022a1-0364-5f58-8944-0da4b18d706f" OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" @@ -57,6 +58,7 @@ MLJ = "0.20, 0.21, 0.22" MLUtils = "0.4.8" Makie = "0.22, 0.23, 0.24" NCDatasets = "0.14.8" +NamedDims = "1.2.3" OptimizationOptimisers = "0.3.7" PrettyTables = "2.4.0, 3.1.2" ProgressMeter = "1.10.4" diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index 79835c68..32fc7fb1 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -163,7 +163,7 @@ out_lstm = train( hlstm, df, (); - nepochs = 10, # Number of training epochs + nepochs = 100, # Number of training epochs batchsize = 512, # Batch size for training opt = AdamW(0.1), # Optimizer and learning rate monitor_names = [:rb, :Q10], # Parameters to 
monitor during training @@ -179,7 +179,7 @@ single_nn_out = train( hm, df, (); - nepochs = 10, # Number of training epochs + nepochs = 3, # Number of training epochs batchsize = 512, # Batch size for training opt = AdamW(0.1), # Optimizer and learning rate monitor_names = [:rb, :Q10], # Parameters to monitor during training diff --git a/src/plotrecipes.jl b/src/plotrecipes.jl index 8804e50a..33e263ca 100644 --- a/src/plotrecipes.jl +++ b/src/plotrecipes.jl @@ -114,7 +114,7 @@ function to_obs_tuple(y, target_names) end function to_tuple(y::KeyedArray, target_names) - return (; (t => y(t) for t in target_names)...) # observations are fixed, no Observables are needed! + return (; (t => y(row = t) for t in target_names)...) # observations are fixed, no Observables are needed! end function to_tuple(y::AbstractDimArray, target_names) diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index c734c644..feccb3c0 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -35,7 +35,6 @@ function compute_loss( stats = NamedTuple() else ŷ, _ = HM(x, ps, LuxCore.testmode(st)) - loss_value = _compute_loss(ŷ, y_t, y_nan, targets, loss_types(logging), logging.agg) # Add extra_loss entries if provided if ext_loss !== nothing @@ -54,9 +53,7 @@ function _compute_loss(ŷ, y, y_nan, targets, loss_spec, agg::Function) end function _compute_loss(ŷ, y, y_nan, targets, loss_types::Vector, agg::Function) - - @show typeof(ŷ) - @show typeof(y) + out_loss_types = [ begin losses = assemble_loss(ŷ, y, y_nan, targets, loss_type) @@ -91,12 +88,13 @@ function _compute_loss end function assemble_loss(ŷ, y, y_nan, targets, loss_spec) return [ begin - y_t = _get_target_y(y, target) - y_nan = _get_target_nan(y_nan, target) - @show typeof(ŷ) - ŷ_t = ŷ[target] - ŷ_tsub = ŷ_t(window = axiskeys(y_t, :window)) - _apply_loss(ŷ_tsub, y_t, y_nan, loss_spec) + y_t = _get_target_y(y, target) + y_nan_t = _get_target_nan(y_nan, target) + + ŷ_t = ŷ[target] + ŷ_tsub = ŷ_t(window = axiskeys(y_t, :window)) # do key-based alignment first + + _apply_loss(ŷ_tsub, y_t, y_nan_t, loss_spec) end for target in targets ] @@ -189,4 +187,10 @@ end function _loss_name(loss_spec::Tuple) return _loss_name(loss_spec[1]) -end \ No newline at end of file +end + +import ChainRulesCore +import AxisKeys: KeyedArray +import ChainRulesCore: ProjectTo, InplaceableThunk, unthunk + +(project::ProjectTo{KeyedArray})(dx::InplaceableThunk) = project(unthunk(dx)) \ No newline at end of file diff --git a/src/utils/io.jl b/src/utils/io.jl index 78bf5b6e..243343c5 100644 --- a/src/utils/io.jl +++ b/src/utils/io.jl @@ -53,7 +53,7 @@ function save_observations!(file_name, target_names, yobs, train_or_val_name) end function to_named_tuple(ka, target_names) - arrays = [Array(ka(k)) for k in target_names] + arrays = [Array(ka(row = k)) for k in target_names] return NamedTuple{Tuple(target_names)}(arrays) end diff --git a/src/utils/tools.jl b/src/utils/tools.jl index cce636f3..836d2fda 100644 --- a/src/utils/tools.jl +++ b/src/utils/tools.jl @@ -120,13 +120,51 @@ function split_data(df::DataFrame, target, xvars, seqID; f = 0.8, batchsize = 32 return trainloader, valloader, trainall end -function toDataFrame(ka) - data_array = Array(ka') - df = DataFrame(data_array, ka.row) - df.index = ka.col +using AxisKeys +using NamedDims +using DataFrames + +_key_to_colname(k) = k isa Symbol ? 
k : Symbol(string(k)) + +# 2D KeyedArray -> one DataFrame +function toDataFrame( + ka::KeyedArray{T,2}, + cols_dim::Symbol = :row, + index_dim::Symbol = :col; + index_col::Symbol = :index, +) where {T} + + dcols = NamedDims.dim(ka, cols_dim) # map :row/:col -> numeric dim :contentReference[oaicite:2]{index=2} + didx = NamedDims.dim(ka, index_dim) + + # reorder so rows=index_dim, cols=cols_dim + ka2 = (didx == 1 && dcols == 2) ? ka : permutedims(ka, (didx, dcols)) + + data = Array(AxisKeys.keyless(ka2)) # drop KeyedArray wrapper :contentReference[oaicite:3]{index=3} + names = _key_to_colname.(collect(axiskeys(ka2, 2))) # col names from keys :contentReference[oaicite:4]{index=4} + + df = DataFrame(data, names; makeunique=true) + df[!, index_col] = collect(axiskeys(ka2, 1)) # “index” column return df end +# 3D KeyedArray -> Dict(slice_key => DataFrame) +function toDataFrame( + ka::KeyedArray{T,3}, + cols_dim::Symbol = :row, + index_dim::Symbol = :col; + slice_dim::Symbol = :window, + index_col::Symbol = :index, +) where {T} + + out = Dict{Any,DataFrame}() + for k in axiskeys(ka, slice_dim) + slice = ka(; NamedTuple{(slice_dim,)}((k,))...) # dynamic keyword selection + out[k] = toDataFrame(slice, cols_dim, index_dim; index_col=index_col) + end + return out +end + function toDataFrame(ka::AbstractDimArray) data_array = Array(ka') df = DataFrame(data_array, Array(dims(ka, :col))) From 196a677de89c7224750fc8fa2c02e7a0d7cb533b Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Fri, 26 Dec 2025 18:53:00 +0100 Subject: [PATCH 06/26] make mlp and lstm run --- .../tutorials/example_synthetic_lstm.jl | 115 ++++++------------ src/models/GenericHybridModel.jl | 1 - src/utils/compute_loss.jl | 9 +- 3 files changed, 48 insertions(+), 77 deletions(-) diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index 32fc7fb1..91ae97b8 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -33,19 +33,6 @@ df = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/ # Select a subset of data for faster execution df = df[1:20000, :] -# KeyedArray from AxisKeys.jl works, but cannot handle DateTime type -dfnot = Float32.(df[!, Not(:time)]) - -ka = to_keyedArray(dfnot) - -# DimensionalData -mat = Array(Matrix(dfnot)') -da = DimArray(mat, (Dim{:col}(Symbol.(names(dfnot))), Dim{:row}(1:size(dfnot, 1)))) - -# ============================================================================= -# Split into sequences -# ============================================================================= - # ============================================================================= # neural network model # ============================================================================= @@ -55,7 +42,6 @@ NN = Chain(Dense(15, 15, Lux.sigmoid), Dense(15, 15, Lux.sigmoid), Dense(15, 1)) broadcast_layer4 = @compact(; layer = Dense(6=>6)) do x::Union{NTuple{<:AbstractArray}, AbstractVector{<:AbstractArray}} y = map(layer, x) @return permutedims(stack(y; dims=3), (1, 3, 2)) - #@return y end NN_Memory = Chain( @@ -105,10 +91,10 @@ neural_param_names = [:rb] # Neural network predicted parameters # ============================================================================= using GLMakie # Create single NN hybrid model using the unified constructor -predictors_single_nn = [:sw_pot, :dsw_pot] # Predictor variables (solar radiation, and its derivative) +predictors = [:sw_pot, :dsw_pot] # Predictor 
variables (solar radiation, and its derivative) hlstm = constructHybridModel( - predictors_single_nn, # Input features + predictors, # Input features forcing, # Forcing variables target, # Target variables RbQ10, # Process-based model function @@ -120,40 +106,50 @@ hlstm = constructHybridModel( input_batchnorm = false # Apply batch normalization to inputs ) +# ================================================================================= +# show steps for data preparation, happens under the hood in the end. +# two KeyedArrays x, y = prepare_data(hlstm, df) +# new split_into_sequences with input_window, output_window, shift and lead_time +# for many-to-one, many-to-many, and different prediction lead times and overlap xs, ys = split_into_sequences(x, y; input_window = 20, output_window = 2, shift=1, lead_time=0) - ys_nan = .!isnan.(ys) -#? this sets up initial ps for the hybrid model version -rng = Random.default_rng(1234) -ps, st = Lux.setup(rng, hlstm) +# split data as in train +sdf = split_data(df, hlstm, sequence_kwargs = (;input_window = 10, output_window = 3, shift = 1, lead_time = 1)); -train_dl = EasyHybrid.DataLoader((xs, ys); batchsize=5) +typeof(sdf) +(x_train, y_train), (x_val, y_val) = sdf; +x_train +y_train +y_train_nan = .!isnan.(y_train) -reco_obs_3D = first(train_dl)[2] -reco_nan_3D = .!isnan.(reco_obs_3D) -reco_obs = dropdims(reco_obs_3D, dims = 1) -reco_nan = .!isnan.(reco_obs) -x_obs = first(train_dl)[1] +# put into train loader to compose minibatches +train_dl = EasyHybrid.DataLoader((x_train, y_train); batchsize=32) -sdf = hlstm(x_obs, ps, st) +# run hybrid model forwards +x_first = first(train_dl)[1] +y_first = first(train_dl)[2] -reco_mod = sdf[1].reco -reco_mod(window = axiskeys(reco_obs, :window)) .- reco_obs +ps, st = Lux.setup(Random.default_rng(), hlstm) +frun = hlstm(x_first, ps, st) -sdf = split_data(df, hlstm, sequence_kwargs = (;input_window = 10, output_window = 1, shift = 1, lead_time = 2)); +# extract predicted yhat +reco_mod = frun[1].reco -typeof(sdf) -(x_train, y_train), (x_val, y_val) = sdf; -x_train -y_train +# bring observations in same shape +reco_obs = dropdims(y_first, dims = 1) +reco_nan = .!isnan.(reco_obs) -ytrain_nan = .!isnan.(y_train) +# simulate loss -> pick the right window +reco_mod(window = axiskeys(reco_obs, :window)) .- reco_obs + +# compute loss +EasyHybrid.compute_loss(hlstm, ps, st, (x_train, (y_train, y_train_nan)), logging = LoggingLoss(train_mode = true)) -EasyHybrid.compute_loss(hlstm, ps, st, (x_train, (y_train, ytrain_nan)), logging = LoggingLoss(train_mode = true)) +# Zygote gradient of loss # ============================================================================= # train on DataFrame @@ -174,66 +170,35 @@ out_lstm = train( plotting = false ) -# Train the hybrid model -single_nn_out = train( - hm, - df, - (); - nepochs = 3, # Number of training epochs - batchsize = 512, # Batch size for training - opt = AdamW(0.1), # Optimizer and learning rate - monitor_names = [:rb, :Q10], # Parameters to monitor during training - yscale = identity, # Scaling for outputs - shuffleobs = false, - loss_types = [:mse, :nse], -) - -split_data(df, hm) - -split_data(df, hm, sequence_kwargs = (;input_window = 10, output_window = 4)) - -#? 
this sets up initial ps for the hybrid model version -rng = Random.default_rng(1234) -ps, st = Lux.setup(rng, hm) - -xdl = EasyHybrid.DataLoader(xs; batchsize=512) -ydl = EasyHybrid.DataLoader(ys; batchsize=512) +##################################################################################### +# is neural network still running? hm = constructHybridModel( - predictors_single_nn, # Input features + predictors, # Input features forcing, # Forcing variables target, # Target variables RbQ10, # Process-based model function parameters, # Parameter definitions neural_param_names, # NN-predicted parameters global_param_names, # Global parameters - hidden_layers = NN_Memory, # Neural network architecture + hidden_layers = NN, # Neural network architecture scale_nn_outputs = true, # Scale neural network outputs input_batchnorm = false # Apply batch normalization to inputs ) -sdf = hm(first(xdl), ps, st) - -mid1 =first(xdl)(RbQ10_Memory.forcing) -mid1[:,end,:] - - -o1, st1 = LuxCore.apply(RbQ10_Memory.NN, first(xdl)(RbQ10_Memory.predictors), ps.ps, st.st) - # Train the hybrid model -out_lstm = tune( +single_nn_out = train( hm, - df; - nepochs = 10, # Number of training epochs + df, + (); + nepochs = 3, # Number of training epochs batchsize = 512, # Batch size for training opt = AdamW(0.1), # Optimizer and learning rate monitor_names = [:rb, :Q10], # Parameters to monitor during training yscale = identity, # Scaling for outputs shuffleobs = false, loss_types = [:mse, :nse], - sequence_kwargs = (;input_window = 10, output_window = 4), - hidden_layers = NN_Memory, ) \ No newline at end of file diff --git a/src/models/GenericHybridModel.jl b/src/models/GenericHybridModel.jl index 1a93ff7b..ecd0cc06 100644 --- a/src/models/GenericHybridModel.jl +++ b/src/models/GenericHybridModel.jl @@ -533,7 +533,6 @@ function (m::MultiNNHybridModel)(df::DataFrame, ps, st) all_data = to_keyedArray(df) x, _ = prepare_data(m, all_data) - @show typeof(x) out, _ = m(x, ps, LuxCore.testmode(st)) dfnew = copy(df) for k in keys(out) diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index feccb3c0..6fcf859e 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -85,7 +85,7 @@ Returns a single loss value if `loss_spec` is provided, or a NamedTuple of losse """ function _compute_loss end -function assemble_loss(ŷ, y, y_nan, targets, loss_spec) +function assemble_loss(ŷ, y::KeyedArray{T,3}, y_nan, targets, loss_spec) where {T} return [ begin y_t = _get_target_y(y, target) @@ -100,6 +100,13 @@ function assemble_loss(ŷ, y, y_nan, targets, loss_spec) ] end +function assemble_loss(ŷ, y::KeyedArray{T,2}, y_nan, targets, loss_spec) where {T} + return [ + _apply_loss(ŷ[target], _get_target_y(y, target), _get_target_nan(y_nan, target), loss_spec) + for target in targets + ] +end + function assemble_loss(ŷ, y, y_nan, targets, loss_spec::PerTarget) @assert length(targets) == length(loss_spec.losses) "Length of targets and PerTarget losses tuple must match" losses = [ From bae8db37189f77911fa99a30abefabdbc11e568c Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Fri, 26 Dec 2025 22:13:59 +0100 Subject: [PATCH 07/26] col = inout (features, forcing, targets, output), row = batch_size, window = time in KeyedArrays and DimArrays --- .../tutorials/example_synthetic_lstm.jl | 2 +- src/EasyHybrid.jl | 2 +- src/models/GenericHybridModel.jl | 2 +- src/models/Respiration_Rb_Q10.jl | 4 +- src/plotrecipes.jl | 4 +- src/train.jl | 6 +-- src/utils/compute_loss.jl | 14 +++--- src/utils/io.jl | 4 +- 
src/utils/tools.jl | 49 +++++++++++-------- 9 files changed, 48 insertions(+), 39 deletions(-) diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index 91ae97b8..0f0e71ad 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -144,7 +144,7 @@ reco_obs = dropdims(y_first, dims = 1) reco_nan = .!isnan.(reco_obs) # simulate loss -> pick the right window -reco_mod(window = axiskeys(reco_obs, :window)) .- reco_obs +reco_mod(time = axiskeys(reco_obs, :time)) .- reco_obs # compute loss EasyHybrid.compute_loss(hlstm, ps, st, (x_train, (y_train, y_train_nan)), logging = LoggingLoss(train_mode = true)) diff --git a/src/EasyHybrid.jl b/src/EasyHybrid.jl index 8fb3ed43..8644e4b0 100644 --- a/src/EasyHybrid.jl +++ b/src/EasyHybrid.jl @@ -12,7 +12,7 @@ using ChainRulesCore: ChainRulesCore using ComponentArrays: ComponentArrays, ComponentArray using DataFrameMacros: DataFrameMacros, @transform using DataFrames: DataFrames, DataFrame, GroupedDataFrame, Missing, coalesce, mapcols, select, missing, All -using DimensionalData: DimensionalData, AbstractDimArray, At, dims, groupby +using DimensionalData: DimensionalData, AbstractDimArray, At, Dim, DimArray, dims, groupby using Downloads: Downloads using Hyperopt: Hyperopt, Hyperoptimizer using JLD2: JLD2, jldopen diff --git a/src/models/GenericHybridModel.jl b/src/models/GenericHybridModel.jl index ecd0cc06..d3eefd21 100644 --- a/src/models/GenericHybridModel.jl +++ b/src/models/GenericHybridModel.jl @@ -362,7 +362,7 @@ end # Forward pass for SingleNNHybridModel (optimized, no branching) function (m::SingleNNHybridModel)(ds_k::Union{KeyedArray, AbstractDimArray}, ps, st) # 1) get features - predictors = ds_k(row = m.predictors) #toArray(ds_k, m.predictors) + predictors = ds_k(inout = m.predictors) #toArray(ds_k, m.predictors) parameters = m.parameters diff --git a/src/models/Respiration_Rb_Q10.jl b/src/models/Respiration_Rb_Q10.jl index a0cf0d5a..1bf49a3b 100644 --- a/src/models/Respiration_Rb_Q10.jl +++ b/src/models/Respiration_Rb_Q10.jl @@ -60,8 +60,8 @@ function (hm::RespirationRbQ10)(ds_k, ps, st::NamedTuple) end function (hm::RespirationRbQ10)(ds_k::AbstractDimArray, ps, st::NamedTuple) - p = ds_k[col = At(hm.predictors)] - x = Array(ds_k[col = At(hm.forcing)]) # don't propagate names after this + p = ds_k[inout = At(hm.predictors)] + x = Array(ds_k[inout = At(hm.forcing)]) # don't propagate names after this Rb, stQ10 = LuxCore.apply(hm.NN, p, ps.ps, st.st) #! NN(αᵢ(t)) ≡ Rb(T(t), M(t)) diff --git a/src/plotrecipes.jl b/src/plotrecipes.jl index 33e263ca..7927e8f3 100644 --- a/src/plotrecipes.jl +++ b/src/plotrecipes.jl @@ -114,11 +114,11 @@ function to_obs_tuple(y, target_names) end function to_tuple(y::KeyedArray, target_names) - return (; (t => y(row = t) for t in target_names)...) # observations are fixed, no Observables are needed! + return (; (t => y(inout = t) for t in target_names)...) # observations are fixed, no Observables are needed! end function to_tuple(y::AbstractDimArray, target_names) - return (; (t => Array(y[col = At(t)]) for t in target_names)...) # observations are fixed, no Observables are needed! + return (; (t => Array(y[inout = At(t)]) for t in target_names)...) # observations are fixed, no Observables are needed! 
end function monitor_to_obs(ŷ, monitor_names; cuts = (0.25, 0.5, 0.75)) diff --git a/src/train.jl b/src/train.jl index 9a5ebbb7..c591bbe4 100644 --- a/src/train.jl +++ b/src/train.jl @@ -539,7 +539,7 @@ end function prepare_data(hm, data::AbstractDimArray) predictors_forcing, targets = get_prediction_target_names(hm) - return (data[col = At(predictors_forcing)], data[col = At(targets)]) # TODO check what this should be rows or cols, I would say rows, but maybe it does not matter + return (data[inout = At(predictors_forcing)], data[inout = At(targets)]) end function prepare_data(hm, data::Tuple) @@ -667,8 +667,8 @@ function split_into_sequences(x, y; input_window=5, output_window=1, shift=1, le Yd[:, :, ii] .= y[:, sy:ey] end - Xk = KeyedArray(Xd; row=featkeys, window=lag_keys, col=samplekeys) - Yk = KeyedArray(Yd; row=targetkeys, window=lead_keys, col=samplekeys) + Xk = KeyedArray(Xd; inout=featkeys, time=lag_keys, batch_size=samplekeys) + Yk = KeyedArray(Yd; inout=targetkeys, time=lead_keys, batch_size=samplekeys) return Xk, Yk end diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index 6fcf859e..1be60596 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -92,7 +92,7 @@ function assemble_loss(ŷ, y::KeyedArray{T,3}, y_nan, targets, loss_spec) where y_nan_t = _get_target_nan(y_nan, target) ŷ_t = ŷ[target] - ŷ_tsub = ŷ_t(window = axiskeys(y_t, :window)) # do key-based alignment first + ŷ_tsub = ŷ_t(time = axiskeys(y_t, :time)) # do key-based alignment first _apply_loss(ŷ_tsub, y_t, y_nan_t, loss_spec) end @@ -152,9 +152,9 @@ Helper function to apply the appropriate loss function based on the specificatio """ function _apply_loss end -_get_target_y(y, target) = y(row = target) -_get_target_y(y::AbstractDimArray, target) = y[col = At(target)] # assumes the DimArray uses :col indexing -_get_target_y(y::AbstractDimArray, targets::Vector) = y[col = At(targets)] # for multiple targets +_get_target_y(y, target) = y(inout = target) +_get_target_y(y::AbstractDimArray, target) = y[inout = At(target)] # assumes the DimArray uses :inout indexing +_get_target_y(y::AbstractDimArray, targets::Vector) = y[inout = At(targets)] # for multiple targets function _get_target_y(y::Tuple, target) y_obs, y_sigma = y @@ -170,9 +170,9 @@ Helper function to extract target-specific values from `y`, handling cases where """ function _get_target_y end -_get_target_nan(y_nan, target) = y_nan(row = target) -_get_target_nan(y_nan::AbstractDimArray, target) = y_nan[col = At(target)] # assumes the DimArray uses :col indexing -_get_target_nan(y_nan::AbstractDimArray, targets::Vector) = y_nan[col = At(targets)] # for multiple targets +_get_target_nan(y_nan, target) = y_nan(inout = target) +_get_target_nan(y_nan::AbstractDimArray, target) = y_nan[inout = At(target)] # assumes the DimArray uses :inout indexing +_get_target_nan(y_nan::AbstractDimArray, targets::Vector) = y_nan[inout = At(targets)] # for multiple targets """ _get_target_nan(y_nan, target) diff --git a/src/utils/io.jl b/src/utils/io.jl index 243343c5..9b4d2b25 100644 --- a/src/utils/io.jl +++ b/src/utils/io.jl @@ -53,12 +53,12 @@ function save_observations!(file_name, target_names, yobs, train_or_val_name) end function to_named_tuple(ka, target_names) - arrays = [Array(ka(row = k)) for k in target_names] + arrays = [Array(ka(inout = k)) for k in target_names] return NamedTuple{Tuple(target_names)}(arrays) end function to_named_tuple(ka::AbstractDimArray, target_names) - arrays = [Array(ka[col = At(k)]) for k in target_names] + 
arrays = [Array(ka[inout = At(k)]) for k in target_names] return NamedTuple{Tuple(target_names)}(arrays) end diff --git a/src/utils/tools.jl b/src/utils/tools.jl index 836d2fda..f66d0f9c 100644 --- a/src/utils/tools.jl +++ b/src/utils/tools.jl @@ -1,5 +1,5 @@ #### Data handling -export select_predictors, to_keyedArray, split_data +export select_predictors, to_keyedArray, to_dimArray, split_data export toDataFrame, toNamedTuple, toArray # Make vec each entry of NamedTuple (since broadcast ist reserved) @@ -52,7 +52,16 @@ tokeyedArray(df::DataFrame) """ function to_keyedArray(df::DataFrame) d = Matrix(df) |> transpose - return KeyedArray(d, row = Symbol.(names(df)), col = 1:size(d, 2)) + return KeyedArray(d, inout = Symbol.(names(df)), batch_size = 1:size(d, 2)) +end + +# Convert a DataFrame to a DimArray where variables are in 1st dim (rows) +""" +to_dimArray(df::DataFrame) +""" +function to_dimArray(df::DataFrame) + d = Matrix(df) |> transpose |> Array + return DimArray(d, (Dim{:inout}(Symbol.(names(df))), Dim{:batch_size}(1:size(d, 2)))) end # Cast a grouped dataframe into a KeyedArray, where the group is the third dimension @@ -107,14 +116,14 @@ function split_data(df::DataFrame, target, xvars, seqID; f = 0.8, batchsize = 32 # partition does not allow partitioning along that dimension (or even not arrays at all) idx_tr, idx_vali = partition(axiskeys(dkg)[3], f; shuffle) # wrap training data into Flux.DataLoader - x = dkg(row = xvars, seqID = idx_tr) - y = dkg(row = target, seqID = idx_tr) |> Array + x = dkg(inout = xvars, seqID = idx_tr) + y = dkg(inout = target, seqID = idx_tr) |> Array data_t = (; x, y) trainloader = Flux.DataLoader(data_t; batchsize, shuffle, partial) trainall = Flux.DataLoader(data_t; batchsize = size(x, 3), shuffle = false, partial = false) # wrap validation data into Flux.DataLoader - x = dkg(row = xvars, seqID = idx_vali) - y = dkg(row = target, seqID = idx_vali) |> Array + x = dkg(inout = xvars, seqID = idx_vali) + y = dkg(inout = target, seqID = idx_vali) |> Array data_v = (; x, y) valloader = Flux.DataLoader(data_v; batchsize = size(x, 3), shuffle = false, partial = false) return trainloader, valloader, trainall @@ -129,8 +138,8 @@ _key_to_colname(k) = k isa Symbol ? k : Symbol(string(k)) # 2D KeyedArray -> one DataFrame function toDataFrame( ka::KeyedArray{T,2}, - cols_dim::Symbol = :row, - index_dim::Symbol = :col; + cols_dim::Symbol = :inout, + index_dim::Symbol = :batch_size; index_col::Symbol = :index, ) where {T} @@ -151,9 +160,9 @@ end # 3D KeyedArray -> Dict(slice_key => DataFrame) function toDataFrame( ka::KeyedArray{T,3}, - cols_dim::Symbol = :row, - index_dim::Symbol = :col; - slice_dim::Symbol = :window, + cols_dim::Symbol = :inout, + index_dim::Symbol = :batch_size; + slice_dim::Symbol = :time, index_col::Symbol = :index, ) where {T} @@ -167,8 +176,8 @@ end function toDataFrame(ka::AbstractDimArray) data_array = Array(ka') - df = DataFrame(data_array, Array(dims(ka, :col))) - df.index = Array(dims(ka, :row)) + df = DataFrame(data_array, Array(dims(ka, :inout))) + df.index = Array(dims(ka, :batch_size)) return df end @@ -209,22 +218,22 @@ ta = data.TA ``` """ function toNamedTuple(ka::KeyedArray, variables::Vector{Symbol}) - vals = [dropdims(ka(row = [var]), dims = 1) for var in variables] + vals = [dropdims(ka(inout = [var]), dims = 1) for var in variables] return (; zip(variables, vals)...) 
end function toNamedTuple(ka::AbstractDimArray, variables::Vector{Symbol}) - vals = [ka[col = At([var])] for var in variables] + vals = [ka[inout = At([var])] for var in variables] return (; zip(variables, vals)...) end function toNamedTuple(ka::KeyedArray, variables::NTuple{N, Symbol}) where {N} - vals = ntuple(i -> ka(row = [variables[i]]), N) + vals = ntuple(i -> ka(inout = [variables[i]]), N) return NamedTuple{variables}(vals) end function toNamedTuple(ka::AbstractDimArray, variables::NTuple{N, Symbol}) where {N} - ntuple(i -> ka[col = At([variables[i]])], N) + ntuple(i -> ka[inout = At([variables[i]])], N) return NamedTuple{variables}(vals) end @@ -254,7 +263,7 @@ function toNamedTuple(ka::KeyedArray) end function toNamedTuple(ka::AbstractDimArray) - variables = Symbol.(dims(A, :col)) # Get all variable names from first dimension + variables = Symbol.(dims(ka, :inout)) # Get all variable names from first dimension return toNamedTuple(ka, variables) end @@ -280,7 +289,7 @@ function toNamedTuple(ka::KeyedArray, variable::Symbol) end function toNamedTuple(ka::AbstractDimArray, variable::Symbol) - return ka[col = At(variable)] + return ka[inout = At(variable)] end function toArray(ka::KeyedArray, variable) @@ -288,5 +297,5 @@ function toArray(ka::KeyedArray, variable) end function toArray(ka::AbstractDimArray, variable) - return ka[col = At(variable)] + return ka[inout = At(variable)] end From 6090122e75704336e38f7c5e5413e30c7eb281e7 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Sat, 27 Dec 2025 10:54:41 +0100 Subject: [PATCH 08/26] toArray function uses inout --- src/models/GenericHybridModel.jl | 2 +- src/utils/tools.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/models/GenericHybridModel.jl b/src/models/GenericHybridModel.jl index d3eefd21..255d288f 100644 --- a/src/models/GenericHybridModel.jl +++ b/src/models/GenericHybridModel.jl @@ -362,7 +362,7 @@ end # Forward pass for SingleNNHybridModel (optimized, no branching) function (m::SingleNNHybridModel)(ds_k::Union{KeyedArray, AbstractDimArray}, ps, st) # 1) get features - predictors = ds_k(inout = m.predictors) #toArray(ds_k, m.predictors) + predictors = toArray(ds_k, m.predictors) parameters = m.parameters diff --git a/src/utils/tools.jl b/src/utils/tools.jl index f66d0f9c..1e6df00e 100644 --- a/src/utils/tools.jl +++ b/src/utils/tools.jl @@ -293,7 +293,7 @@ function toNamedTuple(ka::AbstractDimArray, variable::Symbol) end function toArray(ka::KeyedArray, variable) - return ka(variable) + return ka(inout = variable) end function toArray(ka::AbstractDimArray, variable) From 3c38f028032356a7a7e2264d401c1a719cb1d6b0 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Sat, 27 Dec 2025 18:05:24 +0100 Subject: [PATCH 09/26] towards switch for DimArrays --- .../tutorials/example_synthetic_lstm.jl | 13 ++-- projects/book_chapter/example_synthetic.jl | 5 +- src/EasyHybrid.jl | 9 ++- src/models/Respiration_Rb_Q10.jl | 4 +- src/train.jl | 59 ++++++++++++------- src/utils/compute_loss.jl | 52 ++++++++++++---- src/utils/tools.jl | 31 +++++----- 7 files changed, 116 insertions(+), 57 deletions(-) diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index 0f0e71ad..e66e4644 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -110,7 +110,9 @@ hlstm = constructHybridModel( # show steps for data preparation, happens under the hood in the end. 
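# A minimal sketch of how the two array backends line up, assuming a small toy DataFrame;
# to_keyedArray and to_dimArray are the exported helpers from src/utils/tools.jl, and the
# prepare_data call below builds on the same two backends.
using DataFrames
toy = DataFrame(ta = rand(Float32, 5), sw_pot = rand(Float32, 5))
ka_toy = to_keyedArray(toy)   # KeyedArray with dims (:inout, :batch_size)
da_toy = to_dimArray(toy)     # DimArray with the same dimension names
Array(ka_toy(inout = :ta)) ≈ Array(da_toy[inout = At(:ta)])   # same values, different wrapper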
# two KeyedArrays -x, y = prepare_data(hlstm, df) +x, y = prepare_data(hlstm, df, array_type = :DimArray) + +ndims(x) # new split_into_sequences with input_window, output_window, shift and lead_time # for many-to-one, many-to-many, and different prediction lead times and overlap @@ -159,7 +161,7 @@ out_lstm = train( hlstm, df, (); - nepochs = 100, # Number of training epochs + nepochs = 2, # Number of training epochs batchsize = 512, # Batch size for training opt = AdamW(0.1), # Optimizer and learning rate monitor_names = [:rb, :Q10], # Parameters to monitor during training @@ -167,7 +169,8 @@ out_lstm = train( shuffleobs = false, loss_types = [:mse, :nse], sequence_kwargs = (;input_window = 10, output_window = 4), - plotting = false + plotting = false, + array_type = :DimArray ) @@ -185,7 +188,8 @@ hm = constructHybridModel( global_param_names, # Global parameters hidden_layers = NN, # Neural network architecture scale_nn_outputs = true, # Scale neural network outputs - input_batchnorm = false # Apply batch normalization to inputs + input_batchnorm = false, # Apply batch normalization to inputs + ) @@ -201,4 +205,5 @@ single_nn_out = train( yscale = identity, # Scaling for outputs shuffleobs = false, loss_types = [:mse, :nse], + array_type = :DimArray ) \ No newline at end of file diff --git a/projects/book_chapter/example_synthetic.jl b/projects/book_chapter/example_synthetic.jl index 642b2c88..27c44cb7 100644 --- a/projects/book_chapter/example_synthetic.jl +++ b/projects/book_chapter/example_synthetic.jl @@ -36,10 +36,7 @@ df = df[1:20000, :] dfnot = Float32.(df[!, Not(:time)]) ka = to_keyedArray(dfnot) - -# DimensionalData -mat = Array(Matrix(dfnot)') -da = DimArray(mat, (Dim{:col}(Symbol.(names(dfnot))), Dim{:row}(1:size(dfnot, 1)))) +da = to_dimArray(dfnot) # ============================================================================= # Define the Physical Model diff --git a/src/EasyHybrid.jl b/src/EasyHybrid.jl index 8644e4b0..0f76e195 100644 --- a/src/EasyHybrid.jl +++ b/src/EasyHybrid.jl @@ -5,14 +5,18 @@ EasyHybrid is a Julia package for hybrid machine learning models, combining neur """ module EasyHybrid -using AxisKeys: AxisKeys, KeyedArray, axiskeys, wrapdims +using AxisKeys: AxisKeys, KeyedArray, Key, axiskeys, wrapdims using CSV: CSV using Chain: @chain using ChainRulesCore: ChainRulesCore using ComponentArrays: ComponentArrays, ComponentArray using DataFrameMacros: DataFrameMacros, @transform using DataFrames: DataFrames, DataFrame, GroupedDataFrame, Missing, coalesce, mapcols, select, missing, All -using DimensionalData: DimensionalData, AbstractDimArray, At, Dim, DimArray, dims, groupby +using DimensionalData: DimensionalData, AbstractDimArray, Dim, DimArray, dims, groupby, lookup, At +# Extend axiskeys to work with DimArrays (delegates to lookup) +AxisKeys.axiskeys(da::AbstractDimArray) = Tuple(lookup(da, d) for d in dims(da)) +AxisKeys.axiskeys(da::AbstractDimArray, i::Int) = lookup(da, dims(da)[i]) +AxisKeys.axiskeys(da::AbstractDimArray, name::Symbol) = lookup(da, name) using Downloads: Downloads using Hyperopt: Hyperopt, Hyperoptimizer using JLD2: JLD2, jldopen @@ -39,6 +43,7 @@ using Static: False, True using Statistics using DataFrames using CSV + using AxisKeys: axiskeys using OptimizationOptimisers: OptimizationOptimisers, Optimisers, Adam, AdamW, RMSProp using ComponentArrays end diff --git a/src/models/Respiration_Rb_Q10.jl b/src/models/Respiration_Rb_Q10.jl index 1bf49a3b..0f211130 100644 --- a/src/models/Respiration_Rb_Q10.jl +++ 
b/src/models/Respiration_Rb_Q10.jl @@ -60,8 +60,8 @@ function (hm::RespirationRbQ10)(ds_k, ps, st::NamedTuple) end function (hm::RespirationRbQ10)(ds_k::AbstractDimArray, ps, st::NamedTuple) - p = ds_k[inout = At(hm.predictors)] - x = Array(ds_k[inout = At(hm.forcing)]) # don't propagate names after this + p = ds_k[inout = At(hm.predictors)] # No @view - needs to be differentiable + x = Array(ds_k[inout = At(hm.forcing)]) # No @view - needs to be differentiable Rb, stQ10 = LuxCore.apply(hm.NN, p, ps.ps, st.st) #! NN(αᵢ(t)) ≡ Rb(T(t), M(t)) diff --git a/src/train.jl b/src/train.jl index c591bbe4..8d2cebeb 100644 --- a/src/train.jl +++ b/src/train.jl @@ -39,7 +39,8 @@ Default output file is `trained_model.jld2` at the current working directory und - `loss_types`: A vector of loss types to compute during training (default: `[:mse, :r2]`). The first entry is used for plotting in the dynamic trainboard. This loss can be increasing (e.g. NSE) or decreasing (e.g. RMSE). - `agg`: The aggregation function to apply to the computed losses (default: `sum`). -## Data Handling (passed via kwargs): +## Data Handling: +- `array_type`: Array type for data conversion from DataFrame: `:DimArray` (default) or `:KeyedArray`. - `shuffleobs`: Whether to shuffle the training data (default: false). - `split_by_id`: Column name or function to split data by ID (default: nothing -> no ID-based splitting). - `split_data_at`: Fraction of data to use for training when splitting (default: 0.8). @@ -71,6 +72,8 @@ function train( patience = typemax(Int), autodiff_backend = AutoZygote(), return_gradients = True(), + # Array type for data conversion + array_type = :DimArray, # :DimArray or :KeyedArray # Loss and evaluation training_loss = :mse, loss_types = [:mse, :r2], @@ -116,7 +119,7 @@ function train( Random.seed!(random_seed) end - (x_train, y_train), (x_val, y_val) = split_data(data, hybridModel; kwargs...) + (x_train, y_train), (x_val, y_val) = split_data(data, hybridModel; array_type=array_type, kwargs...) train_loader = DataLoader((x_train, y_train), batchsize = batchsize, shuffle = true) @@ -412,9 +415,10 @@ function split_data( shuffleobs::Bool = false, split_data_at::Real = 0.8, sequence_kwargs::Union{Nothing, NamedTuple} = nothing, + array_type::Symbol = :DimArray, kwargs... 
) - data_ = prepare_data(hybridModel, data) + data_ = prepare_data(hybridModel, data; array_type=array_type) if sequence_kwargs !== nothing x_keyed, y_keyed = data_ @@ -470,6 +474,7 @@ function split_data( else # --- Fallback: simple random/chronological split of prepared data --- (x_train, y_train), (x_val, y_val) = splitobs((x_all, y_all); at = split_data_at, shuffle = shuffleobs) + @show typeof(x_train) return (x_train, y_train), (x_val, y_val) end end @@ -503,12 +508,19 @@ Split data into training and validation sets, either randomly, by grouping by ID """ function split_data end -function prepare_data(hm, data::KeyedArray) +function prepare_data(hm, data::KeyedArray; array_type=:KeyedArray) predictors_forcing, targets = get_prediction_target_names(hm) + # KeyedArray: use () syntax for views that are differentiable return (data(predictors_forcing), data(targets)) end -function prepare_data(hm, data::DataFrame) +function prepare_data(hm, data::AbstractDimArray; array_type=:DimArray) + predictors_forcing, targets = get_prediction_target_names(hm) + # DimArray: use [] syntax (copies, but differentiable) + return (data[inout = At(predictors_forcing)], data[inout = At(targets)]) +end + +function prepare_data(hm, data::DataFrame; array_type=:DimArray) predictors_forcing, targets = get_prediction_target_names(hm) all_predictor_cols = unique(vcat(values(predictors_forcing)...)) @@ -532,17 +544,17 @@ function prepare_data(hm, data::DataFrame) keep = .!mask_missing_predforce .& mask_at_least_one_target sdf = sdf[keep, col_to_select] - # Convert to Float32 and to your keyed array - ds_keyed = to_keyedArray(Float32.(sdf)) - return prepare_data(hm, ds_keyed) -end - -function prepare_data(hm, data::AbstractDimArray) - predictors_forcing, targets = get_prediction_target_names(hm) - return (data[inout = At(predictors_forcing)], data[inout = At(targets)]) + # Convert to Float32 and to the specified array type + if array_type == :KeyedArray + ds = to_keyedArray(Float32.(sdf)) + else + ds = to_dimArray(Float32.(sdf)) + end + @show typeof(ds) + return prepare_data(hm, ds; array_type=array_type) end -function prepare_data(hm, data::Tuple) +function prepare_data(hm, data::Tuple; array_type=:DimArray) return data end @@ -620,8 +632,8 @@ function getbyname(df::DataFrame, name::Symbol) return df[!, name] end -function getbyname(ka::AxisKeys.KeyedArray, name::Symbol) - return ka(name) +function getbyname(ka::Union{KeyedArray, AbstractDimArray}, name::Symbol) + return @view ka[inout = At(name)] end function split_into_sequences(x, y; input_window=5, output_window=1, shift=1, lead_time=1) @@ -666,10 +678,17 @@ function split_into_sequences(x, y; input_window=5, output_window=1, shift=1, le Xd[:, :, ii] .= x[:, sx:ex] Yd[:, :, ii] .= y[:, sy:ey] end - - Xk = KeyedArray(Xd; inout=featkeys, time=lag_keys, batch_size=samplekeys) - Yk = KeyedArray(Yd; inout=targetkeys, time=lead_keys, batch_size=samplekeys) - return Xk, Yk + if x isa KeyedArray + Xk = KeyedArray(Xd; inout=featkeys, time=lag_keys, batch_size=samplekeys) + Yk = KeyedArray(Yd; inout=targetkeys, time=lead_keys, batch_size=samplekeys) + return Xk, Yk + elseif x isa AbstractDimArray + Xk = DimArray(Xd, (inout = featkeys, time = lag_keys, batch_size = samplekeys)) + Yk = DimArray(Yd, (inout = targetkeys, time = lead_keys, batch_size = samplekeys)) + return Xk, Yk + else + throw(ArgumentError("expected x to be KeyedArray or AbstractDimArray; got $(typeof(x))")) + end end diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index
1be60596..b804c90e 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -85,14 +85,18 @@ Returns a single loss value if `loss_spec` is provided, or a NamedTuple of losse """ function _compute_loss end -function assemble_loss(ŷ, y::KeyedArray{T,3}, y_nan, targets, loss_spec) where {T} +# Wrapper for time-based subsetting - dispatches on array type for differentiability +_select_time(ŷ_t::KeyedArray, time_keys) = ŷ_t(time = time_keys) # KeyedArray: () syntax - view & differentiable +_select_time(ŷ_t::AbstractDimArray, time_keys) = ŷ_t[time = At(time_keys)] # DimArray: [] syntax - copy & differentiable + +function assemble_loss(ŷ, y::Union{KeyedArray{T,3}, AbstractDimArray{T,3}}, y_nan, targets, loss_spec) where {T} return [ begin y_t = _get_target_y(y, target) y_nan_t = _get_target_nan(y_nan, target) ŷ_t = ŷ[target] - ŷ_tsub = ŷ_t(time = axiskeys(y_t, :time)) # do key-based alignment first + ŷ_tsub = _select_time(ŷ_t, axiskeys(y_t, :time)) # dispatches based on array type _apply_loss(ŷ_tsub, y_t, y_nan_t, loss_spec) end @@ -100,7 +104,7 @@ function assemble_loss(ŷ, y::KeyedArray{T,3}, y_nan, targets, loss_spec) where ] end -function assemble_loss(ŷ, y::KeyedArray{T,2}, y_nan, targets, loss_spec) where {T} +function assemble_loss(ŷ, y::Union{KeyedArray{T,2}, AbstractDimArray{T,2}}, y_nan, targets, loss_spec) where {T} return [ _apply_loss(ŷ[target], _get_target_y(y, target), _get_target_nan(y_nan, target), loss_spec) for target in targets @@ -152,10 +156,25 @@ Helper function to apply the appropriate loss function based on the specificatio """ function _apply_loss end -_get_target_y(y, target) = y(inout = target) -_get_target_y(y::AbstractDimArray, target) = y[inout = At(target)] # assumes the DimArray uses :inout indexing -_get_target_y(y::AbstractDimArray, targets::Vector) = y[inout = At(targets)] # for multiple targets +# For KeyedArray +function _get_target_y(y::KeyedArray, target) + return y(inout = target) +end + +function _get_target_y(y::KeyedArray, targets::Vector) + return y(inout = targets) +end + +# For DimArray +function _get_target_y(y::AbstractDimArray, target) + return @view y[inout = At(target)] +end + +function _get_target_y(y::AbstractDimArray, targets::Vector) + return @view y[inout = At(targets)] +end +# For Tuple (e.g. (y_obs, y_sigma)), supports KeyedArray or DimArray as y_obs function _get_target_y(y::Tuple, target) y_obs, y_sigma = y sigma = y_sigma isa Number ? y_sigma : y_sigma(target) @@ -163,16 +182,29 @@ function _get_target_y(y::Tuple, target) return (y_obs_val, sigma) end - """ _get_target_y(y, target) Helper function to extract target-specific values from `y`, handling cases where `y` can be a tuple of `(y_obs, y_sigma)`. 
""" function _get_target_y end -_get_target_nan(y_nan, target) = y_nan(inout = target) -_get_target_nan(y_nan::AbstractDimArray, target) = y_nan[inout = At(target)] # assumes the DimArray uses :inout indexing -_get_target_nan(y_nan::AbstractDimArray, targets::Vector) = y_nan[inout = At(targets)] # for multiple targets +# For KeyedArray +function _get_target_nan(y_nan::KeyedArray, target) + return y_nan(inout = target) +end + +function _get_target_nan(y_nan::KeyedArray, targets::Vector) + return y_nan(inout = targets) +end + +# For DimArray +function _get_target_nan(y_nan::AbstractDimArray, target) + return @view y_nan[inout = At(target)] +end + +function _get_target_nan(y_nan::AbstractDimArray, targets::Vector) + return @view y_nan[inout = At(targets)] +end """ _get_target_nan(y_nan, target) diff --git a/src/utils/tools.jl b/src/utils/tools.jl index 1e6df00e..86d76ac5 100644 --- a/src/utils/tools.jl +++ b/src/utils/tools.jl @@ -195,15 +195,16 @@ function toDataFrame(ka, target_names) end # ============================================================================= -# KeyedArray unpacking functions +# Array unpacking functions (works for both KeyedArray and DimArray) # ============================================================================= """ -toNamedTuple(ka::KeyedArray, variables::Vector{Symbol}) -Extract specified variables from a KeyedArray and return them as a NamedTuple of vectors. + toNamedTuple(ka::Union{KeyedArray, AbstractDimArray}, variables::Vector{Symbol}) + +Extract specified variables from a KeyedArray or DimArray and return them as a NamedTuple of vectors. # Arguments: -- `ka`: The KeyedArray to unpack +- `ka`: The KeyedArray or DimArray to unpack - `variables`: Vector of symbols representing the variables to extract # Returns: @@ -211,8 +212,8 @@ Extract specified variables from a KeyedArray and return them as a NamedTuple of # Example: ```julia -# Extract SW_IN and TA from a KeyedArray -data = toNamedTuple(ds_keyed, [:SW_IN, :TA]) +# Extract SW_IN and TA from an array +data = toNamedTuple(ds, [:SW_IN, :TA]) sw_in = data.SW_IN ta = data.TA ``` @@ -223,7 +224,7 @@ function toNamedTuple(ka::KeyedArray, variables::Vector{Symbol}) end function toNamedTuple(ka::AbstractDimArray, variables::Vector{Symbol}) - vals = [ka[inout = At([var])] for var in variables] + vals = [dropdims(ka[inout = At([var])], dims = 1) for var in variables] return (; zip(variables, vals)...) end @@ -249,8 +250,8 @@ Extract all variables from a KeyedArray and return them as a NamedTuple of vecto # Example: ```julia -# Extract all variables from a KeyedArray -data = toNamedTuple(ds_keyed) +# Extract all variables from an array +data = toNamedTuple(ds) # Access individual variables sw_in = data.SW_IN ta = data.TA @@ -258,12 +259,12 @@ nee = data.NEE ``` """ function toNamedTuple(ka::KeyedArray) - variables = Symbol.(axiskeys(ka)[1]) # Get all variable names from first dimension + variables = Symbol.(axiskeys(ka, :inout)) # Get all variable names from :inout dimension return toNamedTuple(ka, variables) end function toNamedTuple(ka::AbstractDimArray) - variables = Symbol.(dims(ka, :inout)) # Get all variable names from first dimension + variables = Symbol.(lookup(ka, :inout)) # Get all variable names from :inout dimension return toNamedTuple(ka, variables) end @@ -272,7 +273,7 @@ toNamedTuple(ka::KeyedArray, variable::Symbol) Extract a single variable from a KeyedArray and return it as a vector. 
# Arguments: -- `ka`: The KeyedArray to unpack +- `ka`: The KeyedArray or DimArray to unpack - `variable`: Symbol representing the variable to extract # Returns: @@ -280,12 +281,12 @@ Extract a single variable from a KeyedArray and return it as a vector. # Example: ```julia -# Extract just SW_IN from a KeyedArray -sw_in = toNamedTuple(ds_keyed, :SW_IN) +# Extract just SW_IN from an array +sw_in = toNamedTuple(ds, :SW_IN) ``` """ function toNamedTuple(ka::KeyedArray, variable::Symbol) - return ka(variable) + return ka(inout = variable) end function toNamedTuple(ka::AbstractDimArray, variable::Symbol) From 63f4ecfa53f0c3a30607197fc3767fea50be64d2 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Sat, 27 Dec 2025 18:09:20 +0100 Subject: [PATCH 10/26] format runic --- .../tutorials/example_synthetic_lstm.jl | 17 +++--- src/macro_hybrid.jl | 4 +- src/models/GenericHybridModel.jl | 2 +- src/models/NNModels.jl | 4 +- src/train.jl | 52 +++++++++---------- src/utils/compute_loss.jl | 18 +++---- src/utils/tools.jl | 34 ++++++------ 7 files changed, 65 insertions(+), 66 deletions(-) diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index e66e4644..ecec6c5e 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -39,13 +39,13 @@ df = df[1:20000, :] # Define neural network NN = Chain(Dense(15, 15, Lux.sigmoid), Dense(15, 15, Lux.sigmoid), Dense(15, 1)) -broadcast_layer4 = @compact(; layer = Dense(6=>6)) do x::Union{NTuple{<:AbstractArray}, AbstractVector{<:AbstractArray}} +broadcast_layer4 = @compact(; layer = Dense(6 => 6)) do x::Union{NTuple{<:AbstractArray}, AbstractVector{<:AbstractArray}} y = map(layer, x) - @return permutedims(stack(y; dims=3), (1, 3, 2)) + @return permutedims(stack(y; dims = 3), (1, 3, 2)) end NN_Memory = Chain( - Recurrence(LSTMCell(6 => 6), return_sequence=true), + Recurrence(LSTMCell(6 => 6), return_sequence = true), broadcast_layer4 ) @@ -116,11 +116,11 @@ ndims(x) # new split_into_sequences with input_window, output_window, shift and lead_time # for many-to-one, many-to-many, and different prediction lead times and overlap -xs, ys = split_into_sequences(x, y; input_window = 20, output_window = 2, shift=1, lead_time=0) +xs, ys = split_into_sequences(x, y; input_window = 20, output_window = 2, shift = 1, lead_time = 0) ys_nan = .!isnan.(ys) # split data as in train -sdf = split_data(df, hlstm, sequence_kwargs = (;input_window = 10, output_window = 3, shift = 1, lead_time = 1)); +sdf = split_data(df, hlstm, sequence_kwargs = (; input_window = 10, output_window = 3, shift = 1, lead_time = 1)); typeof(sdf) (x_train, y_train), (x_val, y_val) = sdf; @@ -129,7 +129,7 @@ y_train y_train_nan = .!isnan.(y_train) # put into train loader to compose minibatches -train_dl = EasyHybrid.DataLoader((x_train, y_train); batchsize=32) +train_dl = EasyHybrid.DataLoader((x_train, y_train); batchsize = 32) # run hybrid model forwards x_first = first(train_dl)[1] @@ -168,13 +168,12 @@ out_lstm = train( yscale = identity, # Scaling for outputs shuffleobs = false, loss_types = [:mse, :nse], - sequence_kwargs = (;input_window = 10, output_window = 4), + sequence_kwargs = (; input_window = 10, output_window = 4), plotting = false, array_type = :DimArray ) - ##################################################################################### # is neural network still running? 
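# A minimal sketch of what the sequence_kwargs used for the LSTM training above translate
# into, assuming toy sizes and that split_into_sequences is accessible as in the calls
# earlier in this tutorial: a (feature, time) array is cut into (feature, window, sample) blocks.
using AxisKeys
x_toy = KeyedArray(rand(Float32, 2, 12); inout = [:ta, :sw_pot], batch_size = 1:12)
y_toy = KeyedArray(rand(Float32, 1, 12); inout = [:reco], batch_size = 1:12)
xs_toy, ys_toy = split_into_sequences(x_toy, y_toy; input_window = 10, output_window = 1, shift = 1, lead_time = 1)
size(xs_toy)   # (2, 10, n_samples): features × window × samples
size(ys_toy)   # (1, 1, n_samples): targets × window × samples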
@@ -206,4 +205,4 @@ single_nn_out = train( shuffleobs = false, loss_types = [:mse, :nse], array_type = :DimArray -) \ No newline at end of file +) diff --git a/src/macro_hybrid.jl b/src/macro_hybrid.jl index 1b882e41..8bad549f 100644 --- a/src/macro_hybrid.jl +++ b/src/macro_hybrid.jl @@ -63,14 +63,14 @@ macro hybrid(name, params...) docstring = """ $(name)(NN, predictors, forcing, targets$(isempty(param_syms) ? "" : ", " * join(string.(param_syms), ", "))) - + A hybrid model with: - `NN`: a neural network - `predictors`: a tuple of names, i.e, (:clay, :moist) - `forcing`: data names, i.e. (:temp, ) - `targets`: data names, i.e. (:ndvi, ) $(isempty(param_syms) ? "" : "- Physical parameters: " * join(string.(param_syms), ", ")) - + All physical parameters are trainable by default. Use `?Lux.Experimental.freeze` to make specific parameters non-trainable during training. """ # Build the complete struct definition diff --git a/src/models/GenericHybridModel.jl b/src/models/GenericHybridModel.jl index 255d288f..2587d208 100644 --- a/src/models/GenericHybridModel.jl +++ b/src/models/GenericHybridModel.jl @@ -380,7 +380,7 @@ function (m::SingleNNHybridModel)(ds_k::Union{KeyedArray, AbstractDimArray}, ps, # 3) scale NN parameters (handle empty case) if !isempty(m.neural_param_names) nn_out, st_nn = LuxCore.apply(m.NN, predictors, ps.ps, st.st_nn) - nn_cols = eachslice(nn_out, dims=1) + nn_cols = eachslice(nn_out, dims = 1) nn_params = NamedTuple(zip(m.neural_param_names, nn_cols)) # Use appropriate scaling based on setting diff --git a/src/models/NNModels.jl b/src/models/NNModels.jl index 3d6b99c0..3602b79f 100644 --- a/src/models/NNModels.jl +++ b/src/models/NNModels.jl @@ -49,7 +49,7 @@ function prepare_hidden_chain( ) if hidden_layers isa Chain # user gave a chain of hidden layers only - + # Helper to safely extract dimensions from layers function get_layer_dim(l, type) if type == :input @@ -294,4 +294,4 @@ function (m::BroadcastLayer)(x, ps, st::NamedTuple{names}) where {names} return results, st end -Base.keys(m::BroadcastLayer) = Base.keys(getfield(m, :layers)) \ No newline at end of file +Base.keys(m::BroadcastLayer) = Base.keys(getfield(m, :layers)) diff --git a/src/train.jl b/src/train.jl index 8d2cebeb..02bfc3bf 100644 --- a/src/train.jl +++ b/src/train.jl @@ -119,7 +119,7 @@ function train( Random.seed!(random_seed) end - (x_train, y_train), (x_val, y_val) = split_data(data, hybridModel; array_type=array_type, kwargs...) + (x_train, y_train), (x_val, y_val) = split_data(data, hybridModel; array_type = array_type, kwargs...) train_loader = DataLoader((x_train, y_train), batchsize = batchsize, shuffle = true) @@ -418,14 +418,14 @@ function split_data( array_type::Symbol = :DimArray, kwargs... 
) - data_ = prepare_data(hybridModel, data; array_type=array_type) + data_ = prepare_data(hybridModel, data; array_type = array_type) if sequence_kwargs !== nothing x_keyed, y_keyed = data_ - sis_default = (;input_window = 10, output_window = 1, shift = 1, lead_time = 1) + sis_default = (; input_window = 10, output_window = 1, shift = 1, lead_time = 1) sis = merge(sis_default, sequence_kwargs) @info "Using split_into_sequences: $sis" - x_all, y_all = split_into_sequences(x_keyed, y_keyed; sis.input_window, sis.output_window, sis.shift, sis.lead_time); + x_all, y_all = split_into_sequences(x_keyed, y_keyed; sis.input_window, sis.output_window, sis.shift, sis.lead_time) else x_all, y_all = data_ end @@ -470,7 +470,7 @@ function split_data( x_train, y_train = view_end_dim(x_all, train_idx), view_end_dim(y_all, train_idx) x_val, y_val = view_end_dim(x_all, val_idx), view_end_dim(y_all, val_idx) return (x_train, y_train), (x_val, y_val) - + else # --- Fallback: simple random/chronological split of prepared data --- (x_train, y_train), (x_val, y_val) = splitobs((x_all, y_all); at = split_data_at, shuffle = shuffleobs) @@ -508,19 +508,19 @@ Split data into training and validation sets, either randomly, by grouping by ID """ function split_data end -function prepare_data(hm, data::KeyedArray; array_type=:KeyedArray) +function prepare_data(hm, data::KeyedArray; array_type = :KeyedArray) predictors_forcing, targets = get_prediction_target_names(hm) # KeyedArray: use () syntax for views that are differentiable return (data(predictors_forcing), data(targets)) end -function prepare_data(hm, data::AbstractDimArray; array_type=:DimArray) +function prepare_data(hm, data::AbstractDimArray; array_type = :DimArray) predictors_forcing, targets = get_prediction_target_names(hm) # DimArray: use [] syntax (copies, but differentiable) return (data[inout = At(predictors_forcing)], data[inout = At(targets)]) end -function prepare_data(hm, data::DataFrame; array_type=:DimArray) +function prepare_data(hm, data::DataFrame; array_type = :DimArray) predictors_forcing, targets = get_prediction_target_names(hm) all_predictor_cols = unique(vcat(values(predictors_forcing)...)) @@ -551,10 +551,10 @@ function prepare_data(hm, data::DataFrame; array_type=:DimArray) ds = to_dimArray(Float32.(sdf)) end @show typeof(ds) - return prepare_data(hm, ds; array_type=array_type) + return prepare_data(hm, ds; array_type = array_type) end -function prepare_data(hm, data::Tuple; array_type=:DimArray) +function prepare_data(hm, data::Tuple; array_type = :DimArray) return data end @@ -636,27 +636,27 @@ function getbyname(ka::Union{KeyedArray, AbstractDimArray}, name::Symbol) return @view ka[inout = At(name)] end -function split_into_sequences(x, y; input_window=5, output_window=1, shift=1, lead_time=1) +function split_into_sequences(x, y; input_window = 5, output_window = 1, shift = 1, lead_time = 1) ndims(x) == 2 || throw(ArgumentError("expected x to be (feature, time); got ndims(x) = $(ndims(x))")) ndims(y) == 2 || throw(ArgumentError("expected y to be (target, time); got ndims(y) = $(ndims(y))")) - Lx, Ly = size(x,2), size(y,2) + Lx, Ly = size(x, 2), size(y, 2) Lx == Ly || throw(ArgumentError("x and y must have same time length; got $Lx vs $Ly")) lead_time ≥ 0 || throw(ArgumentError("lead_time must be ≥ 0 (0 = instantaneous end)")) - nfeat, ntarget = size(x,1), size(y,1) + nfeat, ntarget = size(x, 1), size(y, 1) L = Lx - featkeys = axiskeys(x, 1) - timekeys = axiskeys(x, 2) + featkeys = axiskeys(x, 1) + timekeys = axiskeys(x, 2) 
targetkeys = axiskeys(y, 1) lead_start = lead_time - output_window + 1 - - lag_keys = Symbol.(["x$(lag)" for lag in (input_window+lead_time-1):-1:lead_time]) - lead_keys = Symbol.(["_y$(lead)" for lead in ((output_window-1):-1:0)]) - lead_keys = Symbol.(lag_keys[end-length(lead_keys)+1:end], lead_keys) - lag_keys[end-length(lead_keys)+1:end] .= lead_keys + + lag_keys = Symbol.(["x$(lag)" for lag in (input_window + lead_time - 1):-1:lead_time]) + lead_keys = Symbol.(["_y$(lead)" for lead in ((output_window - 1):-1:0)]) + lead_keys = Symbol.(lag_keys[(end - length(lead_keys) + 1):end], lead_keys) + lag_keys[(end - length(lead_keys) + 1):end] .= lead_keys sx_min = max(1, 1 - (input_window + lead_time - output_window)) sx_max = L - input_window - lead_time + 1 @@ -668,7 +668,7 @@ function split_into_sequences(x, y; input_window=5, output_window=1, shift=1, le samplekeys = timekeys[sx_vals] - Xd = zeros(Float32, nfeat, input_window, num_samples) + Xd = zeros(Float32, nfeat, input_window, num_samples) Yd = zeros(Float32, ntarget, output_window, num_samples) @inbounds @views for (ii, sx) in enumerate(sx_vals) @@ -679,8 +679,8 @@ function split_into_sequences(x, y; input_window=5, output_window=1, shift=1, le Yd[:, :, ii] .= y[:, sy:ey] end if x isa KeyedArray - Xk = KeyedArray(Xd; inout=featkeys, time=lag_keys, batch_size=samplekeys) - Yk = KeyedArray(Yd; inout=targetkeys, time=lead_keys, batch_size=samplekeys) + Xk = KeyedArray(Xd; inout = featkeys, time = lag_keys, batch_size = samplekeys) + Yk = KeyedArray(Yd; inout = targetkeys, time = lead_keys, batch_size = samplekeys) return Xk, Yk elseif x isa AbstractDimArray Xk = DimArray(Xd, (inout = featkeys, time = lag_keys, batch_size = samplekeys)) @@ -692,10 +692,10 @@ function split_into_sequences(x, y; input_window=5, output_window=1, shift=1, le end -view_end_dim = function(x_all::Array{Float32, 2}, idx) +view_end_dim = function (x_all::Array{Float32, 2}, idx) return view(x_all, :, idx) end -view_end_dim = function(x_all::Array{Float32, 3}, idx) +view_end_dim = function (x_all::Array{Float32, 3}, idx) return view(x_all, :, :, idx) -end \ No newline at end of file +end diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index b804c90e..c75faa73 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -89,22 +89,22 @@ function _compute_loss end _select_time(ŷ_t::KeyedArray, time_keys) = ŷ_t(time = time_keys) # KeyedArray: () syntax - view & differentiable _select_time(ŷ_t::AbstractDimArray, time_keys) = ŷ_t[time = At(time_keys)] # DimArray: [] syntax - copy & differentiable -function assemble_loss(ŷ, y::Union{KeyedArray{T,3}, AbstractDimArray{T,3}}, y_nan, targets, loss_spec) where {T} +function assemble_loss(ŷ, y::Union{KeyedArray{T, 3}, AbstractDimArray{T, 3}}, y_nan, targets, loss_spec) where {T} return [ begin - y_t = _get_target_y(y, target) - y_nan_t = _get_target_nan(y_nan, target) + y_t = _get_target_y(y, target) + y_nan_t = _get_target_nan(y_nan, target) - ŷ_t = ŷ[target] - ŷ_tsub = _select_time(ŷ_t, axiskeys(y_t, :time)) # dispatches based on array type + ŷ_t = ŷ[target] + ŷ_tsub = _select_time(ŷ_t, axiskeys(y_t, :time)) # dispatches based on array type - _apply_loss(ŷ_tsub, y_t, y_nan_t, loss_spec) - end + _apply_loss(ŷ_tsub, y_t, y_nan_t, loss_spec) + end for target in targets ] end -function assemble_loss(ŷ, y::Union{KeyedArray{T,2}, AbstractDimArray{T,2}}, y_nan, targets, loss_spec) where {T} +function assemble_loss(ŷ, y::Union{KeyedArray{T, 2}, AbstractDimArray{T, 2}}, y_nan, targets, loss_spec) 
where {T} return [ _apply_loss(ŷ[target], _get_target_y(y, target), _get_target_nan(y_nan, target), loss_spec) for target in targets @@ -232,4 +232,4 @@ import ChainRulesCore import AxisKeys: KeyedArray import ChainRulesCore: ProjectTo, InplaceableThunk, unthunk -(project::ProjectTo{KeyedArray})(dx::InplaceableThunk) = project(unthunk(dx)) \ No newline at end of file +(project::ProjectTo{KeyedArray})(dx::InplaceableThunk) = project(unthunk(dx)) diff --git a/src/utils/tools.jl b/src/utils/tools.jl index 86d76ac5..ef8dd85a 100644 --- a/src/utils/tools.jl +++ b/src/utils/tools.jl @@ -137,39 +137,39 @@ _key_to_colname(k) = k isa Symbol ? k : Symbol(string(k)) # 2D KeyedArray -> one DataFrame function toDataFrame( - ka::KeyedArray{T,2}, - cols_dim::Symbol = :inout, - index_dim::Symbol = :batch_size; - index_col::Symbol = :index, -) where {T} + ka::KeyedArray{T, 2}, + cols_dim::Symbol = :inout, + index_dim::Symbol = :batch_size; + index_col::Symbol = :index, + ) where {T} dcols = NamedDims.dim(ka, cols_dim) # map :row/:col -> numeric dim :contentReference[oaicite:2]{index=2} - didx = NamedDims.dim(ka, index_dim) + didx = NamedDims.dim(ka, index_dim) # reorder so rows=index_dim, cols=cols_dim ka2 = (didx == 1 && dcols == 2) ? ka : permutedims(ka, (didx, dcols)) - data = Array(AxisKeys.keyless(ka2)) # drop KeyedArray wrapper :contentReference[oaicite:3]{index=3} + data = Array(AxisKeys.keyless(ka2)) # drop KeyedArray wrapper :contentReference[oaicite:3]{index=3} names = _key_to_colname.(collect(axiskeys(ka2, 2))) # col names from keys :contentReference[oaicite:4]{index=4} - df = DataFrame(data, names; makeunique=true) + df = DataFrame(data, names; makeunique = true) df[!, index_col] = collect(axiskeys(ka2, 1)) # “index” column return df end # 3D KeyedArray -> Dict(slice_key => DataFrame) function toDataFrame( - ka::KeyedArray{T,3}, - cols_dim::Symbol = :inout, - index_dim::Symbol = :batch_size; - slice_dim::Symbol = :time, - index_col::Symbol = :index, -) where {T} - - out = Dict{Any,DataFrame}() + ka::KeyedArray{T, 3}, + cols_dim::Symbol = :inout, + index_dim::Symbol = :batch_size; + slice_dim::Symbol = :time, + index_col::Symbol = :index, + ) where {T} + + out = Dict{Any, DataFrame}() for k in axiskeys(ka, slice_dim) slice = ka(; NamedTuple{(slice_dim,)}((k,))...) 
# dynamic keyword selection - out[k] = toDataFrame(slice, cols_dim, index_dim; index_col=index_col) + out[k] = toDataFrame(slice, cols_dim, index_dim; index_col = index_col) end return out end From f3555be10dbaa6c416b27431ad7952749ed2642e Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Sat, 27 Dec 2025 18:48:57 +0100 Subject: [PATCH 11/26] DimensionalData cannot do backprop thru at view --- projects/RbQ10/Q10_lstm.jl | 138 +++++++++++++++++++++ projects/book_chapter/Project.toml | 1 + projects/book_chapter/example_synthetic.jl | 11 +- src/train.jl | 2 +- src/utils/compute_loss.jl | 8 +- 5 files changed, 152 insertions(+), 8 deletions(-) create mode 100644 projects/RbQ10/Q10_lstm.jl diff --git a/projects/RbQ10/Q10_lstm.jl b/projects/RbQ10/Q10_lstm.jl new file mode 100644 index 00000000..7a0e04f7 --- /dev/null +++ b/projects/RbQ10/Q10_lstm.jl @@ -0,0 +1,138 @@ +# activate the project's environment and instantiate dependencies +using Pkg +Pkg.activate("projects/RbQ10") +Pkg.develop(path = pwd()) +Pkg.instantiate() + +# start using the package +using EasyHybrid +using EasyHybrid.MLUtils +using Random +# for Plotting +using GLMakie +using AlgebraOfGraphics +using EasyHybrid.AxisKeys +function split_into_sequences(xin, y_target; window_size = 8) + features = size(xin, 1) + xdata = slidingwindow(xin, size = window_size, stride = 1) + # get the target values corresponding to the sliding windows, + # elements of `ydata` correspond to the last sliding window element. + #ydata = y_target[window_size:length(xdata) + window_size - 1] + ydata = slidingwindow(y_target, size = window_size, stride = 1) + + xwindowed = zeros(Float32, features, window_size, length(ydata)) + #ywindowed = zeros(Float32, 1, 1, length(ydata)) + ywindowed = zeros(Float32, 1, window_size, length(ydata)) + for i in 1:length(ydata) + xwindowed[:, :, i] = getobs(xdata, i) + ywindowed[:, :, i] = getobs(ydata, i) + end + xwindowed = KeyedArray(xwindowed; row = xin.row, window = 1:window_size, col = 1:length(ydata)) + ywindowed = KeyedArray(ywindowed; row = 1:1, window = 1:window_size, col = 1:length(ydata)) + return xwindowed, ywindowed +end + +xwindowed, ydata = split_into_sequences(ds_p_f, ds_t; window_size = 8) + +script_dir = @__DIR__ +include(joinpath(script_dir, "data", "prec_process_data.jl")) + +# Common data preprocessing +df = dfall[!, Not(:timesteps)] +ds_keyed = to_keyedArray(Float32.(df)) + +target_names = [:R_soil] +forcing_names = [:cham_temp_filled] +predictor_names = [:moisture_filled, :rgpot2] + +# Define neural network +NN = Chain(Dense(2, 15, Lux.relu), Dense(15, 15, Lux.relu), Dense(15, 1)) +# NN(rand(Float32, 2,1)) #! needs to be instantiated + + +# instantiate Hybrid Model +RbQ10 = RespirationRbQ10(NN, predictor_names, target_names, forcing_names, 2.5f0) # ? do different initial Q10s +# train model +out = train(RbQ10, ds_keyed, (:Q10,); nepochs = 10, batchsize = 512, opt = Adam(0.01)); + +## +output_file = joinpath(@__DIR__, "output_tmp/trained_model.jld2") +all_groups = get_all_groups(output_file) + +# ? Let's use `Recurrence` to stack LSTM cells and deal with sequences and batching at the same time! 
+ +NN_Memory = Chain( + Recurrence(LSTMCell(2 => 6), return_sequence = true), + Recurrence(LSTMCell(6 => 2), return_sequence = false), + Dense(2 => 1) +) + +# c_lstm = LSTMCell(4 => 6) +# ps, st = Lux.setup(rng, c_lstm) +# (y, carry), st_lstm = c_lstm(rand(Float32, 4), ps, st) +# m_lstm = Recurrence(LSTMCell(4 => 6), return_sequence=false) + +rng = Random.default_rng(1234) +ps, st = Lux.setup(rng, NN_Memory) +mock_data = rand(Float32, 2, 8, 5) #! n_features, n_timesteps (window size), n_samples (batch size) +y_, st_ = NN_Memory(mock_data, ps, st) + +RbQ10_Memory = RespirationRbQ10(NN_Memory, predictor_names, target_names, forcing_names, 2.5f0) # ? do different initial Q10s + +## legacy +# ? test lossfn +ps, st = LuxCore.setup(Random.default_rng(), RbQ10_Memory) +# the Tuple `ds_p, ds_t` is later used for batching in the `dataloader`. +ds_p_f, ds_t = EasyHybrid.prepare_data(RbQ10_Memory, ds_keyed) +ds_t_nan = .!isnan.(ds_t) + +# +xwindowed, ydata = split_into_sequences(ds_p_f, ds_t; window_size = 8) +ds_wt = ydata +ds_wt_nan = .!isnan.(ds_wt) +# xdata = slidingwindow(ds_p_f, size = 8, stride = 1) +# getobs(xdata, 1) +# split_test = rand(Float32, 3, 8, 5) +using EasyHybrid.AxisKeys +# A = KeyedArray(split_test; row=ds_p_f.row, window=1:8, col=1:5) + +broadcast_layer2 = @compact(; layer = Dense(2 => 1)) do x::Union{NTuple{<:AbstractArray}, AbstractVector{<:AbstractArray}} + y = map(layer, x) + @return permutedims(stack(y; dims = 3), (1, 3, 2)) +end + +NN_Memory = Chain( + Recurrence(LSTMCell(2 => 2), return_sequence = true), + broadcast_layer2 +) + + +RbQ10_Memory = RespirationRbQ10(NN_Memory, predictor_names, target_names, forcing_names, 2.5f0) # ? do different initial Q10s + +#? this sets up initial ps for the hybrid model version +rng = Random.default_rng(1234) +ps, st = Lux.setup(rng, RbQ10_Memory) + +xdl = EasyHybrid.DataLoader(xwindowed; batchsize = 512) +ydl = EasyHybrid.DataLoader((ds_wt, ds_wt_nan); batchsize = 512) + +sdf = RbQ10_Memory(first(xdl), ps, st) + +mid1 = first(xdl)(RbQ10_Memory.forcing) +mid1[:, end, :] + + +o1, st1 = LuxCore.apply(RbQ10_Memory.NN, first(xdl)(RbQ10_Memory.predictors), ps.ps, st.st) + +sdf[1].R_soil +sdf[1].Rb +first(ydl)[1] + +ls = EasyHybrid.lossfn(RbQ10_Memory, first(xdl), first(ydl), ps, st, LoggingLoss()) + +ls_logs = EasyHybrid.lossfn(RbQ10_Memory, xwindowed, (ds_wt, ds_wt_nan), ps, st, LoggingLoss(train_mode = false)) + + +# p = A(RbQ10_Memory.predictors) +# x = Array(A(RbQ10_Memory.forcing)) # don't propagate names after this +# Rb, st = LuxCore.apply(RbQ10_Memory.NN, p, ps.ps, st.st) diff --git a/projects/book_chapter/Project.toml b/projects/book_chapter/Project.toml index 2bccc509..34cb44cb 100644 --- a/projects/book_chapter/Project.toml +++ b/projects/book_chapter/Project.toml @@ -3,6 +3,7 @@ AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" EasyHybrid = "61bb816a-e6af-4913-ab9e-91bff2e122e3" +GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" diff --git a/projects/book_chapter/example_synthetic.jl b/projects/book_chapter/example_synthetic.jl index 27c44cb7..c91f7698 100644 --- a/projects/book_chapter/example_synthetic.jl +++ b/projects/book_chapter/example_synthetic.jl @@ -36,7 +36,10 @@ df = df[1:20000, :] dfnot = Float32.(df[!, Not(:time)]) ka = to_keyedArray(dfnot) -da = 
to_dimArray(dfnot) + +# DimensionalData +mat = Array(Matrix(dfnot)') +da = DimArray(mat, (inout = Symbol.(names(dfnot)), batch_size = 1:size(dfnot, 1))) # ============================================================================= # Define the Physical Model @@ -78,7 +81,7 @@ neural_param_names = [:rb] # Neural network predicted parameters # ============================================================================= # Single NN Hybrid Model Training # ============================================================================= -using WGLMakie +using GLMakie # Create single NN hybrid model using the unified constructor predictors_single_nn = [:sw_pot, :dsw_pot] # Predictor variables (solar radiation, and its derivative) @@ -146,7 +149,9 @@ single_nn_out = train( opt = AdamW(0.1), # Optimizer and learning rate monitor_names = [:rb, :Q10], # Parameters to monitor during training yscale = identity, # Scaling for outputs - shuffleobs = true + shuffleobs = true, + array_type = :DimArray, + plotting = false ) LuxCore.testmode(single_nn_out.st) diff --git a/src/train.jl b/src/train.jl index 02bfc3bf..fac880bd 100644 --- a/src/train.jl +++ b/src/train.jl @@ -73,7 +73,7 @@ function train( autodiff_backend = AutoZygote(), return_gradients = True(), # Array type for data conversion - array_type = :DimArray, # :DimArray or :KeyedArray + array_type = :KeyedArray, # :DimArray or :KeyedArray # Loss and evaluation training_loss = :mse, loss_types = [:mse, :r2], diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index c75faa73..a2b513af 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -167,11 +167,11 @@ end # For DimArray function _get_target_y(y::AbstractDimArray, target) - return @view y[inout = At(target)] + return y[inout = At(target)] end function _get_target_y(y::AbstractDimArray, targets::Vector) - return @view y[inout = At(targets)] + return y[inout = At(targets)] end # For Tuple (e.g. (y_obs, y_sigma)), supports KeyedArray or DimArray as y_obs @@ -199,11 +199,11 @@ end # For DimArray function _get_target_nan(y_nan::AbstractDimArray, target) - return @view y_nan[inout = At(target)] + return y_nan[inout = At(target)] end function _get_target_nan(y_nan::AbstractDimArray, targets::Vector) - return @view y_nan[inout = At(targets)] + return y_nan[inout = At(targets)] end """ From 56b4b2f406bc291b9547a3fd755096d1bed62e66 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Sat, 27 Dec 2025 20:13:41 +0100 Subject: [PATCH 12/26] put output 3D 2D and DimArray and KeyedArray into Dataframe --- .../tutorials/example_synthetic_lstm.jl | 10 +- projects/book_chapter/example_synthetic.jl | 4 +- src/train.jl | 2 - src/utils/tools.jl | 95 +++++++++++++++---- 4 files changed, 78 insertions(+), 33 deletions(-) diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index ecec6c5e..d815aeb5 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -109,11 +109,9 @@ hlstm = constructHybridModel( # ================================================================================= # show steps for data preparation, happens under the hood in the end. 
-# two KeyedArrays +# :KeyedArray and :DimArray are supported x, y = prepare_data(hlstm, df, array_type = :DimArray) -ndims(x) - # new split_into_sequences with input_window, output_window, shift and lead_time # for many-to-one, many-to-many, and different prediction lead times and overlap xs, ys = split_into_sequences(x, y; input_window = 20, output_window = 2, shift = 1, lead_time = 0) @@ -145,14 +143,9 @@ reco_mod = frun[1].reco reco_obs = dropdims(y_first, dims = 1) reco_nan = .!isnan.(reco_obs) -# simulate loss -> pick the right window -reco_mod(time = axiskeys(reco_obs, :time)) .- reco_obs - # compute loss EasyHybrid.compute_loss(hlstm, ps, st, (x_train, (y_train, y_train_nan)), logging = LoggingLoss(train_mode = true)) -# Zygote gradient of loss - # ============================================================================= # train on DataFrame # ============================================================================= @@ -188,7 +181,6 @@ hm = constructHybridModel( hidden_layers = NN, # Neural network architecture scale_nn_outputs = true, # Scale neural network outputs input_batchnorm = false, # Apply batch normalization to inputs - ) diff --git a/projects/book_chapter/example_synthetic.jl b/projects/book_chapter/example_synthetic.jl index c91f7698..c0bf3457 100644 --- a/projects/book_chapter/example_synthetic.jl +++ b/projects/book_chapter/example_synthetic.jl @@ -119,7 +119,9 @@ single_nn_out = train( yscale = identity, # Scaling for outputs shuffleobs = true, loss_types = [:mse, :nse], - extra_loss = extra_loss + extra_loss = extra_loss, + array_type = :KeyedArray, + plotting = false ) # ============================================================================= diff --git a/src/train.jl b/src/train.jl index fac880bd..80f7d425 100644 --- a/src/train.jl +++ b/src/train.jl @@ -474,7 +474,6 @@ function split_data( else # --- Fallback: simple random/chronological split of prepared data --- (x_train, y_train), (x_val, y_val) = splitobs((x_all, y_all); at = split_data_at, shuffle = shuffleobs) - @show typeof(x_train) return (x_train, y_train), (x_val, y_val) end end @@ -550,7 +549,6 @@ function prepare_data(hm, data::DataFrame; array_type = :DimArray) else ds = to_dimArray(Float32.(sdf)) end - @show typeof(ds) return prepare_data(hm, ds; array_type = array_type) end diff --git a/src/utils/tools.jl b/src/utils/tools.jl index ef8dd85a..b6d832dd 100644 --- a/src/utils/tools.jl +++ b/src/utils/tools.jl @@ -130,36 +130,83 @@ function split_data(df::DataFrame, target, xvars, seqID; f = 0.8, batchsize = 32 end using AxisKeys -using NamedDims +using NamedDims: NamedDims # Required for NamedDims.dim with KeyedArrays using DataFrames +using DimensionalData: DimensionalData, AbstractDimArray, Dim, DimArray, dims, lookup, At _key_to_colname(k) = k isa Symbol ? 
k : Symbol(string(k)) -# 2D KeyedArray -> one DataFrame +# Helper to get dimension index from dimension name (works for both KeyedArray and DimArray) +_dim_index(ka::KeyedArray, name::Symbol) = NamedDims.dim(ka, name) +function _dim_index(da::AbstractDimArray, name::Symbol) + dim_names = DimensionalData.name.(dims(da)) + idx = findfirst(==(name), dim_names) + isnothing(idx) && throw(ArgumentError("Dimension :$name not found in array with dimensions $dim_names")) + return idx +end + +# Helper to extract raw array data (works for both KeyedArray and DimArray) +_raw_array(ka::KeyedArray) = Array(AxisKeys.keyless(ka)) +_raw_array(da::AbstractDimArray) = Array(parent(da)) + +# Helper to select a single value along a named dimension +_select_at(ka::KeyedArray, dim_name::Symbol, key) = ka(; NamedTuple{(dim_name,)}((key,))...) +_select_at(da::AbstractDimArray, dim_name::Symbol, key) = view(da, Dim{dim_name}(At(key))) + +# 2D Labeled Array -> DataFrame (works for both KeyedArray and DimArray) +""" + toDataFrame(arr::Union{KeyedArray{T, 2}, AbstractDimArray{T, 2}}, cols_dim=:inout, index_dim=:batch_size; index_col=:index) + +Convert a 2D labeled array (KeyedArray or DimArray) to a DataFrame. + +# Arguments +- `arr`: The 2D labeled array to convert +- `cols_dim`: Dimension name to use as DataFrame columns (default: `:inout`) +- `index_dim`: Dimension name to use as DataFrame row index (default: `:batch_size`) +- `index_col`: Name for the index column in the result (default: `:index`) + +# Returns +- `DataFrame` with columns from `cols_dim` keys and an index column from `index_dim` keys +""" function toDataFrame( - ka::KeyedArray{T, 2}, + arr::Union{KeyedArray{T, 2}, AbstractDimArray{T, 2}}, cols_dim::Symbol = :inout, index_dim::Symbol = :batch_size; index_col::Symbol = :index, ) where {T} - dcols = NamedDims.dim(ka, cols_dim) # map :row/:col -> numeric dim :contentReference[oaicite:2]{index=2} - didx = NamedDims.dim(ka, index_dim) + dcols = _dim_index(arr, cols_dim) + didx = _dim_index(arr, index_dim) - # reorder so rows=index_dim, cols=cols_dim - ka2 = (didx == 1 && dcols == 2) ? ka : permutedims(ka, (didx, dcols)) + # Reorder so rows=index_dim, cols=cols_dim (i.e., didx=1, dcols=2) + arr2 = (didx == 1 && dcols == 2) ? arr : permutedims(arr, (didx, dcols)) - data = Array(AxisKeys.keyless(ka2)) # drop KeyedArray wrapper :contentReference[oaicite:3]{index=3} - names = _key_to_colname.(collect(axiskeys(ka2, 2))) # col names from keys :contentReference[oaicite:4]{index=4} + data = _raw_array(arr2) + col_names = _key_to_colname.(collect(axiskeys(arr2, 2))) - df = DataFrame(data, names; makeunique = true) - df[!, index_col] = collect(axiskeys(ka2, 1)) # “index” column + df = DataFrame(data, col_names; makeunique = true) + df[!, index_col] = collect(axiskeys(arr2, 1)) return df end -# 3D KeyedArray -> Dict(slice_key => DataFrame) +# 3D Labeled Array -> Dict(slice_key => DataFrame) +""" + toDataFrame(arr::AbstractLabeledArray{T, 3}, cols_dim=:inout, index_dim=:batch_size; slice_dim=:time, index_col=:index) + +Convert a 3D labeled array (KeyedArray or DimArray) to a Dict of DataFrames, one per slice. 
+ +# Arguments +- `arr`: The 3D labeled array to convert +- `cols_dim`: Dimension name to use as DataFrame columns (default: `:inout`) +- `index_dim`: Dimension name to use as DataFrame row index (default: `:batch_size`) +- `slice_dim`: Dimension name to slice along (default: `:time`) +- `index_col`: Name for the index column in each result DataFrame (default: `:index`) + +# Returns +- `Dict{Any, DataFrame}` mapping slice keys to DataFrames +""" function toDataFrame( - ka::KeyedArray{T, 3}, + arr::Union{KeyedArray{T, 3}, AbstractDimArray{T, 3}}, cols_dim::Symbol = :inout, index_dim::Symbol = :batch_size; slice_dim::Symbol = :time, @@ -167,20 +214,26 @@ function toDataFrame( ) where {T} out = Dict{Any, DataFrame}() - for k in axiskeys(ka, slice_dim) - slice = ka(; NamedTuple{(slice_dim,)}((k,))...) # dynamic keyword selection + for k in axiskeys(arr, slice_dim) + slice = _select_at(arr, slice_dim, k) out[k] = toDataFrame(slice, cols_dim, index_dim; index_col = index_col) end return out end -function toDataFrame(ka::AbstractDimArray) - data_array = Array(ka') - df = DataFrame(data_array, Array(dims(ka, :inout))) - df.index = Array(dims(ka, :batch_size)) - return df -end +# Convenience: extract specific targets from a labeled array into a DataFrame +""" + toDataFrame(arr, target_names) + +Extract specific target variables from a labeled array into a DataFrame with `_pred` suffix. +# Arguments +- `arr`: A labeled array or NamedTuple-like object with property access +- `target_names`: Vector of target variable names to extract + +# Returns +- `DataFrame` with columns named `_pred` for each target +""" function toDataFrame(ka, target_names) data = [getproperty(ka, t_name) for t_name in target_names] From c917a336696f91cc80f0977a825ff6e8dd6bd766 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Sat, 27 Dec 2025 21:28:53 +0100 Subject: [PATCH 13/26] construction of broadcast layer as RecurrenceOutputDense --- .../tutorials/example_synthetic_lstm.jl | 12 +- src/models/NNModels.jl | 111 ++++++++++++++++-- 2 files changed, 108 insertions(+), 15 deletions(-) diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index d815aeb5..d52b505e 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -39,14 +39,12 @@ df = df[1:20000, :] # Define neural network NN = Chain(Dense(15, 15, Lux.sigmoid), Dense(15, 15, Lux.sigmoid), Dense(15, 1)) -broadcast_layer4 = @compact(; layer = Dense(6 => 6)) do x::Union{NTuple{<:AbstractArray}, AbstractVector{<:AbstractArray}} - y = map(layer, x) - @return permutedims(stack(y; dims = 3), (1, 3, 2)) -end - +# Define LSTM-based neural network with memory +# Note: When the Chain ends with a Recurrence layer, EasyHybrid automatically adds +# a RecurrenceOutputDense layer to handle the sequence output format. +# The user only needs to define the Recurrence layer itself! 
NN_Memory = Chain( - Recurrence(LSTMCell(6 => 6), return_sequence = true), - broadcast_layer4 + Recurrence(LSTMCell(15 => 15), return_sequence = true), ) # ============================================================================= diff --git a/src/models/NNModels.jl b/src/models/NNModels.jl index 3602b79f..9ef49522 100644 --- a/src/models/NNModels.jl +++ b/src/models/NNModels.jl @@ -1,4 +1,4 @@ -export SingleNNModel, MultiNNModel, constructNNModel, prepare_hidden_chain, BroadcastLayer +export SingleNNModel, MultiNNModel, constructNNModel, prepare_hidden_chain, BroadcastLayer, RecurrenceOutputDense using Lux, LuxCore using ..EasyHybrid: hard_sigmoid @@ -16,6 +16,66 @@ struct SingleNNModel <: LuxCore.AbstractLuxContainerLayer{ scale_nn_outputs::Bool end +""" + RecurrenceOutputDense(in_dims => out_dims, [activation]) + +A layer that wraps a Dense layer to handle sequence outputs from Recurrence layers. + +When a Recurrence layer has `return_sequence=true`, it outputs a tuple/vector of arrays +(one per timestep). This layer broadcasts the Dense operation over each timestep and +reshapes the result to `(features, timesteps, batch)` format. + +# Arguments +- `in_dims::Int`: Input dimension (should match Recurrence output dimension) +- `out_dims::Int`: Output dimension +- `activation`: Activation function (default: `identity`) + +# Example +```julia +# Instead of manually creating: +broadcast_layer = @compact(; layer = Dense(15 => 15)) do x + y = map(layer, x) + @return permutedims(stack(y; dims = 3), (1, 3, 2)) +end + +# Simply use: +Chain( + Recurrence(LSTMCell(15 => 15), return_sequence = true), + RecurrenceOutputDense(15 => 15) +) +``` +""" +struct RecurrenceOutputDense{D <: Dense} <: LuxCore.AbstractLuxWrapperLayer{:layer} + layer::D +end + +function RecurrenceOutputDense(mapping::Pair{Int, Int}, activation = identity) + return RecurrenceOutputDense(Dense(mapping.first, mapping.second, activation)) +end + +function RecurrenceOutputDense(in_dims::Int, out_dims::Int, activation = identity) + return RecurrenceOutputDense(Dense(in_dims, out_dims, activation)) +end + +# Handle tuple output from Recurrence (return_sequence=true) +function (m::RecurrenceOutputDense)(x::NTuple{N, <:AbstractArray}, ps, st) where N + y = map(xi -> first(LuxCore.apply(m.layer, xi, ps, st)), x) + result = permutedims(stack(y; dims = 3), (1, 3, 2)) + return result, st +end + +# Handle vector output from Recurrence (return_sequence=true) +function (m::RecurrenceOutputDense)(x::AbstractVector{<:AbstractArray}, ps, st) + y = map(xi -> first(LuxCore.apply(m.layer, xi, ps, st)), x) + result = permutedims(stack(y; dims = 3), (1, 3, 2)) + return result, st +end + +# Fallback for regular array input (non-sequence mode) +function (m::RecurrenceOutputDense)(x::AbstractArray, ps, st) + return LuxCore.apply(m.layer, x, ps, st) +end + """ prepare_hidden_chain(hidden_layers, in_dim, out_dim; activation, input_batchnorm=false) @@ -26,6 +86,8 @@ Construct a neural network `Chain` for use in NN models. - If a `Vector{Int}`, specifies the sizes of each hidden layer. For example, `[32, 16]` creates two hidden layers with 32 and 16 units, respectively. - If a `Chain`, the user provides a pre-built chain of hidden layers (excluding input/output layers). + If the chain ends with a `Recurrence` layer, a `RecurrenceOutputDense` layer is automatically + added to handle the sequence output format. - `in_dim::Int`: Number of input features (input dimension). - `out_dim::Int`: Number of output features (output dimension). 
- `activation`: Activation function to use in hidden layers (default: `tanh`). @@ -36,9 +98,21 @@ Construct a neural network `Chain` for use in NN models. - Optional input batch normalization (if `input_batchnorm=true`) - Input layer: `Dense(in_dim, h₁, activation)` where `h₁` is the first hidden size - Hidden layers: either user-supplied `Chain` or constructed from `hidden_layers` + - If last hidden layer is a `Recurrence`, a `RecurrenceOutputDense` is added to handle sequence output - Output layer: `Dense(hₖ, out_dim)` where `hₖ` is the last hidden size where `h₁` is the first hidden size and `hₖ` the last. + +# Example with Recurrence (LSTM) +```julia +# User only needs to define: +NN_Memory = Chain( + Recurrence(LSTMCell(15 => 15), return_sequence = true), +) + +# The function automatically adds the RecurrenceOutputDense layer to handle sequence output +model = constructHybridModel(..., hidden_layers = NN_Memory, ...) +``` """ function prepare_hidden_chain( hidden_layers::Union{Vector{Int}, Chain}, @@ -66,6 +140,16 @@ function prepare_hidden_chain( return nothing end + # Check if last layer is a Recurrence layer (needs special handling for sequence output) + # In this framework, we ALWAYS assume return_sequence=true for Recurrence layers + # (this is the EasyHybrid convention, regardless of Lux's default) + function is_sequence_recurrence(layer) + return layer isa Recurrence + end + + last_layer = hidden_layers.layers[end] + ends_with_sequence_recurrence = is_sequence_recurrence(last_layer) + # Determine first_h by searching forward first_h = nothing for i in 1:length(hidden_layers) @@ -75,7 +159,7 @@ function prepare_hidden_chain( break end end - isnothing(first_h) && error("Could not determine input dimension of hidden_layers Chain (found no Dense or BatchNorm layer).") + isnothing(first_h) && error("Could not determine input dimension of hidden_layers Chain.") # Determine last_h by searching backward last_h = nothing @@ -88,12 +172,23 @@ function prepare_hidden_chain( end isnothing(last_h) && error("Could not determine output dimension of hidden_layers Chain.") - return Chain( - input_batchnorm ? BatchNorm(in_dim, affine = false) : identity, - Dense(in_dim, first_h, activation), - hidden_layers.layers..., - Dense(last_h, out_dim) - ) + if ends_with_sequence_recurrence + # Chain ends with Recurrence layer (return_sequence=true) - add RecurrenceOutputDense to handle sequence output + return Chain( + input_batchnorm ? BatchNorm(in_dim, affine = false) : identity, + Dense(in_dim, first_h, activation), + hidden_layers.layers..., + RecurrenceOutputDense(last_h => last_h, activation), + Dense(last_h, out_dim) + ) + else + return Chain( + input_batchnorm ? 
BatchNorm(in_dim, affine = false) : identity, + Dense(in_dim, first_h, activation), + hidden_layers.layers..., + Dense(last_h, out_dim) + ) + end else # user gave a vector of hidden‐layer sizes hs = hidden_layers From e0d166529a73859cabc26d401e7396412abeec35 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Sat, 27 Dec 2025 21:43:57 +0100 Subject: [PATCH 14/26] Dev example as Literate.jl script --- .../tutorials/example_synthetic_lstm.jl | 152 +++++++++--------- docs/make.jl | 1 + 2 files changed, 73 insertions(+), 80 deletions(-) diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index d52b505e..ed40c724 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -1,17 +1,13 @@ -# CC BY-SA 4.0 -# ============================================================================= -# EasyHybrid Example: Synthetic Data Analysis -# ============================================================================= -# This example demonstrates how to use EasyHybrid to train a hybrid model -# on synthetic data for respiration modeling with Q10 temperature sensitivity. -# ============================================================================= - -# ============================================================================= -# Project Setup and Environment -# ============================================================================= -using Pkg +# # LSTM Hybrid Model with EasyHybrid.jl +# +# This tutorial demonstrates how to use EasyHybrid to train a hybrid model with LSTM +# neural networks on synthetic data for respiration modeling with Q10 temperature sensitivity. +# The code for this tutorial can be found in [docs/src/literate/tutorials](https://github.com/EarthyScience/EasyHybrid.jl/tree/main/docs/src/literate/tutorials/) => example_synthetic_lstm.jl. +# +# ## 1. Load Packages # Set project path and activate environment +using Pkg project_path = "docs" Pkg.activate(project_path) EasyHybrid_path = joinpath(pwd()) @@ -24,19 +20,17 @@ using AxisKeys using DimensionalData using Lux -# ============================================================================= -# Data Loading and Preprocessing -# ============================================================================= -# Load synthetic dataset from GitHub into DataFrame +# ## 2. Data Loading and Preprocessing + +# Load synthetic dataset from GitHub df = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/Synthetic4BookChap.nc") # Select a subset of data for faster execution df = df[1:20000, :] -# ============================================================================= -# neural network model -# ============================================================================= -# Define neural network +# ## 3. 
Define Neural Network Architectures + +# Define a standard feedforward neural network NN = Chain(Dense(15, 15, Lux.sigmoid), Dense(15, 15, Lux.sigmoid), Dense(15, 1)) # Define LSTM-based neural network with memory @@ -47,106 +41,106 @@ NN_Memory = Chain( Recurrence(LSTMCell(15 => 15), return_sequence = true), ) -# ============================================================================= -# Define the Physical Model -# ============================================================================= -# RbQ10 model: Respiration model with Q10 temperature sensitivity -# Parameters: -# - ta: air temperature [°C] -# - Q10: temperature sensitivity factor [-] -# - rb: basal respiration rate [μmol/m²/s] -# - tref: reference temperature [°C] (default: 15.0) +# ## 4. Define the Physical Model + +""" + RbQ10(; ta, Q10, rb, tref=15.0f0) + +Respiration model with Q10 temperature sensitivity. + +- `ta`: air temperature [°C] +- `Q10`: temperature sensitivity factor [-] +- `rb`: basal respiration rate [μmol/m²/s] +- `tref`: reference temperature [°C] (default: 15.0) +""" function RbQ10(; ta, Q10, rb, tref = 15.0f0) reco = rb .* Q10 .^ (0.1f0 .* (ta .- tref)) return (; reco, Q10, rb) end -# ============================================================================= -# Define Model Parameters -# ============================================================================= +# ## 5. Define Model Parameters + # Parameter specification: (default, lower_bound, upper_bound) parameters = ( - # Parameter name | Default | Lower | Upper | Description rb = (3.0f0, 0.0f0, 13.0f0), # Basal respiration [μmol/m²/s] Q10 = (2.0f0, 1.0f0, 4.0f0), # Temperature sensitivity factor [-] ) -# ============================================================================= -# Configure Hybrid Model Components -# ============================================================================= -# Define input variables -forcing = [:ta] # Forcing variables (temperature) +# ## 6. Configure Hybrid Model Components -# Target variable -target = [:reco] # Target variable (respiration) +# Define input variables +# Forcing variables (temperature) +forcing = [:ta] +# Predictor variables (solar radiation, and its derivative) +predictors = [:sw_pot, :dsw_pot] +# Target variable (respiration) +target = [:reco] # Parameter classification -global_param_names = [:Q10] # Global parameters (same for all samples) -neural_param_names = [:rb] # Neural network predicted parameters +# Global parameters (same for all samples) +global_param_names = [:Q10] +# Neural network predicted parameters +neural_param_names = [:rb] -# ============================================================================= -# Single NN Hybrid Model Training -# ============================================================================= -using GLMakie -# Create single NN hybrid model using the unified constructor -predictors = [:sw_pot, :dsw_pot] # Predictor variables (solar radiation, and its derivative) +# ## 7. 
Construct LSTM Hybrid Model +# Create LSTM hybrid model using the unified constructor hlstm = constructHybridModel( - predictors, # Input features - forcing, # Forcing variables - target, # Target variables - RbQ10, # Process-based model function - parameters, # Parameter definitions - neural_param_names, # NN-predicted parameters - global_param_names, # Global parameters + predictors, + forcing, + target, + RbQ10, + parameters, + neural_param_names, + global_param_names, hidden_layers = NN_Memory, # Neural network architecture scale_nn_outputs = true, # Scale neural network outputs input_batchnorm = false # Apply batch normalization to inputs ) -# ================================================================================= -# show steps for data preparation, happens under the hood in the end. +# ## 8. Data Preparation Steps (Demonstration) + +# The following steps demonstrate what happens under the hood during training. +# In practice, you can skip to Section 9 and use the `train` function directly. # :KeyedArray and :DimArray are supported x, y = prepare_data(hlstm, df, array_type = :DimArray) -# new split_into_sequences with input_window, output_window, shift and lead_time +# New split_into_sequences with input_window, output_window, shift and lead_time # for many-to-one, many-to-many, and different prediction lead times and overlap xs, ys = split_into_sequences(x, y; input_window = 20, output_window = 2, shift = 1, lead_time = 0) ys_nan = .!isnan.(ys) -# split data as in train -sdf = split_data(df, hlstm, sequence_kwargs = (; input_window = 10, output_window = 3, shift = 1, lead_time = 1)); +# Split data as in train +sdf = split_data(df, hlstm, sequence_kwargs = (; input_window = 10, output_window = 3, shift = 1, lead_time = 1)) typeof(sdf) -(x_train, y_train), (x_val, y_val) = sdf; +(x_train, y_train), (x_val, y_val) = sdf x_train y_train y_train_nan = .!isnan.(y_train) -# put into train loader to compose minibatches +# Put into train loader to compose minibatches train_dl = EasyHybrid.DataLoader((x_train, y_train); batchsize = 32) -# run hybrid model forwards +# Run hybrid model forwards x_first = first(train_dl)[1] y_first = first(train_dl)[2] ps, st = Lux.setup(Random.default_rng(), hlstm) frun = hlstm(x_first, ps, st) -# extract predicted yhat +# Extract predicted yhat reco_mod = frun[1].reco -# bring observations in same shape +# Bring observations in same shape reco_obs = dropdims(y_first, dims = 1) reco_nan = .!isnan.(reco_obs) -# compute loss +# Compute loss EasyHybrid.compute_loss(hlstm, ps, st, (x_train, (y_train, y_train_nan)), logging = LoggingLoss(train_mode = true)) -# ============================================================================= -# train on DataFrame -# ============================================================================= +# ## 9. Train LSTM Hybrid Model out_lstm = train( hlstm, @@ -164,24 +158,22 @@ out_lstm = train( array_type = :DimArray ) +# ## 10. Train Single NN Hybrid Model (Optional) -##################################################################################### -# is neural network still running? 
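+# A rough reading of the sequence_kwargs used above (hedged; exact shapes depend on
+# split_into_sequences): with input_window = 10 and output_window = 4, each training sample
+# is a 10-step window of predictors and forcing, the LSTM emits one prediction per input
+# step, and the loss is evaluated only on the time steps present in the 4-step target
+# window, because compute_loss subsets the prediction's :time axis to the observation's
+# time keys. shift controls the overlap between consecutive windows and lead_time offsets
+# the target window relative to the inputs, roughly (assumed layout):
+#
+#     xs, ys = split_into_sequences(x, y; input_window = 10, output_window = 4)
+#     size(xs)   # (n_features, 10, n_windows)
+#     size(ys)   # (n_targets,   4, n_windows)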
- +# For comparison, we can also train a hybrid model with a standard feedforward neural network hm = constructHybridModel( - predictors, # Input features - forcing, # Forcing variables - target, # Target variables - RbQ10, # Process-based model function - parameters, # Parameter definitions - neural_param_names, # NN-predicted parameters - global_param_names, # Global parameters + predictors, + forcing, + target, + RbQ10, + parameters, + neural_param_names, + global_param_names, hidden_layers = NN, # Neural network architecture scale_nn_outputs = true, # Scale neural network outputs input_batchnorm = false, # Apply batch normalization to inputs ) - # Train the hybrid model single_nn_out = train( hm, diff --git a/docs/make.jl b/docs/make.jl index d7bd3f01..cecb28ba 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -60,6 +60,7 @@ makedocs(; "Hyperparameter Tuning" => "tutorials/hyperparameter_tuning.md", "Slurm" => "tutorials/slurm.md", "Cross-validation" => "tutorials/folds.md", + "LSTM Hybrid Model" => "tutorials/example_synthetic_lstm.md", "Loss Functions" => "tutorials/losses.md", ], "Research" => [ From baa71703224ff7d80ec7da3614e567e7888a953f Mon Sep 17 00:00:00 2001 From: BernhardAhrens <32769547+BernhardAhrens@users.noreply.github.com> Date: Sat, 27 Dec 2025 21:57:58 +0100 Subject: [PATCH 15/26] Update docs/literate/tutorials/example_synthetic_lstm.jl Co-authored-by: Lazaro Alonso --- docs/literate/tutorials/example_synthetic_lstm.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index ed40c724..bdabc838 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -23,7 +23,7 @@ using Lux # ## 2. Data Loading and Preprocessing # Load synthetic dataset from GitHub -df = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/Synthetic4BookChap.nc") +df = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/Synthetic4BookChap.nc"); # Select a subset of data for faster execution df = df[1:20000, :] From 455360f84a7bfbbfddbcf6330f93f257b74e0566 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Sat, 27 Dec 2025 21:59:48 +0100 Subject: [PATCH 16/26] prints --- docs/literate/tutorials/example_synthetic_lstm.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index ed40c724..2e34a13c 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -26,7 +26,8 @@ using Lux df = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/Synthetic4BookChap.nc") # Select a subset of data for faster execution -df = df[1:20000, :] +df = df[1:20000, :]; +first(df, 5) # ## 3. 
Define Neural Network Architectures From 92792eb9e241f521fba1d2f8bc5c042a479d8d62 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Sat, 27 Dec 2025 22:03:00 +0100 Subject: [PATCH 17/26] print short --- .../tutorials/example_synthetic_lstm.jl | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index 2e34a13c..ce96d1a8 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -27,7 +27,7 @@ df = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/ # Select a subset of data for faster execution df = df[1:20000, :]; -first(df, 5) +first(df, 5); # ## 3. Define Neural Network Architectures @@ -105,41 +105,41 @@ hlstm = constructHybridModel( # In practice, you can skip to Section 9 and use the `train` function directly. # :KeyedArray and :DimArray are supported -x, y = prepare_data(hlstm, df, array_type = :DimArray) +x, y = prepare_data(hlstm, df, array_type = :DimArray); # New split_into_sequences with input_window, output_window, shift and lead_time # for many-to-one, many-to-many, and different prediction lead times and overlap -xs, ys = split_into_sequences(x, y; input_window = 20, output_window = 2, shift = 1, lead_time = 0) -ys_nan = .!isnan.(ys) +xs, ys = split_into_sequences(x, y; input_window = 20, output_window = 2, shift = 1, lead_time = 0); +ys_nan = .!isnan.(ys); # Split data as in train -sdf = split_data(df, hlstm, sequence_kwargs = (; input_window = 10, output_window = 3, shift = 1, lead_time = 1)) +sdf = split_data(df, hlstm, sequence_kwargs = (; input_window = 10, output_window = 3, shift = 1, lead_time = 1)); -typeof(sdf) -(x_train, y_train), (x_val, y_val) = sdf -x_train -y_train -y_train_nan = .!isnan.(y_train) +typeof(sdf); +(x_train, y_train), (x_val, y_val) = sdf; +x_train; +y_train; +y_train_nan = .!isnan.(y_train); # Put into train loader to compose minibatches -train_dl = EasyHybrid.DataLoader((x_train, y_train); batchsize = 32) +train_dl = EasyHybrid.DataLoader((x_train, y_train); batchsize = 32); # Run hybrid model forwards -x_first = first(train_dl)[1] -y_first = first(train_dl)[2] +x_first = first(train_dl)[1]; +y_first = first(train_dl)[2]; -ps, st = Lux.setup(Random.default_rng(), hlstm) -frun = hlstm(x_first, ps, st) +ps, st = Lux.setup(Random.default_rng(), hlstm); +frun = hlstm(x_first, ps, st); # Extract predicted yhat -reco_mod = frun[1].reco +reco_mod = frun[1].reco; # Bring observations in same shape -reco_obs = dropdims(y_first, dims = 1) -reco_nan = .!isnan.(reco_obs) +reco_obs = dropdims(y_first, dims = 1); +reco_nan = .!isnan.(reco_obs); # Compute loss -EasyHybrid.compute_loss(hlstm, ps, st, (x_train, (y_train, y_train_nan)), logging = LoggingLoss(train_mode = true)) +EasyHybrid.compute_loss(hlstm, ps, st, (x_train, (y_train, y_train_nan)), logging = LoggingLoss(train_mode = true)); # ## 9. Train LSTM Hybrid Model @@ -157,7 +157,7 @@ out_lstm = train( sequence_kwargs = (; input_window = 10, output_window = 4), plotting = false, array_type = :DimArray -) +); # ## 10. 
Train Single NN Hybrid Model (Optional) @@ -188,4 +188,4 @@ single_nn_out = train( shuffleobs = false, loss_types = [:mse, :nse], array_type = :DimArray -) +); From 2fd596d8dc750a96a7bac0470a0c0c6701c02753 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Sun, 28 Dec 2025 16:55:56 +0100 Subject: [PATCH 18/26] update tutorials --- .../tutorials/example_synthetic_lstm.jl | 10 +- docs/literate/tutorials/folds.jl | 6 + docs/src/tutorials/example_synthetic_lstm.md | 281 ++++++++++++++++++ 3 files changed, 292 insertions(+), 5 deletions(-) create mode 100644 docs/src/tutorials/example_synthetic_lstm.md diff --git a/docs/literate/tutorials/example_synthetic_lstm.jl b/docs/literate/tutorials/example_synthetic_lstm.jl index 500789a3..314ca6ab 100644 --- a/docs/literate/tutorials/example_synthetic_lstm.jl +++ b/docs/literate/tutorials/example_synthetic_lstm.jl @@ -7,11 +7,11 @@ # ## 1. Load Packages # Set project path and activate environment -using Pkg -project_path = "docs" -Pkg.activate(project_path) -EasyHybrid_path = joinpath(pwd()) -Pkg.develop(path = EasyHybrid_path) +#using Pkg +#project_path = "docs" +#Pkg.activate(project_path) +#EasyHybrid_path = joinpath(pwd()) +#Pkg.develop(path = EasyHybrid_path) #Pkg.resolve() #Pkg.instantiate() diff --git a/docs/literate/tutorials/folds.jl b/docs/literate/tutorials/folds.jl index 47933d59..21d0dbe5 100644 --- a/docs/literate/tutorials/folds.jl +++ b/docs/literate/tutorials/folds.jl @@ -5,6 +5,12 @@ # # ## 1. Load Packages +#using Pkg +#project_path = "docs" +#Pkg.activate(project_path) +#EasyHybrid_path = joinpath(pwd()) +#Pkg.develop(path = EasyHybrid_path) + using EasyHybrid using OhMyThreads using CairoMakie diff --git a/docs/src/tutorials/example_synthetic_lstm.md b/docs/src/tutorials/example_synthetic_lstm.md new file mode 100644 index 00000000..c093a103 --- /dev/null +++ b/docs/src/tutorials/example_synthetic_lstm.md @@ -0,0 +1,281 @@ +```@meta +EditURL = "../../literate/tutorials/example_synthetic_lstm.jl" +``` + +# LSTM Hybrid Model with EasyHybrid.jl + +This tutorial demonstrates how to use EasyHybrid to train a hybrid model with LSTM +neural networks on synthetic data for respiration modeling with Q10 temperature sensitivity. +The code for this tutorial can be found in [docs/src/literate/tutorials](https://github.com/EarthyScience/EasyHybrid.jl/tree/main/docs/src/literate/tutorials/) => example_synthetic_lstm.jl. + +## 1. Load Packages + +Set project path and activate environment + +````@example example_synthetic_lstm +#using Pkg +#project_path = "docs" +#Pkg.activate(project_path) +#EasyHybrid_path = joinpath(pwd()) +#Pkg.develop(path = EasyHybrid_path) +#Pkg.resolve() +#Pkg.instantiate() + +using EasyHybrid +using AxisKeys +using DimensionalData +using Lux +```` + +## 2. Data Loading and Preprocessing + +Load synthetic dataset from GitHub + +````@example example_synthetic_lstm +df = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/Synthetic4BookChap.nc"); +nothing #hide +```` + +Select a subset of data for faster execution + +````@example example_synthetic_lstm +df = df[1:20000, :]; +first(df, 5); +nothing #hide +```` + +## 3. 
Define Neural Network Architectures + +Define a standard feedforward neural network + +````@example example_synthetic_lstm +NN = Chain(Dense(15, 15, Lux.sigmoid), Dense(15, 15, Lux.sigmoid), Dense(15, 1)) +```` + +Define LSTM-based neural network with memory +Note: When the Chain ends with a Recurrence layer, EasyHybrid automatically adds +a RecurrenceOutputDense layer to handle the sequence output format. +The user only needs to define the Recurrence layer itself! + +````@example example_synthetic_lstm +NN_Memory = Chain( + Recurrence(LSTMCell(15 => 15), return_sequence = true), +) +```` + +## 4. Define the Physical Model + +````@example example_synthetic_lstm +""" + RbQ10(; ta, Q10, rb, tref=15.0f0) + +Respiration model with Q10 temperature sensitivity. + +- `ta`: air temperature [°C] +- `Q10`: temperature sensitivity factor [-] +- `rb`: basal respiration rate [μmol/m²/s] +- `tref`: reference temperature [°C] (default: 15.0) +""" +function RbQ10(; ta, Q10, rb, tref = 15.0f0) + reco = rb .* Q10 .^ (0.1f0 .* (ta .- tref)) + return (; reco, Q10, rb) +end +```` + +## 5. Define Model Parameters + +Parameter specification: (default, lower_bound, upper_bound) + +````@example example_synthetic_lstm +parameters = ( + rb = (3.0f0, 0.0f0, 13.0f0), # Basal respiration [μmol/m²/s] + Q10 = (2.0f0, 1.0f0, 4.0f0), # Temperature sensitivity factor [-] +) +```` + +## 6. Configure Hybrid Model Components + +Define input variables +Forcing variables (temperature) + +````@example example_synthetic_lstm +forcing = [:ta] +```` + +Predictor variables (solar radiation, and its derivative) + +````@example example_synthetic_lstm +predictors = [:sw_pot, :dsw_pot] +```` + +Target variable (respiration) + +````@example example_synthetic_lstm +target = [:reco] +```` + +Parameter classification +Global parameters (same for all samples) + +````@example example_synthetic_lstm +global_param_names = [:Q10] +```` + +Neural network predicted parameters + +````@example example_synthetic_lstm +neural_param_names = [:rb] +```` + +## 7. Construct LSTM Hybrid Model + +Create LSTM hybrid model using the unified constructor + +````@example example_synthetic_lstm +hlstm = constructHybridModel( + predictors, + forcing, + target, + RbQ10, + parameters, + neural_param_names, + global_param_names, + hidden_layers = NN_Memory, # Neural network architecture + scale_nn_outputs = true, # Scale neural network outputs + input_batchnorm = false # Apply batch normalization to inputs +) +```` + +## 8. Data Preparation Steps (Demonstration) + +The following steps demonstrate what happens under the hood during training. +In practice, you can skip to Section 9 and use the `train` function directly. 
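+Both `array_type` options used below return labeled arrays; they differ mainly in how a
+variable is selected by name (a hedged sketch, reusing the variable names defined earlier):
+
+````julia
+x, y = prepare_data(hlstm, df, array_type = :KeyedArray)
+x(inout = :ta)        # KeyedArray: call syntax, returns a view
+
+x, y = prepare_data(hlstm, df, array_type = :DimArray)
+x[inout = At(:ta)]    # DimArray: bracket syntax with an At selector, returns a copy
+````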
+ +:KeyedArray and :DimArray are supported + +````@example example_synthetic_lstm +x, y = prepare_data(hlstm, df, array_type = :DimArray); +nothing #hide +```` + +New split_into_sequences with input_window, output_window, shift and lead_time +for many-to-one, many-to-many, and different prediction lead times and overlap + +````@example example_synthetic_lstm +xs, ys = split_into_sequences(x, y; input_window = 20, output_window = 2, shift = 1, lead_time = 0); +ys_nan = .!isnan.(ys); +nothing #hide +```` + +Split data as in train + +````@example example_synthetic_lstm +sdf = split_data(df, hlstm, sequence_kwargs = (; input_window = 10, output_window = 3, shift = 1, lead_time = 1)); + +typeof(sdf); +(x_train, y_train), (x_val, y_val) = sdf; +x_train; +y_train; +y_train_nan = .!isnan.(y_train); +nothing #hide +```` + +Put into train loader to compose minibatches + +````@example example_synthetic_lstm +train_dl = EasyHybrid.DataLoader((x_train, y_train); batchsize = 32); +nothing #hide +```` + +Run hybrid model forwards + +````@example example_synthetic_lstm +x_first = first(train_dl)[1]; +y_first = first(train_dl)[2]; + +ps, st = Lux.setup(Random.default_rng(), hlstm); +frun = hlstm(x_first, ps, st); +nothing #hide +```` + +Extract predicted yhat + +````@example example_synthetic_lstm +reco_mod = frun[1].reco; +nothing #hide +```` + +Bring observations in same shape + +````@example example_synthetic_lstm +reco_obs = dropdims(y_first, dims = 1); +reco_nan = .!isnan.(reco_obs); +nothing #hide +```` + +Compute loss + +````@example example_synthetic_lstm +EasyHybrid.compute_loss(hlstm, ps, st, (x_train, (y_train, y_train_nan)), logging = LoggingLoss(train_mode = true)); +nothing #hide +```` + +## 9. Train LSTM Hybrid Model + +````@example example_synthetic_lstm +out_lstm = train( + hlstm, + df, + (); + nepochs = 2, # Number of training epochs + batchsize = 512, # Batch size for training + opt = AdamW(0.1), # Optimizer and learning rate + monitor_names = [:rb, :Q10], # Parameters to monitor during training + yscale = identity, # Scaling for outputs + shuffleobs = false, + loss_types = [:mse, :nse], + sequence_kwargs = (; input_window = 10, output_window = 4), + plotting = false, + array_type = :DimArray +); +nothing #hide +```` + +## 10. 
Train Single NN Hybrid Model (Optional) + +For comparison, we can also train a hybrid model with a standard feedforward neural network + +````@example example_synthetic_lstm +hm = constructHybridModel( + predictors, + forcing, + target, + RbQ10, + parameters, + neural_param_names, + global_param_names, + hidden_layers = NN, # Neural network architecture + scale_nn_outputs = true, # Scale neural network outputs + input_batchnorm = false, # Apply batch normalization to inputs +) +```` + +Train the hybrid model + +````@example example_synthetic_lstm +single_nn_out = train( + hm, + df, + (); + nepochs = 3, # Number of training epochs + batchsize = 512, # Batch size for training + opt = AdamW(0.1), # Optimizer and learning rate + monitor_names = [:rb, :Q10], # Parameters to monitor during training + yscale = identity, # Scaling for outputs + shuffleobs = false, + loss_types = [:mse, :nse], + array_type = :DimArray +); +nothing #hide +```` + From 740483cee5e8a5d2a2ebecc8565c7dc624c09e9d Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Sun, 28 Dec 2025 16:56:23 +0100 Subject: [PATCH 19/26] KeyedArray default --- src/models/NNModels.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/models/NNModels.jl b/src/models/NNModels.jl index 9ef49522..c2f906a3 100644 --- a/src/models/NNModels.jl +++ b/src/models/NNModels.jl @@ -57,14 +57,14 @@ function RecurrenceOutputDense(in_dims::Int, out_dims::Int, activation = identit return RecurrenceOutputDense(Dense(in_dims, out_dims, activation)) end -# Handle tuple output from Recurrence (return_sequence=true) -function (m::RecurrenceOutputDense)(x::NTuple{N, <:AbstractArray}, ps, st) where N +# Handle tuple output from Recurrence (return_sequence = true) +function (m::RecurrenceOutputDense)(x::NTuple{N, <:AbstractArray}, ps, st) where {N} y = map(xi -> first(LuxCore.apply(m.layer, xi, ps, st)), x) result = permutedims(stack(y; dims = 3), (1, 3, 2)) return result, st end -# Handle vector output from Recurrence (return_sequence=true) +# Handle vector output from Recurrence (return_sequence = true) function (m::RecurrenceOutputDense)(x::AbstractVector{<:AbstractArray}, ps, st) y = map(xi -> first(LuxCore.apply(m.layer, xi, ps, st)), x) result = permutedims(stack(y; dims = 3), (1, 3, 2)) From d7f0c484ed896e27a722f5bf1d7cfe6ec0c98aaf Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Sun, 28 Dec 2025 16:56:39 +0100 Subject: [PATCH 20/26] correct dispatch --- src/train.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/train.jl b/src/train.jl index 80f7d425..d6d9fe4e 100644 --- a/src/train.jl +++ b/src/train.jl @@ -415,7 +415,7 @@ function split_data( shuffleobs::Bool = false, split_data_at::Real = 0.8, sequence_kwargs::Union{Nothing, NamedTuple} = nothing, - array_type::Symbol = :DimArray, + array_type::Symbol = :KeyedArray, kwargs... 
) data_ = prepare_data(hybridModel, data; array_type = array_type) @@ -690,10 +690,10 @@ function split_into_sequences(x, y; input_window = 5, output_window = 1, shift = end -view_end_dim = function (x_all::Array{Float32, 2}, idx) +function view_end_dim(x_all::Union{KeyedArray{Float32, 2}, AbstractDimArray{Float32, 2}}, idx) return view(x_all, :, idx) end -view_end_dim = function (x_all::Array{Float32, 3}, idx) +function view_end_dim(x_all::Union{KeyedArray{Float32, 3}, AbstractDimArray{Float32, 3}}, idx) return view(x_all, :, :, idx) end From 20770ecf10a052acc48ee62279c02644cbcfc1b3 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Fri, 16 Jan 2026 16:15:47 +0100 Subject: [PATCH 21/26] prepare_data function uses KeyedArray as default loss functions closer to initial --- src/train.jl | 2 +- src/utils/compute_loss.jl | 34 +++++++++++++++++++--------------- test/test_split_data_train.jl | 2 +- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/train.jl b/src/train.jl index d6d9fe4e..b37550c0 100644 --- a/src/train.jl +++ b/src/train.jl @@ -519,7 +519,7 @@ function prepare_data(hm, data::AbstractDimArray; array_type = :DimArray) return (data[inout = At(predictors_forcing)], data[inout = At(targets)]) end -function prepare_data(hm, data::DataFrame; array_type = :DimArray) +function prepare_data(hm, data::DataFrame; array_type = :KeyedArray) predictors_forcing, targets = get_prediction_target_names(hm) all_predictor_cols = unique(vcat(values(predictors_forcing)...)) diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index a2b513af..9823ea0b 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -89,25 +89,25 @@ function _compute_loss end _select_time(ŷ_t::KeyedArray, time_keys) = ŷ_t(time = time_keys) # KeyedArray: () syntax - view & differentiable _select_time(ŷ_t::AbstractDimArray, time_keys) = ŷ_t[time = At(time_keys)] # DimArray: [] syntax - copy & differentiable -function assemble_loss(ŷ, y::Union{KeyedArray{T, 3}, AbstractDimArray{T, 3}}, y_nan, targets, loss_spec) where {T} - return [ - begin - y_t = _get_target_y(y, target) - y_nan_t = _get_target_nan(y_nan, target) - ŷ_t = ŷ[target] - ŷ_tsub = _select_time(ŷ_t, axiskeys(y_t, :time)) # dispatches based on array type +_get_target_ŷ(ŷ, y_t::Union{KeyedArray{T, 3}, AbstractDimArray{T, 3}}, target) where {T} = + _select_time(ŷ[target], axiskeys(y_t, :time)) - _apply_loss(ŷ_tsub, y_t, y_nan_t, loss_spec) - end - for target in targets - ] -end +# For 1D y_t (from 2D y): no time subsetting +_get_target_ŷ(ŷ, y_t::Union{KeyedArray{T, 2}, AbstractDimArray{T, 2}}, target) where {T} = + ŷ[target] + +_get_target_ŷ(ŷ, y_t, target) = + ŷ[target] -function assemble_loss(ŷ, y::Union{KeyedArray{T, 2}, AbstractDimArray{T, 2}}, y_nan, targets, loss_spec) where {T} +function assemble_loss(ŷ, y, y_nan, targets, loss_spec) return [ - _apply_loss(ŷ[target], _get_target_y(y, target), _get_target_nan(y_nan, target), loss_spec) - for target in targets + begin + y_t = _get_target_y(y, target) + ŷ_t = _get_target_ŷ(ŷ, y_t, target) + _apply_loss(ŷ_t, y_t, _get_target_nan(y_nan, target), loss_spec) + end + for target in targets ] end @@ -161,6 +161,10 @@ function _get_target_y(y::KeyedArray, target) return y(inout = target) end +function _get_target_y(y::KeyedArray, target) + return y(inout = target) +end + function _get_target_y(y::KeyedArray, targets::Vector) return y(inout = targets) end diff --git a/test/test_split_data_train.jl b/test/test_split_data_train.jl index da42715b..8ee45846 100644 --- 
a/test/test_split_data_train.jl +++ b/test/test_split_data_train.jl @@ -121,7 +121,7 @@ const RbQ10_PARAMS = ( @test !isnothing(out) mat = vcat(ka[1], ka[2]) - da = DimArray(mat, (Dim{:col}(mat.keys[1]), Dim{:row}(1:size(mat, 2))))' + da = DimArray(mat, (Dim{:inout}(mat.keys[1]), Dim{:batch_size}(1:size(mat, 2))))' ka = prepare_data(model, da) @test !isnothing(ka) From 1b3e203e26ff478c2a6fa7320b03dfd1b95faafd Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Fri, 16 Jan 2026 17:56:41 +0100 Subject: [PATCH 22/26] fix tests --- src/utils/compute_loss.jl | 3 +++ test/test_compute_loss.jl | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index 9823ea0b..fe89e100 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -156,6 +156,9 @@ Helper function to apply the appropriate loss function based on the specificatio """ function _apply_loss end +_get_target_y(y, target) = y(target) +_get_target_nan(y_nan, target) = y_nan(target) + # For KeyedArray function _get_target_y(y::KeyedArray, target) return y(inout = target) diff --git a/test/test_compute_loss.jl b/test/test_compute_loss.jl index fb87339f..92cbac2e 100644 --- a/test/test_compute_loss.jl +++ b/test/test_compute_loss.jl @@ -72,8 +72,8 @@ using DataFrames :var1 => DimArray([1.0, 2.0, 3.0], (Ti(1:3),)), :var2 => DimArray([2.0, 3.0, 4.0], (Ti(1:3),)) ) - y_dim = DimArray([1.1 1.8; 1.9 3.1; 3.2 3.9], (Ti(1:3), Dim{:col}([:var1, :var2]))) - y_nan_dim = DimArray(trues(3, 2), (Ti(1:3), Dim{:col}([:var1, :var2]))) + y_dim = DimArray([1.1 1.8; 1.9 3.1; 3.2 3.9], (Ti(1:3), Dim{:inout}([:var1, :var2]))) + y_nan_dim = DimArray(trues(3, 2), (Ti(1:3), Dim{:inout}([:var1, :var2]))) # Test single predefined loss loss = _compute_loss(ŷ_dim, y_dim, y_nan_dim, targets, :mse, sum) @@ -121,12 +121,12 @@ end @test _get_target_nan(y_nan_func, :var2) == [true, true, false] # Test with AbstractDimArray - y_nan_dim = DimArray([true false; true true; false true], (Ti(1:3), Dim{:col}([:var1, :var2]))) + y_nan_dim = DimArray([true false; true true; false true], (Ti(1:3), Dim{:inout}([:var1, :var2]))) @test _get_target_nan(y_nan_dim, :var1) == [true, true, false] @test _get_target_nan(y_nan_dim, :var2) == [false, true, true] # Test with Vector of targets - y_nan_dim_multi = DimArray([true false; true true; false true], (Ti(1:3), Dim{:col}([:var1, :var2]))) + y_nan_dim_multi = DimArray([true false; true true; false true], (Ti(1:3), Dim{:inout}([:var1, :var2]))) result = _get_target_nan(y_nan_dim_multi, [:var1, :var2]) @test size(result) == (3, 2) @test result[:, 1] == [true, true, false] @@ -140,12 +140,12 @@ end @test _get_target_y(y_func, :var2) == [2.0, 3.0, 4.0] # Test with AbstractDimArray - y_dim = DimArray([1.0 2.0; 2.0 3.0; 3.0 4.0], (Ti(1:3), Dim{:col}([:var1, :var2]))) + y_dim = DimArray([1.0 2.0; 2.0 3.0; 3.0 4.0], (Ti(1:3), Dim{:inout}([:var1, :var2]))) @test _get_target_y(y_dim, :var1) == [1.0, 2.0, 3.0] @test _get_target_y(y_dim, :var2) == [2.0, 3.0, 4.0] # Test with Vector of targets - y_dim_multi = DimArray([1.0 2.0; 2.0 3.0; 3.0 4.0], (Ti(1:3), Dim{:col}([:var1, :var2]))) + y_dim_multi = DimArray([1.0 2.0; 2.0 3.0; 3.0 4.0], (Ti(1:3), Dim{:inout}([:var1, :var2]))) result = _get_target_y(y_dim_multi, [:var1, :var2]) @test size(result) == (3, 2) @test result[:, 1] == [1.0, 2.0, 3.0] From d62cdf8254c533c4288a4464adf87d783bf7b18b Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Mon, 19 Jan 2026 13:34:20 +0100 Subject: [PATCH 23/26] correct matrices 
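The fixtures above follow the dimension-naming convention used across the package: variables
sit on an `:inout` dimension and samples on `:batch_size`, with `:time` added for sequence
data. A minimal, hypothetical array in that layout looks roughly like this:

```julia
using DimensionalData

vals = rand(Float32, 2, 5)   # 2 variables, 5 samples
da = DimArray(vals, (Dim{:inout}([:ta, :reco]), Dim{:batch_size}(1:5)))
da[inout = At(:reco)]        # select a variable by name, as the loss helpers do
```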
--- docs/Project.toml | 2 +- docs/src/tutorials/example_synthetic_lstm.md | 14 +++++++------- src/utils/compute_loss.jl | 15 +++++++-------- src/utils/tools.jl | 1 - 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/docs/Project.toml b/docs/Project.toml index d06cca83..50eaa846 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -13,4 +13,4 @@ Lux = "b2108857-7c20-44ae-9111-449ecde12c47" OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" [sources] -EasyHybrid = {path = ".."} +EasyHybrid = {path = "C:\\Projects\\EasyHybrid.jl"} diff --git a/docs/src/tutorials/example_synthetic_lstm.md b/docs/src/tutorials/example_synthetic_lstm.md index c093a103..9acb26c2 100644 --- a/docs/src/tutorials/example_synthetic_lstm.md +++ b/docs/src/tutorials/example_synthetic_lstm.md @@ -13,13 +13,13 @@ The code for this tutorial can be found in [docs/src/literate/tutorials](https:/ Set project path and activate environment ````@example example_synthetic_lstm -#using Pkg -#project_path = "docs" -#Pkg.activate(project_path) -#EasyHybrid_path = joinpath(pwd()) -#Pkg.develop(path = EasyHybrid_path) -#Pkg.resolve() -#Pkg.instantiate() +using Pkg +project_path = "docs" +Pkg.activate(project_path) +EasyHybrid_path = joinpath(pwd()) +Pkg.develop(path = EasyHybrid_path) +Pkg.resolve() +Pkg.instantiate() using EasyHybrid using AxisKeys diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index fe89e100..2e72e804 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -90,11 +90,14 @@ _select_time(ŷ_t::KeyedArray, time_keys) = ŷ_t(time = time_keys) # KeyedArray _select_time(ŷ_t::AbstractDimArray, time_keys) = ŷ_t[time = At(time_keys)] # DimArray: [] syntax - copy & differentiable -_get_target_ŷ(ŷ, y_t::Union{KeyedArray{T, 3}, AbstractDimArray{T, 3}}, target) where {T} = +# For 2D y_t (from 3D y): needs time subsetting +# y_t has dims (time, batch_size), ŷ[target] has (time=input_window, batch_size) +# We subset ŷ to match y_t's time dimension (output_window) +_get_target_ŷ(ŷ, y_t::Union{KeyedArray{T, 2}, AbstractDimArray{T, 2}}, target) where {T} = _select_time(ŷ[target], axiskeys(y_t, :time)) -# For 1D y_t (from 2D y): no time subsetting -_get_target_ŷ(ŷ, y_t::Union{KeyedArray{T, 2}, AbstractDimArray{T, 2}}, target) where {T} = +# For 1D y_t (from 2D y): no time subsetting needed +_get_target_ŷ(ŷ, y_t::Union{KeyedArray{T, 1}, AbstractDimArray{T, 1}}, target) where {T} = ŷ[target] _get_target_ŷ(ŷ, y_t, target) = @@ -115,7 +118,7 @@ function assemble_loss(ŷ, y, y_nan, targets, loss_spec::PerTarget) @assert length(targets) == length(loss_spec.losses) "Length of targets and PerTarget losses tuple must match" losses = [ _apply_loss( - ŷ, + _get_target_ŷ(ŷ, y_t, target), _get_target_y(y, target), _get_target_nan(y_nan, target), target, @@ -164,10 +167,6 @@ function _get_target_y(y::KeyedArray, target) return y(inout = target) end -function _get_target_y(y::KeyedArray, target) - return y(inout = target) -end - function _get_target_y(y::KeyedArray, targets::Vector) return y(inout = targets) end diff --git a/src/utils/tools.jl b/src/utils/tools.jl index b6d832dd..35e35e3e 100644 --- a/src/utils/tools.jl +++ b/src/utils/tools.jl @@ -111,7 +111,6 @@ split_data(df::DataFrame, target, xvars, seqID; f=0.8, batchsize=32, shuffle=tru function split_data(df::DataFrame, target, xvars, seqID; f = 0.8, batchsize = 32, shuffle = true, partial = true) dfg = groupby(df, seqID) dkg = to_keyedArray(dfg) - #@show axiskeys(dkg)[1] # Do the partitioning via indices of the 3rd dimension (e.g. 
seqID) because # partition does not allow partitioning along that dimension (or even not arrays at all) idx_tr, idx_vali = partition(axiskeys(dkg)[3], f; shuffle) From 4071a4f40b737f370d0603dfe6802bdd99427751 Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Mon, 19 Jan 2026 13:39:59 +0100 Subject: [PATCH 24/26] run format.jl --- src/models/NNModels.jl | 2 +- src/utils/compute_loss.jl | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/models/NNModels.jl b/src/models/NNModels.jl index c2f906a3..3bc739da 100644 --- a/src/models/NNModels.jl +++ b/src/models/NNModels.jl @@ -64,7 +64,7 @@ function (m::RecurrenceOutputDense)(x::NTuple{N, <:AbstractArray}, ps, st) where return result, st end -# Handle vector output from Recurrence (return_sequence = true) +# Handle vector output from Recurrence (return_sequence = true) function (m::RecurrenceOutputDense)(x::AbstractVector{<:AbstractArray}, ps, st) y = map(xi -> first(LuxCore.apply(m.layer, xi, ps, st)), x) result = permutedims(stack(y; dims = 3), (1, 3, 2)) diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index 2e72e804..e5f305a2 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -106,11 +106,11 @@ _get_target_ŷ(ŷ, y_t, target) = function assemble_loss(ŷ, y, y_nan, targets, loss_spec) return [ begin - y_t = _get_target_y(y, target) - ŷ_t = _get_target_ŷ(ŷ, y_t, target) - _apply_loss(ŷ_t, y_t, _get_target_nan(y_nan, target), loss_spec) - end - for target in targets + y_t = _get_target_y(y, target) + ŷ_t = _get_target_ŷ(ŷ, y_t, target) + _apply_loss(ŷ_t, y_t, _get_target_nan(y_nan, target), loss_spec) + end + for target in targets ] end From 7f16fae4ecc83f7659bcfbd3a00edd80c27936bd Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Mon, 19 Jan 2026 19:22:37 +0100 Subject: [PATCH 25/26] get test running --- docs/Project.toml | 2 +- src/models/NNModels.jl | 8 +++----- src/utils/compute_loss.jl | 21 +++++++++++++-------- test/runtests.jl | 3 +++ test/test_generic_hybrid_model.jl | 4 ++-- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/docs/Project.toml b/docs/Project.toml index 50eaa846..d06cca83 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -13,4 +13,4 @@ Lux = "b2108857-7c20-44ae-9111-449ecde12c47" OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" [sources] -EasyHybrid = {path = "C:\\Projects\\EasyHybrid.jl"} +EasyHybrid = {path = ".."} diff --git a/src/models/NNModels.jl b/src/models/NNModels.jl index 3bc739da..83ea9fda 100644 --- a/src/models/NNModels.jl +++ b/src/models/NNModels.jl @@ -192,14 +192,12 @@ function prepare_hidden_chain( else # user gave a vector of hidden‐layer sizes hs = hidden_layers - # build the hidden‐to‐hidden part - hidden_chain = length(hs) > 1 ? - Chain((Dense(hs[i], hs[i + 1], activation) for i in 1:(length(hs) - 1))...) : - Chain() + isempty(hs) && return Chain() + in_dim == 0 && return Chain() return Chain( input_batchnorm ? 
BatchNorm(in_dim, affine = false) : identity, Dense(in_dim, hs[1], activation), - hidden_chain.layers..., + (Dense(hs[i], hs[i + 1], activation) for i in 1:(length(hs) - 1))..., Dense(hs[end], out_dim) ) end diff --git a/src/utils/compute_loss.jl b/src/utils/compute_loss.jl index e5f305a2..021897cc 100644 --- a/src/utils/compute_loss.jl +++ b/src/utils/compute_loss.jl @@ -117,13 +117,18 @@ end function assemble_loss(ŷ, y, y_nan, targets, loss_spec::PerTarget) @assert length(targets) == length(loss_spec.losses) "Length of targets and PerTarget losses tuple must match" losses = [ - _apply_loss( - _get_target_ŷ(ŷ, y_t, target), - _get_target_y(y, target), - _get_target_nan(y_nan, target), - target, - loss_t - ) for (target, loss_t) in zip(targets, loss_spec.losses) + begin + y_t = _get_target_y(y, target) + ŷ_t = _get_target_ŷ(ŷ, y_t, target) + y_nan_t = _get_target_nan(y_nan, target) + _apply_loss( + ŷ_t, + y_t, + y_nan_t, + loss_t + ) + end + for (target, loss_t) in zip(targets, loss_spec.losses) ] return losses end @@ -140,7 +145,7 @@ function _apply_loss(ŷ, y, y_nan, loss_spec::Tuple) return loss_fn(ŷ, y, y_nan, loss_spec) end function _apply_loss(ŷ, y, y_nan, target, loss_spec) - return _apply_loss(ŷ[target], y, y_nan, loss_spec) + return _apply_loss(_get_target_ŷ(ŷ, y, target), y, y_nan, loss_spec) end """ diff --git a/test/runtests.jl b/test/runtests.jl index a772ca70..9461451d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,3 +1,6 @@ +#using Pkg +#Pkg.activate(@__DIR__) +#using Revise using EasyHybrid using Test # import Flux diff --git a/test/test_generic_hybrid_model.jl b/test/test_generic_hybrid_model.jl index 55aac728..c0a4f502 100644 --- a/test/test_generic_hybrid_model.jl +++ b/test/test_generic_hybrid_model.jl @@ -176,7 +176,7 @@ end @test model isa SingleNNHybridModel @test model.predictors == predictors @test model.NN isa Chain - @test length(model.NN.layers) == 0 # Empty chain + @test typeof(model.NN.layers[1]) == Lux.NoOpLayer # Empty chain end @testset "SingleNNHybridModel initialparameters" begin @@ -496,7 +496,7 @@ end st = LuxCore.initialstates(rng, model) @test haskey(ps, :ps) # Even with empty NN, ps key exists (may be empty) - @test isempty(ps.ps) + @test isempty(ps.ps[1]) output, new_st = model(dk, ps, st) @test haskey(output, :y_pred) From aff68de428ff03bf336ecbbd087603b7ffbf6cba Mon Sep 17 00:00:00 2001 From: Bernhard Ahrens Date: Mon, 19 Jan 2026 19:28:22 +0100 Subject: [PATCH 26/26] runic macro conflict --- src/macro_hybrid.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/macro_hybrid.jl b/src/macro_hybrid.jl index 8bad549f..1b882e41 100644 --- a/src/macro_hybrid.jl +++ b/src/macro_hybrid.jl @@ -63,14 +63,14 @@ macro hybrid(name, params...) docstring = """ $(name)(NN, predictors, forcing, targets$(isempty(param_syms) ? "" : ", " * join(string.(param_syms), ", "))) - + A hybrid model with: - `NN`: a neural network - `predictors`: a tuple of names, i.e, (:clay, :moist) - `forcing`: data names, i.e. (:temp, ) - `targets`: data names, i.e. (:ndvi, ) $(isempty(param_syms) ? "" : "- Physical parameters: " * join(string.(param_syms), ", ")) - + All physical parameters are trainable by default. Use `?Lux.Experimental.freeze` to make specific parameters non-trainable during training. """ # Build the complete struct definition
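For context, the `@hybrid` docstring above corresponds to usage along these lines. This is a
hedged sketch that reuses the docstring's own example names (`clay`, `moist`, `temp`, `ndvi`);
the exact macro-argument form and constructor signature are fixed by the macro expansion
itself.

```julia
using EasyHybrid, Lux

# Generate a hybrid-model type with one physical parameter
# (parameter names listed after the struct name; form assumed here).
@hybrid RespirationModel Q10

NN = Chain(Dense(2, 16, tanh), Dense(16, 1))

# Constructor as described in the generated docstring:
# RespirationModel(NN, predictors, forcing, targets, Q10)
model = RespirationModel(NN, (:clay, :moist), (:temp,), (:ndvi,), 2.0f0)
```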