From e16759421213f22f7100d206dc0f154a0318ee31 Mon Sep 17 00:00:00 2001 From: Thomas Wutzler Date: Mon, 23 Dec 2024 20:01:22 +0100 Subject: [PATCH 1/6] test sample_zeta --- Project.toml | 8 ++- src/HybridVariationalInference.jl | 8 +++ src/elbo.jl | 96 ++++++++++++++++++++++++++ src/util._transformvariablesjl | 21 ++++++ test/Project.toml | 2 +- test/runtests.jl | 2 + test/test_elbo.jl | 109 ++++++++++++++++++++++++++++++ test/test_sample_zeta.jl | 99 +++++++++++++++++++++++++++ 8 files changed, 343 insertions(+), 2 deletions(-) create mode 100644 src/elbo.jl create mode 100644 src/util._transformvariablesjl create mode 100644 test/test_elbo.jl create mode 100644 test/test_sample_zeta.jl diff --git a/Project.toml b/Project.toml index a0f407d..0c86f31 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Thomas Wutzler and contributors"] version = "1.0.0-DEV" [deps] +BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" @@ -13,6 +14,8 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +TransformVariables = "84d833dd-6860-57f9-a1a7-6da5db126cff" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [weakdeps] Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" @@ -25,8 +28,9 @@ HybridVariationalInferenceLuxExt = "Lux" HybridVariationalInferenceSimpleChainsExt = "SimpleChains" [compat] -ChainRulesCore = "1.25" +BlockDiagonals = "0.1.42" CUDA = "5.5.2" +ChainRulesCore = "1.25" Combinatorics = "1.0.2" ComponentArrays = "0.15.19" Flux = "v0.15.2" @@ -37,6 +41,8 @@ Random = "1.10.0" SimpleChains = "0.4" StatsBase = "0.34.4" StatsFuns = "1.3.2" +TransformVariables = "0.8.10" +Zygote = "0.6.73" julia = "1.10" [workspace] diff --git a/src/HybridVariationalInference.jl b/src/HybridVariationalInference.jl index 1d7b2bb..7dee309 100644 --- a/src/HybridVariationalInference.jl +++ b/src/HybridVariationalInference.jl @@ -8,6 +8,12 @@ using GPUArraysCore using LinearAlgebra using CUDA using ChainRulesCore +using TransformVariables +using Zygote # Zygote.@ignore CUDA.randn +using BlockDiagonals + +export inverse_ca +include("util._transformvariablesjl") export ComponentArrayInterpreter, flatten1, get_concrete include("ComponentArrayInterpreter.jl") @@ -32,6 +38,8 @@ include("util_opt.jl") #export - all internal include("cholesky.jl") +include("elbo.jl") + export DoubleMM include("DoubleMM/DoubleMM.jl") diff --git a/src/elbo.jl b/src/elbo.jl new file mode 100644 index 0000000..db0cd97 --- /dev/null +++ b/src/elbo.jl @@ -0,0 +1,96 @@ +""" +Generate samples of (inv-transformed) model parameters, ζ, and Log-Determinant +of their distribution. + +Adds the MV-normally distributed residuals, retrieved by `sample_ζ_norm0` +to the means extracted from parameters and predicted by the machine learning +model. 
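+
+Returns the matrix `ζ` with one sample per column, together with `logdetΣ`,
+the log-determinant of the covariance matrix used to generate the residuals.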
+""" +function generate_ζ(rng, g, f, ϕ::AbstractVector, x::AbstractMatrix, + interpreters::NamedTuple; n_MC=3) + # see documentation of neg_elbo_transnorm_gf + ϕc = interpreters.μP_ϕg_unc(CA.getdata(ϕ)) + μ_ζP = ϕc.μP + ϕg = ϕc.ϕg + μ_ζMs0 = g(x, ϕg) # TODO provide μ_ζP to g + ζ_resid, logdetΣ = sample_ζ_norm0(rng, μ_ζP, μ_ζMs0, ϕc.unc; n_MC) + #ζ_resid, logdetΣ = sample_ζ_norm0(rng, ϕ[1:2], reshape(ϕ[2 .+ (1:20)],2,:), ϕ[(end-length(interpreters.unc)+1):end], interpreters.unc; n_MC) + ζ = stack(map(eachcol(ζ_resid)) do r + rc = interpreters.PMs(r) + ζP = μ_ζP .+ rc.θP + μ_ζMs = μ_ζMs0 # g(x, ϕc.ϕ) # TODO provide ζP to g + ζMs = μ_ζMs .+ rc.θMs + vcat(ζP, vec(ζMs)) + end) + ζ, logdetΣ +end + +""" +Extract relevant parameters from θ and return n_MC generted draws +together with the logdet of the transformation. + +Necessary typestable information on number of compponents are provided with +ComponentMarshellers +- marsh_pmu(n_θP, n_θMs, Unc=n_θUnc) +- marsh_batch(n_batch) +- marsh_unc(n_UncP, n_UncM, n_UncCorr) +""" +function sample_ζ_norm0(rng::Random.AbstractRNG, ζP::AbstractVector, ζMs::AbstractMatrix, ϕunc::AbstractVector, args...; + n_MC=3) + n_θP, n_θMs = length(ζP), length(ζMs) + urand = _create_random(rng, CA.getdata(ζP), n_θP + n_θMs, n_MC) + sample_ζ_norm0(urand, ζP, ζMs, ϕunc, args...) +end + +function sample_ζ_norm0(urand::AbstractMatrix, ζP::AbstractVector{T}, ζMs::AbstractMatrix, + ϕunc::AbstractVector, int_unc = ComponentArrayInterpreter(ϕunc); + ) where {T} + ϕuncc = int_unc(CA.getdata(ϕunc)) + n_θP, n_θMs, (n_θM, n_batch) = length(ζP), length(ζMs), size(ζMs) + # make sure to not create a UpperTriangular Matrix of an CuArray in transformU_cholesky1 + UP = transformU_cholesky1(ϕuncc.ρsP) + UM = transformU_cholesky1(ϕuncc.ρsM) + cf = ϕuncc.coef_logσ2_logMs + logσ2_logMs = vec(cf[1, :] .+ cf[2, :] .* ζMs) + logσ2_logP = vec(CA.getdata(ϕuncc.logσ2_logP)) + # CUDA cannot multiply BlockDiagonal * Diagonal, construct already those blocks + σMs = reshape(exp.(logσ2_logMs ./ 2), n_θM, :) + σP = exp.(logσ2_logP ./ 2) + # BlockDiagonal does work with CUDA, but not with combination of Zygote and CUDA + # need to construct full matrix for CUDA + Uσ = _create_blockdiag(UP, UM, σP, σMs, n_batch) + ζ_resid = Uσ' * urand + logdetΣ = 2 .* sum(log.(diag(Uσ))) + # returns CuArrays to either continue on GPU or need to transfer to CPU + ζ_resid, logdetΣ +end + +function _create_blockdiag(UP::AbstractMatrix{T}, UM, σP, σMs, n_batch) where {T} + v = [i == 0 ? UP * Diagonal(σP) : UM * Diagonal(σMs[:, i]) for i in 0:n_batch] + BlockDiagonal(v) +end +function _create_blockdiag(UP::GPUArraysCore.AbstractGPUMatrix{T}, UM, σP, σMs, n_batch) where {T} + # using BlockDiagonal leads to Scalar operations downstream + # v = [i == 0 ? UP * Diagonal(σP) : UM * Diagonal(σMs[:, i]) for i in 0:n_batch] + # BlockDiagonal(v) + # Uσ = cat([i == 0 ? UP * Diagonal(σP) : UM * Diagonal(σMs[:, i]) for i in 0:n_batch]...; + # dims=(1, 2)) + # on GPU use only one big multiplication rather than many small ones + U = cat([i == 0 ? UP : UM for i in 0:n_batch]...; dims=(1, 2)) + #Main.@infiltrate_main + σD = Diagonal(vcat(σP, vec(σMs))) + Uσ = U * σD + # need for Zygote why? + # tmp = cat(Uσ; dims=(1,2)) + tmp = vcat(Uσ) +end + +function _create_random(rng, ::AbstractVector{T}, dims...) where {T} + rand(rng, T, dims...) +end +function _create_random(rng, ::GPUArraysCore.AbstractGPUVector{T}, dims...) 
where {T} + # ignores rng + # https://discourse.julialang.org/t/help-using-cuda-zygote-and-random-numbers/123458/4?u=bgctw + # Zygote.@ignore CUDA.randn(rng, dims...) + Zygote.@ignore CUDA.randn(dims...) +end \ No newline at end of file diff --git a/src/util._transformvariablesjl b/src/util._transformvariablesjl new file mode 100644 index 0000000..8e008b8 --- /dev/null +++ b/src/util._transformvariablesjl @@ -0,0 +1,21 @@ +""" +Apply TransformVariables.inverse to ComponentArray, `ca`. + +- convert `ca` to a `NamedTuple` +- apply transformation +- convert back to `ComponentArray` +""" +function inverse_ca(trans, ca::CA.AbstractArray) + CA.ComponentArray( + TransformVariables.inverse(trans, cv2NamedTuple(ca)), + CA.getaxes(ca)) +end + +""" +Convert ComponentVector to NamedTuple of the first layer, i.e. keep +ComponentVectors in the second level. +""" +function cv2NamedTuple(ca::CA.ComponentVector) + g = ((k, CA.getdata(ca[k])) for k in keys(ca)) + (; g...) +end \ No newline at end of file diff --git a/test/Project.toml b/test/Project.toml index 3006d5b..97ffeaf 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -15,5 +15,5 @@ StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +TransformVariables = "84d833dd-6860-57f9-a1a7-6da5db126cff" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" -cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" diff --git a/test/runtests.jl b/test/runtests.jl index 49f0f37..18434b3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -13,6 +13,8 @@ const GROUP = get(ENV, "GROUP", "All") # defined in in CI.yml @time @safetestset "test_doubleMM" include("test_doubleMM.jl") #@safetestset "test" include("test/test_cholesky_structure.jl") @time @safetestset "test_cholesky_structure" include("test_cholesky_structure.jl") + #@safetestset "test" include("test/test_sample_zeta.jl") + @time @safetestset "test_sample_zeta" include("test_sample_zeta.jl") # #@safetestset "test" include("test/test_Flux.jl") @time @safetestset "test_Flux" include("test_Flux.jl") diff --git a/test/test_elbo.jl b/test/test_elbo.jl new file mode 100644 index 0000000..62064f7 --- /dev/null +++ b/test/test_elbo.jl @@ -0,0 +1,109 @@ +#using LinearAlgebra, BlockDiagonals + +using Test +using Zygote +using HybridVariationalInference +using HybridVariationalInference: HybridVariationalInference as CP +using StableRNGs +using CUDA +using GPUArraysCore: GPUArraysCore +using Random +using SimpleChains +using ComponentArrays: ComponentArrays as CA +using TransformVariables + +#CUDA.device!(4) +rng = StableRNG(111) + +const case = DoubleMM.DoubleMMCase() +const MLengine = Val(nameof(SimpleChains)) +scenario = (:default,) + +#θsite_true = get_hybridcase_par_templates(case; scenario) +g, ϕg0 = gen_hybridcase_MLapplicator(case, MLengine; scenario); +f = gen_hybridcase_PBmodel(case; scenario) + +(; n_covar, n_site, n_batch, n_θM, n_θP) = get_hybridcase_sizes(case; scenario) + +(; xM, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o +) = gen_hybridcase_synthetic(case, rng; scenario); + +# correlation matrices +ρsP = zeros(sum(1:(n_θP-1))) +ρsM = zeros(sum(1:(n_θM-1))) + +() -> begin + coef_logσ2_logMs = [-5.769 -3.501; -0.01791 0.007951] + logσ2_logP = CA.ComponentVector(r0=-8.997, K2=-5.893) + #mean_σ_o_MC = 0.006042 + + ϕunc = CA.ComponentVector(; + logσ2_logP=logσ2_logP, + coef_logσ2_logMs=coef_logσ2_logMs, + ρsP, + ρsM) +end + + +# for a 
conservative uncertainty assume σ2=1e-10 and no relationship with magnitude +ϕunc0 = CA.ComponentVector(; + logσ2_logP=fill(-10.0, n_θP), + coef_logσ2_logMs=reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), + ρsP, + ρsM) +#int_unc = ComponentArrayInterpreter(ϕunc0) + +transPMs_batch = as( + (P=as(Array, asℝ₊, n_θP), + Ms=as(Array, asℝ₊, n_θM, n_batch))) +transPMs_all = as( + (P=as(Array, asℝ₊, n_θP), + Ms=as(Array, asℝ₊, n_θM, n_site))) + +ϕ_true = θ = CA.ComponentVector(; + μP=θP_true, + ϕg=ϕg0, #ϕg_opt, # here start from randomized + unc=ϕunc); +trans_gu = as( + (μP=as(Array, asℝ₊, n_θP), + ϕg=as(Array, length(ϕg0)), + unc=as(Array, length(ϕunc)))) +trans_g = as( + (μP=as(Array, asℝ₊, n_θP), + ϕg=as(Array, length(ϕg0)))) + +int_PMs_batch = ComponentArrayInterpreter(CA.ComponentVector(; θP_true, + θMs=CA.ComponentMatrix( + zeros(n_θM, n_batch), first(CA.getaxes(θMs_true)), CA.Axis(i=1:n_batch)))) + +interpreters = map(get_concrete,(; + μP_ϕg_unc=ComponentArrayInterpreter(ϕ_true), + PMs=int_PMs_batch, + unc=ComponentArrayInterpreter(ϕunc0) + )) + +ϕg_true_vec = CA.ComponentVector( + TransformVariables.inverse(trans_gu, CP.cv2NamedTuple(ϕ_true))) +ϕcg_true = interpreters.μP_ϕg_unc(ϕg_true_vec) +ϕ_ini = ζ = vcat(ϕcg_true[[:μP, :ϕg]] .* 1.2, ϕcg_true[[:unc]]); +ϕ_ini0 = ζ = vcat(ϕcg_true[:μP] .* 0.0, ϕg0, ϕunc0); + +neg_elbo_transnorm_gf(rng, g, f, ϕcg_true, y_o[:, 1:n_batch], xM[:, 1:n_batch], + transPMs_batch, map(get_concrete, interpreters); + n_MC=8, logσ2y) +Zygote.gradient(ϕ -> neg_elbo_transnorm_gf( + rng, g, f, ϕ, y_o[:, 1:n_batch], x_o[:, 1:n_batch], + transPMs_batch, interpreters; n_MC=8, logσ2y), ϕcg_true) + + @testset "generate_ζ" begin + ϕ = CA.getdata(ϕ_cpu) + n_sample_pred = 200 + intm_PMs_gen = ComponentArrayInterpreter(CA.ComponentVector(; θP_true, + θMs=CA.ComponentMatrix( + zeros(n_θM, n_site), first(CA.getaxes(θMs_true)), CA.Axis(i=1:n_sample_pred)))) + int_μP_ϕg_unc=ComponentArrayInterpreter(ϕ_true) + interpreters = (; PMs = intm_PMs_gen, μP_ϕg_unc = int_μP_ϕg_unc ) + ζs, _ = CP.generate_ζ(rng, g, f, ϕ, xM, interpreters; n_MC=n_sample_pred) + + end; + diff --git a/test/test_sample_zeta.jl b/test/test_sample_zeta.jl new file mode 100644 index 0000000..6447fcd --- /dev/null +++ b/test/test_sample_zeta.jl @@ -0,0 +1,99 @@ +#using LinearAlgebra, BlockDiagonals + +using Test +using Zygote +using HybridVariationalInference +using HybridVariationalInference: HybridVariationalInference as CP +using StableRNGs +using CUDA +using GPUArraysCore: GPUArraysCore +using Random +#using SimpleChains +using ComponentArrays: ComponentArrays as CA +using TransformVariables + +#CUDA.device!(4) +rng = StableRNG(111) + +const case = DoubleMM.DoubleMMCase() +#const MLengine = Val(nameof(SimpleChains)) +scenario = (:default,) + +(; n_covar, n_site, n_batch, n_θM, n_θP) = get_hybridcase_sizes(case; scenario) + +@testset "test_sample_zeta" begin + (; xM, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o + ) = gen_hybridcase_synthetic(case, rng; scenario); + + # n_site = 2 + # n_θP, n_θM = length(θ_true.θP), length(θ_true.θM) + # σ_θM = θ_true.θM .* 0.1 # 10% around expected + # θMs_true = θ_true.θM .+ randn(n_θM, n_site) .* σ_θM + + # set to 0.02 rather than zero for debugging non-zero correlations + ρsP = zeros(sum(1:(n_θP-1))) .+ 0.02 + ρsM = zeros(sum(1:(n_θM-1))) .+ 0.02 + + ϕunc = CA.ComponentVector(; + logσ2_logP=fill(-10.0, n_θP), + coef_logσ2_logMs=reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), + ρsP, + ρsM) + + transPMs = as( + (P=as(Array, asℝ₊, n_θP), + Ms=as(Array, asℝ₊, n_θM, 
n_site))) + θ_true = θ = CA.ComponentVector(; + P=θP_true, + Ms=θMs_true) + transPMs = as(( + P=as(Array, asℝ₊, n_θP), + Ms=as(Array, asℝ₊, n_θM, n_site))) + ζ_true = inverse_ca(transPMs, θ_true) + ϕ_true = vcat(ζ_true, CA.ComponentVector(unc=ϕunc)) + ϕ_cpu = vcat(ζ_true .+ 0.01, CA.ComponentVector(unc=ϕunc)) + + interpreters = (; pmu=ComponentArrayInterpreter(ϕ_true)) #, M=int_θM, PMs=int_θPMs) + + n_MC = 3 + @testset "sample_ζ_norm0 cpu" begin + ϕ = CA.getdata(ϕ_cpu) + ϕc = interpreters.pmu(ϕ) + ζ_resid, logdetΣ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC) + @test size(ζ_resid) == (length(ϕc.P) + n_θM * n_site, n_MC) + gr = Zygote.gradient(ϕc -> sum(CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc)[1]), ϕc)[1] + @test length(gr) == length(ϕ) + end; + # + @testset "sample_ζ_norm0 gpu" begin + ϕ = CuArray(CA.getdata(ϕ_cpu)); + #tmp = ϕ[1:6] + #vec2uutri(tmp) + ϕc = interpreters.pmu(ϕ); + @test CA.getdata(ϕc) isa GPUArraysCore.AbstractGPUArray + #ζP, ζMs, ϕunc = ϕc.P, ϕc.Ms, ϕc.unc + #urand = CUDA.randn(length(ϕc.P) + length(ϕc.Ms), n_MC) |> gpu + #include(joinpath(@__DIR__, "uncNN", "elbo.jl")) # callback_loss + #ζ_resid, logdetΣ = sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC) + #Zygote.gradient(ϕc -> sum(sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)[1]), ϕc)[1]; + ζ_resid, logdetΣ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC) + @test ζ_resid isa GPUArraysCore.AbstractGPUArray + @test size(ζ_resid) == (length(ϕc.P) + n_θM * n_site, n_MC) + gr = Zygote.gradient(ϕc -> sum(CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)[1]), ϕc)[1]; + @test length(gr) == length(ϕ) + int_unc = ComponentArrayInterpreter(ϕc.unc) + gr2 = Zygote.gradient(ϕc -> sum(CP.sample_ζ_norm0(rng, CA.getdata(ϕc.P), CA.getdata(ϕc.Ms), CA.getdata(ϕc.unc), int_unc; n_MC)[1]), ϕc)[1]; + end; + + # @testset "generate_ζ" begin + # ϕ = CA.getdata(ϕ_cpu) + # n_sample_pred = 200 + # intm_PMs_gen = ComponentArrayInterpreter(CA.ComponentVector(; θP_true, + # θMs=CA.ComponentMatrix( + # zeros(n_θM, n_site), first(CA.getaxes(θMs_true)), CA.Axis(i=1:n_sample_pred)))) + # int_μP_ϕg_unc=ComponentArrayInterpreter(ϕ_true) + # interpreters = (; PMs = intm_PMs_gen, μP_ϕg_unc = int_μP_ϕg_unc ) + # ζs, _ = CP.generate_ζ(rng, g, f, ϕ, xM, interpreters; n_MC=n_sample_pred) + + # end; +end; From 4a1d5c2877f372128f55f9f77d412f3f38d19b90 Mon Sep 17 00:00:00 2001 From: Thomas Wutzler Date: Thu, 9 Jan 2025 15:18:06 +0100 Subject: [PATCH 2/6] implement ELBO cost function --- dev/Project.toml | 4 + dev/doubleMM.jl | 281 ++++++++---------- ext/HybridVariationalInferenceFluxExt.jl | 34 ++- src/DoubleMM/f_doubleMM.jl | 1 + src/GPUDataHandler.jl | 31 ++ src/HybridVariationalInference.jl | 9 +- src/elbo.jl | 75 ++++- src/logden_normal.jl | 62 ++++ ...variablesjl => util_transformvariables.jl} | 0 test/Project.toml | 2 + test/runtests.jl | 4 + test/test_Flux.jl | 28 +- test/test_elbo.jl | 147 +++++---- test/test_logden_normal.jl | 42 +++ 14 files changed, 508 insertions(+), 212 deletions(-) create mode 100644 src/GPUDataHandler.jl create mode 100644 src/logden_normal.jl rename src/{util._transformvariablesjl => util_transformvariables.jl} (100%) create mode 100644 test/test_logden_normal.jl diff --git a/dev/Project.toml b/dev/Project.toml index 04a4a3a..162ab31 100644 --- a/dev/Project.toml +++ b/dev/Project.toml @@ -3,9 +3,12 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" 
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" HybridVariationalInference = "a108c475-a4e2-4021-9a84-cfa7df242f64" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" +Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SimpleChains = "de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5" @@ -15,3 +18,4 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" TransformVariables = "84d833dd-6860-57f9-a1a7-6da5db126cff" UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" +cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" diff --git a/dev/doubleMM.jl b/dev/doubleMM.jl index cb754a8..7f2a013 100644 --- a/dev/doubleMM.jl +++ b/dev/doubleMM.jl @@ -1,20 +1,24 @@ using Test using HybridVariationalInference +using HybridVariationalInference: HybridVariationalInference as HVI using StableRNGs using Random using Statistics using ComponentArrays: ComponentArrays as CA using SimpleChains +import Flux # to allow for FluxMLEngine and cpu() using MLUtils import Zygote +using CUDA +using TransformVariables using OptimizationOptimisers - using UnicodePlots const case = DoubleMM.DoubleMMCase() const MLengine = Val(nameof(SimpleChains)) +const FluxMLengine = Val(nameof(Flux)) scenario = (:default,) rng = StableRNG(111) @@ -22,26 +26,8 @@ par_templates = get_hybridcase_par_templates(case; scenario) (; n_covar, n_site, n_batch, n_θM, n_θP) = get_hybridcase_sizes(case; scenario) -# const int_θP = ComponentArrayInterpreter(par_templates.θP) -# const int_θM = ComponentArrayInterpreter(par_templates.θM) -# const int_θPMs_flat = ComponentArrayInterpreter(P = n_θP, Ms = n_θM * n_batch) -# const int_θ = ComponentArrayInterpreter(CA.ComponentVector(;θP=par_templates.θP,θM=par_templates.θM)) -# # moved to f_doubleMM -# # const int_θdoubleMM = ComponentArrayInterpreter(flatten1(CA.ComponentVector(;θP,θM))) -# # const S1 = [1.0, 1.0, 1.0, 0.3, 0.1] -# # const S2 = [1.0, 3.0, 5.0, 5.0, 5.0] -# θ = CA.getdata(vcat(par_templates.θP, par_templates.θM)) - -# const int_θPMs = ComponentArrayInterpreter(CA.ComponentVector(;par_templates.θP, -# θMs=CA.ComponentMatrix(zeros(n_θM, n_batch), first(CA.getaxes(par_templates.θM)), CA.Axis(i=1:n_batch)))) - -(; xM, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o) = gen_hybridcase_synthetic( - case, rng; scenario); - -@test isapprox( - vec(mean(CA.getdata(θMs_true); dims = 2)), CA.getdata(par_templates.θM), rtol = 0.02) -@test isapprox(vec(std(CA.getdata(θMs_true); dims = 2)), - CA.getdata(par_templates.θM) .* 0.1, rtol = 0.02) +(; xM, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o, σ_o +) = gen_hybridcase_synthetic(case, rng; scenario); #----- fit g to θMs_true g, ϕg0 = gen_hybridcase_MLapplicator(case, MLengine; scenario); @@ -58,21 +44,22 @@ Zygote.gradient(x -> loss_g(x, xM, g)[1], ϕg0); optf = Optimization.OptimizationFunction((ϕg, p) -> loss_g(ϕg, xM, g)[1], Optimization.AutoZygote()) optprob = Optimization.OptimizationProblem(optf, ϕg0); -res = Optimization.solve(optprob, Adam(0.02), callback = callback_loss(100), maxiters = 600); +res = Optimization.solve(optprob, Adam(0.02), callback = callback_loss(100), maxiters = 800); ϕg_opt1 = res.u; loss_g(ϕg_opt1, xM, g) scatterplot(vec(θMs_true), vec(loss_g(ϕg_opt1, xM, g)[2])) @test cor(vec(θMs_true), vec(loss_g(ϕg_opt1, xM, g)[2])) > 0.9 -tmpf = () -> begin +f = 
gen_hybridcase_PBmodel(case; scenario) + +() -> begin #----------- fit g and θP to y_o # end2end inversion - f = gen_hybridcase_PBmodel(case; scenario) int_ϕθP = ComponentArrayInterpreter(CA.ComponentVector( ϕg = 1:length(ϕg0), θP = par_templates.θP)) - p = p0 = vcat(ϕg0, par_templates.θP .* 0.9); # slightly disturb θP_true + p = p0 = vcat(ϕg0, par_templates.θP .* 0.9) # slightly disturb θP_true # Pass the site-data for the batches as separate vectors wrapped in a tuple train_loader = MLUtils.DataLoader((xM, xP, y_o), batchsize = n_batch) @@ -85,7 +72,7 @@ tmpf = () -> begin optprob = OptimizationProblem(optf, p0, train_loader) res = Optimization.solve( - optprob, Adam(0.02), callback = callback_loss(100), maxiters = 1000); + optprob, Adam(0.02), callback = callback_loss(100), maxiters = 1000) l1, y_pred_global, y_pred, θMs = loss_gf(res.u, train_loader.data...) scatterplot(vec(θMs_true), vec(θMs)) @@ -94,108 +81,105 @@ tmpf = () -> begin hcat(par_templates.θP, int_ϕθP(res.u).θP) end -#---------- HADVI +#---------- HVI # TODO think about good general initializations coef_logσ2_logMs = [-5.769 -3.501; -0.01791 0.007951] -logσ2_logP = CA.ComponentVector(r0=-8.997, K2=-5.893) +logσ2_logP = CA.ComponentVector(r0 = -8.997, K2 = -5.893) mean_σ_o_MC = 0.006042 # correlation matrices -ρsP = zeros(sum(1:(n_θP-1))) -ρsM = zeros(sum(1:(n_θM-1))) +ρsP = zeros(sum(1:(n_θP - 1))) +ρsM = zeros(sum(1:(n_θM - 1))) ϕunc = CA.ComponentVector(; - logσ2_logP=logσ2_logP, - coef_logσ2_logMs=coef_logσ2_logMs, + logσ2_logP = logσ2_logP, + coef_logσ2_logMs = coef_logσ2_logMs, ρsP, ρsM) int_unc = ComponentArrayInterpreter(ϕunc) # for a conservative uncertainty assume σ2=1e-10 and no relationship with magnitude ϕunc0 = CA.ComponentVector(; - logσ2_logP=fill(-10.0, n_θP), - coef_logσ2_logMs=reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), + logσ2_logP = fill(-10.0, n_θP), + coef_logσ2_logMs = reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), ρsP, ρsM) -logσ2y = fill(2 * log(σ_o), size(y_o, 1)) +logσ2y = 2 .* log.(σ_o) n_MC = 3 - -#-------------- ADVI with g inside cost function -using CUDA -using TransformVariables - transPMs_batch = as( - (P=as(Array, asℝ₊, n_θP), - Ms=as(Array, asℝ₊, n_θM, n_batch))) + (P = as(Array, asℝ₊, n_θP), + Ms = as(Array, asℝ₊, n_θM, n_batch))) transPMs_all = as( - (P=as(Array, asℝ₊, n_θP), - Ms=as(Array, asℝ₊, n_θM, n_site))) - -ϕ_true = θ = CA.ComponentVector(; - μP=θP_true, - ϕg=ϕg_opt, - unc=ϕunc); + (P = as(Array, asℝ₊, n_θP), + Ms = as(Array, asℝ₊, n_θM, n_site))) + +n_ϕg = length(ϕg_opt1) +ϕt_true = θ = CA.ComponentVector(; + μP = θP_true, + ϕg = ϕg_opt1, + unc = ϕunc); trans_gu = as( - (μP=as(Array, asℝ₊, n_θP), - ϕg=as(Array, n_ϕg), - unc=as(Array, length(ϕunc)))) + (μP = as(Array, asℝ₊, n_θP), + ϕg = as(Array, n_ϕg), + unc = as(Array, length(ϕunc)))) trans_g = as( - (μP=as(Array, asℝ₊, n_θP), - ϕg=as(Array, n_ϕg))) - -const int_PMs_batch = ComponentArrayInterpreter(CA.ComponentVector(; θP, - θMs=CA.ComponentMatrix( - zeros(n_θM, n_batch), first(CA.getaxes(θM)), CA.Axis(i=1:n_batch)))) - -interpreters = interpreters_g = map(get_concrete,(; - μP_ϕg_unc=ComponentArrayInterpreter(ϕ_true), - PMs=int_PMs_batch, - unc=ComponentArrayInterpreter(ϕunc) + (μP = as(Array, asℝ₊, n_θP), + ϕg = as(Array, n_ϕg))) + +#const +int_PMs_batch = ComponentArrayInterpreter(CA.ComponentVector(; θP = θP_true, + θMs = CA.ComponentMatrix( + zeros(n_θM, n_batch), first(CA.getaxes(θMs_true)), CA.Axis(i = 1:n_batch)))) + +interpreters = interpreters_g = map(get_concrete, + (; + μP_ϕg_unc = 
ComponentArrayInterpreter(ϕt_true), + PMs = int_PMs_batch, + unc = ComponentArrayInterpreter(ϕunc) )) -ϕg_true_vec = CA.ComponentVector( - TransformVariables.inverse(trans_gu, cv2NamedTuple(ϕ_true))) -ϕcg_true = interpreters.μP_ϕg_unc(ϕg_true_vec) -ϕ_ini = ζ = vcat(ϕcg_true[[:μP, :ϕg]] .* 1.2, ϕcg_true[[:unc]]); -ϕ_ini0 = ζ = vcat(ϕcg_true[:μP] .* 0.0, SimpleChains.init_params(g), ϕunc0); +ϕ_true = inverse_ca(trans_gu, ϕt_true) +ϕ_ini = ζ = vcat(ϕ_true[[:μP, :ϕg]] .* 1.2, ϕ_true[[:unc]]); # slight disturbance +ϕ_ini0 = ζ = vcat(ϕ_true[:μP] .* 0.0, ϕg0, ϕunc0); # scratch -neg_elbo_transnorm_gf(rng, g, f, ϕcg_true, y_o[:, 1:n_batch], x_o[:, 1:n_batch], - transPMs_batch, map(get_concrete, interpreters); - n_MC=8, logσ2y) -Zygote.gradient(ϕ -> neg_elbo_transnorm_gf( - rng, g, f, ϕ, y_o[:, 1:n_batch], x_o[:, 1:n_batch], - transPMs_batch, interpreters; n_MC=8, logσ2y), ϕcg_true) +# test cost function and gradient +() -> begin + neg_elbo_transnorm_gf(rng, g, f, ϕ_true, y_o[:, 1:n_batch], xM[:, 1:n_batch], + transPMs_batch, map(get_concrete, interpreters); + n_MC = 8, logσ2y) + Zygote.gradient( + ϕ -> neg_elbo_transnorm_gf( + rng, g, f, ϕ, y_o[:, 1:n_batch], xM[:, 1:n_batch], + transPMs_batch, interpreters; n_MC = 8, logσ2y), + CA.getdata(ϕ_true)) +end +# optimize using SimpleChains () -> begin - train_loader = MLUtils.DataLoader((x_o, y_o), batchsize = n_batch) + train_loader = MLUtils.DataLoader((xM, y_o), batchsize = n_batch) - optf = Optimization.OptimizationFunction((ζg, data) -> begin - x_o, y_o = data + optf = Optimization.OptimizationFunction( + (ζg, data) -> begin + xM, y_o = data neg_elbo_transnorm_gf( - rng, g, f, ζg, y_o, x_o, transPMs_batch, map(get_concrete, interpreters_g); n_MC=5, logσ2y) + rng, g, f, ζg, y_o, xM, transPMs_batch, + map(get_concrete, interpreters_g); n_MC = 5, logσ2y) end, Optimization.AutoZygote()) - optprob = Optimization.OptimizationProblem(optf, CA.getdata(ϕ_ini), train_loader); - res = Optimization.solve(optprob, Optimisers.Adam(0.02), callback=callback_loss(50), maxiters=800); + optprob = Optimization.OptimizationProblem(optf, CA.getdata(ϕ_ini), train_loader) + res = Optimization.solve( + optprob, Optimisers.Adam(0.02), callback = callback_loss(50), maxiters = 800) #optprob = Optimization.OptimizationProblem(optf, ϕ_ini0); #res = Optimization.solve(optprob, Adam(0.02), callback=callback_loss(50), maxiters=1_400); end - -#using Lux -ϕ = ϕcg_true |> gpu; -x_o_gpu = x_o |> gpu; -# y_o = y_o |> gpu -# logσ2y = logσ2y |> gpu -n_covar = size(x_o, 1) -g_flux = Flux.Chain( - # dense layer with bias that maps to 8 outputs and applies `tanh` activation - Flux.Dense(n_covar => n_covar * 4, tanh), - Flux.Dense(n_covar * 4 => n_covar * 4, logistic), - # dense layer without bias that maps to n outputs and `identity` activation - Flux.Dense(n_covar * 4 => n_θM, identity, bias=false), -) + +ϕ = ϕ_true |> Flux.gpu; +xM_gpu = xM |> Flux.gpu; +g_flux, ϕg0_flux_cpu = gen_hybridcase_MLapplicator(case, FluxMLengine; scenario); + +# otpimize using LUX () -> begin using Lux g_lux = Lux.Chain( @@ -203,59 +187,56 @@ g_flux = Flux.Chain( Lux.Dense(n_covar => n_covar * 4, tanh), Lux.Dense(n_covar * 4 => n_covar * 4, logistic), # dense layer without bias that maps to n outputs and `identity` activation - Lux.Dense(n_covar * 4 => n_θM, identity, use_bias=false), + Lux.Dense(n_covar * 4 => n_θM, identity, use_bias = false) ) ps, st = Lux.setup(Random.default_rng(), g_lux) ps_ca = CA.ComponentArray(ps) |> gpu st = st |> gpu g_luxs = StatefulLuxLayer{true}(g_lux, nothing, st) - 
g_luxs(x_o_gpu[:, 1:n_batch], ps_ca) + g_luxs(xM_gpu[:, 1:n_batch], ps_ca) ax_g = CA.getaxes(ps_ca) - g_luxs(x_o_gpu[:, 1:n_batch], CA.ComponentArray(ϕ.ϕg, ax_g)) + g_luxs(xM_gpu[:, 1:n_batch], CA.ComponentArray(ϕ.ϕg, ax_g)) interpreters = (interpreters..., ϕg = ComponentArrayInterpreter(ps_ca)) ϕg = CA.ComponentArray(ϕ.ϕg, ax_g) ϕgc = interpreters.ϕg(ϕ.ϕg) - g_gpu = g_luxs + g_flux = g_luxs end -g_gpu = g_flux - -#Zygote.gradient(ϕg -> sum(g_gpu(x_o_gpu[:, 1:n_batch],ϕg)), ϕgc) -# Zygote.gradient(ϕg -> sum(compute_g(g_gpu, x_o_gpu[:, 1:n_batch], ϕg, interpreters)), ϕ.ϕg) -# Zygote.gradient(ϕ -> sum(tmp_gen1(g_gpu, x_o_gpu[:, 1:n_batch], ϕ, interpreters)), ϕ.ϕg) -# Zygote.gradient(ϕ -> sum(tmp_gen2(g_gpu, x_o_gpu[:, 1:n_batch], ϕ, interpreters)), CA.getdata(ϕ)) -# Zygote.gradient(ϕ -> sum(tmp_gen2(g_gpu, x_o_gpu[:, 1:n_batch], ϕ, interpreters)), ϕ) |> cpu -# Zygote.gradient(ϕ -> sum(tmp_gen3(g_gpu, x_o_gpu[:, 1:n_batch], ϕ, interpreters)), ϕ) |> cpu -# Zygote.gradient(ϕ -> sum(tmp_gen4(g_gpu, x_o_gpu[:, 1:n_batch], ϕ, interpreters)[1]), ϕ) |> cpu -# generate_ζ(rng, g_gpu, f, ϕ, x_o_gpu[:, 1:n_batch], interpreters) -# Zygote.gradient(ϕ -> sum(generate_ζ(rng, g_gpu, f, ϕ, x_o_gpu[:, 1:n_batch], interpreters)[1]), ϕ) |> cpu -# include(joinpath(@__DIR__, "uncNN", "elbo.jl")) # callback_loss -# neg_elbo_transnorm_gf(rng, g_gpu, f, ϕ, y_o[:, 1:n_batch], -# x_o_gpu[:, 1:n_batch], transPMs_batch, interpreters; logσ2y) -# Zygote.gradient(ϕ -> sum(neg_elbo_transnorm_gf(rng, g_gpu, f, ϕ, y_o[:, 1:n_batch], -# x_o_gpu[:, 1:n_batch], transPMs_batch, interpreters; logσ2y)[1]), ϕ) |> cpu - - -fcost(ϕ) = neg_elbo_transnorm_gf(rng, g_gpu, f, ϕ, y_o[:, 1:n_batch], - x_o_gpu[:, 1:n_batch], transPMs_batch, map(get_concrete, interpreters); - n_MC=8, logσ2y = logσ2y) -fcost(ϕ) -gr = Zygote.gradient(fcost, ϕ) |> cpu; -Zygote.gradient(fcost, CA.getdata(ϕ)) +function fcost(ϕ) + neg_elbo_transnorm_gf(rng, g_flux, f, CA.getdata(ϕ), y_o[:, 1:n_batch], + xM_gpu[:, 1:n_batch], transPMs_batch, map(get_concrete, interpreters); + n_MC = 8, logσ2y = logσ2y) +end +fcost(ϕ) +Zygote.gradient(fcost, ϕ) |> cpu; +gr = Zygote.gradient(fcost, CA.getdata(ϕ)); +gr_c = CA.ComponentArray(gr[1], CA.getaxes(ϕ)...) 
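+# quick diagnostic sketch (not part of the fit): check which parameter block
+# dominates the ELBO gradient; assumes `gr_c` from above, possibly on the GPU
+map(k -> k => maximum(abs, Flux.cpu(CA.getdata(gr_c[k]))), (:μP, :ϕg, :unc))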
-train_loader = MLUtils.DataLoader((x_o_gpu, y_o), batchsize = n_batch) +train_loader = MLUtils.DataLoader((xM_gpu, y_o), batchsize = n_batch) -optf = Optimization.OptimizationFunction((ζg, data) -> begin - x_o, y_o = data +optf = Optimization.OptimizationFunction( + (ζg, data) -> begin + xM, y_o = data neg_elbo_transnorm_gf( - rng, g_gpu, f, ζg, y_o, x_o, transPMs_batch, map(get_concrete, interpreters_g); n_MC=5, logσ2y) + rng, g_flux, f, ζg, y_o, xM, transPMs_batch, + map(get_concrete, interpreters_g); n_MC = 5, logσ2y) end, Optimization.AutoZygote()) -optprob = Optimization.OptimizationProblem(optf, CA.getdata(ϕ_ini) |> gpu, train_loader); -res = res_gpu = Optimization.solve(optprob, Optimisers.Adam(0.02), callback=callback_loss(50), maxiters=800); +optprob = Optimization.OptimizationProblem( + optf, CA.getdata(ϕ_ini) |> Flux.gpu, train_loader); +res = res_gpu = Optimization.solve( + optprob, Optimisers.Adam(0.02), callback = callback_loss(50), maxiters = 800); -ζ_VIc = interpreters_g.μP_ϕg_unc(res.u |> cpu) -ζMs_VI = g(x_o, ζ_VIc.ϕg) +# start from zero +() -> begin + optprob = Optimization.OptimizationProblem( + optf, CA.getdata(ϕ_ini0) |> Flux.gpu, train_loader); + res = res_gpu = Optimization.solve( + optprob, Optimisers.Adam(0.02), callback = callback_loss(50), maxiters = 4_000); +end + +ζ_VIc = interpreters_g.μP_ϕg_unc(res.u |> Flux.cpu) +ζMs_VI = g(xM, ζ_VIc.ϕg) ϕunc_VI = int_unc(ζ_VIc.unc) hcat(θP_true, exp.(ζ_VIc.μP)) @@ -266,31 +247,35 @@ hcat(ϕunc, ϕunc_VI) # need to compare to MC sample # hard to estimate for original very small theta's but otherwise good # test predicting correct obs-uncertainty of predictive posterior +# TODO reuse g_flux rather than g n_sample_pred = 200 -intm_PMs_gen = ComponentArrayInterpreter(CA.ComponentVector(; θP, - θMs=CA.ComponentMatrix( - zeros(n_θM, n_site), first(CA.getaxes(θM)), CA.Axis(i=1:n_sample_pred)))) +intm_PMs_gen = ComponentArrayInterpreter(CA.ComponentVector(; θP = θP_true, + θMs = CA.ComponentMatrix( + zeros(n_θM, n_site), first(CA.getaxes(θMs_true)), CA.Axis(i = 1:n_sample_pred)))) -include(joinpath(@__DIR__, "uncNN", "elbo.jl")) # callback_loss -ζs, _ = generate_ζ(rng, g, f, res.u |> cpu, x_o, - (;interpreters..., PMs = intm_PMs_gen); n_MC=n_sample_pred) +ζs, _ = HVI.generate_ζ(rng, g, f, res.u |> Flux.cpu, xM, + (; interpreters..., PMs = intm_PMs_gen); n_MC = n_sample_pred) # ζ = ζs[:,1] -θsc = stack(ζ -> CA.getdata(CA.ComponentVector( - TransformVariables.transform(transPMs_all, ζ))), eachcol(ζs)); -y_pred = stack(map(ζ -> first(predict_y(ζ, f, transPMs_all)), eachcol(ζs))); +θsc = stack( + ζ -> CA.getdata(CA.ComponentVector( + TransformVariables.transform(transPMs_all, ζ))), + eachcol(ζs)); +y_pred = stack(map(ζ -> first(HVI.predict_y(ζ, f, transPMs_all)), eachcol(ζs))); size(y_pred) -σ_o_post = mapslices(std, y_pred; dims=3); +σ_o_post = mapslices(std, y_pred; dims = 3)[:, :, 1]; #describe(σ_o_post) -vcat(σ_o, mean_σ_o_MC, mean(σ_o_post), sqrt(mean(abs2, σ_o_post))) -mean_y_pred = map(mean, eachslice(y_pred; dims=(1, 2))) +hcat(σ_o, fill(mean_σ_o_MC, length(σ_o)), + mean(σ_o_post, dims = 2), sqrt.(mean(abs2, σ_o_post, dims = 2))) +mean_y_pred = map(mean, eachslice(y_pred; dims = (1, 2))) #describe(mean_y_pred - y_o) histogram(vec(mean_y_pred - y_true)) # predictions centered around y_o (or y_true) # look at θP, θM1 of first site -intm = ComponentArrayInterpreter(int_θdoubleMM(1:length(int_θdoubleMM)), (n_sample_pred,)) -ζs1c = intm(ζs[1:length(int_θdoubleMM), :]) -vcat(θP_true, θM_true) +θPM = vcat(θP_true, θMs_true[:, 1]) 
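+# reference values: the true global parameters together with θM of the first site,
+# used below to compare the marginal posterior samples against the truth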
+intm = ComponentArrayInterpreter(θPM, (n_sample_pred,))
+ζs1c = intm(ζs[1:length(θPM), :])
+θPM
 histogram(exp.(ζs1c[:r0, :]))
 histogram(exp.(ζs1c[:K2, :]))
 histogram(exp.(ζs1c[:r1, :]))
@@ -301,9 +286,3 @@ scatterplot(ζs1c[:r0, :], ζs1c[:K2, :]) # r0 and K also correlated (from θP)
 scatterplot(ζs1c[:r0, :], ζs1c[:K1, :]) # no correlation (modeled independent)
 
 # TODO compare distributions to MC sample
-
-
-
-
-
-
diff --git a/ext/HybridVariationalInferenceFluxExt.jl b/ext/HybridVariationalInferenceFluxExt.jl
index 901a1a9..61b7095 100644
--- a/ext/HybridVariationalInferenceFluxExt.jl
+++ b/ext/HybridVariationalInferenceFluxExt.jl
@@ -1,20 +1,46 @@
-module HybridVariationalInferenceFluxExt 
+module HybridVariationalInferenceFluxExt
 using HybridVariationalInference, Flux
 using HybridVariationalInference: HybridVariationalInference as HVI
 
-struct FluxApplicator{RT} <: AbstractModelApplicator 
+struct FluxApplicator{RT} <: AbstractModelApplicator
     rebuild::RT
 end
 
-function HVI.construct_FluxApplicator(m::Chain) 
+function HVI.construct_FluxApplicator(m::Chain)
     _, rebuild = destructure(m)
     FluxApplicator(rebuild)
 end
 
-function HVI.apply_model(app::FluxApplicator, x, ϕ) 
+function HVI.apply_model(app::FluxApplicator, x, ϕ)
     m = app.rebuild(ϕ)
     m(x)
 end
 
+struct FluxGPUDataHandler <: AbstractGPUDataHandler end
+HVI.handle_GPU_data(::FluxGPUDataHandler, x::AbstractArray) = cpu(x)
+
+function __init__()
+    #@info "HybridVariationalInference: setting FluxGPUDataHandler"
+    HVI.set_default_GPUHandler(FluxGPUDataHandler())
+end
+
+function HVI.gen_hybridcase_MLapplicator(case::HVI.DoubleMM.DoubleMMCase, ::Val{:Flux};
+        scenario::NTuple = ())
+    (; n_covar, n_θM) = get_hybridcase_sizes(case; scenario)
+    FloatType = get_hybridcase_FloatType(case; scenario)
+    n_out = n_θM
+    is_using_dropout = :use_dropout ∈ scenario
+    is_using_dropout && error("dropout scenario not supported with Flux yet.")
+    g_chain = Flux.Chain(
+        # dense layer with bias that maps to n_covar * 4 outputs and applies `tanh` activation
+        Flux.Dense(n_covar => n_covar * 4, tanh),
+        Flux.Dense(n_covar * 4 => n_covar * 4, tanh),
+        # dense layer without bias that maps to n_out outputs and `identity` activation
+        Flux.Dense(n_covar * 4 => n_out, identity, bias = false)
+    )
+    ϕ, _ = destructure(g_chain)
+    construct_FluxApplicator(g_chain), ϕ
+end
+
 end # module
diff --git a/src/DoubleMM/f_doubleMM.jl b/src/DoubleMM/f_doubleMM.jl
index 31fb2e1..4c92fdd 100644
--- a/src/DoubleMM/f_doubleMM.jl
+++ b/src/DoubleMM/f_doubleMM.jl
@@ -69,5 +69,6 @@ function HybridVariationalInference.gen_hybridcase_synthetic(case::DoubleMMCase,
         y_true,
         y_global_o,
         y_o,
+        σ_o = fill(σ_o, size(y_true,1)),
     )
 end
diff --git a/src/GPUDataHandler.jl b/src/GPUDataHandler.jl
new file mode 100644
index 0000000..64d67e5
--- /dev/null
+++ b/src/GPUDataHandler.jl
@@ -0,0 +1,31 @@
+abstract type AbstractGPUDataHandler end
+
+"""
+    (app::AbstractGPUDataHandler)(x) = handle_GPU_data(app, x)
+
+Callable applied to argument `x`, used to configure the exchange of data between
+GPU and CPU.
+By default, nothing is done to `x`.
+Package extensions for Flux and Lux implement overloads for `AbstractGPUArray`
+that call `cpu(x)` to transfer data from the GPU to the CPU. Those package
+extensions also call `set_default_GPUHandler()` from their `__init__`, so that
+loading the extension makes its handler the default.
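+
+A minimal usage sketch (which handler is returned depends on the loaded extensions):
+
+```julia
+x = rand(Float32, 3)          # a CPU array
+h = get_default_GPUHandler()  # NullGPUDataHandler, or e.g. FluxGPUDataHandler once Flux is loaded
+h(x)                          # identity for CPU arrays; GPU arrays are moved via `cpu(x)`
+```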
+""" +function handle_GPU_data(::AbstractGPUDataHandler, x::AbstractArray) + x +end + +(app::AbstractGPUDataHandler)(x) = handle_GPU_data(app, x) + + +struct NullGPUDataHandler <: AbstractGPUDataHandler end + +handle_GPU_data(::NullGPUDataHandler, x::AbstractArray) = x + +default_GPU_DataHandler = NullGPUDataHandler() +get_default_GPUHandler() = default_GPU_DataHandler +function set_default_GPUHandler(handler::AbstractGPUDataHandler) + @info "set_default_GPUHandler: setting default handler to $handler" + global default_GPU_DataHandler = handler +end + + diff --git a/src/HybridVariationalInference.jl b/src/HybridVariationalInference.jl index 7dee309..1c619e4 100644 --- a/src/HybridVariationalInference.jl +++ b/src/HybridVariationalInference.jl @@ -13,7 +13,7 @@ using Zygote # Zygote.@ignore CUDA.randn using BlockDiagonals export inverse_ca -include("util._transformvariablesjl") +include("util_transformvariables.jl") export ComponentArrayInterpreter, flatten1, get_concrete include("ComponentArrayInterpreter.jl") @@ -22,6 +22,9 @@ export AbstractModelApplicator, construct_SimpleChainsApplicator, construct_Flux construct_LuxApplicator include("ModelApplicator.jl") +export AbstractGPUDataHandler, NullGPUDataHandler, get_default_GPUHandler +include("GPUDataHandler.jl") + export AbstractHybridCase, gen_hybridcase_MLapplicator, gen_hybridcase_PBmodel, get_hybridcase_sizes, get_hybridcase_FloatType, gen_hybridcase_synthetic, get_hybridcase_par_templates, gen_cov_pred include("hybrid_case.jl") @@ -35,9 +38,13 @@ include("gencovar.jl") export callback_loss include("util_opt.jl") +export neg_logden_indep_normal, entropy_MvNormal +include("logden_normal.jl") + #export - all internal include("cholesky.jl") +export neg_elbo_transnorm_gf include("elbo.jl") export DoubleMM diff --git a/src/elbo.jl b/src/elbo.jl index db0cd97..eb6d4db 100644 --- a/src/elbo.jl +++ b/src/elbo.jl @@ -1,3 +1,55 @@ +""" +Cost function (ELBO) for hybrid model with batched sites. + +It generates n_MC samples for each site, and uses these to compute the +expected value of the likelihood of observations. + +## Arguments +- rng: random number generator (ignored on CUDA, if ϕ is a AbstactGPUArray) +- g: machine learnig model +- f: mechanistic model +- ϕ: flat vector of parameters + including parameter of f (ϕ_P), of g (ϕ_Ms), and of VI (ϕ_unc), + interpreted by interpreters.μP_ϕg_unc and interpreters.PMs +- y_ob: matrix of observations (n_obs x n_site_batch) +- x: matrix of covariates (n_cov x n_site_batch) +- transPMs: Transformations with componets P, Ms, similar to interpreters +- n_MC: number of MonteCarlo samples from the distribution of parameters to simulate + using the mechanistic model f. 
+- logσ2y: observation uncertainty (log of the variance)
+"""
+function neg_elbo_transnorm_gf(rng, g, f, ϕ::AbstractVector, y_ob, x::AbstractMatrix,
+    transPMs, interpreters::NamedTuple;
+    n_MC=3, logσ2y, gpu_data_handler = get_default_GPUHandler())
+    ζ, logdetΣ = generate_ζ(rng, g, f, ϕ, x, interpreters; n_MC)
+    ζ_cpu = gpu_data_handler(ζ) # differentiable fetch to CPU in Flux package extension
+    #ζi = first(eachcol(ζ_cpu))
+    nLy = reduce(+, map(eachcol(ζ_cpu)) do ζi
+        y_pred_i, logjac = predict_y(ζi, f, transPMs)
+        nLy1 = neg_logden_indep_normal(y_ob, y_pred_i, logσ2y)
+        nLy1 - logjac
+    end) / n_MC
+    ent = entropy_MvNormal(size(ζ, 1), logdetΣ) # defined in logden_normal
+    nLy - ent
+end
+
+# predict observations for n_MC parameter samples:
+# no observations are involved here; each sample is transformed and run through f
+function predict_gf(rng, g, f, ϕ::AbstractVector, x::AbstractMatrix,
+    transPMs, interpreters::NamedTuple;
+    n_MC=3, gpu_data_handler = get_default_GPUHandler())
+    ζ, _logdetΣ = generate_ζ(rng, g, f, ϕ, x, interpreters; n_MC)
+    ζ_cpu = gpu_data_handler(ζ) # differentiable fetch to CPU in Flux package extension
+    y_pred = stack(map(eachcol(ζ_cpu)) do ζi
+        first(predict_y(ζi, f, transPMs))
+    end)
+    y_pred
+end
+
+
+
 """
 Generate samples of (inv-transformed) model parameters, ζ, and Log-Determinant
 of their distribution.
@@ -93,4 +145,25 @@ function _create_random(rng, ::GPUArraysCore.AbstractGPUVector{T}, dims...) where {T}
     # ignores rng
     # https://discourse.julialang.org/t/help-using-cuda-zygote-and-random-numbers/123458/4?u=bgctw
     # Zygote.@ignore CUDA.randn(rng, dims...)
     Zygote.@ignore CUDA.randn(dims...)
-end \ No newline at end of file
+end
+
+"""
+Compute predictions and the log-determinant of the transformation at the given
+transformed parameters for each site.
+
+The number of sites is given by the number of columns in `Ms`, which is determined
+by the transformation, `transPMs`.
+
+Steps:
+- transform the parameters to the original constrained space
+- apply the mechanistic model for each site
+"""
+function predict_y(ζi, f, transPMs)
+    θtup, logjac = transform_and_logjac(transPMs, ζi) # both allocating
+    θc = CA.ComponentVector(θtup)
+    # TODO provide xP
+    xP = fill((), size(θc.Ms,2))
+    y_pred_global, y_pred = f(θc.P, θc.Ms, xP) # TODO parallelize on CPU
+    # TODO take care of y_pred_global
+    y_pred, logjac
+end
diff --git a/src/logden_normal.jl b/src/logden_normal.jl
new file mode 100644
index 0000000..b4d4dfe
--- /dev/null
+++ b/src/logden_normal.jl
@@ -0,0 +1,62 @@
+"""
+    neg_logden_indep_normal(obs, μ, logσ2s; σfac=1.0)
+
+Compute the negative log-density of observations `obs` under multiple independent
+normal distributions, given estimated means `μ` and estimated logs of the
+variance parameters, `logσ2s`.
+
+All arguments should be vectors of the same length.
+If `obs` and `μ` are given as matrices of several column-vectors, their summed
+log-density is computed, assuming each column has the same `logσ2s`.
+
+Keyword argument `σfac` can be increased to put more weight on achieving
+a low uncertainty estimate and means closer to the observations, which helps
+an initial fit. The obtained parameters can then be used as starting values
+for the proper fit with `σfac=1.0`.
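+
+A minimal numeric sketch (arbitrary values, not taken from the package tests):
+
+```julia
+obs = [1.0, 2.0]; μ = [1.1, 1.9]; logσ2 = log.([0.01, 0.01])
+neg_logden_indep_normal(obs, μ, logσ2)             # standard weighting
+neg_logden_indep_normal(obs, μ, logσ2; σfac = 2.0) # weights the log-variance term twice
+```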
+""" +function neg_logden_indep_normal(obs::AbstractVector, μ::AbstractVector, logσ2::AbstractVector; σfac=1.0) + # log of independent Normal distributions + # estimate independent uncertainty of each θM, rather than full covariance + #nlogL = sum(σfac .* log.(σs) .+ 1 / 2 .* abs2.((obs .- μ) ./ σs)) + # problems with division by zero, better constrain log(σs) see Kendall17 + # s = log.(σs) + # nlogL = sum(σfac .* s .+ 1 / 2 .* abs2.((obs .- μ) ./ exp.(s))) + # optimize argument logσ2 rather than σs for performance + #nlogL = sum(σfac .* (1/2) .* logσ2 .+ (1/2) .* exp.(.- logσ2) .* abs2.(obs .- μ)) + # specifying logσ2 instead of σ is not transforming a random variable -> no Jacobian + nlogL = sum(σfac .* logσ2 .+ abs2.(obs .- μ) .* exp.(.-logσ2)) / 2 + return (nlogL) +end +function neg_logden_indep_normal(obss::AbstractMatrix, preds::AbstractMatrix, logσ2::AbstractVector; kwargs...) + nlogLs = map(eachcol(obss), eachcol(preds)) do obs, μ + neg_logden_indep_normal(obs, μ, logσ2; kwargs...) + end + nlogL = sum(nlogLs) + return nlogL +end + +function neg_logden_indep_normal(obss::AbstractMatrix, preds::AbstractMatrix, logσ2s::AbstractMatrix; kwargs...) + nlogLs = map(eachcol(obss), eachcol(preds), eachcol(logσ2s)) do obs, μ, logσ2 + neg_logden_indep_normal(obs, μ, logσ2; kwargs...) + end + nlogL = sum(nlogLs) + return nlogL +end + +entropy_MvNormal(K, logdetΣ) = (K*(1+log(2π)) + logdetΣ)/2 +entropy_MvNormal(Σ) = entropy_MvNormal(size(Σ,1), logdet(Σ)) + +# struct MvNormalLogDensityProblem{T} +# y::AbstractVector{T} +# σfac::T +# end + +# function MvNormalLogDensityProblem(y::AbstractVector{T}; σfac=1) where {T} +# MvNormalLogDensityProblem(y, T(σfac)) +# end + +# # define a callable that unpacks parameters, and evaluates the log likelihood +# function (problem::MvNormalLogDensityProblem)(θ) +# μ, logσ2 = θ +# -sum(problem.σfac .* logσ2 .+ abs2.(problem.y .- μ) .* exp.(.-logσ2)) / 2 +# end + diff --git a/src/util._transformvariablesjl b/src/util_transformvariables.jl similarity index 100% rename from src/util._transformvariablesjl rename to src/util_transformvariables.jl diff --git a/test/Project.toml b/test/Project.toml index 97ffeaf..c4d7183 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,6 +2,7 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -17,3 +18,4 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TransformVariables = "84d833dd-6860-57f9-a1a7-6da5db126cff" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" +cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" diff --git a/test/runtests.jl b/test/runtests.jl index 18434b3..50635e6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,12 +9,16 @@ const GROUP = get(ENV, "GROUP", "All") # defined in in CI.yml @time @safetestset "test_gencovar" include("test_gencovar.jl") #@safetestset "test" include("test/test_SimpleChains.jl") @time @safetestset "test_SimpleChains" include("test_SimpleChains.jl") + #@safetestset "test" include("test/test_logden_normal.jl") + @time @safetestset "test_logden_normal" include("test_logden_normal.jl") #@safetestset "test" include("test/test_doubleMM.jl") @time @safetestset "test_doubleMM" include("test_doubleMM.jl") #@safetestset "test" 
include("test/test_cholesky_structure.jl") @time @safetestset "test_cholesky_structure" include("test_cholesky_structure.jl") #@safetestset "test" include("test/test_sample_zeta.jl") @time @safetestset "test_sample_zeta" include("test_sample_zeta.jl") + #@safetestset "test" include("test/test_elbo.jl") + @time @safetestset "test_elbo" include("test_elbo.jl") # #@safetestset "test" include("test/test_Flux.jl") @time @safetestset "test_Flux" include("test_Flux.jl") diff --git a/test/test_Flux.jl b/test/test_Flux.jl index 54c3190..a3ba11b 100644 --- a/test/test_Flux.jl +++ b/test/test_Flux.jl @@ -1,16 +1,37 @@ -using HybridVariationalInference using Test -using Flux using StatsFuns: logistic using CUDA, GPUArraysCore +using HybridVariationalInference +# @testset "get_default_GPUHandler before loading Flux" begin +# # during overall package testing Flux could be loaded beforehand +# h = get_default_GPUHandler() +# @test h isa NullGPUDataHandler +# x = CuArray(1:5) +# xh = h(x) +# @test xh === x +# end; + +using Flux +@testset "get_default_GPUHandler after loading Flux" begin + # difficult to access type in ext + # HybridVariationalInferenceFluxExt.FluxGPUDataHandler + #typeof(HybridVariationalInference.default_GPU_DataHandler) + h = get_default_GPUHandler() + @test !(h isa NullGPUDataHandler) + x = CuArray(1:5) + xh = h(x) + @test xh isa Vector +end; + + @testset "FluxModelApplicator" begin n_covar = 5 n_out = 2 g_chain = Chain( Dense(n_covar => n_covar * 4, tanh), Dense(n_covar * 4 => n_covar * 4, tanh), - Dense(n_covar * 4 => n_out, logistic, bias=false), + Dense(n_covar * 4 => n_out, identity, bias=false), ); g = construct_FluxApplicator(g_chain) n_site = 3 @@ -26,3 +47,4 @@ using CUDA, GPUArraysCore #@test ϕ isa GPUArraysCore.AbstractGPUArray @test size(y) == (n_out, n_site) end; + diff --git a/test/test_elbo.jl b/test/test_elbo.jl index 62064f7..735a960 100644 --- a/test/test_elbo.jl +++ b/test/test_elbo.jl @@ -1,16 +1,16 @@ #using LinearAlgebra, BlockDiagonals using Test -using Zygote using HybridVariationalInference using HybridVariationalInference: HybridVariationalInference as CP using StableRNGs -using CUDA -using GPUArraysCore: GPUArraysCore using Random using SimpleChains using ComponentArrays: ComponentArrays as CA using TransformVariables +using Zygote +using CUDA +using GPUArraysCore: GPUArraysCore #CUDA.device!(4) rng = StableRNG(111) @@ -25,61 +25,62 @@ f = gen_hybridcase_PBmodel(case; scenario) (; n_covar, n_site, n_batch, n_θM, n_θP) = get_hybridcase_sizes(case; scenario) -(; xM, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o +(; xM, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o, σ_o ) = gen_hybridcase_synthetic(case, rng; scenario); # correlation matrices -ρsP = zeros(sum(1:(n_θP-1))) -ρsM = zeros(sum(1:(n_θM-1))) +ρsP = zeros(sum(1:(n_θP - 1))) +ρsM = zeros(sum(1:(n_θM - 1))) () -> begin coef_logσ2_logMs = [-5.769 -3.501; -0.01791 0.007951] - logσ2_logP = CA.ComponentVector(r0=-8.997, K2=-5.893) + logσ2_logP = CA.ComponentVector(r0 = -8.997, K2 = -5.893) #mean_σ_o_MC = 0.006042 ϕunc = CA.ComponentVector(; - logσ2_logP=logσ2_logP, - coef_logσ2_logMs=coef_logσ2_logMs, + logσ2_logP = logσ2_logP, + coef_logσ2_logMs = coef_logσ2_logMs, ρsP, ρsM) end - # for a conservative uncertainty assume σ2=1e-10 and no relationship with magnitude +logσ2y = 2 .* log.(σ_o) ϕunc0 = CA.ComponentVector(; - logσ2_logP=fill(-10.0, n_θP), - coef_logσ2_logMs=reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), + logσ2_logP = fill(-10.0, n_θP), + coef_logσ2_logMs = 
reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), ρsP, ρsM) #int_unc = ComponentArrayInterpreter(ϕunc0) transPMs_batch = as( - (P=as(Array, asℝ₊, n_θP), - Ms=as(Array, asℝ₊, n_θM, n_batch))) + (P = as(Array, asℝ₊, n_θP), + Ms = as(Array, asℝ₊, n_θM, n_batch))) transPMs_all = as( - (P=as(Array, asℝ₊, n_θP), - Ms=as(Array, asℝ₊, n_θM, n_site))) - + (P = as(Array, asℝ₊, n_θP), + Ms = as(Array, asℝ₊, n_θM, n_site))) + ϕ_true = θ = CA.ComponentVector(; - μP=θP_true, - ϕg=ϕg0, #ϕg_opt, # here start from randomized - unc=ϕunc); + μP = θP_true, + ϕg = ϕg0, #ϕg_opt, # here start from randomized + unc = ϕunc0); trans_gu = as( - (μP=as(Array, asℝ₊, n_θP), - ϕg=as(Array, length(ϕg0)), - unc=as(Array, length(ϕunc)))) + (μP = as(Array, asℝ₊, n_θP), + ϕg = as(Array, length(ϕg0)), + unc = as(Array, length(ϕunc0)))) trans_g = as( - (μP=as(Array, asℝ₊, n_θP), - ϕg=as(Array, length(ϕg0)))) - -int_PMs_batch = ComponentArrayInterpreter(CA.ComponentVector(; θP_true, - θMs=CA.ComponentMatrix( - zeros(n_θM, n_batch), first(CA.getaxes(θMs_true)), CA.Axis(i=1:n_batch)))) - -interpreters = map(get_concrete,(; - μP_ϕg_unc=ComponentArrayInterpreter(ϕ_true), - PMs=int_PMs_batch, - unc=ComponentArrayInterpreter(ϕunc0) + (μP = as(Array, asℝ₊, n_θP), + ϕg = as(Array, length(ϕg0)))) + +int_PMs_batch = ComponentArrayInterpreter(CA.ComponentVector(; θP = θP_true, + θMs = CA.ComponentMatrix( + zeros(n_θM, n_batch), first(CA.getaxes(θMs_true)), CA.Axis(i = 1:n_batch)))) + +interpreters = map(get_concrete, + (; + μP_ϕg_unc = ComponentArrayInterpreter(ϕ_true), + PMs = int_PMs_batch, + unc = ComponentArrayInterpreter(ϕunc0) )) ϕg_true_vec = CA.ComponentVector( @@ -88,22 +89,64 @@ interpreters = map(get_concrete,(; ϕ_ini = ζ = vcat(ϕcg_true[[:μP, :ϕg]] .* 1.2, ϕcg_true[[:unc]]); ϕ_ini0 = ζ = vcat(ϕcg_true[:μP] .* 0.0, ϕg0, ϕunc0); -neg_elbo_transnorm_gf(rng, g, f, ϕcg_true, y_o[:, 1:n_batch], xM[:, 1:n_batch], - transPMs_batch, map(get_concrete, interpreters); - n_MC=8, logσ2y) -Zygote.gradient(ϕ -> neg_elbo_transnorm_gf( - rng, g, f, ϕ, y_o[:, 1:n_batch], x_o[:, 1:n_batch], - transPMs_batch, interpreters; n_MC=8, logσ2y), ϕcg_true) - - @testset "generate_ζ" begin - ϕ = CA.getdata(ϕ_cpu) - n_sample_pred = 200 - intm_PMs_gen = ComponentArrayInterpreter(CA.ComponentVector(; θP_true, - θMs=CA.ComponentMatrix( - zeros(n_θM, n_site), first(CA.getaxes(θMs_true)), CA.Axis(i=1:n_sample_pred)))) - int_μP_ϕg_unc=ComponentArrayInterpreter(ϕ_true) - interpreters = (; PMs = intm_PMs_gen, μP_ϕg_unc = int_μP_ϕg_unc ) - ζs, _ = CP.generate_ζ(rng, g, f, ϕ, xM, interpreters; n_MC=n_sample_pred) - - end; +@testset "generate_ζ" begin + ζ, logdetΣ = CP.generate_ζ( + rng, g, f, ϕcg_true, xM[:, 1:n_batch], map(get_concrete, interpreters); + n_MC = 8) + @test ζ isa Matrix + gr = Zygote.gradient( + ϕ -> sum(CP.generate_ζ( + rng, g, f, ϕ, xM[:, 1:n_batch], map(get_concrete, interpreters); + n_MC = 8)[1]), + CA.getdata(ϕcg_true)) + @test gr[1] isa Vector +end; + +# setup g as FluxNN on gpu +using Flux +FluxMLengine = Val(nameof(Flux)) +g_flux, ϕg0_flux_cpu = gen_hybridcase_MLapplicator(case, FluxMLengine; scenario) + +@testset "generate_ζ gpu" begin + ϕ = CuArray(CA.getdata(ϕcg_true)) + xMg_batch = CuArray(xM[:, 1:n_batch]) + ζ, logdetΣ = CP.generate_ζ( + rng, g_flux, f, ϕ, xMg_batch, map(get_concrete, interpreters); + n_MC = 8) + @test ζ isa CuMatrix + gr = Zygote.gradient( + ϕ -> sum(CP.generate_ζ( + rng, g_flux, f, ϕ, xMg_batch, map(get_concrete, interpreters); + n_MC = 8)[1]), + ϕ) + @test gr[1] isa CuVector +end; + +@testset "neg_elbo_transnorm_gf cpu" 
begin + cost = neg_elbo_transnorm_gf(rng, g, f, ϕcg_true, y_o[:, 1:n_batch], xM[:, 1:n_batch], + transPMs_batch, map(get_concrete, interpreters); + n_MC = 8, logσ2y) + @test cost isa Float64 + gr = Zygote.gradient( + ϕ -> neg_elbo_transnorm_gf( + rng, g, f, ϕ, y_o[:, 1:n_batch], xM[:, 1:n_batch], + transPMs_batch, interpreters; n_MC = 8, logσ2y), + CA.getdata(ϕcg_true)) + @test gr[1] isa Vector +end; + +@testset "neg_elbo_transnorm_gf gpu" begin + ϕ = CuArray(CA.getdata(ϕcg_true)) + xMg_batch = CuArray(xM[:, 1:n_batch]) + cost = neg_elbo_transnorm_gf(rng, g_flux, f, ϕ, y_o[:, 1:n_batch], xMg_batch, + transPMs_batch, map(get_concrete, interpreters); + n_MC = 8, logσ2y) + @test cost isa Float64 + gr = Zygote.gradient( + ϕ -> neg_elbo_transnorm_gf( + rng, g_flux, f, ϕ, y_o[:, 1:n_batch], xMg_batch, + transPMs_batch, interpreters; n_MC = 8, logσ2y), + ϕ) + @test gr[1] isa CuVector +end; diff --git a/test/test_logden_normal.jl b/test/test_logden_normal.jl new file mode 100644 index 0000000..91147c4 --- /dev/null +++ b/test/test_logden_normal.jl @@ -0,0 +1,42 @@ +using Test +using Distributions +using Test +using HybridVariationalInference +using HybridVariationalInference: HybridVariationalInference as CP +using LinearAlgebra + + +@testset "neg_logden_indep_normal vector" begin + loglik_norm(y, μ, σ) = -1 / 2 .* (Distributions.log2π .+ 2 .* log.(σ) .+ abs2.(y .- μ) ./ abs2.(σ)) + loglik_norm_l(y, μ, logσ) = -1 / 2 .* (Distributions.log2π .+ 2 .* logσ .+ abs2.(y .- μ) ./ abs2.(exp.(logσ))) + logden_norm_l(y, μ, logσ) = -1 / 2 .* (2 .* logσ .+ abs2.(y .- μ) ./ abs2.(exp.(logσ))) + neg_logden_norm_l2(y, μ, logσ2) = (logσ2 .+ abs2.(y .- μ) .* exp.(-logσ2)) ./ 2 + + # first test that neg_logden_norm_l2 retuns values of logpdf(Normal) up to an additive + μ = [1.0, 1.0] + σ = [1.1, 2.0] + logσ2 = log.(abs2.(σ)) + y = [1.2, 1.1] + tmp_true = logpdf.(Normal.(μ, σ), y) + dlogpdf = tmp_true .- tmp_true[1] + #loglik_norm(y, μ, σ) + #loglik_norm_l(y, μ, log.(σ)) + #tmp = logden_norm_l(y, μ, log.(σ)) + #tmp .- tmp[1] + tmp = neg_logden_norm_l2(y, μ, logσ2) + @test isapprox(tmp .- tmp[1], -dlogpdf) + + # next test that the sum of neg_logden_norm_l2 corresponds to + @test neg_logden_indep_normal(y, μ, logσ2) ≈ sum(tmp) +end; + +@testset "entropy_MvNormal" begin + S = Diagonal([4,5]) .+ rand(2,2) + S2 = Symmetric(S*S) + @test entropy_MvNormal(S2) == entropy(MvNormal(S2)) +end; + + + + + From 4d4da56c5c6f6ce64aca122828f802e2f8cfadfe Mon Sep 17 00:00:00 2001 From: Thomas Wutzler Date: Thu, 9 Jan 2025 19:21:24 +0100 Subject: [PATCH 3/6] encapsulate generating transformations and interpreters implement predict_gf function --- dev/doubleMM.jl | 179 +++++++++++++++--------------- src/HybridVariationalInference.jl | 5 +- src/elbo.jl | 45 ++++---- src/init_hybrid_params.jl | 69 ++++++++++++ test/test_cholesky_structure.jl | 4 +- test/test_elbo.jl | 148 ++++++++++++++---------- 6 files changed, 277 insertions(+), 173 deletions(-) create mode 100644 src/init_hybrid_params.jl diff --git a/dev/doubleMM.jl b/dev/doubleMM.jl index 7f2a013..a81d437 100644 --- a/dev/doubleMM.jl +++ b/dev/doubleMM.jl @@ -53,8 +53,8 @@ scatterplot(vec(θMs_true), vec(loss_g(ϕg_opt1, xM, g)[2])) f = gen_hybridcase_PBmodel(case; scenario) +#----------- fit g and θP to y_o () -> begin - #----------- fit g and θP to y_o # end2end inversion int_ϕθP = ComponentArrayInterpreter(CA.ComponentVector( @@ -82,67 +82,78 @@ f = gen_hybridcase_PBmodel(case; scenario) end #---------- HVI -# TODO think about good general initializations -coef_logσ2_logMs = 
[-5.769 -3.501; -0.01791 0.007951] -logσ2_logP = CA.ComponentVector(r0 = -8.997, K2 = -5.893) -mean_σ_o_MC = 0.006042 - -# correlation matrices -ρsP = zeros(sum(1:(n_θP - 1))) -ρsM = zeros(sum(1:(n_θM - 1))) - -ϕunc = CA.ComponentVector(; - logσ2_logP = logσ2_logP, - coef_logσ2_logMs = coef_logσ2_logMs, - ρsP, - ρsM) -int_unc = ComponentArrayInterpreter(ϕunc) - -# for a conservative uncertainty assume σ2=1e-10 and no relationship with magnitude -ϕunc0 = CA.ComponentVector(; - logσ2_logP = fill(-10.0, n_θP), - coef_logσ2_logMs = reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), - ρsP, - ρsM) - logσ2y = 2 .* log.(σ_o) n_MC = 3 +(; ϕ, transPMs_batch, interpreters, get_transPMs, get_ca_int_PMs) = init_hybrid_params( + θP_true, θMs_true[:, 1], ϕg_opt1, n_batch; transP = asℝ₊, transM = asℝ₊); +ϕ_true = ϕ -transPMs_batch = as( - (P = as(Array, asℝ₊, n_θP), - Ms = as(Array, asℝ₊, n_θM, n_batch))) -transPMs_all = as( - (P = as(Array, asℝ₊, n_θP), - Ms = as(Array, asℝ₊, n_θM, n_site))) - -n_ϕg = length(ϕg_opt1) -ϕt_true = θ = CA.ComponentVector(; - μP = θP_true, - ϕg = ϕg_opt1, - unc = ϕunc); -trans_gu = as( - (μP = as(Array, asℝ₊, n_θP), - ϕg = as(Array, n_ϕg), - unc = as(Array, length(ϕunc)))) -trans_g = as( - (μP = as(Array, asℝ₊, n_θP), - ϕg = as(Array, n_ϕg))) - -#const -int_PMs_batch = ComponentArrayInterpreter(CA.ComponentVector(; θP = θP_true, - θMs = CA.ComponentMatrix( - zeros(n_θM, n_batch), first(CA.getaxes(θMs_true)), CA.Axis(i = 1:n_batch)))) - -interpreters = interpreters_g = map(get_concrete, - (; - μP_ϕg_unc = ComponentArrayInterpreter(ϕt_true), - PMs = int_PMs_batch, - unc = ComponentArrayInterpreter(ϕunc) - )) - -ϕ_true = inverse_ca(trans_gu, ϕt_true) +() -> begin + coef_logσ2_logMs = [-5.769 -3.501; -0.01791 0.007951] + logσ2_logP = CA.ComponentVector(r0 = -8.997, K2 = -5.893) + mean_σ_o_MC = 0.006042 + + # correlation matrices + ρsP = zeros(sum(1:(n_θP - 1))) + ρsM = zeros(sum(1:(n_θM - 1))) + + ϕunc = CA.ComponentVector(; + logσ2_logP = logσ2_logP, + coef_logσ2_logMs = coef_logσ2_logMs, + ρsP, + ρsM) + int_unc = ComponentArrayInterpreter(ϕunc) + + # for a conservative uncertainty assume σ2=1e-10 and no relationship with magnitude + ϕunc0 = CA.ComponentVector(; + logσ2_logP = fill(-10.0, n_θP), + coef_logσ2_logMs = reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), + ρsP, + ρsM) + + transPMs_batch = as( + (P = as(Array, asℝ₊, n_θP), + Ms = as(Array, asℝ₊, n_θM, n_batch))) + transPMs_allsites = as( + (P = as(Array, asℝ₊, n_θP), + Ms = as(Array, asℝ₊, n_θM, n_site))) + + n_ϕg = length(ϕg_opt1) + ϕt_true = θ = CA.ComponentVector(; + μP = θP_true, + ϕg = ϕg_opt1, + unc = ϕunc) + trans_gu = as( + (μP = as(Array, asℝ₊, n_θP), + ϕg = as(Array, n_ϕg), + unc = as(Array, length(ϕunc)))) + trans_g = as( + (μP = as(Array, asℝ₊, n_θP), + ϕg = as(Array, n_ϕg))) + + #const + int_PMs_batch = ComponentArrayInterpreter(CA.ComponentVector(; θP = θP_true, + θMs = CA.ComponentMatrix( + zeros(n_θM, n_batch), first(CA.getaxes(θMs_true)), CA.Axis(i = 1:n_batch)))) + + interpreters = interpreters_g = map(get_concrete, + (; + μP_ϕg_unc = ComponentArrayInterpreter(ϕt_true), + PMs = int_PMs_batch, + unc = ComponentArrayInterpreter(ϕunc) + )) + + ϕ_true = inverse_ca(trans_gu, ϕt_true) +end + +ϕ_ini0 = ζ = vcat(ϕ_true[:μP] .* 0.0, ϕg0, ϕ_true[[:unc]]); # scratch +# +# true values ϕ_ini = ζ = vcat(ϕ_true[[:μP, :ϕg]] .* 1.2, ϕ_true[[:unc]]); # slight disturbance -ϕ_ini0 = ζ = vcat(ϕ_true[:μP] .* 0.0, ϕg0, ϕunc0); # scratch +# hardcoded from HMC inversion +ϕ_ini.unc.coef_logσ2_logMs = [-5.769 -3.501; -0.01791 0.007951] 
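+# illustration: column j of coef_logσ2_logMs holds (intercept, slope) of an
+# affine model of the log-variance of the j-th ML-predicted parameter,
+#   logσ2_j(ζ) = coef_logσ2_logMs[1, j] + coef_logσ2_logMs[2, j] * ζ
+# so the implied standard deviation is σ_j(ζ) = exp(logσ2_j(ζ) / 2);
+# e.g. the first column at ζ = -1.0 gives exp((-5.769 + 0.01791) / 2) ≈ 0.056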
+ϕ_ini.unc.logσ2_logP = CA.ComponentVector(r0 = -8.997, K2 = -5.893) +mean_σ_o_MC = 0.006042 # test cost function and gradient () -> begin @@ -161,10 +172,10 @@ end train_loader = MLUtils.DataLoader((xM, y_o), batchsize = n_batch) optf = Optimization.OptimizationFunction( - (ζg, data) -> begin + (ϕ, data) -> begin xM, y_o = data neg_elbo_transnorm_gf( - rng, g, f, ζg, y_o, xM, transPMs_batch, + rng, g, f, ϕ, y_o, xM, transPMs_batch, map(get_concrete, interpreters_g); n_MC = 5, logσ2y) end, Optimization.AutoZygote()) @@ -181,7 +192,7 @@ g_flux, ϕg0_flux_cpu = gen_hybridcase_MLapplicator(case, FluxMLengine; scenario # otpimize using LUX () -> begin - using Lux + #using Lux g_lux = Lux.Chain( # dense layer with bias that maps to 8 outputs and applies `tanh` activation Lux.Dense(n_covar => n_covar * 4, tanh), @@ -208,18 +219,19 @@ function fcost(ϕ) n_MC = 8, logσ2y = logσ2y) end fcost(ϕ) -Zygote.gradient(fcost, ϕ) |> cpu; +#Zygote.gradient(fcost, ϕ) |> cpu; gr = Zygote.gradient(fcost, CA.getdata(ϕ)); -gr_c = CA.ComponentArray(gr[1], CA.getaxes(ϕ)...) +gr_c = CA.ComponentArray(gr[1] |> Flux.cpu, CA.getaxes(ϕ)...) train_loader = MLUtils.DataLoader((xM_gpu, y_o), batchsize = n_batch) optf = Optimization.OptimizationFunction( - (ζg, data) -> begin + (ϕ, data) -> begin xM, y_o = data - neg_elbo_transnorm_gf( - rng, g_flux, f, ζg, y_o, xM, transPMs_batch, - map(get_concrete, interpreters_g); n_MC = 5, logσ2y) + fcost(ϕ) + # neg_elbo_transnorm_gf( + # rng, g_flux, f, ϕ, y_o, xM, transPMs_batch, + # map(get_concrete, interpreters); n_MC = 5, logσ2y) end, Optimization.AutoZygote()) optprob = Optimization.OptimizationProblem( @@ -230,40 +242,31 @@ res = res_gpu = Optimization.solve( # start from zero () -> begin optprob = Optimization.OptimizationProblem( - optf, CA.getdata(ϕ_ini0) |> Flux.gpu, train_loader); + optf, CA.getdata(ϕ_ini0) |> Flux.gpu, train_loader) res = res_gpu = Optimization.solve( - optprob, Optimisers.Adam(0.02), callback = callback_loss(50), maxiters = 4_000); + optprob, Optimisers.Adam(0.02), callback = callback_loss(50), maxiters = 4_000) end -ζ_VIc = interpreters_g.μP_ϕg_unc(res.u |> Flux.cpu) -ζMs_VI = g(xM, ζ_VIc.ϕg) -ϕunc_VI = int_unc(ζ_VIc.unc) +ζ_VIc = interpreters.μP_ϕg_unc(res.u |> Flux.cpu) +ζMs_VI = g_flux(xM_gpu, ζ_VIc.ϕg |> Flux.gpu) |> Flux.cpu +ϕunc_VI = interpreters.unc(ζ_VIc.unc) hcat(θP_true, exp.(ζ_VIc.μP)) plt = scatterplot(vec(θMs_true), vec(exp.(ζMs_VI))) #lineplot!(plt, 0.0, 1.1, identity) # -hcat(ϕunc, ϕunc_VI) # need to compare to MC sample +hcat(ϕ_ini.unc, ϕunc_VI) # need to compare to MC sample # hard to estimate for original very small theta's but otherwise good # test predicting correct obs-uncertainty of predictive posterior # TODO reuse g_flux rather than g n_sample_pred = 200 -intm_PMs_gen = ComponentArrayInterpreter(CA.ComponentVector(; θP = θP_true, - θMs = CA.ComponentMatrix( - zeros(n_θM, n_site), first(CA.getaxes(θMs_true)), CA.Axis(i = 1:n_sample_pred)))) - -ζs, _ = HVI.generate_ζ(rng, g, f, res.u |> Flux.cpu, xM, - (; interpreters..., PMs = intm_PMs_gen); n_MC = n_sample_pred) -# ζ = ζs[:,1] -θsc = stack( - ζ -> CA.getdata(CA.ComponentVector( - TransformVariables.transform(transPMs_all, ζ))), - eachcol(ζs)); -y_pred = stack(map(ζ -> first(HVI.predict_y(ζ, f, transPMs_all)), eachcol(ζs))); - -size(y_pred) -σ_o_post = mapslices(std, y_pred; dims = 3)[:, :, 1]; +y_pred = predict_gf(rng, g_flux, f, res.u, xM_gpu, interpreters; + get_transPMs, get_ca_int_PMs, n_sample_pred); +size(y_pred) # n_obs x n_site, n_sample_pred + +σ_o_post = 
dropdims(std(y_pred; dims = 3), dims=3) + #describe(σ_o_post) hcat(σ_o, fill(mean_σ_o_MC, length(σ_o)), mean(σ_o_post, dims = 2), sqrt.(mean(abs2, σ_o_post, dims = 2))) diff --git a/src/HybridVariationalInference.jl b/src/HybridVariationalInference.jl index 1c619e4..5f23d33 100644 --- a/src/HybridVariationalInference.jl +++ b/src/HybridVariationalInference.jl @@ -44,9 +44,12 @@ include("logden_normal.jl") #export - all internal include("cholesky.jl") -export neg_elbo_transnorm_gf +export neg_elbo_transnorm_gf, predict_gf include("elbo.jl") +export init_hybrid_params +include("init_hybrid_params.jl") + export DoubleMM include("DoubleMM/DoubleMM.jl") diff --git a/src/elbo.jl b/src/elbo.jl index eb6d4db..9523994 100644 --- a/src/elbo.jl +++ b/src/elbo.jl @@ -21,34 +21,37 @@ expected value of the likelihood of observations. function neg_elbo_transnorm_gf(rng, g, f, ϕ::AbstractVector, y_ob, x::AbstractMatrix, transPMs, interpreters::NamedTuple; n_MC=3, logσ2y, gpu_data_handler = get_default_GPUHandler()) - ζ, logdetΣ = generate_ζ(rng, g, f, ϕ, x, interpreters; n_MC) - ζ_cpu = gpu_data_handler(ζ) # differentiable fetch to CPU in Flux package extension - #ζi = first(eachcol(ζ_cpu)) - nLy = reduce(+, map(eachcol(ζ_cpu)) do ζi + ζs, logdetΣ = generate_ζ(rng, g, f, ϕ, x, interpreters; n_MC) + ζs_cpu = gpu_data_handler(ζs) # differentiable fetch to CPU in Flux package extension + #ζi = first(eachcol(ζs_cpu)) + nLy = reduce(+, map(eachcol(ζs_cpu)) do ζi y_pred_i, logjac = predict_y(ζi, f, transPMs) nLy1 = neg_logden_indep_normal(y_ob, y_pred_i, logσ2y) nLy1 - logjac end) / n_MC - ent = entropy_MvNormal(size(ζ, 1), logdetΣ) # defined in logden_normal - nLy - ent -end - -function predict_gf(rng, g, f, ϕ::AbstractVector, x::AbstractMatrix, - transPMs, interpreters::NamedTuple; - n_MC=3, logσ2y, gpu_data_handler = get_default_GPUHandler()) - ζ, logdetΣ = generate_ζ(rng, g, f, ϕ, x, interpreters; n_MC) - ζ_cpu = gpu_data_handler(ζ) # differentiable fetch to CPU in Flux package extension - #ζi = first(eachcol(ζ_cpu)) - nLy = reduce(+, map(eachcol(ζ_cpu)) do ζi - y_pred_i, logjac = predict_y(ζi, f, transPMs) - nLy1 = neg_logden_indep_normal(y_ob, y_pred_i, logσ2y) - nLy1 - logjac - end) / n_MC - ent = entropy_MvNormal(size(ζ, 1), logdetΣ) # defined in logden_normal + ent = entropy_MvNormal(size(ζs, 1), logdetΣ) # defined in logden_normal nLy - ent end +""" + predict_gf(rng, g, f, ϕ::AbstractVector, xM::AbstractMatrix, interpreters; + get_transPMs, get_ca_int_PMs, n_sample_pred=200, + gpu_data_handler=get_default_GPUHandler()) +Prediction function for hybrid model. Retuns an Array `(n_obs, n_site, n_sample_pred)`. +""" +function predict_gf(rng, g, f, ϕ::AbstractVector, xM::AbstractMatrix, interpreters; + get_transPMs, get_ca_int_PMs, n_sample_pred=200, + gpu_data_handler=get_default_GPUHandler()) + n_site = size(xM, 2) + intm_PMs_gen = get_ca_int_PMs(n_site) + tans_PMs_gen = get_transPMs(n_site) + ζs, _ = generate_ζ(rng, g, f, CA.getdata(ϕ), CA.getdata(xM), + (; interpreters..., PMs = intm_PMs_gen); n_MC = n_sample_pred) + ζs_cpu = gpu_data_handler(ζs) # + y_pred = stack(map(ζ -> first(predict_y(ζ, f, tans_PMs_gen)), eachcol(ζs_cpu))); + y_pred +end """ Generate samples of (inv-transformed) model parameters, ζ, and Log-Determinant @@ -144,7 +147,7 @@ function _create_random(rng, ::GPUArraysCore.AbstractGPUVector{T}, dims...) wher # ignores rng # https://discourse.julialang.org/t/help-using-cuda-zygote-and-random-numbers/123458/4?u=bgctw # Zygote.@ignore CUDA.randn(rng, dims...) 
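+    # the draw must stay opaque to AD: gradients flow through Uσ' * u in
+    # sample_ζ_norm0 (reparameterization trick), not through u itself.
+    # minimal sketch of the pattern (hypothetical sizes n_θ, n_MC):
+    #   u = ChainRulesCore.@ignore_derivatives CUDA.randn(n_θ, n_MC)
+    #   ζ_resid = Uσ' * u  # differentiable w.r.t. parameters entering Uσ
+    # @ignore_derivatives works with any ChainRules-aware AD backend,
+    # whereas Zygote.@ignore is specific to Zygote.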
- Zygote.@ignore CUDA.randn(dims...) + ChainRulesCore.@ignore_derivatives CUDA.randn(dims...) end """ diff --git a/src/init_hybrid_params.jl b/src/init_hybrid_params.jl new file mode 100644 index 0000000..955d8ba --- /dev/null +++ b/src/init_hybrid_params.jl @@ -0,0 +1,69 @@ +""" + init_hybrid_params(θP, θM, ϕg, n_batch; transP=asℝ, transM=asℝ) + +Setup ComponentVector of parameters to optimize, and associated tools. +Returns a NamedTuple of +- ϕ: A ComponentVector of parameters to optimize +- transPMs_batch, interpreters: Transformations and interpreters as + required by `neg_elbo_transnorm_gf`. +- get_transPMs: a function returning transformations `(n_site) -> (;P,Ms)` +- get_ca_int_PMs: a function returning ComponentArrayInterpreter for PMs vector + with PMs shaped as a matrix of `n_site` columns of `θM` + +# Arguments +- `θP`, `θM`: Template ComponentVectors of global parameters and ML-predicted parameters +- `ϕg`: vector of parameters to optimize, as returned by `gen_hybridcase_MLapplicator` +- `n_batch`: the number of sites to predicted in each mini-batch +- `transP`, `transM`: the Transformations for the global and site-dependent parameters +""" +function init_hybrid_params(θP, θM, ϕg, n_batch; transP=asℝ, transM=asℝ) + n_θP = length(θP) + n_θM = length(θM) + n_ϕg = length(ϕg) + # zero correlation matrices + ρsP = zeros(sum(1:(n_θP - 1))) + ρsM = zeros(sum(1:(n_θM - 1))) + ϕunc0 = CA.ComponentVector(; + logσ2_logP = fill(-10.0, n_θP), + coef_logσ2_logMs = reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), + ρsP, + ρsM) + ϕt = CA.ComponentVector(; + μP = θP, + ϕg = ϕg, + unc = ϕunc0); + # + get_transPMs = let transP=transP, transM=transM, n_θP=n_θP, n_θM=n_θM + function get_transPMs_inner(n_site) + transPMs = as( + (P = as(Array, transP, n_θP), + Ms = as(Array, transM, n_θM, n_site))) + end + end + transPMs_batch = get_transPMs(n_batch) + trans_gu = as( + (μP = as(Array, asℝ₊, n_θP), + ϕg = as(Array, n_ϕg), + unc = as(Array, length(ϕunc0)))) + ϕ = inverse_ca(trans_gu, ϕt) + # trans_g = as( + # (μP = as(Array, asℝ₊, n_θP), + # ϕg = as(Array, n_ϕg))) + # + get_ca_int_PMs = let + function get_ca_int_PMs_inner(n_site) + ComponentArrayInterpreter(CA.ComponentVector(; θP, + θMs = CA.ComponentMatrix( + zeros(n_θM, n_site), first(CA.getaxes(θM)), CA.Axis(i = 1:n_site)))) + end + + end + interpreters = map(get_concrete, + (; + μP_ϕg_unc = ComponentArrayInterpreter(ϕt), + PMs = get_ca_int_PMs(n_batch), + unc = ComponentArrayInterpreter(ϕunc0) + )) + (;ϕ, transPMs_batch, interpreters, get_transPMs, get_ca_int_PMs) +end + diff --git a/test/test_cholesky_structure.jl b/test/test_cholesky_structure.jl index ad188f4..6566716 100644 --- a/test/test_cholesky_structure.jl +++ b/test/test_cholesky_structure.jl @@ -247,8 +247,8 @@ end #@test Upred ≈ CU SUpred = Upred * Dσ #hcat(SUpred, SU) - @test SUpred≈SU atol=1e-1 + @test SUpred≈SU atol=2e-1 S_pred = Dσ' * Upred' * Upred * Dσ - @test S_pred≈S atol=1e-1 + @test S_pred≈S atol=2e-1 end diff --git a/test/test_elbo.jl b/test/test_elbo.jl index 735a960..2fe1676 100644 --- a/test/test_elbo.jl +++ b/test/test_elbo.jl @@ -28,77 +28,85 @@ f = gen_hybridcase_PBmodel(case; scenario) (; xM, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o, σ_o ) = gen_hybridcase_synthetic(case, rng; scenario); -# correlation matrices -ρsP = zeros(sum(1:(n_θP - 1))) -ρsM = zeros(sum(1:(n_θM - 1))) +logσ2y = 2 .* log.(σ_o) +n_MC = 3 +(; ϕ, transPMs_batch, interpreters, get_transPMs, get_ca_int_PMs) = init_hybrid_params( + θP_true, θMs_true[:, 1], ϕg0, n_batch; transP = 
asℝ₊, transM = asℝ₊); +ϕ_ini = ϕ () -> begin - coef_logσ2_logMs = [-5.769 -3.501; -0.01791 0.007951] - logσ2_logP = CA.ComponentVector(r0 = -8.997, K2 = -5.893) - #mean_σ_o_MC = 0.006042 - - ϕunc = CA.ComponentVector(; - logσ2_logP = logσ2_logP, - coef_logσ2_logMs = coef_logσ2_logMs, + # correlation matrices + ρsP = zeros(sum(1:(n_θP - 1))) + ρsM = zeros(sum(1:(n_θM - 1))) + + () -> begin + coef_logσ2_logMs = [-5.769 -3.501; -0.01791 0.007951] + logσ2_logP = CA.ComponentVector(r0 = -8.997, K2 = -5.893) + #mean_σ_o_MC = 0.006042 + + ϕunc = CA.ComponentVector(; + logσ2_logP = logσ2_logP, + coef_logσ2_logMs = coef_logσ2_logMs, + ρsP, + ρsM) + end + + # for a conservative uncertainty assume σ2=1e-10 and no relationship with magnitude + logσ2y = 2 .* log.(σ_o) + ϕunc0 = CA.ComponentVector(; + logσ2_logP = fill(-10.0, n_θP), + coef_logσ2_logMs = reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), ρsP, ρsM) + #int_unc = ComponentArrayInterpreter(ϕunc0) + + transPMs_batch = as( + (P = as(Array, asℝ₊, n_θP), + Ms = as(Array, asℝ₊, n_θM, n_batch))) + transPMs_allsites = as( + (P = as(Array, asℝ₊, n_θP), + Ms = as(Array, asℝ₊, n_θM, n_site))) + + ϕ_true = θ = CA.ComponentVector(; + μP = θP_true, + ϕg = ϕg0, #ϕg_opt, # here start from randomized + unc = ϕunc0) + trans_gu = as( + (μP = as(Array, asℝ₊, n_θP), + ϕg = as(Array, length(ϕg0)), + unc = as(Array, length(ϕunc0)))) + trans_g = as( + (μP = as(Array, asℝ₊, n_θP), + ϕg = as(Array, length(ϕg0)))) + + int_PMs_batch = ComponentArrayInterpreter(CA.ComponentVector(; θP = θP_true, + θMs = CA.ComponentMatrix( + zeros(n_θM, n_batch), first(CA.getaxes(θMs_true)), CA.Axis(i = 1:n_batch)))) + + interpreters = map(get_concrete, + (; + μP_ϕg_unc = ComponentArrayInterpreter(ϕ_true), + PMs = int_PMs_batch, + unc = ComponentArrayInterpreter(ϕunc0) + )) + + ϕg_true_vec = CA.ComponentVector( + TransformVariables.inverse(trans_gu, CP.cv2NamedTuple(ϕ_true))) + ϕcg_true = interpreters.μP_ϕg_unc(ϕg_true_vec) + ϕ_ini = ζ = vcat(ϕcg_true[[:μP, :ϕg]] .* 1.2, ϕcg_true[[:unc]]) + ϕ_ini0 = ζ = vcat(ϕcg_true[:μP] .* 0.0, ϕg0, ϕunc0) end -# for a conservative uncertainty assume σ2=1e-10 and no relationship with magnitude -logσ2y = 2 .* log.(σ_o) -ϕunc0 = CA.ComponentVector(; - logσ2_logP = fill(-10.0, n_θP), - coef_logσ2_logMs = reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), - ρsP, - ρsM) -#int_unc = ComponentArrayInterpreter(ϕunc0) - -transPMs_batch = as( - (P = as(Array, asℝ₊, n_θP), - Ms = as(Array, asℝ₊, n_θM, n_batch))) -transPMs_all = as( - (P = as(Array, asℝ₊, n_θP), - Ms = as(Array, asℝ₊, n_θM, n_site))) - -ϕ_true = θ = CA.ComponentVector(; - μP = θP_true, - ϕg = ϕg0, #ϕg_opt, # here start from randomized - unc = ϕunc0); -trans_gu = as( - (μP = as(Array, asℝ₊, n_θP), - ϕg = as(Array, length(ϕg0)), - unc = as(Array, length(ϕunc0)))) -trans_g = as( - (μP = as(Array, asℝ₊, n_θP), - ϕg = as(Array, length(ϕg0)))) - -int_PMs_batch = ComponentArrayInterpreter(CA.ComponentVector(; θP = θP_true, - θMs = CA.ComponentMatrix( - zeros(n_θM, n_batch), first(CA.getaxes(θMs_true)), CA.Axis(i = 1:n_batch)))) - -interpreters = map(get_concrete, - (; - μP_ϕg_unc = ComponentArrayInterpreter(ϕ_true), - PMs = int_PMs_batch, - unc = ComponentArrayInterpreter(ϕunc0) - )) - -ϕg_true_vec = CA.ComponentVector( - TransformVariables.inverse(trans_gu, CP.cv2NamedTuple(ϕ_true))) -ϕcg_true = interpreters.μP_ϕg_unc(ϕg_true_vec) -ϕ_ini = ζ = vcat(ϕcg_true[[:μP, :ϕg]] .* 1.2, ϕcg_true[[:unc]]); -ϕ_ini0 = ζ = vcat(ϕcg_true[:μP] .* 0.0, ϕg0, ϕunc0); - @testset "generate_ζ" begin ζ, logdetΣ = CP.generate_ζ( - 
rng, g, f, ϕcg_true, xM[:, 1:n_batch], map(get_concrete, interpreters); + rng, g, f, ϕ_ini, xM[:, 1:n_batch], map(get_concrete, interpreters); n_MC = 8) @test ζ isa Matrix gr = Zygote.gradient( ϕ -> sum(CP.generate_ζ( rng, g, f, ϕ, xM[:, 1:n_batch], map(get_concrete, interpreters); n_MC = 8)[1]), - CA.getdata(ϕcg_true)) + CA.getdata(ϕ_ini)) @test gr[1] isa Vector end; @@ -108,7 +116,7 @@ FluxMLengine = Val(nameof(Flux)) g_flux, ϕg0_flux_cpu = gen_hybridcase_MLapplicator(case, FluxMLengine; scenario) @testset "generate_ζ gpu" begin - ϕ = CuArray(CA.getdata(ϕcg_true)) + ϕ = CuArray(CA.getdata(ϕ_ini)) xMg_batch = CuArray(xM[:, 1:n_batch]) ζ, logdetΣ = CP.generate_ζ( rng, g_flux, f, ϕ, xMg_batch, map(get_concrete, interpreters); @@ -123,7 +131,7 @@ g_flux, ϕg0_flux_cpu = gen_hybridcase_MLapplicator(case, FluxMLengine; scenario end; @testset "neg_elbo_transnorm_gf cpu" begin - cost = neg_elbo_transnorm_gf(rng, g, f, ϕcg_true, y_o[:, 1:n_batch], xM[:, 1:n_batch], + cost = neg_elbo_transnorm_gf(rng, g, f, ϕ_ini, y_o[:, 1:n_batch], xM[:, 1:n_batch], transPMs_batch, map(get_concrete, interpreters); n_MC = 8, logσ2y) @test cost isa Float64 @@ -131,12 +139,12 @@ end; ϕ -> neg_elbo_transnorm_gf( rng, g, f, ϕ, y_o[:, 1:n_batch], xM[:, 1:n_batch], transPMs_batch, interpreters; n_MC = 8, logσ2y), - CA.getdata(ϕcg_true)) + CA.getdata(ϕ_ini)) @test gr[1] isa Vector end; @testset "neg_elbo_transnorm_gf gpu" begin - ϕ = CuArray(CA.getdata(ϕcg_true)) + ϕ = CuArray(CA.getdata(ϕ_ini)) xMg_batch = CuArray(xM[:, 1:n_batch]) cost = neg_elbo_transnorm_gf(rng, g_flux, f, ϕ, y_o[:, 1:n_batch], xMg_batch, transPMs_batch, map(get_concrete, interpreters); @@ -150,3 +158,21 @@ end; @test gr[1] isa CuVector end; +@testset "predict_gf cpu" begin + n_sample_pred = 200 + y_pred = predict_gf(rng, g, f, ϕ_ini, xM, map(get_concrete, interpreters); + get_transPMs, get_ca_int_PMs, n_sample_pred) + @test y_pred isa Array + @test size(y_pred) == (size(y_o)..., n_sample_pred) +end + +@testset "predict_gf gpu" begin + n_sample_pred = 200 + ϕ = CuArray(CA.getdata(ϕ_ini)) + xMg = CuArray(xM) + y_pred = predict_gf(rng, g_flux, f, ϕ, xMg, map(get_concrete, interpreters); + get_transPMs, get_ca_int_PMs, n_sample_pred) + @test y_pred isa Array + @test size(y_pred) == (size(y_o)..., n_sample_pred) +end + From 7b892c6b1154ec15010cb1d086f55d4a05dea2e4 Mon Sep 17 00:00:00 2001 From: Thomas Wutzler Date: Fri, 10 Jan 2025 10:42:37 +0100 Subject: [PATCH 4/6] typos --- src/elbo.jl | 8 ++++---- test/test_logden_normal.jl | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/elbo.jl b/src/elbo.jl index 9523994..15379bd 100644 --- a/src/elbo.jl +++ b/src/elbo.jl @@ -5,7 +5,7 @@ It generates n_MC samples for each site, and uses these to compute the expected value of the likelihood of observations. ## Arguments -- rng: random number generator (ignored on CUDA, if ϕ is a AbstactGPUArray) +- rng: random number generator (ignored on CUDA, if ϕ is a AbstractGPUArray) - g: machine learnig model - f: mechanistic model - ϕ: flat vector of parameters @@ -13,7 +13,7 @@ expected value of the likelihood of observations. interpreted by interpreters.μP_ϕg_unc and interpreters.PMs - y_ob: matrix of observations (n_obs x n_site_batch) - x: matrix of covariates (n_cov x n_site_batch) -- transPMs: Transformations with componets P, Ms, similar to interpreters +- transPMs: Transformations with components P, Ms, similar to interpreters - n_MC: number of MonteCarlo samples from the distribution of parameters to simulate using the mechanistic model f. 
 - logσ2y: observation uncertainty (log of the variance)
@@ -38,7 +38,7 @@ end
     get_transPMs, get_ca_int_PMs, n_sample_pred=200,
     gpu_data_handler=get_default_GPUHandler())
 
-Prediction function for hybrid model. Retuns an Array `(n_obs, n_site, n_sample_pred)`.
+Prediction function for hybrid model. Returns an Array `(n_obs, n_site, n_sample_pred)`.
 """
 function predict_gf(rng, g, f, ϕ::AbstractVector, xM::AbstractMatrix, interpreters;
     get_transPMs, get_ca_int_PMs, n_sample_pred=200,
     gpu_data_handler=get_default_GPUHandler())
@@ -81,7 +81,7 @@ end
 """
-Extract relevant parameters from θ and return n_MC generted draws
+Extract relevant parameters from θ and return n_MC generated draws
 together with the logdet of the transformation.
 
-Necessary typestable information on number of compponents are provided with
+Necessary typestable information on number of components is provided with
diff --git a/test/test_logden_normal.jl b/test/test_logden_normal.jl
index 91147c4..d1be1dc 100644
--- a/test/test_logden_normal.jl
+++ b/test/test_logden_normal.jl
@@ -12,7 +12,7 @@ using LinearAlgebra
     logden_norm_l(y, μ, logσ) = -1 / 2 .* (2 .* logσ .+ abs2.(y .- μ) ./ abs2.(exp.(logσ)))
     neg_logden_norm_l2(y, μ, logσ2) = (logσ2 .+ abs2.(y .- μ) .* exp.(-logσ2)) ./ 2
 
-    # first test that neg_logden_norm_l2 retuns values of logpdf(Normal) up to an additive
+    # first test that neg_logden_norm_l2 returns values of logpdf(Normal) up to an additive constant
     μ = [1.0, 1.0]
From 9e5e21c59ea69b2e8de9bc8352712e6f0e3b9198 Mon Sep 17 00:00:00 2001
From: Thomas Wutzler
Date: Fri, 10 Jan 2025 10:43:39 +0100
Subject: [PATCH 5/6] typos

---
 src/elbo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/elbo.jl b/src/elbo.jl
index 15379bd..d7ed51d 100644
--- a/src/elbo.jl
+++ b/src/elbo.jl
@@ -6,7 +6,7 @@
 expected value of the likelihood of observations. 
## Arguments - rng: random number generator (ignored on CUDA, if ϕ is a AbstractGPUArray) -- g: machine learnig model +- g: machine learning model - f: mechanistic model - ϕ: flat vector of parameters including parameter of f (ϕ_P), of g (ϕ_Ms), and of VI (ϕ_unc), From c10b69e179098a104de1ee03eb3fcd296810bce9 Mon Sep 17 00:00:00 2001 From: Thomas Wutzler Date: Mon, 13 Jan 2025 08:14:19 +0100 Subject: [PATCH 6/6] check for functional CUDA before GPU tests --- test/test_Flux.jl | 10 +++-- test/test_elbo.jl | 77 ++++++++++++++++++----------------- test/test_sample_zeta.jl | 87 ++++++++++++++++++++++------------------ 3 files changed, 94 insertions(+), 80 deletions(-) diff --git a/test/test_Flux.jl b/test/test_Flux.jl index a3ba11b..ad49eb8 100644 --- a/test/test_Flux.jl +++ b/test/test_Flux.jl @@ -19,9 +19,11 @@ using Flux #typeof(HybridVariationalInference.default_GPU_DataHandler) h = get_default_GPUHandler() @test !(h isa NullGPUDataHandler) - x = CuArray(1:5) - xh = h(x) - @test xh isa Vector + if CUDA.functional() + x = CuArray(1:5) + xh = h(x) + @test xh isa Vector + end end; @@ -32,7 +34,7 @@ end; Dense(n_covar => n_covar * 4, tanh), Dense(n_covar * 4 => n_covar * 4, tanh), Dense(n_covar * 4 => n_out, identity, bias=false), - ); + ) g = construct_FluxApplicator(g_chain) n_site = 3 x = rand(Float32, n_covar, n_site) diff --git a/test/test_elbo.jl b/test/test_elbo.jl index 2fe1676..bcc3f7a 100644 --- a/test/test_elbo.jl +++ b/test/test_elbo.jl @@ -115,20 +115,22 @@ using Flux FluxMLengine = Val(nameof(Flux)) g_flux, ϕg0_flux_cpu = gen_hybridcase_MLapplicator(case, FluxMLengine; scenario) -@testset "generate_ζ gpu" begin - ϕ = CuArray(CA.getdata(ϕ_ini)) - xMg_batch = CuArray(xM[:, 1:n_batch]) - ζ, logdetΣ = CP.generate_ζ( - rng, g_flux, f, ϕ, xMg_batch, map(get_concrete, interpreters); - n_MC = 8) - @test ζ isa CuMatrix - gr = Zygote.gradient( - ϕ -> sum(CP.generate_ζ( +if CUDA.functional() + @testset "generate_ζ gpu" begin + ϕ = CuArray(CA.getdata(ϕ_ini)) + xMg_batch = CuArray(xM[:, 1:n_batch]) + ζ, logdetΣ = CP.generate_ζ( rng, g_flux, f, ϕ, xMg_batch, map(get_concrete, interpreters); - n_MC = 8)[1]), - ϕ) - @test gr[1] isa CuVector -end; + n_MC = 8) + @test ζ isa CuMatrix + gr = Zygote.gradient( + ϕ -> sum(CP.generate_ζ( + rng, g_flux, f, ϕ, xMg_batch, map(get_concrete, interpreters); + n_MC = 8)[1]), + ϕ) + @test gr[1] isa CuVector + end +end @testset "neg_elbo_transnorm_gf cpu" begin cost = neg_elbo_transnorm_gf(rng, g, f, ϕ_ini, y_o[:, 1:n_batch], xM[:, 1:n_batch], @@ -143,20 +145,22 @@ end; @test gr[1] isa Vector end; -@testset "neg_elbo_transnorm_gf gpu" begin - ϕ = CuArray(CA.getdata(ϕ_ini)) - xMg_batch = CuArray(xM[:, 1:n_batch]) - cost = neg_elbo_transnorm_gf(rng, g_flux, f, ϕ, y_o[:, 1:n_batch], xMg_batch, - transPMs_batch, map(get_concrete, interpreters); - n_MC = 8, logσ2y) - @test cost isa Float64 - gr = Zygote.gradient( - ϕ -> neg_elbo_transnorm_gf( - rng, g_flux, f, ϕ, y_o[:, 1:n_batch], xMg_batch, - transPMs_batch, interpreters; n_MC = 8, logσ2y), - ϕ) - @test gr[1] isa CuVector -end; +if CUDA.functional() + @testset "neg_elbo_transnorm_gf gpu" begin + ϕ = CuArray(CA.getdata(ϕ_ini)) + xMg_batch = CuArray(xM[:, 1:n_batch]) + cost = neg_elbo_transnorm_gf(rng, g_flux, f, ϕ, y_o[:, 1:n_batch], xMg_batch, + transPMs_batch, map(get_concrete, interpreters); + n_MC = 8, logσ2y) + @test cost isa Float64 + gr = Zygote.gradient( + ϕ -> neg_elbo_transnorm_gf( + rng, g_flux, f, ϕ, y_o[:, 1:n_batch], xMg_batch, + transPMs_batch, interpreters; n_MC = 8, logσ2y), + ϕ) + @test gr[1] isa 
CuVector + end +end @testset "predict_gf cpu" begin n_sample_pred = 200 @@ -166,13 +170,14 @@ end; @test size(y_pred) == (size(y_o)..., n_sample_pred) end -@testset "predict_gf gpu" begin - n_sample_pred = 200 - ϕ = CuArray(CA.getdata(ϕ_ini)) - xMg = CuArray(xM) - y_pred = predict_gf(rng, g_flux, f, ϕ, xMg, map(get_concrete, interpreters); - get_transPMs, get_ca_int_PMs, n_sample_pred) - @test y_pred isa Array - @test size(y_pred) == (size(y_o)..., n_sample_pred) +if CUDA.functional() + @testset "predict_gf gpu" begin + n_sample_pred = 200 + ϕ = CuArray(CA.getdata(ϕ_ini)) + xMg = CuArray(xM) + y_pred = predict_gf(rng, g_flux, f, ϕ, xMg, map(get_concrete, interpreters); + get_transPMs, get_ca_int_PMs, n_sample_pred) + @test y_pred isa Array + @test size(y_pred) == (size(y_o)..., n_sample_pred) + end end - diff --git a/test/test_sample_zeta.jl b/test/test_sample_zeta.jl index 6447fcd..b27d0b0 100644 --- a/test/test_sample_zeta.jl +++ b/test/test_sample_zeta.jl @@ -23,7 +23,7 @@ scenario = (:default,) @testset "test_sample_zeta" begin (; xM, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o - ) = gen_hybridcase_synthetic(case, rng; scenario); +) = gen_hybridcase_synthetic(case, rng; scenario) # n_site = 2 # n_θP, n_θM = length(θ_true.θP), length(θ_true.θM) @@ -31,59 +31,66 @@ scenario = (:default,) # θMs_true = θ_true.θM .+ randn(n_θM, n_site) .* σ_θM # set to 0.02 rather than zero for debugging non-zero correlations - ρsP = zeros(sum(1:(n_θP-1))) .+ 0.02 - ρsM = zeros(sum(1:(n_θM-1))) .+ 0.02 + ρsP = zeros(sum(1:(n_θP - 1))) .+ 0.02 + ρsM = zeros(sum(1:(n_θM - 1))) .+ 0.02 ϕunc = CA.ComponentVector(; - logσ2_logP=fill(-10.0, n_θP), - coef_logσ2_logMs=reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), - ρsP, - ρsM) + logσ2_logP = fill(-10.0, n_θP), + coef_logσ2_logMs = reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), + ρsP, + ρsM) transPMs = as( - (P=as(Array, asℝ₊, n_θP), - Ms=as(Array, asℝ₊, n_θM, n_site))) + (P = as(Array, asℝ₊, n_θP), + Ms = as(Array, asℝ₊, n_θM, n_site))) θ_true = θ = CA.ComponentVector(; - P=θP_true, - Ms=θMs_true) + P = θP_true, + Ms = θMs_true) transPMs = as(( - P=as(Array, asℝ₊, n_θP), - Ms=as(Array, asℝ₊, n_θM, n_site))) + P = as(Array, asℝ₊, n_θP), + Ms = as(Array, asℝ₊, n_θM, n_site))) ζ_true = inverse_ca(transPMs, θ_true) - ϕ_true = vcat(ζ_true, CA.ComponentVector(unc=ϕunc)) - ϕ_cpu = vcat(ζ_true .+ 0.01, CA.ComponentVector(unc=ϕunc)) + ϕ_true = vcat(ζ_true, CA.ComponentVector(unc = ϕunc)) + ϕ_cpu = vcat(ζ_true .+ 0.01, CA.ComponentVector(unc = ϕunc)) - interpreters = (; pmu=ComponentArrayInterpreter(ϕ_true)) #, M=int_θM, PMs=int_θPMs) + interpreters = (; pmu = ComponentArrayInterpreter(ϕ_true)) #, M=int_θM, PMs=int_θPMs) n_MC = 3 @testset "sample_ζ_norm0 cpu" begin ϕ = CA.getdata(ϕ_cpu) ϕc = interpreters.pmu(ϕ) ζ_resid, logdetΣ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC) - @test size(ζ_resid) == (length(ϕc.P) + n_θM * n_site, n_MC) - gr = Zygote.gradient(ϕc -> sum(CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc)[1]), ϕc)[1] - @test length(gr) == length(ϕ) - end; + @test size(ζ_resid) == (length(ϕc.P) + n_θM * n_site, n_MC) + gr = Zygote.gradient(ϕc -> sum(CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc)[1]), ϕc)[1] + @test length(gr) == length(ϕ) + end # - @testset "sample_ζ_norm0 gpu" begin - ϕ = CuArray(CA.getdata(ϕ_cpu)); - #tmp = ϕ[1:6] - #vec2uutri(tmp) - ϕc = interpreters.pmu(ϕ); - @test CA.getdata(ϕc) isa GPUArraysCore.AbstractGPUArray - #ζP, ζMs, ϕunc = ϕc.P, ϕc.Ms, ϕc.unc - #urand = CUDA.randn(length(ϕc.P) + length(ϕc.Ms), n_MC) |> gpu - 
#include(joinpath(@__DIR__, "uncNN", "elbo.jl")) # callback_loss - #ζ_resid, logdetΣ = sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC) - #Zygote.gradient(ϕc -> sum(sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)[1]), ϕc)[1]; - ζ_resid, logdetΣ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC) - @test ζ_resid isa GPUArraysCore.AbstractGPUArray - @test size(ζ_resid) == (length(ϕc.P) + n_θM * n_site, n_MC) - gr = Zygote.gradient(ϕc -> sum(CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)[1]), ϕc)[1]; - @test length(gr) == length(ϕ) - int_unc = ComponentArrayInterpreter(ϕc.unc) - gr2 = Zygote.gradient(ϕc -> sum(CP.sample_ζ_norm0(rng, CA.getdata(ϕc.P), CA.getdata(ϕc.Ms), CA.getdata(ϕc.unc), int_unc; n_MC)[1]), ϕc)[1]; - end; + + if CUDA.functional() + @testset "sample_ζ_norm0 gpu" begin + ϕ = CuArray(CA.getdata(ϕ_cpu)) + #tmp = ϕ[1:6] + #vec2uutri(tmp) + ϕc = interpreters.pmu(ϕ) + @test CA.getdata(ϕc) isa GPUArraysCore.AbstractGPUArray + #ζP, ζMs, ϕunc = ϕc.P, ϕc.Ms, ϕc.unc + #urand = CUDA.randn(length(ϕc.P) + length(ϕc.Ms), n_MC) |> gpu + #include(joinpath(@__DIR__, "uncNN", "elbo.jl")) # callback_loss + #ζ_resid, logdetΣ = sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC) + #Zygote.gradient(ϕc -> sum(sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)[1]), ϕc)[1]; + ζ_resid, logdetΣ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC) + @test ζ_resid isa GPUArraysCore.AbstractGPUArray + @test size(ζ_resid) == (length(ϕc.P) + n_θM * n_site, n_MC) + gr = Zygote.gradient( + ϕc -> sum(CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)[1]), ϕc)[1] + @test length(gr) == length(ϕ) + int_unc = ComponentArrayInterpreter(ϕc.unc) + gr2 = Zygote.gradient( + ϕc -> sum(CP.sample_ζ_norm0(rng, CA.getdata(ϕc.P), CA.getdata(ϕc.Ms), + CA.getdata(ϕc.unc), int_unc; n_MC)[1]), + ϕc)[1] + end + end # @testset "generate_ζ" begin # ϕ = CA.getdata(ϕ_cpu) @@ -94,6 +101,6 @@ scenario = (:default,) # int_μP_ϕg_unc=ComponentArrayInterpreter(ϕ_true) # interpreters = (; PMs = intm_PMs_gen, μP_ϕg_unc = int_μP_ϕg_unc ) # ζs, _ = CP.generate_ζ(rng, g, f, ϕ, xM, interpreters; n_MC=n_sample_pred) - + # end; end;
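As a cross-check of the identities these tests rely on, a minimal sketch in plain Julia (the 3x3 upper-triangular factor is an arbitrary stand-in for the blocks built from UP, UM, and the σ scaling) verifies that the shortcut `logdetΣ = 2 * sum(log.(diag(Uσ)))` used by `sample_ζ_norm0` equals `logdet(Uσ' * Uσ)`, that the closed-form Gaussian entropy matches `Distributions.entropy`, and that the reparameterized draws `Uσ' * u` have the intended covariance:

    using Distributions, LinearAlgebra, Statistics

    Uσ = UpperTriangular([1.0 0.2 0.1; 0.0 0.8 0.3; 0.0 0.0 0.5])
    Σ = Matrix(Uσ' * Uσ)               # covariance implied by ζ_resid = Uσ' * u
    logdetΣ = 2 * sum(log.(diag(Uσ)))  # the shortcut used in sample_ζ_norm0
    @assert logdetΣ ≈ logdet(Σ)

    # entropy of a k-dimensional Gaussian: H = (k * (1 + log(2π)) + logdetΣ) / 2
    k = size(Σ, 1)
    H = (k * (1 + log(2π)) + logdetΣ) / 2
    @assert H ≈ entropy(MvNormal(zeros(k), Σ))

    # Monte-Carlo check of the covariance of the reparameterized draws
    u = randn(k, 100_000)
    @assert isapprox(cov(Uσ' * u; dims = 2), Σ; rtol = 0.05)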
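Similarly, a minimal call sketch for `init_hybrid_params` introduced in PATCH 3/6; only the signature and the returned fields follow the docstring in src/init_hybrid_params.jl, while the parameter names (r0, K2, r1, K1) and the length-120 coefficient vector are hypothetical stand-ins:

    using HybridVariationalInference, TransformVariables
    import ComponentArrays as CA

    θP = CA.ComponentVector(r0 = 0.3, K2 = 2.0)  # hypothetical global parameters
    θM = CA.ComponentVector(r1 = 0.5, K1 = 0.2)  # template of one site's ML-predicted parameters
    ϕg = zeros(Float32, 120)                     # stand-in for the ML coefficient vector
    n_batch = 10

    (; ϕ, transPMs_batch, interpreters, get_transPMs, get_ca_int_PMs) =
        init_hybrid_params(θP, θM, ϕg, n_batch; transP = asℝ₊, transM = asℝ₊)

    # transformation and interpreter for predictions across, e.g., 100 sites
    trans_all = get_transPMs(100)
    int_all = get_ca_int_PMs(100)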