diff --git a/src/KLLS.jl b/src/KLLS.jl
index 39b62d3..f34abbf 100644
--- a/src/KLLS.jl
+++ b/src/KLLS.jl
@@ -15,12 +15,13 @@ using NonlinearSolve, LinearSolve
 import SciMLBase: ReturnCode
 
 export KLLSModel, SSModel, OTModel, LPModel
-export NewtonEQ, SSTrunkLS, SequentialSolve, LevelSet, AdaptiveLevelSet
+export NewtonEQ, SSTrunkLS, SequentialSolve, LevelSet, AdaptiveLevelSet, ExpKernel
 export solve!, scale!, regularize!, histogram, maximize!, reset!, update_y0!
 
 DEFAULT_PRECISION(T) = (eps(T))^(1/3)
 
 include("logsumexp.jl")
+include("exp-kernel.jl")
 include("klls-model.jl")
 include("ss-model.jl")
 include("newtoncg.jl")
diff --git a/src/exp-kernel.jl b/src/exp-kernel.jl
new file mode 100644
index 0000000..5e3fb7a
--- /dev/null
+++ b/src/exp-kernel.jl
@@ -0,0 +1,65 @@
+struct ExpKernel{T<:AbstractFloat, V1<:AbstractVector{T}, V2<:AbstractVector{T}}
+    q::V1 # prior
+    g::V2 # buffer for gradient
+end
+
+"""
+Constructor for the ExpKernel object.
+
+If an n-vector of priors `q` is available:
+
+    ExpKernel(q)
+
+If no prior is known, instead provide the dimension `n`:
+
+    ExpKernel(n)
+"""
+function ExpKernel(q::AbstractVector)
+    @assert (all(ζ -> ζ ≥ 0, q)) "prior is not nonnegative"
+    ExpKernel(q, similar(q))
+end
+
+"""
+Evaluate ∑exp, its gradient, and Hessian at `p`:
+
+    f = obj!(expk, p)
+
+where `p` is a vector of length `n` and `expk` is an ExpKernel object.
+"""
+function obj!(expk::ExpKernel{T}, p) where T
+    @unpack q, g = expk
+    @. g = q .* exp(p)
+    @. g = g ./ exp(one(T))
+    return sum(g)
+end
+
+"""
+Get the gradient of ∑exp at the point `p` where
+the `expk` objective was last evaluated:
+
+    g = grad(expk)
+"""
+grad(expk::ExpKernel) = expk.g
+
+"""
+Get the Hessian of ∑exp at the point `p` where
+the `expk` objective was last evaluated:
+
+    H = hess(expk)
+"""
+function hess(expk::ExpKernel)
+    g = expk.g
+    return Diagonal(g)
+end
+
+"""
+Get the Hessian vector product of ∑exp at the point `p`
+where the `expk` objective was last evaluated:
+
+    Hz = hessvp!(expk, z)
+"""
+function hessvp!(expk::ExpKernel{T}, z::AbstractVector{T}) where T
+    g = expk.g
+    z .= g.*z
+    return z
+end
\ No newline at end of file
diff --git a/src/klls-model.jl b/src/klls-model.jl
index 52688eb..f479a79 100644
--- a/src/klls-model.jl
+++ b/src/klls-model.jl
@@ -3,14 +3,14 @@ Structure for KLLS model
  - `A` is the matrix of constraints
  - `b` is the right-hand side
  - `q` is the prior (default: uniform)
- - `lse` is the log-sum-exp function
+ - `kernel` is the kernel used to regularize (default: LSE for simplex)
  - `λ` is the regularization parameter
  - `C` is a positive definite scaling matrix
  - `w` is an n-buffer for the Hessian-vector product
  - `bNrm` is the norm of the right-hand side
  - `name` is the name of the problem
 """
-@kwdef mutable struct KLLSModel{T<:AbstractFloat, M, CT, SB<:AbstractVector{T}, S<:AbstractVector{T}} <: AbstractNLPModel{T, S}
+@kwdef mutable struct KLLSModel{T<:AbstractFloat, M, CT, SB<:AbstractVector{T}, S<:AbstractVector{T}, K} <: AbstractNLPModel{T, S}
     A::M
     b::SB
     c::S = begin
@@ -29,7 +29,7 @@
     nbuf::S = similar(q)
     bNrm::T = norm(b)
     scale::T = one(eltype(A))
-    lse::LogExpFunction = LogExpFunction(q)
+    kernel::K = LogExpFunction(q)
     name::String = ""
     meta::NLPModelMeta{T, S} = begin
         m = size(A, 1)
diff --git a/src/level-set.jl b/src/level-set.jl
index 581d272..502fa68 100644
--- a/src/level-set.jl
+++ b/src/level-set.jl
@@ -289,7 +289,7 @@ function oracle_callback(
     trace::Bool=false,
     ) where {T}
     y = solver.x
-    x = kl.scale * grad(kl.lse)
+    x = kl.scale * grad(kl.kernel)
     dObj = -trunk_stats.objective - σ
     iter = trunk_stats.iter
     r = trunk_stats.dual_feas # = ||∇ dual obj(x)||
@@ -322,7 +322,7 @@ function oracle_callback(
     elseif tired
         trunk_stats.status = :max_iter
     elseif pObj < α * dObj && dObj > 0
-        st = -obj!(kl.lse, kl.A'y) + log(kl.scale) + 1
+        st = -obj!(kl.kernel, kl.A'y) + log(kl.scale) + 1
         ret .= [dObj, pObj, st]
         trunk_stats.status = :user # Ends the oracle iterations
     end
diff --git a/src/logsumexp.jl b/src/logsumexp.jl
index 80ff181..18c0f2b 100644
--- a/src/logsumexp.jl
+++ b/src/logsumexp.jl
@@ -85,3 +85,15 @@ function hess(lse::LogExpFunction)
     g = lse.g
     return Diagonal(g) - g * g'
 end
+
+"""
+Get the Hessian vector product of logΣexp at the point `p`
+where the `lse` objective was last evaluated:
+
+    Hz = hessvp!(lse, z)
+"""
+function hessvp!(lse::LogExpFunction{T}, z::AbstractVector{T}) where T
+    g = lse.g
+    z .= g.*(z .- (dot(g, z)))
+    return z
+end
\ No newline at end of file
diff --git a/src/newtoncg.jl b/src/newtoncg.jl
index 2a737fd..69ee6ad 100644
--- a/src/newtoncg.jl
+++ b/src/newtoncg.jl
@@ -1,27 +1,27 @@
 """
-Compute f(y) = logΣexp(A'y - c) and it's gradient,
-stored in the `lse` internal buffer.
+Compute f(y) = kernel(A'y - c) and its gradient,
+stored in the `kernel` internal buffer.
 """
-function lseatyc!(kl, y)
-    @unpack A, c, nbuf, lse = kl
+function kernelatyc!(kl, y)
+    @unpack A, c, nbuf, kernel = kl
     nbuf .= c
     mul!(nbuf, A', y, 1, -1)
-    return obj!(lse, nbuf)
+    return obj!(kernel, nbuf)
 end
 """
 Dual objective:
 - base case (no scaling, unweighted 2-norm):
 
-    f(y) = log∑exp(A'y - c) - 0.5λ y∙Cy - b∙y
+    f(y) = kernel(A'y - c) - 0.5λ y∙Cy - b∙y
 
 - with scaling and weighted 2-norm:
 
-    f(y) = τ log∑exp(A'y - c) - τ log τ + 0.5λ y∙Cy - b∙y
+    f(y) = τ kernel(A'y - c) - τ log τ + 0.5λ y∙Cy - b∙y
 """
 function dObj!(kl::KLLSModel, y)
     @unpack b, λ, C, scale = kl
     increment!(kl, :neval_jtprod)
-    f = lseatyc!(kl, y)
+    f = kernelatyc!(kl, y)
     return scale*f - scale*log(scale) + 0.5λ*dot(y, C, y) - b⋅y
 end
 
@@ -30,14 +30,14 @@ NLPModels.obj(kl::KLLSModel, y) = dObj!(kl, y)
 """
 Dual objective gradient
 
-    ∇f(y) = τ A∇log∑exp(A'y-c) + λCy - b
+    ∇f(y) = τ A∇kernel(A'y-c) + λCy - b
 
 evaluated at `y`. Assumes that the objective was last evaluated at the same point `y`.
 """
 function dGrad!(kl::KLLSModel, y, ∇f)
-    @unpack A, b, λ, C, lse, scale = kl
+    @unpack A, b, λ, C, kernel, scale = kl
     increment!(kl, :neval_jprod)
-    p = grad(lse)
+    p = grad(kernel)
     ∇f .= -b
     if λ > 0
         mul!(∇f, C, y, λ, 1)
@@ -49,8 +49,8 @@ end
 NLPModels.grad!(kl::KLLSModel, y, ∇f) = dGrad!(kl, y, ∇f)
 
 function dHess(kl::KLLSModel)
-    @unpack A, λ, C, lse, scale = kl
-    H = hess(lse)
+    @unpack A, λ, C, kernel, scale = kl
+    H = hess(kernel)
     ∇²dObj = scale*(A*H*A')
     if λ > 0
         ∇²dObj += λ*C
@@ -63,19 +63,18 @@ end
 """
 Product of the dual objective Hessian with a vector `z`
 
-    Hz ← ∇²d(y)z = τ A∇²log∑exp(A'y)Az + λCz,
+    Hz ← ∇²d(y)z = τ A∇²kernel(A'y)Az + λCz,
 
 where `y` is the point at which the objective was last evaluated.
""" function dHess_prod!(kl::KLLSModel, z, Hz) - @unpack A, λ, C, nbuf, lse, scale = kl + @unpack A, λ, C, nbuf, kernel, scale = kl w = nbuf increment!(kl, :neval_jprod) increment!(kl, :neval_jtprod) - g = grad(lse) - mul!(w, A', z) # w = A'z - w .= g.*(w .- (g⋅w)) # w = (G - gg')(A'z) - mul!(Hz, A, w, scale, 0) # v = scale*A(G - gg')(A'z) + mul!(w, A', z) # w = A'z + hessvp!(kernel, w) # w = (∇²k)(A'z) + mul!(Hz, A, w, scale, 0) # v = scale*A(∇²k)(A'z) if λ > 0 mul!(Hz, C, z, λ, 1) # v += λCz end @@ -146,7 +145,7 @@ function solve!( trunk_stats = SolverCore.solve!(solver, kl; M=M, callback=cb, atol=zero(T), rtol=zero(T)) end - x = kl.scale .* grad(kl.lse) + x = (kl.scale).*grad(kl.kernel) stats = ExecutionStats( trunk_stats.status, diff --git a/src/newtonls.jl b/src/newtonls.jl index c2c56b3..14269df 100644 --- a/src/newtonls.jl +++ b/src/newtonls.jl @@ -61,7 +61,7 @@ function newton_opt( dObj, dGrd, dHes = evaldual(y) end - return grad(kl.lse), y, tracer + return grad(kl.kernel), y, tracer end function armijo(f, ∇fx, x, d; μ=1e-5, α=1, ρ=0.5, maxits=10) diff --git a/src/precon.jl b/src/precon.jl index 9119c22..319a199 100644 --- a/src/precon.jl +++ b/src/precon.jl @@ -62,8 +62,8 @@ See also [`mul!`](@ref), [`ldiv!`](@ref), and [`update!`](@ref). """ function DiagASAPreconditioner(kl::KLLSModel{T}; α::T=zero(T)) where T @unpack A, b, λ = kl - obj!(kl.lse, A'b) - d = diag_ASA!(similar(b), A, grad(kl.lse), α) + obj!(kl.kernel, A'b) + d = diag_ASA!(similar(b), A, grad(kl.kernel), α) DiagASAPreconditioner(kl, d, α) end @@ -92,7 +92,7 @@ function diag_ASA!(d::AbstractVector{T}, A, g, λ) where T end function update!(P::DiagASAPreconditioner) - d = P.d; A = P.kl.A; α = P.α; g = grad(P.kl.lse) + d = P.d; A = P.kl.A; α = P.α; g = grad(P.kl.kernel) diag_ASA!(d, A, g, α) end diff --git a/src/selfscale.jl b/src/selfscale.jl index a318683..86923ae 100644 --- a/src/selfscale.jl +++ b/src/selfscale.jl @@ -14,7 +14,7 @@ where function NLPModels.residual!(ss::SSModel, yt, Fx) increment!(ss, :neval_residual) kl = ss.kl - @unpack A, c, lse = kl + @unpack A, c, kernel = kl m = kl.meta.nvar r = @view Fx[1:m] @@ -22,7 +22,7 @@ function NLPModels.residual!(ss::SSModel, yt, Fx) t = yt[end] scale!(kl, t) # Apply the latest scaling factor - f = lseatyc!(kl, y) # f = logΣexp(A'y). Needed before grad eval + f = kernelatyc!(kl, y) # f = kernel(A'y). Needed before grad eval dGrad!(kl, y, r) # r ≡ Fx[1:m] = ∇d(y) Fx[end] = f - log(t) - 1 # optimal scaling condition return Fx @@ -39,10 +39,10 @@ Compute the Jacobian-vector product, function NLPModels.jprod_residual!(ss::SSModel, yt, zα, Jyt) kl = ss.kl - @unpack A, lse, mbuf = kl + @unpack A, kernel, mbuf = kl Ax = mbuf m = kl.meta.nvar - x = grad(lse) + x = grad(kernel) increment!(ss, :neval_jprod_residual) @@ -108,7 +108,7 @@ function solve!( optimality = sqrt(obj(ss, trunk_stats.solution)) kl = ss.kl - x = kl.scale.*grad(kl.lse) + x = kl.scale.*grad(kl.kernel) y = @view trunk_stats.solution[1:end-1] stats = ExecutionStats( trunk_stats.status, # status @@ -276,7 +276,7 @@ function solve!( λ = kl.λ y = @view nlcache.u[1:m] - x = kl.scale.*grad(kl.lse) + x = kl.scale.*grad(kl.kernel) ∇d = @view nlcache.fu[1:m] r = λ*y stats = ExecutionStats( diff --git a/src/sequential-scale.jl b/src/sequential-scale.jl index 392cc2a..dad6f64 100644 --- a/src/sequential-scale.jl +++ b/src/sequential-scale.jl @@ -8,7 +8,7 @@ function value!(kl::KLLSModel, t; jprods=Int[0], jtprods=Int[0], kwargs...) scale!(kl, t) s = solve!(kl; kwargs...) 
y = s.residual/λ - dv = obj!(kl.lse, A'y) - log(t) - 1 + dv = obj!(kl.kernel, A'y) - log(t) - 1 jprods[1] += neval_jprod(kl) jtprods[1] += neval_jtprod(kl) update_y0!(kl, s.residual ./ kl.λ) # Set the next runs starting point to the radial projection diff --git a/test/test-precon.jl b/test/test-precon.jl index 4d63a5e..80dffec 100644 --- a/test/test-precon.jl +++ b/test/test-precon.jl @@ -43,7 +43,7 @@ import Krylov: cg # DiagASAtPreconditioner M = KLLS.DiagASAPreconditioner(data) - g = KLLS.grad(data.lse) + g = KLLS.grad(data.kernel) S = Diagonal(g) P = Diagonal(A*S*A') @test all(P*d ≈ mul!(similar(d), M, d))
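For reference, a minimal usage sketch of the `ExpKernel` API added in src/exp-kernel.jl, assuming the package is loaded as `KLLS`; `obj!`, `grad`, `hess`, and `hessvp!` are not exported, so they are qualified with the module name as in test/test-precon.jl, and the prior `q` and the vectors below are illustrative only:

    using KLLS

    q = fill(0.25, 4)                 # nonnegative prior
    expk = ExpKernel(q)               # kernel with an internal gradient buffer

    p = randn(4)
    f = KLLS.obj!(expk, p)            # f = ∑ qᵢ exp(pᵢ - 1); caches the gradient in expk.g
    g = KLLS.grad(expk)               # gradient at the point of the last obj! call
    H = KLLS.hess(expk)               # Diagonal(g)

    z = randn(4)
    Hz = KLLS.hessvp!(expk, copy(z))  # in-place Hessian-vector product
    @assert Hz ≈ H * z                # diagonal Hessian, so Hz = g .* z

The same accessors back `dObj!`, `dGrad!`, and `dHess_prod!` once a model carries the kernel through the new `kernel` field, e.g. something like `KLLSModel(A=A, b=b, kernel=ExpKernel(q))` in place of the default `LogExpFunction(q)`.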