From b57f12103f74dbcd3fc1900ae8236dbf33c24cc4 Mon Sep 17 00:00:00 2001 From: Fredrik Bagge Carlson Date: Thu, 6 Jun 2019 20:34:07 +0200 Subject: [PATCH 1/3] refactor WIP --- Manifest.toml | 63 +++++- Project.toml | 2 + src/DifferentialDynamicProgramming.jl | 8 +- src/backward_pass.jl | 293 ++++++++------------------ src/boxQP.jl | 28 ++- src/forward_pass.jl | 29 +-- src/iLQG.jl | 59 +++--- src/system_pendcart.jl | 75 +++---- test/test_pendcart.jl | 2 +- test/test_readme.jl | 51 ++--- 10 files changed, 285 insertions(+), 325 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 256450d..93aadea 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -3,12 +3,32 @@ [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +[[CSTParser]] +deps = ["LibGit2", "Test", "Tokenize"] +git-tree-sha1 = "437c93bc191cd55957b3f8dee7794b6131997c56" +uuid = "00ebfdb7-1f24-5e51-bd34-a7502290713f" +version = "0.5.2" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "84aa74986c5b9b898b0d1acaf3258741ee64754f" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "2.1.0" + [[DataStructures]] deps = ["InteractiveUtils", "OrderedCollections", "Random", "Serialization", "Test"] git-tree-sha1 = "ca971f03e146cf144a9e2f2ce59674f5bf0e8038" uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" version = "0.15.0" +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + [[Distributed]] deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" @@ -17,6 +37,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +[[LibGit2]] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + [[Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" @@ -33,15 +56,24 @@ version = "0.2.1" [[Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +[[MacroTools]] +deps = ["CSTParser", "Compat", "DataStructures", "Test"] +git-tree-sha1 = "daecd9e452f38297c686eba90dba2a6d5da52162" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.0" + [[Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + [[OrderedCollections]] deps = ["Random", "Serialization", "Test"] -git-tree-sha1 = "85619a3f3e17bb4761fe1b1fd47f0e979f964d5b" +git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.0.2" +version = "1.1.0" [[Parameters]] deps = ["Markdown", "OrderedCollections", "REPL", "Test"] @@ -49,6 +81,10 @@ git-tree-sha1 = "70bdbfb2bceabb15345c0b54be4544813b3444e4" uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" version = "0.10.3" +[[Pkg]] +deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + [[Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" @@ -73,9 +109,16 @@ git-tree-sha1 = "f6fbf4ba64d295e146e49e021207993b6b48c7d1" uuid = "ae029012-a4dd-5104-9daa-d747884805df" version = "0.5.2" +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + [[Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + [[Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" @@ -83,6 +126,12 @@ uuid = "6462fe0b-24de-5631-8697-dd941f90decc" deps = ["LinearAlgebra", "Random"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +[[StaticArrays]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "db23bbf50064c582b6f2b9b043c8e7e98ea8c0c6" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "0.11.0" + [[Statistics]] deps = ["LinearAlgebra", "SparseArrays"] uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -91,6 +140,16 @@ uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +[[Tokenize]] +deps = ["Printf", "Test"] +git-tree-sha1 = "3e83f60b74911d3042d3550884ca2776386a02b8" +uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" +version = "0.5.3" + +[[UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + [[Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" diff --git a/Project.toml b/Project.toml index 52dda97..afb9140 100644 --- a/Project.toml +++ b/Project.toml @@ -7,9 +7,11 @@ version = "0.4.0" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearTimeVaryingModelsBase = "5e7a79f3-ee03-5374-bd42-db9a52b6e959" +MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Requires = "ae029012-a4dd-5104-9daa-d747884805df" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7" diff --git a/src/DifferentialDynamicProgramming.jl b/src/DifferentialDynamicProgramming.jl index f1d4e3c..ed368ec 100644 --- a/src/DifferentialDynamicProgramming.jl +++ b/src/DifferentialDynamicProgramming.jl @@ -1,12 +1,14 @@ module DifferentialDynamicProgramming -using LinearTimeVaryingModelsBase, Requires, ValueHistories, LinearAlgebra, Statistics, Printf +using LinearTimeVaryingModelsBase, Requires, ValueHistories, LinearAlgebra, Statistics, Printf, StaticArrays +using MacroTools: @capture, postwalk, prewalk const DEBUG = false # Set this flag to true in order to print debug messages -export QPTrace, boxQP, demoQP, iLQG,iLQGkl, demo_linear, demo_linear_kl, demo_pendcart, GaussianPolicy - +export QPTrace, boxQP, demoQP, iLQG,iLQGkl, demo_linear, demo_linear_kl, demo_pendcart, GaussianPolicy, tosvec +tosvec(x) = reinterpret(SVector{size(x,1),eltype(x)}, x)[:] eye(n) = Matrix{Float64}(I,n,n) +Base.strides(::SArray{Tuple{i,j}}) where {i,j} = (1,i) function __init__() @require Plots="91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin diff --git a/src/backward_pass.jl b/src/backward_pass.jl index f71001d..607d20c 100644 --- a/src/backward_pass.jl +++ b/src/backward_pass.jl @@ -1,32 +1,86 @@ choleskyvectens(a,b) = permutedims(sum(a.*b,1), [3 2 1]) -macro setupQTIC() - quote - m = size(u,1) - n,_,N = size(fx) - @assert size(cx) == (n, N) "size(cx) should be (n, N)" - @assert size(cu) == (m, N) "size(cu) should be (m, N)" - @assert size(cxx) == (n, n) "size(cxx) should be(n, n) " - @assert size(cxu) == (n, m) "size(cxu) should be(n, m) " +getmatindex(x,i) = x[i] +getmatindex(x::AbstractArray{<:Number},i) = x # In this case the array is constatnt over time +getmatindex(x::AbstractVector{<:AbstractArray},i) = x[i] # Time varying array - k = zeros(m,N) - K = zeros(m,n,N) - Vx = zeros(n,N) - Vxx = zeros(n,n,N) - Quu = Array{T}(undef,m,m,N) - Quui = Array{T}(undef,m,m,N) - dV = [0., 0.] +macro matviews(ex) + ex = prewalk(ex) do ex + @capture(ex, a_[i_] = rhs_) && (return :(__protect__($a,$i) = $rhs)) + @capture(ex, a_[i_] += rhs_) && (return :(__protect__($a,$i) += $rhs)) + @capture(ex, a_[i_]) && (return :(getmatindex($a,$i))) + ex + end + ex = prewalk(ex) do ex + @capture(ex, __protect__(a_,i_) = rhs_) && (return :($a[$i] = $rhs)) + @capture(ex, __protect__(a_,i_) += rhs_) && (return :($a[$i] += $rhs)) + ex + end + esc(ex) +end - Vx[:,N] = cx[:,N] - Vxx[:,:,N] = cxx - Quu[:,:,N] = cuu - diverge = 0 - end |> esc +@macroexpand @matviews begin + dV = dV + [k_i'Qu; .5*k_i'Quu[i]*k_i] + Vx[i] = Qx + K_i'Quu[i]*k_i + K_i'Qu + Qux'k_i + Qu = cu[i] + fu[i]'Vx[i+1] + Qx = cx[i] + fx[i]'Vx[i+1] + Qux = cxu[i]' + fu[i]'Vxx[i+1]*fx[i] + fxuVx = vectens(Vx[i+1],fxu[i]) + Qux = Qux + fxuVx + Quu[i] = cuu[i] + fu[i]'Vxx[i+1]*fu[i] + fuuVx = vectens(Vx[i+1],fuu[i]) + Quu[i] += fuuVx end -macro end_backward_pass() - quote +Base.zeros(::Type{Matrix{T}}, n::Int) where T = [zeros(T,0,0) for _ in 1:n] +Base.zeros(::Type{Vector{T}}, n::Int) where T = [T[] for _ in 1:n] + +function back_pass(cx,cu,cxx,cxu,cuu,fx,fu,fxx,fxu,fuu,λ,regType,lims,x,u) # nonlinear time variant + m,N = length(u[1]),length(u) + length(cx) == N || throw(ArgumentError("cx must be the same length as u")) + @matviews begin + n = length(cx[1]) + k = zeros(typeof(cu[1]), N) + K = zeros(typeof(copy(fu[1]')), N) + Vx = zeros(typeof(cx[1]), N) + Vxx = zeros(typeof(fx[1]), N) + Quu = zeros(typeof(cuu[1]), N) + Quui = zeros(typeof(cuu[1]), N) + dV = @SVector [0., 0.] + Vx[N] = cx[N] + Vxx[N] = cxx[N] + Quu[N] = cuu[N] + k[N] = 0*cu[N] + K[N] = 0*fu[N]' + end + + diverge = 0 + for i = N-1:-1:1 + @matviews begin + Qu = cu[i] + fu[i]'Vx[i+1] + Qx = cx[i] + fx[i]'Vx[i+1] + Qux = cxu[i]' + fu[i]'Vxx[i+1]*fx[i] + if !(fxu === nothing) + fxuVx = vectens(Vx[i+1],fxu[i]) + Qux = Qux + fxuVx + end + + Quu[i] = cuu[i] + fu[i]'Vxx[i+1]*fu[i] + if !(fuu === nothing) + fuuVx = vectens(Vx[i+1],fuu[i]) + Quu[i] += fuuVx + end + + Qxx = cxx[i] + fx[i]'Vxx[i+1]*fx[i] + fxx === nothing || (Qxx .+= vectens(Vx[i+1],fxx[i])) + Vxx_reg = Vxx[i+1] + (regType == 2 ? λ*I : 0*I) + Qux_reg = cxu[i]' + fu[i]'Vxx_reg*fx[i] + fxu === nothing || (Qux_reg .+= fxuVx) + QuuF = cuu[i] + fu[i]'Vxx_reg*fu[i] + (regType == 1 ? λ*I : 0*I) + fuu === nothing || (QuuF .+= fuuVx) + end + QuF = Qu if isempty(lims) || lims[1,1] > lims[1,2] # debug("# no control limits: Cholesky decomposition, check for non-PD") @@ -35,6 +89,7 @@ macro end_backward_pass() R = cholesky(Hermitian(QuuF)) catch diverge = i + println("Failed chol") return diverge, GaussianPolicy(N,n,m,K,k,Quui,Quu), Vx, Vxx, dV end # debug("# find control law") @@ -42,209 +97,41 @@ macro end_backward_pass() K_i = -(R\Qux_reg) else # debug("# solve Quadratic Program") - lower = lims[:,1]-u[:,i] - upper = lims[:,2]-u[:,i] + lower = lims[:,1]-u[i] + upper = lims[:,2]-u[i] local k_i,result,free try - k_i,result,R,free = boxQP(QuuF,QuF,lower,upper,k[:,min(i+1,N-1)]) + k_i,result,R,free = boxQP(QuuF,QuF,lower,upper,k[min(i+1,N-1)]) catch result = 0 end if result < 1 diverge = i + println("Failed boxQP") return diverge, GaussianPolicy(N,n,m,K,k,Quui,Quu), Vx, Vxx, dV end - K_i = zeros(m,n) + K_i = similar(K[i]) if any(free) Lfree = -R\(R'\Qux_reg[free,:]) K_i[free,:] = Lfree end end # debug("# update cost-to-go approximation") - dV = dV + [k_i'Qu; .5*k_i'Quu[:,:,i]*k_i] - Vx[:,i] = Qx + K_i'Quu[:,:,i]*k_i + K_i'Qu + Qux'k_i - Vxx[:,:,i] = Qxx + K_i'Quu[:,:,i]*K_i + K_i'Qux + Qux'K_i - Vxx[:,:,i] = .5*(Vxx[:,:,i] + Vxx[:,:,i]') - - # debug("# save controls/gains") - k[:,i] = k_i - K[:,:,i] = K_i - - end |> esc -end - -function back_pass(cx,cu,cxx::AbstractArray{T,3},cxu,cuu,fx::AbstractArray{T,3},fu,fxx,fxu,fuu,λ,regType,lims,x,u) where T # nonlinear time variant - - m,N = size(u) - n = size(cx,1) - @assert size(cx) == (n, N) - @assert size(cu) == (m, N) - @assert size(cxx) == (n, n, N) - @assert size(cxu) == (n, m, N) - @assert size(cuu) == (m, m, N) - k = zeros(m,N) - K = zeros(m,n,N) - Vx = zeros(n,N) - Vxx = zeros(n,n,N) - Quu = Array{T}(undef,m,m,N) - Quui = Array{T}(undef,m,m,N) - dV = [0., 0.] - Vx[:,N] = cx[:,N] - Vxx[:,:,N] = cxx[:,:,N] - Quu[:,:,N] = cuu[:,:,N] - - diverge = 0 - for i = N-1:-1:1 - Qu = cu[:,i] + fu[:,:,i]'Vx[:,i+1] - Qx = cx[:,i] + fx[:,:,i]'Vx[:,i+1] - Qux = cxu[:,:,i]' + fu[:,:,i]'Vxx[:,:,i+1]*fx[:,:,i] - if !isempty(fxu) - fxuVx = vectens(Vx[:,i+1],fxu[:,:,:,i]) - Qux = Qux + fxuVx - end - - Quu[:,:,i] = cuu[:,:,i] + fu[:,:,i]'Vxx[:,:,i+1]*fu[:,:,i] - if !isempty(fuu) - fuuVx = vectens(Vx[:,i+1],fuu[:,:,:,i]) - Quu[:,:,i] .+= fuuVx - end - - Qxx = cxx[:,:,i] + fx[:,:,i]'Vxx[:,:,i+1]*fx[:,:,i] - isempty(fxx) || (Qxx .+= vectens(Vx[:,i+1],fxx[:,:,:,i])) - Vxx_reg = Vxx[:,:,i+1] .+ (regType == 2 ? λ*eye(n) : 0) - Qux_reg = cxu[:,:,i]' + fu[:,:,i]'Vxx_reg*fx[:,:,i] - isempty(fxu) || (Qux_reg .+= fxuVx) - QuuF = cuu[:,:,i] + fu[:,:,i]'Vxx_reg*fu[:,:,i] .+ (regType == 1 ? λ*eye(m) : 0) - isempty(fuu) || (QuuF .+= fuuVx) - - @end_backward_pass - end - - return diverge, GaussianPolicy(N,n,m,K,k,Quui,Quu), Vx, Vxx,dV -end - - -function back_pass(cx,cu,cxx::AbstractArray{T,2},cxu,cuu,fx::AbstractArray{T,3},fu,fxx,fxu,fuu,λ,regType,lims,x,u) where T # quadratic timeinvariant cost, dynamics nonlinear time variant - - @setupQTIC - - for i = N-1:-1:1 - Qu = cu[:,i] + fu[:,:,i]'Vx[:,i+1] - Qx = cx[:,i] + fx[:,:,i]'Vx[:,i+1] - Qux = cxu' + fu[:,:,i]'Vxx[:,:,i+1]*fx[:,:,i] - if !isempty(fxu) - fxuVx = vectens(Vx[:,i+1],fxu[:,:,:,i]) - Qux = Qux + fxuVx + @matviews begin + dV += [k_i'Qu; .5*k_i'Quu[i]*k_i] + Vx[i] = Qx + K_i'Quu[i]*k_i + K_i'Qu + Qux'k_i + Vxx[i] = Qxx + K_i'Quu[i]*K_i + K_i'Qux + Qux'K_i + Vxx[i] = .5*(Vxx[i] + Vxx[i]') end - Quu[:,:,i] = cuu + fu[:,:,i]'Vxx[:,:,i+1]*fu[:,:,i] - if !isempty(fuu) - fuuVx = vectens(Vx[:,i+1],fuu[:,:,:,i]) - Quu[:,:,i] = Quu[:,:,i] + fuuVx - end - Qxx = cxx + fx[:,:,i]'Vxx[:,:,i+1]*fx[:,:,i] - isempty(fxx) || (Qxx .+= vectens(Vx[:,i+1],fxx[:,:,:,i])) - Vxx_reg = Vxx[:,:,i+1] .+ (regType == 2 ? λ*eye(n) : 0) - Qux_reg = cxu' + fu[:,:,i]'Vxx_reg*fx[:,:,i] - isempty(fxu) || (Qux_reg .+= fxuVx) - QuuF = cuu + fu[:,:,i]'Vxx_reg*fu[:,:,i] .+ (regType == 1 ? λ*eye(m) : 0) - isempty(fuu) || (QuuF .+= fuuVx) - @end_backward_pass - end - return diverge, GaussianPolicy(N,n,m,K,k,Quui,Quu), Vx, Vxx,dV -end - -function back_pass(cx,cu,cxx::AbstractArray{T,2},cxu,cuu,fx::AbstractArray{T,3},fu,λ,regType,lims,x,u) where T # quadratic timeinvariant cost, linear time variant dynamics - @setupQTIC - for i = N-1:-1:1 - Qu = cu[:,i] + fu[:,:,i]'Vx[:,i+1] - Qx = cx[:,i] + fx[:,:,i]'Vx[:,i+1] - Qux = cxu' + fu[:,:,i]'Vxx[:,:,i+1]*fx[:,:,i] - Quu[:,:,i] = cuu + fu[:,:,i]'Vxx[:,:,i+1]*fu[:,:,i] - Qxx = cxx + fx[:,:,i]'Vxx[:,:,i+1]*fx[:,:,i] - Vxx_reg = Vxx[:,:,i+1] .+ (regType == 2 ? λ*eye(n) : 0) - Qux_reg = cxu' + fu[:,:,i]'Vxx_reg*fx[:,:,i] - QuuF = cuu + fu[:,:,i]'Vxx_reg*fu[:,:,i] .+ (regType == 1 ? λ*eye(m) : 0) - @end_backward_pass - end - - return diverge, GaussianPolicy(N,n,m,K,k,Quui,Quu), Vx, Vxx,dV -end - -function back_pass(cx,cu,cxx::AbstractArray{T,3},cxu,cuu,fx::AbstractArray{T,3},fu,λ,regType,lims,x,u) where T # quadratic timeVariant cost, linear time variant dynamics - m = size(u,1) - n,N = size(fx,1,3) - - @assert size(cx) == (n, N) - @assert size(cu) == (m, N) - @assert size(cxx) == (n, n, N) - @assert size(cxu) == (n, m, N) - @assert size(cuu) == (m, m, N) - - k = zeros(m,N) - K = zeros(m,n,N) - Vx = zeros(n,N) - Vxx = zeros(n,n,N) - Quu = Array{T}(undef,m,m,N) - Quui = Array{T}(undef,m,m,N) - dV = [0., 0.] - - Vx[:,N] = cx[:,N] - Vxx[:,:,N] = cxx[:,:,end] - Quu[:,:,N] = cuu[:,:,N] - - diverge = 0 - for i = N-1:-1:1 - Qu = cu[:,i] + fu[:,:,i]'Vx[:,i+1] - Qx = cx[:,i] + fx[:,:,i]'Vx[:,i+1] - Vxx_reg = Vxx[:,:,i+1] .+ (regType == 2 ? λ*eye(n) : 0) - Qux_reg = cxu[:,:,i]' + fu[:,:,i]'Vxx_reg*fx[:,:,i] - QuuF = cuu[:,:,i] + fu[:,:,i]'Vxx_reg*fu[:,:,i] .+ (regType == 1 ? λ*eye(m) : 0) - Qux = cxu[:,:,i]' + fu[:,:,i]'Vxx[:,:,i+1]*fx[:,:,i] - Quu[:,:,i] .= cuu[:,:,i] .+ fu[:,:,i]'Vxx[:,:,i+1]*fu[:,:,i] - Qxx = cxx[:,:,i] + fx[:,:,i]'Vxx[:,:,i+1]*fx[:,:,i] - @end_backward_pass + # debug("# save controls/gains") + k[i] = k_i + K[i] = K_i end return diverge, GaussianPolicy(N,n,m,K,k,Quui,Quu), Vx, Vxx,dV end -function back_pass(cx,cu,cxx::AbstractArray{T,2},cxu,cuu,fx::AbstractMatrix{T},fu,λ,regType,lims,x,u) where T # cost quadratic and cost and LTI dynamics - - m,N = size(u) - n = size(fx,1) - @assert size(cx) == (n, N) - @assert size(cu) == (m, N) - @assert size(cxx) == (n, n) - @assert size(cxu) == (n, m) - @assert size(cuu) == (m, m) - k = zeros(m,N) - K = zeros(m,n,N) - Vx = zeros(n,N) - Vxx = zeros(n,n,N) - Quu = Array{T}(undef,m,m,N) - Quui = Array{T}(undef,m,m,N) - dV = [0., 0.] - - Vx[:,N] = cx[:,N] - Vxx[:,:,N] = cxx - Quu[:,:,N] = cuu - - diverge = 0 - for i = N-1:-1:1 - Qu = cu[:,i] + fu'Vx[:,i+1] - Qx = cx[:,i] + fx'Vx[:,i+1] - Qux = cxu' + fu'Vxx[:,:,i+1]*fx - Quu[:,:,i] = cuu + fu'Vxx[:,:,i+1]*fu - Qxx = cxx + fx'Vxx[:,:,i+1]*fx - Vxx_reg = Vxx[:,:,i+1] .+ (regType == 2 ? λ*eye(n) : 0) - Qux_reg = cxu' + fu'Vxx_reg*fx - QuuF = cuu + fu'Vxx_reg*fu .+ (regType == 1 ? λ*eye(m) : 0) - @end_backward_pass - end - - return diverge, GaussianPolicy(N,n,m,K,k,Quui,Quu), Vx, Vxx,dV -end function graphics(x...) diff --git a/src/boxQP.jl b/src/boxQP.jl index 07c9f59..285b334 100644 --- a/src/boxQP.jl +++ b/src/boxQP.jl @@ -70,9 +70,8 @@ function boxQP(H,g,lower,upper,x0::AbstractVector; iter = 1 while iter <= maxIter - if result != 0 - break - end + result == 0 || break + # debug("# check relative improvement") if iter>1 && (oldvalue - value) < minRelImprove*abs(oldvalue) @@ -90,7 +89,7 @@ function boxQP(H,g,lower,upper,x0::AbstractVector; # clamped[(x[:,1] .== lower)&(grad[:,1].>0)] = true # clamped[(x[:,1] .== upper)&(grad[:,1].<0)] = true for i = 1:n - clamped[i] = ((x[i,1] == lower[i])&&(grad[i,1]>0)) || ((x[i,1] == upper[i])&&(grad[i,1]<0)) + clamped[i] = ((x[i] == lower[i]) && (grad[i]>0)) || ((x[i] == upper[i]) && (grad[i]<0)) end free = .!clamped @@ -104,7 +103,7 @@ function boxQP(H,g,lower,upper,x0::AbstractVector; if iter == 1 factorize = true else - factorize = any(old_clamped != clamped) + factorize = any(old_clamped .!= clamped) end if factorize @@ -125,7 +124,7 @@ function boxQP(H,g,lower,upper,x0::AbstractVector; # debug("# get search direction") grad_clamped = g + H*(x.*clamped) - search = zeros(n,1) + search = zeros(n) search[free] = -Hfree\(Hfree'\grad_clamped[free]) - x[free] # debug("# check for descent direction") @@ -138,13 +137,13 @@ function boxQP(H,g,lower,upper,x0::AbstractVector; step = 1 nstep = 0 xc = clamp.(x+step*search,lower,upper) - vc = (xc'*g + 0.5*xc'*H*xc)[1] + vc = (xc'g + 0.5*xc'H*xc)[1] while (vc - oldvalue)/(step*sdotg) < Armijo step = step*stepDec nstep += 1 xc = clamp.(x+step*search,lower,upper) - vc = (xc'*g + 0.5*xc'*H*xc)[1] - if step 0 && @printf("\nSUCCESS: gradient norm < tol_grad\n") @@ -263,7 +264,7 @@ function iLQG(f,costfun,df, x0, u0; if back_pass_done debug("# serial backtracking line-search") @elapsed(for outer αi = α - xnew,unew,costnew = forward_pass(traj_new, x0[:,1] ,u, x,αi,f,costfun, lims, diff_fun) + xnew,unew,costnew = forward_pass(traj_new, x0 ,u, x,αi,f,costfun, lims, diff_fun) Δcost = sum(cost) - sum(costnew) expected_reduction = -αi*(dV[1] + αi*dV[2]) reduce_ratio = if expected_reduction > 0 diff --git a/src/system_pendcart.jl b/src/system_pendcart.jl index 4ae4d49..8298d63 100644 --- a/src/system_pendcart.jl +++ b/src/system_pendcart.jl @@ -90,94 +90,89 @@ function demo_pendcart(;x0 = [π-0.6,0,0,0], goal = [π,0,0,0], function cost_quadratic(x,u) - local d = (x.-goal) + local d = (x .- Ref(goal)) 0.5(d'*Q*d + u'R*u)[1] end function cost_quadratic(x::Matrix,u) local d = (x.-goal) - T = size(u,2) + T = length(u) c = Vector{Float64}(undef,T+1) for t = 1:T - c[t] = 0.5(d[:,t]'*Q*d[:,t] + u[:,t]'R*u[:,t])[1] + c[t] = 0.5(d[t]'*Q*d[t] + u[t]'R*u[t])[1] end - c[end] = cost_quadratic(x[:,end][:],[0.0]) + c[end] = cost_quadratic(x[end],[0.0]) return c end - cx = zeros(4,T) - cu = zeros(1,T) - cxu = zeros(D,1) function dcost_quadratic(x,u) - cx .= Q*(x.-goal) - cu .= R.*u - return cx,cu,cxu + cx = Ref(Q) .* (x .- Ref(goal)) + cu = Ref(R) .*u + return cx,cu,zeros(D) end function lin_dyn_f(x,u,i) - u[isnan.(u)] .= 0 f = dfsys(x,u) end - fxc = Array{Float64}(undef,D,D,I) - fuc = Array{Float64}(undef,D,1,I) - fxd = Array{Float64}(undef,D,D,I) - fud = Array{Float64}(undef,D,1,I) - for ii = 1:I - fxc[:,:,ii] = [0 1 0 0; + + fxc = Float64[0 1 0 0; 0 0 0 0; 0 0 0 1; 0 0 0 0] - fuc[:,:,ii] = [0, 0, 0, 1] - end + fuc = reshape(Float64[0, 0, 0, 1], 4,1) + fxc = [copy(fxc) for i = 1:N] + fuc = [copy(fuc) for i = 1:N] + fxd = deepcopy(fxc) + fud = deepcopy(fuc) + function lin_dyn_df(x,u) - u[isnan.(u)] .= 0 - D = size(x,1) - nu,I = size(u) + D = length(x[1]) + nu,I = length(u[1]),length(u) cx,cu,cxu = dcost_quadratic(x,u) cxx = Q - cuu = [R] + cuu = fill(R,1,1) for ii = 1:I - fxc[2,1,ii] = -g/l*cos(x[1,ii])-u[ii]/l*sin(x[1,ii]) - fxc[2,2,ii] = -d - fuc[2,1,ii] = cos(x[1,ii])/l - ABd = exp([fxc[:,:,ii]*h fuc[:,:,ii]*h; zeros(nu, D + nu)])# ZoH sampling - fxd[:,:,ii] = ABd[1:D,1:D] - fud[:,:,ii] = ABd[1:D,D+1:D+nu] + fxc[ii][2,1] = -g/l*cos(x[ii][1])-u[ii][]/l*sin(x[ii][1]) + fxc[ii][2,2] = -d + fuc[ii][2,1] = cos(x[ii][1])/l + ABd = exp([fxc[ii]*h fuc[ii]*h; zeros(nu, D + nu)])# ZoH sampling + fxd[ii] = ABd[1:D,1:D] + fud[ii] = ABd[1:D,D+1:D+nu] end - fxx=fxu=fuu = [] + fxx=fxu=fuu = nothing return fxd,fud,fxx,fxu,fuu,cx,cu,cxx,cxu,cuu end - x = zeros(4,N) - u = zeros(1,T) + x = [zeros(4) for _ in 1:N] + u = [zeros(1) for _ in 1:T] """ Simulate a pendulum on a cart using the non-linear equations """ function simulate_pendcart(x0,L, dfsys, cost) - x[:,1] = x0 - u[1] = 0 + x[1] = x0 + u[1] .= 0 for t = 2:T - dx = copy(x[:,t-1]) + dx = copy(x[t-1]) dx[1] -= pi - u[t] = -(L*dx)[1] + u[t] .= -(L*dx) if !isempty(lims) - u[t] = clamp(u[t],lims[1],lims[2]) + clamp!(u[t],lims[1],lims[2]) end - x[:,t] = dfsys(x[:,t-1],u[t]) + x[t] = dfsys(x[t-1],u[t]) end - dx = copy(x[:,T]) + dx = copy(x[T]) dx[1] -= pi uT = -(L*dx)[1] if !isempty(lims) uT = clamp(uT,lims[1],lims[2]) end - x[:,T+1] = dfsys(x[:,T],uT) + x[T+1] = dfsys(x[T],uT) c = cost(x,u) return x, u, c diff --git a/test/test_pendcart.jl b/test/test_pendcart.jl index 4c02ebb..4b986af 100644 --- a/test/test_pendcart.jl +++ b/test/test_pendcart.jl @@ -123,7 +123,7 @@ g/l 0 0 0; 0 0 0 1; 0 0 0 0] B = [0, -1/l, 0, 1] -C = eye(4) # Assume all states are measurable +C = Matrix{Float64}(I,4,4) # Assume all states are measurable D = 4 sys = ss(A,B,C,zeros(4)) diff --git a/test/test_readme.jl b/test/test_readme.jl index 439c3bb..576c58b 100644 --- a/test/test_readme.jl +++ b/test/test_readme.jl @@ -1,52 +1,50 @@ -using Test, Statistics, LinearAlgebra, Random +using Test, Statistics, LinearAlgebra, Random, DifferentialDynamicProgramming # make stable linear dynamics Random.seed!(0) eye = DifferentialDynamicProgramming.eye -costs = map(1:10) do MCiteration - h = .01 # time step - n = 10 # state dimension - m = 2 # control dimension - A = randn(n,n) +h = .01 # time step +n = 10 # state dimension +m = 2 # control dimension +# control limits +lims = []# ones(m,1)*[-1 1]*.6 + +T = 1000 # horizon +# quadratic costs +const Q = SMatrix{n,n,Float64}(h*eye(n)) +const R = SMatrix{m,m,Float64}(.1*h*eye(m)) + +@time costs = map(1:10) do MCiteration + A = @SMatrix randn(n,n) A = A-A' # skew-symmetric = pure imaginary eigenvalues A = exp(h*A) # discrete time - B = h*randn(n,m) - - # quadratic costs - Q = h*eye(n) - R = .1*h*eye(m) + B = h* @SMatrix(randn(n,m)) - # control limits - lims = []# ones(m,1)*[-1 1]*.6 - T = 1000 # horizon - x0 = ones(n,1) # initial state - u0 = .1*randn(m,T) # initial controls + x0 = ones(n) # initial state + u0 = .1*randn(m,T) |> tosvec # initial controls # optimization problem N = T+1 fx = A fu = B cxx = Q - cxu = zeros(size(B)) + cxu = 0*B cuu = R # Specify dynamics functions function lin_dyn_df(x,u,Q,R) - u[isnan.(u)] .= 0 - cx = Q*x - cu = R*u - fxx=fxu=fuu = [] + cx = Ref(Q) .* x + cu = Ref(R) .* u + fxx=fxu=fuu = nothing return fx,fu,fxx,fxu,fuu,cx,cu,cxx,cxu,cuu end function lin_dyn_f(x,u,A,B) - u[isnan.(u)] .= 0 xnew = A*x + B*u return xnew end function lin_dyn_cost(x,u,Q) - c = 0.5*sum(x.*(Q*x)) + 0.5*sum(u.*(R*u)) - return c + 0.5*sum(x'Q*x + u'R*u for (x,u) in zip(x,u)) end f(x,u,i) = lin_dyn_f(x,u,A,B) @@ -68,3 +66,8 @@ end @test maximum(costs) < 25 # This should be the case most of the times @test mean(costs) < 10 # This should be the case most of the times @test minimum(costs) < 5 # This should be the case most of the times + + + +# 8.136281 seconds (13.75 M allocations: 2.661 GiB, 13.57% gc time) +# 2.174562 seconds (2.90 M allocations: 157.990 MiB, 5.06% gc time) From 568ddb4f6829195f426e23127dbe7a81fa77e43d Mon Sep 17 00:00:00 2001 From: Fredrik Bagge Carlson Date: Thu, 6 Jun 2019 21:31:38 +0200 Subject: [PATCH 2/3] more WIP --- src/DifferentialDynamicProgramming.jl | 10 +++--- src/backward_pass.jl | 41 ++++++++------------- src/iLQG.jl | 1 - src/system_pendcart.jl | 52 +++++++++++++-------------- 4 files changed, 46 insertions(+), 58 deletions(-) diff --git a/src/DifferentialDynamicProgramming.jl b/src/DifferentialDynamicProgramming.jl index ed368ec..1a193ad 100644 --- a/src/DifferentialDynamicProgramming.jl +++ b/src/DifferentialDynamicProgramming.jl @@ -15,19 +15,19 @@ function __init__() @eval LinearAlgebra.adjoint(x::String) = x @eval function plotstuff_linear(x,u,cost,totalcost) p = Plots.plot(layout=(2,2)) - Plots.plot!(p,x', title="State Trajectories", xlabel="Time step",legend=false, subplot=1, show=false) + Plots.plot!(p,reduce(hcat,x)', title="State Trajectories", xlabel="Time step",legend=false, subplot=1, show=false) Plots.plot!(p,cost,c=:black,linewidth=3, title="Cost", xlabel="Time step", subplot=2, show=false) - Plots.plot!(p,u',title="Control signals", xlabel="Time step", subplot=3, show=false) + Plots.plot!(p,reduce(hcat,u)',title="Control signals", xlabel="Time step", subplot=3, show=false) Plots.plot!(p,totalcost,title="Total cost", xlabel="Iteration", subplot=4, show=false) Plots.gui() end @eval function plotstuff_pendcart(x00, u00, x,u,cost00,cost,otrace) cp = Plots.plot(layout=(1,3)) - sp = Plots.plot(x00',title=["\$x_$(i)\$" for i=1:size(x00,1)]', lab="Simulation", layout=(2,2)) - Plots.plot!(sp,x', title=["\$x_$(i)\$" for i=1:size(x00,1)]', lab="Optimized", xlabel="Time step", legend=true) + sp = Plots.plot(reduce(hcat,x00)',title=["\$x_$(i)\$" for i=1:length(x00[1])]', lab="Simulation", layout=(2,2)) + Plots.plot!(sp,reduce(hcat,x)', title=["\$x_$(i)\$" for i=1:length(x00[1])]', lab="Optimized", xlabel="Time step", legend=true) Plots.plot!(cp,cost00, title="Cost", lab="Simulation", subplot=2) - Plots.plot!(cp,u', legend=true, title="Control signal",lab="Optimized", subplot=1) + Plots.plot!(cp,reduce(hcat,u)', legend=true, title="Control signal",lab="Optimized", subplot=1) Plots.plot!(cp,cost[2:end], legend=true, title="Cost",lab="Optimized", xlabel="Time step", subplot=2, yscale=:log10) iters = sum(cost .> 0) filter!(x->x>0,cost) diff --git a/src/backward_pass.jl b/src/backward_pass.jl index 607d20c..4ee5955 100644 --- a/src/backward_pass.jl +++ b/src/backward_pass.jl @@ -20,33 +20,21 @@ macro matviews(ex) esc(ex) end -@macroexpand @matviews begin - dV = dV + [k_i'Qu; .5*k_i'Quu[i]*k_i] - Vx[i] = Qx + K_i'Quu[i]*k_i + K_i'Qu + Qux'k_i - Qu = cu[i] + fu[i]'Vx[i+1] - Qx = cx[i] + fx[i]'Vx[i+1] - Qux = cxu[i]' + fu[i]'Vxx[i+1]*fx[i] - fxuVx = vectens(Vx[i+1],fxu[i]) - Qux = Qux + fxuVx - Quu[i] = cuu[i] + fu[i]'Vxx[i+1]*fu[i] - fuuVx = vectens(Vx[i+1],fuu[i]) - Quu[i] += fuuVx -end -Base.zeros(::Type{Matrix{T}}, n::Int) where T = [zeros(T,0,0) for _ in 1:n] -Base.zeros(::Type{Vector{T}}, n::Int) where T = [T[] for _ in 1:n] +Base.zeros(x::AbstractVecOrMat{T}, n::Int) where T = [zeros(T,size(x)) for _ in 1:n] +Base.zeros(x::StaticArray, n::Int) = zeros(typeof(x), n) function back_pass(cx,cu,cxx,cxu,cuu,fx,fu,fxx,fxu,fuu,λ,regType,lims,x,u) # nonlinear time variant m,N = length(u[1]),length(u) length(cx) == N || throw(ArgumentError("cx must be the same length as u")) @matviews begin n = length(cx[1]) - k = zeros(typeof(cu[1]), N) - K = zeros(typeof(copy(fu[1]')), N) - Vx = zeros(typeof(cx[1]), N) - Vxx = zeros(typeof(fx[1]), N) - Quu = zeros(typeof(cuu[1]), N) - Quui = zeros(typeof(cuu[1]), N) + k = zeros(cu[1], N) + K = zeros(copy(fu[1]'), N) + Vx = zeros(cx[1], N) + Vxx = zeros(fx[1], N) + Quu = zeros(cuu[1], N) + Quui = zeros(cuu[1], N) dV = @SVector [0., 0.] Vx[N] = cx[N] Vxx[N] = cxx[N] @@ -100,17 +88,18 @@ function back_pass(cx,cu,cxx,cxu,cuu,fx,fu,fxx,fxu,fuu,λ,regType,lims,x,u) # no lower = lims[:,1]-u[i] upper = lims[:,2]-u[i] local k_i,result,free - try - k_i,result,R,free = boxQP(QuuF,QuF,lower,upper,k[min(i+1,N-1)]) - catch - result = 0 - end + # try + k_i,result,R,free = boxQP(QuuF,QuF,lower,upper,k[i+1]) + # catch ex + # @show ex + # result = 0 + # end if result < 1 diverge = i println("Failed boxQP") return diverge, GaussianPolicy(N,n,m,K,k,Quui,Quu), Vx, Vxx, dV end - K_i = similar(K[i]) + K_i = 0*K[i] if any(free) Lfree = -R\(R'\Qux_reg[free,:]) K_i[free,:] = Lfree diff --git a/src/iLQG.jl b/src/iLQG.jl index dc9bb97..ba1a930 100644 --- a/src/iLQG.jl +++ b/src/iLQG.jl @@ -183,7 +183,6 @@ function iLQG(f,costfun,df, x0, u0; debug("# test different backtracing parameters α and break loop when first succeeds") x,un,cost, = forward_pass(traj_new,x0,αi.*u,[],1,f,costfun, lims,diff_fun) debug("# simplistic divergence test") - @show typeof(x) if all(all(abs.(x) .< 1e8) for x in x) u = un diverge = false diff --git a/src/system_pendcart.jl b/src/system_pendcart.jl index 8298d63..f471f53 100644 --- a/src/system_pendcart.jl +++ b/src/system_pendcart.jl @@ -40,27 +40,27 @@ Run the iLQG Function to find an optimal trajectory For the "pendulum on a cart `doplot = true` : Plot results """ function demo_pendcart(;x0 = [π-0.6,0,0,0], goal = [π,0,0,0], - Q = Diagonal([10.,1,2,1]), # State weight matrix - R = 1., # Control weight matrix - lims = 5.0*[-1 1], # control limits, - T = 600, # Number of time steps - doplot = true # Plot results - ) + Q = Diagonal([10.,1,2,1]), # State weight matrix + R = 1., # Control weight matrix + lims = 5.0*[-1 1], # control limits, + T = 600, # Number of time steps + doplot = true # Plot results + ) N = T+1 g = 9.82 l = 0.35 # Length of pendulum h = 0.01 # Sample time d = 0.99 - A = [0 1 0 0; # Linearlized system dynamics matrix, continuous time + A = @SMatrix [0 1 0 0; # Linearlized system dynamics matrix, continuous time g/l -d 0 0; 0 0 0 1; 0 0 0 0] - B = [0, -1/l, 0, 1] + B = @SMatrix [0; -1/l; 0; 1] C = eye(4) # Assume all states are measurable D = 4 L = lqr(A,B,Q,R) # Calculate the optimal state feedback - I = T + function fsys_closedloop(t,x,L,xd) dx = copy(x) @@ -90,12 +90,12 @@ function demo_pendcart(;x0 = [π-0.6,0,0,0], goal = [π,0,0,0], function cost_quadratic(x,u) - local d = (x .- Ref(goal)) + local d = (x - goal) 0.5(d'*Q*d + u'R*u)[1] end - function cost_quadratic(x::Matrix,u) - local d = (x.-goal) + function cost_quadratic(x::Vector{<:AbstractVector},u) + local d = (x .- Ref(goal)) T = length(u) c = Vector{Float64}(undef,T+1) for t = 1:T @@ -109,7 +109,7 @@ function demo_pendcart(;x0 = [π-0.6,0,0,0], goal = [π,0,0,0], function dcost_quadratic(x,u) cx = Ref(Q) .* (x .- Ref(goal)) cu = Ref(R) .*u - return cx,cu,zeros(D) + return cx,cu,zeros(D,1) end @@ -119,15 +119,15 @@ function demo_pendcart(;x0 = [π-0.6,0,0,0], goal = [π,0,0,0], - fxc = Float64[0 1 0 0; - 0 0 0 0; - 0 0 0 1; - 0 0 0 0] - fuc = reshape(Float64[0, 0, 0, 1], 4,1) - fxc = [copy(fxc) for i = 1:N] - fuc = [copy(fuc) for i = 1:N] - fxd = deepcopy(fxc) - fud = deepcopy(fuc) + fxc = Float64[0 1 0 0; + 0 0 0 0; + 0 0 0 1; + 0 0 0 0] + fuc = reshape(Float64[0, 0, 0, 1], 4,1) + fxc = [copy(fxc) for i = 1:N] + fuc = [copy(fuc) for i = 1:N] + fxd = deepcopy(fxc) + fud = deepcopy(fuc) function lin_dyn_df(x,u) @@ -180,24 +180,24 @@ function demo_pendcart(;x0 = [π-0.6,0,0,0], goal = [π,0,0,0], # Simulate the closed loop system with regular LQG control and watch it fail due to control limits + # x0 = [π-0.6,0,0,0]; goal = [π,0,0,0] x00, u00, cost00 = simulate_pendcart(x0, L, dfsys, cost_quadratic) - f(x,u,i) = lin_dyn_f(x,u,i) df(x,u) = lin_dyn_df(x,u) # plotFn(x) = plot(squeeze(x,2)') println("Entering iLQG function") # subplot(n=4,nc=2) - x, u, L, Vx, Vxx, cost, trace = iLQG(f,cost_quadratic, df, x0, 0*u00, + x, u, L, Vx, Vxx, cost, trace = iLQG(f,cost_quadratic, df, x0, 0 .* u00, lims = lims, # plotFn = x -> Plots.subplot!(x'), regType = 2, - α = exp10.(range(0.2, stop=-3, length=6)), + α = exp10.(LinRange(0.2, -3, 6)), λmax = 1e15, verbosity = 3, tol_fun = 1e-8, - tol_grad = 1e-8, + tol_grad = 1e-8, max_iter = 1000); doplot && plotstuff_pendcart(x00, u00, x,u,cost00,cost,trace) From 1978d4302422465b75ff30d2071176ac8e418271 Mon Sep 17 00:00:00 2001 From: Fredrik Bagge Carlson Date: Fri, 7 Jun 2019 09:24:27 +0200 Subject: [PATCH 3/3] more WIP --- src/DifferentialDynamicProgramming.jl | 6 +- src/backward_pass.jl | 107 +++++++++++++------------- src/demo_linear.jl | 59 +++++++------- src/forward_pass.jl | 14 ++-- src/iLQG.jl | 2 +- src/iLQGkl.jl | 18 +++-- src/klutils.jl | 58 +++++++------- src/system_pendcart.jl | 47 ++++------- 8 files changed, 150 insertions(+), 161 deletions(-) diff --git a/src/DifferentialDynamicProgramming.jl b/src/DifferentialDynamicProgramming.jl index 1a193ad..ffeeeee 100644 --- a/src/DifferentialDynamicProgramming.jl +++ b/src/DifferentialDynamicProgramming.jl @@ -6,7 +6,9 @@ const DEBUG = false # Set this flag to true in order to print debug messages export QPTrace, boxQP, demoQP, iLQG,iLQGkl, demo_linear, demo_linear_kl, demo_pendcart, GaussianPolicy, tosvec -tosvec(x) = reinterpret(SVector{size(x,1),eltype(x)}, x)[:] +tosvec(x::AbstractMatrix{<:Number}) = reinterpret(SVector{size(x,1),eltype(x)}, x)[:] +tosvec(x::AbstractVector{<:Number}) = SVector{length(x),eltype(x)}(x) +tosmat(x::AbstractMatrix{<:Number}) = SMatrix{size(x,1),size(x,2),eltype(x)}(x) eye(n) = Matrix{Float64}(I,n,n) Base.strides(::SArray{Tuple{i,j}}) where {i,j} = (1,i) @@ -16,7 +18,7 @@ function __init__() @eval function plotstuff_linear(x,u,cost,totalcost) p = Plots.plot(layout=(2,2)) Plots.plot!(p,reduce(hcat,x)', title="State Trajectories", xlabel="Time step",legend=false, subplot=1, show=false) - Plots.plot!(p,cost,c=:black,linewidth=3, title="Cost", xlabel="Time step", subplot=2, show=false) + Plots.hline!(p,cost,c=:black,linewidth=3, title="Cost", xlabel="Time step", subplot=2, show=false) Plots.plot!(p,reduce(hcat,u)',title="Control signals", xlabel="Time step", subplot=3, show=false) Plots.plot!(p,totalcost,title="Total cost", xlabel="Iteration", subplot=4, show=false) Plots.gui() diff --git a/src/backward_pass.jl b/src/backward_pass.jl index 4ee5955..b1acb92 100644 --- a/src/backward_pass.jl +++ b/src/backward_pass.jl @@ -39,6 +39,7 @@ function back_pass(cx,cu,cxx,cxu,cuu,fx,fu,fxx,fxu,fuu,λ,regType,lims,x,u) # no Vx[N] = cx[N] Vxx[N] = cxx[N] Quu[N] = cuu[N] + Quui[N] = inv(Quu[N]) k[N] = 0*cu[N] K[N] = 0*fu[N]' end @@ -106,12 +107,11 @@ function back_pass(cx,cu,cxx,cxu,cuu,fx,fu,fxx,fxu,fuu,λ,regType,lims,x,u) # no end end # debug("# update cost-to-go approximation") - @matviews begin - dV += [k_i'Qu; .5*k_i'Quu[i]*k_i] - Vx[i] = Qx + K_i'Quu[i]*k_i + K_i'Qu + Qux'k_i - Vxx[i] = Qxx + K_i'Quu[i]*K_i + K_i'Qux + Qux'K_i - Vxx[i] = .5*(Vxx[i] + Vxx[i]') - end + + dV += [k_i'Qu; .5*k_i'Quu[i]*k_i] + Vx[i] = Qx + K_i'Quu[i]*k_i + K_i'Qu + Qux'k_i + Vxx[i] = Qxx + K_i'Quu[i]*K_i + K_i'Qux + Qux'K_i + Vxx[i] = .5*(Vxx[i] + Vxx[i]') # debug("# save controls/gains") k[i] = k_i @@ -127,55 +127,57 @@ function graphics(x...) return 0 end -function back_pass_gps(cx,cu,cxx::AbstractArray{T,3},cxu,cuu, fx::AbstractArray{T,3},fu,lims,x,u,kl_cost_terms) where T # quadratic timeVariant cost, linear time variant dynamics - m = size(u,1) - n,_,N = size(fx) +function back_pass_gps(cx,cu,cxx,cxu,cuu, fx,fu,lims,x,u,kl_cost_terms) + m,N = length(u[1]),length(u) ηbracket = kl_cost_terms[2] η = isa(ηbracket,AbstractMatrix) ? ηbracket[2,N] : ηbracket[2] cxkl,cukl,cxxkl,cxukl,cuukl = kl_cost_terms[1] - @assert size(cx) == (n, N) - @assert size(cu) == (m, N) - @assert size(cxx) == (n, n, N) - @assert size(cxu) == (n, m, N) - @assert size(cuu) == (m, m, N) - - k = zeros(m,N) - K = zeros(m,n,N) - Vx = zeros(n,N) - Vxx = zeros(n,n,N) - Quu = Array{T}(undef,m,m,N) - Quui = Array{T}(undef,m,m,N) - dV = [0., 0.] - - Vx[:,N] = cx[:,N] - Vxx[:,:,N] = cxx[:,:,end] - Quu[:,:,N] = cuu[:,:,N]./ η .+ cuukl[:,:,N] - Quui[:,:,N] = inv(Quu[:,:,N]) + @show length.((cx,cu,cxx,cxu,cuu, fx,fu,lims,x,u)) + @show length.((cxkl,cukl,cxxkl,cxukl,cuukl)) + + @matviews begin + n = length(cx[1]) + k = zeros(cu[1], N) + K = zeros(copy(fu[1]'), N) + Vx = zeros(cx[1], N) + Vxx = zeros(fx[1], N) + Quu = zeros(cuu[1], N) + Quui = zeros(cuu[1], N) + dV = @SVector [0., 0.] + Vx[N] = cx[N] + Vxx[N] = cxx[N] + Quu[N] = cuu[N]./ η .+ cuukl[N] + Quui[N] = inv(Quu[N]) + k[N] = 0*cu[N] + K[N] = 0*fu[N]' + end diverge = 0 for i = N-1:-1:1 - Qu = cu[:,i] + fu[:,:,i]'Vx[:,i+1] - Qx = cx[:,i] + fx[:,:,i]'Vx[:,i+1] - Qux = cxu[:,:,i]' + fu[:,:,i]'Vxx[:,:,i+1]*fx[:,:,i] - Quu[:,:,i] .= cuu[:,:,i] .+ fu[:,:,i]'Vxx[:,:,i+1]*fu[:,:,i] - Qxx = cxx[:,:,i] + fx[:,:,i]'Vxx[:,:,i+1]*fx[:,:,i] - ηbracket = kl_cost_terms[2] η = isa(ηbracket,AbstractMatrix) ? ηbracket[2,i] : ηbracket[2] - Qu = Qu ./ η + cukl[:,i] - Qux = Qux ./ η + cxukl[:,:,i] - Quu[:,:,i] = Quu[:,:,i] ./ η + cuukl[:,:,i] - Qx = Qx ./ η + cxkl[:,i] - Qxx = Qxx ./ η + cxxkl[:,:,i] - - Quu[:,:,i] = 0.5*(Quu[:,:,i] + Quu[:,:,i]') + @matviews begin + Qu = cu[i] + fu[i]'Vx[i+1] + Qx = cx[i] + fx[i]'Vx[i+1] + Qux = cxu[i]' + fu[i]'Vxx[i+1]*fx[i] + Quu[i] = cuu[i] + fu[i]'Vxx[i+1]*fu[i] + Qxx = cxx[i] + fx[i]'Vxx[i+1]*fx[i] + + Qu = Qu ./ η + cukl[i] + Qux = Qux ./ η + cxukl[i] + Quu[i] = Quu[i] ./ η + cuukl[i] + Qx = Qx ./ η + cxkl[i] + Qxx = Qxx ./ η + cxxkl[i] + + Quu[i] = 0.5*(Quu[i] + Quu[i]') + end if isempty(lims) || lims[1,1] > lims[1,2] # debug("# no control limits: Cholesky decomposition, check for non-PD") local R try - R = cholesky(Hermitian(Quu[:,:,i])) + R = cholesky(Hermitian(Quu[i])) catch diverge = i return diverge, GaussianPolicy(N,n,m,K,k,Quui,Quu), Vx, Vxx, dV @@ -186,11 +188,11 @@ function back_pass_gps(cx,cu,cxx::AbstractArray{T,3},cxu,cuu, fx::AbstractArray{ K_i = -(R\Qux) else # debug("# solve Quadratic Program") - lower = lims[:,1]-u[:,i] - upper = lims[:,2]-u[:,i] + lower = lims[:,1]-u[i] + upper = lims[:,2]-u[i] local k_i,result,free try - k_i,result,R,free = boxQP(Quu[:,:,i],Qu,lower,upper,k[:,min(i+1,N-1)]) + k_i,result,R,free = boxQP(Quu[i],Qu,lower,upper,k[i+1]) catch result = 0 end @@ -198,7 +200,7 @@ function back_pass_gps(cx,cu,cxx::AbstractArray{T,3},cxu,cuu, fx::AbstractArray{ diverge = i return diverge, GaussianPolicy(N,n,m,K,k,Quui,Quu), Vx, Vxx, dV end - K_i = zeros(m,n) + K_i = 0*K[i] if any(free) Lfree = -R\(R'\Qux[free,:]) K_i[free,:] = Lfree @@ -206,15 +208,16 @@ function back_pass_gps(cx,cu,cxx::AbstractArray{T,3},cxu,cuu, fx::AbstractArray{ end # debug("# update cost-to-go approximation") - dV = dV + [k_i'Qu; .5*k_i'Quu[:,:,i]*k_i] - Vx[:,i] = Qx + K_i'Quu[:,:,i]*k_i + K_i'Qu + Qux'k_i - Vxx[:,:,i] = Qxx + K_i'Quu[:,:,i]*K_i + K_i'Qux + Qux'K_i - Vxx[:,:,i] = .5*(Vxx[:,:,i] + Vxx[:,:,i]') + dV += [k_i'Qu; .5*k_i'Quu[i]*k_i] + Vx[i] = Qx + K_i'Quu[i]*k_i + K_i'Qu + Qux'k_i + Vxx[i] = Qxx + K_i'Quu[i]*K_i + K_i'Qux + Qux'K_i + Vxx[i] = .5*(Vxx[i] + Vxx[i]') + # debug("# save controls/gains") - k[:,i] = k_i - K[:,:,i] = K_i - Quui[:,:,i] = inv(Quu[:,:,i]) + k[i] = k_i + K[i] = K_i + Quui[i] = inv(Quu[i]) end return diverge, GaussianPolicy(N,n,m,K,k,Quui,Quu), Vx, Vxx,dV diff --git a/src/demo_linear.jl b/src/demo_linear.jl index 257a0d3..33d6cec 100644 --- a/src/demo_linear.jl +++ b/src/demo_linear.jl @@ -22,8 +22,8 @@ function demo_linear(;kwargs...) lims = [] #ones(m,1)*[-1 1]*.6 T = 1000 # horizon - x0 = ones(n,1) # initial state - u0 = .1*randn(m,T) # initial controls + x0 = ones(n) # initial state + u0 = .1*randn(m,T) |> tosvec # initial controls # optimization problem N = T+1 @@ -33,20 +33,19 @@ function demo_linear(;kwargs...) cxu = zeros(size(B)) cuu = R function lin_dyn_df(x,u,Q,R) - u[isnan.(u)] .= 0 - cx = Q*x - cu = R*u - fxx=fxu=fuu = [] + cx = Ref(Q) .* x + cu = Ref(R) .* u + fxx=fxu=fuu = nothing return fx,fu,fxx,fxu,fuu,cx,cu,cxx,cxu,cuu end function lin_dyn_f(x,u,A,B,Q,R) - u[isnan.(u)] .= 0 - xnew = A*x + B*u - return xnew + A*x + B*u end - lin_dyn_fT(x,Q) = 0.5*sum(x.*(Q*x)) + quadform(x::AbstractVector{<:AbstractVector},Q) = sum(x'Q*x for x in x) + quadform(x,Q) = x'Q*x + lin_dyn_fT(x,Q) = 0.5*quadform(x,Q) f(x,u,i) = lin_dyn_f(x,u,A,B,Q,R) - costfun(x,u) = 0.5*sum(x.*(Q*x)) + 0.5*sum(u.*(R*u)) + costfun(x,u) = 0.5*quadform(x,Q) + 0.5*quadform(u,R) df(x,u) = lin_dyn_df(x,u,Q,R) # plotFn(x) = plot(squeeze(x,2)') @@ -54,7 +53,6 @@ function demo_linear(;kwargs...) @time x, u, traj_new, Vx, Vxx, cost, otrace = iLQG(f,costfun,df, x0, u0; lims=lims,kwargs...); totalcost = get(otrace, :cost)[2] - plotstuff_linear(x,u,[cost],totalcost) x, u, traj_new, Vx, Vxx, cost, otrace end @@ -81,36 +79,37 @@ function demo_linear_kl(;kwargs...) T = 1000 # horizon x0 = ones(n) # initial state - u = .1*randn(m,T) # initial controls + u = .1*randn(m,T) |> tosvec # initial controls # optimization problem N = T+1 - fx = repeat(A,1,1,T) - fu = repeat(B,1,1,T) - cxx = repeat(Q,1,1,T) - cxu = repeat(zeros(size(B)),1,1,T) - cuu = repeat(R,1,1,T) + fx = [tosmat(A) for _ in 1:T] + fu = [tosmat(B) for _ in 1:T] + cxx = [tosmat(Q) for _ in 1:T] + cxu = [tosmat(zeros(size(B))) for _ in 1:T] + cuu = [tosmat(R) for _ in 1:T] function lin_dyn_df(x,u,Q,R) - u[isnan.(u)] .= 0 - cx = Q*x - cu = R*u - fxx=fxu=fuu = [] + cx = Ref(Q) .* x + cu = Ref(R) .* u + fxx=fxu=fuu = nothing return fx,fu,fxx,fxu,fuu,cx,cu,cxx,cxu,cuu end function lin_dyn_f(x,u,A,B,Q,R) - u[isnan.(u)] .= 0 - xnew = A*x + B*u - return xnew + A*x + B*u end + quadform(x::AbstractVector{<:AbstractVector},Q) = sum(x'Q*x for x in x) + quadform(x,Q) = x'Q*x + dyn = (x,u,i) -> lin_dyn_f(x,u,A,B,Q,R) - costf = (x,u) -> 0.5*(sum(x.*(Q*x),dims=1) + sum(u.*(R*u),dims=1))[:] + costf = (x,u) -> 0.5*quadform(x,Q) + 0.5*quadform(u,R) diffdyn = (x,u) -> lin_dyn_df(x,u,Q,R) function rollout(u) - x = zeros(n,T) - x[:,1] = x0 + x = zeros(n,T) |> tosvec + @show typeof(x) + x[1] = x0 for t = 1:T-1 - x[:,t+1] = dyn(x[:,t],u[:,t],t) + x[t+1] = dyn(x[t],u[t],t) end x end @@ -122,7 +121,7 @@ function demo_linear_kl(;kwargs...) local Vx, Vxx, cost, otrace, totalcost outercosts = zeros(5) @time for iter = 1:5 - cost0 = 0.5*sum(x.*(Q*x)) + 0.5*sum(u.*(R*u)) + cost0 = 0.5*(quadform(x,Q) +quadform(u,R)) x, u, traj, Vx, Vxx, cost, otrace = iLQGkl(dyn,costf,diffdyn, x, traj, model; cost=cost0, lims=lims,kwargs...); totalcost = get(otrace, :cost)[2] outercosts[iter] = sum(totalcost) diff --git a/src/forward_pass.jl b/src/forward_pass.jl index 80b8ca8..05ac0b0 100644 --- a/src/forward_pass.jl +++ b/src/forward_pass.jl @@ -44,14 +44,14 @@ function forward_covariance(model, x, u, traj) Σ0 = R1 # TODO: I was lazy here ix = 1:n iu = n+1:n+m - sigmanew = Array{Float64}(undef,n+m,n+m,N) - sigmanew[ix,ix,1] = Σ0 + sigmanew = [Array{Float64}(undef,n+m,n+m) for _ in 1:N] + sigmanew[1][ix,ix] = Σ0 for i = 1:N-1 - K,Σ = traj.K[i], traj.Σ[:,:,i] - sigmanew[ix,ix,i+1] = fx[i]*sigmanew[ix,ix,i]*fx[i]' + R1 # Iterate dLyap forward - sigmanew[iu,ix,i] = K*sigmanew[ix,ix,i] - sigmanew[ix,iu,i] = sigmanew[ix,ix,i]*K' - sigmanew[iu,iu,i] = K*sigmanew[ix,ix,i]*K' + Σ + K,Σ = traj.K[i], traj.Σ[i] + sigmanew[i+1][ix,ix] = fx[i]*sigmanew[i][ix,ix]*fx[i]' + R1 # Iterate dLyap forward + sigmanew[i][iu,ix] = K*sigmanew[i][ix,ix] + sigmanew[i][ix,iu] = sigmanew[i][ix,ix]*K' + sigmanew[i][iu,iu] = K*sigmanew[i][ix,ix]*K' + Σ end sigmanew end diff --git a/src/iLQG.jl b/src/iLQG.jl index ba1a930..be19945 100644 --- a/src/iLQG.jl +++ b/src/iLQG.jl @@ -50,7 +50,7 @@ eye(P,n) = Matrix{P}(I,n,n) GaussianPolicy(P) = GaussianPolicy(0,0,0,emptyMat3(P),emptyMat2(P),emptyMat3(P),emptyMat3(P)) # GaussianPolicy(P,T,n,m) = GaussianPolicy(T,n,m,zeros(P,m,n,T),zeros(P,m,T),cat([eye(P,m) for t=1:T]..., dims=3),cat([eye(P,m) for t=1:T]..., dims=3)) -GaussianPolicy(P,T,n,m) = GaussianPolicy(0,0,0,zeros(SMatrix{m,n,P},T),zeros(SVector{m,P},T),zeros(SMatrix{m,m,P},T),zeros(SMatrix{m,m,P},T)) +GaussianPolicy(P,T,n,m) = GaussianPolicy(T,n,m,zeros(SMatrix{m,n,P},T),zeros(SVector{m,P},T),zeros(SMatrix{m,m,P},T),zeros(SMatrix{m,m,P},T)) Base.isempty(gp::GaussianPolicy) = gp.T == gp.n == gp.m == 0 Base.length(gp::GaussianPolicy) = gp.T diff --git a/src/iLQGkl.jl b/src/iLQGkl.jl index 8108206..fbdae24 100644 --- a/src/iLQGkl.jl +++ b/src/iLQGkl.jl @@ -46,8 +46,8 @@ function iLQGkl(dynamics,costfun,derivs, x0, traj_prev, model; # --- initial sizes and controls u = copy(traj_prev.k) # initial control sequence n = size(x0, 1) # dimension of state vector - m,N = size(u) # dimension of control vector and number of state transitions - traj_new = GaussianPolicy(Float64) + m,N = length(u[1]),length(u) # dimension of control vector and number of state transitions + traj_new = GaussianPolicy(Float64, N,n,m) k_old = copy(traj_prev.k) traj_prev.k *= 0 # We are adding new k to u, so must set this to zero for correct kl calculations ηbracket = copy(ηbracket) # Because we do changes in this Array @@ -62,7 +62,7 @@ function iLQGkl(dynamics,costfun,derivs, x0, traj_prev, model; # --- initial trajectory debug("Checking initial trajectory") - if size(x0,2) == N + if x0 isa Vector{<:AbstractVector} debug("# pre-rolled initial forward pass, initial traj provided") x = x0 diverge = false @@ -124,14 +124,16 @@ function iLQGkl(dynamics,costfun,derivs, x0, traj_prev, model; end # check for termination due to small gradient - g_norm = mean(maximum(abs.(traj_new.k) ./ (abs.(u) .+1),dims=1)) + g_norm = mean(zip(traj_new.k,u)) do (k,u) + maximum(abs.(k) ./ (abs.(u) .+ 1)) + end trace(:grad_norm, iter, g_norm) # ====== STEP 3: Forward pass _t = @elapsed begin # debug("# entering forward_pass") - xnew,unew,costnew = forward_pass(traj_new, x0[:,1] ,u, x,1,dynamics,costfun, lims, diff_fun) + xnew,unew,costnew = forward_pass(traj_new, x0[1] ,u, x,1,dynamics,costfun, lims, diff_fun) sigmanew = forward_covariance(model, x, u, traj_new) traj_new.k .+= traj_prev.k # unew = k_new + k_old + Knew*Δx, this doesn't matter since traj_prev.k set to 0 above Δcost = sum(cost) - sum(costnew) @@ -202,7 +204,7 @@ function iLQGkl(dynamics,costfun,derivs, x0, traj_prev, model; end end - xnew,unew,costnew = forward_pass(traj_new, x0[:,1] ,u, x,1,dynamics,costfun, lims, diff_fun) + xnew,unew,costnew = forward_pass(traj_new, x0[1] ,u, x,1,dynamics,costfun, lims, diff_fun) sigmanew = forward_covariance(model, x, u, traj_new) traj_new.k .+= traj_prev.k # unew = k_new + k_old + Knew*Δx Δcost = sum(cost) - sum(costnew) @@ -216,7 +218,9 @@ function iLQGkl(dynamics,costfun,derivs, x0, traj_prev, model; # println(maximum(constraint_violation), " ", extrema(η), " ", indmax(constraint_violation)) # println(round.(constraint_violation,4)) η .= clamp.(η, ηbracket[1,:], ηbracket[3,:]) - g_norm = mean(maximum(abs.(traj_new.k) ./ (abs.(u) .+1),dims=1)) + g_norm = mean(zip(traj_new.k,u)) do (k,u) + maximum(abs.(k) ./ (abs.(u) .+ 1)) + end trace(:grad_norm, iter, g_norm) # @show maximum(constraint_violation) if all(divergence .< 2*kl_step) && mean(constraint_violation) < 0.1*kl_step[1] diff --git a/src/klutils.jl b/src/klutils.jl index 1ebf4e3..ec533c0 100644 --- a/src/klutils.jl +++ b/src/klutils.jl @@ -9,15 +9,15 @@ function ∇kl(traj_prev) isempty(traj_prev) && (return (0,0,0,0,0)) debug("Calculating KL cost addition terms") m,n,T = traj_prev.m,traj_prev.n,traj_prev.T - cx,cu,cxx,cuu,cxu = zeros(n,T),zeros(m,T),zeros(n,n,T),zeros(m,m,T),zeros(m,n,T) + cx,cu,cxx,cuu,cxu = [zeros(n) for _ in 1:T],[zeros(m) for _ in 1:T],[zeros(n,n) for _ in 1:T],[zeros(m,m) for _ in 1:T],[zeros(m,n) for _ in 1:T] for t in 1:T - K, k = traj_prev.K[:,:,t], traj_prev.k[:,t] - Σi = traj_prev.Σi[:,:,t] - cx[:,t] = K'*Σi*k - cu[:,t] = -Σi*k - cxx[:,:,t] = K'*Σi*K - cuu[:,:,t] = Σi - cxu[:,:,t] = -Σi*K # https://github.com/cbfinn/gps/blob/master/python/gps/algorithm/traj_opt/traj_opt_lqr_python.py#L355 + K, k = traj_prev.K[t], traj_prev.k[t] + Σi = traj_prev.Σi[t] + cx[t] = K'*Σi*k + cu[t] = -Σi*k + cxx[t] = K'*Σi*K + cuu[t] = Σi + cxu[t] = -Σi*K # https://github.com/cbfinn/gps/blob/master/python/gps/algorithm/traj_opt/traj_opt_lqr_python.py#L355 end return cx,cu,cxx,cxu,cuu end @@ -43,16 +43,16 @@ function kl_div(xnew,xold, Σ_new, traj_new, traj_prev) # m = traj_new.m kldiv = zeros(T) for t = 1:T - μt = μ_new[:,t] - Σt = Σ_new[:,:,t] - Kp = traj_prev.K[:,:,t] - Kn = traj_new.K[:,:,t] - kp = traj_prev.k[:,t] - kn = traj_new.k[:,t] + kp # unew must be added here - Σp = traj_prev.Σ[:,:,t] - Σn = traj_new.Σ[:,:,t] - Σip = traj_prev.Σi[:,:,t] - Σin = traj_new.Σi[:,:,t] + μt = μ_new[t] + Σt = Σ_new[t] + Kp = traj_prev.K[t] + Kn = traj_new.K[t] + kp = traj_prev.k[t] + kn = traj_new.k[t] + kp # unew must be added here + Σp = traj_prev.Σ[t] + Σn = traj_new.Σ[t] + Σip = traj_prev.Σi[t] + Σin = traj_new.Σi[t] Mp,vp = KLmv(Σip,Kp,kp) Mn,vn = KLmv(Σin,Kn,kn) cp = .5*kp'Σip*kp @@ -72,16 +72,16 @@ function kl_div_wiki(xnew,xold, Σ_new, traj_new, traj_prev) T,m,n = traj_new.T, traj_new.m, traj_new.n kldiv = zeros(T) for t = 1:T - μt = μ_new[:,t] - Σt = Σ_new[1:n,1:n,t] - Kp = traj_prev.K[:,:,t] - Kn = traj_new.K[:,:,t] - kp = traj_prev.k[:,t] - kn = traj_new.k[:,t] #traj_new.k[:,t] contains kp already - Σp = traj_prev.Σ[:,:,t] - Σn = traj_new.Σ[:,:,t] - Σip = traj_prev.Σi[:,:,t] - Σin = traj_new.Σi[:,:,t] + μt = μ_new[t] + Σt = Σ_new[t][1:n,1:n] + Kp = traj_prev.K[t] + Kn = traj_new.K[t] + kp = traj_prev.k[t] + kn = traj_new.k[t] #traj_new.k[t] contains kp already + Σp = traj_prev.Σ[t] + Σn = traj_new.Σ[t] + Σip = traj_prev.Σi[t] + Σin = traj_new.Σi[t] dim = m k_diff = kp-kn K_diff = Kp-Kn @@ -101,7 +101,7 @@ end -entropy(traj::GaussianPolicy) = mean(logdet(traj.Σ[:,:,t])/2 for t = 1:traj.T) + traj.m*log(2π)/2 +entropy(traj::GaussianPolicy) = mean(logdet(traj.Σ[t])/2 for t = 1:traj.T) + traj.m*log(2π)/2 """ new_η, satisfied, divergence = calc_η(xnew,xold,sigmanew,η, traj_new, traj_prev, kl_step) diff --git a/src/system_pendcart.jl b/src/system_pendcart.jl index f471f53..4e546f9 100644 --- a/src/system_pendcart.jl +++ b/src/system_pendcart.jl @@ -26,7 +26,7 @@ end """ - demo_pendcart(;kwargs...) +demo_pendcart(;kwargs...) Run the iLQG Function to find an optimal trajectory For the "pendulum on a cart system". Requires package ControlSystems.jl @@ -40,12 +40,12 @@ Run the iLQG Function to find an optimal trajectory For the "pendulum on a cart `doplot = true` : Plot results """ function demo_pendcart(;x0 = [π-0.6,0,0,0], goal = [π,0,0,0], - Q = Diagonal([10.,1,2,1]), # State weight matrix - R = 1., # Control weight matrix - lims = 5.0*[-1 1], # control limits, - T = 600, # Number of time steps - doplot = true # Plot results - ) + Q = Diagonal([10.,1,2,1]), # State weight matrix + R = 1., # Control weight matrix + lims = 5.0*[-1 1], # control limits, + T = 600, # Number of time steps + doplot = true # Plot results + ) N = T+1 g = 9.82 @@ -62,30 +62,11 @@ function demo_pendcart(;x0 = [π-0.6,0,0,0], goal = [π,0,0,0], L = lqr(A,B,Q,R) # Calculate the optimal state feedback - function fsys_closedloop(t,x,L,xd) - dx = copy(x) - dx[1] -= pi - u = -(L*dx)[1] - xd[1] = x[2] - xd[2] = -g/l * sin(x[1]) + u/l * cos(x[1]) - d*x[2] - xd[3] = x[4] - xd[4] = u - end - - function fsys(t,x,u,xd) - xd[1] = x[2] - xd[2] = -g/l * sin(x[1]) + u/l * cos(x[1]) - d*x[2] - xd[3] = x[4] - xd[4] = u - end - - dfvec = zeros(4) function dfsys(x,u) - dfvec[1] = x[1]+h*x[2] - dfvec[2] = x[2]+h*(-g/l*sin(x[1])+u[1]/l*cos(x[1])- d*x[2]) - dfvec[3] = x[3]+h*x[4] - dfvec[4] = x[4]+h*u[1] - dfvec + SVector(x[1]+h*x[2], + x[2]+h*(-g/l*sin(x[1])+u[1]/l*cos(x[1])- d*x[2]), + x[3]+h*x[4], + x[4]+h*u[1]) end @@ -120,9 +101,9 @@ function demo_pendcart(;x0 = [π-0.6,0,0,0], goal = [π,0,0,0], fxc = Float64[0 1 0 0; - 0 0 0 0; - 0 0 0 1; - 0 0 0 0] + 0 0 0 0; + 0 0 0 1; + 0 0 0 0] fuc = reshape(Float64[0, 0, 0, 1], 4,1) fxc = [copy(fxc) for i = 1:N] fuc = [copy(fuc) for i = 1:N]