diff --git a/README.md b/README.md index 207934d..0450d6a 100644 --- a/README.md +++ b/README.md @@ -9,24 +9,28 @@ This package can be used in combination with [ProximalOperators.jl](https://gith [StructuredOptimization.jl](https://github.com/kul-forbes/StructuredOptimization.jl) provides a higher-level interface to formulate and solve problems using (some of) the algorithms here included. -### Installation +### Quick start + +To install the package, simply issue the following command in the Julia REPL: ```julia -julia> Pkg.add("ProximalAlgorithms") +] add ProximalAlgorithms ``` +Check out [these test scripts](test/problems) for examples on how to apply +the provided algorithms to problems. + ### Implemented Algorithms Algorithm | Function | Reference --------------------------------------|---------------|----------- -Douglas-Rachford splitting algorithm | [`douglasrachford`](src/algorithms/douglasrachford.jl) | [[1]][eckstein_1989] -Forward-backward splitting (i.e. proximal gradient) algorithm | [`forwardbackward`](src/algorithms/forwardbackward.jl) | [[2]][tseng_2008], [[3]][beck_2009] -Chambolle-Pock primal dual algorithm | [`chambollepock`](src/algorithms/primaldual.jl) | [[4]][chambolle_2011] -Vũ-Condat primal-dual algorithm | [`vucondat`](src/algorithms/primaldual.jl) | [[6]][vu_2013], [[7]][condat_2013] -Davis-Yin splitting algorithm | [`davisyin`](src/algorithms/davisyin.jl) | [[9]][davis_2017] -Asymmetric forward-backward-adjoint algorithm | [`afba`](src/algorithms/primaldual.jl) | [[10]][latafat_2017] -PANOC (L-BFGS) | [`panoc`](src/algorithms/panoc.jl) | [[11]][stella_2017] -ZeroFPR (L-BFGS) | [`zerofpr`](src/algorithms/zerofpr.jl) | [[12]][themelis_2018] +Douglas-Rachford splitting algorithm | [`DouglasRachford`](src/algorithms/douglasrachford.jl) | [[1]][eckstein_1989] +Forward-backward splitting (i.e. proximal gradient) algorithm | [`ForwardBackward`](src/algorithms/forwardbackward.jl) | [[2]][tseng_2008], [[3]][beck_2009] +Vũ-Condat primal-dual algorithm | [`VuCondat`](src/algorithms/primaldual.jl) | [[4]][chambolle_2011], [[6]][vu_2013], [[7]][condat_2013] +Davis-Yin splitting algorithm | [`DavisYin`](src/algorithms/davisyin.jl) | [[9]][davis_2017] +Asymmetric forward-backward-adjoint algorithm | [`AFBA`](src/algorithms/primaldual.jl) | [[10]][latafat_2017] +PANOC (L-BFGS) | [`PANOC`](src/algorithms/panoc.jl) | [[11]][stella_2017] +ZeroFPR (L-BFGS) | [`ZeroFPR`](src/algorithms/zerofpr.jl) | [[12]][themelis_2018] ### Contributing diff --git a/src/algorithms/davisyin.jl b/src/algorithms/davisyin.jl index da9d2c9..90e007a 100644 --- a/src/algorithms/davisyin.jl +++ b/src/algorithms/davisyin.jl @@ -1,10 +1,6 @@ -################################################################################ -# Davis-Yin splitting iterable -# -# See: -# [1] Davis, Yin "A Three-Operator Splitting Scheme and its Optimization Applications", -# Set-Valued and Variational Analysis, vol. 25, no. 4, pp 829–858 (2017). -# +# Davis, Yin. "A Three-Operator Splitting Scheme and its Optimization +# Applications", Set-Valued and Variational Analysis, vol. 25, no. 4, +# pp. 829–858 (2017). 
using Base.Iterators using ProximalAlgorithms.IterationTools @@ -64,60 +60,90 @@ function Base.iterate(iter::DYS_iterable, state::DYS_state) return state, state end +# Solver + +struct DavisYin{R} + gamma::Maybe{R} + lambda::R + maxit::Int + tol::R + verbose::Bool + freq::Int + + function DavisYin{R}(; gamma::Maybe{R}=nothing, lambda::R=R(1.0), + maxit::Int=10000, tol::R=R(1e-8), verbose::Bool=false, freq::Int=100 + ) where R + @assert gamma === nothing || gamma > 0 + @assert lambda > 0 + @assert maxit > 0 + @assert tol > 0 + @assert freq > 0 + new(gamma, lambda, maxit, tol, verbose, freq) + end +end + +function (solver::DavisYin{R})(x0::AbstractArray{C}; + f=Zero(), g=Zero(), h=Zero(), A=I, L::Maybe{R}=nothing +) where {R, C <: Union{R, Complex{R}}} + + stop(state::DYS_state) = norm(state.res, Inf) <= solver.tol + disp((it, state)) = @printf("%5d | %.3e\n", it, norm(state.res, Inf)) + + if solver.gamma === nothing + if L !== nothing + gamma = R(1)/L + else + error("You must specify either L or gamma") + end + else + gamma = solver.gamma + end + + iter = DYS_iterable(f, g, h, A, x0, gamma, solver.lambda) + iter = take(halt(iter, stop), solver.maxit) + iter = enumerate(iter) + if solver.verbose iter = tee(sample(iter, solver.freq), disp) end + + num_iters, state_final = loop(iter) + + return state_final.xf, state_final.xg, num_iters + +end + +# Outer constructors + """ - davisyin(x0; f, g, h, A, [...]) + DavisYin([gamma, lambda, maxit, tol, verbose, freq]) -Solves convex optimization problems of the form +Instantiate the Davis-Yin splitting algorithm (see [1]) for solving +convex optimization problems of the form minimize f(x) + g(x) + h(A x), -where `h` is smooth and `A` is a linear mapping, using the Davis-Yin splitting -algorithm, see [1]. - -Either of the following arguments must be specified: +where `h` is smooth and `A` is a linear mapping (for example, a matrix). +If `solver = DavisYin(args...)`, then the above problem is solved with -* `L::Real`, Lipschitz constant of the gradient of `h(A x)`. -* `gamma:Real`, stepsize parameter. + solver(x0; [f, g, h, A]) -Other optional keyword arguments: +Optional keyword arguments: +* `gamma::Real` (default: `nothing`), stepsize parameter. -* `labmda::Real` (default: `1.0`), relaxation parameter, see [1]. -* `maxit::Integer` (default: `1000`), maximum number of iterations to perform. +* `lambda::Real` (default: `1.0`), relaxation parameter, see [1]. +* `maxit::Integer` (default: `10000`), maximum number of iterations to perform. * `tol::Real` (default: `1e-8`), absolute tolerance on the fixed-point residual. -* `verbose::Bool` (default: `true`), whether or not to print information during the iterations. +* `verbose::Bool` (default: `false`), whether or not to print information during the iterations. * `freq::Integer` (default: `100`), frequency of verbosity. +If `gamma` is not specified at construction time, the following keyword +argument must be specified at solve time: + +* `L::Real`, Lipschitz constant of the gradient of `h(A x)`. + References: [1] Davis, Yin. "A Three-Operator Splitting Scheme and its Optimization Applications", Set-Valued and Variational Analysis, vol. 25, no. 4, pp. 829–858 (2017).
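+
+For example, here is a minimal usage sketch; the data `A`, `b` and the three
+terms below are illustrative placeholders (mirroring the elastic-net test
+problem), not part of the solver itself:
+
+    using LinearAlgebra, ProximalOperators, ProximalAlgorithms
+    A = randn(10, 5); b = randn(10)
+    f = NormL1(1.0)                     # proximable term
+    g = SqrNormL2(1.0)                  # proximable term
+    h = Translate(SqrNormL2(1.0), -b)   # smooth term: h(A x) = ||A x - b||^2 / 2
+    solver = ProximalAlgorithms.DavisYin(tol=1e-6)
+    xf, xg, it = solver(zeros(5), f=f, g=g, h=h, A=A, L=opnorm(A)^2)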
""" -function davisyin(x0; - f=Zero(), g=Zero(), h=Zero(), A=I, - lambda=1.0, L=nothing, gamma=nothing, - maxit=10_000, tol=1e-8, - verbose=false, freq=100) - - R = real(eltype(x0)) - - stop(state::DYS_state) = norm(state.res, Inf) <= R(tol) - disp((it, state)) = @printf("%5d | %.3e\n", it, norm(state.res, Inf)) - - if gamma === nothing - if L !== nothing - gamma = R(1)/R(L) - else - error("You must specify either L or gamma") - end - end - - iter = DYS_iterable(f, g, h, A, x0, R(gamma), R(lambda)) - iter = take(halt(iter, stop), maxit) - iter = enumerate(iter) - if verbose iter = tee(sample(iter, freq), disp) end - - num_iters, state_final = loop(iter) - - return state_final.xf, state_final.xg, num_iters -end +DavisYin(::Type{R}; kwargs...) where R = DavisYin{R}(; kwargs...) +DavisYin(; kwargs...) = DavisYin(Float64; kwargs...) diff --git a/src/algorithms/douglasrachford.jl b/src/algorithms/douglasrachford.jl index a5a22f7..8677f7c 100644 --- a/src/algorithms/douglasrachford.jl +++ b/src/algorithms/douglasrachford.jl @@ -1,10 +1,6 @@ -################################################################################ -# Douglas-Rachford splitting iterable -# -# [1] Eckstein, Bertsekas "On the Douglas-Rachford Splitting Method and the -# Proximal Point Algorithm for Maximal Monotone Operators*", +# Eckstein, Bertsekas, "On the Douglas-Rachford Splitting Method and the +# Proximal Point Algorithm for Maximal Monotone Operators", # Mathematical Programming, vol. 55, no. 1, pp. 293-318 (1989). -# using Base.Iterators using ProximalAlgorithms: LBFGS @@ -39,16 +35,61 @@ function Base.iterate(iter::DRS_iterable, state::DRS_state=DRS_state(iter)) return state, state end +# Solver + +struct DouglasRachford{R} + gamma::R + maxit::Int + tol::R + verbose::Bool + freq::Int + + function DouglasRachford{R}(; gamma::R, maxit::Int=1000, tol::R=R(1e-8), + verbose::Bool=false, freq::Int=100 + ) where R + @assert gamma > 0 + @assert maxit > 0 + @assert tol > 0 + @assert freq > 0 + new(gamma, maxit, tol, verbose, freq) + end +end + +function (solver::DouglasRachford{R})( + x0::AbstractArray{C}; f=Zero(), g=Zero() +) where {R, C <: Union{R, Complex{R}}} + + stop(state::DRS_state) = norm(state.res, Inf) <= solver.tol + disp((it, state)) = @printf("%5d | %.3e\n", it, norm(state.res, Inf)) + + iter = DRS_iterable(f, g, x0, solver.gamma) + iter = take(halt(iter, stop), solver.maxit) + iter = enumerate(iter) + if solver.verbose iter = tee(sample(iter, solver.freq), disp) end + + num_iters, state_final = loop(iter) + + return state_final.y, state_final.z, num_iters + +end + +# Outer constructors + """ - douglasrachford(x0; f, g, gamma, [...]) + DouglasRachford([gamma, maxit, tol, verbose, freq]) + +Instantiate the Douglas-Rachford splitting algorithm (see [1]) for solving +convex optimization problems of the form + + minimize f(x) + g(x), + +If `solver = DouglasRachford(args...)`, then the above problem is solved with -Minimizes `f(x) + g(x)` with respect to `x`, using the Douglas-Rachfor splitting -algorithm starting from `x0`, with stepsize `gamma`. -If unspecified, `f` and `g` default to the identically zero function, -while `gamma` defaults to one. + solver(x0, [f, g]) -Other optional keyword arguments: +Optional keyword arguments: +* `gamma::Real` (default: `1.0`), stepsize parameter. * `maxit::Integer` (default: `1000`), maximum number of iterations to perform. * `tol::Real` (default: `1e-8`), absolute tolerance on the fixed-point residual. 
-* `verbose::Bool` (default: `true`), whether or not to print information during the iterations. +* `verbose::Bool` (default: `false`), whether or not to print information during the iterations. @@ -60,23 +101,5 @@ References: Proximal Point Algorithm for Maximal Monotone Operators", Mathematical Programming, vol. 55, no. 1, pp. 293-318 (1989). """ -function douglasrachford(x0; - f=Zero(), g=Zero(), - gamma=1.0, - maxit=1000, tol=1e-8, - verbose=false, freq=100) - - R = real(eltype(x0)) - - stop(state::DRS_state) = norm(state.res, Inf) <= R(tol) - disp((it, state)) = @printf("%5d | %.3e\n", it, norm(state.res, Inf)) - - iter = DRS_iterable(f, g, x0, R(gamma)) - iter = take(halt(iter, stop), maxit) - iter = enumerate(iter) - if verbose iter = tee(sample(iter, freq), disp) end - - num_iters, state_final = loop(iter) - - return state_final.y, state_final.z, num_iters -end +DouglasRachford(::Type{R}; kwargs...) where R = DouglasRachford{R}(; kwargs...) +DouglasRachford(; kwargs...) = DouglasRachford(Float64; kwargs...) diff --git a/src/algorithms/forwardbackward.jl b/src/algorithms/forwardbackward.jl index 29edd8d..2a8ecb9 100644 --- a/src/algorithms/forwardbackward.jl +++ b/src/algorithms/forwardbackward.jl @@ -1,3 +1,10 @@ +# Tseng, "On Accelerated Proximal Gradient Methods for Convex-Concave +# Optimization" (2008). +# +# Beck, Teboulle, "A Fast Iterative Shrinkage-Thresholding Algorithm +# for Linear Inverse Problems", SIAM Journal on Imaging Sciences, vol. 2, +# no. 1, pp. 183-202 (2009). + using Base.Iterators using ProximalAlgorithms.IterationTools using ProximalOperators: Zero @@ -111,25 +118,88 @@ function Base.iterate(iter::FBS_iterable{R}, state::FBS_state{R, Tx, TAx}) where return state, state end +# Solver + +struct ForwardBackward{R <: Real} + gamma::Maybe{R} + adaptive::Bool + fast::Bool + maxit::Int + tol::R + verbose::Bool + freq::Int + + function ForwardBackward{R}(; gamma::Maybe{R}=nothing, adaptive::Bool=false, + fast::Bool=false, maxit::Int=10000, tol::R=R(1e-8), verbose::Bool=false, + freq::Int=100 + ) where R + @assert gamma === nothing || gamma > 0 + @assert maxit > 0 + @assert tol > 0 + @assert freq > 0 + new(gamma, adaptive, fast, maxit, tol, verbose, freq) + end +end + +function (solver::ForwardBackward{R})( + x0::AbstractArray{C}; f=Zero(), A=I, g=Zero(), L::Maybe{R}=nothing +) where {R, C <: Union{R, Complex{R}}} + + stop(state::FBS_state) = norm(state.res, Inf)/state.gamma <= solver.tol + disp((it, state)) = @printf( + "%5d | %.3e | %.3e\n", + it, state.gamma, norm(state.res, Inf)/state.gamma + ) + + if solver.gamma === nothing && L !== nothing + gamma = R(1)/L + elseif solver.gamma !== nothing + gamma = solver.gamma + else + gamma = nothing + end + + iter = FBS_iterable(f, A, g, x0, gamma, solver.adaptive, solver.fast) + iter = take(halt(iter, stop), solver.maxit) + iter = enumerate(iter) + if solver.verbose iter = tee(sample(iter, solver.freq), disp) end + + num_iters, state_final = loop(iter) + + return state_final.z, num_iters + +end + +# Outer constructors + """ - forwardbackward(x0; f, A, g, [...]) + ForwardBackward([gamma, adaptive, fast, maxit, tol, verbose, freq]) -Minimizes f(A*x) + g(x) with respect to x, starting from x0, using the -forward-backward splitting algorithm (also known as proximal gradient method). -If unspecified, f and g default to the identically zero function, while A -defaults to the identity.
+Instantiate the Forward-Backward splitting algorithm (see [1, 2]) for solving +optimization problems of the form -Other optional keyword arguments: + minimize f(Ax) + g(x), + +where `f` is smooth and `A` is a linear mapping (for example, a matrix). +If `solver = ForwardBackward(args...)`, then the above problem is solved with + + solver(x0; [f, A, g, L]) + +Optional keyword arguments: -* `L::Real` (default: `nothing`), the Lipschitz constant of the gradient of x ↦ f(Ax). * `gamma::Real` (default: `nothing`), the stepsize to use; defaults to `1/L` if not set (but `L` is). * `adaptive::Bool` (default: `false`), if true, forces the method stepsize to be adaptively adjusted. * `fast::Bool` (default: `false`), if true, uses Nesterov acceleration. -* `maxit::Integer` (default: `1000`), maximum number of iterations to perform. +* `maxit::Integer` (default: `10000`), maximum number of iterations to perform. * `tol::Real` (default: `1e-8`), absolute tolerance on the fixed-point residual. -* `verbose::Bool` (default: `true`), whether or not to print information during the iterations. -* `freq::Integer` (default: `10`), frequency of verbosity. +* `verbose::Bool` (default: `false`), whether or not to print information during the iterations. +* `freq::Integer` (default: `100`), frequency of verbosity. +If `gamma` is not specified at construction time, the following keyword +argument can be used to set the stepsize parameter: + +* `L::Real` (default: `nothing`), the Lipschitz constant of the gradient of x ↦ f(Ax). + References: [1] Tseng, "On Accelerated Proximal Gradient Methods for Convex-Concave @@ -139,33 +209,5 @@ Optimization" (2008). for Linear Inverse Problems", SIAM Journal on Imaging Sciences, vol. 2, no. 1, pp. 183-202 (2009). """ -function forwardbackward(x0; - f=Zero(), A=I, g=Zero(), - L=nothing, gamma=nothing, - adaptive=false, fast=false, - maxit=10_000, tol=1e-8, - verbose=false, freq=100) - - R = real(eltype(x0)) - - stop(state::FBS_state) = norm(state.res, Inf)/state.gamma <= R(tol) - disp((it, state)) = @printf( - "%5d | %.3e | %.3e\n", - it, state.gamma, norm(state.res, Inf)/state.gamma - ) - - if gamma === nothing && L !== nothing - gamma = R(1)/R(L) - elseif gamma !== nothing - gamma = R(gamma) - end - - iter = FBS_iterable(f, A, g, x0, gamma, adaptive, fast) - iter = take(halt(iter, stop), maxit) - iter = enumerate(iter) - if verbose iter = tee(sample(iter, freq), disp) end - - num_iters, state_final = loop(iter) - - return state_final.z, num_iters -end +ForwardBackward(::Type{R}; kwargs...) where R = ForwardBackward{R}(; kwargs...) +ForwardBackward(; kwargs...) = ForwardBackward(Float64; kwargs...) diff --git a/src/algorithms/panoc.jl b/src/algorithms/panoc.jl index 5e2a485..380139c 100644 --- a/src/algorithms/panoc.jl +++ b/src/algorithms/panoc.jl @@ -1,3 +1,7 @@ +# Stella, Themelis, Sopasakis, Patrinos, "A simple and efficient algorithm +# for nonlinear model predictive control", 56th IEEE Conference on Decision +# and Control (2017).
+ using Base.Iterators using ProximalAlgorithms: LBFGS using ProximalAlgorithms.IterationTools @@ -179,16 +183,82 @@ function Base.iterate(iter::PANOC_iterable{R}, state::PANOC_state{R, Tx, TAx}) w return nothing end +# Solver + +struct PANOC{R <: Real} + alpha::R + beta::R + gamma::Maybe{R} + adaptive::Bool + memory::Int + maxit::Int + tol::R + verbose::Bool + freq::Int + + function PANOC{R}(; alpha::R=R(0.95), beta::R=R(0.5), + gamma::Maybe{R}=nothing, adaptive::Bool=false, memory::Int=5, + maxit::Int=1000, tol::R=R(1e-8), verbose::Bool=false, freq::Int=10 + ) where R + @assert 0 < alpha < 1 + @assert 0 < beta < 1 + @assert gamma === nothing || gamma > 0 + @assert memory >= 0 + @assert maxit > 0 + @assert tol > 0 + @assert freq > 0 + new(alpha, beta, gamma, adaptive, memory, maxit, tol, verbose, freq) + end +end + +function (solver::PANOC{R})( + x0::AbstractArray{C}; f=Zero(), A=I, g=Zero(), L::Maybe{R}=nothing +) where {R, C <: Union{R, Complex{R}}} + + stop(state::PANOC_state) = norm(state.res, Inf)/state.gamma <= solver.tol + disp((it, state)) = @printf( + "%5d | %.3e | %.3e | %.3e\n", + it, state.gamma, norm(state.res, Inf)/state.gamma, + (state.tau === nothing ? 0.0 : state.tau) + ) + + if solver.gamma === nothing && L !== nothing + gamma = solver.alpha/L + elseif solver.gamma !== nothing + gamma = solver.gamma + else + gamma = nothing + end + + iter = PANOC_iterable( + f, A, g, x0, + solver.alpha, solver.beta, gamma, solver.adaptive, solver.memory + ) + iter = take(halt(iter, stop), solver.maxit) + iter = enumerate(iter) + if solver.verbose iter = tee(sample(iter, solver.freq), disp) end + + num_iters, state_final = loop(iter) + + return state_final.z, num_iters + +end + +# Outer constructors + """ - panoc(x0; f, A, g, [...]) + PANOC([gamma, adaptive, memory, maxit, tol, verbose, freq, alpha, beta]) -Minimizes f(A*x) + g(x) with respect to x, starting from x0, using PANOC. -If unspecified, f and g default to the identically zero function, while A -defaults to the identity. +Instantiate the PANOC algorithm (see [1]) for solving optimization problems +of the form -Other optional keyword arguments: + minimize f(Ax) + g(x), + +where `f` is smooth and `A` is a linear mapping (for example, a matrix). +If `solver = PANOC(args...)`, then the above problem is solved with + + solver(x0; [f, A, g, L]) + +Optional keyword arguments: -* `L::Real` (default: `nothing`), the Lipschitz constant of the gradient of x ↦ f(Ax). * `gamma::Real` (default: `nothing`), the stepsize to use; defaults to `alpha/L` if not set (but `L` is). * `adaptive::Bool` (default: `false`), if true, forces the method stepsize to be adaptively adjusted even if `L` is provided (this behaviour is always enforced if `L` is not provided). * `memory::Integer` (default: `5`), memory parameter for L-BFGS. @@ -199,41 +269,16 @@ Other optional keyword arguments: * `alpha::Real` (default: `0.95`), stepsize to inverse-Lipschitz-constant ratio; should be in (0, 1). * `beta::Real` (default: `0.5`), sufficient decrease parameter; should be in (0, 1). +If `gamma` is not specified at construction time, the following keyword +argument can be used to set the stepsize parameter: + +* `L::Real` (default: `nothing`), the Lipschitz constant of the gradient of x ↦ f(Ax). + References: [1] Stella, Themelis, Sopasakis, Patrinos, "A simple and efficient algorithm for nonlinear model predictive control", 56th IEEE Conference on Decision and Control (2017).
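+
+For example, a minimal lasso-type sketch; the data `A`, `b` and the two terms
+below are illustrative placeholders, not part of the solver itself:
+
+    using LinearAlgebra, ProximalOperators, ProximalAlgorithms
+    A = randn(10, 5); b = randn(10)
+    f = Translate(SqrNormL2(1.0), -b)   # smooth term: f(A x) = ||A x - b||^2 / 2
+    g = NormL1(0.1)                     # nonsmooth regularizer
+    solver = ProximalAlgorithms.PANOC(tol=1e-6)
+    x, it = solver(zeros(5), f=f, A=A, g=g, L=opnorm(A)^2)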
""" -function panoc(x0; - f=Zero(), A=I, g=Zero(), - L=nothing, gamma=nothing, - adaptive=false, memory=5, - maxit=1000, tol=1e-8, - verbose=false, freq=10, - alpha=0.95, beta=0.5) - - R = real(eltype(x0)) - - stop(state::PANOC_state) = norm(state.res, Inf)/state.gamma <= R(tol) - disp((it, state)) = @printf( - "%5d | %.3e | %.3e | %.3e\n", - it, state.gamma, norm(state.res, Inf)/state.gamma, - (state.tau === nothing ? 0.0 : state.tau) - ) - - if gamma === nothing && L !== nothing - gamma = R(alpha)/R(L) - elseif gamma !== nothing - gamma = R(gamma) - end - - iter = PANOC_iterable(f, A, g, x0, R(alpha), R(beta), gamma, adaptive, memory) - iter = take(halt(iter, stop), maxit) - iter = enumerate(iter) - if verbose iter = tee(sample(iter, freq), disp) end - - num_iters, state_final = loop(iter) - - return state_final.z, num_iters -end +PANOC(::Type{R}; kwargs...) where R = PANOC{R}(; kwargs...) +PANOC(; kwargs...) = PANOC(Float64; kwargs...) diff --git a/src/algorithms/primaldual.jl b/src/algorithms/primaldual.jl index 4c06c16..5e734f6 100644 --- a/src/algorithms/primaldual.jl +++ b/src/algorithms/primaldual.jl @@ -1,17 +1,19 @@ -################################################################################ -# Primal-dual algorithms based on Asymmetric Forward-Backward-Adjoint +# Latafat, Patrinos, "Asymmetric forward–backward–adjoint splitting for +# solving monotone inclusions involving three operators", Computational +# Optimization and Applications, vol. 68, no. 1, pp. 57-93 (2017). # -# [1] Latafat, Patrinos. "Asymmetric forward–backward–adjoint splitting for -# solving monotone inclusions involving three operators" Computational -# Optimization and Applications, pages 1–37, 2017. -# -# [2] Condat. "A primal–dual splitting method for convex optimization involving -# Lipschitzian, proximable and linear composite terms" Journal of Optimization -# Theory and Applications 158.2 (2013): 460-479. -# -# [3] Vũ. "A splitting algorithm for dual monotone inclusions involving -# cocoercive operators"" Advances in Computational Mathematics, 38(3), pp.667-681. +# Chambolle, Pock, "A First-Order Primal-Dual Algorithm for Convex Problems +# with Applications to Imaging", Journal of Mathematical Imaging and Vision, +# vol. 40, no. 1, pp. 120-145 (2011). # +# Condat, "A primal–dual splitting method for convex optimization +# involving Lipschitzian, proximable and linear composite terms", +# Journal of Optimization Theory and Applications, vol. 158, no. 2, +# pp 460-479 (2013). +# +# Vũ, "A splitting algorithm for dual monotone inclusions involving +# cocoercive operators", Advances in Computational Mathematics, vol. 38, no. 3, +# pp. 667-681 (2013). 
using Base.Iterators using ProximalAlgorithms.IterationTools @@ -29,7 +31,7 @@ struct AFBA_iterable{R, Tx, Ty, Tf, Tg, Th, Tl, TL} y0::Ty theta::R mu::R - lam::R + lambda::R gamma1::R gamma2::R end @@ -80,12 +82,12 @@ function Base.iterate(iter::AFBA_iterable, state::AFBA_state=AFBA_state(iter)) # perform x-update step state.temp_y .= (iter.mu * (2-iter.theta) * iter.gamma1) .* state.FPR_y mul!(state.temp_x, iter.L', state.temp_y) - state.x .+= iter.lam .* (state.FPR_x .- state.temp_x) + state.x .+= iter.lambda .* (state.FPR_x .- state.temp_x) # perform y-update step state.temp_x .= ((1-iter.mu) * (2-iter.theta) * iter.gamma2) .* state.FPR_x mul!(state.temp_y, iter.L, state.temp_x) - state.y .+= iter.lam .* (state.FPR_y .+ state.temp_y) + state.y .+= iter.lambda .* (state.FPR_y .+ state.temp_y) return state, state end @@ -152,21 +154,85 @@ function AFBA_default_stepsizes(L, h, theta::R, mu::R, betaQ::R, betaR::R) where return gamma1, gamma2 end +# Solver + +struct AFBA{R} + gamma1::Maybe{R} + gamma2::Maybe{R} + theta::R + mu::R + lambda::R + maxit::Int + tol::R + verbose::Bool + freq::Int + + function AFBA{R}(; gamma1::Maybe{R}=nothing, gamma2::Maybe{R}=nothing, + theta::R=R(1), mu::R=R(1), lambda::R=R(1), maxit::Int=10000, + tol::R=R(1e-5), verbose::Bool=false, freq::Int=100 + ) where R + @assert gamma1 === nothing || gamma1 > 0 + @assert gamma2 === nothing || gamma2 > 0 + @assert theta >= 0 + @assert 0 <= mu <= 1 + @assert lambda > 0 + @assert maxit > 0 + @assert tol > 0 + @assert freq > 0 + new(gamma1, gamma2, theta, mu, lambda, maxit, tol, verbose, freq) + end +end + +function (solver::AFBA{R})(x0::AbstractArray{C}, y0::AbstractArray{C}; + f=Zero(), g=Zero(), h=Zero(), l=IndZero(), L=I, betaQ::R=R(0), betaR::R=R(0) +) where {R, C <: Union{R, Complex{R}}} + + stop(state::AFBA_state) = norm(state.FPR_x, Inf) + norm(state.FPR_y, Inf) <= solver.tol + disp((it, state)) = @printf( + "%6d | %7.4e\n", + it, norm(state.FPR_x, Inf)+norm(state.FPR_y, Inf) + ) + + if solver.gamma1 === nothing || solver.gamma2 === nothing + gamma1, gamma2 = AFBA_default_stepsizes( + L, h, solver.theta, solver.mu, betaQ, betaR + ) + else + gamma1, gamma2 = solver.gamma1, solver.gamma2 + end + + iter = AFBA_iterable(f, g, h, l, L, x0, y0, + solver.theta, solver.mu, solver.lambda, gamma1, gamma2 + ) + iter = take(halt(iter, stop), solver.maxit) + iter = enumerate(iter) + if solver.verbose iter = tee(sample(iter, solver.freq), disp) end + + num_iters, state_final = loop(iter) + + return state_final.x, state_final.y, num_iters +end + +# Outer constructors + """ - afba(x0, y0; f, g, h, l, L, [...]) + AFBA([gamma1, gamma2, theta, mu, lambda, maxit, tol, verbose, freq]) -Solves convex optimization problems of the form +Instantiate the asymmetric forward-backward-adjoint algorithm (AFBA, see [1]) +for solving convex optimization problems of the form minimize f(x) + g(x) + (h □ l)(L x), -where `f` is smooth, `g` and `h` are possibly nonsmooth and `l` is strongly convex, -using the asymmetric forward-backward-adjoint algorithm (AFBA), see [1]. -Symbol `□` denotes the infimal convolution, and `L` is a linear mapping. -Points `x0` and `y0` are the initial primal and dual iterates, respectively. -If unspecified, functions `f`, `g`, and `h` default to the identically zero function, -`l` defaults to the indicator of the set `{0}`, and `L` defaults to the identity. +where `f` is smooth, `g` and `h` are possibly nonsmooth and `l` is strongly +convex. Symbol `□` denotes the infimal convolution, and `L` is a linear mapping.
+If `solver = AFBA(args...)`, then the above problem is solved with + + solver(x0, y0; [f, g, h, l, L, betaQ, betaR]) -Important keyword arguments, in case `f` and `l` are set, are: +Points `x0` and `y0` are the initial primal and dual iterates, respectively. +If unspecified, functions `f`, `g`, and `h` default to the identically zero +function, `l` defaults to the indicator of the set `{0}`, and `L` defaults to +the identity. Important keyword arguments, in case `f` and `l` are set, are: * `betaQ`: Lipschitz constant of gradient of `f` (default: zero) * `betaR`: Lipschitz constant of gradient of the conjugate of `l` (default: zero) These are used to determine the algorithm default stepsizes, `gamma1` and `gamma2`, in case they are not directly specified. -Other optional keyword arguments are: +Optional keyword arguments are: * `gamma1`: stepsize corresponding to the primal updates (default: see [1] for each case) * `gamma2`: stepsize corresponding to the dual updates (default: see [1] for each case) -* `theta`: nonnegative algorithm parameter (default: '1') -* `mu`: algorithm parameter in the range [0,1] (default: '1') +* `theta`: nonnegative algorithm parameter (default: `1.0`) +* `mu`: algorithm parameter in the range [0,1] (default: `1.0`) * `tol`: primal-dual termination tolerance (default: `1e-5`) * `maxit`: maximum number of iterations (default: `10000`) -* `verbose`, verbosity level (default: `1`) +* `verbose`: whether or not to print information during the iterations (default: `false`) @@ -209,83 +275,39 @@ pp 460-479 (2013). cocoercive operators", Advances in Computational Mathematics, vol. 38, no. 3, pp. 667-681 (2013). """ -function afba(x0, y0; - f=Zero(), g=Zero(), h=Zero(), l=IndZero(), L=I, - theta=1.0, mu=1.0, lam=1.0, betaQ=0.0, betaR=0.0, gamma1=nothing, gamma2=nothing, - maxit=10000, tol=1e-5, verbose=false, freq=100) - - R = real(eltype(x0)) - - stop(state::AFBA_state) = norm(state.FPR_x, Inf) + norm(state.FPR_y, Inf) <= R(tol) - disp((it, state)) = @printf( - "%6d | %7.4e\n", - it, norm(state.FPR_x, Inf)+norm(state.FPR_y, Inf) - ) - - if gamma1 === nothing || gamma2 === nothing - gamma1, gamma2 = AFBA_default_stepsizes(L, h, R(theta), R(mu), R(betaQ), R(betaR)) - end - - iter = AFBA_iterable(f, g, h, l, L, x0, y0, R(theta), R(mu), R(lam), gamma1, gamma2) - iter = take(halt(iter, stop), maxit) - iter = enumerate(iter) - if verbose iter = tee(sample(iter, freq), disp) end - - num_iters, state_final = loop(iter) - - return state_final.x, state_final.y, num_iters -end +AFBA(::Type{R}; kwargs...) where R = AFBA{R}(; kwargs...) +AFBA(; kwargs...) = AFBA(Float64; kwargs...) """ - vucondat(x0, y0; f, g, h, l, L, [...]) + VuCondat([gamma1, gamma2, theta, mu, lambda, maxit, tol, verbose, freq]) -Solves convex optimization problems of the form +Instantiate the Vũ-Condat splitting algorithm (see [2, 3]) +for solving convex optimization problems of the form - minimize f(x) + g(x) + (h □ l)(L x). + minimize f(x) + g(x) + (h □ l)(L x), -where `f` is smooth, `g` and `h` are possibly nonsmooth and `l` is strongly convex, -using the Vũ-Condat primal-dual algorithm. +where `f` is smooth, `g` and `h` are possibly nonsmooth and `l` is strongly +convex. Symbol `□` denotes the infimal convolution, and `L` is a linear mapping. +If `solver = VuCondat(args...)`, then the above problem is solved with -Symbol `□` denotes the infimal convolution, and `L` is a linear mapping. -Points `x0` and `y0` are the initial primal and dual iterates, respectively.
+ solver(x0, y0; [f, g, h, l, L, betaQ, betaR]) -See documentation of `afba` for the list of keyword arguments. +See documentation of `AFBA` for the list of keyword arguments. References: -[1] Condat, "A primal–dual splitting method for convex optimization +[1] Chambolle, Pock, "A First-Order Primal-Dual Algorithm for Convex Problems +with Applications to Imaging", Journal of Mathematical Imaging and Vision, +vol. 40, no. 1, pp. 120-145 (2011). + +[2] Condat, "A primal–dual splitting method for convex optimization involving Lipschitzian, proximable and linear composite terms", Journal of Optimization Theory and Applications, vol. 158, no. 2, pp 460-479 (2013). -[2] Vũ, "A splitting algorithm for dual monotone inclusions involving +[3] Vũ, "A splitting algorithm for dual monotone inclusions involving cocoercive operators", Advances in Computational Mathematics, vol. 38, no. 3, pp. 667-681 (2013). """ -function vucondat(x0, y0; kwargs...) - return afba(x0, y0; kwargs..., theta=2.0) -end - -""" - chambollepock(x0, y0; g, h, l, L, [...]) - -Solves convex optimization problems of the form - - minimize g(x) + (h □ l)(L x). - -where `g` and `h` are possibly nonsmooth and `l` is strongly convex, -using the Chambolle-Pock primal-dual algorithm. -Symbol `□` denotes the infimal convolution, and `L` is a linear mapping. -Points `x0` and `y0` are the initial primal and dual iterates, respectively. - -See documentation of `afba` for the list of keyword arguments. - -References: - -[1] Chambolle, Pock, "A First-Order Primal-Dual Algorithm for Convex Problems -with Applications to Imaging", Journal of Mathematical Imaging and Vision, -vol. 40, no. 1, pp. 120-145 (2011). -""" -function chambollepock(x0, y0; kwargs...) - return vucondat(x0, y0; kwargs..., f=Zero()) -end +VuCondat(::Type{R}; kwargs...) where R = AFBA{R}(; kwargs..., theta=R(2.0)) +VuCondat(; kwargs...) = VuCondat(Float64; kwargs...) diff --git a/src/algorithms/zerofpr.jl b/src/algorithms/zerofpr.jl index c4b4c43..28e510f 100644 --- a/src/algorithms/zerofpr.jl +++ b/src/algorithms/zerofpr.jl @@ -1,3 +1,8 @@ +# Themelis, Stella, Patrinos, "Forward-backward envelope for the sum of two +# nonconvex functions: Further properties and nonmonotone line-search +# algorithms", SIAM Journal on Optimization, vol. 28, no. 3, pp. 2274–2303 +# (2018). + using Base.Iterators using ProximalAlgorithms: LBFGS using ProximalAlgorithms.IterationTools @@ -154,16 +159,82 @@ function Base.iterate(iter::ZeroFPR_iterable{R}, state::ZeroFPR_state{R, Tx, TAx return nothing end +# Solver + +struct ZeroFPR{R <: Real} + alpha::R + beta::R + gamma::Maybe{R} + adaptive::Bool + memory::Int + maxit::Int + tol::R + verbose::Bool + freq::Int + + function ZeroFPR{R}(; alpha::R=R(0.95), beta::R=R(0.5), + gamma::Maybe{R}=nothing, adaptive::Bool=false, memory::Int=5, + maxit::Int=1000, tol::R=R(1e-8), verbose::Bool=false, freq::Int=10 + ) where R + @assert 0 < alpha < 1 + @assert 0 < beta < 1 + @assert gamma === nothing || gamma > 0 + @assert memory >= 0 + @assert maxit > 0 + @assert tol > 0 + @assert freq > 0 + new(alpha, beta, gamma, adaptive, memory, maxit, tol, verbose, freq) + end +end + +function (solver::ZeroFPR{R})( + x0::AbstractArray{C}; f=Zero(), A=I, g=Zero(), L::Maybe{R}=nothing +) where {R, C <: Union{R, Complex{R}}} + + stop(state::ZeroFPR_state) = norm(state.res, Inf)/state.gamma <= solver.tol + disp((it, state)) = @printf( + "%5d | %.3e | %.3e | %.3e\n", + it, state.gamma, norm(state.res, Inf)/state.gamma, + (state.tau === nothing ? 
0.0 : state.tau) + ) + + if solver.gamma === nothing && L !== nothing + gamma = solver.alpha/L + elseif solver.gamma !== nothing + gamma = solver.gamma + else + gamma = nothing + end + + iter = ZeroFPR_iterable( + f, A, g, x0, + solver.alpha, solver.beta, gamma, solver.adaptive, solver.memory + ) + iter = take(halt(iter, stop), solver.maxit) + iter = enumerate(iter) + if solver.verbose iter = tee(sample(iter, solver.freq), disp) end + + num_iters, state_final = loop(iter) + + return state_final.xbar, num_iters + +end + +# Outer constructors + """ - zerofpr(x0; f, A, g, [...]) + ZeroFPR([gamma, adaptive, memory, maxit, tol, verbose, freq, alpha, beta]) -Minimizes f(A*x) + g(x) with respect to x, starting from x0, using ZeroFPR. -If unspecified, f and g default to the identically zero function, while A -defaults to the identity. +Instantiate the ZeroFPR algorithm (see [1]) for solving optimization problems +of the form -Other optional keyword arguments: + minimize f(Ax) + g(x), + +where `f` is smooth and `A` is a linear mapping (for example, a matrix). +If `solver = ZeroFPR(args...)`, then the above problem is solved with + + solver(x0; [f, A, g, L]) + +Optional keyword arguments: -* `L::Real` (default: `nothing`), the Lipschitz constant of the gradient of x ↦ f(Ax). * `gamma::Real` (default: `nothing`), the stepsize to use; defaults to `alpha/L` if not set (but `L` is). * `adaptive::Bool` (default: `false`), if true, forces the method stepsize to be adaptively adjusted even if `L` is provided (this behaviour is always enforced if `L` is not provided). * `memory::Integer` (default: `5`), memory parameter for L-BFGS. @@ -174,41 +245,16 @@ Other optional keyword arguments: * `alpha::Real` (default: `0.95`), stepsize to inverse-Lipschitz-constant ratio; should be in (0, 1). * `beta::Real` (default: `0.5`), sufficient decrease parameter; should be in (0, 1). +If `gamma` is not specified at construction time, the following keyword +argument can be used to set the stepsize parameter: + +* `L::Real` (default: `nothing`), the Lipschitz constant of the gradient of x ↦ f(Ax). + References: [1] Themelis, Stella, Patrinos, "Forward-backward envelope for the sum of two nonconvex functions: Further properties and nonmonotone line-search algorithms", SIAM Journal on Optimization, vol. 28, no. 3, pp. 2274–2303 (2018). """ -function zerofpr(x0; - f=Zero(), A=I, g=Zero(), - L=nothing, gamma=nothing, - adaptive=false, memory=5, - maxit=1000, tol=1e-8, - verbose=false, freq=10, - alpha=0.95, beta=0.5) - - R = real(eltype(x0)) - - stop(state::ZeroFPR_state) = norm(state.res, Inf)/state.gamma <= R(tol) - disp((it, state)) = @printf( - "%5d | %.3e | %.3e | %.3e\n", - it, state.gamma, norm(state.res, Inf)/state.gamma, - (state.tau === nothing ? 0.0 : state.tau) - ) - - if gamma === nothing && L !== nothing - gamma = R(alpha)/R(L) - elseif gamma !== nothing - gamma = R(gamma) - end - - iter = ZeroFPR_iterable(f, A, g, x0, R(alpha), R(beta), gamma, adaptive, memory) - iter = take(halt(iter, stop), maxit) - iter = enumerate(iter) - if verbose iter = tee(sample(iter, freq), disp) end - - num_iters, state_final = loop(iter) - - return state_final.xbar, num_iters -end +ZeroFPR(::Type{R}; kwargs...) where R = ZeroFPR{R}(; kwargs...) +ZeroFPR(; kwargs...) = ZeroFPR(Float64; kwargs...)
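The solver refactoring above replaces the old function-based entry points (`panoc`, `zerofpr`, ...) with callable solver objects that all follow the same pattern: construct once, then apply to problem data. A minimal sketch of the intended usage, with illustrative data; the single-block solvers (`ForwardBackward`, `PANOC`, `ZeroFPR`) share this exact signature:

```julia
using LinearAlgebra, ProximalOperators, ProximalAlgorithms

A = randn(10, 5); b = randn(10)
f = Translate(SqrNormL2(1.0), -b)   # smooth data-fit term: f(A x) = ||A x - b||^2 / 2
g = NormL1(0.1)                     # nonsmooth regularizer

# construct once (Float64 by default), then reuse on problem data
solver = ProximalAlgorithms.ZeroFPR(tol=1e-6)
x, it = solver(zeros(5), f=f, A=A, g=g, L=opnorm(A)^2)
```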
diff --git a/src/utilities/iterationtools.jl b/src/utilities/iterationtools.jl index 506d10f..c3f906a 100644 --- a/src/utilities/iterationtools.jl +++ b/src/utilities/iterationtools.jl @@ -11,6 +11,11 @@ struct HaltingIterable{I, F} fun::F end +Base.IteratorSize(::Type{HaltingIterable{I, F}}) where {I, F} = Base.SizeUnknown() +Base.IteratorEltype(::Type{HaltingIterable{I, F}}) where {I, F} = Base.IteratorEltype(I) + +Base.eltype(iter::HaltingIterable{I, F}) where {I, F} = eltype(iter.iter) + function Base.iterate(iter::HaltingIterable) next = iterate(iter.iter) return dispatch(iter, next) @@ -36,6 +41,13 @@ struct TeeIterable{I, F} fun::F end +Base.IteratorSize(::Type{TeeIterable{I, F}}) where {I, F} = Base.IteratorSize(I) +Base.IteratorEltype(::Type{TeeIterable{I, F}}) where {I, F} = Base.IteratorEltype(I) + +Base.length(iter::TeeIterable{I, F}) where {I, F} = length(iter.iter) +Base.axes(iter::TeeIterable{I, F}) where {I, F} = axes(iter.iter) +Base.eltype(iter::TeeIterable{I, F}) where {I, F} = eltype(iter.iter) + function Base.iterate(iter::TeeIterable, args...) next = iterate(iter.iter, args...) if next !== nothing iter.fun(next[1]) end @@ -51,6 +63,18 @@ struct SamplingIterable{I} period::UInt end +Base.IteratorSize(::Type{SamplingIterable{I}}) where I = Base.IteratorSize(I) +Base.IteratorEltype(::Type{SamplingIterable{I}}) where I = Base.IteratorEltype(I) + +function Base.length(iter::SamplingIterable{I}) where I + remainder = length(iter.iter) % iter.period + quotient = length(iter.iter) ÷ iter.period + return remainder == 0 ? quotient : quotient + 1 +end + +Base.size(iter::SamplingIterable{I}) where I = (length(iter), ) +Base.eltype(iter::SamplingIterable{I}) where I = eltype(iter.iter) + function Base.iterate(iter::SamplingIterable, state=iter.iter) current = iterate(state) if current === nothing return nothing end @@ -70,6 +94,13 @@ struct StopwatchIterable{I} iter::I end +Base.IteratorSize(::Type{StopwatchIterable{I}}) where I = Base.IteratorSize(I) +Base.IteratorEltype(::Type{StopwatchIterable{I}}) where I = Base.IteratorEltype(I) + +Base.length(iter::StopwatchIterable{I}) where I = length(iter.iter) +Base.axes(iter::StopwatchIterable{I}) where I = axes(iter.iter) +Base.eltype(iter::StopwatchIterable{I}) where I = Tuple{UInt64, eltype(iter.iter)} + function Base.iterate(iter::StopwatchIterable) t0 = time_ns() next = iterate(iter.iter) diff --git a/test/problems/test_elasticnet.jl b/test/problems/test_elasticnet.jl index 5c82900..059d45d 100644 --- a/test/problems/test_elasticnet.jl +++ b/test/problems/test_elasticnet.jl @@ -24,13 +24,15 @@ L = opnorm(A)^2 x0 = zeros(T, n) - x_panoc, it = ProximalAlgorithms.panoc(x0, f=loss, A=A, g=reg, tol=eps(R)) + solver = ProximalAlgorithms.PANOC{R}(tol=eps(R)) + x_panoc, it = solver(x0, f=loss, A=A, g=reg) - @testset "DYS" begin + @testset "DavisYin" begin x0 = zeros(T, n) - xf_dys, xg_dys, it_dys = ProximalAlgorithms.davisyin( - x0, f=reg1, g=reg2, h=loss, A=A, L=L, tol=1e-6 + solver = ProximalAlgorithms.DavisYin{R}(tol=R(1e-6)) + xf_dys, xg_dys, it_dys = solver( + x0, f=reg1, g=reg2, h=loss, A=A, L=L ) @test eltype(xf_dys) == T @test eltype(xg_dys) == T @@ -39,8 +41,9 @@ @test it_dys <= 1900 x0 = randn(T, n) - xf_dys, xg_dys, it_dys = ProximalAlgorithms.davisyin( - x0, f=reg1, g=reg2, h=loss, A=A, L=L, tol=1e-6 + solver = ProximalAlgorithms.DavisYin{R}(tol=R(1e-6)) + xf_dys, xg_dys, it_dys = solver( + x0, f=reg1, g=reg2, h=loss, A=A, L=L ) @test eltype(xf_dys) == T @test eltype(xg_dys) == T @@ -63,8 +66,9 @@ x0 = randn(T, n) y0 = randn(T, m) -
x_afba, y_afba, it_afba = ProximalAlgorithms.afba( - x0, y0, f=reg2, g=reg1, h=loss, L=A, betaQ=R(1), theta=theta, mu=mu, tol=1e-6 + solver = ProximalAlgorithms.AFBA{R}(theta=theta, mu=mu, tol=R(1e-6)) + x_afba, y_afba, it_afba = solver( + x0, y0, f=reg2, g=reg1, h=loss, L=A, betaQ=R(1), ) @test eltype(x_afba) == T @test eltype(y_afba) == T diff --git a/test/problems/test_lasso_small.jl b/test/problems/test_lasso_small.jl index ae2a1ec..fbc9203 100644 --- a/test/problems/test_lasso_small.jl +++ b/test/problems/test_lasso_small.jl @@ -26,14 +26,15 @@ x_star = T[-3.877278911564627e-01, 0, 0, 2.174149659863943e-02, 6.168435374149660e-01] - TOL = 1e-4 + TOL = R(1e-4) - @testset "FBS" begin + @testset "ForwardBackward" begin ## Nonfast/Nonadaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, L=opnorm(A)^2, tol=TOL) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm(A)^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 150 @@ -41,7 +42,8 @@ # Nonfast/Adaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, adaptive=true) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 300 @@ -49,7 +51,8 @@ # Fast/Nonadaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, L=opnorm(A)^2, tol=TOL, fast=true) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, fast=true) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm(A)^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 100 @@ -57,7 +60,8 @@ # Fast/Adaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, adaptive=true, tol=TOL, fast=true) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, adaptive=true, fast=true) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 200 @@ -68,7 +72,8 @@ # ZeroFPR/Nonadaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.zerofpr(x0, f=f, A=A, g=g, L=opnorm(A)^2, tol=TOL) + solver = ProximalAlgorithms.ZeroFPR{R}(tol=TOL) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm(A)^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -76,7 +81,8 @@ # ZeroFPR/Adaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.zerofpr(x0, f=f, A=A, g=g, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.ZeroFPR{R}(adaptive=true, tol=TOL) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -88,7 +94,8 @@ # PANOC/Nonadaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.panoc(x0, f=f, A=A, g=g, L=opnorm(A)^2, tol=TOL) + solver = ProximalAlgorithms.PANOC{R}(tol=TOL) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm(A)^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -96,19 +103,21 @@ ## PANOC/Adaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.panoc(x0, f=f, A=A, g=g, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.PANOC{R}(adaptive=true, tol=TOL) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 end - @testset "DRS" begin + @testset "DouglasRachford" begin # Douglas-Rachford x0 = zeros(T, n) - y, z, it = ProximalAlgorithms.douglasrachford(x0, f=f2, g=g, gamma=10.0/opnorm(A)^2, tol=TOL) + solver = ProximalAlgorithms.DouglasRachford{R}(gamma=R(10.0)/opnorm(A)^2, tol=TOL) + y, z, it = 
solver(x0, f=f2, g=g) @test eltype(y) == T @test eltype(z) == T @test norm(y - x_star, Inf) <= TOL diff --git a/test/problems/test_lasso_small_h_split.jl b/test/problems/test_lasso_small_h_split.jl index 9fa6222..4dacef1 100644 --- a/test/problems/test_lasso_small_h_split.jl +++ b/test/problems/test_lasso_small_h_split.jl @@ -37,14 +37,15 @@ T[2.174149659863943e-02, 6.168435374149660e-01] ) - TOL = 1e-4 + TOL = R(1e-4) - @testset "FBS" begin + @testset "ForwardBackward" begin ## Nonfast/Nonadaptive x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, L=opnorm([A1 A2])^2, tol=TOL) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm([A1 A2])^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 150 @@ -52,7 +53,8 @@ # Nonfast/Adaptive x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, adaptive=true) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 300 @@ -60,7 +62,8 @@ # Fast/Nonadaptive x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, L=opnorm([A1 A2])^2, tol=TOL, fast=true) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, fast=true) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm([A1 A2])^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 100 @@ -68,7 +71,8 @@ # Fast/Adaptive x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, adaptive=true, tol=TOL, fast=true) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, adaptive=true, fast=true) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 200 @@ -79,7 +83,8 @@ # ZeroFPR/Nonadaptive x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x, it = ProximalAlgorithms.zerofpr(x0, f=f, A=A, g=g, L=opnorm([A1 A2])^2, tol=TOL) + solver = ProximalAlgorithms.ZeroFPR{R}(tol=TOL) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm([A1 A2])^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -87,7 +92,8 @@ # ZeroFPR/Adaptive x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x, it = ProximalAlgorithms.zerofpr(x0, f=f, A=A, g=g, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.ZeroFPR{R}(adaptive=true, tol=TOL) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -99,7 +105,8 @@ # PANOC/Nonadaptive x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x, it = ProximalAlgorithms.panoc(x0, f=f, A=A, g=g, L=opnorm([A1 A2])^2, tol=TOL) + solver = ProximalAlgorithms.PANOC{R}(tol=TOL) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm([A1 A2])^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -107,7 +114,8 @@ ## PANOC/Adaptive x0 = ArrayPartition(zeros(T, n1), zeros(T, n2)) - x, it = ProximalAlgorithms.panoc(x0, f=f, A=A, g=g, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.PANOC{R}(adaptive=true, tol=TOL) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 diff --git a/test/problems/test_lasso_small_v_split.jl b/test/problems/test_lasso_small_v_split.jl index c87aa15..b8c3a70 100644 --- a/test/problems/test_lasso_small_v_split.jl +++ b/test/problems/test_lasso_small_v_split.jl @@ -30,14 +30,15 @@ 
x_star = T[-3.877278911564627e-01, 0, 0, 2.174149659863943e-02, 6.168435374149660e-01] - TOL = 1e-4 + TOL = R(1e-4) - @testset "FBS" begin + @testset "ForwardBackward" begin ## Nonfast/Nonadaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, L=opnorm([A1; A2])^2, tol=TOL) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm([A1; A2])^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 150 @@ -45,7 +46,8 @@ # Nonfast/Adaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, adaptive=true) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 300 @@ -53,7 +55,8 @@ # Fast/Nonadaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, L=opnorm([A1; A2])^2, tol=TOL, fast=true) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, fast=true) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm([A1; A2])^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 100 @@ -61,7 +64,8 @@ # Fast/Adaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, adaptive=true, tol=TOL, fast=true) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, adaptive=true, fast=true) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 200 @@ -72,7 +76,8 @@ # ZeroFPR/Nonadaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.zerofpr(x0, f=f, A=A, g=g, L=opnorm([A1; A2])^2, tol=TOL) + solver = ProximalAlgorithms.ZeroFPR{R}(tol=TOL) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm([A1; A2])^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -80,7 +85,8 @@ # ZeroFPR/Adaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.zerofpr(x0, f=f, A=A, g=g, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.ZeroFPR{R}(adaptive=true, tol=TOL) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -92,7 +98,8 @@ # PANOC/Nonadaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.panoc(x0, f=f, A=A, g=g, L=opnorm([A1; A2])^2, tol=TOL) + solver = ProximalAlgorithms.PANOC{R}(tol=TOL) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm([A1; A2])^2) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 @@ -100,7 +107,8 @@ ## PANOC/Adaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.panoc(x0, f=f, A=A, g=g, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.PANOC{R}(adaptive=true, tol=TOL) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= TOL @test it < 20 diff --git a/test/problems/test_linear_programs.jl b/test/problems/test_linear_programs.jl index 322f8e6..417b2af 100644 --- a/test/problems/test_linear_programs.jl +++ b/test/problems/test_linear_programs.jl @@ -54,7 +54,7 @@ b = A*x_star c = A'*y_star + s_star - tol = 1e2*eps(T) + tol = 100*eps(T) maxit = 10_000 @testset "AFBA" begin @@ -66,9 +66,8 @@ x0 = zeros(T, n) y0 = zeros(T, m) - x, y, it = ProximalAlgorithms.afba( - x0, y0, f=f, g=g, h=h, L=A, tol=tol, maxit=maxit - ) + solver = ProximalAlgorithms.AFBA{T}(tol=tol, maxit=maxit) + x, y, it = solver(x0, y0, f=f, g=g, h=h, L=A) @test eltype(x) == T @test eltype(y) == T @@ -88,9 +87,8 @@ x0 = zeros(T, n) y0 = zeros(T, m) - x, y, it = ProximalAlgorithms.vucondat( - x0, y0, f=f, g=g, h=h, L=A, tol=tol, maxit=maxit - ) + solver = 
ProximalAlgorithms.VuCondat(T, tol=tol, maxit=maxit) + x, y, it = solver(x0, y0, f=f, g=g, h=h, L=A) @test eltype(x) == T @test eltype(y) == T @@ -111,9 +109,8 @@ x0 = zeros(T, n) - xf, xg, it = ProximalAlgorithms.davisyin( - x0, f=f, g=g, h=h, gamma=T(1), tol=tol, maxit=maxit - ) + solver = ProximalAlgorithms.DavisYin{T}(gamma=T(1), tol=tol, maxit=maxit) + xf, xg, it = solver(x0, f=f, g=g, h=h) @test eltype(xf) == T @test eltype(xg) == T diff --git a/test/problems/test_sparse_logistic_small.jl b/test/problems/test_sparse_logistic_small.jl index 25833a3..52a8acf 100644 --- a/test/problems/test_sparse_logistic_small.jl +++ b/test/problems/test_sparse_logistic_small.jl @@ -23,12 +23,13 @@ x_star = T[0, 0, 2.114635341704963e-01, 0, 2.845881348733116e+00] - TOL = 1e-6 + TOL = R(1e-6) # Nonfast/Adaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, fast=false, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, adaptive=true, fast=false) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= 1e-4 @test it < 1100 @@ -36,7 +37,8 @@ # Fast/Adaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.forwardbackward(x0, f=f, A=A, g=g, fast=true, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, adaptive=true, fast=true) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= 1e-4 @test it < 500 @@ -44,7 +46,8 @@ # ZeroFPR/Adaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.zerofpr(x0, f=f, A=A, g=g, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.ZeroFPR{R}(adaptive=true, tol=TOL) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= 1e-4 @test it < 25 @@ -52,7 +55,8 @@ # PANOC/Adaptive x0 = zeros(T, n) - x, it = ProximalAlgorithms.panoc(x0, f=f, A=A, g=g, adaptive=true, tol=TOL) + solver = ProximalAlgorithms.PANOC{R}(adaptive=true, tol=TOL) + x, it = solver(x0, f=f, A=A, g=g) @test eltype(x) == T @test norm(x - x_star, Inf) <= 1e-4 @test it < 50 diff --git a/test/problems/test_verbose.jl b/test/problems/test_verbose.jl new file mode 100644 index 0000000..6e64448 --- /dev/null +++ b/test/problems/test_verbose.jl @@ -0,0 +1,129 @@ +@testset "Verbose" for T in [Float64] + + using ProximalOperators + using ProximalAlgorithms + using LinearAlgebra + using Random + + Random.seed!(0) + + A = T[ 1.0 -2.0 3.0 -4.0 5.0; + 2.0 -1.0 0.0 -1.0 3.0; + -1.0 0.0 4.0 -3.0 2.0; + -1.0 -1.0 -1.0 1.0 3.0] + b = T[1.0, 2.0, 3.0, 4.0] + + m, n = size(A) + + R = real(T) + + lam = R(0.1)*norm(A'*b, Inf) + @test typeof(lam) == R + + f = Translate(SqrNormL2(R(1)), -b) + f2 = LeastSquares(A, b) + g = NormL1(lam) + + x_star = T[-3.877278911564627e-01, 0, 0, 2.174149659863943e-02, 6.168435374149660e-01] + + TOL = R(1e-4) + + @testset "ForwardBackward" begin + + ## Nonfast/Nonadaptive + + x0 = zeros(T, n) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, verbose=true) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm(A)^2) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 150 + + # Nonfast/Adaptive + + x0 = zeros(T, n) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, adaptive=true, verbose=true) + x, it = solver(x0, f=f, A=A, g=g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 300 + + # Fast/Nonadaptive + + x0 = zeros(T, n) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, fast=true, verbose=true) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm(A)^2) + @test eltype(x) 
== T + @test norm(x - x_star, Inf) <= TOL + @test it < 100 + + # Fast/Adaptive + + x0 = zeros(T, n) + solver = ProximalAlgorithms.ForwardBackward{R}(tol=TOL, adaptive=true, fast=true, verbose=true) + x, it = solver(x0, f=f, A=A, g=g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 200 + end + + @testset "ZeroFPR" begin + + # ZeroFPR/Nonadaptive + + x0 = zeros(T, n) + solver = ProximalAlgorithms.ZeroFPR{R}(tol=TOL, verbose=true) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm(A)^2) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + + # ZeroFPR/Adaptive + + x0 = zeros(T, n) + solver = ProximalAlgorithms.ZeroFPR{R}(adaptive=true, tol=TOL, verbose=true) + x, it = solver(x0, f=f, A=A, g=g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + + end + + @testset "PANOC" begin + + # PANOC/Nonadaptive + + x0 = zeros(T, n) + solver = ProximalAlgorithms.PANOC{R}(tol=TOL, verbose=true) + x, it = solver(x0, f=f, A=A, g=g, L=opnorm(A)^2) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + + ## PANOC/Adaptive + + x0 = zeros(T, n) + solver = ProximalAlgorithms.PANOC{R}(adaptive=true, tol=TOL, verbose=true) + x, it = solver(x0, f=f, A=A, g=g) + @test eltype(x) == T + @test norm(x - x_star, Inf) <= TOL + @test it < 20 + + end + + @testset "DouglasRachford" begin + + # Douglas-Rachford + + x0 = zeros(T, n) + solver = ProximalAlgorithms.DouglasRachford{R}(gamma=R(10.0)/opnorm(A)^2, tol=TOL, verbose=true) + y, z, it = solver(x0, f=f2, g=g) + @test eltype(y) == T + @test eltype(z) == T + @test norm(y - x_star, Inf) <= TOL + @test norm(z - x_star, Inf) <= TOL + @test it < 30 + + end + +end diff --git a/test/runtests.jl b/test/runtests.jl index cf6a8dd..4e24dee 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,3 +12,4 @@ include("problems/test_lasso_small_v_split.jl") include("problems/test_lasso_small_h_split.jl") include("problems/test_linear_programs.jl") include("problems/test_sparse_logistic_small.jl") +include("problems/test_verbose.jl") diff --git a/test/utilities/iterationtools.jl b/test/utilities/iterationtools.jl index ab6b042..8672a2f 100644 --- a/test/utilities/iterationtools.jl +++ b/test/utilities/iterationtools.jl @@ -22,7 +22,10 @@ end @testset "Halting" begin iter = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765] - last = IterationTools.loop(IterationTools.halt(iter, x -> x >= 1000)) + truncated = IterationTools.halt(iter, x -> x >= 1000) + @test Base.IteratorSize(truncated) == Base.SizeUnknown() + @test eltype(truncated) == eltype(iter) + last = IterationTools.loop(truncated) @test last == 1597 end @@ -41,8 +44,11 @@ end @testset "Sampling" begin iter = randn(Float64, 147) + sample = IterationTools.sample(iter, 10) + @test eltype(sample) == Float64 + @test length(sample) == 15 k = 0 - for x in IterationTools.sample(iter, 10) + for x in sample idx = min(147, (k+1)*10) @test x == iter[idx] k = k+1 @@ -51,8 +57,12 @@ end @testset "Timing" begin iter = randn(Float64, 10) + timed = IterationTools.stopwatch(iter) + @test length(timed) == length(iter) + @test axes(timed) == axes(iter) + @test eltype(timed) == Tuple{UInt64, eltype(iter)} k = 0 - for (t, x) in IterationTools.stopwatch(iter) + for (t, x) in timed @test x == iter[k+1] @test t >= k * 1e8 * 0.9 # 1e8 ns = 0.1 s, factor 0.9 is for safety sleep(0.1)
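As a footnote to the iteration-tools tests above, here is a small sketch composing the combinators they exercise; the integer array is illustrative:

```julia
using ProximalAlgorithms: IterationTools

fib = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89]

truncated = IterationTools.halt(fib, x -> x >= 10)  # stops after yielding the first x >= 10
sampled   = IterationTools.sample(fib, 5)           # every 5th element, plus the last one

IterationTools.loop(truncated)   # drives the iteration to the end; returns 13 here
collect(sampled)                 # [3, 34, 89]
```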