alpaqa 0.0.1
Nonconvex constrained optimization
guarded-aa-pga.hpp
Go to the documentation of this file.
1#pragma once
2
9
10#include <cassert>
11#include <chrono>
12#include <cmath>
13#include <iomanip>
14#include <iostream>
15#include <stdexcept>
16
17namespace alpaqa {
18
20 /// Parameters related to the Lipschitz constant estimate and step size.
22 /// Length of the history to keep in the limited-memory QR algorithm.
23 unsigned limitedqr_mem = 10;
24 /// Maximum number of inner iterations.
25 unsigned max_iter = 100;
26 /// Maximum duration.
27 std::chrono::microseconds max_time = std::chrono::minutes(5);
28 /// Minimum Lipschitz constant estimate.
29 real_t L_min = 1e-5;
30 /// Maximum Lipschitz constant estimate.
31 real_t L_max = 1e20;
32 /// What stopping criterion to use.
34
35 /// When to print progress. If set to zero, nothing will be printed.
36 /// If set to N != 0, progress is printed every N iterations.
37 unsigned print_interval = 0;
38
40 10 * std::numeric_limits<real_t>::epsilon();
41
42 /// Maximum number of iterations without any progress before giving up.
43 unsigned max_no_progress = 10;
44
46};
47
49 unsigned k;
65};
66
67/// Guarded Anderson Accelerated Proximal Gradient Algorithm.
68/// Vien V. Mai and Mikael Johansson, Anderson Acceleration of Proximal Gradient Methods.
69/// https://arxiv.org/abs/1910.08590v2
70///
71/// @ingroup grp_InnerSolvers
73 public:
75
77
78 struct Stats {
81 std::chrono::microseconds elapsed_time;
82 unsigned iterations = 0;
84 };
85
87
88 Stats operator()(const Problem &problem, // in
89 crvec Σ, // in
90 real_t ε, // in
91 bool always_overwrite_results, // in
92 rvec x, // inout
93 rvec λ, // inout
94 rvec err_z); // out
95
97 set_progress_callback(std::function<void(const ProgressInfo &)> cb) {
98 this->progress_cb = cb;
99 return *this;
100 }
101
102 std::string get_name() const { return "GAAPGA"; }
103
104 void stop() { stop_signal.stop(); }
105
106 const Params &get_params() const { return params; }
107
108 private:
111 std::function<void(const ProgressInfo &)> progress_cb;
112};
113
114using std::chrono::duration_cast;
115using std::chrono::microseconds;
116
119 crvec Σ, // in
120 real_t ε, // in
121 bool always_overwrite_results, // in
122 rvec x, // inout
123 rvec y, // inout
124 rvec err_z // out
125) {
126 auto start_time = std::chrono::steady_clock::now();
127 Stats s;
128
129 const auto n = problem.n;
130 const auto m = problem.m;
131
132 vec xₖ = x, // Value of x at the beginning of the iteration
133 x̂ₖ(n), // Value of x after a projected gradient step
134 gₖ(n), // <?>
135 rₖ₋₁(n), // <?>
136 rₖ(n), // <?>
137 pₖ(n), // Projected gradient step
138 yₖ(n), // Value of x after a gradient or AA step
139 ŷₖ(m), // <?>
140 grad_ψₖ(n), // ∇ψ(xₖ)
141 grad_ψx̂ₖ(n); // ∇ψ(x̂ₖ)
142
143 vec work_n(n), work_n2(n), work_m(m);
144
145 unsigned m_AA = std::min(params.limitedqr_mem, n);
146 LimitedMemoryQR qr(n, m_AA);
147 mat G(n, m_AA);
148 vec γ_LS(m_AA);
149
150 // Helper functions --------------------------------------------------------
151
152 // Wrappers for helper functions that automatically pass along any arguments
153 // that are constant within AAPGA (for readability in the main algorithm)
154 auto calc_ψ_ŷ = [&problem, &y, &Σ](crvec x, rvec ŷ) {
155 return detail::calc_ψ_ŷ(problem, x, y, Σ, ŷ);
156 };
157 auto calc_ψ_grad_ψ = [&problem, &y, &Σ, &work_n, &work_m](crvec x,
158 rvec grad_ψ) {
159 return detail::calc_ψ_grad_ψ(problem, x, y, Σ, grad_ψ, work_n, work_m);
160 };
161 auto calc_grad_ψ_from_ŷ = [&problem, &work_n](crvec x, crvec ŷ,
162 rvec grad_ψ) {
163 detail::calc_grad_ψ_from_ŷ(problem, x, ŷ, grad_ψ, work_n);
164 };
165 auto calc_x̂ = [&problem](real_t γ, crvec x, crvec grad_ψ, rvec x̂, rvec p) {
166 detail::calc_x̂(problem, γ, x, grad_ψ, x̂, p);
167 };
168 auto calc_err_z = [&problem, &y, &Σ](crvec x̂, rvec err_z) {
170 };
171 auto descent_lemma = [this, &problem, &y,
172 &Σ](crvec xₖ, real_t ψₖ, crvec grad_ψₖ, rvec x̂ₖ,
173 rvec pₖ, rvec ŷx̂ₖ, real_t &ψx̂ₖ, real_t &pₖᵀpₖ,
174 real_t &grad_ψₖᵀpₖ, real_t &Lₖ, real_t &γₖ) {
177 xₖ, ψₖ, grad_ψₖ, y, Σ, x̂ₖ, pₖ, ŷx̂ₖ, ψx̂ₖ, pₖᵀpₖ, grad_ψₖᵀpₖ, Lₖ, γₖ);
178 };
179 auto print_progress = [&](unsigned k, real_t ψₖ, crvec grad_ψₖ, crvec pₖ,
180 real_t γₖ, real_t εₖ) {
181 std::cout << "[AAPGA] " << std::setw(6) << k
182 << ": ψ = " << std::setw(13) << ψₖ
183 << ", ‖∇ψ‖ = " << std::setw(13) << grad_ψₖ.norm()
184 << ", ‖p‖ = " << std::setw(13) << pₖ.norm()
185 << ", γ = " << std::setw(13) << γₖ
186 << ", εₖ = " << std::setw(13) << εₖ << "\r\n";
187 };
188
189 // Estimate Lipschitz constant ---------------------------------------------
190
191 real_t ψₖ, Lₖ;
192 // Finite difference approximation of ∇²ψ in starting point
193 if (params.Lipschitz.L₀ <= 0) {
197 /* in ⟹ out */ grad_ψₖ, /* work */ x̂ₖ, work_n2, work_n, work_m);
198 }
199 // Initial Lipschitz constant provided by the user
200 else {
201 Lₖ = params.Lipschitz.L₀;
202 // Calculate ψ(xₖ), ∇ψ(x₀)
203 ψₖ = calc_ψ_grad_ψ(xₖ, /* in ⟹ out */ grad_ψₖ);
204 }
205 if (not std::isfinite(Lₖ)) {
207 return s;
208 }
209
210 real_t γₖ = params.Lipschitz.Lγ_factor / Lₖ;
211
212 // First projected gradient step -------------------------------------------
213
214 rₖ₋₁ = -γₖ * grad_ψₖ;
215 yₖ = xₖ + rₖ₋₁;
216 xₖ = project(yₖ, problem.C);
217 G.col(0) = yₖ;
218
219 unsigned no_progress = 0;
220
221 // Calculate gradient in second iterate ------------------------------------
222
223 // Calculate ψ(x₁) and ∇ψ(x₁)
224 ψₖ = calc_ψ_grad_ψ(xₖ, /* in ⟹ out */ grad_ψₖ);
225
226 // Main loop
227 // =========================================================================
228 for (unsigned k = 0; k <= params.max_iter; ++k) {
229 // From previous iteration:
230 // - xₖ
231 // - grad_ψₖ
232 // - ψₖ
233 // - rₖ₋₁
234 // - history in qr and G
235
236 // Quadratic upper bound -----------------------------------------------
237
238 // Projected gradient step: x̂ₖ and pₖ
239 calc_x̂(γₖ, xₖ, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
240 // Calculate ψ(x̂ₖ) and ŷ(x̂ₖ)
241 real_t ψx̂ₖ = calc_ψ_ŷ(x̂ₖ, /* in ⟹ out */ ŷₖ);
242 // Calculate ∇ψ(xₖ)ᵀpₖ and ‖pₖ‖²
243 real_t grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
244 real_t pₖᵀpₖ = pₖ.squaredNorm();
245
246 real_t old_γₖ = descent_lemma(xₖ, ψₖ, grad_ψₖ, x̂ₖ, pₖ, ŷₖ, ψx̂ₖ, pₖᵀpₖ,
247 grad_ψₖᵀpₖ, Lₖ, γₖ);
248
249 // Flush or update Anderson buffers if step size changed
250 if (γₖ != old_γₖ) {
252 // Save the latest function evaluation gₖ at the first index
253 size_t newest_g_idx = qr.ring_tail();
254 if (newest_g_idx != 0)
255 G.col(0) = G.col(newest_g_idx);
256 // Flush everything else and reset indices
257 qr.reset();
258 } else {
259 // When not near the boundaries of the feasible set,
260 // r(x) = g(x) - x = Π(x - γ∇ψ(x)) - x = -γ∇ψ(x),
261 // in other words, r(x) is proportional to γ, and so is Δr,
262 // so when γ changes, these values have to be updated as well
263 qr.scale_R(γₖ / old_γₖ);
264 }
265 rₖ₋₁ *= γₖ / old_γₖ;
266 }
267
268 // Calculate ∇ψ(x̂ₖ)
269 calc_grad_ψ_from_ŷ(x̂ₖ, ŷₖ, /* in ⟹ out */ grad_ψx̂ₖ);
270
271 // Check stop condition ------------------------------------------------
272
274 problem.C, params.stop_crit, pₖ, γₖ, xₖ, x̂ₖ, ŷₖ, grad_ψₖ, grad_ψx̂ₖ);
275
276 // Print progress
277 if (params.print_interval != 0 && k % params.print_interval == 0)
278 print_progress(k, ψₖ, grad_ψₖ, pₖ, γₖ, εₖ);
279 if (progress_cb)
280 progress_cb({k, xₖ, pₖ, pₖᵀpₖ, x̂ₖ, ψₖ, grad_ψₖ, ψx̂ₖ, grad_ψx̂ₖ, Lₖ,
281 γₖ, εₖ, Σ, y, problem, params});
282
283 auto time_elapsed = std::chrono::steady_clock::now() - start_time;
284 auto stop_status = detail::check_all_stop_conditions(
285 params, time_elapsed, k, stop_signal, ε, εₖ, no_progress);
286 if (stop_status != SolverStatus::Unknown) {
287 // TODO: We could cache g(x) and ẑ, but would that faster?
288 // It saves 1 evaluation of g per ALM iteration, but requires
289 // many extra stores in the inner loops of PANOC.
290 // TODO: move the computation of ẑ and g(x) to ALM?
291 if (stop_status == SolverStatus::Converged ||
292 stop_status == SolverStatus::Interrupted ||
293 always_overwrite_results) {
294 calc_err_z(x̂ₖ, /* in ⟹ out */ err_z);
295 x = std::move(x̂ₖ);
296 y = std::move(ŷₖ);
297 }
298 s.iterations = k;
299 s.ε = εₖ;
300 s.elapsed_time = duration_cast<microseconds>(time_elapsed);
301 s.status = stop_status;
302 return s;
303 }
304
305 // Standard gradient descent step
306 gₖ = xₖ - γₖ * grad_ψₖ;
307 rₖ = gₖ - yₖ;
308
309 // Solve Anderson acceleration least squares problem and update history
310 minimize_update_anderson(qr, G, rₖ, rₖ₋₁, gₖ, γ_LS, yₖ);
311
312 // Project accelerated step onto feasible set
313 xₖ = project(yₖ, problem.C);
314
315 // Calculate the objective at the projected accelerated point
316 real_t ψₖ₊₁ = calc_ψ_ŷ(xₖ, /* in ⟹ out */ ŷₖ);
317 real_t old_ψ = ψₖ;
318
319 // Check sufficient decrease condition for Anderson iterate
320 bool sufficient_decrease;
321 if (0) // From paper
322 sufficient_decrease = ψₖ₊₁ <= ψₖ - 0.5 * γₖ * grad_ψₖ.squaredNorm();
323 else // Since we compute ψ(x̂ₖ) we might as well pick the best one
324 sufficient_decrease = ψₖ₊₁ <= ψx̂ₖ;
325
326 if (sufficient_decrease) {
327 // Accept Anderson step
328 // yₖ and xₖ are already overwritten by yₑₓₜ and Π(yₑₓₜ)
329 ψₖ = ψₖ₊₁;
330 calc_grad_ψ_from_ŷ(xₖ, ŷₖ, /* in ⟹ out */ grad_ψₖ);
331 }
332 // If not satisfied, take normal projected gradient step
333 else {
334 yₖ.swap(gₖ);
335 xₖ.swap(x̂ₖ);
336 ψₖ = ψx̂ₖ;
337 grad_ψₖ.swap(grad_ψx̂ₖ);
338 }
339 rₖ.swap(rₖ₋₁);
340 s.accelerated_steps_accepted += sufficient_decrease;
341
342 // Check if we made any progress, prevents us from exceeding the maximum
343 // number of iterations doing nothing if the step size gets too small
344 // TODO: is this a valid test?
345 no_progress = (ψₖ == old_ψ) ? no_progress + 1 : 0;
346 }
347 throw std::logic_error("[AAPGA] loop error");
348}
349
350template <class InnerSolverStats>
352
353template <>
355 std::chrono::microseconds elapsed_time;
356 unsigned iterations = 0;
357 unsigned accelerated_steps_accepted = 0;
358};
359
362 const GAAPGASolver::Stats &s) {
363 acc.elapsed_time += s.elapsed_time;
364 acc.iterations += s.iterations;
366 return acc;
367}
368
369} // namespace alpaqa
Guarded Anderson Accelerated Proximal Gradient Algorithm.
std::string get_name() const
GAAPGASolver & set_progress_callback(std::function< void(const ProgressInfo &)> cb)
std::function< void(const ProgressInfo &)> progress_cb
Stats operator()(const Problem &problem, crvec Σ, real_t ε, bool always_overwrite_results, rvec x, rvec λ, rvec err_z)
AtomicStopSignal stop_signal
GAAPGASolver(const Params &params)
const Params & get_params() const
std::chrono::microseconds elapsed_time
Incremental QR factorization using modified Gram-Schmidt with reorthogonalization.
size_t ring_tail() const
Get the tail index of the circular buffer (points to one past the most recent element).
void reset()
Reset all indices, clearing the Q and R matrices.
void scale_R(real_t scal)
Multiply the matrix R by a scalar.
real_t calc_error_stop_crit(const Box &C, PANOCStopCrit crit, crvec pₖ, real_t γ, crvec xₖ, crvec x̂ₖ, crvec ŷₖ, crvec grad_ψₖ, crvec grad_̂ψₖ)
Compute the ε from the stopping criterion, see PANOCStopCrit.
SolverStatus check_all_stop_conditions(const ParamsT &params, DurationT time_elapsed, unsigned iteration, const AtomicStopSignal &stop_signal, real_t ε, real_t εₖ, unsigned no_progress)
Check all stop conditions (required tolerance reached, out of time, maximum number of iterations exce...
real_t calc_ψ_ŷ(const Problem &p, crvec x, crvec y, crvec Σ, rvec ŷ)
Calculate both ψ(x) and the vector ŷ that can later be used to compute ∇ψ.
real_t descent_lemma(const Problem &problem, real_t rounding_tolerance, real_t L_max, crvec xₖ, real_t ψₖ, crvec grad_ψₖ, crvec y, crvec Σ, rvec x̂ₖ, rvec pₖ, rvec ŷx̂ₖ, real_t &ψx̂ₖ, real_t &norm_sq_pₖ, real_t &grad_ψₖᵀpₖ, real_t &Lₖ, real_t &γₖ)
Increase the estimate of the Lipschitz constant of the objective gradient and decrease the step size ...
real_t calc_ψ_grad_ψ(const Problem &p, crvec x, crvec y, crvec Σ, rvec grad_ψ, rvec work_n, rvec work_m)
Calculate both ψ(x) and its gradient ∇ψ(x).
void calc_err_z(const Problem &p, crvec x̂, crvec y, crvec Σ, rvec err_z)
Calculate the error between ẑ and g(x).
real_t initial_lipschitz_estimate(const Problem &problem, crvec xₖ, crvec y, crvec Σ, real_t ε, real_t δ, real_t L_min, real_t L_max, real_t &ψ, rvec grad_ψ, rvec work_n1, rvec work_n2, rvec work_n3, rvec work_m)
Estimate the Lipschitz constant of the gradient using finite differences.
void calc_grad_ψ_from_ŷ(const Problem &p, crvec x, crvec ŷ, rvec grad_ψ, rvec work_n)
Calculate ∇ψ(x) using ŷ.
void calc_x̂(const Problem &prob, real_t γ, crvec x, crvec grad_ψ, rvec x̂, rvec p)
void print_progress(unsigned k, real_t ψₖ, crvec grad_ψₖ, real_t pₖᵀpₖ, real_t γₖ, real_t εₖ)
int Σ
Definition: test.py:72
int m
Definition: test.py:41
int n
Definition: test.py:40
int ε
Definition: test.py:73
LipschitzEstimateParams Lipschitz
Parameters related to the Lipschitz constant estimate and step size.
InnerStatsAccumulator< PolymorphicInnerSolverWrapper::Stats > & operator+=(InnerStatsAccumulator< PolymorphicInnerSolverWrapper::Stats > &acc, const PolymorphicInnerSolverWrapper::Stats &s)
@ ApproxKKT
Find an ε-approximate KKT point in the ∞-norm:
void minimize_update_anderson(LimitedMemoryQR &qr, rmat G, crvec rₖ, crvec rₖ₋₁, crvec gₖ, rvec γ_LS, rvec xₖ_aa)
Solve one step of Anderson acceleration to find a fixed point of a function g(x):
real_t Lγ_factor
Factor that relates step size γ and Lipschitz constant.
Definition: lipschitz.hpp:15
auto project(const Vec &v, const Box &box)
Project a vector onto a box.
Definition: box.hpp:15
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18
unsigned limitedqr_mem
Length of the history to keep in the limited-memory QR algorithm.
real_t δ
Minimum step size for initial finite difference Lipschitz estimate.
Definition: lipschitz.hpp:13
constexpr real_t inf
Definition: vec.hpp:26
unsigned max_no_progress
Maximum number of iterations without any progress before giving up.
real_t L_max
Maximum Lipschitz constant estimate.
SolverStatus
Exit status of a numerical solver such as ALM or PANOC.
Definition: solverstatus.hpp:7
@ Interrupted
Solver was interrupted by the user.
@ Unknown
Initial value.
@ Converged
Converged and reached given tolerance.
@ NotFinite
Intermediate results were infinite or not-a-number.
std::chrono::microseconds max_time
Maximum duration.
realmat mat
Default type for matrices.
Definition: vec.hpp:20
realvec vec
Default type for vectors.
Definition: vec.hpp:14
real_t L₀
Initial estimate of the Lipschitz constant of ∇ψ(x)
Definition: lipschitz.hpp:9
const GAAPGAParams & params
real_t quadratic_upperbound_tolerance_factor
double real_t
Default floating point type.
Definition: vec.hpp:8
unsigned print_interval
When to print progress.
real_t L_min
Minimum Lipschitz constant estimate.
unsigned max_iter
Maximum number of inner iterations.
PANOCStopCrit stop_crit
What stopping criterion to use.
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16
problem
Definition: main.py:16
def cb(it)
Definition: rosenbrock.py:56
Problem description for minimization problems.