alpaqa 0.0.1
Nonconvex constrained optimization
standalone/panoc.hpp
Go to the documentation of this file.
1#pragma once
2
7#include <alpaqa/util/box.hpp>
8
9#include <iomanip>
10#include <iostream>
11#include <stdexcept>
12
13namespace alpaqa {
14
15namespace detail {
16
17/// Estimate the Lipschitz constant of the gradient @f$ \nabla \psi @f$ using
18/// finite differences.
19template <class ObjFunT, class ObjGradFunT>
21 /// [in] Objective
22 const ObjFunT &ψ,
23 /// [in] Gradient of
24 const ObjGradFunT &grad_ψ,
25 /// [in] Current iterate @f$ x^k @f$
26 crvec xₖ,
27 /// [in] Finite difference step size relative to xₖ
28 real_t ε,
29 /// [in] Minimum absolute finite difference step size
30 real_t δ,
31 /// [in] Minimum allowed Lipschitz estimate.
32 real_t L_min,
33 /// [in] Maximum allowed Lipschitz estimate.
34 real_t L_max,
35 /// [out] @f$ \psi(x^k) @f$
36 real_t &ψₖ,
37 /// [out] Gradient @f$ \nabla \psi(x^k) @f$
38 rvec grad_ψₖ,
39 /// [in]
40 vec_allocator &alloc) {
41
42 auto h = (xₖ * ε).cwiseAbs().cwiseMax(δ);
43 auto xh = alloc.alloc() = xₖ + h;
44 real_t norm_h = h.norm();
45 auto grad_ψxh = alloc.alloc();
46 // Calculate ∇ψ(x₀ + h)
47 grad_ψ(xh, grad_ψxh);
48 // Calculate ψ(xₖ), ∇ψ(x₀)
49 ψₖ = ψ(xₖ);
50 grad_ψ(xₖ, grad_ψₖ);
51
52 // Estimate Lipschitz constant using finite differences
53 real_t L = (grad_ψxh - grad_ψₖ).norm() / norm_h;
54 alloc.free(xh, grad_ψxh);
55 return std::clamp(L, L_min, L_max);
56}
57
58inline void calc_x̂(real_t γ, ///< [in] Step size
59 crvec x, ///< [in] Decision variable @f$ x @f$
60 const Box &C, ///< [in] Box constraints @f$ C @f$
61 crvec grad_ψ, ///< [in] @f$ \nabla \psi(x^k) @f$
62 rvec x̂, ///< [out] @f$ \hat{x}^k = T_{\gamma^k}(x^k) @f$
63 rvec p ///< [out] @f$ \hat{x}^k - x^k @f$
64) {
65 p = projected_gradient_step(C, γ, x, grad_ψ);
66 x̂ = x + p;
67}
68
69/// Increase the estimate of the Lipschitz constant of the objective gradient
70/// and decrease the step size until quadratic upper bound or descent lemma is
71/// satisfied:
72/// @f[ \psi(x + p) \le \psi(x) + \nabla\psi(x)^\top p + \frac{L}{2} \|p\|^2 @f]
73/// The projected gradient iterate @f$ \hat x^k @f$ and step @f$ p^k @f$ are
74/// updated with the new step size @f$ \gamma^k @f$, and so are other quantities
75/// that depend on them, such as @f$ \nabla\psi(x^k)^\top p^k @f$ and
76/// @f$ \|p\|^2 @f$. The intermediate vector @f$ \hat y(x^k) @f$ can be used to
77/// compute the gradient @f$ \nabla\psi(\hat x^k) @f$ or to update the Lagrange
78/// multipliers.
79///
80/// @return The original step size, before it was reduced by this function.
81template <class ObjFunT>
83 /// [in] Objective.
84 const ObjFunT &ψ,
85 /// [in] Box constraints.
86 const Box &C,
87 /// [in] Tolerance used to ignore rounding errors when the function
88 /// @f$ \psi(x) @f$ is relatively flat or the step size is very
89 /// small, which could cause @f$ \psi(x^k) < \psi(\hat x^k) @f$,
90 /// which is mathematically impossible but could occur in finite
91 /// precision floating point arithmetic.
92 real_t rounding_tolerance,
93 /// [in] Maximum allowed Lipschitz constant estimate (prevents infinite
94 /// loop if function or derivatives are discontinuous, and keeps
95 /// step size bounded away from zero).
96 real_t L_max,
97 /// [in] Current iterate @f$ x^k @f$
98 crvec xₖ,
99 /// [in] Objective function @f$ \psi(x^k) @f$
100 real_t ψₖ,
101 /// [in] Gradient of objective @f$ \nabla\psi(x^k) @f$
102 crvec grad_ψₖ,
103 /// [out] Projected gradient iterate @f$ \hat x^k @f$
104 rvec x̂ₖ,
105 /// [out] Projected gradient step @f$ p^k @f$
106 rvec pₖ,
107 /// [inout] Objective function @f$ \psi(\hat x^k) @f$
108 real_t &ψx̂ₖ,
109 /// [inout] Squared norm of the step @f$ \left\| p^k \right\|^2 @f$
110 real_t &norm_sq_pₖ,
111 /// [inout] Gradient of objective times step @f$ \nabla\psi(x^k)^\top p^k@f$
112 real_t &grad_ψₖᵀpₖ,
113 /// [inout] Lipschitz constant estimate @f$ L_{\nabla\psi}^k @f$
114 real_t &Lₖ,
115 /// [inout] Step size @f$ \gamma^k @f$
116 real_t &γₖ) {
117
118 real_t old_γₖ = γₖ;
119 real_t margin = (1 + std::abs(ψₖ)) * rounding_tolerance;
120 while (ψx̂ₖ - ψₖ > grad_ψₖᵀpₖ + 0.5 * Lₖ * norm_sq_pₖ + margin) {
121 if (not(Lₖ * 2 <= L_max))
122 break;
123
124 Lₖ *= 2;
125 γₖ /= 2;
126
127 // Calculate x̂ₖ and pₖ (with new step size)
128 calc_x̂(γₖ, xₖ, C, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
129 // Calculate ∇ψ(xₖ)ᵀpₖ and ‖pₖ‖²
130 grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
131 norm_sq_pₖ = pₖ.squaredNorm();
132
133 // Calculate ψ(x̂ₖ)
134 ψx̂ₖ = ψ(x̂ₖ);
135 }
136 return old_γₖ;
137}
138
139inline void print_progress(unsigned k, real_t ψₖ, crvec grad_ψₖ, real_t pₖᵀpₖ,
140 real_t γₖ, real_t εₖ) {
141 std::cout << "[PANOC] " << std::setw(6) << k //
142 << ": ψ = " << std::setw(13) << ψₖ //
143 << ", ‖∇ψ‖ = " << std::setw(13) << grad_ψₖ.norm() //
144 << ", ‖p‖ = " << std::setw(13) << std::sqrt(pₖᵀpₖ) //
145 << ", γ = " << std::setw(13) << γₖ //
146 << ", εₖ = " << std::setw(13) << εₖ << "\r\n";
147};
148
149} // namespace detail
150
151using std::chrono::duration_cast;
152using std::chrono::microseconds;
153
154constexpr size_t panoc_min_alloc_size = 10;
155
156template <class ObjFunT, class ObjGradFunT, class DirectionT>
157inline PANOCStats panoc_impl(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C,
158 rvec x, real_t ε, const PANOCParams &params,
159 vec_allocator &alloc,
160 DirectionT &direction_provider) {
161 auto start_time = std::chrono::steady_clock::now();
162 PANOCStats s;
163
164 // Keep track of how many successive iterations didn't update the iterate
165 unsigned no_progress = 0;
166
167 // Future parameters?
168 AtomicStopSignal stop_signal;
169
170 // Estimate Lipschitz constant ---------------------------------------------
171
172 real_t ψₖ, Lₖ;
173 auto grad_ψₖ = alloc.alloc();
174 auto xₖ = alloc.alloc() = x;
175 // Finite difference approximation of ∇²ψ in starting point
176 if (params.Lipschitz.L₀ <= 0) {
178 ψ, grad_ψ, xₖ, params.Lipschitz.ε, params.Lipschitz.δ, params.L_min,
179 params.L_max, ψₖ, grad_ψₖ, alloc);
180 }
181 // Initial Lipschitz constant provided by the user
182 else {
183 Lₖ = params.Lipschitz.L₀;
184 // Calculate ψ(xₖ), ∇ψ(x₀)
185 ψₖ = ψ(xₖ);
186 grad_ψ(xₖ, grad_ψₖ);
187 }
188 if (not std::isfinite(Lₖ)) {
190 return s;
191 }
192 real_t γₖ = params.Lipschitz.Lγ_factor / Lₖ;
193 real_t τ = NaN;
194
195 // First projected gradient step -------------------------------------------
196
197 auto x̂ₖ = alloc.alloc(), pₖ = alloc.alloc(), grad_̂ψₖ = alloc.alloc();
198 auto xₖ₊₁ = alloc.alloc(), qₖ = alloc.alloc(), grad_ψₖ₊₁ = alloc.alloc();
199 auto x̂ₖ₊₁ = alloc.alloc(), pₖ₊₁ = alloc.alloc();
200
201 // Calculate x̂₀, p₀ (projected gradient step)
202 detail::calc_x̂(γₖ, xₖ, C, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
203 // Calculate ψ(x̂ₖ)
204 real_t ψx̂ₖ = ψ(x̂ₖ);
205 real_t grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
206 real_t pₖᵀpₖ = pₖ.squaredNorm();
207 // Compute forward-backward envelope
208 real_t φₖ = ψₖ + 1 / (2 * γₖ) * pₖᵀpₖ + grad_ψₖᵀpₖ;
209
210 // Main PANOC loop
211 // =========================================================================
212 for (unsigned k = 0; k <= params.max_iter; ++k) {
213
214 // Quadratic upper bound -----------------------------------------------
215 if (k == 0 || params.update_lipschitz_in_linesearch == false) {
216 // Decrease step size until quadratic upper bound is satisfied
217 real_t old_γₖ = detail::descent_lemma(
218 ψ, C, params.quadratic_upperbound_tolerance_factor,
219 params.L_max, xₖ, ψₖ, grad_ψₖ,
220 /* in ⟹ out */ x̂ₖ, pₖ,
221 /* inout */ ψx̂ₖ, pₖᵀpₖ, grad_ψₖᵀpₖ, Lₖ, γₖ);
222 if (k > 0 && γₖ != old_γₖ) // Flush L-BFGS if γ changed
223 direction_provider.changed_γ(γₖ, old_γₖ);
224 else if (k == 0) // Initialize L-BFGS
225 direction_provider.initialize(xₖ, x̂ₖ, pₖ, grad_ψₖ);
226 if (γₖ != old_γₖ)
227 φₖ = ψₖ + 1 / (2 * γₖ) * pₖᵀpₖ + grad_ψₖᵀpₖ;
228 }
229 // Calculate ∇ψ(x̂ₖ)
230 grad_ψ(x̂ₖ, grad_̂ψₖ);
231 // Check stop condition ------------------------------------------------
233 C, params.stop_crit, pₖ, γₖ, xₖ, x̂ₖ, vec(), grad_ψₖ, grad_̂ψₖ);
234
235 // Print progress
236 if (params.print_interval != 0 && k % params.print_interval == 0)
237 detail::print_progress(k, ψₖ, grad_ψₖ, pₖᵀpₖ, γₖ, εₖ);
238
239 auto time_elapsed = std::chrono::steady_clock::now() - start_time;
240 auto stop_status = detail::check_all_stop_conditions(
241 params, time_elapsed, k, stop_signal, ε, εₖ, no_progress);
242 if (stop_status != SolverStatus::Unknown) {
243 x = std::move(x̂ₖ);
244 s.iterations = k;
245 s.ε = εₖ;
246 s.elapsed_time = duration_cast<microseconds>(time_elapsed);
247 s.status = stop_status;
248 alloc.free(grad_ψₖ, xₖ, x̂ₖ, pₖ, grad_̂ψₖ, xₖ₊₁, qₖ, grad_ψₖ₊₁, x̂ₖ₊₁,
249 pₖ₊₁);
250 return s;
251 }
252
253 // Calculate quasi-Newton step -----------------------------------------
254 real_t step_size = -1;
256 step_size = 1;
257 if (k > 0)
258 direction_provider.apply(xₖ, x̂ₖ, pₖ, step_size,
259 /* in ⟹ out */ qₖ);
260
261 // Line search initialization ------------------------------------------
262 τ = 1;
263 real_t σₖγₖ⁻¹pₖᵀpₖ = (1 - γₖ * Lₖ) * pₖᵀpₖ / (2 * γₖ);
264 real_t φₖ₊₁, ψₖ₊₁, ψx̂ₖ₊₁, grad_ψₖ₊₁ᵀpₖ₊₁, pₖ₊₁ᵀpₖ₊₁;
265 real_t Lₖ₊₁, γₖ₊₁;
266 real_t ls_cond;
267 // TODO: make separate parameter
268 real_t margin =
269 (1 + std::abs(φₖ)) * params.quadratic_upperbound_tolerance_factor;
270
271 // Make sure quasi-Newton step is valid
272 if (k == 0) {
273 τ = 0; // Always use prox step on first iteration
274 } else if (not qₖ.allFinite()) {
275 τ = 0;
276 ++s.lbfgs_failures;
277 direction_provider.reset(); // Is there anything else we can do?
278 }
279
280 // Line search loop ----------------------------------------------------
281 do {
282 Lₖ₊₁ = Lₖ;
283 γₖ₊₁ = γₖ;
284
285 // Calculate xₖ₊₁
286 if (τ / 2 < params.τ_min) { // line search failed
287 xₖ₊₁.swap(x̂ₖ); // → safe prox step
288 ψₖ₊₁ = ψx̂ₖ;
289 grad_ψₖ₊₁.swap(grad_̂ψₖ);
290 } else { // line search didn't fail (yet)
291 if (τ == 1) // → faster quasi-Newton step
292 xₖ₊₁ = xₖ + qₖ;
293 else
294 xₖ₊₁ = xₖ + (1 - τ) * pₖ + τ * qₖ;
295 // Calculate ψ(xₖ₊₁), ∇ψ(xₖ₊₁)
296 ψₖ₊₁ = ψ(xₖ₊₁);
297 grad_ψ(xₖ₊₁, grad_ψₖ₊₁);
298 }
299
300 // Calculate x̂ₖ₊₁, pₖ₊₁ (projected gradient step in xₖ₊₁)
301 detail::calc_x̂(γₖ₊₁, xₖ₊₁, C, grad_ψₖ₊₁, /* in ⟹ out */ x̂ₖ₊₁, pₖ₊₁);
302 // Calculate ψ(x̂ₖ₊₁)
303 ψx̂ₖ₊₁ = ψ(x̂ₖ₊₁);
304
305 // Quadratic upper bound -------------------------------------------
306 grad_ψₖ₊₁ᵀpₖ₊₁ = grad_ψₖ₊₁.dot(pₖ₊₁);
307 pₖ₊₁ᵀpₖ₊₁ = pₖ₊₁.squaredNorm();
308 real_t pₖ₊₁ᵀpₖ₊₁_ₖ = pₖ₊₁ᵀpₖ₊₁; // prox step with step size γₖ
309
310 if (params.update_lipschitz_in_linesearch == true) {
311 // Decrease step size until quadratic upper bound is satisfied
313 ψ, C, params.quadratic_upperbound_tolerance_factor,
314 params.L_max, xₖ₊₁, ψₖ₊₁, grad_ψₖ₊₁,
315 /* in ⟹ out */ x̂ₖ₊₁, pₖ₊₁,
316 /* inout */ ψx̂ₖ₊₁, pₖ₊₁ᵀpₖ₊₁, grad_ψₖ₊₁ᵀpₖ₊₁, Lₖ₊₁, γₖ₊₁);
317 }
318
319 // Compute forward-backward envelope
320 φₖ₊₁ = ψₖ₊₁ + 1 / (2 * γₖ₊₁) * pₖ₊₁ᵀpₖ₊₁ + grad_ψₖ₊₁ᵀpₖ₊₁;
321 // Compute line search condition
322 ls_cond = φₖ₊₁ - (φₖ - σₖγₖ⁻¹pₖᵀpₖ);
323 if (params.alternative_linesearch_cond)
324 ls_cond -= (0.5 / γₖ₊₁ - 0.5 / γₖ) * pₖ₊₁ᵀpₖ₊₁_ₖ;
325
326 τ /= 2;
327 } while (ls_cond > margin && τ >= params.τ_min);
328
329 // If τ < τ_min the line search failed and we accepted the prox step
330 if (τ < params.τ_min && k != 0) {
332 τ = 0;
333 }
334 if (k != 0) {
335 s.count_τ += 1;
336 s.sum_τ += τ * 2;
337 s.τ_1_accepted += τ * 2 == 1;
338 }
339
340 // Update L-BFGS -------------------------------------------------------
341 if (γₖ != γₖ₊₁) // Flush L-BFGS if γ changed
342 direction_provider.changed_γ(γₖ₊₁, γₖ);
343
344 s.lbfgs_rejected += not direction_provider.update(xₖ, xₖ₊₁, pₖ, pₖ₊₁,
345 grad_ψₖ₊₁, C, γₖ₊₁);
346
347 // Check if we made any progress
348 if (no_progress > 0 || k % params.max_no_progress == 0)
349 no_progress = xₖ == xₖ₊₁ ? no_progress + 1 : 0;
350
351 // Advance step --------------------------------------------------------
352 Lₖ = Lₖ₊₁;
353 γₖ = γₖ₊₁;
354
355 ψₖ = ψₖ₊₁;
356 ψx̂ₖ = ψx̂ₖ₊₁;
357 φₖ = φₖ₊₁;
358
359 xₖ.swap(xₖ₊₁);
360 x̂ₖ.swap(x̂ₖ₊₁);
361 pₖ.swap(pₖ₊₁);
362 grad_ψₖ.swap(grad_ψₖ₊₁);
363 grad_ψₖᵀpₖ = grad_ψₖ₊₁ᵀpₖ₊₁;
364 pₖᵀpₖ = pₖ₊₁ᵀpₖ₊₁;
365 }
366 throw std::logic_error("[PANOC] loop error");
367}
368
369template <class DirectionProviderT = LBFGS, class ObjFunT, class ObjGradFunT>
370PANOCStats panoc(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C, rvec x,
371 real_t ε, const PANOCParams &params,
373 vec_allocator &alloc) {
374 if (alloc.size() < panoc_min_alloc_size)
375 throw std::logic_error("Allocator too small, should be at least " +
376 std::to_string(panoc_min_alloc_size));
377 const auto n = x.size();
378 if (alloc.vector_size() != n)
379 throw std::logic_error("Allocator size mismatch");
380
381 return panoc_impl(ψ, grad_ψ, C, x, ε, params, alloc, direction);
382}
383
384template <class DirectionProviderT = LBFGS, class ObjFunT, class ObjGradFunT>
385PANOCStats panoc(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C, rvec x,
386 real_t ε, const PANOCParams &params,
388 vec_allocator alloc{panoc_min_alloc_size, x.size()};
389 return panoc_impl(ψ, grad_ψ, C, x, ε, params, alloc, direction);
390}
391
392} // namespace alpaqa
size_t size() const
Definition: alloc.hpp:109
Eigen::Index vector_size() const
Definition: alloc.hpp:112
void free(rvec v)
Definition: alloc.hpp:94
real_t calc_error_stop_crit(const Box &C, PANOCStopCrit crit, crvec pₖ, real_t γ, crvec xₖ, crvec x̂ₖ, crvec ŷₖ, crvec grad_ψₖ, crvec grad_̂ψₖ)
Compute the ε from the stopping criterion, see PANOCStopCrit.
SolverStatus check_all_stop_conditions(const ParamsT &params, DurationT time_elapsed, unsigned iteration, const AtomicStopSignal &stop_signal, real_t ε, real_t εₖ, unsigned no_progress)
Check all stop conditions (required tolerance reached, out of time, maximum number of iterations exce...
real_t descent_lemma(const Problem &problem, real_t rounding_tolerance, real_t L_max, crvec xₖ, real_t ψₖ, crvec grad_ψₖ, crvec y, crvec Σ, rvec x̂ₖ, rvec pₖ, rvec ŷx̂ₖ, real_t &ψx̂ₖ, real_t &norm_sq_pₖ, real_t &grad_ψₖᵀpₖ, real_t &Lₖ, real_t &γₖ)
Increase the estimate of the Lipschitz constant of the objective gradient and decrease the step size ...
auto projected_gradient_step(const Box &C, real_t γ, crvec x, crvec grad_ψ)
Projected gradient step.
real_t initial_lipschitz_estimate(const Problem &problem, crvec xₖ, crvec y, crvec Σ, real_t ε, real_t δ, real_t L_min, real_t L_max, real_t &ψ, rvec grad_ψ, rvec work_n1, rvec work_n2, rvec work_n3, rvec work_m)
Estimate the Lipschitz constant of the gradient using finite differences.
void calc_x̂(const Problem &prob, real_t γ, crvec x, crvec grad_ψ, rvec x̂, rvec p)
void print_progress(unsigned k, real_t ψₖ, crvec grad_ψₖ, real_t pₖᵀpₖ, real_t γₖ, real_t εₖ)
int L
Definition: test.py:49
int n
Definition: test.py:40
int ε
Definition: test.py:73
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18
constexpr real_t NaN
Not a number.
Definition: vec.hpp:28
PANOCStats panoc(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C, rvec x, real_t ε, const PANOCParams &params, PANOCDirection< DirectionProviderT > direction, vec_allocator &alloc)
@ Unknown
Initial value.
@ NotFinite
Intermediate results were infinite or not-a-number.
PANOCStats panoc_impl(ObjFunT &ψ, ObjGradFunT &grad_ψ, const Box &C, rvec x, real_t ε, const PANOCParams &params, vec_allocator &alloc, DirectionT &direction_provider)
realvec vec
Default type for vectors.
Definition: vec.hpp:14
constexpr size_t panoc_min_alloc_size
std::chrono::microseconds elapsed_time
double real_t
Default floating point type.
Definition: vec.hpp:8
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16
Tuning parameters for the PANOC algorithm.