alpaqa pi-pico
Nonconvex constrained optimization
panoc-helpers.tpp
#pragma once

#include <alpaqa/config/config.hpp>
#include <alpaqa/export.hpp>
#include <alpaqa/inner/inner-solve-options.hpp>
#include <alpaqa/inner/internal/panoc-stop-crit.hpp>
#include <alpaqa/inner/internal/solverstatus.hpp>
#include <alpaqa/problem/type-erased-problem.hpp>
#include <alpaqa/util/atomic-stop-signal.hpp>

#include <algorithm>
#include <cmath>
#include <limits>
#include <stdexcept>

namespace alpaqa::detail {

template <Config Conf>
struct PANOCHelpers {
    USING_ALPAQA_CONFIG(Conf);
    using Problem = TypeErasedProblem<config_t>;

    /// Calculate the error between ẑ and g(x).
    /// @f[ \hat{z}^k = \Pi_D\left(g(x^k) + \Sigma^{-1}y\right) @f]
    static void calc_err_z(const Problem &p, ///< [in]  Problem description
                           crvec x̂,   ///< [in]  Decision variable @f$ \hat{x} @f$
                           crvec y,    ///< [in]  Lagrange multipliers @f$ y @f$
                           crvec Σ,    ///< [in]  Penalty weights @f$ \Sigma @f$
                           rvec err_z  ///< [out] @f$ g(\hat{x}) - \hat{z} @f$
    ) {
        if (p.get_m() == 0) /* [[unlikely]] */
            return;

        // g(x̂)
        p.eval_g(x̂, err_z);
        // ζ = g(x̂) + Σ⁻¹y
        err_z += y.cwiseQuotient(Σ);
        // ẑ = Π(ζ, D)
        p.eval_proj_diff_g(err_z, err_z);
        // g(x̂) - ẑ
        err_z -= y.cwiseQuotient(Σ);
        // TODO: catastrophic cancellation?
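        // Note: eval_proj_diff_g overwrites err_z with ζ - Π_D(ζ) = ζ - ẑ,
        // so the in-place updates above compute, step by step,
        //     g(x̂)  →  ζ = g(x̂) + Σ⁻¹y  →  ζ - ẑ  →  (ζ - ẑ) - Σ⁻¹y = g(x̂) - ẑ,
        // i.e. the output documented above, without ever storing ẑ itself.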
    }

    /// Check whether the given stopping criterion requires the gradient
    /// @f$ \nabla \psi(\hat x^k) @f$.
    static bool stop_crit_requires_grad_ψx̂(PANOCStopCrit crit) {
        switch (crit) {
            case PANOCStopCrit::ApproxKKT:
            case PANOCStopCrit::ApproxKKT2: return true;
            case PANOCStopCrit::ProjGradNorm:
            case PANOCStopCrit::ProjGradNorm2:
            case PANOCStopCrit::ProjGradUnitNorm:
            case PANOCStopCrit::ProjGradUnitNorm2:
            case PANOCStopCrit::FPRNorm:
            case PANOCStopCrit::FPRNorm2: return false;
            case PANOCStopCrit::Ipopt: return true;
            case PANOCStopCrit::LBFGSBpp: return false;
            default:;
        }
        throw std::out_of_range("Invalid PANOCStopCrit");
    }

    /// Compute the ε from the stopping criterion, see @ref PANOCStopCrit.
    static real_t calc_error_stop_crit(
        const Problem &problem, ///< [in] Problem description
        PANOCStopCrit crit,     ///< [in] What stopping criterion to use
        crvec pₖ,       ///< [in] Projected gradient step @f$ \hat x^k - x^k @f$
        real_t γ,       ///< [in] Step size
        crvec xₖ,       ///< [in] Current iterate
        crvec x̂ₖ,      ///< [in] Current iterate after projected gradient step
        crvec ŷₖ,      ///< [in] Candidate Lagrange multipliers
        crvec grad_ψₖ,  ///< [in] Gradient in @f$ x^k @f$
        crvec grad_̂ψₖ, ///< [in] Gradient in @f$ \hat x^k @f$
        rvec work_n1,   ///< Workspace of dimension n
        rvec work_n2    ///< Workspace of dimension n
    ) {
        using vec_util::norm_1;
        using vec_util::norm_inf;
        switch (crit) {
            case PANOCStopCrit::ApproxKKT: {
                auto err = (1 / γ) * pₖ + (grad_ψₖ - grad_̂ψₖ);
                // The parentheses around (grad_ψₖ - grad_̂ψₖ) are important
                // to prevent catastrophic cancellation when the step is small
                return norm_inf(err);
            }
            case PANOCStopCrit::ApproxKKT2: {
                auto err = (1 / γ) * pₖ + (grad_ψₖ - grad_̂ψₖ);
                // The parentheses around (grad_ψₖ - grad_̂ψₖ) are important
                // to prevent catastrophic cancellation when the step is small
                return err.norm();
            }
            case PANOCStopCrit::ProjGradNorm: {
                return norm_inf(pₖ);
            }
            case PANOCStopCrit::ProjGradNorm2: {
                return pₖ.norm();
            }
            case PANOCStopCrit::ProjGradUnitNorm: {
                problem.eval_prox_grad_step(1, xₖ, grad_ψₖ, work_n1,
                                            work_n2);
                return norm_inf(work_n2);
            }
            case PANOCStopCrit::ProjGradUnitNorm2: {
                problem.eval_prox_grad_step(1, xₖ, grad_ψₖ, work_n1,
                                            work_n2);
                return work_n2.norm();
            }
            case PANOCStopCrit::FPRNorm: {
                return norm_inf(pₖ) / γ;
            }
            case PANOCStopCrit::FPRNorm2: {
                return pₖ.norm() / γ;
            }
            case PANOCStopCrit::Ipopt: {
                // work_n2 ← x̂ₖ - Π_C(x̂ₖ - ∇ψ(x̂ₖ))
                problem.eval_prox_grad_step(1, x̂ₖ, grad_̂ψₖ, work_n1,
                                            work_n2);
                auto err = norm_inf(work_n2);
                auto n   = 2 * (ŷₖ.size() + x̂ₖ.size());
                if (n == 0)
                    return err;
                // work_n2 ← x̂ₖ - ∇ψ(x̂ₖ) - Π_C(x̂ₖ - ∇ψ(x̂ₖ))
                work_n2 -= grad_̂ψₖ;
                auto C_lagr_mult   = norm_1(work_n2);
                auto D_lagr_mult   = norm_1(ŷₖ);
                const real_t s_max = 100;
                const real_t s_n   = static_cast<real_t>(n);
                real_t s_d =
                    std::max(s_max, (C_lagr_mult + D_lagr_mult) / s_n) / s_max;
                return err / s_d;
            }
            case PANOCStopCrit::LBFGSBpp: {
                problem.eval_prox_grad_step(1, xₖ, grad_ψₖ, work_n1,
                                            work_n2);
                return norm_inf(work_n2) / std::fmax(real_t(1), xₖ.norm());
            }
            default:;
        }
        throw std::out_of_range("Invalid PANOCStopCrit");
    }
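    // Summary of the values returned above (with pₖ = x̂ₖ - xₖ, and p̃ₖ the
    // corresponding step for unit step size γ = 1):
    //   ApproxKKT / ApproxKKT2:         ‖pₖ/γ + ∇ψ(xₖ) - ∇ψ(x̂ₖ)‖ (∞- / 2-norm)
    //   ProjGradNorm / ProjGradNorm2:   ‖pₖ‖ (∞- / 2-norm)
    //   ProjGradUnitNorm / -UnitNorm2:  ‖p̃ₖ‖ (∞- / 2-norm)
    //   FPRNorm / FPRNorm2:             ‖pₖ‖ / γ (∞- / 2-norm)
    //   Ipopt:                          ∞-norm of the unit step at x̂ₖ, divided
    //                                   by the Ipopt multiplier scaling s_d
    //   LBFGSBpp:                       ‖p̃ₖ‖∞ / max(1, ‖xₖ‖)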

    /// Increase the estimate of the Lipschitz constant of the objective gradient
    /// and decrease the step size until the quadratic upper bound or descent
    /// lemma is satisfied:
    /// @f[ \psi(x + p) \le \psi(x) + \nabla\psi(x)^\top p + \frac{L}{2} \|p\|^2 @f]
    /// The projected gradient iterate @f$ \hat x^k @f$ and step @f$ p^k @f$ are
    /// updated with the new step size @f$ \gamma^k @f$, and so are other quantities
    /// that depend on them, such as @f$ \nabla\psi(x^k)^\top p^k @f$ and
    /// @f$ \|p^k\|^2 @f$. The intermediate vector @f$ \hat y(x^k) @f$ can be used
    /// to compute the gradient @f$ \nabla\psi(\hat x^k) @f$ or to update the
    /// Lagrange multipliers.
    ///
    /// @return The original step size, before it was reduced by this function.
    static real_t descent_lemma(
        /// [in]    Problem description
        const Problem &problem,
        /// [in]    Tolerance used to ignore rounding errors when the function
        ///         @f$ \psi(x) @f$ is relatively flat or the step size is very
        ///         small, which could cause @f$ \psi(x^k) < \psi(\hat x^k) @f$,
        ///         which is mathematically impossible but could occur in finite
        ///         precision floating point arithmetic.
        real_t rounding_tolerance,
        /// [in]    Maximum allowed Lipschitz constant estimate (prevents an
        ///         infinite loop if the function or derivatives are
        ///         discontinuous, and keeps the step size bounded away from
        ///         zero).
        real_t L_max,
        /// [in]    Current iterate @f$ x^k @f$
        crvec xₖ,
        /// [in]    Objective function @f$ \psi(x^k) @f$
        real_t ψₖ,
        /// [in]    Gradient of objective @f$ \nabla\psi(x^k) @f$
        crvec grad_ψₖ,
        /// [in]    Lagrange multipliers @f$ y @f$
        crvec y,
        /// [in]    Penalty weights @f$ \Sigma @f$
        crvec Σ,
        /// [out]   Projected gradient iterate @f$ \hat x^k @f$
        rvec x̂ₖ,
        /// [out]   Projected gradient step @f$ p^k @f$
        rvec pₖ,
        /// [out]   Intermediate vector @f$ \hat y(\hat x^k) @f$
        rvec ŷx̂ₖ,
        /// [inout] Objective function @f$ \psi(\hat x^k) @f$
        real_t &ψx̂ₖ,
        /// [inout] Squared norm of the step @f$ \left\| p^k \right\|^2 @f$
        real_t &norm_sq_pₖ,
        /// [inout] Gradient of objective times step @f$ \nabla\psi(x^k)^\top p^k @f$
        real_t &grad_ψₖᵀpₖ,
        /// [inout] Lipschitz constant estimate @f$ L_{\nabla\psi}^k @f$
        real_t &Lₖ,
        /// [inout] Step size @f$ \gamma^k @f$
        real_t &γₖ) {
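        // Note for callers: x̂ₖ, pₖ, ψ(x̂ₖ), ∇ψ(xₖ)ᵀpₖ and ‖pₖ‖² must already
        // be valid for the current step size γₖ on entry; they are only
        // re-evaluated below after each halving of γₖ.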

        real_t old_γₖ = γₖ;
        real_t margin = (1 + std::abs(ψₖ)) * rounding_tolerance;
        // Halve γₖ (and double Lₖ) until the quadratic upper bound holds,
        // up to the rounding margin
        while (ψx̂ₖ - ψₖ > grad_ψₖᵀpₖ + real_t(0.5) * Lₖ * norm_sq_pₖ + margin) {
            if (not(Lₖ * 2 <= L_max))
                break;

            Lₖ *= 2;
            γₖ /= 2;

            // Calculate x̂ₖ and pₖ (with new step size)
            problem.eval_prox_grad_step(γₖ, xₖ, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
            // Calculate ∇ψ(xₖ)ᵀpₖ and ‖pₖ‖²
            grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
            norm_sq_pₖ = pₖ.squaredNorm();

            // Calculate ψ(x̂ₖ) and ŷ(x̂ₖ)
            ψx̂ₖ = problem.eval_ψ(x̂ₖ, y, Σ, /* in ⟹ out */ ŷx̂ₖ);
        }
        return old_γₖ;
    }

    /// Check all stop conditions (required tolerance reached, out of time,
    /// maximum number of iterations exceeded, interrupted by user,
    /// infinite iterate, no progress made)
    template <class ParamsT, class DurationT>
    static SolverStatus check_all_stop_conditions(
        /// [in] Parameters including `max_iter`, `max_time` and `max_no_progress`
        const ParamsT &params,
        /// [in] Options for the current solve
        const InnerSolveOptions<config_t> &opts,
        /// [in] Time elapsed since the start of the algorithm
        DurationT time_elapsed,
        /// [in] The current iteration number
        unsigned iteration,
        /// [in] A stop signal for the user to interrupt the algorithm
        const AtomicStopSignal &stop_signal,
        /// [in] Tolerance of the current iterate
        real_t εₖ,
        /// [in] The number of successive iterations no progress was made
        unsigned no_progress) {

        auto max_time = params.max_time;
        if (opts.max_time)
            max_time = std::min(max_time, *opts.max_time);
        auto tolerance = opts.tolerance > 0 ? opts.tolerance : real_t(1e-8);
        bool out_of_time = time_elapsed > max_time;
        bool out_of_iter = iteration == params.max_iter;
        bool interrupted = stop_signal.stop_requested();
        bool not_finite = not std::isfinite(εₖ);
        bool converged = εₖ <= tolerance;
        bool max_no_progress = no_progress > params.max_no_progress;
        return converged          ? SolverStatus::Converged
               : out_of_time      ? SolverStatus::MaxTime
               : out_of_iter      ? SolverStatus::MaxIter
               : not_finite       ? SolverStatus::NotFinite
               : max_no_progress  ? SolverStatus::NoProgress
               : interrupted      ? SolverStatus::Interrupted
                                  : SolverStatus::Busy;
    }

    /// Compute the Hessian matrix of the augmented Lagrangian function multiplied
    /// by the given vector, using finite differences.
    /// @f[ \nabla^2_{xx} L_\Sigma(x, y)\, v \approx
    ///     \frac{\nabla_x L_\Sigma(x+hv, y) - \nabla_x L_\Sigma(x, y)}{h} @f]
    static void calc_augmented_lagrangian_hessian_prod_fd(
        /// [in] Problem description
        const Problem &problem,
        /// [in] Current iterate @f$ x^k @f$
        crvec xₖ,
        /// [in] Lagrange multipliers @f$ y @f$
        crvec y,
        /// [in] Penalty weights @f$ \Sigma @f$
        crvec Σ,
        /// [in] Gradient @f$ \nabla \psi(x^k) @f$
        crvec grad_ψ,
        /// [in] Vector to multiply with the Hessian
        crvec v,
        /// [out] Hessian-vector product
        rvec Hv,
        /// Workspace of dimension n
        rvec work_n1,
        /// Workspace of dimension n
        rvec work_n2,
        /// Workspace of dimension m
        rvec work_m) {

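        // Relative finite difference step: h scales with ‖xₖ‖ so that the
        // perturbation xₖ + h v is not lost to rounding for large iterates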
        real_t cbrt_ε = std::cbrt(std::numeric_limits<real_t>::epsilon());
        real_t h = cbrt_ε * (1 + xₖ.norm());
        rvec xₖh = work_n1;
        xₖh = xₖ + h * v;
        problem.eval_grad_ψ(xₖh, y, Σ, Hv, work_n2, work_m);
        Hv -= grad_ψ;
        Hv /= h;
    }

    /// Estimate the Lipschitz constant of the gradient @f$ \nabla \psi @f$ using
    /// finite differences.
    static real_t initial_lipschitz_estimate(
        /// [in] Problem description
        const Problem &problem,
        /// [in] Current iterate @f$ x^k @f$
        crvec x,
        /// [in] Lagrange multipliers @f$ y @f$
        crvec y,
        /// [in] Penalty weights @f$ \Sigma @f$
        crvec Σ,
        /// [in] Finite difference step size relative to x
        real_t ε,
        /// [in] Minimum absolute finite difference step size
        real_t δ,
        /// [in] Minimum allowed Lipschitz estimate.
        real_t L_min,
        /// [in] Maximum allowed Lipschitz estimate.
        real_t L_max,
        /// [out] @f$ \psi(x^k) @f$
        real_t &ψ,
        /// [out] Gradient @f$ \nabla \psi(x^k) @f$
        rvec grad_ψ,
        /// Workspace of dimension n
        rvec work_x,
        /// Workspace of dimension n
        rvec work_grad_ψ,
        /// Workspace of dimension n
        rvec work_n,
        /// Workspace of dimension m
        rvec work_m) {

        // Calculate ψ(x₀), ∇ψ(x₀)
        ψ = problem.eval_ψ_grad_ψ(x, y, Σ, /* in ⟹ out */ grad_ψ, work_n,
                                  work_m);
        // Select a small step h for finite differences
        auto h =
            (grad_ψ.array() > 0)
                .select((ε * grad_ψ).cwiseMax(δ), (ε * grad_ψ).cwiseMin(-δ));
        work_x = x - h;
        real_t norm_h = h.norm();
        // Calculate ∇ψ(x₀ - h)
        problem.eval_grad_ψ(work_x, y, Σ, /* in ⟹ out */ work_grad_ψ, work_n,
                            work_m);

        // Estimate the Lipschitz constant using finite differences
        real_t L = (work_grad_ψ - grad_ψ).norm() / norm_h;
        return std::clamp(L, L_min, L_max);
    }

    /// Estimate the Lipschitz constant of the gradient @f$ \nabla \psi @f$ using
    /// finite differences.
    static real_t initial_lipschitz_estimate(
        /// [in] Problem description
        const Problem &problem,
        /// [in] Current iterate @f$ x^k @f$
        crvec xₖ,
        /// [in] Lagrange multipliers @f$ y @f$
        crvec y,
        /// [in] Penalty weights @f$ \Sigma @f$
        crvec Σ,
        /// [in] Finite difference step size relative to xₖ
        real_t ε,
        /// [in] Minimum absolute finite difference step size
        real_t δ,
        /// [in] Minimum allowed Lipschitz estimate.
        real_t L_min,
        /// [in] Maximum allowed Lipschitz estimate.
        real_t L_max,
        /// [out] Gradient @f$ \nabla \psi(x^k) @f$
        rvec grad_ψ,
        /// Workspace of dimension n
        rvec work_n1,
        /// Workspace of dimension n
        rvec work_n2,
        /// Workspace of dimension n
        rvec work_n3,
        /// Workspace of dimension m
        rvec work_m) {

        // Select a small step h relative to xₖ, with magnitude at least δ
        auto h = (xₖ * ε).cwiseAbs().cwiseMax(δ);
        work_n1 = xₖ + h;
        real_t norm_h = h.norm();
        // Calculate ∇ψ(x₀ + h)
        problem.eval_grad_ψ(work_n1, y, Σ, /* in ⟹ out */ work_n2, work_n3,
                            work_m);
        // Calculate ∇ψ(x₀)
        problem.eval_grad_ψ(xₖ, y, Σ, /* in ⟹ out */ grad_ψ, work_n1, work_m);

        // Estimate the Lipschitz constant using finite differences
        real_t L = (work_n2 - grad_ψ).norm() / norm_h;
        return std::clamp(L, L_min, L_max);
    }
};

// clang-format off
ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, PANOCHelpers, EigenConfigd);
ALPAQA_IF_FLOAT(ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, PANOCHelpers, EigenConfigf);)
ALPAQA_IF_LONGD(ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, PANOCHelpers, EigenConfigl);)
ALPAQA_IF_QUADF(ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, PANOCHelpers, EigenConfigq);)
// clang-format on

} // namespace alpaqa::detail
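
The helpers above are building blocks for an inner solver rather than a solver by themselves. The fragment below is an illustrative sketch, not taken from this file, of how a plain proximal gradient loop could chain them: estimate an initial Lipschitz constant and step size, take a forward-backward step, enforce the descent lemma by backtracking, evaluate the stopping criterion, and check the stop conditions. The function name example_prox_grad_solve, the local params object, and all numerical constants are hypothetical choices, and the accelerated (L-BFGS) direction used by the real PANOC solvers is omitted.

// Illustrative sketch only; assumes the includes of panoc-helpers.tpp above
// plus <chrono>.
template <alpaqa::Config Conf>
alpaqa::SolverStatus example_prox_grad_solve(
    const alpaqa::TypeErasedProblem<Conf> &problem,
    const alpaqa::InnerSolveOptions<Conf> &opts,
    typename Conf::rvec x, typename Conf::crvec y, typename Conf::crvec Σ) {
    USING_ALPAQA_CONFIG(Conf);
    using Helpers = alpaqa::detail::PANOCHelpers<Conf>;
    using std::chrono::steady_clock;

    const length_t n = problem.get_n(), m = problem.get_m();
    vec grad_ψ(n), grad_ψx̂(n), x̂(n), p(n), ŷx̂(m);
    vec work_n1(n), work_n2(n), work_n3(n), work_m(m);

    // Hypothetical parameters with the fields check_all_stop_conditions reads
    struct {
        unsigned max_iter = 100;
        std::chrono::nanoseconds max_time{std::chrono::seconds(5)};
        unsigned max_no_progress = 10;
    } params;
    alpaqa::AtomicStopSignal stop_signal;
    const auto t0      = steady_clock::now();
    const real_t L_max = real_t(1e10);

    // ψ(x), ∇ψ(x) and an initial Lipschitz / step size estimate
    real_t ψ;
    real_t L = Helpers::initial_lipschitz_estimate(
        problem, x, y, Σ, real_t(1e-6), real_t(1e-12), real_t(1e-10), L_max, ψ,
        grad_ψ, work_n1, work_n2, work_n3, work_m);
    real_t γ = real_t(0.95) / L;

    for (unsigned k = 0;; ++k) {
        // Proximal gradient step x̂ = T_γ(x), p = x̂ - x
        problem.eval_prox_grad_step(γ, x, grad_ψ, x̂, p);
        real_t grad_ψᵀp = grad_ψ.dot(p), pᵀp = p.squaredNorm();
        real_t ψx̂ = problem.eval_ψ(x̂, y, Σ, ŷx̂);
        // Backtrack γ and L until the quadratic upper bound holds
        Helpers::descent_lemma(
            problem, real_t(10) * std::numeric_limits<real_t>::epsilon(), L_max,
            x, ψ, grad_ψ, y, Σ, x̂, p, ŷx̂, ψx̂, pᵀp, grad_ψᵀp, L, γ);
        // ∇ψ(x̂) is needed by the ApproxKKT criterion
        problem.eval_grad_ψ(x̂, y, Σ, grad_ψx̂, work_n2, work_m);
        real_t ε = Helpers::calc_error_stop_crit(
            problem, alpaqa::PANOCStopCrit::ApproxKKT, p, γ, x, x̂, ŷx̂, grad_ψ,
            grad_ψx̂, work_n1, work_n2);
        auto status = Helpers::check_all_stop_conditions(
            params, opts, steady_clock::now() - t0, k, stop_signal, ε, 0);
        if (status != alpaqa::SolverStatus::Busy) {
            x = x̂;
            return status;
        }
        // Accept the step and recompute ψ, ∇ψ at the new iterate
        x = x̂;
        ψ = problem.eval_ψ_grad_ψ(x, y, Σ, grad_ψ, work_n1, work_m);
    }
}
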
real_t eval_prox_grad_step(real_t γ, crvec x, crvec grad_ψ, rvec x̂, rvec p) const
[Required] Function that computes a proximal gradient step.
real_t eval_ψ_grad_ψ(crvec x, crvec y, crvec Σ, rvec grad_ψ, rvec work_n, rvec work_m) const
[Optional] Calculate both ψ(x) and its gradient ∇ψ(x).
length_t get_m() const
[Required] Number of constraints.
real_t eval_ψ(crvec x, crvec y, crvec Σ, rvec ŷ) const
[Optional] Calculate both ψ(x) and the vector ŷ that can later be used to compute ∇ψ.
void eval_g(crvec x, rvec gx) const
[Required] Function that evaluates the constraints g(x).
void eval_proj_diff_g(crvec z, rvec e) const
[Required] Function that evaluates the difference between the given point and its projection onto the set D.
void eval_grad_ψ(crvec x, crvec y, crvec Σ, rvec grad_ψ, rvec work_n, rvec work_m) const
[Optional] Calculate the gradient ∇ψ(x).
#define USING_ALPAQA_CONFIG(Conf)
Definition config.hpp:77
#define ALPAQA_IF_QUADF(...)
Definition config.hpp:223
#define ALPAQA_IF_LONGD(...)
Definition config.hpp:235
#define ALPAQA_IF_FLOAT(...)
Definition config.hpp:229
#define ALPAQA_EXPORT_EXTERN_TEMPLATE(...)
Definition export.hpp:21
auto norm_inf(const Eigen::MatrixBase< Derived > &v)
Get the maximum or infinity-norm of the given vector.
Definition config.hpp:204
auto norm_1(const Eigen::MatrixBase< Derived > &v)
Get the 1-norm of the given vector.
Definition config.hpp:212
@ LBFGSBpp
The stopping criterion used by LBFGS++, see https://lbfgspp.statr.me/doc/classLBFGSpp_1_1LBFGSBParam....
@ ProjGradUnitNorm
∞-norm of the projected gradient with unit step size:
@ ProjGradNorm
∞-norm of the projected gradient with step size γ:
@ Ipopt
The stopping criterion used by Ipopt, see https://link.springer.com/article/10.1007/s10107-004-0559-y...
@ FPRNorm2
2-norm of fixed point residual:
@ ProjGradNorm2
2-norm of the projected gradient with step size γ:
@ ApproxKKT
Find an ε-approximate KKT point in the ∞-norm:
@ FPRNorm
∞-norm of fixed point residual:
@ ApproxKKT2
Find an ε-approximate KKT point in the 2-norm:
@ ProjGradUnitNorm2
2-norm of the projected gradient with unit step size:
SolverStatus
Exit status of a numerical solver such as ALM or PANOC.
@ Interrupted
Solver was interrupted by the user.
@ MaxTime
Maximum allowed execution time exceeded.
@ NoProgress
No progress was made in the last iteration.
@ MaxIter
Maximum number of iterations exceeded.
@ Busy
In progress.
@ Converged
Converged and reached given tolerance.
@ NotFinite
Intermediate results were infinite or not-a-number.
typename Conf::real_t real_t
Definition config.hpp:86
constexpr const auto inf
Definition config.hpp:112
typename Conf::rvec rvec
Definition config.hpp:91
typename Conf::crvec crvec
Definition config.hpp:92
Double-precision double configuration.
Definition config.hpp:176
Single-precision float configuration.
Definition config.hpp:172
long double configuration.
Definition config.hpp:181
static SolverStatus check_all_stop_conditions(const ParamsT &params, const InnerSolveOptions< config_t > &opts, DurationT time_elapsed, unsigned iteration, const AtomicStopSignal &stop_signal, real_t εₖ, unsigned no_progress)
Check all stop conditions (required tolerance reached, out of time, maximum number of iterations exce...
static real_t descent_lemma(const Problem &problem, real_t rounding_tolerance, real_t L_max, crvec xₖ, real_t ψₖ, crvec grad_ψₖ, crvec y, crvec Σ, rvec x̂ₖ, rvec pₖ, rvec ŷx̂ₖ, real_t &ψx̂ₖ, real_t &norm_sq_pₖ, real_t &grad_ψₖᵀpₖ, real_t &Lₖ, real_t &γₖ)
Increase the estimate of the Lipschitz constant of the objective gradient and decrease the step size ...
static bool stop_crit_requires_grad_ψx̂(PANOCStopCrit crit)
static real_t initial_lipschitz_estimate(const Problem &problem, crvec x, crvec y, crvec Σ, real_t ε, real_t δ, real_t L_min, real_t L_max, real_t &ψ, rvec grad_ψ, rvec work_x, rvec work_grad_ψ, rvec work_n, rvec work_m)
Estimate the Lipschitz constant of the gradient using finite differences.
static real_t calc_error_stop_crit(const Problem &problem, PANOCStopCrit crit, crvec pₖ, real_t γ, crvec xₖ, crvec x̂ₖ, crvec ŷₖ, crvec grad_ψₖ, crvec grad_̂ψₖ, rvec work_n1, rvec work_n2)
Compute the ε from the stopping criterion, see PANOCStopCrit.
static real_t initial_lipschitz_estimate(const Problem &problem, crvec xₖ, crvec y, crvec Σ, real_t ε, real_t δ, real_t L_min, real_t L_max, rvec grad_ψ, rvec work_n1, rvec work_n2, rvec work_n3, rvec work_m)
Estimate the Lipschitz constant of the gradient using finite differences.
static void calc_err_z(const Problem &p, crvec x̂, crvec y, crvec Σ, rvec err_z)
Calculate the error between ẑ and g(x).
static void calc_augmented_lagrangian_hessian_prod_fd(const Problem &problem, crvec xₖ, crvec y, crvec Σ, crvec grad_ψ, crvec v, rvec Hv, rvec work_n1, rvec work_n2, rvec work_m)
Compute the Hessian matrix of the augmented Lagrangian function multiplied by the given vector,...