alpaqa cmake-targets
Nonconvex constrained optimization
panoc-helpers.tpp
1#pragma once
2
4#include <alpaqa/export.hpp>
11
12#include <stdexcept>
13
14namespace alpaqa::detail {
15
16template <Config Conf>
17struct PANOCHelpers {
18 USING_ALPAQA_CONFIG(Conf);
21
22 /// Calculate the error between ẑ and g(x).
23 /// @f[ \hat{z}^k = \Pi_D\left(g(x^k) + \Sigma^{-1}y\right) @f]
24 static void calc_err_z(const Problem &p, ///< [in] Problem description
25 crvec x̂, ///< [in] Decision variable @f$ \hat{x} @f$
26 crvec y, ///< [in] Lagrange multipliers @f$ y @f$
27 crvec Σ, ///< [in] Penalty weights @f$ \Sigma @f$
28 rvec err_z ///< [out] @f$ g(\hat{x}) - \hat{z} @f$
29 ) {
30 if (p.get_m() == 0) /* [[unlikely]] */
31 return;
32
33 // g(x̂)
34 p.eval_g(x̂, err_z);
35 // ζ = g(x̂) + Σ⁻¹y
36 err_z += Σ.asDiagonal().inverse() * y;
37 // ẑ = Π(ζ, D)
38 p.eval_proj_diff_g(err_z, err_z);
39 // g(x) - ẑ
40 err_z -= Σ.asDiagonal().inverse() * y;
41 // TODO: catastrophic cancellation?
42 }
43
44 static bool stop_crit_requires_grad_ψx̂(PANOCStopCrit crit) {
45 switch (crit) {
46 case PANOCStopCrit::ApproxKKT: [[fallthrough]];
47 case PANOCStopCrit::ApproxKKT2: return true;
48 case PANOCStopCrit::ProjGradNorm: [[fallthrough]];
49 case PANOCStopCrit::ProjGradNorm2: [[fallthrough]];
50 case PANOCStopCrit::ProjGradUnitNorm: [[fallthrough]];
51 case PANOCStopCrit::ProjGradUnitNorm2: [[fallthrough]];
52 case PANOCStopCrit::FPRNorm: [[fallthrough]];
53 case PANOCStopCrit::FPRNorm2: return false;
54 case PANOCStopCrit::Ipopt: return true;
55 case PANOCStopCrit::LBFGSBpp: return false;
56 default:;
57 }
58 throw std::out_of_range("Invalid PANOCStopCrit");
59 }
60
61 /// Compute the ε from the stopping criterion, see @ref PANOCStopCrit.
62 static real_t calc_error_stop_crit(
63 const Problem &problem, ///< [in] Problem description
64 PANOCStopCrit crit, ///< [in] What stopping criterion to use
65 crvec pₖ, ///< [in] Projected gradient step @f$ \hat x^k - x^k @f$
66 real_t γ, ///< [in] Step size
67 crvec xₖ, ///< [in] Current iterate
68 crvec x̂ₖ, ///< [in] Current iterate after projected gradient step
69 crvec ŷₖ, ///< [in] Candidate Lagrange multipliers
70 crvec grad_ψₖ, ///< [in] Gradient in @f$ x^k @f$
71 crvec grad_̂ψₖ, ///< [in] Gradient in @f$ \hat x^k @f$
72 rvec work_n1, ///< Workspace of dimension n
73 rvec work_n2 ///< Workspace of dimension n
74 ) {
75 switch (crit) {
76 case PANOCStopCrit::ApproxKKT: {
77 auto err = (1 / γ) * pₖ + (grad_ψₖ - grad_̂ψₖ);
78 // These parentheses ^^^ ^^^ are important
79 // to prevent catastrophic cancellation when the step is small
80 return vec_util::norm_inf(err);
81 }
82 case PANOCStopCrit::ApproxKKT2: {
83 auto err = (1 / γ) * pₖ + (grad_ψₖ - grad_̂ψₖ);
84 // These parentheses ^^^ ^^^ are important
85 // to prevent catastrophic cancellation when the step is small
86 return err.norm();
87 }
88 case PANOCStopCrit::ProjGradNorm: {
89 return vec_util::norm_inf(pₖ);
90 }
91 case PANOCStopCrit::ProjGradNorm2: {
92 return pₖ.norm();
93 }
94 case PANOCStopCrit::ProjGradUnitNorm: {
95 problem.eval_prox_grad_step(1, x̂ₖ, grad_̂ψₖ, work_n1,
96 work_n2);
97 return vec_util::norm_inf(work_n2);
98 }
99 case PANOCStopCrit::ProjGradUnitNorm2: {
100 problem.eval_prox_grad_step(1, x̂ₖ, grad_̂ψₖ, work_n1,
101 work_n2);
102 return work_n2.norm();
103 }
104 case PANOCStopCrit::FPRNorm: {
105 return vec_util::norm_inf(pₖ) / γ;
106 }
107 case PANOCStopCrit::FPRNorm2: {
108 return pₖ.norm() / γ;
109 }
110 case PANOCStopCrit::Ipopt: {
111 // work_n2 ← x̂ₖ - Π_C(x̂ₖ - ∇ψ(x̂ₖ))
112 problem.eval_prox_grad_step(1, x̂ₖ, grad_̂ψₖ, work_n1,
113 work_n2);
114 auto err = vec_util::norm_inf(work_n2);
115 auto n = 2 * (ŷₖ.size() + x̂ₖ.size());
116 if (n == 0)
117 return err;
118 // work_n2 ← x̂ₖ - ∇ψ(x̂ₖ) - Π_C(x̂ₖ - ∇ψ(x̂ₖ))
119 work_n2 -= grad_̂ψₖ;
120 auto C_lagr_mult = vec_util::norm_1(work_n2);
121 auto D_lagr_mult = vec_util::norm_1(ŷₖ);
122 const real_t s_max = 100;
123 const real_t s_n = static_cast<real_t>(n);
124 real_t s_d =
125 std::max(s_max, (C_lagr_mult + D_lagr_mult) / s_n) / s_max;
126 return err / s_d;
127 }
128 case PANOCStopCrit::LBFGSBpp: {
129 problem.eval_prox_grad_step(1, x̂ₖ, grad_̂ψₖ, work_n1,
130 work_n2);
131 return vec_util::norm_inf(work_n2) /
132 std::fmax(real_t(1), xₖ.norm());
133 }
134 default:;
135 }
136 throw std::out_of_range("Invalid PANOCStopCrit");
137 }
138
139 /// Increase the estimate of the Lipschitz constant of the objective gradient
140 /// and decrease the step size until quadratic upper bound or descent lemma is
141 /// satisfied:
142 /// @f[ \psi(x + p) \le \psi(x) + \nabla\psi(x)^\top p + \frac{L}{2} \|p\|^2 @f]
143 /// The projected gradient iterate @f$ \hat x^k @f$ and step @f$ p^k @f$ are
144 /// updated with the new step size @f$ \gamma^k @f$, and so are other quantities
145 /// that depend on them, such as @f$ \nabla\psi(x^k)^\top p^k @f$ and
146 /// @f$ \|p\|^2 @f$. The intermediate vector @f$ \hat y(x^k) @f$ can be used to
147 /// compute the gradient @f$ \nabla\psi(\hat x^k) @f$ or to update the Lagrange
148 /// multipliers.
149 ///
150 /// @return The original step size, before it was reduced by this function.
151 static real_t descent_lemma(
152 /// [in] Problem description
153 const Problem &problem,
154 /// [in] Tolerance used to ignore rounding errors when the function
155 /// @f$ \psi(x) @f$ is relatively flat or the step size is very
156 /// small, which could cause @f$ \psi(x^k) < \psi(\hat x^k) @f$,
157 /// which is mathematically impossible but could occur in finite
158 /// precision floating point arithmetic.
159 real_t rounding_tolerance,
160 /// [in] Maximum allowed Lipschitz constant estimate (prevents infinite
161 /// loop if function or derivatives are discontinuous, and keeps
162 /// step size bounded away from zero).
163 real_t L_max,
164 /// [in] Current iterate @f$ x^k @f$
165 crvec xₖ,
166 /// [in] Objective function @f$ \psi(x^k) @f$
167 real_t ψₖ,
168 /// [in] Gradient of objective @f$ \nabla\psi(x^k) @f$
169 crvec grad_ψₖ,
170 /// [in] Lagrange multipliers @f$ y @f$
171 crvec y,
172 /// [in] Penalty weights @f$ \Sigma @f$
173 crvec Σ,
174 /// [out] Projected gradient iterate @f$ \hat x^k @f$
175 rvec x̂ₖ,
176 /// [out] Projected gradient step @f$ p^k @f$
177 rvec pₖ,
178 /// [out] Intermediate vector @f$ \hat y(\hat x^k) @f$
179 rvec ŷx̂ₖ,
180 /// [inout] Objective function @f$ \psi(\hat x^k) @f$
181 real_t &ψx̂ₖ,
182 /// [inout] Squared norm of the step @f$ \left\| p^k \right\|^2 @f$
183 real_t &norm_sq_pₖ,
184 /// [inout] Gradient of objective times step @f$ \nabla\psi(x^k)^\top p^k@f$
185 real_t &grad_ψₖᵀpₖ,
186 /// [inout] Lipschitz constant estimate @f$ L_{\nabla\psi}^k @f$
187 real_t &Lₖ,
188 /// [inout] Step size @f$ \gamma^k @f$
189 real_t &γₖ) {
190
191 real_t old_γₖ = γₖ;
192 real_t margin = (1 + std::abs(ψₖ)) * rounding_tolerance;
193 while (ψx̂ₖ - ψₖ > grad_ψₖᵀpₖ + real_t(0.5) * Lₖ * norm_sq_pₖ + margin) {
194 if (not(Lₖ * 2 <= L_max))
195 break;
196
197 Lₖ *= 2;
198 γₖ /= 2;
199
200 // Calculate x̂ₖ and pₖ (with new step size)
201 problem.eval_prox_grad_step(γₖ, xₖ, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
202 // Calculate ∇ψ(xₖ)ᵀpₖ and ‖pₖ‖²
203 grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
204 norm_sq_pₖ = pₖ.squaredNorm();
205
206 // Calculate ψ(x̂ₖ) and ŷ(x̂ₖ)
207 ψx̂ₖ = problem.eval_ψ(x̂ₖ, y, Σ, /* in ⟹ out */ ŷx̂ₖ);
208 }
209 return old_γₖ;
210 }
211
212 /// Check all stop conditions (required tolerance reached, out of time,
213 /// maximum number of iterations exceeded, interrupted by user,
214 /// infinite iterate, no progress made)
215 template <class ParamsT, class DurationT>
216 static SolverStatus check_all_stop_conditions(
217 /// [in] Parameters including `max_iter`, `max_time` and `max_no_progress`
218 const ParamsT &params,
219 /// [in] Options for the current solve
220 const InnerSolveOptions<config_t> &opts,
221 /// [in] Time elapsed since the start of the algorithm
222 DurationT time_elapsed,
223 /// [in] The current iteration number
224 unsigned iteration,
225 /// [in] A stop signal for the user to interrupt the algorithm
226 const AtomicStopSignal &stop_signal,
227 /// [in] Tolerance of the current iterate
228 real_t εₖ,
229 /// [in] The number of successive iterations no progress was made
230 unsigned no_progress) {
231
232 auto max_time = params.max_time;
233 if (opts.max_time)
234 max_time = std::min(max_time, *opts.max_time);
235 auto tolerance = opts.tolerance > 0 ? opts.tolerance : real_t(1e-8);
236 bool out_of_time = time_elapsed > max_time;
237 bool out_of_iter = iteration == params.max_iter;
238 bool interrupted = stop_signal.stop_requested();
239 bool not_finite = not std::isfinite(εₖ);
240 bool converged = εₖ <= tolerance;
241 bool max_no_progress = no_progress > params.max_no_progress;
242 return converged ? SolverStatus::Converged
243 : out_of_time ? SolverStatus::MaxTime
244 : out_of_iter ? SolverStatus::MaxIter
245 : not_finite ? SolverStatus::NotFinite
246 : max_no_progress ? SolverStatus::NoProgress
247 : interrupted ? SolverStatus::Interrupted
248 : SolverStatus::Busy;
249 }
250
251 /// Compute the Hessian matrix of the augmented Lagrangian function multiplied
252 /// by the given vector, using finite differences.
253 /// @f[ \nabla^2_{xx} L_\Sigma(x, y)\, v \approx
254 /// \frac{\nabla_x L_\Sigma(x+hv, y) - \nabla_x L_\Sigma(x, y)}{h} @f]
255 static void calc_augmented_lagrangian_hessian_prod_fd(
256 /// [in] Problem description
257 const Problem &problem,
258 /// [in] Current iterate @f$ x^k @f$
259 crvec xₖ,
260 /// [in] Lagrange multipliers @f$ y @f$
261 crvec y,
262 /// [in] Penalty weights @f$ \Sigma @f$
263 crvec Σ,
264 /// [in] Gradient @f$ \nabla \psi(x^k) @f$
265 crvec grad_ψ,
266 /// [in] Vector to multiply with the Hessian
267 crvec v,
268 /// [out] Hessian-vector product
269 rvec Hv,
270 /// Dimension n
271 rvec work_n1,
272 /// Dimension n
273 rvec work_n2,
274 /// Dimension m
275 rvec work_m) {
276
277 real_t cbrt_ε = std::cbrt(std::numeric_limits<real_t>::epsilon());
278 real_t h = cbrt_ε * (1 + xₖ.norm());
279 rvec xₖh = work_n1;
280 xₖh = xₖ + h * v;
281 problem.eval_grad_ψ(xₖh, y, Σ, Hv, work_n2, work_m);
282 Hv -= grad_ψ;
283 Hv /= h;
284 }
285
286 /// Estimate the Lipschitz constant of the gradient @f$ \nabla \psi @f$ using
287 /// finite differences.
288 static real_t initial_lipschitz_estimate(
289 /// [in] Problem description
290 const Problem &problem,
291 /// [in] Current iterate @f$ x^k @f$
292 crvec x,
293 /// [in] Lagrange multipliers @f$ y @f$
294 crvec y,
295 /// [in] Penalty weights @f$ \Sigma @f$
296 crvec Σ,
297 /// [in] Finite difference step size relative to x
298 real_t ε,
299 /// [in] Minimum absolute finite difference step size
300 real_t δ,
301 /// [in] Minimum allowed Lipschitz estimate.
302 real_t L_min,
303 /// [in] Maximum allowed Lipschitz estimate.
304 real_t L_max,
305 /// [out] @f$ \psi(x^k) @f$
306 real_t &ψ,
307 /// [out] Gradient @f$ \nabla \psi(x^k) @f$
308 rvec grad_ψ,
309 /// Dimension n
310 rvec work_x,
311 /// Dimension n
312 rvec work_grad_ψ,
313 /// Dimension n
314 rvec work_n,
315 /// Dimension m
316 rvec work_m) {
317
318 // Calculate ψ(x₀), ∇ψ(x₀)
319 ψ = problem.eval_ψ_grad_ψ(x, y, Σ, /* in ⟹ out */ grad_ψ, work_n,
320 work_m);
321 // Select a small step h for finite differences
322 auto h = grad_ψ.unaryExpr([&](real_t g) {
323 return g > 0 ? std::max(g * ε, δ) : std::min(g * ε, -δ);
324 });
325 work_x = x - h;
326 real_t norm_h = h.norm();
327 // Calculate ∇ψ(x₀ - h)
328 problem.eval_grad_ψ(work_x, y, Σ, /* in ⟹ out */ work_grad_ψ, work_n,
329 work_m);
330
331 // Estimate Lipschitz constant using finite differences
332 real_t L = (work_grad_ψ - grad_ψ).norm() / norm_h;
333 return std::clamp(L, L_min, L_max);
334 }
335
336 /// Estimate the Lipschitz constant of the gradient @f$ \nabla \psi @f$ using
337 /// finite differences.
338 static real_t initial_lipschitz_estimate(
339 /// [in] Problem description
340 const Problem &problem,
341 /// [in] Current iterate @f$ x^k @f$
342 crvec xₖ,
343 /// [in] Lagrange multipliers @f$ y @f$
344 crvec y,
345 /// [in] Penalty weights @f$ \Sigma @f$
346 crvec Σ,
347 /// [in] Finite difference step size relative to xₖ
348 real_t ε,
349 /// [in] Minimum absolute finite difference step size
350 real_t δ,
351 /// [in] Minimum allowed Lipschitz estimate.
352 real_t L_min,
353 /// [in] Maximum allowed Lipschitz estimate.
354 real_t L_max,
355 /// [out] Gradient @f$ \nabla \psi(x^k) @f$
356 rvec grad_ψ,
357 /// Dimension n
358 rvec work_n1,
359 /// Dimension n
360 rvec work_n2,
361 /// Dimension n
362 rvec work_n3,
363 /// Dimension m
364 rvec work_m) {
365
366 auto h = (xₖ * ε).cwiseAbs().cwiseMax(δ);
367 work_n1 = xₖ + h;
368 real_t norm_h = h.norm();
369 // Calculate ∇ψ(x_0 + h)
370 problem.eval_grad_ψ(work_n1, y, Σ, /* in ⟹ out */ work_n2, work_n3,
371 work_m);
372 // Calculate ∇ψ(x_0)
373 problem.eval_grad_ψ(xₖ, y, Σ, /* in ⟹ out */ grad_ψ, work_n1, work_m);
374
375 // Estimate Lipschitz constant using finite differences
376 real_t L = (work_n2 - grad_ψ).norm() / norm_h;
377 return std::clamp(L, L_min, L_max);
378 }
379};
380
381// clang-format off
382ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, PANOCHelpers, EigenConfigd);
383ALPAQA_IF_FLOAT(ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, PANOCHelpers, EigenConfigf);)
384ALPAQA_IF_LONGD(ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, PANOCHelpers, EigenConfigl);)
385ALPAQA_IF_QUADF(ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, PANOCHelpers, EigenConfigq);)
386// clang-format on
387
388} // namespace alpaqa::detail
real_t eval_prox_grad_step(real_t γ, crvec x, crvec grad_ψ, rvec x̂, rvec p) const
[Required] Function that computes a proximal gradient step.
real_t eval_ψ_grad_ψ(crvec x, crvec y, crvec Σ, rvec grad_ψ, rvec work_n, rvec work_m) const
[Optional] Calculate both ψ(x) and its gradient ∇ψ(x).
length_t get_m() const
[Required] Number of constraints.
real_t eval_ψ(crvec x, crvec y, crvec Σ, rvec ŷ) const
[Optional] Calculate both ψ(x) and the vector ŷ that can later be used to compute ∇ψ.
void eval_g(crvec x, rvec gx) const
[Required] Function that evaluates the constraints, g(x).
void eval_proj_diff_g(crvec z, rvec e) const
[Required] Function that evaluates the difference between the given point and its projection onto the constraint set D.
void eval_grad_ψ(crvec x, crvec y, crvec Σ, rvec grad_ψ, rvec work_n, rvec work_m) const
[Optional] Calculate the gradient ∇ψ(x).
#define USING_ALPAQA_CONFIG(Conf)
Definition config.hpp:56
#define ALPAQA_IF_QUADF(...)
Definition config.hpp:182
#define ALPAQA_IF_LONGD(...)
Definition config.hpp:194
#define ALPAQA_IF_FLOAT(...)
Definition config.hpp:188
#define ALPAQA_EXPORT_EXTERN_TEMPLATE(...)
Definition export.hpp:21
auto norm_inf(const Eigen::MatrixBase< Derived > &v)
Get the maximum or infinity-norm of the given vector.
Definition config.hpp:163
auto norm_1(const Eigen::MatrixBase< Derived > &v)
Get the 1-norm of the given vector.
Definition config.hpp:171
@ LBFGSBpp
The stopping criterion used by LBFGS++, see https://lbfgspp.statr.me/doc/classLBFGSpp_1_1LBFGSBParam....
@ ProjGradUnitNorm
∞-norm of the projected gradient with unit step size:
@ ProjGradNorm
∞-norm of the projected gradient with step size γ:
@ Ipopt
The stopping criterion used by Ipopt, see https://link.springer.com/article/10.1007/s10107-004-0559-y...
@ FPRNorm2
2-norm of fixed point residual:
@ ProjGradNorm2
2-norm of the projected gradient with step size γ:
@ ApproxKKT
Find an ε-approximate KKT point in the ∞-norm:
@ FPRNorm
∞-norm of fixed point residual:
@ ApproxKKT2
Find an ε-approximate KKT point in the 2-norm:
@ ProjGradUnitNorm2
2-norm of the projected gradient with unit step size:
SolverStatus
Exit status of a numerical solver such as ALM or PANOC.
@ Interrupted
Solver was interrupted by the user.
@ MaxTime
Maximum allowed execution time exceeded.
@ NoProgress
No progress was made in the last iteration.
@ MaxIter
Maximum number of iterations exceeded.
@ Busy
In progress.
@ Converged
Converged and reached given tolerance.
@ NotFinite
Intermediate results were infinite or not-a-number.
typename Conf::real_t real_t
Definition config.hpp:65
constexpr const auto inf
Definition config.hpp:85
typename Conf::rvec rvec
Definition config.hpp:69
typename Conf::crvec crvec
Definition config.hpp:70
Double-precision double configuration.
Definition config.hpp:135
Single-precision float configuration.
Definition config.hpp:131
long double configuration.
Definition config.hpp:140
static SolverStatus check_all_stop_conditions(const ParamsT &params, const InnerSolveOptions< config_t > &opts, DurationT time_elapsed, unsigned iteration, const AtomicStopSignal &stop_signal, real_t εₖ, unsigned no_progress)
Check all stop conditions (required tolerance reached, out of time, maximum number of iterations exceeded, interrupted by user, infinite iterate, no progress made).
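A minimal usage sketch (not part of alpaqa) of how an inner solver's main loop could drive this check each iteration. ParamsT, OptsT and StopSignalT are illustrative placeholders for the solver's parameter struct, InnerSolveOptions<config_t> and AtomicStopSignal, and the snippet assumes the names from the listing above (Config, SolverStatus, PANOCHelpers, USING_ALPAQA_CONFIG) are visible.
#include <chrono>
template <Config Conf, class ParamsT, class OptsT, class StopSignalT>
SolverStatus solve_outline(const ParamsT &params, const OptsT &opts,
                           const StopSignalT &stop_signal) {
    USING_ALPAQA_CONFIG(Conf);
    using Helpers        = PANOCHelpers<Conf>;
    auto t0              = std::chrono::steady_clock::now();
    unsigned no_progress = 0;
    for (unsigned k = 0;; ++k) {
        // ... perform one iteration here, producing the residual εₖ and
        //     updating no_progress ...
        real_t εₖ   = 0; // placeholder for calc_error_stop_crit(...)
        auto status = Helpers::check_all_stop_conditions(
            params, opts, std::chrono::steady_clock::now() - t0, k,
            stop_signal, εₖ, no_progress);
        if (status != SolverStatus::Busy)
            return status; // Converged, MaxIter, MaxTime, NoProgress, ...
    }
}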
static real_t descent_lemma(const Problem &problem, real_t rounding_tolerance, real_t L_max, crvec xₖ, real_t ψₖ, crvec grad_ψₖ, crvec y, crvec Σ, rvec x̂ₖ, rvec pₖ, rvec ŷx̂ₖ, real_t &ψx̂ₖ, real_t &norm_sq_pₖ, real_t &grad_ψₖᵀpₖ, real_t &Lₖ, real_t &γₖ)
Increase the estimate of the Lipschitz constant of the objective gradient and decrease the step size until the quadratic upper bound or descent lemma is satisfied.
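The condition being enforced is easy to state on its own. The following standalone snippet (plain C++, not library code) restates the quadratic-upper-bound test, including the rounding margin computed on line 192 of the listing; descent_lemma keeps doubling Lₖ and halving γₖ until this test passes or Lₖ would exceed L_max.
#include <cmath>
// True once ψ(x̂) ≤ ψ(x) + ∇ψ(x)ᵀp + (L/2)‖p‖² + margin.
bool quadratic_upper_bound_holds(double psi_x, double psi_xhat,
                                 double grad_psi_dot_p, double norm_sq_p,
                                 double L, double rounding_tolerance) {
    double margin = (1 + std::abs(psi_x)) * rounding_tolerance;
    return psi_xhat <= psi_x + grad_psi_dot_p + 0.5 * L * norm_sq_p + margin;
}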
static bool stop_crit_requires_grad_ψx̂(PANOCStopCrit crit)
static real_t initial_lipschitz_estimate(const Problem &problem, crvec x, crvec y, crvec Σ, real_t ε, real_t δ, real_t L_min, real_t L_max, real_t &ψ, rvec grad_ψ, rvec work_x, rvec work_grad_ψ, rvec work_n, rvec work_m)
Estimate the Lipschitz constant of the gradient using finite differences.
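As a plain-Eigen illustration of this overload (a hypothetical helper, not the library API): the gradient is evaluated once more at a slightly perturbed point and the difference quotient gives the estimate, clamped to [L_min, L_max]. The second overload documented further below differs mainly in choosing the step h relative to xₖ instead of ∇ψ(x) and in not returning ψ.
#include <Eigen/Core>
#include <algorithm>
// grad_psi is any callable returning ∇ψ at the given point.
template <class GradFun>
double fd_lipschitz_estimate(const GradFun &grad_psi, const Eigen::VectorXd &x,
                             double eps, double delta, double L_min, double L_max) {
    Eigen::VectorXd g = grad_psi(x);
    // Per-component step proportional to the gradient, at least δ in magnitude
    Eigen::VectorXd h = g.unaryExpr([&](double gi) {
        return gi > 0 ? std::max(gi * eps, delta) : std::min(gi * eps, -delta);
    });
    // L ≈ ‖∇ψ(x − h) − ∇ψ(x)‖ / ‖h‖, kept inside [L_min, L_max]
    double L = (grad_psi(x - h) - g).norm() / h.norm();
    return std::clamp(L, L_min, L_max);
}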
static real_t calc_error_stop_crit(const Problem &problem, PANOCStopCrit crit, crvec pₖ, real_t γ, crvec xₖ, crvec x̂ₖ, crvec ŷₖ, crvec grad_ψₖ, crvec grad_̂ψₖ, rvec work_n1, rvec work_n2)
Compute the ε from the stopping criterion, see PANOCStopCrit.
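For example, the ApproxKKT branch of this function reduces to the short plain-Eigen computation below (illustrative only). Grouping the gradient difference before adding γ⁻¹pₖ mirrors the parenthesization highlighted in the listing, which avoids catastrophic cancellation when the step is small.
#include <Eigen/Core>
// ε = ‖γ⁻¹ p + (∇ψ(x) − ∇ψ(x̂))‖∞ for PANOCStopCrit::ApproxKKT.
double approx_kkt_residual(double gamma, const Eigen::VectorXd &p,
                           const Eigen::VectorXd &grad_psi_x,
                           const Eigen::VectorXd &grad_psi_xhat) {
    Eigen::VectorXd err = (1 / gamma) * p + (grad_psi_x - grad_psi_xhat);
    return err.lpNorm<Eigen::Infinity>();
}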
static real_t initial_lipschitz_estimate(const Problem &problem, crvec xₖ, crvec y, crvec Σ, real_t ε, real_t δ, real_t L_min, real_t L_max, rvec grad_ψ, rvec work_n1, rvec work_n2, rvec work_n3, rvec work_m)
Estimate the Lipschitz constant of the gradient using finite differences.
static void calc_err_z(const Problem &p, crvec x̂, crvec y, crvec Σ, rvec err_z)
Calculate the error between ẑ and g(x).
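A hedged note on how this error is typically consumed: in an augmented-Lagrangian outer loop, e = g(x̂) − ẑ serves both as the constraint-violation measure and as the quantity driving the classic multiplier update y ← y + Σe. The snippet below is a generic illustration of that update, not code from this file.
#include <Eigen/Core>
// Classic ALM update using the output of calc_err_z (err_z = g(x̂) − ẑ).
void alm_multiplier_update(Eigen::VectorXd &y, const Eigen::VectorXd &Sigma,
                           const Eigen::VectorXd &err_z) {
    y += Sigma.asDiagonal() * err_z; // y ← y + Σ (g(x̂) − ẑ)
}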
static void calc_augmented_lagrangian_hessian_prod_fd(const Problem &problem, crvec xₖ, crvec y, crvec Σ, crvec grad_ψ, crvec v, rvec Hv, rvec work_n1, rvec work_n2, rvec work_m)
Compute the Hessian matrix of the augmented Lagrangian function multiplied by the given vector, using finite differences.
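Stripped of the augmented-Lagrangian plumbing, this is the standard forward-difference Hessian-vector product; the sketch below (plain Eigen, illustrative) uses the same step size h = ∛ε_machine · (1 + ‖x‖) as the listing.
#include <Eigen/Core>
#include <cmath>
#include <limits>
// ∇²ψ(x) v ≈ (∇ψ(x + h v) − ∇ψ(x)) / h
template <class GradFun>
Eigen::VectorXd hessian_vec_fd(const GradFun &grad_psi, const Eigen::VectorXd &x,
                               const Eigen::VectorXd &v) {
    double h = std::cbrt(std::numeric_limits<double>::epsilon()) * (1 + x.norm());
    return (grad_psi(x + h * v) - grad_psi(x)) / h;
}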