alpaqa 0.0.1
Nonconvex constrained optimization
second-order-panoc.hpp
Go to the documentation of this file.
1#pragma once
2
5
6#include <cassert>
7#include <cmath>
8#include <iomanip>
9#include <iostream>
10#include <stdexcept>
11
12#include <Eigen/Cholesky>
13
14namespace alpaqa {
15
16using std::chrono::duration_cast;
17using std::chrono::microseconds;
18
20 /// [in] Problem description
21 const Problem &problem,
22 /// [in] Constraint weights @f$ \Sigma @f$
23 crvec Σ,
24 /// [in] Tolerance @f$ \varepsilon @f$
25 real_t ε,
26 /// [in] Overwrite @p x, @p y and @p err_z even if not converged
27 bool always_overwrite_results,
28 /// [inout] Decision variable @f$ x @f$
29 rvec x,
30 /// [inout] Lagrange multipliers @f$ y @f$
31 rvec y,
32 /// [out] Slack variable error @f$ g(x) - z @f$
33 rvec err_z) {
34
35 auto start_time = std::chrono::steady_clock::now();
36 Stats s;
37
38 const auto n = problem.n;
39 const auto m = problem.m;
40
41 // Allocate vectors, init L-BFGS -------------------------------------------
42
43 // TODO: the L-BFGS objects and vectors allocate on each iteration of ALM,
44 // and there are more vectors than strictly necessary.
45
46 vec xₖ = x, // Value of x at the beginning of the iteration
47 x̂ₖ(n), // Value of x after a projected gradient step
48 xₖ₊₁(n), // xₖ for next iteration
49 x̂ₖ₊₁(n), // x̂ₖ for next iteration
50 ŷx̂ₖ(m), // ŷ(x̂ₖ) = Σ (g(x̂ₖ) - ẑₖ)
51 ŷx̂ₖ₊₁(m), // ŷ(x̂ₖ) for next iteration
52 pₖ(n), // Projected gradient step pₖ = x̂ₖ - xₖ
53 pₖ₊₁(n), // Projected gradient step pₖ₊₁ = x̂ₖ₊₁ - xₖ₊₁
54 qₖ(n), // Newton step Hₖ pₖ
55 grad_ψₖ(n), // ∇ψ(xₖ)
56 grad_̂ψₖ(n), // ∇ψ(x̂ₖ)
57 grad_ψₖ₊₁(n); // ∇ψ(xₖ₊₁)
58
59 vec work_n(n), work_m(m);
60
61 mat H(n, n); // Hessian of the (augmented) Lagrangian function and ψ
62 vec g(m), // Value of the constraint function
63 grad_gi(n); // Gradient of one of the constraints
64
65 using indexvec = std::vector<vec::Index>;
66 using bvec = Eigen::Matrix<bool, Eigen::Dynamic, 1>;
67 bvec inactive_C = bvec::Zero(n);
68
69 indexvec J, K;
70 J.reserve(n);
71 K.reserve(n);
72 vec qJ(n); // Solution of Newton Hessian system
73 vec rhs(n);
74
75 // Keep track of how many successive iterations didn't update the iterate
76 unsigned no_progress = 0;
77
78 // Helper functions --------------------------------------------------------
79
80 // Wrappers for helper functions that automatically pass along any arguments
81 // that are constant within PANOC (for readability in the main algorithm)
82 auto calc_ψ_ŷ = [&problem, &y, &Σ](crvec x, rvec ŷ) {
83 return detail::calc_ψ_ŷ(problem, x, y, Σ, ŷ);
84 };
85 auto calc_ψ_grad_ψ = [&problem, &y, &Σ, &work_n, &work_m](crvec x,
86 rvec grad_ψ) {
87 return detail::calc_ψ_grad_ψ(problem, x, y, Σ, grad_ψ, work_n, work_m);
88 };
89 auto calc_grad_ψ_from_ŷ = [&problem, &work_n](crvec x, crvec ŷ,
90 rvec grad_ψ) {
91 detail::calc_grad_ψ_from_ŷ(problem, x, ŷ, grad_ψ, work_n);
92 };
93 auto calc_x̂ = [&problem](real_t γ, crvec x, crvec grad_ψ, rvec x̂, rvec p) {
94 detail::calc_x̂(problem, γ, x, grad_ψ, x̂, p);
95 };
96 auto calc_err_z = [&problem, &y, &Σ](crvec x̂, rvec err_z) {
98 };
99 auto descent_lemma = [this, &problem, &y,
100 &Σ](crvec xₖ, real_t ψₖ, crvec grad_ψₖ, rvec x̂ₖ,
101 rvec pₖ, rvec ŷx̂ₖ, real_t &ψx̂ₖ, real_t &pₖᵀpₖ,
102 real_t &grad_ψₖᵀpₖ, real_t &Lₖ, real_t &γₖ) {
105 xₖ, ψₖ, grad_ψₖ, y, Σ, x̂ₖ, pₖ, ŷx̂ₖ, ψx̂ₖ, pₖᵀpₖ, grad_ψₖᵀpₖ, Lₖ, γₖ);
106 };
107 auto print_progress = [&](unsigned k, real_t ψₖ, crvec grad_ψₖ,
108 real_t pₖᵀpₖ, real_t γₖ, real_t εₖ) {
109 std::cout << "[PANOC] " << std::setw(6) << k
110 << ": ψ = " << std::setw(13) << ψₖ
111 << ", ‖∇ψ‖ = " << std::setw(13) << grad_ψₖ.norm()
112 << ", ‖p‖ = " << std::setw(13) << std::sqrt(pₖᵀpₖ)
113 << ", γ = " << std::setw(13) << γₖ
114 << ", εₖ = " << std::setw(13) << εₖ << "\r\n";
115 };
116
117 // Estimate Lipschitz constant ---------------------------------------------
118
119 real_t ψₖ, Lₖ;
120 // Finite difference approximation of ∇²ψ in starting point
121 if (params.Lipschitz.L₀ <= 0) {
125 /* in ⟹ out */ ψₖ, grad_ψₖ, x̂ₖ, grad_̂ψₖ, work_n, work_m);
126 }
127 // Initial Lipschitz constant provided by the user
128 else {
129 Lₖ = params.Lipschitz.L₀;
130 // Calculate ψ(xₖ), ∇ψ(x₀)
131 ψₖ = calc_ψ_grad_ψ(xₖ, /* in ⟹ out */ grad_ψₖ);
132 }
133 if (not std::isfinite(Lₖ)) {
135 return s;
136 }
137 real_t γₖ = params.Lipschitz.Lγ_factor / Lₖ;
138
139 // First projected gradient step -------------------------------------------
140
141 // Calculate x̂₀, p₀ (projected gradient step)
142 calc_x̂(γₖ, xₖ, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
143 // Calculate ψ(x̂ₖ) and ŷ(x̂ₖ)
144 real_t ψx̂ₖ = calc_ψ_ŷ(x̂ₖ, /* in ⟹ out */ ŷx̂ₖ);
145 real_t grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
146 real_t pₖᵀpₖ = pₖ.squaredNorm();
147 // Forward-backward envelope
148 real_t φₖ;
149
150 // Main PANOC loop
151 // =========================================================================
152 for (unsigned k = 0; k <= params.max_iter; ++k) {
153
154 // Quadratic upper bound -----------------------------------------------
155 if (k == 0 || params.update_lipschitz_in_linesearch == false) {
156 // Decrease step size until quadratic upper bound is satisfied
157 descent_lemma(xₖ, ψₖ, grad_ψₖ,
158 /* in ⟹ out */ x̂ₖ, pₖ, ŷx̂ₖ,
159 /* inout */ ψx̂ₖ, pₖᵀpₖ, grad_ψₖᵀpₖ, Lₖ, γₖ);
160 φₖ = ψₖ + 1 / (2 * γₖ) * pₖᵀpₖ + grad_ψₖᵀpₖ;
161 }
162 // Calculate ∇ψ(x̂ₖ)
163 calc_grad_ψ_from_ŷ(x̂ₖ, ŷx̂ₖ, /* in ⟹ out */ grad_̂ψₖ);
164
165 // Check stop condition ------------------------------------------------
167 problem.C, params.stop_crit, pₖ, γₖ, xₖ, x̂ₖ, ŷx̂ₖ, grad_ψₖ, grad_̂ψₖ);
168
169 // Print progress
170 if (params.print_interval != 0 && k % params.print_interval == 0)
171 print_progress(k, ψₖ, grad_ψₖ, pₖᵀpₖ, γₖ, εₖ);
172 if (progress_cb)
173 progress_cb({k, xₖ, pₖ, pₖᵀpₖ, x̂ₖ, ψₖ, grad_ψₖ, ψx̂ₖ, grad_̂ψₖ, Lₖ,
174 γₖ, εₖ, Σ, y, problem, params});
175
176 auto time_elapsed = std::chrono::steady_clock::now() - start_time;
177 auto stop_status = detail::check_all_stop_conditions(
178 params, time_elapsed, k, stop_signal, ε, εₖ, no_progress);
179 if (stop_status != SolverStatus::Unknown) {
 180 // TODO: We could cache g(x) and ẑ, but would that be faster?
181 // It saves 1 evaluation of g per ALM iteration, but requires
182 // many extra stores in the inner loops of PANOC.
183 // TODO: move the computation of ẑ and g(x) to ALM?
184 if (stop_status == SolverStatus::Converged ||
185 stop_status == SolverStatus::Interrupted ||
186 always_overwrite_results) {
187 calc_err_z(x̂ₖ, /* in ⟹ out */ err_z);
188 x = std::move(x̂ₖ);
189 y = std::move(ŷx̂ₖ);
190 }
191 s.iterations = k;
192 s.ε = εₖ;
193 s.elapsed_time = duration_cast<microseconds>(time_elapsed);
194 s.status = stop_status;
195 return s;
196 }
197
198 // Calculate Newton step -----------------------------------------------
199
200 // TODO: all of this is suboptimal and ugly :(
201
202 // TODO: write helper lambda above
204 grad_gi);
205
206 K.clear();
207 J.clear();
208 for (vec::Index i = 0; i < n; ++i) {
209 real_t gd_step = xₖ(i) - γₖ * grad_ψₖ(i);
210 if (gd_step < problem.C.lowerbound(i)) {
211 K.push_back(i);
212 } else if (problem.C.upperbound(i) < gd_step) {
213 K.push_back(i);
214 } else {
215 J.push_back(i);
216 }
217 }
218 qₖ = pₖ;
219 bool newton_success = true;
220 if (not J.empty()) {
221 // Compute right-hand side of 6.1c
222 vec::Index ri = 0;
223 for (auto j : J) {
224 real_t hess_q = 0;
225 for (auto k : K)
226 hess_q += H(j, k) * qₖ(k);
227 rhs(ri++) = -grad_ψₖ(j) - hess_q;
228 }
229
230 // Permute H to get the xJxJ block in the top left
231 auto hess_Ljj = H.block(0, 0, J.size(), J.size());
232 vec::Index r = 0;
233 for (auto rj : J) {
234 vec::Index c = 0;
235 for (auto cj : J) {
236 hess_Ljj(r, c) = H(rj, cj);
237 ++c;
238 }
239 ++r;
240 }
241 auto ldl = hess_Ljj.ldlt(); // TODO: does this allocate?
242 if (ldl.isPositive()) {
243 qJ.topRows(J.size()) = ldl.solve(rhs.topRows(J.size()));
244 } else {
245 std::cerr << "\x1b[0;31m"
246 "not PD"
247 "\x1b[0m"
248 << std::endl;
249 // newton_success = false; // TODO
250 }
251
252 r = 0;
253 for (auto j : J)
254 qₖ(j) = qJ(r++);
255 }
256
257 // Line search initialization ------------------------------------------
258 real_t τ = 1;
259 real_t σₖγₖ⁻¹pₖᵀpₖ = (1 - γₖ * Lₖ) * pₖᵀpₖ / (2 * γₖ);
260 real_t φₖ₊₁, ψₖ₊₁, ψx̂ₖ₊₁, grad_ψₖ₊₁ᵀpₖ₊₁, pₖ₊₁ᵀpₖ₊₁;
261 real_t Lₖ₊₁, γₖ₊₁;
262 real_t ls_cond;
263
264 // Make sure quasi-Newton step is valid
265 if (not newton_success || not qₖ.allFinite()) {
266 τ = 0;
267 ++s.newton_failures;
268 }
269
270 // Line search loop ----------------------------------------------------
271 do {
272 Lₖ₊₁ = Lₖ;
273 γₖ₊₁ = γₖ;
274
275 // Calculate xₖ₊₁
276 if (τ / 2 < params.τ_min) { // line search failed
277 xₖ₊₁.swap(x̂ₖ); // → safe prox step
278 ψₖ₊₁ = ψx̂ₖ;
279 grad_ψₖ₊₁.swap(grad_̂ψₖ);
280 } else { // line search didn't fail (yet)
281 if (τ == 1) // → faster quasi-Newton step
282 xₖ₊₁ = xₖ + qₖ;
283 else
284 xₖ₊₁ = xₖ + (1 - τ) * pₖ + τ * qₖ;
285 // Calculate ψ(xₖ₊₁), ∇ψ(xₖ₊₁)
286 ψₖ₊₁ = calc_ψ_grad_ψ(xₖ₊₁, /* in ⟹ out */ grad_ψₖ₊₁);
287 }
288
289 // Calculate x̂ₖ₊₁, pₖ₊₁ (projected gradient step in xₖ₊₁)
290 calc_x̂(γₖ₊₁, xₖ₊₁, grad_ψₖ₊₁, /* in ⟹ out */ x̂ₖ₊₁, pₖ₊₁);
291 // Calculate ψ(x̂ₖ₊₁) and ŷ(x̂ₖ₊₁)
292 ψx̂ₖ₊₁ = calc_ψ_ŷ(x̂ₖ₊₁, /* in ⟹ out */ ŷx̂ₖ₊₁);
293
294 // Quadratic upper bound -------------------------------------------
295 grad_ψₖ₊₁ᵀpₖ₊₁ = grad_ψₖ₊₁.dot(pₖ₊₁);
296 pₖ₊₁ᵀpₖ₊₁ = pₖ₊₁.squaredNorm();
297 real_t pₖ₊₁ᵀpₖ₊₁_ₖ = pₖ₊₁ᵀpₖ₊₁; // prox step with step size γₖ
298
300 // Decrease step size until quadratic upper bound is satisfied
301 descent_lemma(xₖ₊₁, ψₖ₊₁, grad_ψₖ₊₁,
302 /* in ⟹ out */ x̂ₖ₊₁, pₖ₊₁, ŷx̂ₖ₊₁,
303 /* inout */ ψx̂ₖ₊₁, pₖ₊₁ᵀpₖ₊₁, grad_ψₖ₊₁ᵀpₖ₊₁,
304 Lₖ₊₁, γₖ₊₁);
305 }
306
307 // Compute forward-backward envelope
308 φₖ₊₁ = ψₖ₊₁ + 1 / (2 * γₖ₊₁) * pₖ₊₁ᵀpₖ₊₁ + grad_ψₖ₊₁ᵀpₖ₊₁;
309 // Compute line search condition
310 ls_cond = φₖ₊₁ - (φₖ - σₖγₖ⁻¹pₖᵀpₖ);
312 ls_cond -= (0.5 / γₖ₊₁ - 0.5 / γₖ) * pₖ₊₁ᵀpₖ₊₁_ₖ;
313
314 τ /= 2;
315 } while (ls_cond > 0 && τ >= params.τ_min);
316
 317 // If τ < τ_min, the line search failed and we accepted the prox step
318 if (τ < params.τ_min && k != 0) {
320 τ = 0;
321 }
322 if (k != 0) {
323 s.count_τ += 1;
324 s.sum_τ += τ * 2;
325 s.τ_1_accepted += τ * 2 == 1;
326 }
327
328 // Check if we made any progress
329 if (no_progress > 0 || k % params.max_no_progress == 0)
330 no_progress = xₖ == xₖ₊₁ ? no_progress + 1 : 0;
331
332 // Advance step --------------------------------------------------------
333 Lₖ = Lₖ₊₁;
334 γₖ = γₖ₊₁;
335
336 ψₖ = ψₖ₊₁;
337 ψx̂ₖ = ψx̂ₖ₊₁;
338 φₖ = φₖ₊₁;
339
340 xₖ.swap(xₖ₊₁);
341 x̂ₖ.swap(x̂ₖ₊₁);
342 ŷx̂ₖ.swap(ŷx̂ₖ₊₁);
343 pₖ.swap(pₖ₊₁);
344 grad_ψₖ.swap(grad_ψₖ₊₁);
345 grad_ψₖᵀpₖ = grad_ψₖ₊₁ᵀpₖ₊₁;
346 pₖᵀpₖ = pₖ₊₁ᵀpₖ₊₁;
347 }
348 throw std::logic_error("[PANOC] loop error");
349}
350
351} // namespace alpaqa
std::function< void(const ProgressInfo &)> progress_cb
Stats operator()(const Problem &problem, crvec Σ, real_t ε, bool always_overwrite_results, rvec x, rvec y, rvec err_z)
real_t calc_error_stop_crit(const Box &C, PANOCStopCrit crit, crvec pₖ, real_t γ, crvec xₖ, crvec x̂ₖ, crvec ŷₖ, crvec grad_ψₖ, crvec grad_̂ψₖ)
Compute the ε from the stopping criterion, see PANOCStopCrit.
SolverStatus check_all_stop_conditions(const ParamsT &params, DurationT time_elapsed, unsigned iteration, const AtomicStopSignal &stop_signal, real_t ε, real_t εₖ, unsigned no_progress)
Check all stop conditions (required tolerance reached, out of time, maximum number of iterations exceeded, interrupted, or no progress made).
real_t calc_ψ_ŷ(const Problem &p, crvec x, crvec y, crvec Σ, rvec ŷ)
Calculate both ψ(x) and the vector ŷ that can later be used to compute ∇ψ.
real_t descent_lemma(const Problem &problem, real_t rounding_tolerance, real_t L_max, crvec xₖ, real_t ψₖ, crvec grad_ψₖ, crvec y, crvec Σ, rvec x̂ₖ, rvec pₖ, rvec ŷx̂ₖ, real_t &ψx̂ₖ, real_t &norm_sq_pₖ, real_t &grad_ψₖᵀpₖ, real_t &Lₖ, real_t &γₖ)
Increase the estimate of the Lipschitz constant of the objective gradient and decrease the step size ...
void calc_augmented_lagrangian_hessian(const Problem &problem, crvec xₖ, crvec ŷxₖ, crvec y, crvec Σ, rvec g, mat &H, rvec work_n)
Compute the Hessian matrix of the augmented Lagrangian function.
real_t calc_ψ_grad_ψ(const Problem &p, crvec x, crvec y, crvec Σ, rvec grad_ψ, rvec work_n, rvec work_m)
Calculate both ψ(x) and its gradient ∇ψ(x).
void calc_err_z(const Problem &p, crvec x̂, crvec y, crvec Σ, rvec err_z)
Calculate the error between ẑ and g(x).
real_t initial_lipschitz_estimate(const Problem &problem, crvec xₖ, crvec y, crvec Σ, real_t ε, real_t δ, real_t L_min, real_t L_max, real_t &ψ, rvec grad_ψ, rvec work_n1, rvec work_n2, rvec work_n3, rvec work_m)
Estimate the Lipschitz constant of the gradient using finite differences.
void calc_grad_ψ_from_ŷ(const Problem &p, crvec x, crvec ŷ, rvec grad_ψ, rvec work_n)
Calculate ∇ψ(x) using ŷ.
void calc_x̂(const Problem &prob, real_t γ, crvec x, crvec grad_ψ, rvec x̂, rvec p)
void print_progress(unsigned k, real_t ψₖ, crvec grad_ψₖ, real_t pₖᵀpₖ, real_t γₖ, real_t εₖ)
int Σ
Definition: test.py:72
int m
Definition: test.py:41
int n
Definition: test.py:40
int ε
Definition: test.py:73
LipschitzEstimateParams Lipschitz
Parameters related to the Lipschitz constant estimate and step size.
real_t Lγ_factor
Factor that relates step size γ and Lipschitz constant.
Definition: lipschitz.hpp:15
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18
real_t δ
Minimum step size for initial finite difference Lipschitz estimate.
Definition: lipschitz.hpp:13
unsigned max_no_progress
Maximum number of iterations without any progress before giving up.
real_t L_max
Maximum Lipschitz constant estimate.
@ Interrupted
Solver was interrupted by the user.
@ Unknown
Initial value.
@ Converged
Converged and reached given tolerance.
@ NotFinite
Intermediate results were infinite or not-a-number.
realmat mat
Default type for matrices.
Definition: vec.hpp:20
realvec vec
Default type for vectors.
Definition: vec.hpp:14
real_t ε
Relative step size for initial finite difference Lipschitz estimate.
Definition: lipschitz.hpp:11
real_t L₀
Initial estimate of the Lipschitz constant of ∇ψ(x)
Definition: lipschitz.hpp:9
double real_t
Default floating point type.
Definition: vec.hpp:8
unsigned print_interval
When to print progress.
real_t τ_min
Minimum weight factor between Newton step and projected gradient step.
real_t L_min
Minimum Lipschitz constant estimate.
unsigned max_iter
Maximum number of inner PANOC iterations.
PANOCStopCrit stop_crit
What stopping criterion to use.
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16
problem
Definition: main.py:16
H
Definition: main.py:8
Problem description for minimization problems.