alpaqa 0.0.1
Nonconvex constrained optimization
inner/panoc.hpp
Go to the documentation of this file.
1#pragma once
2
6
7#include <cassert>
8#include <cmath>
9#include <iomanip>
10#include <iostream>
11#include <stdexcept>
12
13namespace alpaqa {
14
15using std::chrono::duration_cast;
16using std::chrono::microseconds;
17
18template <class DirectionProviderT>
20 return "PANOCSolver<" + direction_provider.get_name() + ">";
21}
22
23template <class DirectionProviderT>
26 /// [in] Problem description
27 const Problem &problem,
28 /// [in] Constraint weights @f$ \Sigma @f$
29 crvec Σ,
30 /// [in] Tolerance @f$ \varepsilon @f$
31 real_t ε,
32 /// [in] Overwrite @p x, @p y and @p err_z even if not converged
33 bool always_overwrite_results,
34 /// [inout] Decision variable @f$ x @f$
35 rvec x,
36 /// [inout] Lagrange multipliers @f$ y @f$
37 rvec y,
38 /// [out] Slack variable error @f$ g(x) - z @f$
39 rvec err_z) {
40
41 auto start_time = std::chrono::steady_clock::now();
42 Stats s;
43
44 const auto n = problem.n;
45 const auto m = problem.m;
46
47 // Allocate vectors, init L-BFGS -------------------------------------------
48
49 // TODO: the L-BFGS objects and vectors allocate on each iteration of ALM,
50 // and there are more vectors than strictly necessary.
51
52 bool need_grad_̂ψₖ = detail::stop_crit_requires_grad_̂ψₖ(params.stop_crit);
53
54 vec xₖ = x, // Value of x at the beginning of the iteration
55 x̂ₖ(n), // Value of x after a projected gradient step
56 xₖ₊₁(n), // xₖ for next iteration
57 x̂ₖ₊₁(n), // x̂ₖ for next iteration
58 ŷx̂ₖ(m), // ŷ(x̂ₖ) = Σ (g(x̂ₖ) - ẑₖ)
59 ŷx̂ₖ₊₁(m), // ŷ(x̂ₖ) for next iteration
60 pₖ(n), // Projected gradient step pₖ = x̂ₖ - xₖ
61 pₖ₊₁(n), // Projected gradient step pₖ₊₁ = x̂ₖ₊₁ - xₖ₊₁
62 qₖ(n), // Newton step Hₖ pₖ
63 grad_ψₖ(n), // ∇ψ(xₖ)
64 grad_̂ψₖ(need_grad_̂ψₖ ? n : 0), // ∇ψ(x̂ₖ)
65 grad_ψₖ₊₁(n); // ∇ψ(xₖ₊₁)
66
67 vec work_n(n), work_m(m);
68
69 // Keep track of how many successive iterations didn't update the iterate
70 unsigned no_progress = 0;
71
72 // Helper functions --------------------------------------------------------
73
74 // Wrappers for helper functions that automatically pass along any arguments
75 // that are constant within PANOC (for readability in the main algorithm)
76 auto calc_ψ_ŷ = [&problem, &y, &Σ](crvec x, rvec ŷ) {
77 return detail::calc_ψ_ŷ(problem, x, y, Σ, ŷ);
78 };
79 auto calc_ψ_grad_ψ = [&problem, &y, &Σ, &work_n, &work_m](crvec x,
80 rvec grad_ψ) {
81 return detail::calc_ψ_grad_ψ(problem, x, y, Σ, grad_ψ, work_n, work_m);
82 };
83 auto calc_grad_ψ_from_ŷ = [&problem, &work_n](crvec x, crvec ŷ,
84 rvec grad_ψ) {
85 detail::calc_grad_ψ_from_ŷ(problem, x, ŷ, grad_ψ, work_n);
86 };
87 auto calc_x̂ = [&problem](real_t γ, crvec x, crvec grad_ψ, rvec x̂, rvec p) {
88 detail::calc_x̂(problem, γ, x, grad_ψ, x̂, p);
89 };
90 auto calc_err_z = [&problem, &y, &Σ](crvec x̂, rvec err_z) {
92 };
93 auto descent_lemma = [this, &problem, &y,
94 &Σ](crvec xₖ, real_t ψₖ, crvec grad_ψₖ, rvec x̂ₖ,
95 rvec pₖ, rvec ŷx̂ₖ, real_t &ψx̂ₖ, real_t &pₖᵀpₖ,
96 real_t &grad_ψₖᵀpₖ, real_t &Lₖ, real_t &γₖ) {
98 problem, params.quadratic_upperbound_tolerance_factor, params.L_max,
99 xₖ, ψₖ, grad_ψₖ, y, Σ, x̂ₖ, pₖ, ŷx̂ₖ, ψx̂ₖ, pₖᵀpₖ, grad_ψₖᵀpₖ, Lₖ, γₖ);
100 };
101 auto print_progress = [&](unsigned k, real_t ψₖ, crvec grad_ψₖ,
102 real_t pₖᵀpₖ, real_t γₖ, real_t εₖ) {
103 std::cout << "[PANOC] " << std::setw(6) << k
104 << ": ψ = " << std::setw(13) << ψₖ
105 << ", ‖∇ψ‖ = " << std::setw(13) << grad_ψₖ.norm()
106 << ", ‖p‖ = " << std::setw(13) << std::sqrt(pₖᵀpₖ)
107 << ", γ = " << std::setw(13) << γₖ
108 << ", εₖ = " << std::setw(13) << εₖ << "\r\n";
109 };
110
111 // Estimate Lipschitz constant ---------------------------------------------
112
113 real_t ψₖ, Lₖ;
114 // Finite difference approximation of ∇²ψ in starting point
115 if (params.Lipschitz.L₀ <= 0) {
117 problem, xₖ, y, Σ, params.Lipschitz.ε, params.Lipschitz.δ,
118 params.L_min, params.L_max,
119 /* in ⟹ out */ ψₖ, grad_ψₖ, x̂ₖ, grad_ψₖ₊₁, work_n, work_m);
120 }
121 // Initial Lipschitz constant provided by the user
122 else {
123 Lₖ = params.Lipschitz.L₀;
124 // Calculate ψ(xₖ), ∇ψ(x₀)
125 ψₖ = calc_ψ_grad_ψ(xₖ, /* in ⟹ out */ grad_ψₖ);
126 }
127 if (not std::isfinite(Lₖ)) {
129 return s;
130 }
131 real_t γₖ = params.Lipschitz.Lγ_factor / Lₖ;
132 real_t τ = NaN;
133
134 // First projected gradient step -------------------------------------------
135
136 // Calculate x̂₀, p₀ (projected gradient step)
137 calc_x̂(γₖ, xₖ, grad_ψₖ, /* in ⟹ out */ x̂ₖ, pₖ);
138 // Calculate ψ(x̂ₖ) and ŷ(x̂ₖ)
139 real_t ψx̂ₖ = calc_ψ_ŷ(x̂ₖ, /* in ⟹ out */ ŷx̂ₖ);
140 real_t grad_ψₖᵀpₖ = grad_ψₖ.dot(pₖ);
141 real_t pₖᵀpₖ = pₖ.squaredNorm();
142 // Compute forward-backward envelope
143 real_t φₖ = ψₖ + 1 / (2 * γₖ) * pₖᵀpₖ + grad_ψₖᵀpₖ;
144
145 // Main PANOC loop
146 // =========================================================================
147 for (unsigned k = 0; k <= params.max_iter; ++k) {
148
149 // Quadratic upper bound -----------------------------------------------
150 if (k == 0 || params.update_lipschitz_in_linesearch == false) {
151 // Decrease step size until quadratic upper bound is satisfied
152 real_t old_γₖ =
153 descent_lemma(xₖ, ψₖ, grad_ψₖ,
154 /* in ⟹ out */ x̂ₖ, pₖ, ŷx̂ₖ,
155 /* inout */ ψx̂ₖ, pₖᵀpₖ, grad_ψₖᵀpₖ, Lₖ, γₖ);
156 if (k > 0 && γₖ != old_γₖ) // Flush L-BFGS if γ changed
157 direction_provider.changed_γ(γₖ, old_γₖ);
158 else if (k == 0) // Initialize L-BFGS
159 direction_provider.initialize(xₖ, x̂ₖ, pₖ, grad_ψₖ);
160 if (γₖ != old_γₖ)
161 φₖ = ψₖ + 1 / (2 * γₖ) * pₖᵀpₖ + grad_ψₖᵀpₖ;
162 }
163 // Calculate ∇ψ(x̂ₖ)
164 if (need_grad_̂ψₖ)
165 calc_grad_ψ_from_ŷ(x̂ₖ, ŷx̂ₖ, /* in ⟹ out */ grad_̂ψₖ);
166
167 // Check stop condition ------------------------------------------------
169 problem.C, params.stop_crit, pₖ, γₖ, xₖ, x̂ₖ, ŷx̂ₖ, grad_ψₖ, grad_̂ψₖ);
170
171 // Print progress
172 if (params.print_interval != 0 && k % params.print_interval == 0)
173 print_progress(k, ψₖ, grad_ψₖ, pₖᵀpₖ, γₖ, εₖ);
174 if (progress_cb)
175 progress_cb({k, xₖ, pₖ, pₖᵀpₖ, x̂ₖ, φₖ, ψₖ, grad_ψₖ, ψx̂ₖ, grad_̂ψₖ,
176 Lₖ, γₖ, τ, εₖ, Σ, y, problem, params});
177
178 auto time_elapsed = std::chrono::steady_clock::now() - start_time;
179 auto stop_status = detail::check_all_stop_conditions(
180 params, time_elapsed, k, stop_signal, ε, εₖ, no_progress);
181 if (stop_status != SolverStatus::Unknown) {
182 // TODO: We could cache g(x) and ẑ, but would that faster?
183 // It saves 1 evaluation of g per ALM iteration, but requires
184 // many extra stores in the inner loops of PANOC.
185 // TODO: move the computation of ẑ and g(x) to ALM?
186 if (stop_status == SolverStatus::Converged ||
187 stop_status == SolverStatus::Interrupted ||
188 always_overwrite_results) {
189 calc_err_z(x̂ₖ, /* in ⟹ out */ err_z);
190 x = std::move(x̂ₖ);
191 y = std::move(ŷx̂ₖ);
192 }
193 s.iterations = k;
194 s.ε = εₖ;
195 s.elapsed_time = duration_cast<microseconds>(time_elapsed);
196 s.status = stop_status;
197 return s;
198 }
199
200 // Calculate quasi-Newton step -----------------------------------------
201 real_t step_size =
203 ? 1
204 : -1;
205 if (k > 0)
206 direction_provider.apply(xₖ, x̂ₖ, pₖ, step_size,
207 /* in ⟹ out */ qₖ);
208
209 // Line search initialization ------------------------------------------
210 τ = 1;
211 real_t σₖγₖ⁻¹pₖᵀpₖ = (1 - γₖ * Lₖ) * pₖᵀpₖ / (2 * γₖ);
212 real_t φₖ₊₁, ψₖ₊₁, ψx̂ₖ₊₁, grad_ψₖ₊₁ᵀpₖ₊₁, pₖ₊₁ᵀpₖ₊₁;
213 real_t Lₖ₊₁, γₖ₊₁;
214 real_t ls_cond;
215 // TODO: make separate parameter
216 real_t margin =
217 (1 + std::abs(φₖ)) * params.quadratic_upperbound_tolerance_factor;
218
219 // Make sure quasi-Newton step is valid
220 if (k == 0) {
221 τ = 0; // Always use prox step on first iteration
222 } else if (not qₖ.allFinite()) {
223 τ = 0;
224 ++s.lbfgs_failures;
225 direction_provider.reset(); // Is there anything else we can do?
226 }
227
228 // Line search loop ----------------------------------------------------
229 do {
230 Lₖ₊₁ = Lₖ;
231 γₖ₊₁ = γₖ;
232
233 // Calculate xₖ₊₁
234 if (τ / 2 < params.τ_min) { // line search failed
235 xₖ₊₁.swap(x̂ₖ); // → safe prox step
236 ψₖ₊₁ = ψx̂ₖ;
237 if (need_grad_̂ψₖ)
238 grad_ψₖ₊₁.swap(grad_̂ψₖ);
239 else
240 calc_grad_ψ_from_ŷ(xₖ₊₁, ŷx̂ₖ, /* in ⟹ out */ grad_ψₖ₊₁);
241 } else { // line search didn't fail (yet)
242 if (τ == 1) // → faster quasi-Newton step
243 xₖ₊₁ = xₖ + qₖ;
244 else
245 xₖ₊₁ = xₖ + (1 - τ) * pₖ + τ * qₖ;
246 // Calculate ψ(xₖ₊₁), ∇ψ(xₖ₊₁)
247 ψₖ₊₁ = calc_ψ_grad_ψ(xₖ₊₁, /* in ⟹ out */ grad_ψₖ₊₁);
248 }
249
250 // Calculate x̂ₖ₊₁, pₖ₊₁ (projected gradient step in xₖ₊₁)
251 calc_x̂(γₖ₊₁, xₖ₊₁, grad_ψₖ₊₁, /* in ⟹ out */ x̂ₖ₊₁, pₖ₊₁);
252 // Calculate ψ(x̂ₖ₊₁) and ŷ(x̂ₖ₊₁)
253 ψx̂ₖ₊₁ = calc_ψ_ŷ(x̂ₖ₊₁, /* in ⟹ out */ ŷx̂ₖ₊₁);
254
255 // Quadratic upper bound -------------------------------------------
256 grad_ψₖ₊₁ᵀpₖ₊₁ = grad_ψₖ₊₁.dot(pₖ₊₁);
257 pₖ₊₁ᵀpₖ₊₁ = pₖ₊₁.squaredNorm();
258 real_t pₖ₊₁ᵀpₖ₊₁_ₖ = pₖ₊₁ᵀpₖ₊₁; // prox step with step size γₖ
259
260 if (params.update_lipschitz_in_linesearch == true) {
261 // Decrease step size until quadratic upper bound is satisfied
262 (void)descent_lemma(xₖ₊₁, ψₖ₊₁, grad_ψₖ₊₁,
263 /* in ⟹ out */ x̂ₖ₊₁, pₖ₊₁, ŷx̂ₖ₊₁,
264 /* inout */ ψx̂ₖ₊₁, pₖ₊₁ᵀpₖ₊₁,
265 grad_ψₖ₊₁ᵀpₖ₊₁, Lₖ₊₁, γₖ₊₁);
266 }
267
268 // Compute forward-backward envelope
269 φₖ₊₁ = ψₖ₊₁ + 1 / (2 * γₖ₊₁) * pₖ₊₁ᵀpₖ₊₁ + grad_ψₖ₊₁ᵀpₖ₊₁;
270 // Compute line search condition
271 ls_cond = φₖ₊₁ - (φₖ - σₖγₖ⁻¹pₖᵀpₖ);
272 if (params.alternative_linesearch_cond)
273 ls_cond -= (0.5 / γₖ₊₁ - 0.5 / γₖ) * pₖ₊₁ᵀpₖ₊₁_ₖ;
274
275 τ /= 2;
276 } while (ls_cond > margin && τ >= params.τ_min);
277
278 // If τ < τ_min the line search failed and we accepted the prox step
279 if (τ < params.τ_min && k != 0) {
281 τ = 0;
282 }
283 if (k != 0) {
284 s.count_τ += 1;
285 s.sum_τ += τ * 2;
286 s.τ_1_accepted += τ * 2 == 1;
287 }
288
289 // Update L-BFGS -------------------------------------------------------
290 if (γₖ != γₖ₊₁) // Flush L-BFGS if γ changed
291 direction_provider.changed_γ(γₖ₊₁, γₖ);
292
293 s.lbfgs_rejected += not direction_provider.update(
294 xₖ, xₖ₊₁, pₖ, pₖ₊₁, grad_ψₖ₊₁, problem.C, γₖ₊₁);
295
296 // Check if we made any progress
297 if (no_progress > 0 || k % params.max_no_progress == 0)
298 no_progress = xₖ == xₖ₊₁ ? no_progress + 1 : 0;
299
300 // Advance step --------------------------------------------------------
301 Lₖ = Lₖ₊₁;
302 γₖ = γₖ₊₁;
303
304 ψₖ = ψₖ₊₁;
305 ψx̂ₖ = ψx̂ₖ₊₁;
306 φₖ = φₖ₊₁;
307
308 xₖ.swap(xₖ₊₁);
309 x̂ₖ.swap(x̂ₖ₊₁);
310 ŷx̂ₖ.swap(ŷx̂ₖ₊₁);
311 pₖ.swap(pₖ₊₁);
312 grad_ψₖ.swap(grad_ψₖ₊₁);
313 grad_ψₖᵀpₖ = grad_ψₖ₊₁ᵀpₖ₊₁;
314 pₖᵀpₖ = pₖ₊₁ᵀpₖ₊₁;
315 }
316 throw std::logic_error("[PANOC] loop error");
317}
318
319} // namespace alpaqa
std::string get_name() const
Definition: inner/panoc.hpp:19
Stats operator()(const Problem &problem, crvec Σ, real_t ε, bool always_overwrite_results, rvec x, rvec y, rvec err_z)
Definition: inner/panoc.hpp:25
real_t calc_error_stop_crit(const Box &C, PANOCStopCrit crit, crvec pₖ, real_t γ, crvec xₖ, crvec x̂ₖ, crvec ŷₖ, crvec grad_ψₖ, crvec grad_̂ψₖ)
Compute the ε from the stopping criterion, see PANOCStopCrit.
SolverStatus check_all_stop_conditions(const ParamsT &params, DurationT time_elapsed, unsigned iteration, const AtomicStopSignal &stop_signal, real_t ε, real_t εₖ, unsigned no_progress)
Check all stop conditions (required tolerance reached, out of time, maximum number of iterations exce...
real_t calc_ψ_ŷ(const Problem &p, crvec x, crvec y, crvec Σ, rvec ŷ)
Calculate both ψ(x) and the vector ŷ that can later be used to compute ∇ψ.
real_t descent_lemma(const Problem &problem, real_t rounding_tolerance, real_t L_max, crvec xₖ, real_t ψₖ, crvec grad_ψₖ, crvec y, crvec Σ, rvec x̂ₖ, rvec pₖ, rvec ŷx̂ₖ, real_t &ψx̂ₖ, real_t &norm_sq_pₖ, real_t &grad_ψₖᵀpₖ, real_t &Lₖ, real_t &γₖ)
Increase the estimate of the Lipschitz constant of the objective gradient and decrease the step size ...
bool stop_crit_requires_grad_̂ψₖ(PANOCStopCrit crit)
real_t calc_ψ_grad_ψ(const Problem &p, crvec x, crvec y, crvec Σ, rvec grad_ψ, rvec work_n, rvec work_m)
Calculate both ψ(x) and its gradient ∇ψ(x).
void calc_err_z(const Problem &p, crvec x̂, crvec y, crvec Σ, rvec err_z)
Calculate the error between ẑ and g(x).
real_t initial_lipschitz_estimate(const Problem &problem, crvec xₖ, crvec y, crvec Σ, real_t ε, real_t δ, real_t L_min, real_t L_max, real_t &ψ, rvec grad_ψ, rvec work_n1, rvec work_n2, rvec work_n3, rvec work_m)
Estimate the Lipschitz constant of the gradient using finite differences.
void calc_grad_ψ_from_ŷ(const Problem &p, crvec x, crvec ŷ, rvec grad_ψ, rvec work_n)
Calculate ∇ψ(x) using ŷ.
void calc_x̂(const Problem &prob, real_t γ, crvec x, crvec grad_ψ, rvec x̂, rvec p)
void print_progress(unsigned k, real_t ψₖ, crvec grad_ψₖ, real_t pₖᵀpₖ, real_t γₖ, real_t εₖ)
int Σ
Definition: test.py:72
int m
Definition: test.py:41
int n
Definition: test.py:40
int ε
Definition: test.py:73
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18
constexpr real_t NaN
Not a number.
Definition: vec.hpp:28
@ Interrupted
Solver was interrupted by the user.
@ Unknown
Initial value.
@ Converged
Converged and reached given tolerance.
@ NotFinite
Intermediate results were infinite or not-a-number.
realvec vec
Default type for vectors.
Definition: vec.hpp:14
std::chrono::microseconds elapsed_time
double real_t
Default floating point type.
Definition: vec.hpp:8
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16
problem
Definition: main.py:16
Problem description for minimization problems.