alpaqa pantr
Nonconvex constrained optimization
Loading...
Searching...
No Matches
zerofpr.tpp
Go to the documentation of this file.
1#pragma once
2
4
5#include <cassert>
6#include <cmath>
7#include <iomanip>
8#include <iostream>
9#include <stdexcept>
10
16#include <alpaqa/util/timed.hpp>
17
18namespace alpaqa {
19
20template <class DirectionProviderT>
22 return "ZeroFPRSolver<" + std::string(direction.get_name()) + ">";
23}
24
25template <class DirectionProviderT>
27 /// [in] Problem description
28 const Problem &problem,
29 /// [in] Solve options
30 const SolveOptions &opts,
31 /// [inout] Decision variable @f$ x @f$
32 rvec x,
33 /// [inout] Lagrange multipliers @f$ y @f$
34 rvec y,
35 /// [in] Constraint weights @f$ \Sigma @f$
36 crvec Σ,
37 /// [out] Slack variable error @f$ g(x) - \Pi_D(g(x) + \Sigma^{-1} y) @f$
38 rvec err_z) -> Stats {
39
40 if (opts.check)
41 problem.check();
42
43 using std::chrono::nanoseconds;
44 auto os = opts.os ? opts.os : this->os;
45 auto start_time = std::chrono::steady_clock::now();
46 Stats s;
47
48 const auto n = problem.get_n();
49 const auto m = problem.get_m();
50
51 // Holds the result of the extra proximal gradient step that is taken from
   // the current iterate's x̂ (filled in by eval_grad_in_prox and
   // eval_prox_grad_step_in_prox). All scalar members start out as NaN.
52 struct ProxIterate {
53 vec x̂; //< Point reached by the proximal gradient step taken from x̂
54 vec grad_ψ; //< Gradient evaluated in the current iterate's x̂
55 vec p; //< Proximal gradient step taken from x̂
56 vec ŷx̂; //< Sized m; not written by any code visible in this listing
57 real_t pᵀp = NaN<config_t>; //< Norm squared of p
58 real_t grad_ψᵀp = NaN<config_t>; //< Dot product of gradient and p
59 real_t hx̂ = NaN<config_t>; //< Non-smooth function value returned by that step
60
61 ProxIterate(length_t n, length_t m) : x̂(n), grad_ψ(n), p(n), ŷx̂(m) {}
62 } prox_iterate{n, m};
63 // Represents an iterate in the algorithm, keeping track of some
64 // intermediate values and function evaluations.
   // All scalar members start out as NaN until they are evaluated.
65 struct Iterate {
66 vec x; //< Decision variables
67 vec x̂; //< Decision variables after proximal gradient step
68 vec grad_ψ; //< Gradient of cost in x
69 vec p; //< Proximal gradient step in x
70 vec ŷx̂; //< Candidate Lagrange multipliers in x̂
71 real_t ψx = NaN<config_t>; //< Cost in x
72 real_t ψx̂ = NaN<config_t>; //< Cost in x̂
73 real_t γ = NaN<config_t>; //< Step size γ
74 real_t L = NaN<config_t>; //< Lipschitz estimate L
75 real_t pᵀp = NaN<config_t>; //< Norm squared of p
76 real_t grad_ψᵀp = NaN<config_t>; //< Dot product of gradient and p
77 real_t hx̂ = NaN<config_t>; //< Non-smooth function value in x̂
78
79 // Value φγ = ψ(x) + h(x̂) + ‖p‖² / (2γ) + ∇ψ(x)ᵀp (presumably the
   // forward-backward envelope, hence the name).
   // @pre @ref ψx, @ref hx̂, @ref pᵀp, @ref grad_ψᵀp and @ref γ are up to date
80 // @return φγ
81 real_t fbe() const { return ψx + hx̂ + pᵀp / (2 * γ) + grad_ψᵀp; }
82
83 Iterate(length_t n, length_t m) : x(n), x̂(n), grad_ψ(n), p(n), ŷx̂(m) {}
84 } iterates[2]{{n, m}, {n, m}};
85 Iterate *curr = &iterates[0];
86 ProxIterate *prox = &prox_iterate;
87 Iterate *next = &iterates[1];
88
89 vec work_n(n), work_m(m);
90 vec q(n); // (quasi-)Newton step Hₖ pₖ
91
92 // Helper functions --------------------------------------------------------
93
94 auto qub_violated = [this](const Iterate &i) {
    // Returns true when the quadratic upper bound does not hold in i, i.e.
    // ψ(x̂) > ψ(x) + ∇ψ(x)ᵀp + (L/2) ‖p‖² + margin.
    // The margin is a tolerance that scales with 1 + |ψ(x)|.
95 real_t margin =
96 (1 + std::abs(i.ψx)) * params.quadratic_upperbound_tolerance_factor;
97 return i.ψx̂ > i.ψx + i.grad_ψᵀp + real_t(0.5) * i.L * i.pᵀp + margin;
98 };
99
100 auto linesearch_violated = [this](const Iterate &curr,
101 const Iterate &next) {
     // Returns true when the candidate `next` does not decrease φγ enough
     // with respect to `curr`.
     // With params.force_linesearch set, the condition is never reported as
     // violated, so the caller accepts the candidate without backtracking.
102 if (params.force_linesearch)
103 return false;
104 real_t β = params.linesearch_strictness_factor;
     // Required decrease per unit ‖p‖²: σ = β (1 − γ L) / (2γ)
105 real_t σ = β * (1 - curr.γ * curr.L) / (2 * curr.γ);
106 real_t φγ = curr.fbe();
     // Tolerance that scales with 1 + |φγ(curr)|
107 real_t margin = (1 + std::abs(φγ)) * params.linesearch_tolerance_factor;
     // Violated when φγ(next) > φγ(curr) − σ ‖p‖² + margin
108 return next.fbe() > φγ - σ * curr.pᵀp + margin;
109 };
110
111 // Problem functions -------------------------------------------------------
112
113 auto eval_ψ_grad_ψ = [&problem, &y, &Σ, &work_n, &work_m](Iterate &i) {
     // Evaluates ψ(x) and ∇ψ(x) in i.x for the given y and Σ: the returned
     // value is stored in i.ψx; the gradient is presumably written to
     // i.grad_ψ by the problem, with work_n and work_m as scratch vectors.
114 i.ψx = problem.eval_ψ_grad_ψ(i.x, y, Σ, i.grad_ψ, work_n, work_m);
115 };
116 auto eval_prox_grad_step = [&problem](Iterate &i) {
     // Proximal gradient step from i.x with step size i.γ and gradient
     // i.grad_ψ: the returned value is stored in i.hx̂; i.x̂ and i.p are
     // passed as outputs (presumably filled with x̂ and the step p).
117 i.hx̂ = problem.eval_prox_grad_step(i.γ, i.x, i.grad_ψ, i.x̂, i.p);
     // Cache ‖p‖² and ∇ψ(x)ᵀp, which are used by the quadratic upper bound
     // check and by Iterate::fbe().
118 i.pᵀp = i.p.squaredNorm();
119 i.grad_ψᵀp = i.p.dot(i.grad_ψ);
120 };
121 auto eval_cost_in_prox = [&problem, &y, &Σ](Iterate &i) {
     // Evaluates the cost in i.x̂ and stores it in i.ψx̂; i.ŷx̂ is passed as
     // the last argument (presumably receiving the candidate multipliers).
122 i.ψx̂ = problem.eval_ψ(i.x̂, y, Σ, i.ŷx̂);
123 };
124 auto eval_grad_in_prox = [&problem, &prox, &work_n](const Iterate &i) {
     // Computes the gradient in i.x̂ using the candidate multipliers i.ŷx̂.
     // Note that the result goes into prox->grad_ψ, not into i (which is
     // const here); work_n is a scratch vector.
125 problem.eval_grad_L(i.x̂, i.ŷx̂, prox->grad_ψ, work_n);
126 };
127 auto eval_prox_grad_step_in_prox = [&problem, &prox](const Iterate &i) {
     // Proximal gradient step starting from i.x̂ (not i.x), using step size
     // i.γ and the gradient stored in prox->grad_ψ. All results are stored
     // in *prox; i itself is not modified.
128 prox->hx̂ = problem.eval_prox_grad_step(i.γ, i.x̂, prox->grad_ψ, prox->x̂,
129 prox->p);
     // Cache ‖p̂‖² and the dot product of the gradient with p̂
130 prox->pᵀp = prox->p.squaredNorm();
131 prox->grad_ψᵀp = prox->p.dot(prox->grad_ψ);
132 };
133
134 // Printing ----------------------------------------------------------------
135
136 std::array<char, 64> print_buf;
    // Formats x with params.print_precision using the shared print_buf.
    // NOTE(review): the returned view presumably aliases print_buf, so each
    // result has to be streamed before the next call overwrites it; the
    // chained `<<` in the print_progress_* helpers relies on the operands of
    // `<<` being sequenced left to right (guaranteed since C++17) — confirm.
137 auto print_real = [this, &print_buf](real_t x) {
138 return float_to_str_vw(print_buf, x, params.print_precision);
139 };
140 auto print_real3 = [&print_buf](real_t x) {
     // Formats x with a fixed precision of 3 (instead of
     // params.print_precision), using the same shared print_buf.
141 return float_to_str_vw(print_buf, x, 3);
142 };
143 auto print_progress_1 = [&print_real, os](unsigned k, real_t φₖ, real_t ψₖ,
144 crvec grad_ψₖ, real_t pₖᵀpₖ,
145 real_t γₖ, real_t εₖ) {
     // First part of the per-iteration output: a header line for k == 0 or
     // the iteration number otherwise, followed by φγ, ψ, ‖∇ψ‖, ‖p‖, γ and ε.
     // This line ends with '\n' and is not flushed here.
146 if (k == 0)
147 *os << "┌─[ZeroFPR]\n";
148 else
149 *os << "├─ " << std::setw(6) << k << '\n';
150 *os << "│ φγ = " << print_real(φₖ) //
151 << ", ψ = " << print_real(ψₖ) //
152 << ", ‖∇ψ‖ = " << print_real(grad_ψₖ.norm()) //
     // The squared norm is passed in, so take the square root for display
153 << ", ‖p‖ = " << print_real(std::sqrt(pₖᵀpₖ)) //
154 << ", γ = " << print_real(γₖ) //
155 << ", ε = " << print_real(εₖ) << '\n';
156 };
157 auto print_progress_2 = [&print_real, &print_real3, os](crvec qₖ,
158 real_t τₖ) {
     // Second part of the per-iteration output: the norm of the step q and
     // the line search parameter τ (printed with 3 digits of precision).
159 *os << "│ ‖q‖ = " << print_real(qₖ.norm()) //
160 << ", τ = " << print_real3(τₖ) //
161 << std::endl; // Flush for Python buffering
162 };
163 auto print_progress_n = [&](SolverStatus status) {
     // Closing line of the output, showing the final solver status.
164 *os << "└─ " << status << " ──"
165 << std::endl; // Flush for Python buffering
166 };
167
168 auto do_progress_cb = [this, &s, &problem, &Σ, &y, &opts](
169 unsigned k, Iterate &it, crvec q, crvec grad_ψx̂,
170 real_t τ, real_t εₖ, SolverStatus status) {
     // Reports the state of iteration k to the user-provided progress
     // callback. Does nothing when no callback is set.
     // NOTE(review): `s` is captured but not used in the lines visible here;
     // the listing skips two lines before the progress_cb call — confirm
     // against the original source file.
171 using enum SolverStatus;
172 if (!progress_cb)
173 return;
176 progress_cb(ProgressInfo{
177 .k = k,
178 .status = status,
179 .x = it.x,
180 .p = it.p,
181 .norm_sq_p = it.pᵀp,
182 .x̂ = it.x̂,
183 .φγ = it.fbe(),
184 .ψ = it.ψx,
185 .grad_ψ = it.grad_ψ,
186 .ψ_hat = it.ψx̂,
     // Gradient in x̂ is supplied by the caller; it is not a member of Iterate
187 .grad_ψ_hat = grad_ψx̂,
188 .q = q,
189 .L = it.L,
190 .γ = it.γ,
191 .τ = τ,
192 .ε = εₖ,
193 .Σ = Σ,
194 .y = y,
195 .outer_iter = opts.outer_iter,
196 .problem = &problem,
197 .params = &params,
198 });
199 };
200
201 // Initialization ----------------------------------------------------------
202
203 curr->x = x;
204
205 // Estimate Lipschitz constant ---------------------------------------------
206
207 // Finite difference approximation of ∇²ψ in starting point
208 if (params.Lipschitz.L_0 <= 0) {
209 curr->L = Helpers::initial_lipschitz_estimate(
210 problem, curr->x, y, Σ, params.Lipschitz.ε, params.Lipschitz.δ,
211 params.L_min, params.L_max,
212 /* in ⟹ out */ curr->ψx, curr->grad_ψ, curr->x̂, next->grad_ψ,
213 work_n, work_m);
214 }
215 // Initial Lipschitz constant provided by the user
216 else {
217 curr->L = params.Lipschitz.L_0;
218 // Calculate ψ(x₀), ∇ψ(x₀)
219 eval_ψ_grad_ψ(*curr);
220 }
221 if (not std::isfinite(curr->L)) {
223 return s;
224 }
225 curr->γ = params.Lipschitz.Lγ_factor / curr->L;
226
227 // First proximal gradient step --------------------------------------------
228
229 // Calculate x̂ₖ, ψ(x̂ₖ)
230 eval_prox_grad_step(*curr);
231 eval_cost_in_prox(*curr);
232
233 // Quadratic upper bound
234 while (curr->L < params.L_max && qub_violated(*curr)) {
235 curr->γ /= 2;
236 curr->L *= 2;
237 eval_prox_grad_step(*curr);
238 eval_cost_in_prox(*curr);
239 }
240
241 // Loop data ---------------------------------------------------------------
242
243 unsigned k = 0; // iteration
244 real_t τ = NaN<config_t>; // line search parameter
245 // Keep track of how many successive iterations didn't update the iterate
246 unsigned no_progress = 0;
247
248 // Main ZeroFPR loop
249 // =========================================================================
250
251 ScopedMallocBlocker mb; // Don't allocate in the inner loop
252 while (true) {
253
254 // Check stopping criteria ---------------------------------------------
255
256 // Calculate ∇ψ(x̂ₖ), p̂ₖ
257 eval_grad_in_prox(*curr);
258 eval_prox_grad_step_in_prox(*curr);
259
260 real_t εₖ = Helpers::calc_error_stop_crit(
261 problem, params.stop_crit, curr->p, curr->γ, curr->x, curr->x̂,
262 curr->ŷx̂, curr->grad_ψ, prox->grad_ψ, work_n, next->p);
263
264 // Print progress ------------------------------------------------------
265 bool do_print =
266 params.print_interval != 0 && k % params.print_interval == 0;
267 if (do_print)
268 print_progress_1(k, curr->fbe(), curr->ψx, curr->grad_ψ, curr->pᵀp,
269 curr->γ, εₖ);
270
271 // Return solution -----------------------------------------------------
272
273 auto time_elapsed = std::chrono::steady_clock::now() - start_time;
274 auto stop_status = Helpers::check_all_stop_conditions(
275 params, opts, time_elapsed, k, stop_signal, εₖ, no_progress);
276 if (stop_status != SolverStatus::Busy) {
277 do_progress_cb(k, *curr, null_vec<config_t>, prox->grad_ψ, -1, εₖ,
278 stop_status);
279 bool do_final_print = params.print_interval != 0;
280 if (!do_print && do_final_print)
281 print_progress_1(k, curr->fbe(), curr->ψx, curr->grad_ψ,
282 curr->pᵀp, curr->γ, εₖ);
283 if (do_print || do_final_print)
284 print_progress_n(stop_status);
285 if (stop_status == SolverStatus::Converged ||
286 stop_status == SolverStatus::Interrupted ||
287 opts.always_overwrite_results) {
288 auto &ŷ = curr->ŷx̂;
289 if (err_z.size() > 0)
290 err_z = Σ.asDiagonal().inverse() * (ŷ - y);
291 x = std::move(curr->x̂);
292 y = std::move(curr->ŷx̂);
293 }
294 s.iterations = k;
295 s.ε = εₖ;
296 s.elapsed_time = duration_cast<nanoseconds>(time_elapsed);
297 s.status = stop_status;
298 s.final_γ = curr->γ;
299 s.final_ψ = curr->ψx̂;
300 s.final_h = curr->hx̂;
301 s.final_φγ = curr->fbe();
302 return s;
303 }
304
305 // Calculate quasi-Newton step -----------------------------------------
306
307 real_t τ_init = NaN<config_t>;
308 if (k == 0) { // Initialize L-BFGS
310 direction.initialize(problem, y, Σ, curr->γ, curr->x̂, prox->x̂,
311 prox->p, prox->grad_ψ);
312 τ_init = 0;
313 }
314 if (k > 0 || direction.has_initial_direction()) {
     // Ask the direction provider for a step q; τ_init is 1 if apply()
     // reports success and 0 otherwise.
315 τ_init = direction.apply(curr->γ, curr->x̂, prox->x̂, prox->p,
316 prox->grad_ψ, q)
317 ? 1
318 : 0;
319 // Make sure quasi-Newton step is valid
320 if (τ_init == 1 && not q.allFinite())
321 τ_init = 0;
322 if (τ_init != 1) { // If we could not compute a valid quasi-Newton step
323 ++s.lbfgs_failures;
324 direction.reset(); // Is there anything else we can do?
325 }
326 }
327
328 // Line search ---------------------------------------------------------
329
330 next->γ = curr->γ;
331 next->L = curr->L;
332 τ = τ_init;
333 real_t τ_prev = -1;
334 bool update_lbfgs_in_linesearch = params.update_direction_in_candidate;
335 bool update_lbfgs_later = !update_lbfgs_in_linesearch;
336
337 // xₖ₊₁ = xₖ + pₖ
    // Falls back to the plain proximal gradient step. Nothing is evaluated
    // here: the cost in x̂ₖ and the gradient stored in prox->grad_ψ, both
    // computed earlier, are reused for the next iterate.
338 auto take_safe_step = [&] {
339 next->x = curr->x̂; // → safe prox step
340 next->ψx = curr->ψx̂;
341 next->grad_ψ = prox->grad_ψ;
342 // TODO: could swap gradients instead of copying, but prox->grad_ψ is
    // still needed for the direction update
343 };
344
345 // xₖ₊₁ = x̂ₖ + τ qₖ
    // Moves from x̂ₖ along the step q scaled by τ, then evaluates the cost
    // and its gradient in the new point.
346 auto take_accelerated_step = [&](real_t τ) {
     // τ == 1 is handled separately so that no scaling of q is needed
347 if (τ == 1) // → faster quasi-Newton step
348 next->x = curr->x̂ + q;
349 else
350 next->x = curr->x̂ + τ * q;
351 // Calculate ψ(xₖ₊₁), ∇ψ(xₖ₊₁)
352 eval_ψ_grad_ψ(*next);
353 };
354
355 while (!stop_signal.stop_requested()) {
356
357 // Recompute step only if τ changed
358 if (τ != τ_prev) {
359 τ != 0 ? take_accelerated_step(τ) : take_safe_step();
360 τ_prev = τ;
361 }
362
363 // If the cost is not finite, abandon the direction entirely, don't
364 // even bother backtracking.
365 if (τ > 0 && !std::isfinite(next->ψx)) {
366 τ = 0;
367 direction.reset();
368 continue;
369 }
370
371 // Calculate x̂ₖ₊₁, ψ(x̂ₖ₊₁)
372 eval_prox_grad_step(*next);
373 eval_cost_in_prox(*next);
374
375 // Quadratic upper bound
376 if (next->L < params.L_max && qub_violated(*next)) {
377 next->γ /= 2;
378 next->L *= 2;
379 τ = τ_init;
381 update_lbfgs_in_linesearch = false;
382 update_lbfgs_later = true;
383 continue;
384 }
385
386 // Update L-BFGS
387 if (τ == 1 && update_lbfgs_in_linesearch) {
388 if (params.update_direction_from_prox_step) {
389 s.lbfgs_rejected += not direction.update(
390 curr->γ, next->γ, curr->x̂, next->x, prox->p, next->p,
391 prox->grad_ψ, next->grad_ψ);
392 } else {
393 s.lbfgs_rejected += not direction.update(
394 curr->γ, next->γ, curr->x, next->x, curr->p, next->p,
395 curr->grad_ψ, next->grad_ψ);
396 }
397 update_lbfgs_in_linesearch = false;
398 update_lbfgs_later = false;
399 }
400
401 // Line search condition
402 if (τ > 0 && linesearch_violated(*curr, *next)) {
403 τ /= 2;
404 if (τ < params.min_linesearch_coefficient)
405 τ = 0;
407 continue;
408 }
409
410 // QUB and line search satisfied
411 break;
412 }
413 // If τ < τ_min the line search failed and we accepted the prox step
414 s.linesearch_failures += (τ == 0 && τ_init > 0);
415 s.τ_1_accepted += τ == 1;
416 s.count_τ += 1;
417 s.sum_τ += τ;
418
419 // Check if we made any progress
    // The comparison of the iterates is only started when k is a multiple of
    // params.max_no_progress; once the counter is nonzero, it is repeated
    // every iteration until the iterate changes again (which resets it to 0).
    // NOTE(review): `k % params.max_no_progress` divides by zero if
    // max_no_progress is 0 — confirm the parameter is validated elsewhere.
420 if (no_progress > 0 || k % params.max_no_progress == 0)
421 no_progress = curr->x == next->x ? no_progress + 1 : 0;
422
423 // Update L-BFGS -------------------------------------------------------
424
425 if (τ_init < 1 || update_lbfgs_later) {
426 if (curr->γ != next->γ) { // Flush L-BFGS if γ changed
427 direction.changed_γ(next->γ, curr->γ);
428 if (params.recompute_last_prox_step_after_lbfgs_flush) {
429 curr->γ = next->γ;
430 curr->L = next->L;
431 eval_prox_grad_step_in_prox(*curr);
432 }
433 }
434 if (τ > 0 && params.update_direction_from_prox_step) {
435 s.lbfgs_rejected += not direction.update(
436 curr->γ, next->γ, curr->x̂, next->x, prox->p, next->p,
437 prox->grad_ψ, next->grad_ψ);
438 } else {
439 s.lbfgs_rejected += not direction.update(
440 curr->γ, next->γ, curr->x, next->x, curr->p, next->p,
441 curr->grad_ψ, next->grad_ψ);
442 }
443 }
444
445 // Print ---------------------------------------------------------------
446 do_progress_cb(k, *curr, q, prox->grad_ψ, τ, εₖ, SolverStatus::Busy);
447 if (do_print && (k != 0 || direction.has_initial_direction()))
448 print_progress_2(q, τ);
449
450 // Advance step --------------------------------------------------------
451 std::swap(curr, next);
452 ++k;
453
454#ifndef NDEBUG
455 {
457 *prox = {n, m};
458 *next = {n, m};
459 }
460#endif
461 }
462 throw std::logic_error("[ZeroFPR] loop error");
463}
464
465} // namespace alpaqa
std::string get_name() const
Definition: zerofpr.tpp:21
Stats operator()(const Problem &problem, const SolveOptions &opts, rvec x, rvec y, crvec Σ, rvec err_z)
Definition: zerofpr.tpp:26
unsigned stepsize_backtracks
Definition: zerofpr.hpp:76
unsigned lbfgs_rejected
Definition: zerofpr.hpp:78
unsigned τ_1_accepted
Definition: zerofpr.hpp:79
unsigned lbfgs_failures
Definition: zerofpr.hpp:77
SolverStatus
Exit status of a numerical solver such as ALM or PANOC.
@ Interrupted
Solver was interrupted by the user.
@ Busy
In progress.
@ Converged
Converged and reached given tolerance.
@ NotFinite
Intermediate results were infinite or not-a-number.
std::chrono::nanoseconds time_progress_callback
Definition: zerofpr.hpp:72
std::chrono::nanoseconds elapsed_time
Definition: zerofpr.hpp:71
typename Conf::real_t real_t
Definition: config.hpp:51
unsigned linesearch_backtracks
Definition: zerofpr.hpp:75
typename Conf::length_t length_t
Definition: config.hpp:62
typename Conf::rvec rvec
Definition: config.hpp:55
std::string_view float_to_str_vw(auto &buf, double value, int precision=std::numeric_limits< double >::max_digits10)
Definition: print.tpp:38
typename Conf::crvec crvec
Definition: config.hpp:56
unsigned linesearch_failures
Definition: zerofpr.hpp:74
typename Conf::vec vec
Definition: config.hpp:52
unsigned iterations
Definition: zerofpr.hpp:73
SolverStatus status
Definition: zerofpr.hpp:69