alpaqa dll
Nonconvex constrained optimization
Loading...
Searching...
No Matches
pantr.tpp
Go to the documentation of this file.
1#pragma once
2
4
5#include <cassert>
6#include <cmath>
7#include <iomanip>
8#include <iostream>
9#include <stdexcept>
10
14#include <alpaqa/util/print.hpp>
15#include <guanaqo/timed.hpp>
16
17namespace alpaqa {
18
19template <class DirectionProviderT>
21 return "PANTRSolver<" + std::string(direction.get_name()) + ">";
22}
23
24template <class DirectionProviderT>
26 /// [in] Problem description
27 const Problem &problem,
28 /// [in] Solve options
29 const SolveOptions &opts,
30 /// [inout] Decision variable @f$ x @f$
31 rvec x,
32 /// [inout] Lagrange multipliers @f$ y @f$
33 rvec y,
34 /// [in] Constraint weights @f$ \Sigma @f$
35 crvec Σ,
36 /// [out] Slack variable error @f$ g(x) - \Pi_D(g(x) + \Sigma^{-1} y) @f$
37 rvec err_z) -> Stats {
38
39 if (opts.check)
40 problem.check();
41
42 using std::chrono::nanoseconds;
43 auto os = opts.os ? opts.os : this->os;
44 auto start_time = std::chrono::steady_clock::now();
45 Stats s;
46
47 const auto n = problem.get_num_variables();
48 const auto m = problem.get_num_constraints();
49
50 // Represents an iterate in the algorithm, keeping track of some
51 // intermediate values and function evaluations.
52 struct Iterate {
53 vec x; //< Decision variables
54 vec x̂; //< Decision variables after proximal gradient step
55 vec grad_ψ; //< Gradient of cost in x
56 vec p; //< Proximal gradient step in x
57 vec ŷx̂; //< Candidate Lagrange multipliers in x̂
58 real_t ψx = NaN<config_t>; //< Cost in x
59 real_t ψx̂ = NaN<config_t>; //< Cost in x̂
60 real_t γ = NaN<config_t>; //< Step size γ
61 real_t L = NaN<config_t>; //< Lipschitz estimate L
62 real_t pᵀp = NaN<config_t>; //< Norm squared of p
63 real_t grad_ψᵀp = NaN<config_t>; //< Dot product of gradient and p
64 real_t hx̂ = NaN<config_t>; //< Non-smooth function value in x̂
65
66 // @pre @ref ψx, @ref hx̂ @ref pᵀp, @ref grad_ψᵀp
67 // @return φγ
68 real_t fbe() const { return ψx + hx̂ + pᵀp / (2 * γ) + grad_ψᵀp; }
69
70 Iterate(length_t n, length_t m) : x(n), x̂(n), grad_ψ(n), p(n), ŷx̂(m) {}
71 } iterates[3]{{n, m}, {n, m}, {n, m}};
72 Iterate *curr = &iterates[0];
73 Iterate *prox = &iterates[1];
74 Iterate *cand = &iterates[2];
75
76 bool need_grad_ψx̂ = Helpers::stop_crit_requires_grad_ψx̂(params.stop_crit);
77 vec grad_ψx̂(n);
78 vec work_n(n), work_m(m);
79 vec q(n); // (quasi-)Newton step Hₖ pₖ
80 std::chrono::nanoseconds direction_duration{};
81
82 // Problem functions -------------------------------------------------------
83
84 auto eval_ψ_grad_ψ = [&problem, &y, &Σ, &work_n, &work_m](Iterate &i) {
85 i.ψx = problem.eval_augmented_lagrangian_and_gradient(
86 i.x, y, Σ, i.grad_ψ, work_n, work_m);
87 };
88 auto eval_prox_grad_step = [&problem](Iterate &i) {
89 i.hx̂ =
90 problem.eval_proximal_gradient_step(i.γ, i.x, i.grad_ψ, i.x̂, i.p);
91 i.pᵀp = i.p.squaredNorm();
92 i.grad_ψᵀp = i.p.dot(i.grad_ψ);
93 };
94 auto eval_ψx̂ = [&problem, &y, &Σ](Iterate &i) {
95 i.ψx̂ = problem.eval_augmented_lagrangian(i.x̂, y, Σ, i.ŷx̂);
96 };
97 auto eval_grad_ψx̂ = [&problem, &work_n](Iterate &i, rvec grad_ψx̂) {
98 problem.eval_lagrangian_gradient(i.x̂, i.ŷx̂, grad_ψx̂, work_n);
99 };
100
101 // Helper functions --------------------------------------------------------
102
103 auto qub_violated = [this](const Iterate &i) {
104 real_t margin =
105 (1 + std::abs(i.ψx)) * params.quadratic_upperbound_tolerance_factor;
106 return i.ψx̂ > i.ψx + i.grad_ψᵀp + real_t(0.5) * i.L * i.pᵀp + margin;
107 };
108 auto backtrack_qub = [&](Iterate &i) {
109 while (i.L < params.L_max && qub_violated(i)) {
110 i.γ /= 2;
111 i.L *= 2;
112 // Compute x̂, p, ψ(x̂)
113 eval_prox_grad_step(i);
114 eval_ψx̂(i);
116 }
117 };
118
119 // Printing ----------------------------------------------------------------
120
121 std::array<char, 64> print_buf;
122 auto print_real = [this, &print_buf](real_t x) {
123 return float_to_str_vw(print_buf, x, params.print_precision);
124 };
125 auto print_real3 = [&print_buf](real_t x) {
126 return float_to_str_vw(print_buf, x, 3);
127 };
128
129 auto print_progress_1 = [&](unsigned k, real_t φₖ, real_t ψₖ, crvec grad_ψₖ,
130 real_t pₖᵀpₖ, real_t γₖ, real_t εₖ, real_t Δₖ) {
131 if (k == 0)
132 *os << "┌─[PANTR]\n";
133 else
134 *os << "├─ " << std::setw(6) << k << " ──\n";
135 *os << "│ φγ = " << print_real(φₖ) //
136 << ", ψ = " << print_real(ψₖ) //
137 << ", ‖∇ψ‖ = " << print_real(grad_ψₖ.norm()) //
138 << ", ‖p‖ = " << print_real(std::sqrt(pₖᵀpₖ)) //
139 << ", γ = " << print_real(γₖ) //
140 << ", Δ = " << print_real(Δₖ) //
141 << ", ε = " << print_real(εₖ) << '\n';
142 };
143 auto print_progress_2 = [&](crvec qₖ, real_t ρₖ, bool accept,
144 std::chrono::nanoseconds direction_duration) {
145 *os << "│ ‖q‖ = " << print_real(qₖ.norm()) //
146 << ", ρ = " << print_real3(ρₖ) //
147 << ", time = "
148 << print_real3(
149 static_cast<real_t>(1e6) *
150 std::chrono::duration<real_t>(direction_duration).count())
151 << " µs, "
152 << (accept ? "\033[0;32maccepted\033[0m"
153 : "\033[0;35mrejected\033[0m") //
154 << std::endl; // Flush for Python buffering
155 };
156 auto print_progress_n = [&](SolverStatus status) {
157 *os << "└─ " << status << " ──"
158 << std::endl; // Flush for Python buffering
159 };
160 auto do_progress_cb = [this, &s, &problem, &Σ, &y,
161 &opts](unsigned k, Iterate &it, crvec q,
162 crvec grad_ψx̂, real_t Δ, real_t ρ, real_t εₖ,
163 bool accepted, SolverStatus status) {
164 if (!progress_cb)
165 return;
167 guanaqo::Timed t{s.time_progress_callback};
169 .k = k,
170 .status = status,
171 .x = it.x,
172 .p = it.p,
173 .norm_sq_p = it.pᵀp,
174 .x̂ = it.x̂,
175 .ŷ = it.ŷx̂,
176 .φγ = it.fbe(),
177 .ψ = it.ψx,
178 .grad_ψ = it.grad_ψ,
179 .ψ_hat = it.ψx̂,
180 .grad_ψ_hat = grad_ψx̂,
181 .q = q,
182 .L = it.L,
183 .γ = it.γ,
184 .Δ = Δ,
185 .ρ = ρ,
186 .τ = static_cast<real_t>(accepted),
187 .ε = εₖ,
188 .Σ = Σ,
189 .y = y,
190 .outer_iter = opts.outer_iter,
191 .problem = &problem,
192 .params = &params,
193 });
194 };
195
196 // Initialization ----------------------------------------------------------
197
198 curr->x = x;
199
200 // Estimate Lipschitz constant ---------------------------------------------
201
202 // Finite difference approximation of ∇²ψ in starting point
203 if (params.Lipschitz.L_0 <= 0) {
205 problem, curr->x, y, Σ, params.Lipschitz.ε, params.Lipschitz.δ,
206 params.L_min, params.L_max,
207 /* in ⟹ out */ curr->ψx, curr->grad_ψ, curr->x̂, cand->grad_ψ,
208 work_n, work_m);
209 }
210 // Initial Lipschitz constant provided by the user
211 else {
212 curr->L = params.Lipschitz.L_0;
213 // Calculate ψ(xₖ), ∇ψ(x₀)
214 eval_ψ_grad_ψ(*curr);
215 }
216 if (not std::isfinite(curr->L)) {
218 return s;
219 }
220 curr->γ = params.Lipschitz.Lγ_factor / curr->L;
221
222 // First proximal gradient step --------------------------------------------
223
224 eval_prox_grad_step(*curr);
225 eval_ψx̂(*curr);
226 backtrack_qub(*curr);
227
228 // Loop data ---------------------------------------------------------------
229
230 unsigned k = 0; // iteration
231 bool accept_candidate = false;
232 // Keep track of how many successive iterations didn't update the iterate
233 unsigned no_progress = 0;
234 // Trust radius
235 real_t Δ = params.initial_radius;
236 if (!std::isfinite(Δ) || Δ == 0)
237 Δ = real_t(0.1) * curr->grad_ψ.norm();
238 Δ = std::fmax(Δ, params.min_radius);
239 // Reduction ratio
241
242 // Main PANTR loop
243 // =========================================================================
244
245 ScopedMallocBlocker mb; // Don't allocate in the inner loop
246 while (true) {
247
248 // Check stopping criteria ---------------------------------------------
249
250 // Calculate ∇ψ(x̂ₖ)
251 if (need_grad_ψx̂)
252 eval_grad_ψx̂(*curr, grad_ψx̂);
253 bool have_grad_ψx̂ = need_grad_ψx̂;
254
256 problem, params.stop_crit, curr->p, curr->γ, curr->x, curr->x̂,
257 curr->ŷx̂, curr->grad_ψ, grad_ψx̂, work_n, cand->p);
258
259 // Print progress ------------------------------------------------------
260
261 bool do_print =
262 params.print_interval != 0 && k % params.print_interval == 0;
263 if (do_print)
264 print_progress_1(k, curr->fbe(), curr->ψx, curr->grad_ψ, curr->pᵀp,
265 curr->γ, εₖ, Δ);
266
267 // Return solution -----------------------------------------------------
268
269 auto time_elapsed = std::chrono::steady_clock::now() - start_time;
270 auto stop_status = Helpers::check_all_stop_conditions(
271 params, opts, time_elapsed, k, stop_signal, εₖ, no_progress);
272 if (stop_status != SolverStatus::Busy) {
273 do_progress_cb(k, *curr, null_vec<config_t>, grad_ψx̂, NaN<config_t>,
274 NaN<config_t>, εₖ, accept_candidate, stop_status);
275 bool do_final_print = params.print_interval != 0;
276 if (!do_print && do_final_print)
277 print_progress_1(k, curr->fbe(), curr->ψx, curr->grad_ψ,
278 curr->pᵀp, curr->γ, εₖ, Δ);
279 if (do_print || do_final_print)
280 print_progress_n(stop_status);
281 // Overwrite output arguments
282 if (stop_status == SolverStatus::Converged ||
283 stop_status == SolverStatus::Interrupted ||
284 opts.always_overwrite_results) {
285 auto &ŷ = curr->ŷx̂;
286 if (err_z.size() > 0)
287 err_z = (ŷ - y).cwiseQuotient(Σ);
288 x = curr->x̂;
289 y = curr->ŷx̂;
290 }
291 // Save statistics
292 s.iterations = k;
293 s.ε = εₖ;
294 s.elapsed_time = duration_cast<nanoseconds>(time_elapsed);
295 s.status = stop_status;
296 s.final_γ = curr->γ;
297 s.final_ψ = curr->ψx̂;
298 s.final_h = curr->hx̂;
299 s.final_φγ = curr->fbe();
300 return s;
301 }
302
303 // Perform FBS step ----------------------------------------------------
304
305 // x̂ₖ = xₖ + pₖ
306 auto compute_FBS_step = [&] {
307 assert(curr->L >= params.L_max || !qub_violated(*curr));
308 // Calculate ∇ψ(x̂ₖ)
309 if (not have_grad_ψx̂)
310 eval_grad_ψx̂(*curr, grad_ψx̂);
311 have_grad_ψx̂ = true;
312 prox->x = curr->x̂;
313 prox->ψx = curr->ψx̂;
314 prox->grad_ψ.swap(grad_ψx̂);
315 prox->γ = curr->γ;
316 prox->L = curr->L;
317 eval_ψ_grad_ψ(*prox);
318 eval_prox_grad_step(*prox);
319 };
320
321 // store x̂ₖ in prox->x
322 compute_FBS_step();
323
324 // Initialize direction
325 if (k == 0) {
327 direction.initialize(problem, y, Σ, prox->γ, prox->x, prox->x̂,
328 prox->p, prox->grad_ψ);
329 }
330
331 // Check if x̂ₖ + q provides sufficient decrease
332 auto compute_candidate_fbe = [&](crvec q) {
333 // Candidate step xₖ₊₁ = x̂ₖ + q
334 cand->x = prox->x + q;
335 // Compute ψ(xₖ₊₁), ∇ψ(xₖ₊₁)
336 eval_ψ_grad_ψ(*cand);
337 cand->γ = prox->γ;
338 cand->L = prox->L;
339 // Compute x̂ₖ₊₁, pₖ₊₁, ψ(x̂ₖ₊₁)
340 eval_prox_grad_step(*cand);
341
342 // Quadratic upper bound in candidate point
343 if (params.compute_ratio_using_new_stepsize) {
344 eval_ψx̂(*cand);
345 backtrack_qub(*cand);
346 }
347 };
348
349 // Check ratio ρ
350 auto compute_candidate_ratio = [this, prox, cand](real_t q_model) {
351 real_t ϕγ = prox->fbe();
352 real_t ϕγ_next = cand->fbe();
353 real_t margin = (1 + std::abs(ϕγ)) * params.TR_tolerance_factor;
354 real_t ρ = (ϕγ - ϕγ_next + margin) / (-q_model);
355 return params.ratio_approx_fbe_quadratic_model
356 ? ρ / (1 - params.Lipschitz.Lγ_factor)
357 : ρ;
358 };
359
360 // update trust radius accordingly
361 auto compute_updated_radius = [this](crvec q, real_t ρ, real_t old_Δ) {
362 // Very successful TR step
363 if (ρ >= params.ratio_threshold_good)
364 return std::max(params.radius_factor_good * q.norm(), old_Δ);
365 // Successful TR step
366 else if (ρ >= params.ratio_threshold_acceptable)
367 return old_Δ * params.radius_factor_acceptable;
368 // Unsuccessful TR step
369 else
370 return params.radius_factor_rejected * q.norm();
371 };
372
373 // Compute trust region direction from x̂ₖ
374 auto compute_trust_region_step = [&](rvec q, real_t Δ) {
375 auto t0 = std::chrono::steady_clock::now();
376 real_t q_model = direction.apply(prox->γ, prox->x, prox->x̂, prox->p,
377 prox->grad_ψ, Δ, q);
378 auto t1 = std::chrono::steady_clock::now();
379 direction_duration = t1 - t0;
380
381 // Check if step is valid
382 if (not q.allFinite()) {
383 *os << "Direction fail: not finite" << std::endl;
385 direction.reset();
386 return +inf<config_t>;
387 }
388 if (q_model >= 0) {
389 *os << "Direction fail: no decrease on model ("
390 << guanaqo::float_to_str(q_model) << ')' << std::endl;
392 direction.reset(); // Is there anything else we can do?
393 }
394 return q_model;
395 };
396
397 // Solve TR subproblem and update radius
398 accept_candidate = false;
399 bool accelerated_iteration = k > 0 || direction.has_initial_direction();
400 if (accelerated_iteration && !params.disable_acceleration) {
401 if (auto q_model = compute_trust_region_step(q, Δ); q_model < 0) {
402 compute_candidate_fbe(q);
403 ρ = compute_candidate_ratio(q_model);
404 accept_candidate = ρ >= params.ratio_threshold_acceptable;
405 Δ = std::fmax(compute_updated_radius(q, ρ, Δ),
406 params.min_radius);
407 }
408 }
409
410 // Progress callback
411 do_progress_cb(k, *curr, q, grad_ψx̂, Δ, ρ, εₖ, accept_candidate,
413
414 // Accept TR step
415 if (accept_candidate) {
416 // Quadratic upper bound in next iterate
417 if (!params.compute_ratio_using_new_stepsize) {
418 eval_ψx̂(*cand);
419 backtrack_qub(*cand);
420 }
421 // Flush L-BFGS if γ changed
422 if (prox->γ != cand->γ) {
423 direction.changed_γ(cand->γ, prox->γ);
424 if (params.recompute_last_prox_step_after_direction_reset) {
425 std::tie(prox->γ, prox->L) = std::tie(cand->γ, cand->L);
426 eval_prox_grad_step(*prox);
427 }
428 }
429 // update L-BFGS
430 s.direction_update_rejected += not direction.update(
431 prox->γ, cand->γ, prox->x, cand->x, prox->p, cand->p,
432 prox->grad_ψ, cand->grad_ψ);
433
434 if (do_print)
435 print_progress_2(q, ρ, true, direction_duration);
436 // Candidate becomes new iterate
437 std::swap(curr, cand);
438 }
439 // Fall back to proximal gradient step
440 else {
441 if (accelerated_iteration)
443 // Quadratic upper bound in x̂ₖ
444 eval_ψx̂(*prox);
445 backtrack_qub(*prox);
446 if (prox->γ != curr->γ) {
447 direction.changed_γ(prox->γ, curr->γ);
448 if (params.recompute_last_prox_step_after_direction_reset) {
449 std::tie(curr->γ, curr->L) = std::tie(prox->γ, prox->L);
450 eval_prox_grad_step(*curr);
451 }
452 }
453 // update direction
454 if (params.update_direction_on_prox_step)
455 s.direction_update_rejected += not direction.update(
456 curr->γ, prox->γ, curr->x, prox->x, curr->p, prox->p,
457 curr->grad_ψ, prox->grad_ψ);
458 if (do_print && accelerated_iteration)
459 print_progress_2(q, ρ, false, direction_duration);
460 // x̂ₖ becomes new iterate
461 std::swap(curr, prox);
462 }
463
464#ifndef NDEBUG
465 { // Make sure that we don't rely on any data from previous iterations,
466 // reset to NaN:
468 *prox = {n, m};
469 *cand = {n, m};
470 }
471#endif
472
473 // Advance step --------------------------------------------------------
474 ++k;
475 }
476 throw std::logic_error("[PANTR] loop error");
477}
478
479} // namespace alpaqa
std::string get_name() const
Definition pantr.tpp:20
std::function< void(const ProgressInfo &)> progress_cb
Definition pantr.hpp:209
Stats operator()(const Problem &problem, const SolveOptions &opts, rvec x, rvec y, crvec Σ, rvec err_z)
Definition pantr.tpp:25
Direction direction
Definition pantr.hpp:213
InnerSolveOptions< config_t > SolveOptions
Definition pantr.hpp:160
PANTRProgressInfo< config_t > ProgressInfo
Definition pantr.hpp:159
PANTRStats< config_t > Stats
Definition pantr.hpp:158
guanaqo::AtomicStopSignal stop_signal
Definition pantr.hpp:208
TypeErasedProblem< config_t > Problem
Definition pantr.hpp:155
std::ostream * os
Definition pantr.hpp:214
struct alpaqa::prox_fn prox
Compute the proximal mapping.
unsigned stepsize_backtracks
Definition pantr.hpp:108
unsigned direction_update_rejected
Definition pantr.hpp:110
unsigned accelerated_step_rejected
Definition pantr.hpp:107
SolverStatus
Exit status of a numerical solver such as ALM or PANOC.
@ Interrupted
Solver was interrupted by the user.
@ Converged
Converged and reached given tolerance.
@ NotFinite
Intermediate results were infinite or not-a-number.
std::chrono::nanoseconds time_progress_callback
Definition pantr.hpp:105
std::chrono::nanoseconds elapsed_time
Definition pantr.hpp:104
typename Conf::real_t real_t
Definition config.hpp:86
const rvec< Conf > null_vec
Global empty vector for convenience.
Definition config.hpp:193
constexpr const auto NaN
Definition config.hpp:114
unsigned direction_failures
Definition pantr.hpp:109
typename Conf::length_t length_t
Definition config.hpp:103
constexpr const auto inf
Definition config.hpp:112
typename Conf::rvec rvec
Definition config.hpp:91
typename Conf::crvec crvec
Definition config.hpp:92
typename Conf::vec vec
Definition config.hpp:88
unsigned iterations
Definition pantr.hpp:106
SolverStatus status
Definition pantr.hpp:102
static bool stop_crit_requires_grad_ψx̂(PANOCStopCrit crit)
static real_t initial_lipschitz_estimate(const Problem &problem, crvec x, crvec y, crvec Σ, real_t ε, real_t δ, real_t L_min, real_t L_max, real_t &ψ, rvec grad_ψ, rvec work_x, rvec work_grad_ψ, rvec work_n, rvec work_m)
static real_t calc_error_stop_crit(const Problem &problem, PANOCStopCrit crit, crvec pₖ, real_t γ, crvec xₖ, crvec x̂ₖ, crvec ŷₖ, crvec grad_ψₖ, crvec grad_̂ψₖ, rvec work_n1, rvec work_n2)
static SolverStatus check_all_stop_conditions(const ParamsT &params, const InnerSolveOptions< config_t > &opts, DurationT time_elapsed, unsigned iteration, const guanaqo::AtomicStopSignal &stop_signal, real_t εₖ, unsigned no_progress)