alpaqa 1.0.0a8
Nonconvex constrained optimization
Loading...
Searching...
No Matches
pantr.tpp
Go to the documentation of this file.
1#pragma once
2
4
5#include <cassert>
6#include <cmath>
7#include <iomanip>
8#include <iostream>
9#include <stdexcept>
10
16#include <alpaqa/util/timed.hpp>
17
18namespace alpaqa {
19
20template <class DirectionProviderT>
22 return "PANTRSolver<" + std::string(direction.get_name()) + ">";
23}
24
25template <class DirectionProviderT>
27 /// [in] Problem description
28 const Problem &problem,
29 /// [in] Solve options
30 const SolveOptions &opts,
31 /// [inout] Decision variable @f$ x @f$
32 rvec x,
33 /// [inout] Lagrange multipliers @f$ y @f$
34 rvec y,
35 /// [in] Constraint weights @f$ \Sigma @f$
36 crvec Σ,
37 /// [out] Slack variable error @f$ g(x) - \Pi_D(g(x) + \Sigma^{-1} y) @f$
38 rvec err_z) -> Stats {
39
40 if (opts.check)
41 problem.check();
42
43 using std::chrono::nanoseconds;
44 auto os = opts.os ? opts.os : this->os;
45 auto start_time = std::chrono::steady_clock::now();
46 Stats s;
47
48 const auto n = problem.get_n();
49 const auto m = problem.get_m();
50
51 // Represents an iterate in the algorithm, keeping track of some
52 // intermediate values and function evaluations.
53 struct Iterate {
54 vec x; //< Decision variables
55 vec x̂; //< Decision variables after proximal gradient step
56 vec grad_ψ; //< Gradient of cost in x
57 vec p; //< Proximal gradient step in x
58 vec ŷx̂; //< Candidate Lagrange multipliers in x̂
59 real_t ψx = NaN<config_t>; //< Cost in x
60 real_t ψx̂ = NaN<config_t>; //< Cost in x̂
61 real_t γ = NaN<config_t>; //< Step size γ
62 real_t L = NaN<config_t>; //< Lipschitz estimate L
63 real_t pᵀp = NaN<config_t>; //< Norm squared of p
64 real_t grad_ψᵀp = NaN<config_t>; //< Dot product of gradient and p
65 real_t hx̂ = NaN<config_t>; //< Non-smooth function value in x̂
66
67 // @pre @ref ψx, @ref hx̂ @ref pᵀp, @ref grad_ψᵀp
68 // @return φγ
69 real_t fbe() const { return ψx + hx̂ + pᵀp / (2 * γ) + grad_ψᵀp; }
70
71 Iterate(length_t n, length_t m) : x(n), x̂(n), grad_ψ(n), p(n), ŷx̂(m) {}
72 } iterates[3]{{n, m}, {n, m}, {n, m}};
73 Iterate *curr = &iterates[0];
74 Iterate *prox = &iterates[1];
75 Iterate *cand = &iterates[2];
76
77 bool need_grad_ψx̂ = Helpers::stop_crit_requires_grad_ψx̂(params.stop_crit);
78 vec grad_ψx̂(n);
79 vec work_n(n), work_m(m);
80 vec q(n); // (quasi-)Newton step Hₖ pₖ
81 std::chrono::nanoseconds direction_duration{};
82
83 // Problem functions -------------------------------------------------------
84
85 auto eval_ψ_grad_ψ = [&problem, &y, &Σ, &work_n, &work_m](Iterate &i) {
86 i.ψx = problem.eval_ψ_grad_ψ(i.x, y, Σ, i.grad_ψ, work_n, work_m);
87 };
88 auto eval_prox_grad_step = [&problem](Iterate &i) {
89 i.hx̂ = problem.eval_prox_grad_step(i.γ, i.x, i.grad_ψ, i.x̂, i.p);
90 i.pᵀp = i.p.squaredNorm();
91 i.grad_ψᵀp = i.p.dot(i.grad_ψ);
92 };
93 auto eval_ψx̂ = [&problem, &y, &Σ](Iterate &i) {
94 i.ψx̂ = problem.eval_ψ(i.x̂, y, Σ, i.ŷx̂);
95 };
96 auto eval_grad_ψx̂ = [&problem, &work_n](Iterate &i, rvec grad_ψx̂) {
97 problem.eval_grad_L(i.x̂, i.ŷx̂, grad_ψx̂, work_n);
98 };
99
100 // Helper functions --------------------------------------------------------
101
102 auto qub_violated = [this](const Iterate &i) {
103 real_t margin =
104 (1 + std::abs(i.ψx)) * params.quadratic_upperbound_tolerance_factor;
105 return i.ψx̂ > i.ψx + i.grad_ψᵀp + real_t(0.5) * i.L * i.pᵀp + margin;
106 };
107 auto backtrack_qub = [&](Iterate &i) {
108 while (i.L < params.L_max && qub_violated(i)) {
109 i.γ /= 2;
110 i.L *= 2;
111 // Compute x̂, p, ψ(x̂)
112 eval_prox_grad_step(i);
113 eval_ψx̂(i);
114 }
115 };
116
117 // Printing ----------------------------------------------------------------
118
119 std::array<char, 64> print_buf;
120 auto print_real = [this, &print_buf](real_t x) {
121 return float_to_str_vw(print_buf, x, params.print_precision);
122 };
123 auto print_real3 = [&print_buf](real_t x) {
124 return float_to_str_vw(print_buf, x, 3);
125 };
126
127 auto print_progress_1 = [&](unsigned k, real_t φₖ, real_t ψₖ, crvec grad_ψₖ,
128 real_t pₖᵀpₖ, real_t γₖ, real_t εₖ, real_t Δₖ) {
129 if (k == 0)
130 *os << "┌─[PANTR]\n";
131 else
132 *os << "├─ " << std::setw(6) << k << " ──\n";
133 *os << "│ φγ = " << print_real(φₖ) //
134 << ", ψ = " << print_real(ψₖ) //
135 << ", ‖∇ψ‖ = " << print_real(grad_ψₖ.norm()) //
136 << ", ‖p‖ = " << print_real(std::sqrt(pₖᵀpₖ)) //
137 << ", γ = " << print_real(γₖ) //
138 << ", Δ = " << print_real(Δₖ) //
139 << ", ε = " << print_real(εₖ) << '\n';
140 };
141 auto print_progress_2 = [&](crvec qₖ, real_t ρₖ, bool accept,
142 std::chrono::nanoseconds direction_duration) {
143 *os << "│ ‖q‖ = " << print_real(qₖ.norm()) //
144 << ", ρ = " << print_real3(ρₖ) //
145 << ", time = "
146 << print_real3(
147 static_cast<real_t>(1e6) *
148 std::chrono::duration<real_t>(direction_duration).count())
149 << " µs, "
150 << (accept ? "\033[0;32maccepted\033[0m"
151 : "\033[0;35mrejected\033[0m") //
152 << std::endl; // Flush for Python buffering
153 };
154 auto print_progress_n = [&](SolverStatus status) {
155 *os << "└─ " << status << " ──"
156 << std::endl; // Flush for Python buffering
157 };
158 auto do_progress_cb = [this, &s, &problem, &Σ, &y,
159 &opts](unsigned k, Iterate &it, crvec q,
160 crvec grad_ψx̂, real_t Δ, real_t ρ, real_t εₖ,
161 SolverStatus status) {
162 if (!progress_cb)
163 return;
166 progress_cb(ProgressInfo{
167 .k = k,
168 .status = status,
169 .x = it.x,
170 .p = it.p,
171 .norm_sq_p = it.pᵀp,
172 .x̂ = it.x̂,
173 .φγ = it.fbe(),
174 .ψ = it.ψx,
175 .grad_ψ = it.grad_ψ,
176 .ψ_hat = it.ψx̂,
177 .grad_ψ_hat = grad_ψx̂,
178 .q = q,
179 .L = it.L,
180 .γ = it.γ,
181 .Δ = Δ,
182 .ρ = ρ,
183 .ε = εₖ,
184 .Σ = Σ,
185 .y = y,
186 .outer_iter = opts.outer_iter,
187 .problem = &problem,
188 .params = &params,
189 });
190 };
191
192 // Initialization ----------------------------------------------------------
193
194 curr->x = x;
195
196 // Estimate Lipschitz constant ---------------------------------------------
197
198 // Finite difference approximation of ∇²ψ in starting point
199 if (params.Lipschitz.L_0 <= 0) {
200 curr->L = Helpers::initial_lipschitz_estimate(
201 problem, curr->x, y, Σ, params.Lipschitz.ε, params.Lipschitz.δ,
202 params.L_min, params.L_max,
203 /* in ⟹ out */ curr->ψx, curr->grad_ψ, curr->x̂, cand->grad_ψ,
204 work_n, work_m);
205 }
206 // Initial Lipschitz constant provided by the user
207 else {
208 curr->L = params.Lipschitz.L_0;
209 // Calculate ψ(xₖ), ∇ψ(x₀)
210 eval_ψ_grad_ψ(*curr);
211 }
212 if (not std::isfinite(curr->L)) {
214 return s;
215 }
216 curr->γ = params.Lipschitz.Lγ_factor / curr->L;
217
218 // First proximal gradient step --------------------------------------------
219
220 eval_prox_grad_step(*curr);
221 eval_ψx̂(*curr);
222 backtrack_qub(*curr);
223
224 // Loop data ---------------------------------------------------------------
225
226 unsigned k = 0; // iteration
227 bool accept_candidate = false;
228 // Keep track of how many successive iterations didn't update the iterate
229 unsigned no_progress = 0;
230 // Trust radius
231 real_t Δ = params.initial_radius;
232 if (!std::isfinite(Δ) || Δ == 0)
233 Δ = real_t(0.1) * curr->grad_ψ.norm();
234 Δ = std::fmax(Δ, params.min_radius);
235 // Reduction ratio
236 real_t ρ = NaN<config_t>;
237
238 // Main PANTR loop
239 // =========================================================================
240
241 ScopedMallocBlocker mb; // Don't allocate in the inner loop
242 while (true) {
243
244 // Check stopping criteria ---------------------------------------------
245
246 // Calculate ∇ψ(x̂ₖ)
247 if (need_grad_ψx̂)
248 eval_grad_ψx̂(*curr, grad_ψx̂);
249 bool have_grad_ψx̂ = need_grad_ψx̂;
250
251 real_t εₖ = Helpers::calc_error_stop_crit(
252 problem, params.stop_crit, curr->p, curr->γ, curr->x, curr->x̂,
253 curr->ŷx̂, curr->grad_ψ, grad_ψx̂, work_n, cand->p);
254
255 // Print progress ------------------------------------------------------
256
257 bool do_print =
258 params.print_interval != 0 && k % params.print_interval == 0;
259 if (do_print)
260 print_progress_1(k, curr->fbe(), curr->ψx, curr->grad_ψ, curr->pᵀp,
261 curr->γ, εₖ, Δ);
262
263 // Return solution -----------------------------------------------------
264
265 auto time_elapsed = std::chrono::steady_clock::now() - start_time;
266 auto stop_status = Helpers::check_all_stop_conditions(
267 params, opts, time_elapsed, k, stop_signal, εₖ, no_progress);
268 if (stop_status != SolverStatus::Busy) {
269 do_progress_cb(k, *curr, null_vec<config_t>, grad_ψx̂, NaN<config_t>,
270 NaN<config_t>, εₖ, stop_status);
271 bool do_final_print = params.print_interval != 0;
272 if (!do_print && do_final_print)
273 print_progress_1(k, curr->fbe(), curr->ψx, curr->grad_ψ,
274 curr->pᵀp, curr->γ, εₖ, Δ);
275 if (do_print || do_final_print)
276 print_progress_n(stop_status);
277 // Overwrite output arguments
278 if (stop_status == SolverStatus::Converged ||
279 stop_status == SolverStatus::Interrupted ||
280 opts.always_overwrite_results) {
281 auto &ŷ = curr->ŷx̂;
282 if (err_z.size() > 0)
283 err_z = Σ.asDiagonal().inverse() * (ŷ - y);
284 x = std::move(curr->x̂);
285 y = std::move(curr->ŷx̂);
286 }
287 // Save statistics
288 s.iterations = k;
289 s.ε = εₖ;
290 s.elapsed_time = duration_cast<nanoseconds>(time_elapsed);
291 s.status = stop_status;
292 s.final_γ = curr->γ;
293 s.final_ψ = curr->ψx̂;
294 s.final_h = curr->hx̂;
295 s.final_φγ = curr->fbe();
296 return s;
297 }
298
299 // Perform FBS step ----------------------------------------------------
300
301 // x̂ₖ = xₖ + pₖ
302 auto compute_FBS_step = [&] {
303 assert(curr->L >= params.L_max || !qub_violated(*curr));
304 // Calculate ∇ψ(x̂ₖ)
305 if (not have_grad_ψx̂)
306 eval_grad_ψx̂(*curr, grad_ψx̂);
307 have_grad_ψx̂ = true;
308 prox->x = curr->x̂;
309 prox->ψx = curr->ψx̂;
310 prox->grad_ψ.swap(grad_ψx̂);
311 prox->γ = curr->γ;
312 prox->L = curr->L;
313 eval_ψ_grad_ψ(*prox);
314 eval_prox_grad_step(*prox);
315 };
316
317 // store x̂ₖ in prox->x
318 compute_FBS_step();
319
320 // Initialize direction
321 if (k == 0) {
323 direction.initialize(problem, y, Σ, prox->γ, prox->x, prox->x̂,
324 prox->p, prox->grad_ψ);
325 }
326
327 // Check if x̂ₖ + q provides sufficient decrease
328 auto compute_candidate_fbe = [&](crvec q) {
329 // Candidate step xₖ₊₁ = x̂ₖ + q
330 cand->x = prox->x + q;
331 // Compute ψ(xₖ₊₁), ∇ψ(xₖ₊₁)
332 eval_ψ_grad_ψ(*cand);
333 cand->γ = prox->γ;
334 cand->L = prox->L;
335 // Compute x̂ₖ₊₁, pₖ₊₁, ψ(x̂ₖ₊₁)
336 eval_prox_grad_step(*cand);
337
338 // Quadratic upper bound in candidate point
339 if (params.compute_ratio_using_new_stepsize) {
340 eval_ψx̂(*cand);
341 backtrack_qub(*cand);
342 }
343 };
344
345 // Check ratio ρ
346 auto compute_candidate_ratio = [this, prox, cand](real_t q_model) {
347 real_t ϕγ = prox->fbe();
348 real_t ϕγ_next = cand->fbe();
349 real_t margin = (1 + std::abs(ϕγ)) * params.TR_tolerance_factor;
350 real_t ρ = (ϕγ - ϕγ_next + margin) / (-q_model);
351 return params.ratio_approx_fbe_quadratic_model
352 ? ρ / (1 - params.Lipschitz.Lγ_factor)
353 : ρ;
354 };
355
356 // update trust radius accordingly
357 auto compute_updated_radius = [this](crvec q, real_t ρ, real_t old_Δ) {
358 // Very successful TR step
359 if (ρ >= params.ratio_threshold_good)
360 return std::max(params.radius_factor_good * q.norm(), old_Δ);
361 // Successful TR step
362 else if (ρ >= params.ratio_threshold_acceptable)
363 return old_Δ * params.radius_factor_acceptable;
364 // Unsuccessful TR step
365 else
366 return params.radius_factor_rejected * q.norm();
367 };
368
369 // Compute trust region direction from x̂ₖ
370 auto compute_trust_region_step = [&](rvec q, real_t Δ) {
371 auto t0 = std::chrono::steady_clock::now();
372 real_t q_model = direction.apply(prox->γ, prox->x, prox->x̂, prox->p,
373 prox->grad_ψ, Δ, q);
374 auto t1 = std::chrono::steady_clock::now();
375 direction_duration = t1 - t0;
376
377 // Check if step is valid
378 if (not q.allFinite()) {
379 *os << "Direction fail: not finite" << std::endl;
381 direction.reset();
382 return +inf<config_t>;
383 }
384 if (q_model >= 0) {
385 *os << "Direction fail: no decrease on model (" << q_model
386 << ')' << std::endl;
388 direction.reset(); // Is there anything else we can do?
389 }
390 return q_model;
391 };
392
393 // Solve TR subproblem and update radius
394 accept_candidate = false;
395 bool accelerated_iteration = k > 0 || direction.has_initial_direction();
396 if (accelerated_iteration && !params.disable_acceleration) {
397 if (auto q_model = compute_trust_region_step(q, Δ); q_model < 0) {
398 compute_candidate_fbe(q);
399 ρ = compute_candidate_ratio(q_model);
400 accept_candidate = ρ >= params.ratio_threshold_acceptable;
401 Δ = std::fmax(compute_updated_radius(q, ρ, Δ),
402 params.min_radius);
403 }
404 }
405
406 // Progress callback
407 do_progress_cb(k, *curr, q, grad_ψx̂, Δ, ρ, εₖ, SolverStatus::Busy);
408
409 // Accept TR step
410 if (accept_candidate) {
411 // Quadratic upper bound in next iterate
412 if (!params.compute_ratio_using_new_stepsize) {
413 eval_ψx̂(*cand);
414 backtrack_qub(*cand);
415 }
416 // Flush L-BFGS if γ changed
417 if (prox->γ != cand->γ) {
418 direction.changed_γ(cand->γ, prox->γ);
419 if (params.recompute_last_prox_step_after_direction_reset) {
420 std::tie(prox->γ, prox->L) = std::tie(cand->γ, cand->L);
421 eval_prox_grad_step(*prox);
422 }
423 }
424 // update L-BFGS
425 s.direction_update_rejected += not direction.update(
426 prox->γ, cand->γ, prox->x, cand->x, prox->p, cand->p,
427 prox->grad_ψ, cand->grad_ψ);
428
429 if (do_print)
430 print_progress_2(q, ρ, true, direction_duration);
431 // Candidate becomes new iterate
432 std::swap(curr, cand);
433 }
434 // Fall back to proximal gradient step
435 else {
436 if (accelerated_iteration)
438 // Quadratic upper bound in x̂ₖ
439 eval_ψx̂(*prox);
440 backtrack_qub(*prox);
441 if (prox->γ != curr->γ) {
442 direction.changed_γ(prox->γ, curr->γ);
443 if (params.recompute_last_prox_step_after_direction_reset) {
444 std::tie(curr->γ, curr->L) = std::tie(prox->γ, prox->L);
445 eval_prox_grad_step(*curr);
446 }
447 }
448 // update direction
449 if (params.update_direction_on_prox_step)
450 s.direction_update_rejected += not direction.update(
451 curr->γ, prox->γ, curr->x, prox->x, curr->p, prox->p,
452 curr->grad_ψ, prox->grad_ψ);
453 if (do_print && accelerated_iteration)
454 print_progress_2(q, ρ, false, direction_duration);
455 // x̂ₖ becomes new iterate
456 std::swap(curr, prox);
457 }
458
459#ifndef NDEBUG
460 { // Make sure that we don't rely on any data from previous iterations,
461 // reset to NaN:
463 *prox = {n, m};
464 *cand = {n, m};
465 }
466#endif
467
468 // Advance step --------------------------------------------------------
469 ++k;
470 }
471 throw std::logic_error("[PANTR] loop error");
472}
473
474} // namespace alpaqa
std::string get_name() const
Definition: pantr.tpp:21
Stats operator()(const Problem &problem, const SolveOptions &opts, rvec x, rvec y, crvec Σ, rvec err_z)
Definition: pantr.tpp:26
unsigned direction_update_rejected
Definition: pantr.hpp:105
real_t final_φγ
Definition: pantr.hpp:109
unsigned accelerated_step_rejected
Definition: pantr.hpp:102
SolverStatus
Exit status of a numerical solver such as ALM or PANOC.
@ Interrupted
Solver was interrupted by the user.
@ Busy
In progress.
@ Converged
Converged and reached given tolerance.
@ NotFinite
Intermediate results were infinite or not-a-number.
std::chrono::nanoseconds time_progress_callback
Definition: pantr.hpp:100
std::chrono::nanoseconds elapsed_time
Definition: pantr.hpp:99
typename Conf::real_t real_t
Definition: config.hpp:51
unsigned direction_failures
Definition: pantr.hpp:104
typename Conf::length_t length_t
Definition: config.hpp:62
typename Conf::rvec rvec
Definition: config.hpp:55
std::string_view float_to_str_vw(auto &buf, double value, int precision=std::numeric_limits< double >::max_digits10)
Definition: print.tpp:39
typename Conf::crvec crvec
Definition: config.hpp:56
typename Conf::vec vec
Definition: config.hpp:52
unsigned iterations
Definition: pantr.hpp:101
SolverStatus status
Definition: pantr.hpp:97