42 using std::chrono::nanoseconds;
43 auto os = opts.os ? opts.os : this->
os;
44 auto start_time = std::chrono::steady_clock::now();
47 const auto n = problem.get_num_variables();
48 const auto m = problem.get_num_constraints();
68 real_t fbe()
const {
return ψx + hx̂ + pᵀp / (2 * γ) + grad_ψᵀp; }
71 } iterates[3]{{n, m}, {n, m}, {n, m}};
72 Iterate *curr = &iterates[0];
73 Iterate *
prox = &iterates[1];
74 Iterate *cand = &iterates[2];
78 vec work_n(n), work_m(m);
80 std::chrono::nanoseconds direction_duration{};
84 auto eval_ψ_grad_ψ = [&problem, &y, &Σ, &work_n, &work_m](Iterate &i) {
85 i.ψx = problem.eval_augmented_lagrangian_and_gradient(
86 i.x, y, Σ, i.grad_ψ, work_n, work_m);
88 auto eval_prox_grad_step = [&problem](Iterate &i) {
90 problem.eval_proximal_gradient_step(i.γ, i.x, i.grad_ψ, i.x̂, i.p);
91 i.pᵀp = i.p.squaredNorm();
92 i.grad_ψᵀp = i.p.dot(i.grad_ψ);
94 auto eval_ψx̂ = [&problem, &y, &Σ](Iterate &i) {
95 i.ψx̂ = problem.eval_augmented_lagrangian(i.x̂, y, Σ, i.ŷx̂);
97 auto eval_grad_ψx̂ = [&problem, &work_n](Iterate &i,
rvec grad_ψx̂) {
98 problem.eval_lagrangian_gradient(i.x̂, i.ŷx̂, grad_ψx̂, work_n);
103 auto qub_violated = [
this](
const Iterate &i) {
105 (1 + std::abs(i.ψx)) *
params.quadratic_upperbound_tolerance_factor;
106 return i.ψx̂ > i.ψx + i.grad_ψᵀp +
real_t(0.5) * i.L * i.pᵀp + margin;
108 auto backtrack_qub = [&](Iterate &i) {
109 while (i.L <
params.L_max && qub_violated(i)) {
113 eval_prox_grad_step(i);
121 std::array<char, 64> print_buf;
122 auto print_real = [
this, &print_buf](
real_t x) {
123 return float_to_str_vw(print_buf, x,
params.print_precision);
125 auto print_real3 = [&print_buf](
real_t x) {
126 return float_to_str_vw(print_buf, x, 3);
132 *
os <<
"┌─[PANTR]\n";
134 *
os <<
"├─ " << std::setw(6) << k <<
" ──\n";
135 *
os <<
"│ φγ = " << print_real(φₖ)
136 <<
", ψ = " << print_real(ψₖ)
137 <<
", ‖∇ψ‖ = " << print_real(grad_ψₖ.norm())
138 <<
", ‖p‖ = " << print_real(std::sqrt(pₖᵀpₖ))
139 <<
", γ = " << print_real(γₖ)
140 <<
", Δ = " << print_real(Δₖ)
141 <<
", ε = " << print_real(εₖ) <<
'\n';
143 auto print_progress_2 = [&](
crvec qₖ,
real_t ρₖ,
bool accept,
144 std::chrono::nanoseconds direction_duration) {
145 *
os <<
"│ ‖q‖ = " << print_real(qₖ.norm())
146 <<
", ρ = " << print_real3(ρₖ)
149 static_cast<real_t>(1e6) *
150 std::chrono::duration<real_t>(direction_duration).count())
152 << (accept ?
"\033[0;32maccepted\033[0m"
153 :
"\033[0;35mrejected\033[0m")
157 *
os <<
"└─ " << status <<
" ──"
160 auto do_progress_cb = [
this, &s, &problem, &Σ, &y,
161 &opts](
unsigned k, Iterate &it,
crvec q,
180 .grad_ψ_hat = grad_ψx̂,
186 .τ =
static_cast<real_t>(accepted),
190 .outer_iter = opts.outer_iter,
203 if (
params.Lipschitz.L_0 <= 0) {
205 problem, curr->x, y, Σ,
params.Lipschitz.ε,
params.Lipschitz.δ,
207 curr->ψx, curr->grad_ψ, curr->x̂, cand->grad_ψ,
212 curr->L =
params.Lipschitz.L_0;
214 eval_ψ_grad_ψ(*curr);
216 if (not std::isfinite(curr->L)) {
220 curr->γ =
params.Lipschitz.Lγ_factor / curr->L;
224 eval_prox_grad_step(*curr);
226 backtrack_qub(*curr);
231 bool accept_candidate =
false;
233 unsigned no_progress = 0;
236 if (!std::isfinite(Δ) || Δ == 0)
237 Δ =
real_t(0.1) * curr->grad_ψ.norm();
238 Δ = std::fmax(Δ,
params.min_radius);
252 eval_grad_ψx̂(*curr, grad_ψx̂);
253 bool have_grad_ψx̂ = need_grad_ψx̂;
256 problem,
params.stop_crit, curr->p, curr->γ, curr->x, curr->x̂,
257 curr->ŷx̂, curr->grad_ψ, grad_ψx̂, work_n, cand->p);
262 params.print_interval != 0 && k %
params.print_interval == 0;
264 print_progress_1(k, curr->fbe(), curr->ψx, curr->grad_ψ, curr->pᵀp,
269 auto time_elapsed = std::chrono::steady_clock::now() - start_time;
275 bool do_final_print =
params.print_interval != 0;
276 if (!do_print && do_final_print)
277 print_progress_1(k, curr->fbe(), curr->ψx, curr->grad_ψ,
278 curr->pᵀp, curr->γ, εₖ, Δ);
279 if (do_print || do_final_print)
280 print_progress_n(stop_status);
284 opts.always_overwrite_results) {
286 if (err_z.size() > 0)
287 err_z = (ŷ - y).cwiseQuotient(Σ);
294 s.
elapsed_time = duration_cast<nanoseconds>(time_elapsed);
306 auto compute_FBS_step = [&] {
307 assert(curr->L >=
params.L_max || !qub_violated(*curr));
309 if (not have_grad_ψx̂)
310 eval_grad_ψx̂(*curr, grad_ψx̂);
311 have_grad_ψx̂ =
true;
313 prox->ψx = curr->ψx̂;
314 prox->grad_ψ.swap(grad_ψx̂);
317 eval_ψ_grad_ψ(*
prox);
318 eval_prox_grad_step(*
prox);
332 auto compute_candidate_fbe = [&](
crvec q) {
334 cand->x =
prox->x + q;
336 eval_ψ_grad_ψ(*cand);
340 eval_prox_grad_step(*cand);
343 if (
params.compute_ratio_using_new_stepsize) {
345 backtrack_qub(*cand);
350 auto compute_candidate_ratio = [
this,
prox, cand](
real_t q_model) {
352 real_t ϕγ_next = cand->fbe();
353 real_t margin = (1 + std::abs(ϕγ)) *
params.TR_tolerance_factor;
354 real_t ρ = (ϕγ - ϕγ_next + margin) / (-q_model);
355 return params.ratio_approx_fbe_quadratic_model
356 ? ρ / (1 -
params.Lipschitz.Lγ_factor)
363 if (ρ >=
params.ratio_threshold_good)
364 return std::max(
params.radius_factor_good * q.norm(), old_Δ);
366 else if (ρ >=
params.ratio_threshold_acceptable)
367 return old_Δ *
params.radius_factor_acceptable;
370 return params.radius_factor_rejected * q.norm();
374 auto compute_trust_region_step = [&](
rvec q,
real_t Δ) {
375 auto t0 = std::chrono::steady_clock::now();
378 auto t1 = std::chrono::steady_clock::now();
379 direction_duration = t1 - t0;
382 if (not q.allFinite()) {
383 *
os <<
"Direction fail: not finite" << std::endl;
389 *
os <<
"Direction fail: no decrease on model ("
390 << guanaqo::float_to_str(q_model) <<
')' << std::endl;
398 accept_candidate =
false;
399 bool accelerated_iteration = k > 0 ||
direction.has_initial_direction();
400 if (accelerated_iteration && !
params.disable_acceleration) {
401 if (
auto q_model = compute_trust_region_step(q, Δ); q_model < 0) {
402 compute_candidate_fbe(q);
403 ρ = compute_candidate_ratio(q_model);
404 accept_candidate = ρ >=
params.ratio_threshold_acceptable;
405 Δ = std::fmax(compute_updated_radius(q, ρ, Δ),
411 do_progress_cb(k, *curr, q, grad_ψx̂, Δ, ρ, εₖ, accept_candidate,
415 if (accept_candidate) {
417 if (!
params.compute_ratio_using_new_stepsize) {
419 backtrack_qub(*cand);
422 if (
prox->γ != cand->γ) {
424 if (
params.recompute_last_prox_step_after_direction_reset) {
425 std::tie(
prox->γ,
prox->L) = std::tie(cand->γ, cand->L);
426 eval_prox_grad_step(*
prox);
432 prox->grad_ψ, cand->grad_ψ);
435 print_progress_2(q, ρ,
true, direction_duration);
437 std::swap(curr, cand);
441 if (accelerated_iteration)
445 backtrack_qub(*
prox);
446 if (
prox->γ != curr->γ) {
448 if (
params.recompute_last_prox_step_after_direction_reset) {
449 std::tie(curr->γ, curr->L) = std::tie(
prox->γ,
prox->L);
450 eval_prox_grad_step(*curr);
454 if (
params.update_direction_on_prox_step)
457 curr->grad_ψ,
prox->grad_ψ);
458 if (do_print && accelerated_iteration)
459 print_progress_2(q, ρ,
false, direction_duration);
461 std::swap(curr,
prox);
476 throw std::logic_error(
"[PANTR] loop error");