cyqlone develop
Fast, parallel and vectorized solver for linear systems with optimal control structure.
Loading...
Searching...
No Matches
solve.tpp
Go to the documentation of this file.
1#pragma once
2
3#include <cyqlone/linalg.hpp>
5
6#include <iostream>
7
8/**
9 * @file
10 * Functions for the factorization and solution of Newton system in QPALM for the Cyqlone backend.
11 */
12
13namespace CYQLONE_NS(cyqlone::qpalm) {
14
15template <index_t VL, StorageOrder DefaultOrder>
16void CyQPALMBackend<VL, DefaultOrder>::solve(Context &ctx, [[maybe_unused]] const var_vec_t &x,
17 const var_vec_t &grad, const var_vec_t &Mᵀλ,
18 const var_vec_t &Aᵀŷ, const eq_constr_vec_t &Mxb,
19 real_t S, [[maybe_unused]] const ineq_constr_vec_t &Σ,
20 const active_set_t &J, //
22 eq_constr_vec_t &Δλ, var_vec_t &MᵀΔλ) {
23 const auto rhs = [&](auto, auto, auto di, auto gi, auto Mᵀλi, auto Aᵀŷi, auto Mxbi, auto Δλi) {
24 linalg::axpy<0>(di, {-1, -1, -1}, gi, Mᵀλi, Aᵀŷi);
25 batmat::linalg::copy(Mxbi, Δλi);
26 };
27 ocp.foreach_stage(ctx, rhs, d, grad, Mᵀλ, Aᵀŷ, Mxb, Δλ);
28 if (settings.print_residuals)
29 print_solve_rhs_norms(ctx, d, Δλ, grad, Mᵀλ, Aᵀŷ);
31 auto t = get_timed(&Timings::factor);
32 ocp.factor_solve(ctx, S, J, d, Δλ); // (d, Δλ) ← L⁻¹ (d, Δλ)
33 if (ctx.is_master()) {
34 // No synchronization needed here, barriers in factor_solve and solve_reverse
35 reset_factorization = false;
36 update_pending = false;
37 num_updates = 0;
38 ++stats.num_factor;
39 }
40 } else if (update_pending) {
42 ocp.update_solve(ctx, ΔΣ, d, Δλ); // (d, Δλ) ← L⁻¹ (d, Δλ)
43 if (ctx.is_master()) {
44 // No synchronization needed here, barriers in update_solve and solve_reverse
45 update_pending = false;
46 }
47 } else { // (d, Δλ) ← L⁻¹ (d, Δλ)
48 auto t = get_timed(&Timings::solve);
49 ocp.solve_forward(ctx, d, Δλ);
50 }
51 { // (d, Δλ) ← L⁻ᵀ (d, Δλ) and MᵀΔλ ← Mᵀ Δλ
52 auto t = get_timed(&Timings::solve);
53 ocp.solve_reverse_mul(ctx, d, Δλ, MᵀΔλ);
54 }
55 { // Ad ← A d
56 auto t = get_timed(&Timings::solve_A);
57 mat_vec_A(ctx, d, Ad);
58 }
59 { // ξ ← Q d + S⁻¹ d
61 ocp.cost_gradient(ctx, d, 1 / S, d, 0, ξ);
62 }
63 if (settings.print_residuals)
64 print_solve_resid_norms(ctx, x, d, grad, ξ, Mᵀλ, Aᵀŷ, MᵀΔλ, Ad, J);
65}
66
67template <index_t VL, StorageOrder DefaultOrder>
69 const eq_constr_vec_t &Δλ,
70 const var_vec_t &grad,
71 const var_vec_t &Mᵀλ,
72 const var_vec_t &Aᵀŷ) const {
73 int prec = settings.print_precision;
74 auto grad_norm = norm_inf_l1_sq(ctx, d);
75 auto constr_norm = norm_inf_l1_sq(ctx, Δλ);
76 auto cost_grad_norm = norm_inf_l1_sq(ctx, grad);
77 auto eq_constr_grad_norm = norm_inf_l1_sq(ctx, Mᵀλ);
78 auto ineq_constr_grad_norm = norm_inf_l1_sq(ctx, Aᵀŷ);
79 if (ctx.is_master()) {
80 std::cout << " gradient: abs∞="
81 << guanaqo::float_to_str(grad_norm.norm_inf(), prec)
82 << ", abs₂=" << guanaqo::float_to_str(grad_norm.norm_2(), prec)
83 << " {grad cost=" << guanaqo::float_to_str(cost_grad_norm.norm_2())
84 << ", Mᵀλ=" << guanaqo::float_to_str(eq_constr_grad_norm.norm_2())
85 << ", Aᵀŷ=" << guanaqo::float_to_str(ineq_constr_grad_norm.norm_2()) << "}\n"
86 << " constraints: abs∞="
87 << guanaqo::float_to_str(constr_norm.norm_inf(), prec)
88 << ", abs₂=" << guanaqo::float_to_str(constr_norm.norm_2(), prec) << "\n";
89 }
90}
91
92template <index_t VL, StorageOrder DefaultOrder>
94 Context &ctx, const var_vec_t &x, const var_vec_t &d, const var_vec_t &grad, const var_vec_t &ξ,
95 const var_vec_t &Mᵀλ, const var_vec_t &Aᵀŷ, const var_vec_t &MᵀΔλ, const ineq_constr_vec_t &Ad,
96 const active_set_t &J) {
97 // allocate workspaces
98 if (temp_var.size() == 0 || temp_eq.size() == 0 || temp_ineq.size() == 0)
99 ctx.run_single_sync([this] {
100 temp_var = var_vec();
103 });
105 int prec = settings.print_precision;
106 using batmat::datapar::hmax;
107 using std::abs;
108 using std::isfinite;
109 using std::max;
110 using std::sqrt;
111 // Compute the product Σ ⊙ (A d) and then Aᵀ(Σ ⊙ (A d))
112 const auto ΣAd = [&](auto, auto, auto Ji, auto Adi, auto temp_ineqi) {
113 linalg::hadamard(Ji, Adi, temp_ineqi);
114 };
115 ocp.foreach_stage(ctx, ΣAd, J, Ad, temp_ineq);
116 auto &res = temp_var;
117 mat_vec_AT(ctx, temp_ineq, res);
118 // Compute the augmented Lagrangian gradient norms
119 real_t r_norm_sq = 0, grad_norm_sq = 0, r_norm_inf = 0;
120 real_t r_kkt_norm_sq = 0, r_kkt_norm_inf = 0;
121 const auto resid_simd = [&](auto gradi, auto ξi, auto Mᵀλi, auto Aᵀŷi, auto MᵀΔλi, auto ri) {
122 auto gi = NeumaierSum(gradi) + Mᵀλi + Aᵀŷi;
123 simd r_kkt_i = gi + MᵀΔλi + ξi;
124 ri += gi + MᵀΔλi + ξi;
125 r_norm_inf = max(r_norm_inf, hmax(abs(ri)));
126 r_norm_sq += reduce(ri * ri);
127 r_kkt_norm_inf = max(r_kkt_norm_inf, hmax(abs(r_kkt_i)));
128 r_kkt_norm_sq += reduce(r_kkt_i * r_kkt_i);
129 grad_norm_sq += reduce(simd{gi} * simd{gi});
130 return ri;
131 };
132 const auto resid_batch = [&](auto, auto, auto gradi, auto ξi, auto Mᵀλi, auto Aᵀŷi, auto MᵀΔλi,
133 auto resi) {
134 linalg::transform_elementwise(resid_simd, resi, //
135 gradi, ξi, Mᵀλi, Aᵀŷi, MᵀΔλi, resi);
136 };
137 ocp.foreach_stage(ctx, resid_batch, grad, ξ, Mᵀλ, Aᵀŷ, MᵀΔλ, res);
138 r_norm_sq = ctx.reduce(r_norm_sq);
139 grad_norm_sq = ctx.reduce(grad_norm_sq);
140 r_norm_inf = ctx.reduce(r_norm_inf, [](auto a, auto b) { return max(a, b); });
141 if (!isfinite(r_norm_sq))
142 r_norm_inf = r_norm_sq;
143 if (ctx.is_master())
144 std::cout << " RESID(stationarity inner): abs∞="
145 << guanaqo::float_to_str(r_norm_inf, prec)
146 << ", abs₂=" << guanaqo::float_to_str(sqrt(r_norm_sq), prec)
147 << ", rel₂=" << guanaqo::float_to_str(sqrt(r_norm_sq / grad_norm_sq), prec)
148 << "\n"
149 << " RESID(stationarity outer): abs∞="
150 << guanaqo::float_to_str(r_kkt_norm_inf, prec)
151 << ", abs₂=" << guanaqo::float_to_str(sqrt(r_kkt_norm_sq), prec)
152 << ", rel₂=" << guanaqo::float_to_str(sqrt(r_kkt_norm_sq / grad_norm_sq), prec)
153 << "\n";
154 auto &x_next = temp_var;
155 const auto add_step = [&](auto, auto, auto xi, auto dii, auto x_nexti) {
156 linalg::add(xi, dii, x_nexti);
157 };
158 ocp.foreach_stage(ctx, add_step, x, d, x_next);
159 auto &res_x_next = temp_eq;
160 eq_constr_resid(ctx, x_next, res_x_next);
161 real_t inf_res = norm_inf(ctx, res_x_next);
162 if (ctx.is_master())
163 std::cout << " RESID(eq. feasibility): abs∞="
164 << guanaqo::float_to_str(inf_res, prec) << "\n";
165}
166
167} // namespace CYQLONE_NS(cyqlone::qpalm)
Kahan-Babuška-Neumaier compensated summation.
Definition neumaier.hpp:16
#define CYQLONE_NS(ns)
Definition config.hpp:10
std::string float_to_str(F value, int precision)
void axpy(Vy &&y, const std::array< simdified_value_t< Vy >, sizeof...(Vx)> &alphas, Vx &&...x)
Add scaled vector y = ∑ᵢ αᵢxᵢ + βy.
Definition linalg.hpp:361
void transform_elementwise(F &&fun, VA &&A, VAs &&...As)
Apply a function to all elements of the given matrices or vectors, storing the result in the first ar...
Definition linalg.hpp:443
void add(VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate >={})
Add two matrices or vectors C = A + B. Rotate affects B.
Definition linalg.hpp:417
void copy(VA &&A, VB &&B, Opts... opts)
void hadamard(Vx &&x, Vy &&y, Vz &&z)
Compute the Hadamard (elementwise) product of two vectors z = x ⊙ y.
Definition linalg.hpp:309
real_t norm_inf(Context &ctx, const T &x) const
Infinity or max norm of x.
Definition linalg.tpp:88
auto get_timed(Timings::type Timings::*member) const
eq_constr_vec_t eq_constr_vec() const
void print_solve_rhs_norms(Context &ctx, const var_vec_t &d, const eq_constr_vec_t &Δλ, const var_vec_t &grad, const var_vec_t &Mᵀλ, const var_vec_t &Aᵀŷ) const
Definition solve.tpp:68
void eq_constr_resid(Context &ctx, const var_vec_t &x, eq_constr_vec_t &Mxb)
ineq_constr_vec_t ineq_constr_vec() const
void print_solve_resid_norms(Context &ctx, const var_vec_t &x, const var_vec_t &d, const var_vec_t &grad, const var_vec_t &ξ, const var_vec_t &Mᵀλ, const var_vec_t &Aᵀŷ, const var_vec_t &MᵀΔλ, const ineq_constr_vec_t &Ad, const active_set_t &J)
Definition solve.tpp:93
void solve(Context &ctx, const var_vec_t &x, const var_vec_t &grad, const var_vec_t &Mᵀλ, const var_vec_t &Aᵀŷ, const eq_constr_vec_t &Mxb, real_t S, const ineq_constr_vec_t &Σ, const active_set_t &J, var_vec_t &d, var_vec_t &ξ, ineq_constr_vec_t &Ad, eq_constr_vec_t &Δλ, var_vec_t &MᵀΔλ)
Definition solve.tpp:16
void mat_vec_A(Context &ctx, const var_vec_t &x, ineq_constr_vec_t &Ax)
auto norm_inf_l1_sq(Context &ctx, const T &x) const
Compute the infinity, l1 and l2 norms of x.
Definition linalg.tpp:76
void mat_vec_AT(Context &ctx, const ineq_constr_vec_t &y, var_vec_t &Aᵀy)