10#include <batmat/assume.hpp>
11#include <batmat/config.hpp>
12#include <batmat/linalg/copy.hpp>
13#include <batmat/openmp.h>
14#include <batmat/simd.hpp>
15#include <guanaqo/print.hpp>
16#include <guanaqo/timed-cpu.hpp>
17#include <guanaqo/trace.hpp>
35namespace datapar = batmat::datapar;
37template <index_t VL, StorageOrder DefaultOrder>
80 std::unique_ptr<typename OCP_t::SharedContext>
parallel_ctx =
ocp.create_parallel_context();
86 std::optional<var_vec_t>
x0;
87 std::optional<ineq_constr_vec_t>
y0;
88 std::optional<eq_constr_vec_t>
λ0;
100 this->parallel_ctx->barrier.spin_count =
settings.spin_count;
101 this->ocp.update_tricyqle_params(
settings.tricyqle_params);
110 if (!data.initial_variables.empty()) {
112 this->ocp.pack_variables(data.initial_variables,
x0->view());
114 if (!data.initial_inequality_multipliers.empty()) {
116 this->ocp.pack_constraints(data.initial_inequality_multipliers,
y0->view());
118 if (!data.initial_equality_multipliers.empty()) {
120 this->ocp.pack_dynamics(data.initial_equality_multipliers,
λ0->view());
127 this->ocp.update_data(
ocp);
145 const auto k_to_l = [&](index_t k) {
146 const auto num_stages =
ocp.n;
147 const auto i = (
ocp.ceil_N() - k) % num_stages;
148 const auto k1 = (k + i) / num_stages;
149 const auto k2 = k1 >>
ocp.lp();
150 const auto v = k2 % (1 <<
ocp.lv());
151 const auto t = k1 - (k2 <<
ocp.lp());
152 return ((num_stages * t + i) <<
ocp.lv()) +
v;
170 for (index_t k = 1; k <
ocp.N_horiz; ++k)
171 x0(k_to_l(k - 1)) = x(k_to_l(k));
172 x0(k_to_l(
ocp.N_horiz - 1)) = x(k_to_l(
ocp.N_horiz - 1));
173 for (index_t k = 1; k <
ocp.N_horiz; ++k)
174 y0(k_to_l(k - 1)) = y(k_to_l(k));
175 y0(k_to_l(
ocp.N_horiz - 1)) = y(k_to_l(
ocp.N_horiz - 1));
176 for (index_t k = 1; k <
ocp.N_horiz; ++k)
177 λ0(k_to_l(k - 1)) = λ(k_to_l(k));
178 λ0(k_to_l(
ocp.N_horiz - 1)) = λ(k_to_l(
ocp.N_horiz - 1));
184 for (index_t k = 1; k <
ocp.N_horiz; ++k)
185 x0(k_to_l(k - 1)) = x(k_to_l(k));
186 x0(k_to_l(
ocp.N_horiz - 1)) = x(k_to_l(
ocp.N_horiz - 1));
187 for (index_t k = 1; k <
ocp.N_horiz; ++k)
188 λ0(k_to_l(k - 1)) = λ(k_to_l(k));
189 λ0(k_to_l(
ocp.N_horiz - 1)) = λ(k_to_l(
ocp.N_horiz - 1));
203 [[nodiscard]] index_t
num_var()
const {
return ocp.num_variables(); }
218 template <
class... Js>
222 template <
class... Xs>
226 template <
class... Ys>
230 template <
class... Λs>
246 void ineq_constr_resid(Context &ctx,
const ineq_constr_vec_t &Ax, ineq_constr_vec_t &e)
const;
248 void project_multipliers_ineq(Context &ctx, ineq_constr_vec_t &y)
const;
250 real_t ineq_constr_viol(Context &ctx,
const ineq_constr_vec_t &Ax)
const;
252 real_t ineq_constr_resid_al(Context &ctx,
const ineq_constr_vec_t &y,
253 const ineq_constr_vec_t &ŷ,
const ineq_constr_vec_t &Σ,
254 ineq_constr_vec_t &e);
261 ocp.transposed_dynamics_constr(ctx, λ, Mᵀλ);
269 ocp.transposed_general_constr(ctx, y, Aᵀy);
273 ocp.transposed_general_constr(y, Aᵀy);
277 ocp.general_constr(ctx, x, Ax);
300 ocp.cost_gradient_remove_regularization(ctx, S, x, x_reg,
grad_f);
304 return std::numeric_limits<real_t>::quiet_NaN();
309 return {f, std::move(
grad_f)};
340 if (num_Σ_changed > 0)
347 template <
class T,
size_t N>
348 static void merge_chunk(std::span<const T> chunk,
size_t chunk_index,
349 std::span<
const std::array<size_t, N>> separators, std::span<T> out);
352 compute_partition_breakpoints(Context &ctx, std::vector<Breakpoint> &breakpoints,
353 const ineq_constr_vec_t &Σ,
const ineq_constr_vec_t &y,
354 const ineq_constr_vec_t &Ad,
const ineq_constr_vec_t &Ax,
355 const ineq_constr_vec_t &b_min,
const ineq_constr_vec_t &b_max);
372 template <
class T,
class U>
373 void xaxpy(Context &ctx, real_t a,
const T &x, U &y);
375 template <
class T,
class U>
376 void xcopy(Context &ctx,
const T &x, U &y)
const;
378 template <
class T,
class U>
379 void set_constant(Context &ctx, T &x,
const U &y)
const;
382 void scale(Context &ctx, real_t s, T &x)
const;
384 [[nodiscard]] real_t
dot(Context &ctx,
const var_vec_t &a,
const var_vec_t &b)
const;
386 template <
class... Args>
387 void local_dots(std::span<real_t, 1 +
sizeof...(Args) / 2> out,
const auto &a,
const auto &b,
388 const Args &...others)
const;
391 template <
class... Args>
392 [[nodiscard]] std::array<real_t,
sizeof...(Args) / 2> dots(Context &ctx,
393 const Args &...args)
const;
396 [[nodiscard]]
auto norm_inf_l1_sq(Context &ctx,
const T &x)
const;
399 [[nodiscard]] real_t
norm_inf(Context &ctx,
const T &x)
const;
402 [[nodiscard]] real_t norm_squared(Context &ctx,
const T &x)
const;
409 index_t calc_ŷ_Aᵀŷ(Context &ctx,
const ineq_constr_vec_t &Ax,
const ineq_constr_vec_t &Σ,
410 const ineq_constr_vec_t &y, ineq_constr_vec_t &ŷ, var_vec_t &Aᵀŷ,
416 auto nrm_simd =
norms.zero_simd();
417 const auto grad_norm_simd = [&](
auto grad_fji,
auto Mᵀλji,
auto Aᵀŷji) {
418 nrm_simd =
norms(nrm_simd, grad_fji + Mᵀλji + Aᵀŷji);
420 const auto grad_norm_batch = [&](
auto,
auto,
auto grad_fj,
auto Mᵀλj,
auto Aᵀŷj) {
423 ocp.foreach_stage(ctx, grad_norm_batch,
grad_f, Mᵀλ, Aᵀŷ);
424 return ctx.reduce(
norms(nrm_simd),
norms).norm_inf();
428 ocp.pack_variables(in, out);
431 ocp.pack_constraints(in, out);
434 ocp.pack_dynamics(in, out);
438 ocp.unpack_variables(in, out);
441 ocp.unpack_constraints(in, out);
444 ocp.unpack_constraints(in, out);
447 ocp.unpack_dynamics(in, out);
452 BATMAT_ASSERT(J.rows() == J_old.rows() && J.cols() == J_old.cols());
455 index_t num_different = 0;
457 num_different += std::inner_product(Ji.data(), Ji.data() + Ji.size(), J_oldi.data(),
458 index_t{0}, std::plus<>{}, std::not_equal_to<>{});
465 num_different = ctx.reduce(num_different);
469 return num_different;
471 do_reset_fac |=
static_cast<double>(num_different) >=
476 return num_different;
478 ctx.run_single_sync([&] {
482 stats.rank_updates += num_different;
485 if (prev_update_pending) {
486 const auto delta = [&](
auto,
auto,
auto Ji,
auto J_oldi,
auto ΔΣi) {
489 ocp.foreach_stage(ctx, delta, J, J_old,
ΔΣ);
491 const auto delta = [&](
auto,
auto,
auto Ji,
auto J_oldi,
auto ΔΣi) {
494 ocp.foreach_stage(ctx, delta, J, J_old,
ΔΣ);
496 return num_different;
557 return ocp_timings ? std::optional<typename Timings::timed_t>((*ocp_timings).*member)
570 {
"breakpoints", t.breakpoints},
571 {
"calc_y_hat", t.calc_y_hat},
572 {
"calc_y_hat_AT", t.calc_y_hat_AT},
573 {
"update_active_set_change", t.update_active_set_change},
574 {
"update_factorization", t.update_factorization},
575 {
"factor", t.factor},
577 {
"solve_MT", t.solve_MT},
578 {
"solve_A", t.solve_A},
579 {
"solve_grad", t.solve_grad},
580 {
"solve_resid", t.solve_resid},
581 {
"recompute_outer_grad", t.recompute_outer_grad},
582 {
"recompute_outer_A", t.recompute_outer_A},
583 {
"recompute_outer_AT", t.recompute_outer_AT},
584 {
"recompute_outer_MT", t.recompute_outer_MT},
585 {
"recompute_outer_norm", t.recompute_outer_norm},
586 {
"recompute_inner_grad", t.recompute_inner_grad},
587 {
"recompute_inner_A", t.recompute_inner_A},
588 {
"recompute_inner_MT", t.recompute_inner_MT},
589 {
"ineq_constr_resid", t.ineq_constr_resid},
590 {
"ineq_constr_viol", t.ineq_constr_viol},
591 {
"ineq_constr_resid_al", t.ineq_constr_resid_al},
592 {
"update_penalty_y", t.update_penalty_y},
597template <index_t VL, StorageOrder DefaultOrder>
598unique_CyQPALMBackend<VL, DefaultOrder>::~unique_CyQPALMBackend() =
default;
600template <index_t VL, StorageOrder DefaultOrder>
601unique_CyQPALMBackend<VL, DefaultOrder>
604 return {std::make_unique<CyQPALMBackend<VL, DefaultOrder>>(ocp, data, settings)};
607template <index_t VL, StorageOrder DefaultOrder>
613template <index_t VL, StorageOrder DefaultOrder>
The main header for the Cyqlone and Tricyqle linear solvers.
std::decay_t< decltype(Tag)> tag_t
simdified_value_t< Vx > norm_inf(Vx &&x)
Compute the infinity norm of a vector.
void axpy(Vy &&y, const std::array< simdified_value_t< Vy >, sizeof...(Vx)> &alphas, Vx &&...x)
Add scaled vector y = ∑ᵢ αᵢxᵢ + βy.
void for_each_elementwise(F &&fun, VA &&A, VAs &&...As)
Apply a function to all elements of the given matrices or vectors.
simdified_value_t< Vx > dot(Vx &&x, Vy &&y)
Compute the dot product of two vectors.
void scale(T alpha, Vx &&x, Vz &&z)
Multiply a vector by a scalar z = αx.
void sub(VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate >={})
Subtract two matrices or vectors C = A - B. Rotate affects B.
unique_CyQPALMBackend< VL, DefaultOrder > make_cyqpalm_backend(const CyqloneStorage< real_t > &ocp, CyqloneData data, const CyQPALMBackendSettings &settings)
void update_cyqpalm_backend(CyQPALMBackend< VL, DefaultOrder > &backend, const CyqloneStorage< real_t > &ocp)
#define GUANAQO_TRACE(name, instance,...)
simd_view_types< std::remove_const_t< T >, Abi >::template matrix< T, Order > matrix
batmat::DefaultTimings DefaultTimings
Kahan-Babuška-Neumaier compensated summation.
Functions for the factorization and solution of Newton system in QPALM for the Cyqlone backend.
Linear solver for systems with optimal control structure.
void pack_constraints(std::span< const value_type > y_lin, mut_view<> y, value_type fill=0) const
tricyqle_t::Context Context
void pack_dynamics(std::span< const value_type > λ_lin, mut_view<> λ) const
const index_t ny_0
Number of general constraints at stage 0, D(0) u(0).
Storage for a linear-quadratic OCP with the initial states x₀ eliminated.
static CyqloneStorage build(const LinearOCPStorage &ocp, index_t ny_0=-1)
Storage for a linear-quadratic OCP of the form.
Utilities for computing vector norms.
eq_constr_vec_t()=default
ineq_constr_vec_t()=default
CyQPALMBackend(const CyqloneStorage<> &ocp, CyqloneData data, const CyQPALMBackendSettings &settings)
void initialize_active_set(Js &...js) const
void recompute_inner(Context &ctx, real_t S, const var_vec_t &x_outer, const var_vec_t &x, const eq_constr_vec_t &λ, var_vec_t &grad, ineq_constr_vec_t &Ax, var_vec_t &Mᵀλ)
void initial_variables(Context &ctx, var_vec_t &x) const
void set_b_eq(std::span< const real_t > b_eq)
void xcopy(Context &ctx, const T &x, U &y) const
Copy x to y.
void initial_multipliers_ineq(Context &ctx, ineq_constr_vec_t &y) const
index_t num_eq_constr() const
real_t recompute_outer(Context &ctx, const var_vec_t &x, const var_vec_t &Aᵀŷ, const eq_constr_vec_t &λ, var_vec_t &grad, ineq_constr_vec_t &Ax, var_vec_t &Mᵀλ)
real_t norm_inf(Context &ctx, const T &x) const
Infinity or max norm of x.
type update_factorization
void update_regularization_changed(Context &ctx, real_t S_new, real_t S_old)
Called when the primal regularization S has changed: causes the factorization to be reset.
ineq_constr_vec_t temp_ineq
cyqlone::CyqloneSolver< VL, real_t, DefaultOrder > OCP_t
real_t unscaled_eq_constr_viol(Context &ctx, const eq_constr_vec_t &Mxb) const
const ineq_constr_vec_t & Ax_min() const
auto get_timed(Timings::type Timings::*member) const
void grad_f_remove_regularization(Context &ctx, real_t S, const var_vec_t &x, const var_vec_t &x_reg, var_vec_t &grad_f)
std::vector< std::array< size_t, 4 > > thread_indices
void update_data(const CyqloneStorage<> &ocp)
CyQPALMBackendStats Stats
ineq_constr_vec_t b_max_strided
std::tuple< real_t, var_vec_t > f_grad_f(Context &ctx, const var_vec_t &x)
void unscale_ineq_constr(const active_set_t &in, std::span< real_t > out) const
void set_b_ub(std::span< const real_t > b_ub)
eq_constr_vec_t eq_constr_vec() const
const ineq_constr_vec_t & Ax_max() const
typename OCP_t::Context Context
real_t boost_regularization(Context &ctx, real_t S, real_t S_boost)
Decrease the primal regularization S to S_boost, to boost convergence when close to the solution.
type recompute_outer_grad
CyQPALMBackendSettings settings
void initial_multipliers_eq(Context &ctx, eq_constr_vec_t &λ) const
type update_active_set_change
index_t active_set_change(Context &ctx, real_t, const ineq_constr_vec_t &Σ, const active_set_t &J, const active_set_t &J_old)
void unscale_ineq_constr(const ineq_constr_vec_t &in, std::span< real_t > out) const
void warm_start(const var_vec_t &x, const ineq_constr_vec_t &y, const eq_constr_vec_t &λ)
BreakpointsResult compute_partition_breakpoints(Context &ctx, std::vector< Breakpoint > &breakpoints, const ineq_constr_vec_t &Σ, const ineq_constr_vec_t &y, const ineq_constr_vec_t &Ad, const ineq_constr_vec_t &Ax, const ineq_constr_vec_t &b_min, const ineq_constr_vec_t &b_max)
active_set_t active_set() const
var_vec_t var_vec() const
std::map< std::string, typename Timings::type > clear_timings()
void set_b_lb(std::span< const real_t > b_lb)
void mat_vec_MT(Context &ctx, const eq_constr_vec_t &λ, var_vec_t &Mᵀλ)
index_t update_penalty_y(Context &ctx, ineq_constr_vec_t &Σ, const ineq_constr_vec_t &e, const ineq_constr_vec_t &e_old, const PenaltySettings &settings)
Increase the penalty parameters Σ for which the violation e has not decreased sufficiently compared t...
void scale_ineq_constr(std::span< const real_t > in, ineq_constr_vec_t &out) const
void print_solve_rhs_norms(Context &ctx, const var_vec_t &d, const eq_constr_vec_t &Δλ, const var_vec_t &grad, const var_vec_t &Mᵀλ, const var_vec_t &Aᵀŷ) const
void eq_constr_resid(Context &ctx, const var_vec_t &x, eq_constr_vec_t &Mxb)
void update_penalty_changed(Context &ctx, const ineq_constr_vec_t &Σ, index_t num_Σ_changed)
Called when the penalty parameters Σ have been updated: causes the factorization to be reset if any o...
void unscale_variables(const var_vec_t &in, std::span< real_t > out) const
ineq_constr_vec_t ineq_constr_vec() const
void print_solve_resid_norms(Context &ctx, const var_vec_t &x, const var_vec_t &d, const var_vec_t &grad, const var_vec_t &ξ, const var_vec_t &Mᵀλ, const var_vec_t &Aᵀŷ, const var_vec_t &MᵀΔλ, const ineq_constr_vec_t &Ad, const active_set_t &J)
void scale_variables(std::span< const real_t > in, var_vec_t &out) const
void mat_vec_AT(const ineq_constr_vec_t &y, var_vec_t &Aᵀy)
void initialize_eq_constr_vec(Λs &...λs) const
friend BreakpointsResult guanaqo_tag_invoke(guanaqo::tag_t< get_breakpoints >, CyQPALMBackend &backend, Context &ctx, std::vector< Breakpoint > &breakpoints, const ineq_constr_vec_t &Σ, const ineq_constr_vec_t &y, const ineq_constr_vec_t &Ad, const ineq_constr_vec_t &Ax, const ineq_constr_vec_t &b_min, const ineq_constr_vec_t &b_max)
void initialize_ineq_constr_vec(Ys &...ys) const
void grad_f(Context &ctx, const var_vec_t &x, var_vec_t &grad_f)
guanaqo::Timed< batmat::DefaultTimings > timed_t
void solve(Context &ctx, const var_vec_t &x, const var_vec_t &grad, const var_vec_t &Mᵀλ, const var_vec_t &Aᵀŷ, const eq_constr_vec_t &Mxb, real_t S, const ineq_constr_vec_t &Σ, const active_set_t &J, var_vec_t &d, var_vec_t &ξ, ineq_constr_vec_t &Ad, eq_constr_vec_t &Δλ, var_vec_t &MᵀΔλ)
void mat_vec_A(Context &ctx, const var_vec_t &x, ineq_constr_vec_t &Ax)
std::optional< ineq_constr_vec_t > y0
typename OCP_t::simd simd
ineq_constr_vec_t b_min_strided
std::optional< var_vec_t > x0
void grad_f_regularized(Context &ctx, real_t S, const var_vec_t &x, const var_vec_t &x_reg, var_vec_t &grad_f)
typename OCP_t::template matrix<> storage_t
ineq_constr_vec_t mat_vec_A(Context &ctx, const var_vec_t &x)
type recompute_inner_grad
std::vector< Breakpoint > breakpoints_temp
std::optional< eq_constr_vec_t > λ0
void scale_eq_constr(std::span< const real_t > in, eq_constr_vec_t &out) const
type recompute_outer_norm
void unscale_eq_constr(const eq_constr_vec_t &in, std::span< real_t > out) const
real_t unscaled_aug_lagr_norm(Context &ctx, const var_vec_t &grad_f, const var_vec_t &Mᵀλ, const var_vec_t &Aᵀŷ) const
void initialize_var_vec(Xs &...xs) const
std::unique_ptr< typename OCP_t::SharedContext > parallel_ctx
eq_constr_vec_t b_eq_strided
void set_constant(Context &ctx, T &x, const U &y) const
Set each element of x to the constant value y.
index_t num_ineq_constr() const
void mat_vec_AT(Context &ctx, const ineq_constr_vec_t &y, var_vec_t &Aᵀy)
real_t f_grad_f(Context &ctx, const var_vec_t &x, var_vec_t &grad_f)
type ineq_constr_resid_al
static constexpr auto norms
std::unique_ptr< Timings > ocp_timings