3#include <batmat/assume.hpp>
4#include <batmat/linalg/simdify.hpp>
5#include <guanaqo/blas/hl-blas-interface.hpp>
8#include <batmat/thread-pool.hpp>
11namespace CYQLONE_NS(cyqlone) {
22 for (index_t r = 0; r < src.
rows; ++r)
23 for (index_t c = 0; c < src.
cols; ++c)
24 dst(r, c) = src(r, c);
33 for (index_t r = 0; r < src.
rows; ++r)
34 for (index_t c = 0; c < src.
cols; ++c)
35 dst(r, c) = scalar * src(r, c);
39template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
40CyqloneSolver<VL, T, DefaultOrder, Ctx>
93template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
104 for (index_t c = 0; c <
p; ++c) {
105 const index_t k0 = c *
n;
106 const index_t di0 = c *
n;
107 for (index_t i = 0; i <
n; ++i) {
108 index_t di = di0 + i;
109 for (index_t vi = 0; vi <
v; ++vi) {
117 data_F.batch(di)(vi).left_cols(
nu));
118 data_F.batch(di)(vi).right_cols(
nx).set_constant(0);
121 data_H.batch(di)(vi).bottom_left(
nx,
nu).set_constant(0);
133 data_F.batch(di)(vi).set_constant(0);
134 data_F.batch(di)(vi).right_cols(
nx).set_diagonal(1);
135 data_H.batch(di)(vi).left_cols(
nu).set_constant(0);
136 data_H.batch(di)(vi).top_left(
nu,
nu).set_diagonal(1);
139 data_Gᵀ.batch(di)(vi).left_cols(
ny).set_constant(0);
146template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
152 for (index_t ti = 0; ti <
p; ++ti) {
153 const index_t k0 = ti *
n;
154 const index_t di0 = ti *
n;
155 for (index_t i = 0; i <
n; ++i) {
156 index_t di = di0 + i;
157 for (index_t vi = 0; vi <
v; ++vi) {
160 rhs.batch(di)(vi) = ocp.
data_c(k);
162 rhs.batch(di)(vi).set_constant(0);
169template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
176 for (index_t ti = 0; ti <
p; ++ti) {
177 const index_t k0 = ti *
n;
178 const index_t di0 = ti *
n;
179 for (index_t i = 0; i <
n; ++i) {
180 index_t di = di0 + i;
181 for (index_t vi = 0; vi <
v; ++vi) {
185 grad.batch(di)(vi) = ocp.
data_rq(0);
189 grad.batch(di)(vi).bottom_rows(
nx));
192 grad.batch(di)(vi) = ocp.
data_rq(k);
194 grad.batch(di)(vi).top_rows(
nu).set_constant(0);
196 grad.batch(di)(vi).bottom_rows(
nx));
203template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
206 const index_t nyM = std::max(
ny,
ny_0 +
ny_N);
213 const auto inf = std::numeric_limits<value_type>::infinity();
214 for (index_t ti = 0; ti <
p; ++ti) {
215 const index_t k0 = ti *
n;
216 const index_t di0 = ti *
n;
217 for (index_t i = 0; i <
n; ++i) {
218 index_t di = di0 + i;
219 for (index_t vi = 0; vi <
v; ++vi) {
221 auto b_min_i = b_min.batch(di)(vi), b_max_i = b_max.batch(di)(vi);
226 b_max_i.bottom_rows(nyM -
ny_0 -
ny_N).set_constant(+inf);
228 b_min_i.top_rows(
ny) = ocp.
data_lb(k - 1);
231 b_max_i.bottom_rows(nyM -
ny).set_constant(+inf);
233 b_min_i.set_constant(-inf);
234 b_max_i.set_constant(+inf);
241template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
244 const index_t nux =
nu +
nx;
249 for (index_t ti = 0; ti <
p; ++ti) {
250 const index_t k0 = ti *
n;
251 const index_t di0 = ti *
n;
252 for (index_t i = 0; i <
n; ++i) {
253 index_t di = di0 + i;
254 for (index_t vi = 0; vi <
v; ++vi) {
258 ux.batch(di)(vi).top_rows(
nu) = crview::as_column(ux_lin.first(
nu));
260 ux.batch(di)(vi).bottom_rows(
nx) =
261 crview::as_column(ux_lin.subspan(
nu + (
N_horiz - 1) * nux,
nx));
263 ux.batch(di)(vi).bottom_rows(
nx).set_constant(0);
265 ux.batch(di)(vi).top_rows(
nu) = crview::as_column(ux_lin.subspan(k * nux,
nu));
266 ux.batch(di)(vi).bottom_rows(
nx) =
267 crview::as_column(ux_lin.subspan(
nu + (k - 1) * nux,
nx));
270 ux.batch(di)(vi).bottom_rows(
nx) =
271 crview::as_column(ux_lin.subspan(
nu + (
N_horiz - 1) * nux,
nx));
273 ux.batch(di)(vi).set_constant(0);
280template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
282 std::span<value_type> ux_lin)
const {
283 const index_t nux =
nu +
nx;
288 for (index_t ti = 0; ti <
p; ++ti) {
289 const index_t k0 = ti *
n;
290 const index_t di0 = ti *
n;
291 for (index_t i = 0; i <
n; ++i) {
292 index_t di = di0 + i;
293 for (index_t vi = 0; vi <
v; ++vi) {
297 rview::as_column(ux_lin.first(
nu)) = ux.batch(di)(vi).top_rows(
nu);
299 rview::as_column(ux_lin.subspan(
nu + (
N_horiz - 1) * nux,
nx)) =
300 ux.batch(di)(vi).bottom_rows(
nx);
302 rview::as_column(ux_lin.subspan(k * nux,
nu)) = ux.batch(di)(vi).top_rows(
nu);
303 rview::as_column(ux_lin.subspan(
nu + (k - 1) * nux,
nx)) =
304 ux.batch(di)(vi).bottom_rows(
nx);
307 rview::as_column(ux_lin.subspan(
nu + (
N_horiz - 1) * nux,
nx)) =
308 ux.batch(di)(vi).bottom_rows(
nx);
315template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
318 const index_t nλ =
nx;
323 for (index_t ti = 0; ti <
p; ++ti) {
324 const index_t k0 = ti *
n;
325 const index_t di0 = ti *
n;
326 for (index_t i = 0; i <
n; ++i) {
327 index_t di = di0 + i;
328 for (index_t vi = 0; vi <
v; ++vi) {
332 λ.batch(di)(vi) = crview::as_column(λ_lin.subspan(k * nλ, nλ));
334 λ.batch(di)(vi).set_constant(0);
341template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
343 std::span<value_type> λ_lin)
const {
344 const index_t nλ =
nx;
349 for (index_t ti = 0; ti <
p; ++ti) {
350 const index_t k0 = ti *
n;
351 const index_t di0 = ti *
n;
352 for (index_t i = 0; i <
n; ++i) {
353 index_t di = di0 + i;
354 for (index_t vi = 0; vi <
v; ++vi) {
358 rview::as_column(λ_lin.subspan(k * nλ, nλ)) = λ.batch(di)(vi);
365template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
373 for (index_t ti = 0; ti <
p; ++ti) {
374 const index_t k0 = ti *
n;
375 const index_t di0 = ti *
n;
376 for (index_t i = 0; i <
n; ++i) {
377 index_t di = di0 + i;
378 for (index_t vi = 0; vi <
v; ++vi) {
383 y.batch(di)(vi).top_rows(
ny_0) = crview::as_column(y_lin.first(
ny_0));
384 y.batch(di)(vi).bottom_rows(
ny_N) =
386 y.batch(di)(vi).bottom_rows(ny_pad).set_constant(
fill);
389 y.batch(di)(vi).top_rows(
ny) =
390 crview::as_column(y_lin.subspan(
ny_0 + (k - 1) *
ny,
ny));
391 y.batch(di)(vi).bottom_rows(ny_pad).set_constant(
fill);
393 y.batch(di)(vi).set_constant(0);
400template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
402 view<> y, std::span<value_type> y_lin)
const {
407 for (index_t ti = 0; ti <
p; ++ti) {
408 const index_t k0 = ti *
n;
409 const index_t di0 = ti *
n;
410 for (index_t i = 0; i <
n; ++i) {
411 index_t di = di0 + i;
412 for (index_t vi = 0; vi <
v; ++vi) {
416 rview::as_column(y_lin.first(
ny_0)) = y.batch(di)(vi).top_rows(
ny_0);
418 y.batch(di)(vi).bottom_rows(
ny_N);
420 rview::as_column(y_lin.subspan(
ny_0 + (k - 1) *
ny,
ny)) =
421 y.batch(di)(vi).top_rows(
ny);
428template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
436template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
444template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
452template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
454 std::span<const value_type> ux_lin)
const ->
matrix<> {
460template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
462 -> std::vector<value_type> {
468template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
476template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
478 -> std::vector<value_type> {
484template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
492template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
494 -> std::vector<value_type> {
500template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
505template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
510template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
The main header for the Cyqlone and Tricyqle linear solvers.
void copy(VA &&A, VB &&B, Opts... opts)
void scale(T alpha, Vx &&x, Vz &&z)
Multiply a vector by a scalar z = αx.
void fill(simdified_value_t< VB > a, VB &&B)
constexpr auto simdify(simdifiable auto &&a) -> simdified_view_t< decltype(a)>
void copy(guanaqo::MatrixView< T1, I1, S1, O1 > src, guanaqo::MatrixView< T2, I2, S2, O2 > dst)
Simple (inefficient) matrix copy that supports slices with non-unit strides.
void scale(T0 scalar, guanaqo::MatrixView< T1, I1, S1, O1 > src, guanaqo::MatrixView< T2, I2, S2, O2 > dst)
Simple (inefficient) scaled matrix copy that supports slices with non-unit strides.
constexpr bool is_pow_2(index_t n)
auto bottom_rows(index_type n)
auto left_cols(index_type n)
auto top_rows(index_type n)
auto top_left(index_type nr, index_type nc)
auto bottom_right(index_type nr, index_type nc)
Linear solver for systems with optimal control structure.
const index_t n
Number of stages per thread per vector lane (rounded up).
index_t num_variables() const
Get the total number of primal variables in the OCP.
matrix< default_order > data_H
Stage-wise Hessian blocks H(j) = [ R(j) S(j); S(j)ᵀ Q(j) ] of the OCP cost function.
matrix initialize_variables() const
Get a zero-initialized matrix for the primal variables u and x.
index_t ceil_N() const
Horizon length, rounded up to a multiple of the number of parallel execution units.
typename tricyqle_t::template view< O > view
Non-owning immutable view type for matrix.
void update_data(const CyqloneStorage< value_type > &ocp)
Update the internal data structures to reflect changes in the OCP data (without changing the problem ...
matrix< default_order > data_F
Stage-wise dynamics matrices F(j) = [ B(j) A(j) ] of the OCP.
matrix< default_order > data_Gᵀ
Stage-wise constraint Jacobians G(j)ᵀ = [ D(j) C(j) ]ᵀ of the OCP.
void pack_variables(std::span< const value_type > ux_lin, mut_view<> ux) const
void pack_constraints(std::span< const value_type > y_lin, mut_view<> y, value_type fill=0) const
void initialize_gradient(const CyqloneStorage< value_type > &ocp, mut_view<> grad) const
Initialize the gradient vector for the OCP cost function, using the custom Cyqlone storage format.
void unpack_variables(view<> ux, std::span< value_type > ux_lin) const
index_t sub_wrap_ceil_N(index_t a, index_t b) const
Subtract b from a modulo N_horiz.
void unpack_constraints(view<> y, std::span< value_type > y_lin) const
typename tricyqle_t::template matrix< O > matrix
Owning type for a batch of matrices (with batch size v).
const index_t N_horiz
Horizon length of the optimal control problem.
index_t num_dynamics_constraints() const
Get the total number of dynamics constraints in the OCP.
static CyqloneSolver build(const CyqloneStorage< value_type > &ocp, index_t p)
Initialize a Cyqlone solver for the given OCP.
const index_t ny
Number of general constraints of the OCP per stage.
matrix initialize_dynamics_constraints() const
Get a zero-initialized matrix for the dynamics constraints (or their multipliers).
void initialize_rhs(const CyqloneStorage< value_type > &ocp, mut_view<> rhs) const
Initialize the right-hand side vector for the dynamics constraints of the OCP, using the custom Cyqlo...
matrix initialize_general_constraints() const
Get a zero-initialized matrix for the general constraints (or their multipliers).
void initialize_bounds(const CyqloneStorage< value_type > &ocp, mut_view<> b_min, mut_view<> b_max) const
Initialize the lower and upper bounds for the general constraints of the OCP, using the custom Cyqlon...
typename tricyqle_t::template mut_view< O > mut_view
Non-owning mutable view type for matrix.
void pack_dynamics(std::span< const value_type > λ_lin, mut_view<> λ) const
const index_t ny_0
Number of general constraints at stage 0, D(0) u(0).
const index_t nu
Number of controls of the OCP.
const index_t p
Number of processors/threads.
index_t num_general_constraints() const
Get the total number of general constraints in the OCP.
const index_t ny_N
Number of general constraints at the final stage, C(N) x(N).
void unpack_dynamics(view<> λ, std::span< value_type > λ_lin) const
static constexpr index_t v
Vector length.
const index_t nx
Number of states of the OCP.
Storage for a linear-quadratic OCP with the initial states x₀ eliminated.