3#include <batmat/assume.hpp>
4#include <guanaqo/blas/hl-blas-interface.hpp>
8#include <batmat/linalg/copy.hpp>
9#include <batmat/linalg/trtri.hpp>
11namespace CYQLONE_NS(cyqlone) {
12using namespace batmat::linalg;
14template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
16 std::span<const value_type> Σ)
const
21 const index_t nux =
nu +
nx, nuxx = nux +
nx;
23 const index_t stage_stride = nuxx;
25 const index_t interval_stride = nuxx *
n -
nx;
27 const index_t vector_stride =
p * interval_stride;
29 const index_t nn =
ceil_N() * nuxx;
31 const index_t sλ = nn - (
nx *
p *
v);
42 for (index_t j = 0; j <
N_horiz; ++j) {
46 for (index_t r = 0; r <
ny; ++r)
47 for (index_t c = 0; c < nux; ++c)
48 DC(j, r, c) *= sqrt(Σ[
ny_0 + (j - 1) *
ny + r]);
52 for (index_t r = 0; r <
ny_0; ++r)
53 for (index_t c = 0; c <
nu; ++c)
54 DC(0, r, c) *= sqrt(Σ[r]);
55 for (index_t r = 0; r <
ny_N; ++r)
56 for (index_t c = 0; c <
nx; ++c)
63 for (index_t l = 0; l <
v; ++l) {
64 for (index_t c = 0; c <
p; ++c) {
65 const index_t j0 = c *
n + l *
p *
n;
66 const auto biA = c + l *
p;
71 for (index_t i = 0; i <
n; ++i) {
74 index_t s = l * vector_stride + c * interval_stride + stage_stride * i;
77 mat.add_diag(s, s, 1, nux);
78 i + 1 ==
n ? mat.add_diag(sλI, s +
nu, -1,
nx)
79 : mat.add_diag(s + nux, s +
nu, -1,
nx);
85 mat.add(s +
nu, s, Sᵀ(j));
88 mat.add(sλA, s, ocp.data_F(j).left_cols(
nu));
89 j > 0 ? mat.add(sλA, s +
nu, ocp.data_F(j).right_cols(
nx))
92 mat.add(s, s -
nx, ocp.data_F(j).left_cols(
nu).transposed());
93 mat.add(s +
nu, s -
nx, ocp.data_F(j).right_cols(
nx).transposed());
96 i + 1 ==
n ? mat.add_diag(sλI, s +
nu, -1,
nx)
97 : mat.add_diag(s + nux, s +
nu, -1,
nx);
101 return std::move(mat).build();
104template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
109 const index_t nux =
nu +
nx, nuxx = nux +
nx;
111 const index_t stage_stride = nuxx;
113 const index_t interval_stride = nuxx *
n -
nx;
115 const index_t vector_stride =
p * interval_stride;
117 const index_t nn =
ceil_N() * nuxx;
119 const index_t sλ = nn - (
nx *
p *
v);
121 std::vector<value_type> rhs(nn);
122 std::ranges::fill(rhs, std::numeric_limits<value_type>::quiet_NaN());
124 for (index_t l = 0; l <
v; ++l) {
125 for (index_t t = 0; t <
p; ++t) {
126 const index_t di0 = t *
n;
127 for (index_t i = 0; i <
n; ++i) {
128 const index_t di = di0 + i;
129 index_t s = l * vector_stride + t * interval_stride + stage_stride * i;
131 for (index_t c = 0; c <
nx; ++c)
132 rhs[s -
nx + c] = scale_b * b.batch(di)(l)(c, 0);
133 for (index_t c = 0; c < nux; ++c)
134 rhs[s + c] = scale_rq * rq.batch(di)(l)(c, 0);
139 const auto cyclic_block = [&](index_t i) {
140 const index_t t = i %
p, l = i /
p;
141 const index_t di = t *
n;
142 for (index_t c = 0; c <
nx; ++c)
143 rhs[s + c] = scale_b * b.batch(di)(l)(c, 0);
146 for (index_t l = 0; l <
lp(); ++l) {
147 index_t offset = 1 << l;
148 index_t stride = offset << 1;
149 for (index_t i = offset; i <
v *
p; i += stride)
152 for (index_t i = 0; i <
v *
p; i +=
p) {
158template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
162 const index_t nux =
nu +
nx, nuxx = nux +
nx;
164 const index_t stage_stride = nuxx;
166 const index_t interval_stride = nuxx *
n -
nx;
168 const index_t vector_stride =
p * interval_stride;
170 const index_t nn =
ceil_N() * nuxx;
172 const index_t sλ = nn - (
nx *
p *
v);
181 for (index_t t = 0; t <
p; ++t) {
182 const index_t di0 = t *
n;
184 auto LBAt = LBA.batch(t);
185 auto invQᵀt = invQᵀ.batch(t);
187 auto LAt = LA.batch(t);
188 for (index_t i = 0; i <
n; ++i) {
189 const auto di = di0 + i;
190 auto RSQi = RSQ.middle_cols(i * nux, nux);
191 auto Qi =
tril(RSQi.bottom_right(
nx,
nx));
192 auto Qi_inv =
triu(invQᵀt.middle_cols(i *
nx,
nx));
193 auto Âi = Â.middle_cols(i *
nx,
nx);
194 auto LAi = LAt.middle_cols(i *
nx,
nx);
198 trtri(Qi, Qi_inv.transposed());
202 trsm(LAi, Qi.transposed());
204 auto LBAi = LBAt.middle_cols((i - 1) *
nx,
nx);
205 auto RSQ_prev = RSQ.middle_cols((i - 1) * nux, nux);
206 auto Q_prev =
tril(RSQ_prev.bottom_right(
nx,
nx));
207 auto BA =
data_F.batch(di);
208 copy(BA.transposed(), LBAi);
215 for (index_t l = 0; l <
v; ++l) {
216 for (index_t t = 0; t <
p; ++t) {
217 const index_t j0 = t *
n + l *
n *
p;
218 const auto biA = t + l *
p;
224 auto LBAt = LBA.batch(t);
225 auto LAt = LA.batch(t);
226 auto invQᵀt = invQᵀ.batch(t);
227 for (index_t i = 0; i <
n; ++i) {
229 index_t s = l * vector_stride + t * interval_stride + i * stage_stride;
230 auto B̂i = B̂t.middle_cols(i *
nu,
nu);
231 auto LHi = LHt.middle_cols(i * nux, nux);
232 auto iQiᵀ = invQᵀt.middle_cols(i *
nx,
nx);
233 auto AiQiᵀ = LAt.middle_cols(i *
nx,
nx);
235 auto LBAi = LBAt.middle_cols((i - 1) *
nx,
nx);
236 auto iQᵀprev = invQᵀt.middle_cols((i - 1) *
nx,
nx);
237 auto LA_prev = LAt.middle_cols((i - 1) *
nx,
nx);
239 mat.add(s, s -
nx, LBAi(l));
240 mat.add(sλA, s -
nx, LA_prev(l));
243 mat.add(sλA, s, B̂i(l));
248 mat.add(sλA, s +
nu, AiQiᵀ(l));
253 const auto cyclic_block = [&](index_t i, index_t offset) {
256 const index_t t = i %
p, l = i /
p;
258 if (i + offset <
v *
p)
259 mat.add(sY, s,
tricyqle.cr_Y.batch(t)(l));
260 mat.add(sU, s,
tricyqle.cr_U.batch(t)(l));
263 const auto cyclic_block_final = [&](index_t i, index_t offset) {
265 const index_t t = i %
p, l = i /
p;
267 if (i + offset <
v *
p)
268 mat.add(sY, s,
tricyqle.cr_Y.batch(t)(l));
271 for (index_t l = 0; l <
lp(); ++l) {
272 index_t offset = 1 << l;
273 index_t stride = offset << 1;
274 for (index_t i = offset; i <
v *
p; i += stride)
275 cyclic_block(i, offset);
277 for (index_t i = 0; i <
v *
p; i +=
p)
278 cyclic_block_final(i,
p);
279 return std::move(mat).build();
282template <index_t VL,
class T, StorageOrder DefaultOrder,
class Ctx>
285 const index_t nux =
nu +
nx, nuxx = nux +
nx;
287 const index_t stage_stride = nuxx;
289 const index_t interval_stride = nuxx *
n -
nx;
291 const index_t vector_stride =
p * interval_stride;
293 const index_t nn =
ceil_N() * nuxx;
295 const index_t sλ = nn - (
nx *
p *
v);
299 for (index_t l = 0; l <
v; ++l) {
300 for (index_t t = 0; t <
p; ++t) {
301 for (index_t i = 0; i <
n; ++i) {
302 index_t s = l * vector_stride + t * interval_stride + i * stage_stride;
304 for (index_t c = 0; c <
nx; ++c)
305 mat.add(s -
nx + c, s -
nx + c, -1);
306 for (index_t c = 0; c <
nu; ++c)
307 mat.add(s + c, s + c, 1);
308 for (index_t c = 0; c <
nx; ++c)
309 mat.add(s + c +
nu, s + c +
nu, 1);
313 for (index_t i = 0; i <
p *
v; ++i)
314 for (index_t r = 0; r <
nx; ++r)
315 mat.
add(sλ +
nx * i + r, sλ +
nx * i + r, -1);
316 return std::move(mat).build();
The main header for the Cyqlone and Tricyqle linear solvers.
void xsyrk_LT(T alpha, std::type_identity_t< MatrixView< const T, I > > A, T beta, MatrixView< T, I > C)
void trsm(Structured< VA, SA > A, VB &&B, VD &&D, with_rotate_B_t< RotB >={})
void trtri(Structured< VA, MatrixStructure::LowerTriangular > A, Structured< VD, MatrixStructure::LowerTriangular > D)
void trmm(Structured< VA, SA > A, Structured< VB, SB > B, Structured< VD, SD > D, Opts... opts)
void copy(VA &&A, VB &&B, Opts... opts)
constexpr auto triu(M &&m)
constexpr auto tril(M &&m)
constexpr bool is_pow_2(index_t n)
auto top_rows(index_type n)
auto bottom_left(index_type nr, index_type nc)
auto top_left(index_type nr, index_type nc)
auto bottom_right(index_type nr, index_type nc)
const index_t n
Number of stages per thread per vector lane (rounded up).
SparseMatrix build_sparse_factor() const
index_t ceil_N() const
Horizon length, rounded up to a multiple of the number of parallel execution units.
typename tricyqle_t::template view< O > view
Non-owning immutable view type for matrix.
matrix< default_order > data_F
Stage-wise dynamics matrices F(j) = [ B(j) A(j) ] of the OCP.
SparseMatrix build_sparse(const CyqloneStorage< value_type > &ocp, std::span< const value_type > Σ) const
index_t sub_wrap_ceil_N(index_t a, index_t b) const
Subtract b from a modulo N_horiz.
index_t get_linear_batch_offset(index_t biA) const
typename tricyqle_t::template matrix< O > matrix
Owning type for a batch of matrices (with batch size v).
const index_t N_horiz
Horizon length of the optimal control problem.
const index_t ny
Number of general constraints of the OCP per stage.
const index_t ny_0
Number of general constraints at stage 0, D(0) u(0).
const index_t nu
Number of controls of the OCP.
matrix< default_order > riccati_LH
Cholesky factors of the Hessian blocks for the Riccati recursion.
SparseMatrix build_sparse_diag() const
const index_t p
Number of processors/threads.
tricyqle_t tricyqle
Block-tridiagonal solver (CR/PCR/PCG).
std::vector< value_type > build_rhs(view<> rq, view<> b, value_type scale_rq=-1, value_type scale_b=-1) const
const index_t ny_N
Number of general constraints at the final stage, C(N) x(N).
constexpr index_t lp() const
log₂(p), logarithm of the number of processors/threads, rounded up.
static constexpr index_t v
Vector length.
index_t sub_wrap_ceil_P(index_t a, index_t b) const
const index_t nx
Number of states of the OCP.
matrix< default_order > riccati_LAB
Storage for the matrices LB(j), Acl(j) and LA(j₁) for the Riccati recursion.
Storage for a linear-quadratic OCP with the initial states x₀ eliminated.
A builder for constructing a SparseMatrix incrementally.
void add(index_t row, index_t col, real_t value)
A sparse matrix in COO format.