develop/Doxygen/riccati_8tpp_source.html

#include <cyqlone/cyqlone.hpp>

#include <cyqlone/linalg.hpp>


#include <batmat/linalg/compress.hpp>

#include <batmat/linalg/gemm-diag.hpp>

#include <batmat/linalg/gemm.hpp>

#include <batmat/linalg/gemv.hpp>

#include <batmat/linalg/potrf.hpp>

#include <batmat/linalg/shift.hpp>

#include <batmat/linalg/trsm.hpp>


namespace CYQLONE_NS(cyqlone) {


using namespace linalg;

using namespace batmat::linalg;


// Algorithm 1 “Factorization of a single modified Riccati block column”


//! [Modified Riccati factorization and fused forward solve]

/// Fused factorization and forward solve of a single modified Riccati block column
/// (Algorithm 1) for the interval of n stages assigned to the calling thread.
///
/// The numbered `NN|` comments in the body refer to the lines of that pseudocode; the
/// statement order is deliberate because most operations update their operands in place.
///
/// @tparam Factor  When true, the factorization steps run: they write this interval's blocks
///                 of riccati_LH and riccati_LAB and use riccati_V as workspace.
/// @tparam Solve   When true, the forward-solve steps run: they update ux and λ in place,
///                 reading the factors stored in riccati_LH and riccati_LAB.
/// @param ctx  Thread context; only used to look up the interval index c through
///             riccati_thread_assignment().
/// @param γ    Its reciprocal 1/γ is passed as the regularization argument of
///             syrk_add_potrf (only read when Factor is true).
/// @param Σ    Per-stage data handed to compress_masks_sqrt together with data_Gᵀ
///             (only read when Factor is true and nyM > 0).
/// @param ux   Per-stage vectors with u in the top nu rows and x in the bottom nx rows
///             (only touched when Solve is true).
/// @param λ    Per-stage vectors (only touched when Solve is true). Batch dn (stage jₙ) is
///             accumulated into; the other batches of the interval appear to be read only.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>

template <bool Factor, bool Solve>

// NOLINTNEXTLINE(*-cognitive-complexity) // Needs to match pseudocode structure


void CyqloneSolver<VL, T, DefaultOrder, Ctx>::factor_riccati_solve(Context &ctx, value_type γ,

                                                                   view<> Σ, mut_view<> ux,

                                                                   mut_view<> λ) {

    using batmat::linalg::compress_masks_sqrt;

    // Index of the interval (block column) handled by this thread
    const index_t c = riccati_thread_assignment(ctx);

    //  3|  j₁ = n(c-1)+1, jₙ = nc

    const index_t dn  = c * n;                                    // data batch index

    const index_t jn  = c * n;                                    // stage index

    const index_t nux = nu + nx, nyM = std::max(ny, ny_0 + ny_N); // max active constraints/stage

    // TODO: special case nyM for c == 0

    // Per-interval storage: n blocks of nux columns in LHs, n blocks of nu columns in B̂s and
    // n blocks of nx columns in Âs (block i belongs to loop iteration i below).
    auto LHs = riccati_LH.batch(c);

    auto B̂s = riccati_LAB.batch(c).right_cols(n * nu), Âs = riccati_LAB.batch(c).left_cols(n * nx);

    auto VGᵀ       = riccati_V.batch(c);

    index_t m_syrk = 0; // number of columns of VDCᵀ (depends on active constraints)

    if constexpr (Factor) {

        GUANAQO_TRACE("Riccati init", jn);

        //  4|  B̂(jₙ) = B(jₙ)

        // Note that Â(jₙ) is not copied explicitly, as it is not modified in-place

        copy(data_F.batch(dn).left_cols(nu), B̂s.left_cols(nu));

        // Compress the active constraint Jacobians to add them to the Hessian later

        // (for stage jₙ there is no V block yet, so the compressed columns start at column 0)
        if (nyM > 0)

            m_syrk = compress_masks_sqrt(data_Gᵀ.batch(dn), Σ.batch(dn), VGᵀ.left_cols(nyM));

    }

    // Iterate over all stages in the interval (in reverse order)

    for (index_t i = 0; i < n; ++i) {

        //  6|  for j = jₙ downto j₁

        const index_t j  = sub_wrap_ceil_N(jn, i); // stage index j ≡ jₙ - i mod N

        // Note: the data batch index increases with i while the stage index j decreases.
        const index_t di = dn + i;                 // data batch index

        // Block i of the factor storage: R is the top-left nu×nu block, S the bottom-left
        // nx×nu block and Q the bottom-right nx×nx block of LH.
        auto LH          = LHs.middle_cols(i * nux, nux);

        auto RS          = LH.left_cols(nu);

        auto R = RS.top_rows(nu), S = RS.bottom_rows(nx), Q = LH.bottom_right(nx, nx);

        auto B̂ = B̂s.middle_cols(i * nu, nu), Acl = Âs.middle_cols(i * nx, nx);

        {

            GUANAQO_TRACE("Riccati QRS", j);

            // Compute and factor R̂, update Ŝ, factor Q̂

            //

            // 13|  [ R̂(j)  Ŝ(j) ] = [ R(j)  S(j) ] + [ D(j)ᵀ ] Σ(j) [ D(j)  C(j) ] + V(j) V(j)ᵀ

            //   |  [ Ŝ(j)ᵀ Q̂(j) ]   [ S(j)ᵀ Q(j) ]   [ C(j)ᵀ ]

            //

            //  7|  [ LR(j)       ] = chol [ R̂(j)  Ŝ(j) ]

            //   |  [ LS(j) LQ(j) ]        [ Ŝ(j)ᵀ Q̂(j) ]

            if constexpr (Factor) {

                // VGᵀprev = [ B(j+1)ᵀ LQ(j+1)   D(j)ᵀ √Σ(j) ]

                //           [ A(j+1)ᵀ LQ(j+1)   C(j)ᵀ √Σ(j) ]

                // Only the first m_syrk columns of the workspace are valid for this stage.
                auto VGᵀprev = VGᵀ.left_cols(m_syrk);

                // Input: lower triangle of H(j) from data_H; output: lower triangle of LH;
                // 1/γ is passed as the regularization argument.
                syrk_add_potrf(VGᵀprev, tril(data_H.batch(di)), tril(LH), 1 / γ);

            }

            if constexpr (Solve) {

                // Solve u ← LR̂⁻¹ u, x ← x - Ŝ u

                // (R and S are read from riccati_LH here, i.e. they hold the factors LR, LS)
                auto ui = ux.batch(di).top_rows(nu), xi = ux.batch(di).bottom_rows(nx);

                trsm(tril(R), ui);

                gemv_sub(S, ui, xi);

            }

            //  8|  LB(j) = B̂(j) LR(j)⁻ᵀ

            if constexpr (Factor) {

                // In place: from here on, B̂ holds LB(j)
                trsm(B̂, tril(R).transposed());

            }

            if constexpr (Solve) {

                // λ(jₙ) += LB(j) u(j), accumulated in the batch of the interval's stage jₙ
                auto ui = ux.batch(di).top_rows(nu), λ_last = λ.batch(dn);

                gemv_add(B̂, ui, λ_last);

            }

            //  9|  Acl(j) = Â(j) - LB(j) LS(j)ᵀ

            if constexpr (Factor) {

                //  4|  Â(jₙ) = A(jₙ)

                // For i == 0, Â(jₙ) was never stored, so A(jₙ) is read directly from data_F
                // and the result is written to Acl; for i > 0, Acl already holds Â(j)
                // (written by step 11 of the previous iteration) and is updated in place.
                auto An = data_F.batch(dn).right_cols(nx);

                i == 0 ? gemm_sub(B̂, S.transposed(), An, Acl) //

                       : gemm_sub(B̂, S.transposed(), Acl);

            }

        }

        // 10|  if j > j₁

        if (i + 1 < n) {

            [[maybe_unused]] const auto j_next = sub_wrap_ceil_N(j, 1);

            GUANAQO_TRACE("Riccati update AB", j_next);

            // Data batch of stage j-1 (the stage handled by the next loop iteration)
            const auto di_next = dn + i + 1;

            // Workspace layout for the next stage: nx columns for V(j-1), followed by up to
            // nyM columns for the compressed constraint Jacobians.
            auto VGᵀnext = VGᵀ.left_cols(nx + nyM), V_next = VGᵀnext.left_cols(nx),

                 Gᵀnext = VGᵀnext.right_cols(nyM);

            auto F_next = data_F.batch(di_next), B_next = F_next.left_cols(nu),

                 A_next = F_next.right_cols(nx);

            // 11|  [ B̂(j-1)  Â(j-1) ] = Acl(j) [ B(j-1)  A(j-1) ]

            if constexpr (Factor) {

                auto B̂_next = B̂s.middle_cols((i + 1) * nu, nu),

                     Â_next = Âs.middle_cols((i + 1) * nx, nx);

                gemm(Acl, B_next, B̂_next);

                gemm(Acl, A_next, Â_next);

            }

            if constexpr (Solve) {

                auto xi = ux.batch(di).bottom_rows(nx), ux_next = ux.batch(di_next),

                     λ_next = λ.batch(di_next), λ_last = λ.batch(dn);

                gemv_add(Acl, λ_next, λ_last); // λ(jₙ) += Acl(j) λ(j-1)

                // Single-column scratch vector taken from the tricyqle solver's workspace
                auto w = tricyqle.work_cr.batch(c).left_cols(1);

                trmm(tril(Q).transposed(), λ_next, w);     // w = LQᵀ(j) λ(j-1)

                trmm(tril(Q), w);                          // w = LQ(j) LQᵀ(j) λ(j-1)

                sub(xi, w, w);                             // w = x(j) - LQ(j) LQᵀ(j) λ(j-1)

                gemv_add(F_next.transposed(), w, ux_next); // u(j-1) += BAᵀ(j-1) w

            }

            // 12|  V(j-1) = [ B(j-1)ᵀ ] LQ(j)

            //   |           [ A(j-1)ᵀ ]

            if constexpr (Factor) {

                trmm(F_next.transposed(), tril(Q), V_next);

                m_syrk = nx; // columns of V(j-1)

                // Compress the active constraint Jacobians to add them to the Hessian later

                // (the return value of compress_masks_sqrt is added to the column count)
                if (nyM > 0)

                    m_syrk += compress_masks_sqrt(data_Gᵀ.batch(di_next), Σ.batch(di_next), Gᵀnext);

            }

        } else {

            GUANAQO_TRACE("Riccati last", j);

            // 14|  LA(j₁) = Â(j₁) LQ(j₁)⁻ᵀ

            if constexpr (Factor) {

                // In place: from here on, Acl holds LA(j₁)
                trsm(Acl, tril(Q).transposed());

            }

            if constexpr (Solve) {

                auto xi = ux.batch(di).bottom_rows(nx), λ_last = λ.batch(dn);

                // x ← LQ⁻¹ x, then λ(jₙ) += LA(j₁) x, then x ← LQ⁻ᵀ x
                trsm(tril(Q), xi);

                gemv_add(Acl, xi, λ_last);

                trsm(tril(Q).transposed(), xi);

            }

        }

    }

}


//! [Modified Riccati factorization and fused forward solve]


/// Reverse sweep of the modified Riccati solve for the interval of n stages assigned to the
/// calling thread.
///
/// The stages are visited from j₁ up to jₙ (loop index i = n-1 down to 0). Stage j₁ is
/// started from λ(j₀), read from the batch of the preceding interval; every later stage j is
/// started from the already computed u(j-1), x(j-1). All stages use λ(jₙ) of this interval.
///
/// @param ctx   Thread context; only used to look up the interval index through
///              riccati_thread_assignment().
/// @param ux    Per-stage vectors with u in the top nu rows and x in the bottom nx rows;
///              updated in place.
/// @param λ     Per-stage vectors. λ(jₙ) and λ(j₀) are read, the batches of the stages
///              j₁ … jₙ₋₁ of this interval are overwritten with λ(j-1).
/// @param work  Scratch storage; the batch of this interval is used as the temporary w.
/// @param Mᵀλ   When engaged, the products of the multipliers with the dynamics blocks are
///              written/accumulated into it stage by stage.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
void CyqloneSolver<VL, T, DefaultOrder, Ctx>::solve_riccati_reverse(
    Context &ctx, mut_view<> ux, mut_view<> λ, mut_view<> work,
    std::optional<mut_view<>> Mᵀλ) const {
    const index_t interval  = riccati_thread_assignment(ctx);
    const index_t stage_n   = interval * n;                // stage index jₙ
    const index_t batch_n   = interval * n;                // data batch of stage jₙ
    const index_t batch_0   = sub_wrap_p(interval, 1) * n; // data batch of stage j₀
    const index_t blk       = nu + nx;
    const auto LH_all       = riccati_LH.batch(interval);
    const auto LB_all       = riccati_LAB.batch(interval).right_cols(n * nu);
    const auto AclLA_all    = riccati_LAB.batch(interval).left_cols(n * nx);
    const auto λ_jn         = λ.batch(batch_n);
    const auto tmp          = work.batch(interval);

    for (index_t k = 0; k < n; ++k) {
        const index_t i = n - 1 - k; // i = n-1, n-2, …, 0
        [[maybe_unused]] const index_t j = sub_wrap_ceil_N(stage_n, i);
        const index_t batch_j = batch_n + i;
        // Factors of stage j: LR is nu×nu, LS is nx×nu, LQ is nx×nx
        const auto LH_j = LH_all.middle_cols(i * blk, blk);
        const auto LQ   = LH_j.bottom_right(nx, nx);
        const auto LR   = LH_j.top_left(nu, nu);
        const auto LS   = LH_j.bottom_left(nx, nu);
        const auto LB   = LB_all.middle_cols(i * nu, nu);

        if (k == 0) {
            // Stage j₁ (i == n-1): coupled to λ(j₀) of the preceding interval
            GUANAQO_TRACE("Riccati solve rev", j);
            const auto u_j1 = ux.batch(batch_j).top_rows(nu);
            const auto x_j1 = ux.batch(batch_j).bottom_rows(nx);
            const auto LA   = AclLA_all.middle_cols(i * nx, nx);
            const auto λ_j0 = λ.batch(batch_0);

            // tmp = LQ(j₁)⁻¹ λ(j₀)
            if (interval == 0 && v > 1)
                trsm(tril(LQ), λ_j0, tmp, with_rotate_B<-1>);
            else
                trsm(tril(LQ), λ_j0, tmp);
            // tmp = LQ(j₁)⁻¹ λ(j₀) - LA(j₁)ᵀ λ(jₙ)
            gemv_sub(LA.transposed(), λ_jn, tmp);
            // tmp = LQ(j₁)⁻ᵀ(LQ(j₁)⁻¹ λ(j₀) - LA(j₁)ᵀ λ(jₙ))
            trsm(tril(LQ).transposed(), tmp);
            // x(j₁) = tmp + q(j₁)
            add(x_j1, tmp);

            // u(j₁) = LR(j₁)⁻ᵀ(r(j₁) - LB(j₁)ᵀ λ(jₙ) - LS(j₁)ᵀ x(j₁))
            gemv_sub(LB.transposed(), λ_jn, u_j1);
            gemv_sub(LS.transposed(), x_j1, u_j1);
            trsm(tril(LR).transposed(), u_j1);

            if (Mᵀλ) {
                // (Mᵀλ)(j) = - [ 0 ] λ(j-1)
                //              [ I ]
                const auto Mᵀλ_j = Mᵀλ->batch(batch_j);
                Mᵀλ_j.top_rows(nu).set_constant(0);
                if (interval > 0 || v == 1)
                    negate(λ_j0, Mᵀλ_j.bottom_rows(nx));
                else
                    negate(λ_j0, Mᵀλ_j.bottom_rows(nx), with_rotate<-1>);
            }
        } else {
            // Stages j > j₁: stage j-1 lives in the next data batch and is already solved
            const index_t batch_jm1 = batch_j + 1;
            GUANAQO_TRACE("Riccati solve rev", j);
            const auto u_j   = ux.batch(batch_j).top_rows(nu);
            const auto x_j   = ux.batch(batch_j).bottom_rows(nx);
            const auto Acl   = AclLA_all.middle_cols(i * nx, nx);
            const auto F_jm1 = data_F.batch(batch_jm1);
            const auto λ_jm1 = λ.batch(batch_jm1);

            // tmp = q(j), saved before x(j) is overwritten
            copy(x_j, tmp);
            // x(j) = A(j-1) x(j-1) + B u(j-1) + b(j-1)
            gemv_add(F_jm1, ux.batch(batch_jm1), λ_jm1, x_j);

            // u(j) = LR(j)⁻ᵀ(r(j) - LS(j)ᵀ x(j) - LB(j)ᵀ λ(jₙ))
            gemv_sub(LB.transposed(), λ_jn, u_j);
            gemv_sub(LS.transposed(), x_j, u_j);
            trsm(tril(LR).transposed(), u_j);

            // λ(j-1) = LQ(j) LQ(j)ᵀ x(j) + Aclᵀ λ(jₙ) - q(j)
            trmm(tril(LQ).transposed(), x_j, λ_jm1);
            trmm(tril(LQ), λ_jm1);
            gemv_add(Acl.transposed(), λ_jn, λ_jm1);
            sub(λ_jm1, tmp);

            if (Mᵀλ) {
                const auto Fᵀ_jm1  = F_jm1.transposed();
                const auto Mᵀλ_j   = Mᵀλ->batch(batch_j);
                const auto Mᵀλ_jm1 = Mᵀλ->batch(batch_jm1);
                // (Mᵀλ)(j-1) += [ B(j-1)ᵀ ] λ(j-1)
                //               [ A(j-1)ᵀ ]
                gemv_add(Fᵀ_jm1, λ_jm1, Mᵀλ_jm1);
                // (Mᵀλ)(j) = - [ 0 ] λ(j-1)
                //              [ I ]
                Mᵀλ_j.top_rows(nu).set_constant(0);
                negate(λ_jm1, Mᵀλ_j.bottom_rows(nx));
            }
        }
    }

    if (!Mᵀλ)
        return;

    // (Mᵀλ)(jₙ) += [ B(jₙ)ᵀ ] λ(jₙ)
    //              [ A(jₙ)ᵀ ]
    // For interval 0 with v == 1, only the top nu rows (the B(jₙ)ᵀ part) are added.
    const auto Fᵀn  = data_F.batch(batch_n).transposed();
    const auto Mᵀλn = Mᵀλ->batch(batch_n);
    if (v > 1 || interval > 0)
        gemv_add(Fᵀn, λ_jn, Mᵀλn);
    else
        gemv_add(Fᵀn.top_rows(nu), λ_jn, Mᵀλn.top_rows(nu));
}


} // namespace CYQLONE_NS(cyqlone)

cyqlone.hpp
The main header for the Cyqlone and Tricyqle linear solvers.

batmat::linalg::syrk_add_potrf
void syrk_add_potrf(VA &&A, Structured< VC, SC > C, Structured< VD, SC > D, simdified_value_t< VA > regularization=0)

batmat::linalg::trsm
void trsm(Structured< VA, SA > A, VB &&B, VD &&D, with_rotate_B_t< RotB >={})

batmat::linalg::gemv_add
void gemv_add(VA &&A, VB &&B, VC &&C, VD &&D, Opts... opts)

batmat::linalg::gemm
void gemm(VA &&A, VB &&B, VD &&D, TilingOptions packing={}, Opts... opts)

cyqlone::linalg::add
void add(VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate >={})
Add two matrices or vectors C = A + B. Rotate affects B.
Definition linalg.hpp:417

batmat::linalg::trmm
void trmm(Structured< VA, SA > A, Structured< VB, SB > B, Structured< VD, SD > D, Opts... opts)

batmat::linalg::gemm_sub
void gemm_sub(VA &&A, VB &&B, VC &&C, VD &&D, TilingOptions packing={}, Opts... opts)

cyqlone::linalg::negate
void negate(VA &&A, VB &&B, with_rotate_t< Rotate >={})
Negate a matrix or vector B = -A.
Definition linalg.hpp:386

batmat::linalg::copy
void copy(VA &&A, VB &&B, Opts... opts)

batmat::linalg::compress_masks_sqrt
index_t compress_masks_sqrt(VA &&Ain, VS &&Sin, VAo &&Aout)

batmat::linalg::gemv_sub
void gemv_sub(VA &&A, VB &&B, VC &&C, VD &&D, Opts... opts)

cyqlone::linalg::sub
void sub(VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate >={})
Subtract two matrices or vectors C = A - B. Rotate affects B.
Definition linalg.hpp:401

batmat::linalg::tril
constexpr auto tril(M &&m)

GUANAQO_TRACE
#define GUANAQO_TRACE(name, instance,...)

linalg.hpp

batmat::linalg::with_rotate
constexpr with_rotate_t< I > with_rotate

batmat::linalg::with_rotate_B
constexpr with_rotate_B_t< I > with_rotate_B

cyqlone::CyqloneSolver::n
const index_t n
Number of stages per thread per vector lane (rounded up).
Definition cyqlone.hpp:605

cyqlone::CyqloneSolver::data_H
matrix< default_order > data_H
Stage-wise Hessian blocks H(j) = [ R(j) S(j); S(j)ᵀ Q(j) ] of the OCP cost function.
Definition cyqlone.hpp:762

cyqlone::CyqloneSolver::view
typename tricyqle_t::template view< O > view
Non-owning immutable view type for matrix.
Definition cyqlone.hpp:693

cyqlone::CyqloneSolver::data_F
matrix< default_order > data_F
Stage-wise dynamics matrices F(j) = [ B(j) A(j) ] of the OCP.
Definition cyqlone.hpp:766

cyqlone::CyqloneSolver::data_Gᵀ
matrix< default_order > data_Gᵀ
Stage-wise constraint Jacobians G(j)ᵀ = [ D(j) C(j) ]ᵀ of the OCP.
Definition cyqlone.hpp:770

cyqlone::CyqloneSolver::sub_wrap_ceil_N
index_t sub_wrap_ceil_N(index_t a, index_t b) const
Subtract b from a modulo N_horiz.
Definition indexing.tpp:53

cyqlone::CyqloneSolver::Context
tricyqle_t::Context Context
Definition cyqlone.hpp:596

cyqlone::CyqloneSolver::ny
const index_t ny
Number of general constraints of the OCP per stage.
Definition cyqlone.hpp:570

cyqlone::CyqloneSolver::solve_riccati_reverse
void solve_riccati_reverse(Context &ctx, mut_view<> ux, mut_view<> λ, mut_view<> work, std::optional< mut_view<> > Mᵀλ) const
Reverse solve of the modified Riccati recursion: computes x(j), u(j) and λ(j-1) for the stages of one interval, optionally accumulating Mᵀλ.
Definition riccati.tpp:145

cyqlone::CyqloneSolver::riccati_thread_assignment
index_t riccati_thread_assignment(Context &ctx) const
Definition cyqlone.hpp:972

cyqlone::CyqloneSolver::sub_wrap_p
index_t sub_wrap_p(index_t a, index_t b) const
Subtract b from a modulo p.
Definition indexing.tpp:64

cyqlone::CyqloneSolver::mut_view
typename tricyqle_t::template mut_view< O > mut_view
Non-owning mutable view type for matrix.
Definition cyqlone.hpp:696

cyqlone::CyqloneSolver::ny_0
const index_t ny_0
Number of general constraints at stage 0, D(0) u(0).
Definition cyqlone.hpp:571

cyqlone::CyqloneSolver::nu
const index_t nu
Number of controls of the OCP.
Definition cyqlone.hpp:569

cyqlone::CyqloneSolver::riccati_LH
matrix< default_order > riccati_LH
Cholesky factors of the Hessian blocks for the Riccati recursion.
Definition cyqlone.hpp:782

cyqlone::CyqloneSolver::factor_riccati_solve
void factor_riccati_solve(Context &ctx, value_type γ, view<> Σ, mut_view<> ux, mut_view<> λ)
Modified Riccati factorization and fused forward solve.
Definition riccati.tpp:23

cyqlone::CyqloneSolver::value_type
T value_type
Definition cyqlone.hpp:562

cyqlone::CyqloneSolver::tricyqle
tricyqle_t tricyqle
Block-tridiagonal solver (CR/PCR/PCG).
Definition cyqlone.hpp:747

cyqlone::CyqloneSolver::ny_N
const index_t ny_N
Number of general constraints at the final stage, C(N) x(N).
Definition cyqlone.hpp:572

cyqlone::CyqloneSolver::v
static constexpr index_t v
Vector length.
Definition cyqlone.hpp:603

cyqlone::CyqloneSolver::riccati_V
matrix< default_order > riccati_V
Temporary storage for the V(j) = [ B(j)ᵀ LQ(j); A(j)ᵀ LQ(j) ] matrices during the Riccati recursion.
Definition cyqlone.hpp:794

cyqlone::CyqloneSolver::nx
const index_t nx
Number of states of the OCP.
Definition cyqlone.hpp:568

cyqlone::CyqloneSolver::riccati_LAB
matrix< default_order > riccati_LAB
Storage for the matrices LB(j), Acl(j) and LA(j₁) for the Riccati recursion.
Definition cyqlone.hpp:788