develop/Doxygen/factor_8tpp_source.html

#include <cyqlone/cyqlone.hpp>


namespace CYQLONE_NS(cyqlone) {


// Algorithm 2 “Cyqlone factorization”

// §4 “Cyqlone: Parallel factorization and solution of KKT systems with optimal control structure”

//

// Optionally fused factorization and forward solve of the KKT system.


//! [Cyqlone factorization and fused forward solve]

/// Shared implementation of the Cyqlone factorization and the fused forward solve.
///
/// Runs the three stages of the algorithm in order: the block-column Riccati stage, the Schur
/// complement computation, and the factorization/solve of the resulting Schur complement by the
/// block-tridiagonal solver `tricyqle` (entered after its first level, see
/// `factor_solve_skip_first`).
///
/// @tparam Factor  Forwarded to every stage; selects whether the factorization is performed.
/// @tparam Solve   Forwarded to every stage; selects whether the forward solve is performed.
/// @param  ctx     Per-thread context, forwarded to every stage.
/// @param  γ       Forwarded to the Riccati stage only.
/// @param  Σ       Forwarded to the Riccati stage only.
/// @param  ux      Forwarded to the Riccati and Schur stages.
/// @param  λ       Forwarded to all three stages.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>

template <bool Factor, bool Solve>


void CyqloneSolver<VL, T, DefaultOrder, Ctx>::factor_solve_impl(Context &ctx, value_type γ,

                                                                view<> Σ, mut_view<> ux,

                                                                mut_view<> λ) {

    //  2|  factor-block-column-riccati(c)    -- steps 1 and 2

    factor_riccati_solve<Factor, Solve>(ctx, γ, Σ, ux, λ);

    //  3|  compute-schur(c)                  -- step 3

    compute_schur<Factor, Solve>(ctx, ux, λ);

    //  4|  factor-schur(c)                   -- step 4
    //      The member `n` is passed as the `stride` argument of the block-tridiagonal solver.

    tricyqle.template factor_solve_skip_first<Factor, Solve>(ctx, λ, n);

}


//! [Cyqlone factorization and fused forward solve]


// First level of CR is only needed when solving a standalone block tridiagonal matrix. In Cyqlone,

// this is fused with the Schur complement computation.

/// Implementation of factor_solve for a standalone block-tridiagonal system.
///
/// Handles the first diagonal block owned by the calling thread and then hands over to
/// `factor_solve_skip_first` for the remaining cyclic reduction levels.
///
/// @tparam Factor  When true, the diagonal block is factorized with `potrf`.
/// @tparam Solve   When true, `trsm` is applied to block `stride * ctx.index` of `λ`.
/// @param  ctx     Per-thread context; `ctx.index` selects the diagonal block in `cr_L`.
/// @param  λ       Right-hand side, forwarded to `factor_solve_skip_first`.
/// @param  stride  Multiplier applied to the block index when addressing `λ`.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
template <bool Factor, bool Solve>
void TricyqleSolver<VL, T, DefaultOrder, Ctx>::factor_solve_impl(Context &ctx, mut_view<> λ,
                                                                 index_t stride) {
    const index_t block = ctx.index;
    auto diag           = tril(cr_L.batch(block));
    // With a single thread, only the two-argument potrf call below is issued here.
    const bool single_thread = p == 1;
    // Otherwise only threads whose index has 2-adic valuation zero act at this level.
    const bool active_here = !single_thread && ν2p(block) == 0;
    if constexpr (Factor) {
        if (single_thread)
            potrf(diag, tril(pcr_L.batch(0)));
        else if (active_here)
            potrf(diag);
    }
    if constexpr (Solve) {
        if (active_here)
            trsm(diag, λ.batch(stride * block));
    }
    // All threads continue with the remaining levels.
    factor_solve_skip_first<Factor, Solve>(ctx, λ, stride);
}


//! [Cyqlone factor Schur]

/// Fused factorization and forward solve by cyclic reduction (CR), entered after the first-level
/// diagonal blocks have been handled by the caller.
///
/// Each of the lp() levels consists of two barrier-separated phases: first the off-diagonal
/// blocks U or Y are computed (and the matching forward-solve updates applied), then the diagonal
/// block L is factorized or the subdiagonal block K is updated. After the loop, the final
/// system is factorized and/or solved using PCR or PCG, depending on `params.solve_method`.
///
/// @tparam Factor  When true, the factor_* / update_K / factor_pcr* routines are executed.
/// @tparam Solve   When true, the solve_*_forward / solve_pcr / solve_pcg routines are executed.
/// @param  ctx     Per-thread context; provides the thread index and the barrier.
/// @param  λ       Right-hand side, passed to the forward-solve routines.
/// @param  stride  Forwarded unchanged to the forward-solve routines.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>

template <bool Factor, bool Solve>


void TricyqleSolver<VL, T, DefaultOrder, Ctx>::factor_solve_skip_first(Context &ctx, mut_view<> λ,

                                                                       index_t stride) {

    // When vectorization is enabled, the number of threads p must be a power of two.

    // TODO: allow circular coupling for v=1 and non-power-of-two p, which requires wrapping of

    //       the indices in the CR code.

    BATMAT_ASSERT(is_pow_2(p) || (v == 1 && !circular));

    const index_t c = ctx.index;

    // 17|  for l = 0 ... log₂(P)-1

    for (index_t l = 0; l < lp(); ++l) { // Recursion level of cyclic reduction

        // Thread index after the remapping for non-power-of-two p (identity otherwise).

        const auto c_ = cr_thread_assignment(l, c);

        // 18|  iU = c+1, iY = c+1-2^l

        const auto iU = add_wrap_ceil_p(c_, 1), iY = sub_wrap_ceil_p(c_, (1 << l) - 1);

        // 19|  -- sync --

        ctx.arrive_and_wait(); // Wait for L

        // 20|  if ν₂(iU) = l:  U(iU) = K˂(iU) L(iU)⁻ᵀ

        if (ν2p(iU) == l) {

            if constexpr (Factor)

                factor_U(l, iU);

            if constexpr (Solve)

                solve_u_forward(l, iU, λ, stride);

        }

        // 21|  elif ν₂(iY) = l:  Y(iY) = K˃(iY) L(iY)⁻ᵀ

        else if (ν2p(iY) == l) {

            if constexpr (Factor)

                factor_Y(l, iY);

            if constexpr (Solve)

                solve_y_forward(l, iY, λ, work_cr, stride);

        }

        // 22|  -- sync --

        ctx.arrive_and_wait(); // Wait for U, Y

        // 23|  if ν₂(iU) = l:  factor-L(l, iY)

        if (ν2p(iU) == l) {

            if constexpr (Factor)

                factor_L(l, iY);

            if constexpr (Solve)

                solve_λ_forward(l, iY, λ, work_cr, stride);

        }

        // 24|  elif ν₂(iY) = l:  update-K(l, iY)

        else if (ν2p(iY) == l) {

            if constexpr (Factor)

                update_K(l, iY);

        }

    }

    // Factor or solve the last level using PCR or PCG

    if constexpr (Factor) {

        if (params.solve_method == SolveMethod::PCR) {

            ctx.arrive_and_wait(); // wait for off-diagonal block

            // Large blocks with several threads: all threads take part in the PCR factorization.

            if (block_size >= params.parallel_factor_pcr_threshold && p > 1)

                factor_pcr_parallel(ctx);

            // Otherwise a single designated thread (or the only thread) factorizes serially.

            else if (ν2p(c + 1) + 1 == lp() || p == 1)

                factor_pcr();

        }

    }

    if constexpr (Solve) {

        if (params.solve_method == SolveMethod::PCR) {

            // When Factor is true, the barrier in the factorization branch above was already hit.

            if constexpr (!Factor)

                ctx.arrive_and_wait(); // wait for off-diagonal block TODO: necessary?

            // The final solve is performed by a single designated thread (or the only thread).

            if (ν2p(c + 1) + 1 == lp() || p == 1)

                solve_pcr(λ.batch(0), work_pcg.batch(0).left_cols(1));

        } else {

            ctx.arrive_and_wait(); // wait for off-diagonal block

            if (ν2p(c + 1) + 1 == lp() || p == 1)

                solve_pcg(λ.batch(0), work_pcg.batch(0));

        }

    }

}


//! [Cyqlone factor Schur]


/// Fused factorization and forward solve: runs the shared implementation with both the
/// factorization and the forward-solve stages enabled.
///
/// @param ctx     Per-thread context.
/// @param λ       Right-hand side, forwarded to the implementation.
/// @param stride  Forwarded to the implementation.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
void TricyqleSolver<VL, T, DefaultOrder, Ctx>::factor_solve(Context &ctx, mut_view<> λ,
                                                            index_t stride) {
    constexpr bool do_factor = true;
    constexpr bool do_solve  = true;
    factor_solve_impl<do_factor, do_solve>(ctx, λ, stride);
}


/// Perform only the factorization as described by factor_solve: the shared implementation is
/// run with the forward solve disabled and an empty right-hand side view.
///
/// @param ctx  Per-thread context.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
void TricyqleSolver<VL, T, DefaultOrder, Ctx>::factor(Context &ctx) {
    constexpr bool do_factor = true;
    constexpr bool do_solve  = false;
    factor_solve_impl<do_factor, do_solve>(ctx, {});
}


/// Perform only the forward solve as described by factor_solve: the shared implementation is
/// run with the factorization disabled.
///
/// @param ctx     Per-thread context.
/// @param λ       Right-hand side, forwarded to the implementation.
/// @param stride  Forwarded to the implementation.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
void TricyqleSolver<VL, T, DefaultOrder, Ctx>::solve_forward(Context &ctx, mut_view<> λ,
                                                             index_t stride) {
    constexpr bool do_factor = false;
    constexpr bool do_solve  = true;
    factor_solve_impl<do_factor, do_solve>(ctx, λ, stride);
}


/// Compute the Cyqlone factorization of the KKT matrix of the OCP and perform a forward solve
/// in the same pass, by running the shared implementation with both stages enabled.
///
/// @param ctx  Per-thread context.
/// @param γ    Forwarded to the implementation.
/// @param Σ    Forwarded to the implementation.
/// @param ux   Forwarded to the implementation.
/// @param λ    Forwarded to the implementation.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
void CyqloneSolver<VL, T, DefaultOrder, Ctx>::factor_solve(Context &ctx, value_type γ, view<> Σ,
                                                           mut_view<> ux, mut_view<> λ) {
    constexpr bool do_factor = true;
    constexpr bool do_solve  = true;
    factor_solve_impl<do_factor, do_solve>(ctx, γ, Σ, ux, λ);
}


/// Compute the Cyqlone factorization of the KKT matrix of the OCP without solving: the shared
/// implementation is run with the forward solve disabled and empty views for `ux` and `λ`.
///
/// @param ctx  Per-thread context.
/// @param γ    Forwarded to the implementation.
/// @param Σ    Forwarded to the implementation.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
void CyqloneSolver<VL, T, DefaultOrder, Ctx>::factor(Context &ctx, value_type γ, view<> Σ) {
    constexpr bool do_factor = true;
    constexpr bool do_solve  = false;
    factor_solve_impl<do_factor, do_solve>(ctx, γ, Σ, {}, {});
}


/// Perform a forward solve with the Cyqlone factorization: the shared implementation is run
/// with the factorization disabled, passing zero for `γ` and an empty view for `Σ`.
///
/// @param ctx  Per-thread context.
/// @param ux   Forwarded to the implementation.
/// @param λ    Forwarded to the implementation.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
void CyqloneSolver<VL, T, DefaultOrder, Ctx>::solve_forward(Context &ctx, mut_view<> ux,
                                                            mut_view<> λ) {
    constexpr bool do_factor = false;
    constexpr bool do_solve  = true;
    // NOTE(review): γ and Σ are presumably ignored when Factor is false; confirm in riccati.tpp.
    factor_solve_impl<do_factor, do_solve>(ctx, 0, {}, ux, λ);
}


// Algorithm 5 “Solution of a symmetric block-tridiagonal system using cyclic reduction (CR)”

// §3.2 Cyclic reduction of block-tridiagonal linear systems

//

// The reverse solve routines below closely follow the structure of the corresponding factorization

// and forward solve routines, but in reverse order. An iterative approach is used instead of

// recursion. Note that the evaluation of λ(0) is performed during the forward solve step.

// Depending on the problem size, either a parallel or serial version of the CR solve is used.


/// Reverse solve with the Cyqlone factorization, using caller-provided workspace.
///
/// First performs the reverse solve of the block-tridiagonal system for λ, then synchronizes
/// all threads, and finally performs the reverse Riccati solve.
///
/// @param ctx   Per-thread context; provides the barrier.
/// @param ux    Forwarded to the reverse Riccati solve.
/// @param λ     Passed to both the block-tridiagonal reverse solve and the reverse Riccati solve.
/// @param work  Workspace, passed to both stages.
/// @param Mᵀλ   Optional view, forwarded to the reverse Riccati solve.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>


void CyqloneSolver<VL, T, DefaultOrder, Ctx>::solve_reverse(Context &ctx, mut_view<> ux,

                                                            mut_view<> λ, mut_view<> work,

                                                            std::optional<mut_view<>> Mᵀλ) const {

    // The member `n` is passed as the `stride` argument of the block-tridiagonal solver.

    tricyqle.solve_reverse(ctx, λ, work, n);

    ctx.arrive_and_wait(); // wait for λ(c-1)

    solve_riccati_reverse(ctx, ux, λ, work, Mᵀλ);

}


/// Perform the backward solve phase, after the forward solve phase has been performed by
/// factor_solve.
///
/// Chooses between the parallel and the serial cyclic reduction solve: the parallel version is
/// used when there is more than one thread and `block_size` reaches
/// `params.parallel_solve_cr_threshold`; otherwise a single thread does all the work while the
/// others wait at the barrier.
///
/// @param ctx     Per-thread context; provides the thread index and the barrier.
/// @param λ       Forwarded to the selected solve routine.
/// @param work    Workspace, forwarded to the selected solve routine.
/// @param stride  Forwarded to the selected solve routine.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
void TricyqleSolver<VL, T, DefaultOrder, Ctx>::solve_reverse(Context &ctx, mut_view<> λ,
                                                             mut_view<> work,
                                                             index_t stride) const {
    const bool use_parallel = block_size >= params.parallel_solve_cr_threshold && p > 1;
    if (use_parallel) {
        solve_reverse_parallel(ctx, λ, work, stride);
        return;
    }
    // Serial path: exactly one designated thread (or the only thread) runs the whole solve.
    const bool runs_serial_solve = ν2p(ctx.index + 1) + 1 == lp() || p == 1;
    if (runs_serial_solve)
        solve_reverse_serial(λ, work, stride);
    // With several threads, everyone waits for the solution computed by that single thread.
    if (p != 1)
        ctx.arrive_and_wait();
}


//![Cyqlone solve CR]

/// Parallel backward solve by cyclic reduction, iterating over the CR levels in reverse order
/// (from lp() - 1 down to 0).
///
/// The barrier is used in split form: a thread first calls `ctx.arrive()`, optionally issues
/// prefetches for blocks it (or its next-level role) is about to use, and only then calls
/// `ctx.wait()` with the token returned by `arrive()`.
/// NOTE(review): presumably this overlaps prefetching with barrier waiting time -- confirm.
///
/// @param ctx     Per-thread context; provides the thread index and the barrier.
/// @param λ       Passed to the solve_*_backward routines.
/// @param work    Workspace, passed to solve_λ_backward and solve_u_backward.
/// @param stride  Forwarded unchanged to the solve_*_backward routines.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>


void TricyqleSolver<VL, T, DefaultOrder, Ctx>::solve_reverse_parallel(Context &ctx, mut_view<> λ,

                                                                      mut_view<> work,

                                                                      index_t stride) const {

    const index_t c = ctx.index;

    for (index_t l = lp(); l-- > 0;) {

        // Same thread remapping and block indices as in the factorization / forward solve.

        const auto c_     = cr_thread_assignment(l, c);

        const index_t i_u = add_wrap_ceil_p(c_, 1), i_y = sub_wrap_ceil_p(c_, (1 << l) - 1);

        if (l < lp() - 1) {              // λ(0) was already computed during forward solve

            auto wait_uy = ctx.arrive(); // wait for Uᵀλ, Yᵀλ

            if (ν2p(i_y) == l + 1) {

                ctx.wait(std::move(wait_uy));

                solve_λ_backward(i_y, λ, work, stride);

            } else if (ν2p(i_u) == l) {

                // This thread handles U in the second phase below: prefetch it before waiting.

                prefetch_U(l, i_u);

                ctx.wait(std::move(wait_uy));

            } else {

                // This thread may handle Y in the second phase below: prefetch it before waiting.

                if (ν2p(i_y) == l)

                    prefetch_Y(l, i_y);

                ctx.wait(std::move(wait_uy));

            }

        }

        auto wait_λ = ctx.arrive(); // wait for λ

        if (ν2p(i_u) == l) {

            ctx.wait(std::move(wait_λ));

            solve_u_backward(l, i_u, λ, work, stride);

        } else if (ν2p(i_y) == l) {

            ctx.wait(std::move(wait_λ));

            solve_y_backward(l, i_y, λ, stride);

        } else {

            // No work at this level: prefetch the blocks this thread needs for solve_λ_backward
            // in the next iteration (level l - 1), if any, before waiting.

            if (l > 0) {

                const auto l_next = l - 1, c_next = cr_thread_assignment(l_next, c);

                const index_t i_u_next = add_wrap_ceil_p(c_next, 1),

                              i_y_next = sub_wrap_ceil_p(c_next, (1 << l_next) - 1);

                if (ν2p(i_y_next) == l_next + 1) {

                    prefetch_U(l_next, i_u_next);

                    prefetch_L(i_y_next);

                }

            }

            ctx.wait(std::move(wait_λ));

        }

    }

    ctx.arrive_and_wait(); // wait for Uᵀλ, Yᵀλ

    // Final λ solve for the blocks with 2-adic valuation zero (skipped for a single thread).

    if (ν2p(c) == 0 && p != 1)

        solve_λ_backward(c, λ, work, stride);

}


//![Cyqlone solve CR]


//![Cyqlone solve CR serial]

/// Serial backward solve by cyclic reduction.
///
/// Performs, on a single thread, the work that the parallel version distributes over all p
/// threads: for every CR level (from lp() - 1 down to 0) it loops over all thread indices and
/// executes the λ solves first, followed by the U/Y updates. A last pass then performs the λ
/// solves for the blocks whose index has 2-adic valuation zero.
///
/// @param λ       Passed to the solve_*_backward routines.
/// @param work    Workspace, passed to solve_λ_backward and solve_u_backward.
/// @param stride  Forwarded unchanged to the solve_*_backward routines.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
void TricyqleSolver<VL, T, DefaultOrder, Ctx>::solve_reverse_serial(mut_view<> λ, mut_view<> work,
                                                                    index_t stride) const {
    index_t level = lp();
    while (level-- > 0) {
        const auto offset = (1 << level) - 1;
        // λ(0) was already computed during forward solve, so the top level has no λ solve.
        const bool below_top_level = level < lp() - 1;

        // Phase 1: λ solves for the blocks that belong to the next coarser level.
        for (index_t thread = 0; thread < p; ++thread) {
            const index_t assigned = cr_thread_assignment(level, thread);
            const index_t i_y      = sub_wrap_ceil_p(assigned, offset);
            if (below_top_level && ν2p(i_y) == level + 1)
                solve_λ_backward(i_y, λ, work, stride);
        }

        // Phase 2: U/Y updates of this level; each thread index handles at most one of them.
        for (index_t thread = 0; thread < p; ++thread) {
            const index_t assigned = cr_thread_assignment(level, thread);
            const index_t i_u      = add_wrap_ceil_p(assigned, 1);
            const index_t i_y      = sub_wrap_ceil_p(assigned, offset);
            if (ν2p(i_u) == level) {
                solve_u_backward(level, i_u, λ, work, stride);
                continue;
            }
            if (ν2p(i_y) == level)
                solve_y_backward(level, i_y, λ, stride);
        }
    }

    // Final λ solves for the blocks with 2-adic valuation zero (skipped for a single thread).
    for (index_t thread = 0; thread < p; ++thread) {
        if (ν2p(thread) == 0 && p != 1)
            solve_λ_backward(thread, λ, work, stride);
    }
}

//![Cyqlone solve CR serial]


/// Perform a reverse solve with the Cyqlone factorization.
///
/// Convenience overload that forwards to the workspace-taking overload of solve_reverse, using
/// the solver's own `riccati_work` member as workspace.
///
/// @param ctx  Per-thread context.
/// @param ux   Forwarded to the workspace-taking overload.
/// @param λ    Forwarded to the workspace-taking overload.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>


void CyqloneSolver<VL, T, DefaultOrder, Ctx>::solve_reverse(Context &ctx, mut_view<> ux,

                                                            mut_view<> λ) {

    // No Mᵀλ argument is passed here, so that parameter takes its default declared elsewhere.

    solve_reverse(ctx, ux, λ, riccati_work);

}


/// Fused variant of solve_reverse and transposed_dynamics_constr.
///
/// Forwards to the workspace-taking overload of solve_reverse, using the solver's own
/// `riccati_work` member as workspace and passing `Mᵀλ` as the optional view argument.
///
/// @param ctx  Per-thread context.
/// @param ux   Forwarded to solve_reverse.
/// @param λ    Forwarded to solve_reverse.
/// @param Mᵀλ  Forwarded to solve_reverse as its optional last argument.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>


void CyqloneSolver<VL, T, DefaultOrder, Ctx>::solve_reverse_mul(Context &ctx, mut_view<> ux,

                                                                mut_view<> λ, mut_view<> Mᵀλ) {

    solve_reverse(ctx, ux, λ, riccati_work, Mᵀλ);

}


/// Adjust thread assignment for non-power-of-two p.
///
/// The diagonal blocks M(⌊p/2⌋2) are usually mapped to increasing thread indices c as the CR
/// level l increases: the CR routines use iY = c + 1 - 2^l, which corresponds to the path of M
/// nodes curving to the right in the thread assignment diagram in the paper. If p is not a
/// power of two, these large thread indices do not exist, so they have to be remapped, undoing
/// the offset 1 - 2^l.
///
/// The last M evaluation is always assigned to the even thread ⌊p/2⌋2, since this thread is
/// present even if p is odd. The odd thread ⌊p/2⌋2+1 is assigned an inactive index, since it
/// never has any work during CR, as there is no coupling between the last and first stages (at
/// least not in the scalar case).
///
/// @param l  CR level.
/// @param c  Thread index.
/// @return   The (possibly remapped) thread index to use for level l; equal to c whenever p is a
///           power of two, c is not one of the last two threads, or no remapping is required.
template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
index_t TricyqleSolver<VL, T, DefaultOrder, Ctx>::cr_thread_assignment(index_t l, index_t c) const {
    // With a power-of-two thread count, every thread index used by CR exists.
    if (is_pow_2(p))
        return c;
    const auto offset = (1 << l) - 1;
    // Index of the last diagonal block M or L that may need to be handled in this level
    const auto block = c & ~index_t{(1 << l) - 1};
    // Only the last two threads are candidates: c == p - 1 for odd p; c == p - 2 or c == p - 1
    // for even p.
    const bool in_last_pair = ((c >> 1) + 1) == ((p + 1) >> 1);
    // Would this block be assigned to a thread >= p?
    const bool beyond_last_thread = block + offset >= p;
    if (!in_last_pair || !beyond_last_thread)
        return c;
    // The last odd thread gets the inactive index, the last even thread gets the remapped one.
    const bool is_odd_thread = (c & 1) != 0;
    return is_odd_thread ? block : add_wrap_ceil_p(block, offset);
}


} // namespace CYQLONE_NS(cyqlone)


BATMAT_ASSERT
#define BATMAT_ASSERT(x)

cyqlone.hpp
The main header for the Cyqlone and Tricyqle linear solvers.

cyqlone::SolveMethod::PCR
@ PCR
Parallel Cyclic Reduction (direct).
Definition cyqlone-params.hpp:14

batmat::linalg::trsm
void trsm(Structured< VA, SA > A, VB &&B, VD &&D, with_rotate_B_t< RotB >={})

batmat::linalg::potrf
void potrf(Structured< VC, SC > C, Structured< VD, SC > D, simdified_value_t< VC > regularization=0)

batmat::linalg::tril
constexpr auto tril(M &&m)

cyqlone::is_pow_2
constexpr bool is_pow_2(index_t n)
Definition cyqlone.hpp:32

batmat::matrix::View::batch
batch_view_type batch(index_type b) const

cyqlone::CyqloneSolver::n
const index_t n
Number of stages per thread per vector lane (rounded up).
Definition cyqlone.hpp:605

cyqlone::CyqloneSolver::view
typename tricyqle_t::template view< O > view
Non-owning immutable view type for matrix.
Definition cyqlone.hpp:693

cyqlone::CyqloneSolver::solve_reverse_mul
void solve_reverse_mul(Context &ctx, mut_view<> ux, mut_view<> λ, mut_view<> Mᵀλ)
Fused variant of solve_reverse and transposed_dynamics_constr (for improved locality of the dynamics ...
Definition factor.tpp:261

cyqlone::CyqloneSolver::factor
void factor(Context &ctx, value_type γ, view<> Σ)
Compute the Cyqlone factorization of the KKT matrix of the OCP.
Definition factor.tpp:137

cyqlone::CyqloneSolver::factor_solve
void factor_solve(Context &ctx, value_type γ, view<> Σ, mut_view<> ux, mut_view<> λ)
Compute the Cyqlone factorization of the KKT matrix of the OCP and perform a forward solve (fused for...
Definition factor.tpp:132

cyqlone::CyqloneSolver::Context
tricyqle_t::Context Context
Definition cyqlone.hpp:596

cyqlone::CyqloneSolver::solve_forward
void solve_forward(Context &ctx, mut_view<> ux, mut_view<> λ)
Perform a forward solve with the Cyqlone factorization.
Definition factor.tpp:141

cyqlone::CyqloneSolver::solve_reverse
void solve_reverse(Context &ctx, mut_view<> ux, mut_view<> λ)
Perform a reverse solve with the Cyqlone factorization.
Definition factor.tpp:255

cyqlone::CyqloneSolver::solve_riccati_reverse
void solve_riccati_reverse(Context &ctx, mut_view<> ux, mut_view<> λ, mut_view<> work, std::optional< mut_view<> > Mᵀλ) const
[Modified Riccati factorization and fused forward solve]
Definition riccati.tpp:145

cyqlone::CyqloneSolver::compute_schur
void compute_schur(Context &ctx, mut_view<> ux, mut_view<> λ)
[Cyqlone compute Schur]
Definition schur.tpp:31

cyqlone::CyqloneSolver::riccati_work
matrix< column_major > riccati_work
Temporary workspace for the Riccati solve phase.
Definition cyqlone.hpp:799

cyqlone::CyqloneSolver::mut_view
typename tricyqle_t::template mut_view< O > mut_view
Non-owning mutable view type for matrix.
Definition cyqlone.hpp:696

cyqlone::CyqloneSolver::factor_riccati_solve
void factor_riccati_solve(Context &ctx, value_type γ, view<> Σ, mut_view<> ux, mut_view<> λ)
[Modified Riccati factorization and fused forward solve]
Definition riccati.tpp:23

cyqlone::CyqloneSolver::value_type
T value_type
Definition cyqlone.hpp:562

cyqlone::CyqloneSolver::tricyqle
tricyqle_t tricyqle
Block-tridiagonal solver (CR/PCR/PCG).
Definition cyqlone.hpp:747

cyqlone::CyqloneSolver::factor_solve_impl
void factor_solve_impl(Context &ctx, value_type γ, view<> Σ, mut_view<> ux, mut_view<> λ)
[Cyqlone factorization and fused forward solve]
Definition factor.tpp:13

cyqlone::TricyqleSolver::lp
constexpr index_t lp() const
log₂(p), logarithm of the number of processors/threads p, rounded up.
Definition cyqlone.hpp:105

cyqlone::TricyqleSolver::factor_solve_impl
void factor_solve_impl(Context &ctx, mut_view<> λ, index_t stride=1)
Implementation of factor_solve.
Definition factor.tpp:29

cyqlone::TricyqleSolver::solve_reverse_serial
void solve_reverse_serial(mut_view<> λ, mut_view<> work, index_t stride) const
[Cyqlone solve CR]
Definition factor.tpp:228

cyqlone::TricyqleSolver::factor_L
void factor_L(index_t l, index_t i)
Update and factorize a block L in the Cholesky factor for CR level l+1 and column index i,...
Definition cr.tpp:71

cyqlone::TricyqleSolver::prefetch_L
void prefetch_L(batch_view< O > X) const
Definition cr.tpp:276

cyqlone::TricyqleSolver::factor_solve_skip_first
void factor_solve_skip_first(Context &ctx, mut_view<> λ, index_t stride=1)
Fused factorization and forward solve.
Definition factor.tpp:48

cyqlone::TricyqleSolver::solve_y_forward
void solve_y_forward(index_t l, index_t iY, mut_view<> λ, mut_view<> w, index_t stride) const
Update the right-hand side λ during the forward solve phase of CR after computing block iY of λ at le...
Definition cr.tpp:177

cyqlone::TricyqleSolver::solve_u_backward
void solve_u_backward(index_t l, index_t iU, mut_view<> λ, mut_view<> w, index_t stride) const
Definition cr.tpp:210

cyqlone::TricyqleSolver::mut_view
batmat::matrix::View< value_type, index_t, vl_t, index_t, index_t, O > mut_view
Non-owning mutable view type for matrix.
Definition cyqlone.hpp:158

cyqlone::TricyqleSolver::ν2p
index_t ν2p(index_t i) const
2-adic valuation modulo p, i.e. ν2p(0) = ν2p(p) = lp().
Definition indexing.tpp:36

cyqlone::TricyqleSolver::add_wrap_ceil_p
index_t add_wrap_ceil_p(index_t a, index_t b) const
Add b to a modulo ceil_p().
Definition indexing.tpp:19

cyqlone::TricyqleSolver::solve_forward
void solve_forward(Context &ctx, mut_view<> λ, index_t stride=1)
Perform only the forward solve as described by factor_solve.
Definition factor.tpp:126

cyqlone::TricyqleSolver::sub_wrap_ceil_p
index_t sub_wrap_ceil_p(index_t a, index_t b) const
Subtract b from a modulo ceil_p().
Definition indexing.tpp:8

cyqlone::TricyqleSolver::cr_thread_assignment
index_t cr_thread_assignment(index_t l, index_t c) const
Adjust thread assignment for non-power-of-two p: The diagonal blocks M(⌊p/2⌋2) are usually mapped to ...
Definition factor.tpp:277

cyqlone::TricyqleSolver::circular
bool circular
Whether the block-tridiagonal system is circular (nonzero top-right & bottom-left corners).
Definition cyqlone.hpp:79

cyqlone::TricyqleSolver::pcr_L
matrix< default_order > pcr_L
Diagonal blocks of the PCR Cholesky factorizations.
Definition cyqlone.hpp:296

cyqlone::TricyqleSolver::solve_λ_backward
void solve_λ_backward(index_t biL, mut_view<> λ, view<> w, index_t stride) const
Definition cr.tpp:241

cyqlone::TricyqleSolver::work_cr
matrix< column_major > work_cr
Temporary workspace for the CR solve phase.
Definition cyqlone.hpp:286

cyqlone::TricyqleSolver::solve_λ_forward
void solve_λ_forward(index_t l, index_t iL, mut_view<> λ, view<> w, index_t stride) const
Apply the updates to block iL of the right-hand side from solve_u_forward and solve_y_forward,...
Definition cr.tpp:190

cyqlone::TricyqleSolver::solve_y_backward
void solve_y_backward(index_t l, index_t iY, mut_view<> λ, index_t stride) const
Definition cr.tpp:225

cyqlone::TricyqleSolver::factor_pcr
void factor_pcr()
Compute the parallel cyclic reduction factorization of the final block tridiagonal system of size v.
Definition pcr.tpp:28

cyqlone::TricyqleSolver::factor_solve
void factor_solve(Context &ctx, mut_view<> λ, index_t stride=1)
Fused factorization and forward solve.
Definition factor.tpp:117

cyqlone::TricyqleSolver::factor_U
void factor_U(index_t l, index_t iU)
Compute a block U in the Cholesky factor for the given CR level l and column index iU.
Definition cr.tpp:23

cyqlone::TricyqleSolver::solve_pcg
void solve_pcg(mut_batch_view<> λ, mut_batch_view<> work_pcg) const
Solve a linear system with the final block tridiagonal system of size v using the preconditioned conj...
Definition pcg.tpp:54

cyqlone::TricyqleSolver::v
static constexpr index_t v
Vector length.
Definition cyqlone.hpp:103

cyqlone::TricyqleSolver::factor
void factor(Context &ctx)
Perform only the factorization as described by factor_solve.
Definition factor.tpp:122

cyqlone::TricyqleSolver::solve_reverse_parallel
void solve_reverse_parallel(Context &ctx, mut_view<> λ, mut_view<> work, index_t stride) const
[Cyqlone solve CR]
Definition factor.tpp:179

cyqlone::TricyqleSolver::work_pcg
matrix< column_major > work_pcg
Temporary workspace for CG vectors.
Definition cyqlone.hpp:313

cyqlone::TricyqleSolver::prefetch_U
void prefetch_U(index_t l, index_t iU) const
Definition cr.tpp:297

cyqlone::TricyqleSolver::solve_reverse
void solve_reverse(Context &ctx, mut_view<> λ, mut_view<> work, index_t stride=1) const
Perform the backward solve phase, after the forward solve phase has been performed by factor_solve.
Definition factor.tpp:164

cyqlone::TricyqleSolver::block_size
const index_t block_size
Block size of the block-tridiagonal system.
Definition cyqlone.hpp:75

cyqlone::TricyqleSolver::params
Params params
Solver parameters for Tricyqle-specific settings.
Definition cyqlone.hpp:87

cyqlone::TricyqleSolver::solve_u_forward
void solve_u_forward(index_t l, index_t iU, mut_view<> λ, index_t stride) const
Update the right-hand side λ during the forward solve phase of CR after computing block iU of λ at le...
Definition cr.tpp:163

cyqlone::TricyqleSolver::p
const index_t p
Number of processors/threads.
Definition cyqlone.hpp:101

cyqlone::TricyqleSolver::solve_pcr
void solve_pcr(mut_batch_view<> λ, mut_batch_view<> work_pcr) const
Solve a linear system with the final block tridiagonal system of size v using the PCR factorization.
Definition pcr.tpp:181

cyqlone::TricyqleSolver::update_K
void update_K(index_t l, index_t i)
Compute a subdiagonal block K of the Schur complement for CR level l+1 and column index i,...
Definition cr.tpp:50

cyqlone::TricyqleSolver::cr_L
matrix< default_order > cr_L
Diagonal blocks of the Cholesky factor of the Schur complement (used during CR).
Definition cyqlone.hpp:272

cyqlone::TricyqleSolver::factor_pcr_parallel
void factor_pcr_parallel(Context &ctx)
Compute the parallel cyclic reduction factorization of the final block tridiagonal system of size v.
Definition pcr.tpp:101

cyqlone::TricyqleSolver::prefetch_Y
void prefetch_Y(index_t l, index_t iY) const
Definition cr.tpp:306

cyqlone::TricyqleSolver::factor_Y
void factor_Y(index_t l, index_t iY)
Compute a block Y in the Cholesky factor for the given CR level l and column index iY.
Definition cr.tpp:37

cyqlone::TricyqleSolver::Context
Ctx Context
Definition cyqlone.hpp:69