13#include <batmat/assume.hpp>
14#include <batmat/config.hpp>
15#include <batmat/linalg/hyhound.hpp>
16#include <batmat/matrix/layout.hpp>
17#include <batmat/matrix/matrix.hpp>
18#include <batmat/openmp.h>
19#include <batmat/simd.hpp>
20#include <batmat/unroll.h>
21#include <guanaqo/trace.hpp>
28namespace CYQLONE_NS(cyqlone) {
32[[nodiscard]]
constexpr bool is_pow_2(index_t n) {
34 auto un =
static_cast<std::make_unsigned_t<index_t>
>(n);
35 return std::has_single_bit(un);
38[[nodiscard]]
constexpr index_t
ceil_log2(index_t n) {
40 auto un =
static_cast<std::make_unsigned_t<index_t>
>(n);
41 return static_cast<index_t
>(std::bit_width(un - 1));
45[[nodiscard]]
constexpr index_t
get_level(index_t i) {
47 auto ui =
static_cast<std::make_unsigned_t<index_t>
>(i);
48 return static_cast<index_t
>(std::countr_zero(ui));
64template <index_t VL = 4,
class T = real_t, StorageOrder DefaultOrder = StorageOrder::ColMajor,
65 class Ctx = parallel::Context<>>
103 static constexpr index_t
v = VL;
107 [[nodiscard]]
constexpr index_t
ceil_p()
const {
return 1 <<
lp(); }
109 [[nodiscard]]
constexpr index_t
ceil_P()
const {
return 1 << (
lp() +
lv()); }
116 using vl_t = std::integral_constant<index_t, v>;
123 return std::make_unique<SharedContext>(
p);
132 [[nodiscard]] index_t
ν2(index_t i)
const;
134 [[nodiscard]] index_t
ν2p(index_t i)
const;
151 template <StorageOrder O = column_major>
154 template <StorageOrder O = column_major>
157 template <StorageOrder O = column_major>
161 template <StorageOrder O = column_major>
164 template <StorageOrder O = column_major>
178 return func(ctx.index,
cr_L.batch(ctx.index));
186 return (
p == 1 || ctx.index & 1) ? func(ctx.index,
cr_Y.batch(ctx.index))
187 : func(ctx.index,
cr_U.batch(ctx.index + 1).transposed());
195 return func(ctx.index, b.
batch(ctx.index));
203 return func(ctx.index, λ.
batch(ctx.index));
207 return func(ctx.index, λ.
batch(ctx.index));
210 template <
class Λ,
class F>
212 return func(ctx.index, λ.
batch(ctx.index));
323 std::vector<index_t>
m_update = std::vector<index_t>(
p);
377 template <
bool Factor = true,
bool Solve = true>
386 template <
bool Factor = true,
bool Solve = true>
437 template <index_t Level>
445 template <index_t Level>
454 template <index_t Level>
509 [[nodiscard]] std::pair<index_t, index_t>
cols_Ups_fwd(index_t l, index_t i)
const;
510 [[nodiscard]] std::pair<index_t, index_t>
cols_Ups_bwd(index_t l, index_t i)
const;
511 [[nodiscard]] std::pair<index_t, index_t>
cols_Q_cr(index_t l, index_t i)
const;
527 template <
bool Solve = true>
533 template <index_t Level>
542 template <StorageOrder O>
544 template <StorageOrder O>
559template <index_t VL = 4,
class T = real_t,
StorageOrder DefaultOrder = StorageOrder::ColMajor,
603 static constexpr index_t
v = VL;
608 [[nodiscard]]
constexpr index_t
lv()
const {
return tricyqle.lv(); }
610 [[nodiscard]]
constexpr index_t
lp()
const {
return tricyqle.lp(); }
617 return tricyqle.create_parallel_context();
627 for (index_t i = 0; i <
n; ++i) {
628 const index_t di = ti *
n + i;
630 func(j, di, xs.batch(di)...);
640 for (index_t i =
n; i --> 0;) {
641 const index_t di = ti *
n + i;
643 func(j, di, xs.batch(di)...);
653 [[nodiscard]] index_t
ceil_N()
const {
return n *
p *
v; }
655 [[nodiscard]] index_t
ν2(index_t i)
const;
657 [[nodiscard]] index_t
ν2p(index_t i)
const;
663 [[nodiscard]] index_t
add_wrap_p(index_t a, index_t b)
const;
665 [[nodiscard]] index_t
sub_wrap_p(index_t a, index_t b)
const;
689 template <StorageOrder O = column_major>
692 template <StorageOrder O = column_major>
695 template <StorageOrder O = column_major>
699 template <StorageOrder O = column_major>
702 template <StorageOrder O = column_major>
732 std::string_view solve = tricyqle_params.solve_method ==
SolveMethod::PCR ?
"pcr"
737 return std::format(
"nx={}-nu={}-ny={}-N={}-p={}-v={}-{}-{}",
nx,
nu,
ny,
N_horiz,
p,
v,
903 bool accum =
false)
const;
973 template <
bool Factor = true,
bool Solve = true>
975 template <
bool Factor = true,
bool Solve = true>
983 template <
bool Factor = true,
bool Solve = true>
994 std::optional<
mut_view<>> Mᵀλ = std::nullopt)
const;
1003 template <
bool Solve = true>
1006 template <
bool Solve = true>
1015 std::span<const value_type> Σ)
const;
std::string_view order(qp::StorageOrder o)
Data structure for optimal control problems where the initial states are eliminated.
@ StairPCG
Preconditioned Conjugate Gradient with staircase preconditioner (iterative).
@ PCR
Parallel Cyclic Reduction (direct).
Parameters and settings for the Tricyqle block-tridiagonal solver.
auto hyhound_size_W(Structured< VL, SL > L)
void fill(simdified_value_t< VB > a, VB &&B)
constexpr auto tril(M &&m)
Parameters and settings for the Cyqlone solver.
simd< Tp, deduced_abi< Tp, Np > > deduced_simd
constexpr index_t get_level(index_t i)
constexpr index_t get_index_in_level(index_t i)
constexpr index_t ceil_log2(index_t n)
constexpr bool is_pow_2(index_t n)
Parallel execution context and synchronization primitives.
batch_view_type batch(index_type b) const
Linear solver for systems with optimal control structure.
index_t num_variables() const
Get the total number of primal variables in the OCP.
std::vector< value_type > build_sol(view<> ux, view<> λ) const
matrix pack_constraints(std::span< const value_type > y_lin, value_type fill=0) const
SparseMatrix build_sparse_factor() const
matrix< default_order > data_H
void update(Context &ctx, view<> ΔΣ)
Perform factorization updates of the Cyqlone factorization as described by Algorithm 4 in the paper.
void transposed_general_constr(view<> y, mut_view<> DCᵀy) const
Compute Gᵀy, where G is the general constraint Jacobian matrix of the OCP.
matrix initialize_variables() const
Get a zero-initialized matrix for the primal variables u and x.
std::vector< value_type > unpack_constraints(view<> y) const
index_t ν2p(index_t i) const
2-adic valuation modulo p, i.e. ν2p(0) = ν2p(p) = lp().
constexpr index_t ceil_P() const
The number of parallel execution units P rounded up to the next power of two.
index_t add_wrap_ceil_N(index_t a, index_t b) const
Add b to a modulo N_horiz.
typename tricyqle_t::template view< O > view
Non-owning immutable view type for matrix.
void update_data(const CyqloneStorage< value_type > &ocp)
Update the internal data structures to reflect changes in the OCP data (without changing the problem ...
matrix< default_order > data_F
void update_params(const CyqloneParams< value_type > &new_params)
Update the Cyqlone solver parameters.
void solve_reverse_mul(Context &ctx, mut_view<> ux, mut_view<> λ, mut_view<> Mᵀλ)
Fused variant of solve_reverse and transposed_dynamics_constr (for improved locality of the dynamics ...
void factor(Context &ctx, value_type γ, view<> Σ)
Compute the Cyqlone factorization of the KKT matrix of the OCP.
void solve_reverse(Context &ctx, mut_view<> ux, mut_view<> λ, mut_view<> work, std::optional< mut_view<> > Mᵀλ=std::nullopt) const
matrix< default_order > data_Gᵀ
typename tricyqle_t::template batch_view< O > batch_view
Non-owning immutable view type for a single batch of v matrices.
void update_solve(Context &ctx, view<> ΔΣ, mut_view<> ux, mut_view<> λ)
Fused variant of update and solve_forward.
void pack_variables(std::span< const value_type > ux_lin, mut_view<> ux) const
void factor_solve(Context &ctx, value_type γ, view<> Σ, mut_view<> ux, mut_view<> λ)
Compute the Cyqlone factorization of the KKT matrix of the OCP and perform a forward solve (fused for...
void pack_constraints(std::span< const value_type > y_lin, mut_view<> y, value_type fill=0) const
typename tricyqle_t::layer_stride layer_stride
void cost_gradient_remove_regularization(Context &ctx, value_type γ, view<> x, view<> x0, mut_view<> grad_f) const
Subtract the regularization term from the cost gradient.
void initialize_gradient(const CyqloneStorage< value_type > &ocp, mut_view<> grad) const
Initialize the gradient vector for the OCP cost function, using the custom Cyqlone storage format.
void unpack_variables(view<> ux, std::span< value_type > ux_lin) const
matrix initialize_rhs(const CyqloneStorage< value_type > &ocp) const
Initialize the right-hand side vector for the dynamics constraints of the OCP, using the custom Cyqlo...
void transposed_dynamics_constr(Context &ctx, view<> λ, mut_view<> Mᵀλ, bool accum=false) const
Compute Mᵀλ, where M is the dynamics constraint Jacobian matrix of the OCP.
void residual_dynamics_constr(Context &ctx, view<> x, view<> b, mut_view<> Mxb) const
Compute Mx + b, where M is the dynamics constraint Jacobian matrix of the OCP.
index_t ν2(index_t i) const
2-adic valuation ν₂.
void update_solve_impl(Context &ctx, view<> ΔΣ, mut_view<> ux, mut_view<> λ)
[PCR update]
void update_riccati(Context &ctx, view<> ΔΣ)
TricyqleParams< value_type > get_tricyqle_params() const
Get the current Tricyqle solver parameters.
SparseMatrix build_sparse(const CyqloneStorage< value_type > &ocp, std::span< const value_type > Σ) const
index_t sub_wrap_ceil_N(index_t a, index_t b) const
constexpr index_t lv() const
log₂(v), logarithm of the vector length.
std::unique_ptr< SharedContext > create_parallel_context() const
std::string get_params_string() const
Get a string representation of the main solver parameters. Used mainly for file names.
void unpack_constraints(view<> y, std::span< value_type > y_lin) const
index_t get_linear_batch_offset(index_t biA) const
index_t add_wrap_p(index_t a, index_t b) const
Add b to a modulo p.
void cost_gradient_regularized(Context &ctx, value_type γ, view<> ux, view<> ux0, view<> q, mut_view<> grad_f) const
Compute the regularized cost gradient, with regularization parameter γ⁻¹, with respect to the point u...
typename tricyqle_t::template matrix< O > matrix
Owning type for a batch of matrices (with batch size v).
tricyqle_t::Context Context
void solve_forward(Context &ctx, mut_view<> ux, mut_view<> λ)
Perform a forward solve with the Cyqlone factorization.
index_t num_dynamics_constraints() const
Get the total number of dynamics constraints in the OCP.
static CyqloneSolver build(const CyqloneStorage< value_type > &ocp, index_t p)
Initialize a Cyqlone solver for the given OCP.
void cost_gradient(Context &ctx, view<> ux, value_type α, view<> q, value_type β, mut_view<> grad_f) const
Compute the cost gradient, with optional scaling factors.
matrix initialize_dynamics_constraints() const
Get a zero-initialized matrix for the dynamics constraints (or their multipliers).
void transposed_general_constr(Context &ctx, view<> y, mut_view<> DCᵀy) const
Compute Gᵀy, where G is the general constraint Jacobian matrix of the OCP.
void foreach_stage(Context &ctx, auto &&func, auto &&...xs) const
Call a function for each stage in the horizon, passing the stage index, the data batch index,...
index_t sub_wrap_ceil_p(index_t a, index_t b) const
Subtract b from a modulo ceil_p().
void initialize_rhs(const CyqloneStorage< value_type > &ocp, mut_view<> rhs) const
Initialize the right-hand side vector for the dynamics constraints of the OCP, using the custom Cyqlo...
void solve_reverse(Context &ctx, mut_view<> ux, mut_view<> λ)
Perform a reverse solve with the Cyqlone factorization.
matrix< column_major > riccati_Υ2
std::vector< value_type > unpack_variables(view<> ux) const
TricyqleSolver< VL, T, DefaultOrder, Ctx > tricyqle_t
Tricyqle solver type for solving block-tridiagonal systems in parallel.
void solve_riccati_reverse(Context &ctx, mut_view<> ux, mut_view<> λ, mut_view<> work, std::optional< mut_view<> > Mᵀλ) const
[Modified Riccati factorization and fused forward solve]
index_t riccati_thread_assignment(Context &ctx) const
void general_constr(Context &ctx, view<> ux, mut_view<> DCux) const
Compute the general constraints Gx, where G is the general constraint Jacobian matrix of the OCP.
index_t add_wrap_ceil_P(index_t a, index_t b) const
void compute_schur(Context &ctx, mut_view<> ux, mut_view<> λ)
[Cyqlone compute Schur]
matrix initialize_general_constraints() const
Get a zero-initialized matrix for the general constraints (or their multipliers).
matrix initialize_gradient(const CyqloneStorage< value_type > &ocp) const
Initialize the gradient vector for the OCP cost function, using the custom Cyqlone storage format.
typename tricyqle_t::template mut_batch_view< O > mut_batch_view
Non-owning mutable view type for a single batch of v matrices.
matrix< column_major > riccati_Υ1
void update_riccati_solve(Context &ctx, view<> ΔΣ, mut_view<> ux, mut_view<> λ)
Update the modified Riccati factorization of a single block column as described by Algorithm 3 in the...
index_t sub_wrap_p(index_t a, index_t b) const
Subtract b from a modulo p.
matrix< column_major > riccati_work
void initialize_bounds(const CyqloneStorage< value_type > &ocp, mut_view<> b_min, mut_view<> b_max) const
Initialize the lower and upper bounds for the general constraints of the OCP, using the custom Cyqlon...
std::vector< value_type > unpack_dynamics(view<> λ) const
index_t add_wrap_ceil_p(index_t a, index_t b) const
Add b to a modulo ceil_p().
typename tricyqle_t::template mut_view< O > mut_view
Non-owning mutable view type for matrix.
void pack_dynamics(std::span< const value_type > λ_lin, mut_view<> λ) const
void update_tricyqle_params(const TricyqleParams< value_type > &new_params)
Update the Tricyqle solver parameters.
matrix pack_dynamics(std::span< const value_type > λ_lin) const
constexpr index_t ceil_p() const
The number of processors p rounded up to the next power of two.
void foreach_stage_fwd(Context &ctx, auto &&func, auto &&...xs) const
Call a function for each stage in the horizon, passing the stage index, the data batch index,...
matrix< default_order > riccati_LH
void factor_riccati_solve(Context &ctx, value_type γ, view<> Σ, mut_view<> ux, mut_view<> λ)
[Modified Riccati factorization and fused forward solve]
SparseMatrix build_sparse_diag() const
CyqloneParams< value_type > params
std::pair< matrix<>, matrix<> > initialize_bounds(const CyqloneStorage< value_type > &ocp) const
Initialize the lower and upper bounds for the general constraints of the OCP, using the custom Cyqlon...
static constexpr auto column_major
static constexpr auto default_order
index_t num_general_constraints() const
Get the total number of general constraints in the OCP.
matrix pack_variables(std::span< const value_type > ux_lin) const
matrix< column_major > work_Σ
CyqloneParams< value_type > get_params() const
Get the current Cyqlone solver parameters.
std::vector< value_type > build_rhs(view<> rq, view<> b, value_type scale_rq=-1, value_type scale_b=-1) const
void factor_solve_impl(Context &ctx, value_type γ, view<> Σ, mut_view<> ux, mut_view<> λ)
[Cyqlone factorization and fused forward solve]
constexpr index_t lp() const
log₂(p), logarithm of the number of processors/threads, rounded up.
void unpack_dynamics(view<> λ, std::span< value_type > λ_lin) const
static constexpr index_t v
tricyqle_t::SharedContext SharedContext
index_t sub_wrap_ceil_P(index_t a, index_t b) const
matrix< default_order > riccati_V
matrix< default_order > riccati_LAB
Storage for a linear-quadratic OCP with the initial states x₀ eliminated.
A sparse matrix in COO format.
Solver for block-tridiagonal systems using cyclic reduction (CR), parallel cyclic reduction (PCR),...
void update_pcr(batch_view<> fwd, batch_view<> bwd, batch_view<> Σ)
[Cyqlone update CR helper]
constexpr index_t lp() const
log₂(p), logarithm of the number of processors/threads p, rounded up.
void factor_solve_impl(Context &ctx, mut_view<> λ, index_t stride=1)
Implementation of factor_solve.
value_type mul_precond(batch_view<> r, mut_batch_view<> z, mut_batch_view<> w, batch_view< default_order > L, batch_view< default_order > K) const
Multiply a vector by the preconditioner for the final block tridiagonal system of size v.
void solve_reverse_serial(mut_view<> λ, mut_view<> work, index_t stride) const
[Cyqlone solve CR]
mut_batch_view< column_major > work_Σ_extra()
static constexpr index_t lv()
batmat::matrix::View< value_type, index_t, vl_t, vl_t, layer_stride, O > mut_batch_view
Non-owning mutable view type for a single batch of v matrices.
mut_batch_view< column_major > work_Σ_fwd(index_t l, index_t i)
void factor_L(index_t l, index_t i)
Update and factorize a block L in the Cholesky factor for CR level l+1 and column index i,...
void prefetch_L(batch_view< O > X) const
decltype(auto) get_solution(Context &ctx, mut_view<> λ, auto &&func) const
Get access to the solution computed by this thread using a user-provided function.
mut_batch_view< column_major > work_Ups_fwd_last()
mut_batch_view< column_major > work_Σ_bwd_last()
decltype(auto) init_subdiag(Context &ctx, auto &&func)
Initialize the subdiagonal blocks K of the block tridiagonal system using a user-provided function.
mut_batch_view< column_major > work_Σ_Q(index_t l, index_t i)
mut_batch_view< column_major > work_Ups_extra()
void factor_skip_first(Context &ctx)
Factorization-only variant of factor_solve_skip_first.
void factor_solve_skip_first(Context &ctx, mut_view<> λ, index_t stride=1)
Fused factorization and forward solve.
void prefetch_L(index_t bi) const
typename Context::shared_context_type SharedContext
matrix< column_major > work_update_pcr_UY
static constexpr auto column_major
index_t ν2(index_t i) const
2-adic valuation ν₂.
void solve_pcr(mut_batch_view<> λ)
Solve a linear system with the final block tridiagonal system of size v using the PCR factorization.
matrix< column_major > work_update_pcr_L
batmat::matrix::DefaultStride layer_stride
mut_batch_view< column_major > work_Q_cr(index_t l, index_t i)
batmat::datapar::deduced_simd< value_type, v > simd
Represents a SIMD vector of width v storing values of type value_type.
void solve_y_forward(index_t l, index_t iY, mut_view<> λ, mut_view<> w, index_t stride) const
Update the right-hand side λ during the forward solve phase of CR after computing block iY of λ at le...
void solve_u_backward(index_t l, index_t iU, mut_view<> λ, mut_view<> w, index_t stride) const
batmat::matrix::View< const value_type, index_t, vl_t, index_t, index_t, O > view
Non-owning immutable view type for matrix.
batmat::matrix::View< value_type, index_t, vl_t, index_t, index_t, O > mut_view
Non-owning mutable view type for matrix.
decltype(auto) get_solution(Context &ctx, Λ &&λ, F &&func) const
Get access to the solution computed by this thread using a user-provided function.
index_t ν2p(index_t i) const
2-adic valuation modulo p, i.e. ν2p(0) = ν2p(p) = lp().
index_t add_wrap_ceil_p(index_t a, index_t b) const
Add b to a modulo ceil_p().
void solve_reverse(Context &ctx, mut_view<> λ, index_t stride=1)
decltype(auto) init_diag(Context &ctx, auto &&func)
Initialize the diagonal blocks M of the block tridiagonal system using a user-provided function.
void solve_forward(Context &ctx, mut_view<> λ, index_t stride=1)
Perform only the forward solve as described by factor_solve.
mut_batch_view< column_major > work_Σ_bwd(index_t l, index_t i)
index_t sub_wrap_ceil_p(index_t a, index_t b) const
Subtract b from a modulo ceil_p().
static constexpr bool merge_last_level_pcr
index_t cr_thread_assignment(index_t l, index_t c) const
Adjust thread assignment for non-power-of-two p: The diagonal blocks M(⌊p/2⌋2) are usually mapped to ...
constexpr index_t ceil_P() const
The number of parallel execution units P = p * v rounded up to the next power of two.
matrix< default_order > pcr_U
mut_batch_view< column_major > work_Ups_bwd(index_t l, index_t i)
void solve_forward_skip_first(Context &ctx, mut_view<> λ, index_t stride=1)
Solution-only variant of factor_solve_skip_first.
matrix< default_order > pcr_L
std::pair< index_t, index_t > cols_Ups_fwd(index_t l, index_t i) const
void update_L(index_t l, index_t i)
[Cyqlone update CR helper]
matrix< column_major > work_update
matrix< default_order > cr_Y
std::vector< index_t > m_update
std::pair< index_t, index_t > cols_Q_cr(index_t l, index_t i) const
void update_pcr_level(index_t m, mut_batch_view<> WYU, mut_batch_view<> WΣ)
void solve_λ_backward(index_t biL, mut_view<> λ, view<> w, index_t stride) const
decltype(auto) get_solution(Context &ctx, view<> λ, auto &&func) const
Get access to the solution computed by this thread using a user-provided function.
matrix< column_major > work_cr
mut_batch_view< column_major > work_Ups_bwd_last()
mut_batch_view< column_major > work_Σ_fwd_last()
index_t work_Ups_bwd_w(index_t l, index_t i) const
void set_update_rank_extra(index_t m)
void solve_λ_forward(index_t l, index_t iL, mut_view<> λ, view<> w, index_t stride) const
Apply the updates to block iL of the right-hand side from solve_u_forward and solve_y_forward,...
mut_batch_view< column_major > work_Ups_fwd(index_t l, index_t i)
void solve_y_backward(index_t l, index_t iY, mut_view<> λ, index_t stride) const
void update_solve_cr(Context &ctx, mut_view<> λ, index_t stride)
[Cyqlone update CR]
void set_thread_update_rank(Context &ctx, index_t c, index_t m)
[Cyqlone update Riccati]
std::integral_constant< index_t, v *alignof(value_type)> align_t
Integral constant type for the alignment of the batched matrix data structures.
void factor_pcr()
Compute the parallel cyclic reduction factorization of the final block tridiagonal system of size v.
Params get_params() const
Get the current solver parameters.
constexpr index_t ceil_p() const
The number of processors p rounded up to the next power of two.
std::unique_ptr< SharedContext > create_parallel_context() const
Create a new parallel execution context, storing synchronization primitives and shared data for the p...
void factor_solve(Context &ctx, mut_view<> λ, index_t stride=1)
Fused factorization and forward solve.
void factor_U(index_t l, index_t iU)
Compute a block U in the Cholesky factor for the given CR level l and column index iU.
batmat::matrix::View< const value_type, index_t, vl_t, vl_t, layer_stride, O > batch_view
Non-owning immutable view type for a single batch of v matrices.
void solve_pcg(mut_batch_view<> λ)
Solve a linear system with the final block tridiagonal system of size v using the preconditioned conj...
matrix< default_order > pcr_M
matrix< column_major > work_update_Σ
void solve_pcg(mut_batch_view<> λ, mut_batch_view<> work_pcg) const
Solve a linear system with the final block tridiagonal system of size v using the preconditioned conj...
std::integral_constant< index_t, v > vl_t
Integral constant type for the vector length.
static constexpr index_t v
void factor(Context &ctx)
Perform only the factorization as described by factor_solve.
void solve_reverse_parallel(Context &ctx, mut_view<> λ, mut_view<> work, index_t stride) const
[Cyqlone solve CR]
matrix< column_major > work_pcg
void prefetch_U(index_t l, index_t iU) const
void solve_reverse(Context &ctx, mut_view<> λ, mut_view<> work, index_t stride=1) const
Perform the backward solve phase, after the forward solve phase has been performed by factor_solve.
void prefetch(batch_view< O > X) const
[Cyqlone solve CR helper]
matrix< column_major > work_hyh
void factor_pcr_level_parallel(Context &ctx)
Perform a single level of the PCR factorization.
decltype(auto) init_rhs(Context &ctx, mut_view<> b, auto &&func) const
Initialize the right-hand side of the linear system using a user-provided function.
void solve_u_forward(index_t l, index_t iU, mut_view<> λ, index_t stride) const
Update the right-hand side λ during the forward solve phase of CR after computing block iU of λ at le...
static constexpr auto default_order
void solve_pcr(mut_batch_view<> λ, mut_batch_view<> work_pcr) const
Solve a linear system with the final block tridiagonal system of size v using the PCR factorization.
void update_params(const Params &new_params)
Update the solver parameters.
index_t work_Ups_fwd_w(index_t l, index_t i) const
void update_K(index_t l, index_t i)
Compute a subdiagonal block K of the Schur complement for CR level l+1 and column index i,...
void solve_pcr_level(mut_batch_view<> λ, mut_batch_view<> work_pcr) const
Perform a single level of the PCR solve.
void update_Y(index_t l, index_t i)
std::pair< index_t, index_t > cols_Ups_bwd(index_t l, index_t i) const
matrix< default_order > cr_U
matrix< default_order > pcr_Y
matrix< default_order > cr_L
TricyqleParams< value_type > Params
matrix< column_major > work_update_pcr_Σ
batmat::matrix::Matrix< value_type, index_t, vl_t, index_t, O, align_t > matrix
Owning type for a batch of matrices (with batch size v).
void update_U(index_t l, index_t i)
void factor_pcr_parallel(Context &ctx)
Compute the parallel cyclic reduction factorization of the final block tridiagonal system of size v.
value_type mul_Mv(batch_view<> p, mut_batch_view<> Mp, batch_view< default_order > L, batch_view< default_order > K) const
Multiply a vector by the final block tridiagonal matrix of size v.
void prefetch_Y(index_t l, index_t iY) const
void clear_update_rank_extra()
void factor_pcr_level()
Perform a single level of the PCR factorization.
void factor_Y(index_t l, index_t iY)
Compute a block Y in the Cholesky factor for the given CR level l and column index iY.
Thread context for parallel execution.