cyqlone develop
Fast, parallel and vectorized solver for linear systems with optimal control structure.
Loading...
Searching...
No Matches
pcg.tpp
Go to the documentation of this file.
1#include <cyqlone/cyqlone.hpp>
2#include <cyqlone/linalg.hpp>
3
4#include <print>
5
6#include <batmat/linalg/copy.hpp>
7#include <batmat/linalg/gemm.hpp>
8#include <batmat/linalg/simdify.hpp>
9#include <batmat/linalg/syomv.hpp>
10#include <batmat/linalg/trsm.hpp>
11
12namespace CYQLONE_NS(cyqlone) {
13
14using namespace linalg;
15using namespace batmat::linalg;
16
17// §7.5.2 “Handling of the final scalar levels”
18//
19// Straightforward preconditioned conjugate gradient method to solve M(v) λ = b, with optimized
20// vectorized matrix-vector products for M(v) and its preconditioner.
21
22template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
26 -> value_type {
27 // Mp = M p = LLᵀ p + K p
28 trmm(triu(L.transposed()), p, Mp);
29 trmm(tril(L), Mp);
30 syomv(tril(K), p, Mp);
31 return dot(p, Mp);
32}
33
34template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
39 -> value_type {
40 // Stair: z = Φ⁻¹ r = L⁻ᵀL⁻¹ r - L⁻ᵀL⁻¹ K L⁻ᵀL⁻¹ r = L⁻ᵀL⁻¹(r - K L⁻ᵀL⁻¹ r)
41 copy(r, z);
42 if (params.solve_method == SolveMethod::StairPCG) {
43 trsm(tril(L), r, w);
44 trsm(triu(L.transposed()), w); // w = L⁻ᵀL⁻¹ r
45 syomv_neg(tril(K), w, z); // z -= K L⁻ᵀL⁻¹ r
46 }
47 // Jacobi: z = L⁻ᵀL⁻¹ r
48 trsm(tril(L), z);
49 trsm(triu(L.transposed()), z);
50 return dot(r, z);
51}
52
53template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
56 auto r = work_pcg.middle_cols(0, 1), z = work_pcg.middle_cols(1, 1),
57 p = work_pcg.middle_cols(2, 1), Mp = work_pcg.middle_cols(3, 1);
58 auto M = pcr_L.batch(0), K = cr_Y.batch(0);
59 value_type rᵀz = [&] {
60 GUANAQO_TRACE("PCG", 0);
61 copy(λ, r);
62 fill(value_type{}, λ);
63 value_type rᵀz = mul_precond(r, z, Mp, M, K);
64 copy(z, p);
65 return rᵀz;
66 }();
67 const auto ε2 = params.pcg_tolerance * params.pcg_tolerance;
68 for (index_t it = 0; it < params.pcg_max_iter; ++it) {
69 GUANAQO_TRACE("PCG", it + 1);
70 value_type pᵀMp = mul_Mv(p, Mp, M, K);
71 value_type α = rᵀz / pᵀMp;
72 axpy(+α, p, λ);
73 axpy(-α, Mp, r);
74 value_type r2 = dot(r, r);
75 if (params.pcg_print_resid)
76 std::println("{:>4}) pcg resid = {:15.6e}", it, std::sqrt(r2));
77 if (r2 < ε2)
78 break;
79 value_type rᵀz_new = mul_precond(r, z, Mp, M, K);
80 value_type β = rᵀz_new / rᵀz;
81 axpby(value_type{1}, z, β, p);
82 rᵀz = rᵀz_new;
83 }
84}
85
86} // namespace CYQLONE_NS(cyqlone)
The main header for the Cyqlone and Tricyqle linear solvers.
@ StairPCG
Preconditioned Conjugate Gradient with staircase preconditioner (iterative).
void syomv(Structured< VA, SA > A, VB &&B, VD &&D)
void trsm(Structured< VA, SA > A, VB &&B, VD &&D, with_rotate_B_t< RotB >={})
void axpy(Vy &&y, const std::array< simdified_value_t< Vy >, sizeof...(Vx)> &alphas, Vx &&...x)
Add scaled vector y = ∑ᵢ αᵢxᵢ + βy.
Definition linalg.hpp:361
void trmm(Structured< VA, SA > A, Structured< VB, SB > B, Structured< VD, SD > D, Opts... opts)
void axpby(Ta alpha, Vx &&x, Tb beta, Vy &&y, Vz &&z)
Add scaled vector z = αx + βy.
Definition linalg.hpp:343
void copy(VA &&A, VB &&B, Opts... opts)
simdified_value_t< Vx > dot(Vx &&x, Vy &&y)
Compute the dot product of two vectors.
Definition linalg.hpp:286
void syomv_neg(Structured< VA, SA > A, VB &&B, VD &&D)
void fill(simdified_value_t< VB > a, VB &&B)
constexpr auto triu(M &&m)
constexpr auto tril(M &&m)
#define GUANAQO_TRACE(name, instance,...)
value_type mul_precond(batch_view<> r, mut_batch_view<> z, mut_batch_view<> w, batch_view< default_order > L, batch_view< default_order > K) const
Multiply a vector by the preconditioner for the final block tridiagonal system of size v.
Definition pcg.tpp:35
batmat::matrix::View< value_type, index_t, vl_t, vl_t, layer_stride, O > mut_batch_view
Non-owning mutable view type for a single batch of v matrices.
Definition cyqlone.hpp:165
matrix< default_order > pcr_L
Diagonal blocks of the PCR Cholesky factorizations.
Definition cyqlone.hpp:296
matrix< default_order > cr_Y
Subdiagonal blocks Y of the Cholesky factor of the Schur complement (used during CR).
Definition cyqlone.hpp:282
batmat::matrix::View< const value_type, index_t, vl_t, vl_t, layer_stride, O > batch_view
Non-owning immutable view type for a single batch of v matrices.
Definition cyqlone.hpp:162
void solve_pcg(mut_batch_view<> λ, mut_batch_view<> work_pcg) const
Solve a linear system with the final block tridiagonal system of size v using the preconditioned conj...
Definition pcg.tpp:54
matrix< column_major > work_pcg
Temporary workspace for CG vectors.
Definition cyqlone.hpp:313
Params params
Solver parameters for Tricyqle-specific settings.
Definition cyqlone.hpp:87
const index_t p
Number of processors/threads.
Definition cyqlone.hpp:101
value_type mul_Mv(batch_view<> p, mut_batch_view<> Mp, batch_view< default_order > L, batch_view< default_order > K) const
Multiply a vector by the final block tridiagonal matrix of size v.
Definition pcg.tpp:23