cyqlone develop
Fast, parallel and vectorized solver for linear systems with optimal control structure.
Loading...
Searching...
No Matches
test-pcr.cpp
Go to the documentation of this file.
1#include <cyqlone/config.hpp>
2#include <cyqlone/linalg.hpp>
3#include <cyqlone/matio.hpp>
4#include <cyqlone/packing.hpp>
5#include <batmat/dtypes.hpp>
6#include <batmat/linalg/gemm.hpp>
7#include <batmat/linalg/gemv.hpp>
8#include <batmat/linalg/potrf.hpp>
9#include <batmat/linalg/shift.hpp>
10#include <batmat/linalg/symv.hpp>
11#include <batmat/linalg/syomv.hpp>
12#include <batmat/linalg/trsm.hpp>
13#include <guanaqo/print.hpp>
14#include <cmath>
15#include <iostream>
16#include <random>
17
18namespace cyqlone {
19
20using namespace batmat::linalg;
21using namespace cyqlone::linalg;
22
23[[nodiscard]] constexpr index_t get_depth(index_t n) {
24 return static_cast<index_t>(std::bit_width(static_cast<std::make_unsigned_t<index_t>>(n) - 1));
25}
26
27// Standalone PCR factorization class for testing
28template <index_t VL, class T, StorageOrder DefaultOrder>
30 using value_type = T;
31 using vl_t = std::integral_constant<index_t, VL>;
32 using align_t = std::integral_constant<index_t, VL * alignof(T)>;
33 static constexpr index_t v = VL;
34 static constexpr index_t lv = get_depth(v);
35
36 template <StorageOrder O = DefaultOrder>
38 template <StorageOrder O = DefaultOrder>
40 template <StorageOrder O = DefaultOrder>
42 template <StorageOrder O = DefaultOrder>
44
45 index_t n;
46
47 bmatrix<> pcr_L = [this] { return bmatrix<>{{.depth = v * (lv + 1), .rows = n, .cols = n}}; }();
48 bmatrix<> pcr_Y = [this] { return bmatrix<>{{.depth = v * lv, .rows = n, .cols = n}}; }();
49 bmatrix<> pcr_U = [this] { return bmatrix<>{{.depth = v * lv, .rows = n, .cols = n}}; }();
50 matrix<> pcr_M = [this] { return matrix<>{{.rows = n, .cols = n}}; }();
51 matrix<> work = [this] { return matrix<>{{.rows = n, .cols = 1}}; }();
52
53 void factor_pcr(view<> M0, view<> K0) {
54 [&]<index_t... Levels>(std::integer_sequence<index_t, Levels...>) {
55 (this->template factor_pcr_level<Levels>(M0, K0), ...);
56 }(std::make_integer_sequence<index_t, lv>{});
57 auto M = lv == 0 ? M0 : pcr_M.batch(0);
58 auto L = pcr_L.batch(lv);
59 potrf(tril(M), tril(L));
60 }
61
62 template <index_t l>
64 static constexpr auto r = 1 << l;
65 auto M = l == 0 ? M0 : pcr_M.batch(0), K = l == 0 ? K0 : pcr_Y.batch(l);
66 auto L = pcr_L.batch(l), Y = pcr_Y.batch(l), U = pcr_U.batch(l);
67 auto M_next = pcr_M.batch(0);
68 potrf(tril(M), tril(L));
69 if constexpr (l + 1 < lv) {
70 auto K_next = pcr_Y.batch(l + 1);
71 trsm(K.transposed(), triu(L.transposed()), U, with_rotate_A<-r>);
72 trsm(K, triu(L.transposed()), Y);
75 gemm_neg(Y, U.transposed(), K_next, {}, with_rotate_C<-r>, with_rotate_D<-r>);
76 } else {
77 copy(K, U, with_rotate<+r>);
78 add(U, K.transposed()); // U contains Kᵀ now
79 trsm(U, triu(L.transposed()), with_rotate_A<-r>);
81 }
82 }
83
84 void solve_pcr(mut_view<> λ) { solve_pcr(λ, work.batch(0)); }
85 void solve_pcr(mut_view<> λ, mut_view<> work_pcr) const {
86 [&]<index_t... Levels>(std::integer_sequence<index_t, Levels...>) {
87 (this->template solve_pcr_level<Levels>(λ, work_pcr), ...);
88 }(std::make_integer_sequence<index_t, lv>{});
89 trsm(tril(pcr_L.batch(lv)), λ);
90 trsm(triu(pcr_L.batch(lv).transposed()), λ);
91 }
92
93 template <index_t l>
94 void solve_pcr_level(mut_view<> λ, mut_view<> work_pcr) const {
95 static constexpr auto r = 1 << l;
96 trsm(tril(pcr_L.batch(l)), λ, work_pcr); // w = L⁻¹ λ
97 gemv_sub(pcr_U.batch(l), work_pcr, λ, with_rotate_C<-r>, with_rotate_D<-r>);
98 if constexpr (l + 1 < lv)
99 gemv_sub(pcr_Y.batch(l), work_pcr, λ, with_rotate_C<+r>, with_rotate_D<+r>);
100 }
101};
102
103// Quick-and-dirty power iteration to estimate the spectral norm of a block tridiagonal matrix.
104template <class Mat>
105auto λ_max_power(const Mat &M, const Mat &K, int max_it, typename Mat::value_type tol) {
106 using std::abs;
107 using std::sqrt;
108 using T = typename Mat::value_type;
109 batmat::matrix::Matrix<typename Mat::value_type, typename Mat::index_type,
110 typename Mat::batch_size_type, typename Mat::depth_type>
111 x{{.rows = M.rows(), .cols = 1}}, Ax{{.rows = M.rows(), .cols = 1}};
112 x.set_constant(T(1) / sqrt(T(M.rows())));
113 T λ = 0, λ_old = 0;
114 int k;
115 for (k = 0; k < max_it; ++k) {
116 symv(tril(M), x, Ax); // y = M x
117 syomv(tril(K), x, Ax); // y += K x
118 λ_old = std::exchange(λ, dot(x, Ax)); // λ = xᵀAx (Rayleigh quotient, ‖x‖=1)
119 auto norm = norm_2(Ax); // ‖Ax‖
120 scale(T(1) / norm, Ax, x); // x ← Ax/‖Ax‖
121 if (abs(λ - λ_old) < tol * abs(λ))
122 break;
123 }
124 return std::make_pair(λ, k + 1);
125};
126
127template <class M>
128auto unpacked(const M &matrix) {
130 {.depth = matrix.depth(), .rows = matrix.rows(), .cols = matrix.cols()}};
131 unpack(matrix, res);
132 return res;
133}
134
135} // namespace cyqlone
136
137int main() {
138 using std::abs;
139 using std::sqrt;
140 using namespace cyqlone;
141 using T = batmat::real_t;
142 constexpr index_t N = batmat::types::vl_or_largest<T, 8>; // number of blocks == vector length
143 static_assert(N > 1, "No suitable vector length available");
145 constexpr index_t n = 15; // block size
146
147 Solver::matrix<> A{{.rows = n, .cols = n}}, B{{.rows = n, .cols = n}}; // (for initialization)
148 Solver::matrix<> M{{.rows = n, .cols = n}}; // diagonal blocks of the block tridiagonal system
149 Solver::matrix<> K{{.rows = n, .cols = n}}; // subdiagonal blocks
150 Solver::matrix<> b{{.rows = n, .cols = 1}}; // right-hand side
151
152 // Generate random block tridiagonal system
153 std::mt19937 rng(12345);
154 std::uniform_real_distribution<T> dist(-1, 1);
155 std::ranges::generate(A, [&] { return dist(rng); });
156 std::ranges::generate(B, [&] { return dist(rng); });
157 std::ranges::generate(b, [&] { return dist(rng); });
158 // M(i) = A(i) A(i)ᵀ + B(i-1) B(i-1)ᵀ and K(i) = B(i) A(i)ᵀ
159 syrk(A, tril(M));
161 gemm(B, A.transposed(), K);
162 M.add_to_diagonal(T(1e-4) * static_cast<T>(n * N)); // ensure positive definiteness
163
164 // PCR factorization and solution
165 Solver solver{.n = n};
166 solver.factor_pcr(M, K);
167 Solver::matrix<> x = b;
168 solver.solve_pcr(x);
169
170 // Residual M x - b
171 Solver::matrix<> r{{.rows = n, .cols = 1}};
173 symv_add(tril(M), x, r);
174 syomv(tril(K), x, r);
175 auto res_norm = cyqlone::linalg::norms_all(r);
176 std::cout << "\nResidual norms: ℓ₂ = " << guanaqo::float_to_str(res_norm.norm_2())
177 << ",\tmax = " << guanaqo::float_to_str(res_norm.norm_inf()) << "\n";
178 constexpr auto ε = std::numeric_limits<T>::epsilon();
179 const int max_it = 100 * N * n;
180 auto [normM, it] = λ_max_power(M, K, max_it, ε);
181 std::cout << "Spectral norm M: " << guanaqo::float_to_str(normM) << " ("
182 << (it <= max_it ? "" : "approx., ") << it << " iter.)\n";
184 const auto η = res_norm.norm_2() / (normM * norm_2(x) + norm_2(b));
185 std::cout << "Backward error: " << guanaqo::float_to_str(η) << "\n\n";
186
187#if CYQLONE_WITH_MATIO
188 // Export the original system and the solution as a .mat file
189 std::filesystem::path filename = "test-pcr.mat";
190 auto matfile = cyqlone::create_mat(filename);
191 cyqlone::add_to_mat(matfile.get(), "A", unpacked(A));
192 cyqlone::add_to_mat(matfile.get(), "B", unpacked(B));
193 cyqlone::add_to_mat(matfile.get(), "M", unpacked(M));
194 cyqlone::add_to_mat(matfile.get(), "K", unpacked(K));
195 cyqlone::add_to_mat(matfile.get(), "b", unpacked(b));
196 cyqlone::add_to_mat(matfile.get(), "x", unpacked(x));
197 std::cout << "Saved system and solution to " << filename << "\n";
198#endif
199
200 return η < ε * static_cast<T>(100 * n * N) ? 0 : 1;
201}
int main()
std::string float_to_str(F value, int precision)
void syrk_sub(VA &&A, Structured< VC, SD > C, Structured< VD, SD > D, Opts... opts)
void syomv(Structured< VA, SA > A, VB &&B, VD &&D)
void trsm(Structured< VA, SA > A, VB &&B, VD &&D, with_rotate_B_t< RotB >={})
void gemm_neg(VA &&A, VB &&B, VD &&D, TilingOptions packing={}, Opts... opts)
void unpack(VA &&A, VB &&B)
Copy a compact batch of matrices A to multiple scalar matrices B.
Definition packing.hpp:147
void gemm(VA &&A, VB &&B, VD &&D, TilingOptions packing={}, Opts... opts)
void syrk(Structured< VA, SA > A, Structured< VD, SD > D, Opts... opts)
norms< simdified_value_t< Vx > >::result norms_all(Vx &&x)
Compute the norms (max, 1-norm, and 2-norm) of a vector.
Definition linalg.hpp:254
void add(VA &&A, VB &&B, VC &&C, with_rotate_t< Rotate >={})
Add two matrices or vectors C = A + B. Rotate affects B.
Definition linalg.hpp:417
void symv_add(Structured< VA, SA > A, VB &&B, VC &&C, VD &&D)
void negate(VA &&A, VB &&B, with_rotate_t< Rotate >={})
Negate a matrix or vector B = -A.
Definition linalg.hpp:386
void syrk_add(VA &&A, Structured< VC, SD > C, Structured< VD, SD > D, Opts... opts)
void copy(VA &&A, VB &&B, Opts... opts)
void potrf(Structured< VC, SC > C, Structured< VD, SC > D, simdified_value_t< VC > regularization=0)
simdified_value_t< Vx > dot(Vx &&x, Vy &&y)
Compute the dot product of two vectors.
Definition linalg.hpp:286
void gemv_sub(VA &&A, VB &&B, VC &&C, VD &&D, Opts... opts)
simdified_value_t< Vx > norm_2(Vx &&x)
Compute the 2-norm of a vector.
Definition linalg.hpp:278
void scale(T alpha, Vx &&x, Vz &&z)
Multiply a vector by a scalar z = αx.
Definition linalg.hpp:294
void symv(Structured< VA, SA > A, VB &&B, VD &&D)
constexpr auto triu(M &&m)
constexpr auto tril(M &&m)
MatFilePtr create_mat(const std::filesystem::path &filename)
Create and open a new .mat file for writing.
Definition matio.cpp:97
void add_to_mat(mat_t *mat, const std::string &varname, float value)
Add a value to an open .mat file.
Definition matio.cpp:122
Functions for exporting and loading matrices and OCP data to and from .mat files.
constexpr with_rotate_D_t< I > with_rotate_D
constexpr with_rotate_t< I > with_rotate
simd_view_types< std::remove_const_t< T >, Abi >::template matrix< T, Order > matrix
constexpr with_rotate_C_t< I > with_rotate_C
constexpr with_rotate_A_t< I > with_rotate_A
constexpr index_t vl_or_largest
auto λ_max_power(const Mat &M, const Mat &K, int max_it, typename Mat::value_type tol)
Definition test-pcr.cpp:105
constexpr index_t get_depth(index_t n)
Definition test-pcr.cpp:23
auto unpacked(const M &matrix)
Definition test-pcr.cpp:128
void add_to_diagonal(const value_type &t)
void set_constant(value_type t)
std::integral_constant< index_t, VL > vl_t
Definition test-pcr.cpp:31
batmat::matrix::Matrix< value_type, index_t, vl_t, index_t, O, align_t > bmatrix
Definition test-pcr.cpp:37
matrix< O >::const_view_type view
Definition test-pcr.cpp:43
void factor_pcr_level(view<> M0, view<> K0)
Definition test-pcr.cpp:63
std::integral_constant< index_t, VL *alignof(T)> align_t
Definition test-pcr.cpp:32
void factor_pcr(view<> M0, view<> K0)
Definition test-pcr.cpp:53
void solve_pcr(mut_view<> λ)
Definition test-pcr.cpp:84
matrix< O >::view_type mut_view
Definition test-pcr.cpp:41
batmat::matrix::Matrix< value_type, index_t, vl_t, vl_t, O, align_t > matrix
Definition test-pcr.cpp:39
void solve_pcr_level(mut_view<> λ, mut_view<> work_pcr) const
Definition test-pcr.cpp:94
static constexpr index_t lv
Definition test-pcr.cpp:34
static constexpr index_t v
Definition test-pcr.cpp:33
void solve_pcr(mut_view<> λ, mut_view<> work_pcr) const
Definition test-pcr.cpp:85
batmat::matrix::Matrix< value_type, index_t, vl_t, index_t, O, align_t > matrix
Definition cyqlone.hpp:152