3#include <batmat/matrix/matrix.hpp>
4#include <guanaqo/blas/hl-blas-interface.hpp>
5#include <guanaqo/print.hpp>
15using cyqlone::index_t;
19constexpr index_t
v = 4;
30int main(
int argc,
char *argv[])
try {
33 index_t block_size = 5;
34 bool circular =
false;
37 p = std::stoi(argv[1]);
39 block_size = std::stoi(argv[2]);
41 circular = std::stoi(argv[3]) != 0;
46 const index_t num_blocks = p *
v;
52 Solver solver{.block_size = block_size, .circular = circular, .p = p};
56 Solver::matrix b_solve{{.depth = num_blocks, .rows = block_size, .cols = 1}};
57 matrices x{{.depth = num_blocks, .rows = block_size, .cols = 1}};
60 auto pctx = solver.create_parallel_context();
64 auto pack_M = [&](index_t i,
auto Ms) {
return pack(M.middle_batches(i,
v, p), Ms); };
65 auto pack_K = [&](index_t i,
auto Ks) {
return pack(K.middle_batches(i,
v, p), Ks); };
66 auto pack_b = [&](index_t i,
auto bs) {
return pack(b.middle_batches(i,
v, p), bs); };
67 solver.init_diag(ctx, pack_M);
68 solver.init_subdiag(ctx, pack_K);
69 solver.init_rhs(ctx, b_solve, pack_b);
71 solver.factor_solve(ctx, b_solve);
72 solver.solve_reverse(ctx, b_solve);
75 auto unpack_x = [&](index_t i,
auto xs) {
return unpack(xs, x.
middle_batches(i,
v, p)); };
76 solver.get_solution(ctx, b_solve, unpack_x);
81 std::filesystem::path filename =
"block_tridiagonal_system.mat";
87 std::cout <<
"Saved system and solution to " << filename <<
"\n";
90 std::cout <<
"import numpy as np\n";
91 std::cout <<
"v = " <<
v <<
"\n";
92 std::cout <<
"n = " << block_size <<
"\n";
93 std::cout <<
"N = " << num_blocks <<
"\n";
94 std::cout <<
"M = np.array([\n";
95 for (index_t i = 0; i < num_blocks; ++i)
97 std::cout <<
"])\nK = np.array([\n";
98 for (index_t i = 0; i < num_blocks; ++i)
100 std::cout <<
"])\nb = np.array([\n";
101 for (index_t i = 0; i < num_blocks; ++i)
103 std::cout <<
"])\nx = np.array([\n";
104 for (index_t i = 0; i < num_blocks; ++i)
108}
catch (std::exception &e) {
109 std::cerr <<
"Error: " << e.what() <<
"\n";
115 std::mt19937 rng(12345);
116 std::normal_distribution<real_t> dist(0.0, 1.0);
119 matrices A{{.depth = num_blocks, .rows = block_size, .cols = block_size}, uninitialized};
120 matrices B{{.depth = num_blocks, .rows = block_size, .cols = block_size}, uninitialized};
121 std::ranges::generate(A, [&] {
return dist(rng); });
122 std::ranges::generate(B, [&] {
return dist(rng); });
124 matrices M{{.depth = num_blocks, .rows = block_size, .cols = block_size}, uninitialized};
125 matrices K{{.depth = num_blocks, .rows = block_size, .cols = block_size}, uninitialized};
126 matrices b{{.depth = num_blocks, .rows = block_size, .cols = 1}, uninitialized};
127 std::ranges::generate(b, [&] {
return dist(rng); });
131 for (index_t i = 0; i < num_blocks; ++i) {
132 index_t i_prev = (i - 1 + num_blocks) % num_blocks;
136 M(i).add_to_diagonal(1e-4);
138 return TridiagSystem{.M = std::move(M), .K = std::move(K), .b = std::move(b)};
The main header for the Cyqlone and Tricyqle linear solvers.
std::ostream & print_python(std::ostream &os, std::span< T, E > x, std::string_view end="\n", bool squeeze=true)
void xsyrk_LN(T alpha, std::type_identity_t< MatrixView< const T, I > > A, T beta, MatrixView< T, I > C)
void xgemm_NT(T alpha, std::type_identity_t< MatrixView< const T, I, UnitStride< I >, O > > A, std::type_identity_t< MatrixView< const T, I, UnitStride< I >, O > > B, T beta, MatrixView< T, I, UnitStride< I >, O > C)
@ PCR
Parallel Cyclic Reduction (direct).
void unpack(VA &&A, VB &&B)
Copy a compact batch of matrices A to multiple scalar matrices B.
void pack(VA &&A, VB &&B)
Copy multiple scalar matrices A to a compact batch of matrices B.
struct batmat::matrix::uninitialized_t uninitialized
MatFilePtr create_mat(const std::filesystem::path &filename)
Create and open a new .mat file for writing.
void add_to_mat(mat_t *mat, const std::string &varname, float value)
Add a value to an open .mat file.
Functions for exporting and loading matrices and OCP data to and from .mat files.
constexpr bool is_pow_2(index_t n)
batmat::matrix::Matrix< real_t, index_t > matrices
TridiagSystem init_random_system(index_t block_size, index_t num_blocks, bool circular=false)
std::function< void(benchmark::State &, SpringMassParams)> run
auto middle_batches(index_type b, index_type n, index_type stride=1)
void set_constant(value_type t)
Solver for block-tridiagonal systems using cyclic reduction (CR), parallel cyclic reduction (PCR),...
batmat::matrix::Matrix< value_type, index_t, vl_t, index_t, O, align_t > matrix
parallel::Context<> Context