cyqlone develop
Fast, parallel and vectorized solver for linear systems with optimal control structure.
Loading...
Searching...
No Matches
indexing.tpp
Go to the documentation of this file.
1#include <cyqlone/cyqlone.hpp>
2
3#include <batmat/assume.hpp>
4
5namespace CYQLONE_NS(cyqlone) {
6
7template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
9 -> index_t {
10 const index_t c = ceil_p();
11 BATMAT_ASSUME(c > 0);
12 BATMAT_ASSUME(a >= 0);
13 BATMAT_ASSUME(b >= 0);
14 BATMAT_ASSUME(a < c);
15 a -= b;
16 return a < 0 ? a + c : a;
17}
18template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
20 -> index_t {
21 const index_t c = ceil_p();
22 BATMAT_ASSUME(c > 0);
23 BATMAT_ASSUME(a >= 0);
24 BATMAT_ASSUME(b >= 0);
25 BATMAT_ASSUME(a < c);
26 a += b;
27 return a >= c ? a - c : a;
28}
29template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
30[[nodiscard]] index_t TricyqleSolver<VL, T, DefaultOrder, Ctx>::ν2(index_t bi) const {
31 BATMAT_ASSUME(bi > 0);
32 auto ui = static_cast<std::make_unsigned_t<index_t>>(bi);
33 return static_cast<index_t>(std::countr_zero(ui));
34}
35template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
36[[nodiscard]] index_t TricyqleSolver<VL, T, DefaultOrder, Ctx>::ν2p(index_t bi) const {
37 BATMAT_ASSUME(bi >= 0);
38 return bi == 0 ? lp() : ν2(bi);
39}
40
41template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
43 -> index_t {
44 const index_t N = ceil_N();
45 BATMAT_ASSUME(N >= 0);
46 BATMAT_ASSUME(a >= 0);
47 BATMAT_ASSUME(b >= 0);
48 BATMAT_ASSUME(a < N);
49 a += b;
50 return a >= N ? a - N : a;
51}
52template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
54 -> index_t {
55 const index_t N = ceil_N();
56 BATMAT_ASSUME(N >= 0);
57 BATMAT_ASSUME(a >= 0);
58 BATMAT_ASSUME(b >= 0);
59 BATMAT_ASSUME(a < N);
60 a -= b;
61 return a < 0 ? a + N : a;
62}
63template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
64auto CyqloneSolver<VL, T, DefaultOrder, Ctx>::sub_wrap_p(index_t a, index_t b) const -> index_t {
65 BATMAT_ASSUME(p > 0);
66 BATMAT_ASSUME(a >= 0);
67 BATMAT_ASSUME(b >= 0);
68 BATMAT_ASSUME(a < p);
69 a -= b;
70 return a < 0 ? a + p : a;
71}
72template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
73auto CyqloneSolver<VL, T, DefaultOrder, Ctx>::add_wrap_p(index_t a, index_t b) const -> index_t {
74 BATMAT_ASSUME(p > 0);
75 BATMAT_ASSUME(a >= 0);
76 BATMAT_ASSUME(b >= 0);
77 BATMAT_ASSUME(a < p);
78 a += b;
79 return a >= p ? a - p : a;
80}
81template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
83 -> index_t {
84 return tricyqle.sub_wrap_ceil_p(a, b);
85}
86template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
88 -> index_t {
89 return tricyqle.add_wrap_ceil_p(a, b);
90}
91template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
93 -> index_t {
94 const index_t c = ceil_P();
95 BATMAT_ASSUME(a >= 0);
96 BATMAT_ASSUME(b >= 0);
97 BATMAT_ASSUME(a < c);
98 a -= b;
99 return a < 0 ? a + c : a;
100}
101template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
103 -> index_t {
104 const index_t c = ceil_P();
105 BATMAT_ASSUME(a >= 0);
106 BATMAT_ASSUME(b >= 0);
107 BATMAT_ASSUME(a < c);
108 a += b;
109 return a >= c ? a - c : a;
110}
111template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
113 -> index_t {
114 const auto levA = biA > 0 ? get_level(biA) : lp() + lv();
115 const auto levP = lp();
116 if (levA >= levP)
117 return (((1 << levP) - 1) << (lp() + lv() - levP)) + (biA >> levP);
118 return (((1 << levA) - 1) << (lp() + lv() - levA)) + get_index_in_level(biA);
119}
120template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
121[[nodiscard]] index_t CyqloneSolver<VL, T, DefaultOrder, Ctx>::ν2(index_t bi) const {
122 return tricyqle.ν2(bi);
123}
124template <index_t VL, class T, StorageOrder DefaultOrder, class Ctx>
125[[nodiscard]] index_t CyqloneSolver<VL, T, DefaultOrder, Ctx>::ν2p(index_t bi) const {
126 return tricyqle.ν2p(bi);
127}
128
129} // namespace CYQLONE_NS(cyqlone)
#define BATMAT_ASSUME(x)
The main header for the Cyqlone and Tricyqle linear solvers.
constexpr index_t get_level(index_t i)
Definition cyqlone.hpp:45
constexpr index_t get_index_in_level(index_t i)
Definition cyqlone.hpp:51
index_t ceil_N() const
Horizon length, rounded up to a multiple of the number of parallel execution units.
Definition cyqlone.hpp:653
index_t ν2p(index_t i) const
2-adic valuation modulo p, i.e. ν2p(0) = ν2p(p) = lp().
Definition indexing.tpp:125
constexpr index_t ceil_P() const
The number of parallel execution units P rounded up to the next power of two.
Definition cyqlone.hpp:614
index_t add_wrap_ceil_N(index_t a, index_t b) const
Add b to a modulo N_horiz.
Definition indexing.tpp:42
index_t ν2(index_t i) const
2-adic valuation ν₂.
Definition indexing.tpp:121
index_t sub_wrap_ceil_N(index_t a, index_t b) const
Subtract b from a modulo N_horiz.
Definition indexing.tpp:53
constexpr index_t lv() const
log₂(v), logarithm of the vector length.
Definition cyqlone.hpp:608
index_t get_linear_batch_offset(index_t biA) const
Definition indexing.tpp:112
index_t add_wrap_p(index_t a, index_t b) const
Add b to a modulo p.
Definition indexing.tpp:73
index_t sub_wrap_ceil_p(index_t a, index_t b) const
Subtract b from a modulo ceil_p().
Definition indexing.tpp:82
index_t add_wrap_ceil_P(index_t a, index_t b) const
Definition indexing.tpp:102
index_t sub_wrap_p(index_t a, index_t b) const
Subtract b from a modulo p.
Definition indexing.tpp:64
index_t add_wrap_ceil_p(index_t a, index_t b) const
Add b to a modulo ceil_p().
Definition indexing.tpp:87
const index_t p
Number of processors/threads.
Definition cyqlone.hpp:601
tricyqle_t tricyqle
Block-tridiagonal solver (CR/PCR/PCG).
Definition cyqlone.hpp:747
constexpr index_t lp() const
log₂(p), logarithm of the number of processors/threads, rounded up.
Definition cyqlone.hpp:610
index_t sub_wrap_ceil_P(index_t a, index_t b) const
Definition indexing.tpp:92
constexpr index_t lp() const
log₂(p), logarithm of the number of processors/threads p, rounded up.
Definition cyqlone.hpp:105
index_t ν2(index_t i) const
2-adic valuation ν₂.
Definition indexing.tpp:30
index_t ν2p(index_t i) const
2-adic valuation modulo p, i.e. ν2p(0) = ν2p(p) = lp().
Definition indexing.tpp:36
index_t add_wrap_ceil_p(index_t a, index_t b) const
Add b to a modulo ceil_p().
Definition indexing.tpp:19
index_t sub_wrap_ceil_p(index_t a, index_t b) const
Subtract b from a modulo ceil_p().
Definition indexing.tpp:8
constexpr index_t ceil_p() const
The number of processors p rounded up to the next power of two.
Definition cyqlone.hpp:107