11template <index_t VL, StorageOrder DefaultOrder>
12template <
class T,
class U>
14 const auto xaxpy = [a](
auto,
auto,
auto xi,
auto yi) {
linalg::axpy(a, xi, yi); };
18template <index_t VL, StorageOrder DefaultOrder>
19template <
class T,
class U>
25template <index_t VL, StorageOrder DefaultOrder>
26template <
class T,
class U>
32template <index_t VL, StorageOrder DefaultOrder>
39template <index_t VL, StorageOrder DefaultOrder>
43 const auto dot = [&](
auto,
auto,
auto ai,
auto bi) { sum +=
linalg::dot(ai, bi); };
44 ocp.foreach_stage(ctx,
dot, a, b);
45 return ctx.reduce(sum);
48template <index_t VL, StorageOrder DefaultOrder>
49template <
class... Args>
51 const auto &a,
const auto &b,
52 const Args &...others)
const {
54 if constexpr (
sizeof...(Args) > 0)
55 local_dots(out.template subspan<1>(), others...);
58template <index_t VL, StorageOrder DefaultOrder>
59template <
class... Args>
60std::array<real_t,
sizeof...(Args) / 2>
62 using local_sums_t = std::array<real_t,
sizeof...(Args) / 2>;
63 local_sums_t local_sums{};
64 const auto dots = [&](
auto,
auto,
auto... batches) {
local_dots(local_sums, batches...); };
65 ocp.foreach_stage(ctx,
dots, args...);
66 return ctx.reduce(local_sums, [](local_sums_t a, local_sums_t b) {
68 for (
size_t i = 0; i < a.size(); ++i)
74template <index_t VL, StorageOrder DefaultOrder>
78 auto nrm =
norms.zero();
83 return ctx.reduce(nrm,
norms);
86template <index_t VL, StorageOrder DefaultOrder>
93template <index_t VL, StorageOrder DefaultOrder>
99 return ctx.reduce(sumsq);
void axpy(Vy &&y, const std::array< simdified_value_t< Vy >, sizeof...(Vx)> &alphas, Vx &&...x)
Add scaled vectors: y = ∑ᵢ αᵢxᵢ + βy (this overload takes no β argument, so presumably β = 1, i.e. y = ∑ᵢ αᵢxᵢ + y).
norms< simdified_value_t< Vx > >::result norms_all(Vx &&x)
Compute the norms (max, 1-norm, and 2-norm) of a vector.
simdified_value_t< Vx > norm_2_squared(Vx &&x)
Compute the squared 2-norm of a vector.
void copy(VA &&A, VB &&B, Opts... opts)
simdified_value_t< Vx > dot(Vx &&x, Vy &&y)
Compute the dot product of two vectors.
void fill(simdified_value_t< VB > a, VB &&B)
#define GUANAQO_TRACE(name, instance,...)
void local_dots(std::span< real_t, 1+sizeof...(Args)/2 > out, const auto &a, const auto &b, const Args &...others) const
Compute multiple partial dot products, without reducing across threads.
void xcopy(Context &ctx, const T &x, U &y) const
Copy x to y.
real_t dot(Context &ctx, const var_vec_t &a, const var_vec_t &b) const
Dot product of a and b.
real_t norm_inf(Context &ctx, const T &x) const
Infinity or max norm of x.
typename OCP_t::Context Context
void xaxpy(Context &ctx, real_t a, const T &x, U &y)
Compute y = a x + y.
real_t norm_squared(Context &ctx, const T &x) const
Squared l2 norm of x.
auto norm_inf_l1_sq(Context &ctx, const T &x) const
Compute the infinity, l1 and l2 norms of x.
void set_constant(Context &ctx, T &x, const U &y) const
Set each element of x to the constant value y.
void scale(Context &ctx, real_t s, T &x) const
Multiply a vector x by a scalar s.
static constexpr auto norms
std::array< real_t, sizeof...(Args)/2 > dots(Context &ctx, const Args &...args) const
Compute multiple dot products at once.