26template <
class T,
class Abi,
StorageOrder O,
class F,
class X,
class... Xs>
27[[gnu::always_inline]]
inline void iter_elems(F &&fun, X &&x, Xs &&...xs) {
29 if constexpr (O == StorageOrder::ColMajor) {
30 for (index_t c = 0; c < x.cols(); ++c)
31 for (index_t r = 0; r < x.rows(); ++r)
32 fun(types::aligned_load(&x(0, r, c)), types::aligned_load(&xs(0, r, c))...);
34 for (index_t r = 0; r < x.rows(); ++r)
35 for (index_t c = 0; c < x.cols(); ++c)
36 fun(types::aligned_load(&x(0, r, c)), types::aligned_load(&xs(0, r, c))...);
40template <
class T,
class Abi,
StorageOrder O,
class F,
class X,
class... Xs>
41[[gnu::always_inline]]
inline void iter_elems_store(F &&fun, X &&x, Xs &&...xs) {
43 if constexpr (O == StorageOrder::ColMajor) {
44 for (index_t c = 0; c < x.cols(); ++c)
45 for (index_t r = 0; r < x.rows(); ++r)
46 types::aligned_store(fun(types::aligned_load(&xs(0, r, c))...), &x(0, r, c));
48 for (index_t r = 0; r < x.rows(); ++r)
49 for (index_t c = 0; c < x.cols(); ++c)
50 types::aligned_store(fun(types::aligned_load(&xs(0, r, c))...), &x(0, r, c));
54template <
class T,
class Abi,
StorageOrder O,
class F,
class X0,
class X1,
class... Xs>
55[[gnu::always_inline]]
inline void iter_elems_store2(F &&fun, X0 &&x0, X1 &&x1, Xs &&...xs) {
57 if constexpr (O == StorageOrder::ColMajor) {
58 for (index_t c = 0; c < x0.cols(); ++c)
59 for (index_t r = 0; r < x0.rows(); ++r) {
60 auto [r0, r1] = fun(types::aligned_load(&xs(0, r, c))...);
61 types::aligned_store(r0, &x0(0, r, c));
62 types::aligned_store(r1, &x1(0, r, c));
65 for (index_t r = 0; r < x0.rows(); ++r)
66 for (index_t c = 0; c < x0.cols(); ++c) {
67 auto [r0, r1] = fun(types::aligned_load(&xs(0, r, c))...);
68 types::aligned_store(r0, &x0(0, r, c));
69 types::aligned_store(r1, &x1(0, r, c));
74template <
class T,
class Abi,
StorageOrder O,
class F,
class... Ys,
class... Xs>
75[[gnu::always_inline]]
inline void iter_elems_store_n(F &&fun, std::tuple<Ys...> ys, Xs &&...xs) {
78 const index_t rows = std::get<0>(ys).rows(), cols = std::get<0>(ys).cols();
79 if constexpr (O == StorageOrder::ColMajor) {
80 for (index_t c = 0; c < cols; ++c)
81 for (index_t r = 0; r < rows; ++r) {
82 auto rs = fun(types::aligned_load(&xs(0, r, c))...);
83 static_assert(std::tuple_size_v<
decltype(rs)> ==
sizeof...(Ys));
84 [&]<
size_t... Is>(std::index_sequence<Is...>) {
85 ((types::aligned_store(get<Is>(rs), &get<Is>(ys)(0, r, c))), ...);
86 }(std::index_sequence_for<Ys...>());
89 for (index_t r = 0; r < rows; ++r)
90 for (index_t c = 0; c < cols; ++c) {
91 auto rs = fun(types::aligned_load(&xs(0, r, c))...);
92 static_assert(std::tuple_size_v<
decltype(rs)> ==
sizeof...(Ys));
93 [&]<
size_t... Is>(std::index_sequence<Is...>) {
94 ((types::aligned_store(get<Is>(rs), &get<Is>(ys)(0, r, c))), ...);
95 }(std::index_sequence_for<Ys...>());
// NOTE(review): the fragments below are truncated by the extraction — the
// function signatures at original lines 101-104, 111-117 and 119-120 are
// missing, and original line numbers are fused onto the code. Kept verbatim;
// only comments added. Recover the missing lines from upstream before use.
//
// Fragment: generic element-wise reduction. Visible body asserts matching
// batch sizes, then folds every SIMD batch into `init` via iter_elems.
100template <
class T,
class Abi,
StorageOrder O0,
class Tinit,
class F,
class R,
class... Args>
105 BATMAT_ASSERT(((x0.batch_size() == xs.batch_size()) && ...));
106 iter_elems<T, Abi, O0>([&](
auto... args) { init = fun(init, args...); }, x0, xs...);
// Fragment: orphan template header — the function it introduced (original
// lines 111-117, presumably a single-argument overload) is missing entirely.
110template <
class T,
class Abi, StorageOrder OA>
// Fragment: dot product of two views `a`, `b` — accumulates ai*bi per SIMD
// lane via the reduce above, then horizontally reduces the SIMD accumulator.
118template <
class T,
class Abi, StorageOrder OA, StorageOrder OB>
121 auto fma = [](
auto accum,
auto ai,
auto bi) {
return ai * bi + accum; };
122 auto simd_reduce = [](
auto accum) {
return reduce(accum); };
123 return reduce<T, Abi>(simd{0}, fma, simd_reduce, a, b);
// NOTE(review): truncated fragments — signatures (original lines 128-129,
// 133-135, 137-139, 142-150, 152-154) missing; line numbers fused onto code.
// Kept verbatim; only comments added.
//
// Fragment: squared-norm style reduction of a single view `a` — accumulates
// ai*ai per SIMD lane, then horizontally reduces.
127template <
class T,
class Abi, StorageOrder OA>
130 auto fma = [](
auto accum,
auto ai) {
return ai * ai + accum; };
131 auto simd_reduce = [](
auto accum) {
return reduce(accum); };
132 return reduce<T, Abi>(simd{0}, fma, simd_reduce, a);
// Fragment: scalar scaling C = a * B, element-wise via iter_elems_store.
136template <
class T,
class Abi, StorageOrder OB, StorageOrder OC>
140 iter_elems_store<T, Abi, OC>([&](
auto Bi) {
return a * Bi; }, C, B);
// Fragment: Hadamard (element-wise) product C = A .* B.
144template <
class T,
class Abi, StorageOrder OA, StorageOrder OB, StorageOrder OC>
151 iter_elems_store<T, Abi, OC>([&](
auto Ai,
auto Bi) {
return Ai * Bi; }, C, A, B);
// NOTE(review): truncated fragments — signatures and closing braces missing
// (original lines 156-163, 166-168, 170-178, 181, 183-185); line numbers
// fused onto code. Kept verbatim; only comments added.
//
// Fragment: element-wise clamp z = max(lo, min(x, hi)).
155template <
class T,
class Abi, StorageOrder O>
164 const auto clamp = [&](
auto xi,
auto loi,
auto hii) {
return fmax(loi, fmin(xi, hii)); };
165 iter_elems_store<T, Abi, O>(
clamp, z, x, lo, hi);
// Fragment: clamp residual z = max(x - hi, min(0, x - lo)) — zero when x is
// inside [lo, hi], otherwise the signed distance to the violated bound.
169template <
class T,
class Abi, StorageOrder O>
179 const auto clamp_resid = [&](
auto xi,
auto loi,
auto hii) {
180 return fmax(xi - hii, fmin(simd{0}, xi - loi));
182 iter_elems_store<T, Abi, O>(
clamp_resid, z, x, lo, hi);
// NOTE(review): truncated fragment — parts of the signature (trailing
// parameters after `alphas`, original lines 188-190) and several body lines
// (193, 197-199, 205-211) are missing; line numbers fused onto code. Kept
// verbatim; only comments added.
//
// Fragment: generalized axpby, z = Beta*z + sum_i alphas[i]*x_i. When Beta
// is the compile-time constant 0, the old value of z is never read (safe for
// uninitialized output); otherwise z is loaded and scaled by Beta.
186template <
class T,
class Abi, T Beta,
StorageOrder O,
class... Xs>
187[[gnu::flatten]]
void gaxpby(
view<T, Abi, O> z,
const std::array<T,
sizeof...(Xs)> &alphas,
191 if constexpr (Beta == 0)
192 iter_elems_store<T, Abi, O>(
// Fold the weighted inputs with an index-sequence pack expansion so each
// xi is paired with its alpha.
194 return [&]<std::size_t... Is>(std::index_sequence<Is...>,
auto... xis) {
195 return ((xis * alphas[Is]) + ...);
196 }(std::make_index_sequence<
sizeof...(Xs)>(), xis...);
// Beta != 0 path: also read the current value zi and scale it.
200 iter_elems_store<T, Abi, O>(
201 [&](
auto zi,
auto... xis) {
202 return [&]<std::size_t... Is>(std::index_sequence<Is...>,
auto... xis) {
203 return zi * Beta + ((xis * alphas[Is]) + ...);
204 }(std::make_index_sequence<
sizeof...(Xs)>(), xis...);
// NOTE(review): truncated fragments — function signatures (original lines
// 213-216, 222-227, 233-238) missing; line numbers fused onto code. Kept
// verbatim; only comments added. `rotl<Rotate>` presumably rotates SIMD
// lanes by Rotate positions — confirm against the project's SIMD helpers.
//
// Fragment: B = -rotl<Rotate>(A), element-wise negated lane-rotation.
212template <
class T,
class Abi,
int Rotate, StorageOrder OA, StorageOrder OB>
217 iter_elems_store<T, Abi, OB>([&](
auto Ai) {
return -
rotl<Rotate>(Ai); }, B, A);
// Fragment: C = A - rotl<Rotate>(B).
221template <
class T,
class Abi,
int Rotate, StorageOrder OA, StorageOrder OB, StorageOrder OC>
228 iter_elems_store<T, Abi, OC>([&](
auto Ai,
auto Bi) {
return Ai -
rotl<Rotate>(Bi); }, C, A, B);
// Fragment: C = A + rotl<Rotate>(B).
232template <
class T,
class Abi,
int Rotate, StorageOrder OA, StorageOrder OB, StorageOrder OC>
239 iter_elems_store<T, Abi, OC>([&](
auto Ai,
auto Bi) {
return Ai +
rotl<Rotate>(Bi); }, C, A, B);
// NOTE(review): heavily truncated region — most function signatures and
// bodies between the visible template headers are missing from the
// extraction; line numbers are fused onto the code. These appear to be the
// user-facing single-batch wrappers (norms, scale, clamp, axpby, axpy) over
// the low-level kernels above — confirm against upstream. Kept verbatim;
// only comments added.
253template <simdifiable Vx>
// norm_inf: delegates to norms_all(x) and extracts the infinity norm.
259template <simdifiable Vx>
261 return norms_all(std::forward<Vx>(x)).norm_inf();
// norm_1: delegates to norms_all(x) and extracts the 1-norm.
265template <simdifiable Vx>
267 return norms_all(std::forward<Vx>(x)).norm_1();
271template <simdifiable Vx>
277template <simdifiable Vx>
284template <simdifiable Vx, simdifiable Vy>
// scale: z = alpha * x (body missing from extraction).
292template <simdifiable Vx, simdifiable Vz, std::convertible_to<simdified_value_t<Vx>> T>
294void scale(T alpha, Vx &&x, Vz &&z) {
300template <simdifiable Vx, std::convertible_to<simdified_value_t<Vx>> T>
307template <simdifiable Vx, simdifiable Vy, simdifiable Vz>
315template <simdifiable Vx, simdifiable Vy>
// clamp: z = clamp(x, lo, hi) (body missing from extraction).
323template <simdifiable Vx, simdifiable Vlo, simdifiable Vhi, simdifiable Vz>
325void clamp(Vx &&x, Vlo &&lo, Vhi &&hi, Vz &&z) {
331template <simdifiable Vx, simdifiable Vlo, simdifiable Vhi, simdifiable Vz>
// axpby: z = alpha*x + beta*y (body missing from extraction).
339template <simdifiable Vx, simdifiable Vy, simdifiable Vz,
340 std::convertible_to<simdified_value_t<Vx>> Ta,
341 std::convertible_to<simdified_value_t<Vx>> Tb>
343void axpby(Ta alpha, Vx &&x, Tb beta, Vy &&y, Vz &&z) {
// In-place axpby: y = alpha*x + beta*y (body missing).
349template <simdifiable Vx, simdifiable Vy,
350 std::convertible_to<simdified_value_t<Vx>> Ta,
351 std::convertible_to<simdified_value_t<Vx>> Tb>
353void axpby(Ta alpha, Vx &&x, Tb beta, Vy &&y) {
359template <
auto Beta = 1, simdifiable Vy, simdifiable... Vx>
// axpy: z = alpha*x + y, implemented via axpby with beta = 1.
367template <simdifiable Vx, simdifiable Vy, simdifiable Vz,
368 std::convertible_to<simdified_value_t<Vx>> Ta>
370void axpy(Ta alpha, Vx &&x, Vy &&y, Vz &&z) {
371 axpby(alpha, x, 1, y, z);
375template <
auto Beta = 1, simdifiable Vx, simdifiable Vy,
376 std::convertible_to<simdified_value_t<Vx>> Ta>
378void axpy(Ta alpha, Vx &&x, Vy &&y) {
// NOTE(review): truncated region — only template headers and scattered body
// lines survive; line numbers are fused onto the code. Headers suggest
// single-batch rotate-op wrappers (Rotate = 0 default) followed by generic
// foreach-style helpers that simdify their view arguments — confirm against
// upstream. Kept verbatim; only comments added.
384template <simdifiable VA, simdifiable VB,
int Rotate = 0>
392template <simdifiable VA,
int Rotate = 0>
399template <simdifiable VA, simdifiable VB, simdifiable VC,
int Rotate = 0>
407template <simdifiable VA, simdifiable VB,
int Rotate = 0>
415template <simdifiable VA, simdifiable VB, simdifiable VC,
int Rotate = 0>
423template <simdifiable VA, simdifiable VB,
int Rotate = 0>
// foreach over const views: forwards fun and the simdified, const-qualified
// views to a lower-level iterator (call head missing from extraction).
431template <
class F, simdifiable VA, simdifiable... VAs>
436 std::forward<F>(fun),
simdify(A).as_const(),
simdify(As).as_const()...);
441template <
class F, simdifiable VA, simdifiable... VAs>
451template <
class F, simdifiable VA, simdifiable VB, simdifiable... VAs>
// Tuple variant: takes a tuple of output views `As`; simdifies each element
// via std::apply before dispatching (surrounding call missing).
461template <
class F, simdifiable... VAs, simdifiable... VBs>
464 using VA0 = std::tuple_element_t<0,
decltype(As)>;
467 std::forward<F>(fun),
468 std::apply([](
auto &&...a) {
return std::make_tuple(
simdify(a)...); }, As),
// NOTE(review): truncated region — multi-batch (`simdifiable_multi`)
// overloads of the wrappers above. Each visible body loops over
// x.num_batches() and (where the call line survives) delegates to the
// single-batch linalg:: function per batch. Most signatures and per-batch
// call lines are missing; line numbers fused onto code. Kept verbatim.
487template <simdifiable_multi Vx>
490 for (index_t b = 0; b < x.num_batches(); ++b)
// Multi-batch norm_inf / norm_1 via norms_all, as in the single-batch case.
496template <simdifiable_multi Vx>
498 return norms_all(std::forward<Vx>(x)).norm_inf();
502template <simdifiable_multi Vx>
504 return norms_all(std::forward<Vx>(x)).norm_1();
508template <simdifiable_multi Vx>
511 for (index_t b = 0; b < x.num_batches(); ++b)
517template <simdifiable_multi Vx>
524template <simdifiable_multi Vx, simdifiable_multi Vy>
529 for (index_t b = 0; b < x.num_batches(); ++b)
// Multi-batch scale: z = alpha * x, batch by batch (call line missing).
535template <simdifiable_multi Vx, simdifiable_multi Vz, std::convertible_to<simdified_value_t<Vx>> T>
537void scale(T alpha, Vx &&x, Vz &&z) {
539 for (index_t b = 0; b < x.num_batches(); ++b)
544template <simdifiable_multi Vx, std::convertible_to<simdified_value_t<Vx>> T>
546 for (index_t b = 0; b < x.num_batches(); ++b)
551template <simdifiable_multi Vx, simdifiable_multi Vy, simdifiable_multi Vz>
556 for (index_t b = 0; b < x.num_batches(); ++b)
561template <simdifiable_multi Vx, simdifiable_multi Vy>
565 for (index_t b = 0; b < x.num_batches(); ++b)
// Multi-batch clamp: delegates to linalg::clamp per batch (call visible).
570template <simdifiable_multi Vx, simdifiable_multi Vlo, simdifiable_multi Vhi, simdifiable_multi Vz>
572void clamp(Vx &&x, Vlo &&lo, Vhi &&hi, Vz &&z) {
576 for (index_t b = 0; b < x.num_batches(); ++b)
577 linalg::clamp(x.batch(b), lo.batch(b), hi.batch(b), z.batch(b));
581template <simdifiable_multi Vx, simdifiable_multi Vlo, simdifiable_multi Vhi, simdifiable_multi Vz>
587 for (index_t b = 0; b < x.num_batches(); ++b)
// Multi-batch axpby: delegates to linalg::axpby per batch (call visible).
592template <simdifiable_multi Vx, simdifiable_multi Vy, simdifiable_multi Vz,
593 std::convertible_to<simdified_value_t<Vx>> Ta,
594 std::convertible_to<simdified_value_t<Vx>> Tb>
596void axpby(Ta alpha, Vx &&x, Tb beta, Vy &&y, Vz &&z) {
599 for (index_t b = 0; b < x.num_batches(); ++b)
600 linalg::axpby(alpha, x.batch(b), beta, y.batch(b), z.batch(b));
// NOTE(review): truncated region — more multi-batch overloads (in-place
// axpby, variadic gaxpby-style helper with compile-time Beta, axpy, and the
// Rotate-parameterized ops). Per-batch call lines mostly missing; line
// numbers fused onto code. Kept verbatim; only comments added.
604template <simdifiable_multi Vx, simdifiable_multi Vy,
605 std::convertible_to<simdified_value_t<Vx>> Ta,
606 std::convertible_to<simdified_value_t<Vx>> Tb>
608void axpby(Ta alpha, Vx &&x, Tb beta, Vy &&y) {
610 for (index_t b = 0; b < x.num_batches(); ++b)
// Variadic helper: asserts all inputs share y's batch count before looping.
615template <
auto Beta = 1, simdifiable_multi Vy, simdifiable_multi... Vx>
618 BATMAT_ASSERT(((y.num_batches() == x.num_batches()) && ...));
619 for (index_t b = 0; b < y.num_batches(); ++b)
// Multi-batch axpy: z = alpha*x + y via axpby with beta = 1.
624template <simdifiable_multi Vx, simdifiable_multi Vy, simdifiable_multi Vz,
625 std::convertible_to<simdified_value_t<Vx>> Ta>
627void axpy(Ta alpha, Vx &&x, Vy &&y, Vz &&z) {
628 axpby(alpha, x, 1, y, z);
632template <
auto Beta = 1, simdifiable_multi Vx, simdifiable_multi Vy,
633 std::convertible_to<simdified_value_t<Vx>> Ta>
635void axpy(Ta alpha, Vx &&x, Vy &&y) {
637 for (index_t b = 0; b < x.num_batches(); ++b)
// Multi-batch rotate-op wrappers: each loops over A.num_batches() and
// presumably forwards to the single-batch Rotate kernels (calls missing).
642template <simdifiable_multi VA, simdifiable_multi VB,
int Rotate = 0>
646 for (index_t b = 0; b < A.num_batches(); ++b)
651template <simdifiable_multi VA,
int Rotate = 0>
653 for (index_t b = 0; b < A.num_batches(); ++b)
658template <simdifiable_multi VA, simdifiable_multi VB, simdifiable_multi VC,
int Rotate = 0>
663 for (index_t b = 0; b < A.num_batches(); ++b)
668template <simdifiable_multi VA, simdifiable_multi VB,
int Rotate = 0>
672 for (index_t b = 0; b < A.num_batches(); ++b)
677template <simdifiable_multi VA, simdifiable_multi VB, simdifiable_multi VC,
int Rotate = 0>
682 for (index_t b = 0; b < A.num_batches(); ++b)
687template <simdifiable_multi VA, simdifiable_multi VB,
int Rotate = 0>
691 for (index_t b = 0; b < A.num_batches(); ++b)
// NOTE(review): truncated tail region — multi-batch foreach helpers, copy,
// and a structure-aware variant that continues past the end of this chunk.
// Signatures and per-batch call lines largely missing; line numbers fused
// onto code. Kept verbatim; only comments added.
//
// foreach over multi-batch views: asserts matching batch counts, then loops.
696template <
class F, simdifiable_multi VA, simdifiable_multi... VAs>
699 BATMAT_ASSERT(((A.num_batches() == As.num_batches()) && ...));
700 for (index_t b = 0; b < A.num_batches(); ++b)
706template <
class F, simdifiable_multi VA, simdifiable_multi... VAs>
709 BATMAT_ASSERT(((A.num_batches() == As.num_batches()) && ...));
710 for (index_t b = 0; b < A.num_batches(); ++b)
716template <
class F, simdifiable_multi VA, simdifiable_multi VB, simdifiable_multi... VAs>
720 BATMAT_ASSERT(((A.num_batches() == As.num_batches()) && ...));
721 for (index_t b = 0; b < A.num_batches(); ++b)
// Tuple variant: checks batch counts of every As element against the first
// (a0) and of every Bs, then rebuilds a per-batch tuple via std::apply.
727template <
class F, simdifiable_multi... VAs, simdifiable_multi... VBs>
731 auto &&a0 = get<0>(As);
732 BATMAT_ASSERT(((a0.num_batches() == Bs.num_batches()) && ...));
733 BATMAT_ASSERT([&]<std::size_t... Is>(std::index_sequence<Is...>) {
734 return ((a0.num_batches() == get<Is>(As).num_batches()) && ...);
735 }(std::make_index_sequence<
sizeof...(VAs)>()));
736 for (index_t b = 0; b < a0.num_batches(); ++b)
738 fun, std::apply([&](
auto &&...a) {
return std::make_tuple(a.batch(b)...); }, As),
// copy: per-batch copy with forwarded options (signature head missing).
749void copy(VA &&A, VB &&B, Opts... opts) {
751 for (index_t b = 0; b < A.num_batches(); ++b)
// Structure-parameterized variant (MatrixStructure S); definition continues
// beyond the visible chunk.
756template <
MatrixStructure S, simdifiable_multi VA, simdifiable_multi VB,
761 for (index_t b = 0; b < A.
value.num_batches(); ++b)