1.0.0a8/Doxygen/accelerators_2lbfgs_8hpp_source.html

#pragma once


#include <alpaqa/config/config.hpp>

#include <alpaqa/export.hpp>


#include <cmath>

#include <limits>

#include <string>

#include <utility>

#include <vector>


namespace alpaqa {


/// Parameters of the cautious BFGS (CBFGS) update condition.
/// @see @ref LBFGSParams::cbfgs
template <Config Conf = DefaultConfig>
struct CBFGSParams {
    USING_ALPAQA_CONFIG(Conf);

    /// Parameter α of the CBFGS condition (defaults to one).
    real_t α{1};
    /// Parameter ϵ of the CBFGS condition; leave at zero to disable the check.
    real_t ϵ{0};

    /// Evaluates to true only when ϵ is strictly positive, i.e. when the
    /// CBFGS check is enabled.
    explicit operator bool() const { return 0 < ϵ; }
};


/// Strategy used to pick the L-BFGS step size, i.e. the scaling of the
/// initial inverse Hessian approximation @f$ H_0 @f$.
enum class LBFGSStepSize {
    /// Use @f$ H_0 = \gamma I @f$ as the initial inverse Hessian
    /// approximation.
    BasedOnExternalStepSize = 0,
    /// Use @f$ H_0 = \frac{s^\top y}{y^\top y} I @f$ as the initial inverse
    /// Hessian approximation.
    BasedOnCurvature = 1,
    /// Deprecated alias that has the same value as
    /// @ref BasedOnExternalStepSize.
    BasedOnGradientStepSize
        [[deprecated("use BasedOnExternalStepSize instead")]] =
            BasedOnExternalStepSize,
};


/// Tuning parameters of the @ref LBFGS class.
template <Config Conf = DefaultConfig>
struct LBFGSParams {
    USING_ALPAQA_CONFIG(Conf);

    /// Number of previous (s, y) pairs to keep in the history.
    length_t memory{10};
    /// An update is rejected if
    /// @f$ y^\top s \le \text{min_div_fac} \cdot s^\top s @f$.
    real_t min_div_fac{std::numeric_limits<real_t>::epsilon()};
    /// An update is rejected if @f$ s^\top s \le \text{min_abs_s} @f$.
    /// Defaults to the square of the machine epsilon of `real_t`.
    real_t min_abs_s{
        std::pow(std::numeric_limits<real_t>::epsilon(), real_t(2))};
    /// Parameters of the cautious BFGS update condition
    /// @f[ \frac{y^\top s}{s^\top s} \ge \epsilon \| g \|^\alpha @f]
    /// @see https://epubs.siam.org/doi/10.1137/S1052623499354242
    CBFGSParams<config_t> cbfgs{};
    /// When true, the inverse Hessian estimate should remain definite: the
    /// update is rejected if
    /// @f$ y^\top s \le \text{min_div_fac} \cdot s^\top s @f$.
    /// When false, only a singular Hessian is guarded against, by rejecting
    /// the update if
    /// @f$ \left| y^\top s \right| \le \text{min_div_fac} \cdot s^\top s @f$.
    bool force_pos_def{true};
    /// How the L-BFGS step size is selected.
    /// @see LBFGSStepSize
    LBFGSStepSize stepsize{LBFGSStepSize::BasedOnCurvature};
};


/// Storage for the L-BFGS history: all s and y vectors, together with the
/// scalars ρ and α, live in one column-major matrix.
///
/// Layout:
/// ~~~
///       ┌───── 2 m ─────┐
///     ┌ ┌───┬───┬───┬───┐
///     │ │   │   │   │   │
///     │ │ s │ y │ s │ y │
/// n+1 │ │   │   │   │   │
///     │ ├───┼───┼───┼───┤
///     │ │ ρ │ α │ ρ │ α │
///     └ └───┴───┴───┴───┘
/// ~~~
/// Column 2i holds s(i) with ρ(i) in its last row; column 2i+1 holds y(i)
/// with α(i) in its last row.
template <Config Conf = DefaultConfig>
struct LBFGSStorage {
    USING_ALPAQA_CONFIG(Conf);

    using storage_t = mat;
    static_assert(!storage_t::IsRowMajor);
    /// The underlying buffer. Declared mutable so that the const accessors
    /// for ρ and α can hand out writable references.
    mutable storage_t sto;

    /// Re-allocate storage for a problem with a different size.
    void resize(length_t n, length_t history);

    /// Size of the s and y vectors in the buffer (one less than the number
    /// of rows, because the last row holds ρ and α).
    length_t n() const { return sto.rows() - 1; }
    /// Number of (s, y) pairs the buffer has room for (half the number of
    /// columns).
    length_t history() const { return sto.cols() / 2; }

    /// Mutable view of the i-th s vector.
    auto s(index_t i) {
        const index_t s_col = 2 * i;
        return sto.col(s_col).topRows(n());
    }
    /// Read-only view of the i-th s vector.
    auto s(index_t i) const {
        const index_t s_col = 2 * i;
        return std::as_const(sto).col(s_col).topRows(n());
    }
    /// Mutable view of the i-th y vector.
    auto y(index_t i) {
        const index_t y_col = 2 * i + 1;
        return sto.col(y_col).topRows(n());
    }
    /// Read-only view of the i-th y vector.
    auto y(index_t i) const {
        const index_t y_col = 2 * i + 1;
        return std::as_const(sto).col(y_col).topRows(n());
    }

    /// Reference to ρ(i), stored in the last row below s(i).
    real_t &ρ(index_t i) {
        const index_t s_col = 2 * i;
        return sto.coeffRef(n(), s_col);
    }
    /// @copydoc ρ(index_t)
    real_t &ρ(index_t i) const {
        const index_t s_col = 2 * i;
        return sto.coeffRef(n(), s_col);
    }
    /// Reference to α(i), stored in the last row below y(i).
    real_t &α(index_t i) {
        const index_t y_col = 2 * i + 1;
        return sto.coeffRef(n(), y_col);
    }
    /// @copydoc α(index_t)
    real_t &α(index_t i) const {
        const index_t y_col = 2 * i + 1;
        return sto.coeffRef(n(), y_col);
    }
};


/// Limited-memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm.
/// @ingroup grp_Accelerators
template <Config Conf = DefaultConfig>
class LBFGS {
  public:
    USING_ALPAQA_CONFIG(Conf);

    using Params = LBFGSParams<config_t>;

    /// Sign convention of the vectors @f$ p @f$ handed to the @ref update
    /// method.
    enum class Sign {
        Positive, ///< @f$ p \sim \nabla \psi(x) @f$
        Negative, ///< @f$ p \sim -\nabla \psi(x) @f$
    };

    LBFGS() = default;
    /// Store the parameters without allocating any storage.
    LBFGS(Params params) : params(params) {}
    /// Store the parameters and call @ref resize for problem size @p n.
    LBFGS(Params params, length_t n) : params(params) { resize(n); }

    /// Check whether the new vectors s and y allow for a valid BFGS update
    /// that preserves the positive definiteness of the Hessian approximation.
    static bool update_valid(const Params &params, real_t yᵀs, real_t sᵀs,
                             real_t pᵀp);

    /// Update the inverse Hessian approximation using the new vectors
    /// sₖ = xₙₑₓₜ - xₖ and yₖ = pₙₑₓₜ - pₖ.
    bool update_sy(crvec s, crvec y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced = false);
    /// @see @ref update_sy
    bool update_sy_impl(const auto &s, const auto &y, real_t pₙₑₓₜᵀpₙₑₓₜ,
                        bool forced = false);

    /// Update the inverse Hessian approximation using the new vectors xₙₑₓₜ
    /// and pₙₑₓₜ.
    bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ,
                Sign sign = Sign::Positive, bool forced = false);

    /// Apply the inverse Hessian approximation to the given vector q.
    /// The initial inverse Hessian approximation is @f$ H_0 = \gamma I @f$.
    /// If @p γ is negative, @f$ H_0 = \frac{s^\top y}{y^\top y} I @f$.
    bool apply(rvec q, real_t γ = -1) const;

    /// Apply the inverse Hessian approximation to the given vector q, using
    /// only the columns and rows of the Hessian in the index set J.
    bool apply_masked(rvec q, real_t γ, crindexvec J) const;
    /// @copydoc apply_masked(rvec, real_t, crindexvec) const
    bool apply_masked(rvec q, real_t γ, const std::vector<index_t> &J) const;
    /// @copydoc apply_masked(rvec, real_t, crindexvec) const
    bool apply_masked_impl(rvec q, real_t γ, const auto &J) const;

    /// Discard the approximation and all previous vectors s and y.
    void reset();
    /// Re-allocate storage for a problem with a different size. Causes
    /// a @ref reset.
    void resize(length_t n);

    /// Scale the stored y vectors by the given factor.
    void scale_y(real_t factor);

    /// Build a string identifier for this accelerator: the configuration name
    /// wrapped in `LBFGS<` and `>`.
    std::string get_name() const {
        std::string name{"LBFGS<"};
        name += std::string(config_t::get_name());
        name += '>';
        return name;
    }
    /// Access the parameters.
    const Params &get_params() const { return params; }

    /// Size of the s and y vectors in the buffer.
    length_t n() const { return sto.n(); }
    /// Number of previous vectors s and y the buffer has room for.
    length_t history() const { return sto.history(); }
    /// Index following @p i in the circular buffer of s and y vectors
    /// (wraps around to zero at the end).
    index_t succ(index_t i) const {
        if (i + 1 < history())
            return i + 1;
        return 0;
    }
    /// Index preceding @p i in the circular buffer of s and y vectors
    /// (wraps around to the last slot at zero).
    index_t pred(index_t i) const {
        if (i > 0)
            return i - 1;
        return history() - 1;
    }
    /// Number of previous s and y vectors currently stored in the buffer.
    length_t current_history() const {
        if (full)
            return history();
        return idx;
    }

    // Thin forwarding accessors to the underlying storage.
    auto s(index_t i) { return sto.s(i); }
    auto s(index_t i) const { return sto.s(i); }
    auto y(index_t i) { return sto.y(i); }
    auto y(index_t i) const { return sto.y(i); }
    real_t &ρ(index_t i) { return sto.ρ(i); }
    real_t &ρ(index_t i) const { return sto.ρ(i); }
    real_t &α(index_t i) { return sto.α(i); }
    real_t &α(index_t i) const { return sto.α(i); }

    /// Call @p fun on every index in the history buffer, oldest first.
    template <class F>
    void foreach_fwd(const F &fun) const {
        // Once the buffer has wrapped around, the oldest entries are the ones
        // from the insertion point up to the end.
        if (full) {
            index_t i = idx;
            while (i < history()) {
                fun(i);
                ++i;
            }
        }
        // Then the entries from the start of the buffer up to the insertion
        // point (an empty range when idx is zero).
        index_t i = 0;
        while (i < idx) {
            fun(i);
            ++i;
        }
    }

    /// Call @p fun on every index in the history buffer, newest first.
    template <class F>
    void foreach_rev(const F &fun) const {
        // Walk backwards from just before the insertion point to zero.
        for (index_t i = idx; i > 0;) {
            --i;
            fun(i);
        }
        // If the buffer has wrapped around, continue from the end of the
        // buffer down to the insertion point.
        if (full) {
            for (index_t i = history(); i > idx;) {
                --i;
                fun(i);
            }
        }
    }

  private:
    LBFGSStorage<config_t> sto; ///< Buffer with the s, y, ρ and α values.
    index_t idx{0};             ///< Insertion point in the circular buffer.
    bool full{false};           ///< Whether all history slots are in use.
    Params params;
};


// One ALPAQA_EXPORT_EXTERN_TEMPLATE line per (template, configuration) pair.
// NOTE(review): the macro comes from <alpaqa/export.hpp>; presumably it
// expands to an exported `extern template` declaration so that these
// instantiations are provided by the library — confirm against its definition.
// The EigenConfigq lines are only compiled when ALPAQA_WITH_QUAD_PRECISION is
// defined.

// CBFGSParams
ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, CBFGSParams, DefaultConfig);

ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, CBFGSParams, EigenConfigf);

ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, CBFGSParams, EigenConfigd);

ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, CBFGSParams, EigenConfigl);

#ifdef ALPAQA_WITH_QUAD_PRECISION

ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, CBFGSParams, EigenConfigq);

#endif


// LBFGSParams
ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, LBFGSParams, DefaultConfig);

ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, LBFGSParams, EigenConfigf);

ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, LBFGSParams, EigenConfigd);

ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, LBFGSParams, EigenConfigl);

#ifdef ALPAQA_WITH_QUAD_PRECISION

ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, LBFGSParams, EigenConfigq);

#endif


// LBFGS
ALPAQA_EXPORT_EXTERN_TEMPLATE(class, LBFGS, DefaultConfig);

ALPAQA_EXPORT_EXTERN_TEMPLATE(class, LBFGS, EigenConfigf);

ALPAQA_EXPORT_EXTERN_TEMPLATE(class, LBFGS, EigenConfigd);

ALPAQA_EXPORT_EXTERN_TEMPLATE(class, LBFGS, EigenConfigl);

#ifdef ALPAQA_WITH_QUAD_PRECISION

ALPAQA_EXPORT_EXTERN_TEMPLATE(class, LBFGS, EigenConfigq);

#endif


} // namespace alpaqa

alpaqa::LBFGS
Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm.
Definition: accelerators/lbfgs.hpp:108

alpaqa::LBFGS::params
Params params
Definition: accelerators/lbfgs.hpp:217

alpaqa::LBFGS::get_name
std::string get_name() const
Get a string identifier for this accelerator.
Definition: accelerators/lbfgs.hpp:164

alpaqa::LBFGS::foreach_rev
void foreach_rev(const F &fun) const
Iterate over the indices in the history buffer, newest first.
Definition: accelerators/lbfgs.hpp:204

alpaqa::LBFGS::s
auto s(index_t i)
Definition: accelerators/lbfgs.hpp:182

alpaqa::LBFGS::apply_masked
bool apply_masked(rvec q, real_t γ, crindexvec J) const
Apply the inverse Hessian approximation to the given vector q, applying only the columns and rows of the Hessian in the index set J.
Definition: lbfgs.tpp:196

alpaqa::LBFGS::full
bool full
Definition: accelerators/lbfgs.hpp:216

alpaqa::LBFGS::s
auto s(index_t i) const
Definition: accelerators/lbfgs.hpp:183

alpaqa::LBFGS::current_history
length_t current_history() const
Get the number of previous s and y vectors currently stored in the buffer.
Definition: accelerators/lbfgs.hpp:180

alpaqa::LBFGS::LBFGS
LBFGS()=default

alpaqa::LBFGS::Params
LBFGSParams< config_t > Params
Definition: accelerators/lbfgs.hpp:112

alpaqa::LBFGS::succ
index_t succ(index_t i) const
Get the next index in the circular buffer of previous s and y vectors.
Definition: accelerators/lbfgs.hpp:175

alpaqa::LBFGS::pred
index_t pred(index_t i) const
Get the previous index in the circular buffer of s and y vectors.
Definition: accelerators/lbfgs.hpp:177

alpaqa::LBFGS::n
length_t n() const
Get the size of the s and y vectors in the buffer.
Definition: accelerators/lbfgs.hpp:171

alpaqa::LBFGS::LBFGS
LBFGS(Params params, length_t n)
Definition: accelerators/lbfgs.hpp:122

alpaqa::LBFGS::LBFGS
LBFGS(Params params)
Definition: accelerators/lbfgs.hpp:121

alpaqa::LBFGS::sto
LBFGSStorage< config_t > sto
Definition: accelerators/lbfgs.hpp:214

alpaqa::LBFGS::get_params
const Params & get_params() const
Get the parameters.
Definition: accelerators/lbfgs.hpp:168

alpaqa::LBFGS::history
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition: accelerators/lbfgs.hpp:173

alpaqa::LBFGS::foreach_fwd
void foreach_fwd(const F &fun) const
Iterate over the indices in the history buffer, oldest first.
Definition: accelerators/lbfgs.hpp:193

alpaqa::LBFGS::idx
index_t idx
Definition: accelerators/lbfgs.hpp:215

alpaqa::LBFGS::α
real_t & α(index_t i) const
Definition: accelerators/lbfgs.hpp:189

alpaqa::LBFGS::update_sy
bool update_sy(crvec s, crvec y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Update the inverse Hessian approximation using the new vectors sₖ = xₙₑₓₜ - xₖ and yₖ = pₙₑₓₜ - pₖ.
Definition: lbfgs.tpp:60

alpaqa::LBFGS::ρ
real_t & ρ(index_t i) const
Definition: accelerators/lbfgs.hpp:187

alpaqa::LBFGS::update_valid
static bool update_valid(const Params &params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definiteness of the Hessian approximation.
Definition: lbfgs.tpp:12

alpaqa::LBFGS::apply_masked_impl
bool apply_masked_impl(rvec q, real_t γ, const auto &J) const
Apply the inverse Hessian approximation to the given vector q, applying only the columns and rows of the Hessian in the index set J.
Definition: lbfgs.tpp:103

alpaqa::LBFGS::resize
void resize(length_t n)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.tpp:213

alpaqa::LBFGS::α
real_t & α(index_t i)
Definition: accelerators/lbfgs.hpp:188

alpaqa::LBFGS::ρ
real_t & ρ(index_t i)
Definition: accelerators/lbfgs.hpp:186

alpaqa::LBFGS::reset
void reset()
Throw away the approximation and all previous vectors s and y.
Definition: lbfgs.tpp:207

alpaqa::LBFGS::update_sy_impl
bool update_sy_impl(const auto &s, const auto &y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Definition: lbfgs.tpp:37

alpaqa::LBFGS::y
auto y(index_t i) const
Definition: accelerators/lbfgs.hpp:185

alpaqa::LBFGS::Sign
Sign
The sign of the vectors $p$ passed to the update method.
Definition: accelerators/lbfgs.hpp:115

alpaqa::LBFGS::Sign::Positive
@ Positive

alpaqa::LBFGS::Sign::Negative
@ Negative

alpaqa::LBFGS::apply
bool apply(rvec q, real_t γ=-1) const
Apply the inverse Hessian approximation to the given vector q.
Definition: lbfgs.tpp:74

alpaqa::LBFGS::scale_y
void scale_y(real_t factor)
Scale the stored y vectors by the given factor.
Definition: lbfgs.tpp:226

alpaqa::LBFGS::update
bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ, Sign sign=Sign::Positive, bool forced=false)
Update the inverse Hessian approximation using the new vectors xₙₑₓₜ and pₙₑₓₜ.
Definition: lbfgs.tpp:65

alpaqa::LBFGS::y
auto y(index_t i)
Definition: accelerators/lbfgs.hpp:184

config.hpp

USING_ALPAQA_CONFIG
#define USING_ALPAQA_CONFIG(Conf)
Definition: config.hpp:42

export.hpp

ALPAQA_EXPORT_EXTERN_TEMPLATE
#define ALPAQA_EXPORT_EXTERN_TEMPLATE(...)
Definition: export.hpp:21

alpaqa
Definition: accelerators/anderson.hpp:10

alpaqa::mat
typename Conf::mat mat
Definition: config.hpp:57

alpaqa::LBFGSParams::min_abs_s
real_t min_abs_s
Reject update if $s^\top s \le \text{min_abs_s}$.
Definition: accelerators/lbfgs.hpp:47

alpaqa::LBFGSParams::memory
length_t memory
Length of the history to keep.
Definition: accelerators/lbfgs.hpp:43

alpaqa::LBFGSParams::stepsize
LBFGSStepSize stepsize
Definition: accelerators/lbfgs.hpp:61

alpaqa::LBFGSParams::cbfgs
CBFGSParams< config_t > cbfgs
Parameters in the cautious BFGS update condition.
Definition: accelerators/lbfgs.hpp:52

alpaqa::LBFGSParams::force_pos_def
bool force_pos_def
If set to true, the inverse Hessian estimate should remain definite, i.e. a check is performed that rejects the update if $y^\top s \le \text{min_div_fac} \cdot s^\top s$.
Definition: accelerators/lbfgs.hpp:59

alpaqa::real_t
typename Conf::real_t real_t
Definition: config.hpp:51

alpaqa::index_t
typename Conf::index_t index_t
Definition: config.hpp:63

alpaqa::length_t
typename Conf::length_t length_t
Definition: config.hpp:62

alpaqa::rvec
typename Conf::rvec rvec
Definition: config.hpp:55

alpaqa::crvec
typename Conf::crvec crvec
Definition: config.hpp:56

alpaqa::LBFGSParams::min_div_fac
real_t min_div_fac
Reject update if $y^\top s \le \text{min_div_fac} \cdot s^\top s$.
Definition: accelerators/lbfgs.hpp:45

alpaqa::LBFGSStepSize
LBFGSStepSize
Which method to use to select the L-BFGS step size.
Definition: accelerators/lbfgs.hpp:25

alpaqa::LBFGSStepSize::BasedOnGradientStepSize
@ BasedOnGradientStepSize

alpaqa::LBFGSStepSize::BasedOnCurvature
@ BasedOnCurvature
Initial inverse Hessian approximation is set to $H_0 = \frac{s^\top y}{y^\top y} I$.

alpaqa::LBFGSStepSize::BasedOnExternalStepSize
@ BasedOnExternalStepSize
Initial inverse Hessian approximation is set to $H_0 = \gamma I$.

alpaqa::crindexvec
typename Conf::crindexvec crindexvec
Definition: config.hpp:66

alpaqa::LBFGSParams< config_t >

alpaqa::LBFGSParams
Parameters for the LBFGS class.
Definition: accelerators/lbfgs.hpp:39

alpaqa::CBFGSParams
Cautious BFGS update.
Definition: accelerators/lbfgs.hpp:17

alpaqa::CBFGSParams::α
real_t α
Definition: accelerators/lbfgs.hpp:19

alpaqa::CBFGSParams::ϵ
real_t ϵ
Set to zero to disable CBFGS check.
Definition: accelerators/lbfgs.hpp:20

alpaqa::DefaultConfig
Definition: config.hpp:132

alpaqa::EigenConfigd
Double-precision double configuration.
Definition: config.hpp:115

alpaqa::EigenConfigf
Single-precision float configuration.
Definition: config.hpp:111

alpaqa::EigenConfigl
long double configuration.
Definition: config.hpp:120

alpaqa::LBFGSStorage
Layout:
Definition: accelerators/lbfgs.hpp:76

alpaqa::LBFGSStorage::storage_t
mat storage_t
Definition: accelerators/lbfgs.hpp:100

alpaqa::LBFGSStorage::s
auto s(index_t i)
Definition: accelerators/lbfgs.hpp:87

alpaqa::LBFGSStorage::s
auto s(index_t i) const
Definition: accelerators/lbfgs.hpp:88

alpaqa::LBFGSStorage::resize
void resize(length_t n, length_t history)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.tpp:221

alpaqa::LBFGSStorage::n
length_t n() const
Get the size of the s and y vectors in the buffer.
Definition: accelerators/lbfgs.hpp:83

alpaqa::LBFGSStorage::history
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition: accelerators/lbfgs.hpp:85

alpaqa::LBFGSStorage::α
real_t & α(index_t i) const
Definition: accelerators/lbfgs.hpp:98

alpaqa::LBFGSStorage::ρ
real_t & ρ(index_t i) const
Definition: accelerators/lbfgs.hpp:96

alpaqa::LBFGSStorage::α
real_t & α(index_t i)
Definition: accelerators/lbfgs.hpp:97

alpaqa::LBFGSStorage::ρ
real_t & ρ(index_t i)
Definition: accelerators/lbfgs.hpp:95

alpaqa::LBFGSStorage::y
auto y(index_t i) const
Definition: accelerators/lbfgs.hpp:92

alpaqa::LBFGSStorage::sto
storage_t sto
Definition: accelerators/lbfgs.hpp:102

alpaqa::LBFGSStorage::y
auto y(index_t i)
Definition: accelerators/lbfgs.hpp:91