0.0.1/Doxygen/lbfgs_8hpp_source.html

#pragma once


#include <alpaqa/inner/directions/decl/lbfgs.hpp>

#include <stdexcept>

#include <type_traits>


namespace alpaqa {


/// Decide whether a candidate pair (s, y) may be stored as an L-BFGS update.
///
/// @param params  L-BFGS parameters; only `params.cbfgs.α` and
///                `params.cbfgs.ϵ` are read here.
/// @param yᵀs     Inner product of y and s.
/// @param sᵀs     Squared norm of s.
/// @param pᵀp     Squared norm of p, used in the cautious BFGS condition.
/// @return        `true` if every check below passes, `false` otherwise.
inline bool LBFGS::update_valid(LBFGSParams params, real_t yᵀs, real_t sᵀs,
                                real_t pᵀp) {
    // Lower bound on the values we are willing to divide by, chosen so that
    // the quotient cannot overflow
    const real_t tiny = std::sqrt(std::numeric_limits<real_t>::min());

    // yᵀs has to be finite, and neither yᵀs nor sᵀs may fall below the bound
    const bool divisors_ok =
        std::isfinite(yᵀs) && !(yᵀs < tiny) && !(sᵀs < tiny);
    if (!divisors_ok)
        return false;

    // Cautious BFGS (CBFGS) condition:
    // https://epubs.siam.org/doi/10.1137/S1052623499354242
    // Accept only if yᵀs / sᵀs >= ϵ ‖p‖^α, where ‖p‖^α = (pᵀp)^(α/2)
    const real_t exponent  = params.cbfgs.α;
    const real_t threshold = params.cbfgs.ϵ;
    return yᵀs / sᵀs >= threshold * std::pow(pᵀp, exponent / 2);
}


/// Try to add the pair (s, y) derived from two consecutive iterates to the
/// circular buffer.
///
/// @param xₖ      Previous iterate.
/// @param xₖ₊₁    New iterate; s = xₖ₊₁ - xₖ.
/// @param pₖ      Previous p vector.
/// @param pₖ₊₁    New p vector.
/// @param sign    `Sign::Positive` gives y = pₖ₊₁ - pₖ, any other value gives
///                y = pₖ - pₖ₊₁.
/// @param forced  If `true`, the pair is stored without calling
///                update_valid().
/// @return        `true` if the pair was stored, `false` if it was rejected
///                by update_valid().
inline bool LBFGS::update(crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, Sign sign,
                          bool forced) {
    const auto s     = xₖ₊₁ - xₖ;
    const auto y     = sign == Sign::Positive ? pₖ₊₁ - pₖ : pₖ - pₖ₊₁;
    const real_t yᵀs = y.dot(s);

    if (!forced) {
        const real_t sᵀs = s.squaredNorm();
        // ‖pₖ₊₁‖² is only computed when the CBFGS parameter ϵ is positive
        const real_t pᵀp = params.cbfgs.ϵ > 0 ? pₖ₊₁.squaredNorm() : 0;
        const bool accepted = update_valid(params, yᵀs, sᵀs, pᵀp);
        if (!accepted)
            return false;
    }

    // Write s, y and ρ = 1 / yᵀs into the current slot of the buffer
    const size_t slot = idx;
    this->s(slot)     = s;
    this->y(slot)     = y;
    this->ρ(slot)     = 1 / yᵀs;

    // Advance the write position; once it wraps around to zero, every slot
    // has been written at least once
    idx = succ(slot);
    if (idx == 0)
        full = true;

    return true;
}


template <class Vec>

bool LBFGS::apply(Vec &&q, real_t γ) {

    // Only apply if we have previous vectors s and y

    if (idx == 0 && not full)

        return false;


    // If the step size is negative, compute it as sᵀy/yᵀy

    if (γ < 0) {

        auto new_idx = idx > 0 ? idx - 1 : history() - 1;

        real_t yᵀy   = y(new_idx).squaredNorm();

        γ            = 1. / (ρ(new_idx) * yᵀy);

    }


    auto update1 = [&](size_t i) {

        α(i) = ρ(i) * (s(i).dot(q));

        q -= α(i) * y(i);

    };

    if (idx)

        for (size_t i = idx; i-- > 0;)

            update1(i);

    if (full)

        for (size_t i = history(); i-- > idx;)

            update1(i);


    // r ← H₀ q

    q *= γ;


    auto update2 = [&](size_t i) {

        real_t β = ρ(i) * (y(i).dot(q));

        q += (α(i) - β) * s(i);

    };

    if (full)

        for (size_t i = idx; i < history(); ++i)

            update2(i);

    for (size_t i = 0; i < idx; ++i)

        update2(i);


    return true;

}


/// Apply the inverse Hessian approximation to q, using only the entries of
/// q and of the stored vectors whose indices are listed in J.
///
/// @param q  Vector to transform in place. When J does not cover all of q,
///           only the entries q(j) with j in J are read and modified.
/// @param γ  Scaling of the initial approximation H₀. If negative, it is
///           computed as 1 / (ρ yᵀy) from the first pair with ρ > 0 that is
///           encountered when going from the newest slot to the oldest,
///           with the products restricted to J.
/// @param J  Collection of indices into q; it must provide size() and be
///           usable in a range-based for loop.
/// @return   `false` if no pair is stored, or if γ was negative and no
///           stored pair has ρ > 0; `true` otherwise.
///
/// NOTE(review): when J does not cover all of q, the stored ρ(i) values are
/// overwritten by their J-restricted counterparts and never restored. A later
/// call taking the fullJ path (or the overload without J) would reuse those
/// values — confirm that this is intended.
template <class Vec, class IndexVec>

bool LBFGS::apply(Vec &&q, real_t γ, const IndexVec &J) {

    // Only apply if we have previous vectors s and y

    if (idx == 0 && not full)

        return false;

    using Index = typename std::remove_reference_t<Vec>::Index;

    // True when J has as many entries as q, in which case the whole-vector
    // operations are used instead of the element-wise loops over J
    bool fullJ  = q.size() == Index(J.size());


    // Eigen 3.3.9 doesn't yet support indexing using a vector of indices

    // so we'll have to do it manually

    // TODO: Abstract this away in an expression template / nullary expression?

    //       Or wait for Eigen update?


    // Dot product of two vectors, adding only the indices in set J

    auto dotJ = [&J, fullJ](const auto &a, const auto &b) {

        if (fullJ) {

            return a.dot(b);

        } else {

            real_t acc = 0;

            for (auto j : J)

                acc += a(j) * b(j);

            return acc;

        }

    };


    // First pass of the recursion for slot i: computes α(i), removes the
    // y(i) component from q, and (if still needed) derives γ
    auto update1 = [&](size_t i) {

        // Recompute ρ, it depends on the index set J. Note that even if ρ was

        // positive for the full vectors s and y, that's not necessarily the

        // case for the smaller vectors s(J) and y(J).

        if (not fullJ)

            ρ(i) = 1. / dotJ(s(i), y(i));


        if (ρ(i) <= 0) // Reject negative ρ to ensure positive definiteness

            return;


        α(i) = ρ(i) * dotJ(s(i), q);

        if (fullJ)

            q -= α(i) * y(i);

        else

            for (auto j : J)

                q(j) -= α(i) * y(i)(j);


        if (γ < 0) {

            // Compute step size based on most recent yᵀs/yᵀy > 0

            real_t yᵀy = dotJ(y(i), y(i));

            γ          = 1. / (ρ(i) * yᵀy);

        }

    };

    // Visit slots idx-1, …, 0 and then (if the buffer is full)
    // history()-1, …, idx
    if (idx)

        for (size_t i = idx; i-- > 0;)

            update1(i);

    if (full)

        for (size_t i = history(); i-- > idx;)

            update1(i);


    // If all ρ <= 0, fail

    if (γ < 0)

        return false;


    // r ← H₀ q

    if (fullJ)

        q *= γ;

    else

        for (auto j : J)

            q(j) *= γ;


    // Second pass of the recursion for slot i; slots that were rejected in
    // the first pass (ρ <= 0) are skipped here as well
    auto update2 = [&](size_t i) {

        if (ρ(i) <= 0)

            return;

        real_t β = ρ(i) * dotJ(y(i), q);

        if (fullJ)

            q += (α(i) - β) * s(i);

        else

            for (auto j : J)

                q(j) += (α(i) - β) * s(i)(j);

    };

    // Visit the slots in the opposite order of the first pass
    if (full)

        for (size_t i = idx; i < history(); ++i)

            update2(i);

    for (size_t i = 0; i < idx; ++i)

        update2(i);


    return true;

}


/// Discard all stored pairs by clearing the "buffer is full" flag and moving
/// the write position back to the first slot. The storage itself is left
/// untouched.
inline void LBFGS::reset() {
    full = false;
    idx  = 0;
}


/// Re-allocate the storage for vectors of size n and reset the buffer.
///
/// @param n  Size of the s and y vectors to store.
/// @throws std::invalid_argument if `params.memory` is zero.
inline void LBFGS::resize(size_t n) {
    // A history length of 1 is accepted; only 0 is rejected, so the message
    // states the bound that is actually enforced
    if (params.memory < 1)
        throw std::invalid_argument("LBFGSParams::memory must be >= 1");
    // Two columns per history entry and one row more than the vector size
    // (presumably the extra row holds the scalars ρ and α — confirm against
    // decl/lbfgs.hpp)
    sto.resize(n + 1, params.memory * 2);
    reset();
}


/// Multiply every stored y vector by `factor` and every stored ρ by
/// `1 / factor`.
///
/// @param factor  Scaling factor applied to the y vectors.
inline void LBFGS::scale_y(real_t factor) {
    // All history() slots are in use once the buffer is full; before that,
    // only the first idx slots have been written
    const size_t used_slots  = full ? history() : idx;
    const real_t inv_factor = 1. / factor;
    for (size_t k = 0; k < used_slots; ++k) {
        y(k) *= factor;
        ρ(k) *= inv_factor;
    }
}


/// Size the L-BFGS storage for the problem. Only the size of x₀ is used;
/// the remaining arguments are ignored.
inline void PANOCDirection<LBFGS>::initialize(crvec x₀, crvec x̂₀, crvec p₀,
                                              crvec grad₀) {
    // Unused by this direction provider
    static_cast<void>(x̂₀);
    static_cast<void>(p₀);
    static_cast<void>(grad₀);

    lbfgs.resize(x₀.size());
}


/// Forward the new iterate and p vector to LBFGS::update, with
/// `Sign::Negative`. The gradient, box and step size arguments are ignored.
///
/// @return  The result of LBFGS::update.
inline bool PANOCDirection<LBFGS>::update(crvec xₖ, crvec xₖ₊₁, crvec pₖ,
                                          crvec pₖ₊₁, crvec grad_new,
                                          const Box &C, real_t γ_new) {
    // Unused by this direction provider
    static_cast<void>(grad_new);
    static_cast<void>(C);
    static_cast<void>(γ_new);

    const bool accepted =
        lbfgs.update(xₖ, xₖ₊₁, pₖ, pₖ₊₁, LBFGS::Sign::Negative);
    return accepted;
}


/// Compute the direction qₖ by copying pₖ into it and applying the L-BFGS
/// inverse Hessian approximation in place. xₖ and x̂ₖ are ignored.
///
/// @return  The result of LBFGS::apply.
inline bool PANOCDirection<LBFGS>::apply(crvec xₖ, crvec x̂ₖ, crvec pₖ, real_t γ,
                                         rvec qₖ) {
    // Unused by this direction provider
    static_cast<void>(xₖ);
    static_cast<void>(x̂ₖ);

    // Start from pₖ, then transform it in place
    qₖ = pₖ;
    const bool applied = lbfgs.apply(qₖ, γ);
    return applied;
}


/// React to a change of the step size from old_γₖ to γₖ: either rescale the
/// stored y vectors by γₖ / old_γₖ (when the parameter
/// `rescale_when_γ_changes` is set) or discard the stored history.
inline void PANOCDirection<LBFGS>::changed_γ(real_t γₖ, real_t old_γₖ) {
    if (!lbfgs.get_params().rescale_when_γ_changes) {
        lbfgs.reset();
        return;
    }
    lbfgs.scale_y(γₖ / old_γₖ);
}


/// Reset the wrapped L-BFGS object.
inline void PANOCDirection<LBFGS>::reset() {
    lbfgs.reset();
}


/// @return  The name reported by the wrapped L-BFGS object.
inline std::string PANOCDirection<LBFGS>::get_name() const {
    std::string name = lbfgs.get_name();
    return name;
}


/// @return  A copy of the parameters of the wrapped L-BFGS object.
inline LBFGSParams PANOCDirection<LBFGS>::get_params() const {
    LBFGSParams current = lbfgs.get_params();
    return current;
}


} // namespace alpaqa

alpaqa::LBFGS::params
Params params
Definition: decl/lbfgs.hpp:97

alpaqa::LBFGS::succ
size_t succ(size_t i) const
Get the next index in the circular buffer of previous s and y vectors.
Definition: decl/lbfgs.hpp:80

alpaqa::LBFGS::idx
size_t idx
Definition: decl/lbfgs.hpp:95

alpaqa::LBFGS::full
bool full
Definition: decl/lbfgs.hpp:96

alpaqa::LBFGS::apply
bool apply(Vec &&q, real_t γ)
Apply the inverse Hessian approximation to the given vector q.
Definition: lbfgs.hpp:59

alpaqa::LBFGS::update
bool update(crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, Sign sign, bool forced=false)
Update the inverse Hessian approximation using the new vectors xₖ₊₁ and pₖ₊₁.
Definition: lbfgs.hpp:33

alpaqa::LBFGS::resize
void resize(size_t n)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.hpp:188

alpaqa::LBFGS::history
size_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition: decl/lbfgs.hpp:78

alpaqa::LBFGS::s
auto s(size_t i)
Definition: decl/lbfgs.hpp:82

alpaqa::LBFGS::α
real_t & α(size_t i)
Definition: decl/lbfgs.hpp:88

alpaqa::LBFGS::update_valid
static bool update_valid(LBFGSParams params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definiteness of the Hessian approximation.
Definition: lbfgs.hpp:9

alpaqa::LBFGS::y
auto y(size_t i)
Definition: decl/lbfgs.hpp:84

alpaqa::LBFGS::n
size_t n() const
Get the size of the s and y vectors in the buffer.
Definition: decl/lbfgs.hpp:76

alpaqa::LBFGS::reset
void reset()
Throw away the approximation and all previous vectors s and y.
Definition: lbfgs.hpp:183

alpaqa::LBFGS::sto
storage_t sto
Definition: decl/lbfgs.hpp:94

alpaqa::LBFGS::Sign
Sign
The sign of the vectors p passed to the LBFGS::update method.
Definition: decl/lbfgs.hpp:35

alpaqa::LBFGS::Sign::Positive
@ Positive

alpaqa::LBFGS::Sign::Negative
@ Negative

alpaqa::LBFGS::scale_y
void scale_y(real_t factor)
Scale the stored y vectors by the given factor.
Definition: lbfgs.hpp:195

alpaqa::LBFGS::ρ
real_t & ρ(size_t i)
Definition: decl/lbfgs.hpp:86

lbfgs.hpp

alpaqa.test.params
params
Definition: test.py:275

alpaqa.test.n
int n
Definition: test.py:40

alpaqa.test.C
C
Definition: test.py:262

alpaqa
Definition: __init__.py:1

alpaqa::crvec
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18

alpaqa::LBFGSParams::memory
unsigned memory
Length of the history to keep.
Definition: decl/lbfgs.hpp:14

alpaqa::LBFGSParams::cbfgs
struct alpaqa::LBFGSParams::@0 cbfgs
Parameters in the cautious BFGS update condition.

alpaqa::real_t
double real_t
Default floating point type.
Definition: vec.hpp:8

alpaqa::rvec
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16

alpaqa::Box
Definition: box.hpp:7

alpaqa::LBFGSParams
Parameters for the LBFGS and SpecializedLBFGS classes.
Definition: decl/lbfgs.hpp:12

hanging-chain-mpc.b
b
Definition: hanging-chain-mpc.py:74

hanging-chain-mpc.a
a
Definition: hanging-chain-mpc.py:74

alpaqa::PANOCDirection
Definition: panoc-direction-update.hpp:8

alpaqa::PANOCDirection::update
static bool update(DirectionProviderT &dp, crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, crvec gradₖ₊₁, const Box &C, real_t γₖ₊₁)=delete

alpaqa::PANOCDirection::apply
static bool apply(DirectionProviderT &dp, crvec xₖ, crvec x̂ₖ, crvec pₖ, real_t γ, rvec qₖ)=delete
Apply the direction estimation in the current point.

alpaqa::PANOCDirection::changed_γ
static void changed_γ(DirectionProviderT &dp, real_t γₖ, real_t old_γₖ)=delete

alpaqa::PANOCDirection::initialize
static void initialize(DirectionProviderT &dp, crvec x₀, crvec x̂₀, crvec p₀, crvec grad₀)=delete