alpaqa 1.0.0a8
Nonconvex constrained optimization
Loading...
Searching...
No Matches
accelerators/lbfgs.hpp
Go to the documentation of this file.
1#pragma once
2
4#include <alpaqa/export.hpp>
5
6#include <cmath>
7#include <limits>
8#include <string>
9#include <utility>
10#include <vector>
11
12namespace alpaqa {
13
14/// Cautious BFGS update.
15/// @see @ref LBFGSParams::cbfgs
16template <Config Conf = DefaultConfig>
20 real_t ϵ = 0; ///< Set to zero to disable CBFGS check.
21 explicit operator bool() const { return ϵ > 0; }
22};
23
24/// Which method to use to select the L-BFGS step size.
25enum class LBFGSStepSize {
26 /// Initial inverse Hessian approximation is set to
27 /// @f$ H_0 = \gamma I @f$.
29 /// Initial inverse Hessian approximation is set to
30 /// @f$ H_0 = \frac{s^\top y}{y^\top y} I @f$.
33 [[deprecated("use BasedOnExternalStepSize instead")]] =
35};
36
37/// Parameters for the @ref LBFGS class.
38template <Config Conf = DefaultConfig>
41
42 /// Length of the history to keep.
44 /// Reject update if @f$ y^\top s \le \text{min_div_fac} \cdot s^\top s @f$.
45 real_t min_div_fac = std::numeric_limits<real_t>::epsilon();
46 /// Reject update if @f$ s^\top s \le \text{min_abs_s} @f$.
48 std::pow(std::numeric_limits<real_t>::epsilon(), real_t(2));
49 /// Parameters in the cautious BFGS update condition
50 /// @f[ \frac{y^\top s}{s^\top s} \ge \epsilon \| g \|^\alpha @f]
51 /// @see https://epubs.siam.org/doi/10.1137/S1052623499354242
53 /// If set to true, the inverse Hessian estimate should remain definite,
54 /// i.e. a check is performed that rejects the update if
55 /// @f$ y^\top s \le \text{min_div_fac} \cdot s^\top s @f$.
56 /// If set to false, just try to prevent a singular Hessian by rejecting the
57 /// update if
58 /// @f$ \left| y^\top s \right| \le \text{min_div_fac} \cdot s^\top s @f$.
59 bool force_pos_def = true;
60 /// @see LBFGSStepSize
62};
63
64/// Layout:
65/// ~~~
66/// ┌───── 2 m ─────┐
67/// ┌ ┌───┬───┬───┬───┐
68/// │ │ │ │ │ │
69/// │ │ s │ y │ s │ y │
70/// n+1 │ │ │ │ │ │
71/// │ ├───┼───┼───┼───┤
72/// │ │ ρ │ α │ ρ │ α │
73/// └ └───┴───┴───┴───┘
74/// ~~~
75template <Config Conf = DefaultConfig>
78
79 /// Re-allocate storage for a problem with a different size.
81
82 /// Get the size of the s and y vectors in the buffer.
83 length_t n() const { return sto.rows() - 1; }
84 /// Get the number of previous vectors s and y stored in the buffer.
85 length_t history() const { return sto.cols() / 2; }
86
87 auto s(index_t i) { return sto.col(2 * i).topRows(n()); }
88 auto s(index_t i) const {
89 return std::as_const(sto).col(2 * i).topRows(n());
90 }
91 auto y(index_t i) { return sto.col(2 * i + 1).topRows(n()); }
92 auto y(index_t i) const {
93 return std::as_const(sto).col(2 * i + 1).topRows(n());
94 }
95 real_t &ρ(index_t i) { return sto.coeffRef(n(), 2 * i); }
96 real_t &ρ(index_t i) const { return sto.coeffRef(n(), 2 * i); }
97 real_t &α(index_t i) { return sto.coeffRef(n(), 2 * i + 1); }
98 real_t &α(index_t i) const { return sto.coeffRef(n(), 2 * i + 1); }
99
100 using storage_t = mat;
101 static_assert(!storage_t::IsRowMajor);
102 mutable storage_t sto;
103};
104
105/// Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm
106/// @ingroup grp_Accelerators
107template <Config Conf = DefaultConfig>
108class LBFGS {
109 public:
111
113
114 /// The sign of the vectors @f$ p @f$ passed to the @ref update method.
115 enum class Sign {
116 Positive, ///< @f$ p \sim \nabla \psi(x) @f$
117 Negative, ///< @f$ p \sim -\nabla \psi(x) @f$
118 };
119
120 LBFGS() = default;
123
124 /// Check if the new vectors s and y allow for a valid BFGS update that
125 /// preserves the positive definiteness of the Hessian approximation.
126 static bool update_valid(const Params &params, real_t yᵀs, real_t sᵀs,
127 real_t pᵀp);
128
129 /// Update the inverse Hessian approximation using the new vectors
130 /// sₖ = xₙₑₓₜ - xₖ and yₖ = pₙₑₓₜ - pₖ.
131 bool update_sy(crvec s, crvec y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced = false);
132 /// @see @ref update_sy
133 bool update_sy_impl(const auto &s, const auto &y, real_t pₙₑₓₜᵀpₙₑₓₜ,
134 bool forced = false);
135
136 /// Update the inverse Hessian approximation using the new vectors xₙₑₓₜ
137 /// and pₙₑₓₜ.
138 bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ,
139 Sign sign = Sign::Positive, bool forced = false);
140
141 /// Apply the inverse Hessian approximation to the given vector q.
142 /// Initial inverse Hessian approximation is set to @f$ H_0 = \gamma I @f$.
143 /// If @p γ is negative, @f$ H_0 = \frac{s^\top y}{y^\top y} I @f$.
144 bool apply(rvec q, real_t γ = -1) const;
145
146 /// Apply the inverse Hessian approximation to the given vector q, applying
147 /// only the columns and rows of the Hessian in the index set J.
148 bool apply_masked(rvec q, real_t γ, crindexvec J) const;
149 /// @copydoc apply_masked(rvec, real_t, crindexvec) const
150 bool apply_masked(rvec q, real_t γ, const std::vector<index_t> &J) const;
151 /// @copydoc apply_masked(rvec, real_t, crindexvec) const
152 bool apply_masked_impl(rvec q, real_t γ, const auto &J) const;
153
154 /// Throw away the approximation and all previous vectors s and y.
155 void reset();
156 /// Re-allocate storage for a problem with a different size. Causes
157 /// a @ref reset.
158 void resize(length_t n);
159
160 /// Scale the stored y vectors by the given factor.
161 void scale_y(real_t factor);
162
163 /// Get a string identifier for this accelerator.
164 std::string get_name() const {
165 return "LBFGS<" + std::string(config_t::get_name()) + '>';
166 }
167 /// Get the parameters.
168 const Params &get_params() const { return params; }
169
170 /// Get the size of the s and y vectors in the buffer.
171 length_t n() const { return sto.n(); }
172 /// Get the number of previous vectors s and y stored in the buffer.
173 length_t history() const { return sto.history(); }
174 /// Get the next index in the circular buffer of previous s and y vectors.
175 index_t succ(index_t i) const { return i + 1 < history() ? i + 1 : 0; }
176 /// Get the previous index in the circular buffer of s and y vectors.
177 index_t pred(index_t i) const { return i > 0 ? i - 1 : history() - 1; }
178 /// Get the number of previous s and y vectors currently stored in the
179 /// buffer.
180 length_t current_history() const { return full ? history() : idx; }
181
182 auto s(index_t i) { return sto.s(i); }
183 auto s(index_t i) const { return sto.s(i); }
184 auto y(index_t i) { return sto.y(i); }
185 auto y(index_t i) const { return sto.y(i); }
186 real_t &ρ(index_t i) { return sto.ρ(i); }
187 real_t &ρ(index_t i) const { return sto.ρ(i); }
188 real_t &α(index_t i) { return sto.α(i); }
189 real_t &α(index_t i) const { return sto.α(i); }
190
191 /// Iterate over the indices in the history buffer, oldest first.
192 template <class F>
193 void foreach_fwd(const F &fun) const {
194 if (full)
195 for (index_t i = idx; i < history(); ++i)
196 fun(i);
197 if (idx)
198 for (index_t i = 0; i < idx; ++i)
199 fun(i);
200 }
201
202 /// Iterate over the indices in the history buffer, newest first.
203 template <class F>
204 void foreach_rev(const F &fun) const {
205 if (idx)
206 for (index_t i = idx; i-- > 0;)
207 fun(i);
208 if (full)
209 for (index_t i = history(); i-- > idx;)
210 fun(i);
211 }
212
213 private:
216 bool full = false;
218};
219
224#ifdef ALPAQA_WITH_QUAD_PRECISION
225ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, CBFGSParams, EigenConfigq);
226#endif
227
232#ifdef ALPAQA_WITH_QUAD_PRECISION
233ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, LBFGSParams, EigenConfigq);
234#endif
235
240#ifdef ALPAQA_WITH_QUAD_PRECISION
241ALPAQA_EXPORT_EXTERN_TEMPLATE(class, LBFGS, EigenConfigq);
242#endif
243
244} // namespace alpaqa
Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm.
std::string get_name() const
Get a string identifier for this accelerator.
void foreach_rev(const F &fun) const
Iterate over the indices in the history buffer, newest first.
auto s(index_t i)
bool apply_masked(rvec q, real_t γ, crindexvec J) const
Apply the inverse Hessian approximation to the given vector q, applying only the columns and rows of ...
Definition: lbfgs.tpp:196
auto s(index_t i) const
length_t current_history() const
Get the number of previous s and y vectors currently stored in the buffer.
LBFGS()=default
LBFGSParams< config_t > Params
index_t succ(index_t i) const
Get the next index in the circular buffer of previous s and y vectors.
index_t pred(index_t i) const
Get the previous index in the circular buffer of s and y vectors.
length_t n() const
Get the size of the s and y vectors in the buffer.
LBFGS(Params params, length_t n)
LBFGS(Params params)
LBFGSStorage< config_t > sto
const Params & get_params() const
Get the parameters.
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
void foreach_fwd(const F &fun) const
Iterate over the indices in the history buffer, oldest first.
real_t & α(index_t i) const
bool update_sy(crvec s, crvec y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Update the inverse Hessian approximation using the new vectors sₖ = xₙₑₓₜ - xₖ and yₖ = pₙₑₓₜ - pₖ.
Definition: lbfgs.tpp:60
real_t & ρ(index_t i) const
static bool update_valid(const Params &params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definitene...
Definition: lbfgs.tpp:12
bool apply_masked_impl(rvec q, real_t γ, const auto &J) const
Apply the inverse Hessian approximation to the given vector q, applying only the columns and rows of ...
Definition: lbfgs.tpp:103
void resize(length_t n)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.tpp:213
real_t & α(index_t i)
real_t & ρ(index_t i)
void reset()
Throw away the approximation and all previous vectors s and y.
Definition: lbfgs.tpp:207
bool update_sy_impl(const auto &s, const auto &y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Definition: lbfgs.tpp:37
auto y(index_t i) const
Sign
The sign of the vectors passed to the update method.
bool apply(rvec q, real_t γ=-1) const
Apply the inverse Hessian approximation to the given vector q.
Definition: lbfgs.tpp:74
void scale_y(real_t factor)
Scale the stored y vectors by the given factor.
Definition: lbfgs.tpp:226
bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ, Sign sign=Sign::Positive, bool forced=false)
Update the inverse Hessian approximation using the new vectors xₙₑₓₜ and pₙₑₓₜ.
Definition: lbfgs.tpp:65
auto y(index_t i)
#define USING_ALPAQA_CONFIG(Conf)
Definition: config.hpp:42
#define ALPAQA_EXPORT_EXTERN_TEMPLATE(...)
Definition: export.hpp:21
typename Conf::mat mat
Definition: config.hpp:57
real_t min_abs_s
Reject update if .
length_t memory
Length of the history to keep.
CBFGSParams< config_t > cbfgs
Parameters in the cautious BFGS update condition.
bool force_pos_def
If set to true, the inverse Hessian estimate should remain definite, i.e.
typename Conf::real_t real_t
Definition: config.hpp:51
typename Conf::index_t index_t
Definition: config.hpp:63
typename Conf::length_t length_t
Definition: config.hpp:62
typename Conf::rvec rvec
Definition: config.hpp:55
typename Conf::crvec crvec
Definition: config.hpp:56
real_t min_div_fac
Reject update if .
LBFGSStepSize
Which method to use to select the L-BFGS step size.
@ BasedOnCurvature
Initial inverse Hessian approximation is set to .
@ BasedOnExternalStepSize
Initial inverse Hessian approximation is set to .
typename Conf::crindexvec crindexvec
Definition: config.hpp:66
Parameters for the LBFGS class.
Cautious BFGS update.
real_t ϵ
Set to zero to disable CBFGS check.
Double-precision double configuration.
Definition: config.hpp:115
Single-precision float configuration.
Definition: config.hpp:111
long double configuration.
Definition: config.hpp:120
auto s(index_t i) const
void resize(length_t n, length_t history)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.tpp:221
length_t n() const
Get the size of the s and y vectors in the buffer.
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
real_t & α(index_t i) const
real_t & ρ(index_t i) const
real_t & α(index_t i)
real_t & ρ(index_t i)
auto y(index_t i) const