alpaqa 1.0.0a12
Nonconvex constrained optimization
Loading...
Searching...
No Matches
lbfgs.hpp
Go to the documentation of this file.
1#pragma once
2
4#include <alpaqa/export.hpp>
5
6#include <cmath>
7#include <limits>
8#include <string>
9#include <utility>
10#include <vector>
11
12namespace alpaqa {
13
14/// Cautious BFGS update.
15/// @see @ref LBFGSParams::cbfgs
16template <Config Conf = DefaultConfig>
20 real_t ϵ = 0; ///< Set to zero to disable CBFGS check.
21 explicit operator bool() const { return ϵ > 0; }
22};
23
24/// Which method to use to select the L-BFGS step size.
25enum class LBFGSStepSize {
26 /// Initial inverse Hessian approximation is set to
27 /// @f$ H_0 = \gamma I @f$.
29 /// Initial inverse Hessian approximation is set to
30 /// @f$ H_0 = \frac{s^\top y}{y^\top y} I @f$.
33 [[deprecated("use BasedOnExternalStepSize instead")]] =
35};
36
37/// Parameters for the @ref LBFGS class.
38template <Config Conf = DefaultConfig>
41
42 /// Length of the history to keep.
44 /// Reject update if @f$ y^\top s \le \text{min_div_fac} \cdot s^\top s @f$.
45 real_t min_div_fac = std::numeric_limits<real_t>::epsilon();
46 /// Reject update if @f$ s^\top s \le \text{min_abs_s} @f$.
48 std::pow(std::numeric_limits<real_t>::epsilon(), real_t(2));
49 /// Parameters in the cautious BFGS update condition
50 /// @f[ \frac{y^\top s}{s^\top s} \ge \epsilon \| g \|^\alpha @f]
51 /// @see https://epubs.siam.org/doi/10.1137/S1052623499354242
53 /// If set to true, the inverse Hessian estimate should remain definite,
54 /// i.e. a check is performed that rejects the update if
55 /// @f$ y^\top s \le \text{min_div_fac} \cdot s^\top s @f$.
56 /// If set to false, just try to prevent a singular Hessian by rejecting the
57 /// update if
58 /// @f$ \left| y^\top s \right| \le \text{min_div_fac} \cdot s^\top s @f$.
59 bool force_pos_def = true;
60 /// @see LBFGSStepSize
62};
63
64/// Layout:
65/// ~~~
66/// ┌───── 2 m ─────┐
67/// ┌ ┌───┬───┬───┬───┐
68/// │ │ │ │ │ │
69/// │ │ s │ y │ s │ y │
70/// n+1 │ │ │ │ │ │
71/// │ ├───┼───┼───┼───┤
72/// │ │ ρ │ α │ ρ │ α │
73/// └ └───┴───┴───┴───┘
74/// ~~~
75template <Config Conf = DefaultConfig>
78
79 /// Re-allocate storage for a problem with a different size.
81
82 /// Get the size of the s and y vectors in the buffer.
83 length_t n() const { return sto.rows() - 1; }
84 /// Get the number of previous vectors s and y stored in the buffer.
85 length_t history() const { return sto.cols() / 2; }
86
87 auto s(index_t i) { return sto.col(2 * i).topRows(n()); }
88 auto s(index_t i) const {
89 return std::as_const(sto).col(2 * i).topRows(n());
90 }
91 auto y(index_t i) { return sto.col(2 * i + 1).topRows(n()); }
92 auto y(index_t i) const {
93 return std::as_const(sto).col(2 * i + 1).topRows(n());
94 }
95 real_t &ρ(index_t i) { return sto.coeffRef(n(), 2 * i); }
96 real_t &ρ(index_t i) const { return sto.coeffRef(n(), 2 * i); }
97 real_t &α(index_t i) { return sto.coeffRef(n(), 2 * i + 1); }
98 real_t &α(index_t i) const { return sto.coeffRef(n(), 2 * i + 1); }
99
100 using storage_t = mat;
101 static_assert(!storage_t::IsRowMajor);
102 mutable storage_t sto;
103};
104
105/// Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm
106/// @ingroup grp_Accelerators
107template <Config Conf = DefaultConfig>
108class LBFGS {
109 public:
111
113
114 /// The sign of the vectors @f$ p @f$ passed to the @ref update method.
115 enum class Sign {
116 Positive, ///< @f$ p \sim \nabla \psi(x) @f$
117 Negative, ///< @f$ p \sim -\nabla \psi(x) @f$
118 };
119
120 LBFGS() = default;
123
124 /// Check if the new vectors s and y allow for a valid BFGS update that
125 /// preserves the positive definiteness of the Hessian approximation.
126 static bool update_valid(const Params &params, real_t yᵀs, real_t sᵀs,
127 real_t pᵀp);
128
129 /// Update the inverse Hessian approximation using the new vectors
130 /// sₖ = xₙₑₓₜ - xₖ and yₖ = pₙₑₓₜ - pₖ.
132 /// @see @ref update_sy
133 bool update_sy_impl(const auto &s, const auto &y, real_t pₙₑₓₜᵀpₙₑₓₜ,
134 bool forced = false);
135
136 /// Update the inverse Hessian approximation using the new vectors xₙₑₓₜ
137 /// and pₙₑₓₜ.
139 Sign sign = Sign::Positive, bool forced = false);
140
141 /// Apply the inverse Hessian approximation to the given vector q.
142 /// Initial inverse Hessian approximation is set to @f$ H_0 = \gamma I @f$.
143 /// If @p γ is negative, @f$ H_0 = \frac{s^\top y}{y^\top y} I @f$.
144 bool apply(rvec q, real_t γ = -1) const;
145
146 /// Apply the inverse Hessian approximation to the given vector q, applying
147 /// only the columns and rows of the Hessian in the index set J.
148 bool apply_masked(rvec q, real_t γ, crindexvec J) const;
149 /// @copydoc apply_masked(rvec, real_t, crindexvec) const
150 bool apply_masked(rvec q, real_t γ, const std::vector<index_t> &J) const;
151 /// @copydoc apply_masked(rvec, real_t, crindexvec) const
152 bool apply_masked_impl(rvec q, real_t γ, const auto &J) const;
153
154 /// Throw away the approximation and all previous vectors s and y.
155 void reset();
156 /// Re-allocate storage for a problem with a different size. Causes
157 /// a @ref reset.
158 void resize(length_t n);
159
160 /// Scale the stored y vectors by the given factor.
161 void scale_y(real_t factor);
162
163 /// Get a string identifier for this accelerator.
164 std::string get_name() const {
165 return "LBFGS<" + std::string(config_t::get_name()) + '>';
166 }
167 /// Get the parameters.
168 const Params &get_params() const { return params; }
169
170 /// Get the size of the s and y vectors in the buffer.
171 length_t n() const { return sto.n(); }
172 /// Get the number of previous vectors s and y stored in the buffer.
173 length_t history() const { return sto.history(); }
174 /// Get the next index in the circular buffer of previous s and y vectors.
175 index_t succ(index_t i) const { return i + 1 < history() ? i + 1 : 0; }
176 /// Get the previous index in the circular buffer of s and y vectors.
177 index_t pred(index_t i) const { return i > 0 ? i - 1 : history() - 1; }
178 /// Get the number of previous s and y vectors currently stored in the
179 /// buffer.
180 length_t current_history() const { return full ? history() : idx; }
181
182 auto s(index_t i) { return sto.s(i); }
183 auto s(index_t i) const { return sto.s(i); }
184 auto y(index_t i) { return sto.y(i); }
185 auto y(index_t i) const { return sto.y(i); }
186 real_t &ρ(index_t i) { return sto.ρ(i); }
187 real_t &ρ(index_t i) const { return sto.ρ(i); }
188 real_t &α(index_t i) { return sto.α(i); }
189 real_t &α(index_t i) const { return sto.α(i); }
190
191 /// Iterate over the indices in the history buffer, oldest first.
192 template <class F>
193 void foreach_fwd(const F &fun) const {
194 if (full)
195 for (index_t i = idx; i < history(); ++i)
196 fun(i);
197 if (idx)
198 for (index_t i = 0; i < idx; ++i)
199 fun(i);
200 }
201
202 /// Iterate over the indices in the history buffer, newest first.
203 template <class F>
204 void foreach_rev(const F &fun) const {
205 if (idx)
206 for (index_t i = idx; i-- > 0;)
207 fun(i);
208 if (full)
209 for (index_t i = history(); i-- > idx;)
210 fun(i);
211 }
212
213 private:
216 bool full = false;
218};
219
220// clang-format off
225
230
235// clang-format on
236
237} // namespace alpaqa
Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm.
Definition lbfgs.hpp:108
Params params
Definition lbfgs.hpp:217
std::string get_name() const
Get a string identifier for this accelerator.
Definition lbfgs.hpp:164
void foreach_rev(const F &fun) const
Iterate over the indices in the history buffer, newest first.
Definition lbfgs.hpp:204
auto s(index_t i)
Definition lbfgs.hpp:182
bool apply_masked(rvec q, real_t γ, crindexvec J) const
Apply the inverse Hessian approximation to the given vector q, applying only the columns and rows of ...
Definition lbfgs.tpp:196
auto s(index_t i) const
Definition lbfgs.hpp:183
length_t current_history() const
Get the number of previous s and y vectors currently stored in the buffer.
Definition lbfgs.hpp:180
LBFGS()=default
LBFGSParams< config_t > Params
Definition lbfgs.hpp:112
index_t succ(index_t i) const
Get the next index in the circular buffer of previous s and y vectors.
Definition lbfgs.hpp:175
index_t pred(index_t i) const
Get the previous index in the circular buffer of s and y vectors.
Definition lbfgs.hpp:177
length_t n() const
Get the size of the s and y vectors in the buffer.
Definition lbfgs.hpp:171
LBFGS(Params params, length_t n)
Definition lbfgs.hpp:122
LBFGS(Params params)
Definition lbfgs.hpp:121
LBFGSStorage< config_t > sto
Definition lbfgs.hpp:214
const Params & get_params() const
Get the parameters.
Definition lbfgs.hpp:168
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition lbfgs.hpp:173
void foreach_fwd(const F &fun) const
Iterate over the indices in the history buffer, oldest first.
Definition lbfgs.hpp:193
index_t idx
Definition lbfgs.hpp:215
real_t & α(index_t i) const
Definition lbfgs.hpp:189
bool update_sy(crvec s, crvec y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Update the inverse Hessian approximation using the new vectors sₖ = xₙₑₓₜ - xₖ and yₖ = pₙₑₓₜ - pₖ.
Definition lbfgs.tpp:60
real_t & ρ(index_t i) const
Definition lbfgs.hpp:187
static bool update_valid(const Params &params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definitene...
Definition lbfgs.tpp:12
bool apply_masked_impl(rvec q, real_t γ, const auto &J) const
Apply the inverse Hessian approximation to the given vector q, applying only the columns and rows of ...
Definition lbfgs.tpp:103
void resize(length_t n)
Re-allocate storage for a problem with a different size.
Definition lbfgs.tpp:213
real_t & α(index_t i)
Definition lbfgs.hpp:188
real_t & ρ(index_t i)
Definition lbfgs.hpp:186
void reset()
Throw away the approximation and all previous vectors s and y.
Definition lbfgs.tpp:207
bool update_sy_impl(const auto &s, const auto &y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Definition lbfgs.tpp:37
auto y(index_t i) const
Definition lbfgs.hpp:185
Sign
The sign of the vectors passed to the update method.
Definition lbfgs.hpp:115
bool apply(rvec q, real_t γ=-1) const
Apply the inverse Hessian approximation to the given vector q.
Definition lbfgs.tpp:74
void scale_y(real_t factor)
Scale the stored y vectors by the given factor.
Definition lbfgs.tpp:226
bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ, Sign sign=Sign::Positive, bool forced=false)
Update the inverse Hessian approximation using the new vectors xₙₑₓₜ and pₙₑₓₜ.
Definition lbfgs.tpp:65
auto y(index_t i)
Definition lbfgs.hpp:184
#define USING_ALPAQA_CONFIG(Conf)
Definition config.hpp:56
#define ALPAQA_IF_QUADF(...)
Definition config.hpp:182
#define ALPAQA_IF_LONGD(...)
Definition config.hpp:194
#define ALPAQA_IF_FLOAT(...)
Definition config.hpp:188
#define ALPAQA_EXPORT_EXTERN_TEMPLATE(...)
Definition export.hpp:21
typename Conf::mat mat
Definition config.hpp:71
real_t min_abs_s
Reject update if .
Definition lbfgs.hpp:47
length_t memory
Length of the history to keep.
Definition lbfgs.hpp:43
LBFGSStepSize stepsize
Definition lbfgs.hpp:61
CBFGSParams< config_t > cbfgs
Parameters in the cautious BFGS update condition.
Definition lbfgs.hpp:52
bool force_pos_def
If set to true, the inverse Hessian estimate should remain definite, i.e.
Definition lbfgs.hpp:59
typename Conf::real_t real_t
Definition config.hpp:65
typename Conf::index_t index_t
Definition config.hpp:77
typename Conf::length_t length_t
Definition config.hpp:76
constexpr const auto inf
Definition config.hpp:85
typename Conf::rvec rvec
Definition config.hpp:69
typename Conf::crvec crvec
Definition config.hpp:70
real_t min_div_fac
Reject update if .
Definition lbfgs.hpp:45
LBFGSStepSize
Which method to use to select the L-BFGS step size.
Definition lbfgs.hpp:25
@ BasedOnCurvature
Initial inverse Hessian approximation is set to .
@ BasedOnExternalStepSize
Initial inverse Hessian approximation is set to .
typename Conf::crindexvec crindexvec
Definition config.hpp:80
Parameters for the LBFGS class.
Definition lbfgs.hpp:39
Cautious BFGS update.
Definition lbfgs.hpp:17
real_t ϵ
Set to zero to disable CBFGS check.
Definition lbfgs.hpp:20
Double-precision double configuration.
Definition config.hpp:135
Single-precision float configuration.
Definition config.hpp:131
long double configuration.
Definition config.hpp:140
auto s(index_t i)
Definition lbfgs.hpp:87
auto s(index_t i) const
Definition lbfgs.hpp:88
void resize(length_t n, length_t history)
Re-allocate storage for a problem with a different size.
Definition lbfgs.tpp:221
length_t n() const
Get the size of the s and y vectors in the buffer.
Definition lbfgs.hpp:83
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition lbfgs.hpp:85
real_t & α(index_t i) const
Definition lbfgs.hpp:98
real_t & ρ(index_t i) const
Definition lbfgs.hpp:96
real_t & α(index_t i)
Definition lbfgs.hpp:97
real_t & ρ(index_t i)
Definition lbfgs.hpp:95
auto y(index_t i) const
Definition lbfgs.hpp:92
auto y(index_t i)
Definition lbfgs.hpp:91