alpaqa develop
Nonconvex constrained optimization
Loading...
Searching...
No Matches
lbfgs.hpp
Go to the documentation of this file.
1#pragma once
2
4#include <alpaqa/export.hpp>
5
6#include <cmath>
7#include <limits>
8#include <string>
9#include <utility>
10#include <vector>
11
12namespace alpaqa {
13
14/// Cautious BFGS update.
15/// @see @ref LBFGSParams::cbfgs
16/// @ingroup grp_Parameters
17template <Config Conf = DefaultConfig>
21 real_t ϵ = 0; ///< Set to zero to disable CBFGS check.
22 explicit operator bool() const { return ϵ > 0; }
23};
24
25/// Which method to use to select the L-BFGS step size.
26enum class LBFGSStepSize {
27 /// Initial inverse Hessian approximation is set to
28 /// @f$ H_0 = \gamma I @f$, where @f$ \gamma @f$ is the forward-backward
29 /// splitting step size.
31 /// Initial inverse Hessian approximation is set to
32 /// @f$ H_0 = \frac{s^\top y}{y^\top y} I @f$.
35 [[deprecated("use BasedOnExternalStepSize instead")]] =
37};
38
39/// Parameters for the @ref LBFGS class.
40/// @ingroup grp_Parameters
41template <Config Conf = DefaultConfig>
44
45 /// Length of the history to keep.
47 /// Reject update if @f$ y^\top s \le \text{min_div_fac} \cdot s^\top s @f$.
48 /// Keeps the inverse Hessian approximation positive definite.
49 real_t min_div_fac = std::numeric_limits<real_t>::epsilon();
50 /// Reject update if @f$ s^\top s \le \text{min_abs_s} @f$.
51 /// Keeps the Hessian approximation nonsingular.
53 std::pow(std::numeric_limits<real_t>::epsilon(), real_t(2));
54 /// Parameters in the cautious BFGS update condition
55 /// @f[ \frac{y^\top s}{s^\top s} \ge \epsilon \| g \|^\alpha. @f]
56 /// Disabled by default.
57 /// @see https://epubs.siam.org/doi/10.1137/S1052623499354242
59 /// If set to true, the inverse Hessian estimate should remain definite,
60 /// i.e. a check is performed that rejects the update if
61 /// @f$ y^\top s \le \text{min_div_fac} \cdot s^\top s @f$.
62 /// If set to false, just try to prevent a singular Hessian by rejecting the
63 /// update if
64 /// @f$ \left| y^\top s \right| \le \text{min_div_fac} \cdot s^\top s @f$.
65 bool force_pos_def = true;
66 /// Scale of the initial inverse Hessian approximation that the rank-one
67 /// L-BFGS updates are applied to, @f$ H_0 @f$.
68 /// You probably want to keep this as the default.
69 /// @see LBFGSStepSize
71};
72
73/// Layout:
74/// ~~~
75/// ┌───── 2 m ─────┐
76/// ┌ ┌───┬───┬───┬───┐
77/// │ │ │ │ │ │
78/// │ │ s │ y │ s │ y │
79/// n+1 │ │ │ │ │ │
80/// │ ├───┼───┼───┼───┤
81/// │ │ ρ │ α │ ρ │ α │
82/// └ └───┴───┴───┴───┘
83/// ~~~
84template <Config Conf = DefaultConfig>
87
88 /// Re-allocate storage for a problem with a different size.
90
91 /// Get the size of the s and y vectors in the buffer.
92 length_t n() const { return sto.rows() - 1; }
93 /// Get the number of previous vectors s and y stored in the buffer.
94 length_t history() const { return sto.cols() / 2; }
95
96 auto s(index_t i) { return sto.col(2 * i).topRows(n()); }
97 auto s(index_t i) const {
98 return std::as_const(sto).col(2 * i).topRows(n());
99 }
100 auto y(index_t i) { return sto.col(2 * i + 1).topRows(n()); }
101 auto y(index_t i) const {
102 return std::as_const(sto).col(2 * i + 1).topRows(n());
103 }
104 real_t &ρ(index_t i) { return sto.coeffRef(n(), 2 * i); }
105 real_t &ρ(index_t i) const { return sto.coeffRef(n(), 2 * i); }
106 real_t &α(index_t i) { return sto.coeffRef(n(), 2 * i + 1); }
107 real_t &α(index_t i) const { return sto.coeffRef(n(), 2 * i + 1); }
108
109 using storage_t = mat;
110 static_assert(!storage_t::IsRowMajor);
111 mutable storage_t sto;
112};
113
114/// Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm
115/// @ingroup grp_Accelerators
116template <Config Conf = DefaultConfig, class Storage = LBFGSStorage<Conf>>
117class LBFGS {
118 public:
120
122
123 /// The sign of the vectors @f$ p @f$ passed to the @ref update method.
124 enum class Sign {
125 Positive, ///< @f$ p \sim \nabla \psi(x) @f$
126 Negative, ///< @f$ p \sim -\nabla \psi(x) @f$
127 };
128
129 LBFGS() = default;
132
133 /// Check if the new vectors s and y allow for a valid BFGS update that
134 /// preserves the positive definiteness of the Hessian approximation.
135 static bool update_valid(const Params &params, real_t yᵀs, real_t sᵀs,
136 real_t pᵀp);
137
138 /// Update the inverse Hessian approximation using the new vectors
139 /// sₖ = xₙₑₓₜ - xₖ and yₖ = pₙₑₓₜ - pₖ.
141 /// @see @ref update_sy
142 bool update_sy_impl(const auto &s, const auto &y, real_t pₙₑₓₜᵀpₙₑₓₜ,
143 bool forced = false);
144
145 /// Update the inverse Hessian approximation using the new vectors xₙₑₓₜ
146 /// and pₙₑₓₜ.
148 Sign sign = Sign::Positive, bool forced = false);
149
150 /// Apply the inverse Hessian approximation to the given vector q.
151 /// Initial inverse Hessian approximation is set to @f$ H_0 = \gamma I @f$.
152 /// If @p γ is negative, @f$ H_0 = \frac{s^\top y}{y^\top y} I @f$.
153 bool apply(rvec q, real_t γ = -1) const;
154
155 /// Apply the inverse Hessian approximation to the given vector q, applying
156 /// only the columns and rows of the Hessian in the index set J.
157 bool apply_masked(rvec q, real_t γ, crindexvec J) const;
158 /// @copydoc apply_masked(rvec, real_t, crindexvec) const
159 bool apply_masked(rvec q, real_t γ, const std::vector<index_t> &J) const;
160 /// @copydoc apply_masked(rvec, real_t, crindexvec) const
161 bool apply_masked_impl(rvec q, real_t γ, const auto &J) const;
162
163 /// Throw away the approximation and all previous vectors s and y.
164 void reset();
165 /// Re-allocate storage for a problem with a different size. Causes
166 /// a @ref reset.
167 void resize(length_t n);
168
169 /// Scale the stored y vectors by the given factor.
170 void scale_y(real_t factor);
171
172 /// Get a string identifier for this accelerator.
173 std::string get_name() const {
174 return "LBFGS<" + std::string(config_t::get_name()) + '>';
175 }
176 /// Get the parameters.
177 const Params &get_params() const { return params; }
178
179 /// Get the size of the s and y vectors in the buffer.
180 length_t n() const { return sto.n(); }
181 /// Get the number of previous vectors s and y stored in the buffer.
182 length_t history() const { return sto.history(); }
183 /// Get the next index in the circular buffer of previous s and y vectors.
184 index_t succ(index_t i) const { return i + 1 < history() ? i + 1 : 0; }
185 /// Get the previous index in the circular buffer of s and y vectors.
186 index_t pred(index_t i) const { return i > 0 ? i - 1 : history() - 1; }
187 /// Get the number of previous s and y vectors currently stored in the
188 /// buffer.
189 length_t current_history() const { return full ? history() : idx; }
190
191 auto s(index_t i) { return sto.s(i); }
192 auto s(index_t i) const { return sto.s(i); }
193 auto y(index_t i) { return sto.y(i); }
194 auto y(index_t i) const { return sto.y(i); }
195 real_t &ρ(index_t i) { return sto.ρ(i); }
196 real_t &ρ(index_t i) const { return sto.ρ(i); }
197 real_t &α(index_t i) { return sto.α(i); }
198 real_t &α(index_t i) const { return sto.α(i); }
199
200 /// Iterate over the indices in the history buffer, oldest first.
201 template <class F>
202 void foreach_fwd(const F &fun) const {
203 if (full)
204 for (index_t i = idx; i < history(); ++i)
205 fun(i);
206 if (idx)
207 for (index_t i = 0; i < idx; ++i)
208 fun(i);
209 }
210
211 /// Iterate over the indices in the history buffer, newest first.
212 template <class F>
213 void foreach_rev(const F &fun) const {
214 if (idx)
215 for (index_t i = idx; i-- > 0;)
216 fun(i);
217 if (full)
218 for (index_t i = history(); i-- > idx;)
219 fun(i);
220 }
221
222 private:
223 Storage sto;
225 bool full = false;
227};
228
229inline constexpr const char *enum_name(LBFGSStepSize s) {
230 switch (s) {
232 return "BasedOnExternalStepSize";
233 case LBFGSStepSize::BasedOnCurvature: return "BasedOnCurvature";
234 default:;
235 }
236 throw std::out_of_range("invalid value for alpaqa::LBFGSStepSize");
237}
238
239// clang-format off
240ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, CBFGSParams, EigenConfigd);
241ALPAQA_IF_FLOAT(ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, CBFGSParams, EigenConfigf);)
242ALPAQA_IF_LONGD(ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, CBFGSParams, EigenConfigl);)
244
245ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, LBFGSParams, EigenConfigd);
246ALPAQA_IF_FLOAT(ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, LBFGSParams, EigenConfigf);)
247ALPAQA_IF_LONGD(ALPAQA_EXPORT_EXTERN_TEMPLATE(struct, LBFGSParams, EigenConfigl);)
249
250ALPAQA_EXPORT_EXTERN_TEMPLATE(class, LBFGS, EigenConfigd);
251ALPAQA_IF_FLOAT(ALPAQA_EXPORT_EXTERN_TEMPLATE(class, LBFGS, EigenConfigf);)
252ALPAQA_IF_LONGD(ALPAQA_EXPORT_EXTERN_TEMPLATE(class, LBFGS, EigenConfigl);)
254// clang-format on
255
256} // namespace alpaqa
Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm.
Definition lbfgs.hpp:117
Params params
Definition lbfgs.hpp:226
std::string get_name() const
Get a string identifier for this accelerator.
Definition lbfgs.hpp:173
void foreach_rev(const F &fun) const
Iterate over the indices in the history buffer, newest first.
Definition lbfgs.hpp:213
auto s(index_t i)
Definition lbfgs.hpp:191
bool apply_masked(rvec q, real_t γ, crindexvec J) const
Apply the inverse Hessian approximation to the given vector q, applying only the columns and rows of ...
Definition lbfgs.tpp:198
auto s(index_t i) const
Definition lbfgs.hpp:192
length_t current_history() const
Get the number of previous s and y vectors currently stored in the buffer.
Definition lbfgs.hpp:189
LBFGS()=default
LBFGSParams< config_t > Params
Definition lbfgs.hpp:121
index_t succ(index_t i) const
Get the next index in the circular buffer of previous s and y vectors.
Definition lbfgs.hpp:184
index_t pred(index_t i) const
Get the previous index in the circular buffer of s and y vectors.
Definition lbfgs.hpp:186
length_t n() const
Get the size of the s and y vectors in the buffer.
Definition lbfgs.hpp:180
LBFGS(Params params, length_t n)
Definition lbfgs.hpp:131
LBFGS(Params params)
Definition lbfgs.hpp:130
const Params & get_params() const
Get the parameters.
Definition lbfgs.hpp:177
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition lbfgs.hpp:182
void foreach_fwd(const F &fun) const
Iterate over the indices in the history buffer, oldest first.
Definition lbfgs.hpp:202
index_t idx
Definition lbfgs.hpp:224
real_t & α(index_t i) const
Definition lbfgs.hpp:198
bool update_sy(crvec s, crvec y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Update the inverse Hessian approximation using the new vectors sₖ = xₙₑₓₜ - xₖ and yₖ = pₙₑₓₜ - pₖ.
Definition lbfgs.tpp:60
real_t & ρ(index_t i) const
Definition lbfgs.hpp:196
static bool update_valid(const Params &params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definitene...
Definition lbfgs.tpp:12
bool apply_masked_impl(rvec q, real_t γ, const auto &J) const
Apply the inverse Hessian approximation to the given vector q, applying only the columns and rows of ...
Definition lbfgs.tpp:104
void resize(length_t n)
Re-allocate storage for a problem with a different size.
Definition lbfgs.tpp:215
real_t & α(index_t i)
Definition lbfgs.hpp:197
real_t & ρ(index_t i)
Definition lbfgs.hpp:195
void reset()
Throw away the approximation and all previous vectors s and y.
Definition lbfgs.tpp:209
bool update_sy_impl(const auto &s, const auto &y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Definition lbfgs.tpp:37
auto y(index_t i) const
Definition lbfgs.hpp:194
Sign
The sign of the vectors passed to the update method.
Definition lbfgs.hpp:124
Storage sto
Definition lbfgs.hpp:223
bool apply(rvec q, real_t γ=-1) const
Apply the inverse Hessian approximation to the given vector q.
Definition lbfgs.tpp:75
void scale_y(real_t factor)
Scale the stored y vectors by the given factor.
Definition lbfgs.tpp:228
bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ, Sign sign=Sign::Positive, bool forced=false)
Update the inverse Hessian approximation using the new vectors xₙₑₓₜ and pₙₑₓₜ.
Definition lbfgs.tpp:66
auto y(index_t i)
Definition lbfgs.hpp:193
#define USING_ALPAQA_CONFIG(Conf)
Definition config.hpp:77
#define ALPAQA_IF_QUADF(...)
Definition config.hpp:223
#define ALPAQA_IF_LONGD(...)
Definition config.hpp:235
#define ALPAQA_IF_FLOAT(...)
Definition config.hpp:229
#define ALPAQA_EXPORT_EXTERN_TEMPLATE(...)
Definition export.hpp:21
real_t min_abs_s
Reject update if .
Definition lbfgs.hpp:52
length_t memory
Length of the history to keep.
Definition lbfgs.hpp:46
LBFGSStepSize stepsize
Scale of the initial inverse Hessian approximation that the rank-one L-BFGS updates are applied to,...
Definition lbfgs.hpp:70
CBFGSParams< config_t > cbfgs
Parameters in the cautious BFGS update condition.
Definition lbfgs.hpp:58
bool force_pos_def
If set to true, the inverse Hessian estimate should remain definite, i.e.
Definition lbfgs.hpp:65
real_t min_div_fac
Reject update if .
Definition lbfgs.hpp:49
Parameters for the LBFGS class.
Definition lbfgs.hpp:42
typename Conf::mat mat
Definition config.hpp:93
typename Conf::real_t real_t
Definition config.hpp:86
typename Conf::index_t index_t
Definition config.hpp:104
typename Conf::length_t length_t
Definition config.hpp:103
constexpr const auto inf
Definition config.hpp:112
typename Conf::rvec rvec
Definition config.hpp:91
typename Conf::crvec crvec
Definition config.hpp:92
constexpr const char * enum_name(LBFGSStepSize s)
Definition lbfgs.hpp:229
LBFGSStepSize
Which method to use to select the L-BFGS step size.
Definition lbfgs.hpp:26
@ BasedOnCurvature
Initial inverse Hessian approximation is set to .
@ BasedOnExternalStepSize
Initial inverse Hessian approximation is set to , where is the forward-backward splitting step size.
typename Conf::crindexvec crindexvec
Definition config.hpp:107
Cautious BFGS update.
Definition lbfgs.hpp:18
real_t ϵ
Set to zero to disable CBFGS check.
Definition lbfgs.hpp:21
auto s(index_t i)
Definition lbfgs.hpp:96
auto s(index_t i) const
Definition lbfgs.hpp:97
void resize(length_t n, length_t history)
Re-allocate storage for a problem with a different size.
Definition lbfgs.tpp:223
length_t n() const
Get the size of the s and y vectors in the buffer.
Definition lbfgs.hpp:92
length_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition lbfgs.hpp:94
real_t & α(index_t i) const
Definition lbfgs.hpp:107
real_t & ρ(index_t i) const
Definition lbfgs.hpp:105
real_t & α(index_t i)
Definition lbfgs.hpp:106
real_t & ρ(index_t i)
Definition lbfgs.hpp:104
auto y(index_t i) const
Definition lbfgs.hpp:101
auto y(index_t i)
Definition lbfgs.hpp:100