alpaqa 1.0.0a18
Nonconvex constrained optimization
Loading...
Searching...
No Matches
lbfgs.tpp
Go to the documentation of this file.
1#pragma once
2
4
5#include <cmath>
6#include <limits>
7#include <stdexcept>
8
9namespace alpaqa {
10
11template <Config Conf>
13 real_t pᵀp) {
14 // Check if this L-BFGS update is accepted
15 if (sᵀs <= params.min_abs_s)
16 return false;
17 if (not std::isfinite(yᵀs))
18 return false;
19 real_t a_yᵀs = params.force_pos_def ? yᵀs : std::abs(yᵀs);
20 if (a_yᵀs <= params.min_div_fac * sᵀs)
21 return false;
22
23 // CBFGS condition: https://epubs.siam.org/doi/10.1137/S1052623499354242
24 if (params.cbfgs) {
25 const real_t α = params.cbfgs.α;
26 const real_t ϵ = params.cbfgs.ϵ;
27 // Condition: yᵀs / sᵀs >= ϵ ‖p‖^α
28 bool cbfgs_cond = a_yᵀs >= sᵀs * ϵ * std::pow(pᵀp, α / 2);
29 if (not cbfgs_cond)
30 return false;
31 }
32
33 return true;
34}
35
36template <Config Conf>
37bool LBFGS<Conf>::update_sy_impl(const auto &s, const auto &y,
39 real_t yᵀs = y.dot(s);
40 real_t ρ = 1 / yᵀs;
41 if (not forced) {
42 real_t sᵀs = s.squaredNorm();
43 if (not update_valid(params, yᵀs, sᵀs, pₙₑₓₜᵀpₙₑₓₜ))
44 return false;
45 }
46
47 // Store the new s and y vectors
48 sto.s(idx) = s;
49 sto.y(idx) = y;
50 sto.ρ(idx) = ρ;
51
52 // Increment the index in the circular buffer
53 idx = succ(idx);
54 full |= idx == 0;
55
56 return true;
57}
58
59template <Config Conf>
63
64template <Config Conf>
66 Sign sign, bool forced) {
67 const auto s = xₙₑₓₜ - xₖ;
68 const auto y = (sign == Sign::Positive) ? pₙₑₓₜ - pₖ : pₖ - pₙₑₓₜ;
69 real_t pₙₑₓₜᵀpₙₑₓₜ = params.cbfgs ? pₙₑₓₜ.squaredNorm() : 0;
70 return update_sy_impl(s, y, pₙₑₓₜᵀpₙₑₓₜ, forced);
71}
72
73template <Config Conf>
74bool LBFGS<Conf>::apply(rvec q, real_t γ) const {
75 // Only apply if we have previous vectors s and y
76 if (idx == 0 && not full)
77 return false;
78
79 // If the step size is negative, compute it as sᵀy/yᵀy
80 if (params.stepsize == LBFGSStepSize::BasedOnCurvature || γ < 0) {
81 auto new_idx = pred(idx);
82 real_t yᵀy = y(new_idx).squaredNorm();
83 γ = 1 / (ρ(new_idx) * yᵀy);
84 }
85
86 foreach_rev([&](index_t i) {
87 α(i) = ρ(i) * s(i).dot(q);
88 q -= α(i) * y(i);
89 });
90
91 // r ← H_0 q
92 q *= γ;
93
94 foreach_fwd([&](index_t i) {
95 real_t β = ρ(i) * y(i).dot(q);
96 q -= (β - α(i)) * s(i);
97 });
98
99 return true;
100}
101
102template <Config Conf>
103bool LBFGS<Conf>::apply_masked_impl(rvec q, real_t γ, const auto &J) const {
104 // Only apply if we have previous vectors s and y
105 if (idx == 0 && not full)
106 return false;
107 const bool fullJ = q.size() == static_cast<index_t>(J.size());
108
109 // Use curvature to compute initial scale
110 if (params.stepsize == LBFGSStepSize::BasedOnCurvature)
111 γ = -1;
112
113 if (params.cbfgs)
114 throw std::invalid_argument("CBFGS check not supported when using "
115 "masked version of LBFGS::apply_masked()");
116
117 // Eigen 3.3.9 doesn't yet support indexing using a vector of indices
118 // so we'll have to do it manually.
119 // TODO: Abstract this away in an expression template / nullary expression?
120 // Or wait for Eigen update?
121 // Update: Eigen 3.4's indexing seems significantly slower, so the manual
122 // for loops stay for now.
123
124 // Dot product of two vectors, adding only the indices in set J
125 const auto dotJ = [&J, fullJ](const auto &a, const auto &b) {
126 if (fullJ) {
127 return a.dot(b);
128 } else {
129 real_t acc = 0;
130 for (auto j : J)
131 acc += a(j) * b(j);
132 return acc;
133 }
134 };
135 // y -= a x, scaling and subtracting only the indices in set J
136 const auto axmyJ = [&J, fullJ](real_t a, const auto &x, auto &y) {
137 if (fullJ) {
138 y -= a * x;
139 } else {
140 for (auto j : J)
141 y(j) -= a * x(j);
143 };
144 // x *= a, scaling only the indices in set J
145 const auto scalJ = [&J, fullJ](real_t a, auto &x) {
146 if (fullJ) {
147 x *= a;
148 } else {
149 for (auto j : J)
150 x(j) *= a;
151 }
152 };
154 foreach_rev([&](index_t i) {
155 // Recompute ρ, it depends on the index set J. Note that even if ρ was
156 // positive for the full vectors s and y, that's not necessarily the
157 // case for the smaller vectors s(J) and y(J).
158 real_t yᵀs = dotJ(s(i), y(i));
159 real_t sᵀs = dotJ(s(i), s(i));
160 ρ(i) = 1 / yᵀs;
161 // Check if we should include this pair of vectors
162 if (not update_valid(params, yᵀs, sᵀs, 0)) {
163 ρ(i) = NaN<config_t>;
164 return; // continue foreach
165 }
166
167 α(i) = ρ(i) * dotJ(s(i), q); // αᵢ = ρᵢ〈sᵢ, q〉
168 axmyJ(α(i), y(i), q); // q -= αᵢ yᵢ
169
170 if (γ < 0) {
171 // Compute step size based on most recent valid yᵀs/yᵀy
172 real_t yᵀy = dotJ(y(i), y(i));
173 γ = 1 / (ρ(i) * yᵀy);
174 }
175 });
176
177 // If all ρ == 0, fail
178 if (γ < 0)
179 return false;
180
181 // r ← H_0 q
182 scalJ(γ, q); // q *= γ
183
184 foreach_fwd([&](index_t i) {
185 if (std::isnan(ρ(i)))
186 return; // continue foreach
187
188 real_t β = ρ(i) * dotJ(y(i), q); // βᵢ = ρᵢ〈yᵢ, q〉
189 axmyJ(β - α(i), s(i), q); // q -= (βᵢ - αᵢ) sᵢ
190 });
191
192 return true;
193}
194
195template <Config Conf>
197 return apply_masked_impl(q, γ, J);
198}
199
200template <Config Conf>
202 const std::vector<index_t> &J) const {
203 return apply_masked_impl(q, γ, J);
204}
205
206template <Config Conf>
208 idx = 0;
209 full = false;
210}
211
212template <Config Conf>
214 if (params.memory < 1)
215 throw std::invalid_argument("LBFGS::Params::memory must be >= 1");
216 sto.resize(n, params.memory);
217 reset();
218}
219
220template <Config Conf>
222 sto.resize(n + 1, history * 2);
223}
224
225template <Config Conf>
227 if (full) {
228 for (index_t i = 0; i < history(); ++i) {
229 y(i) *= factor;
230 ρ(i) *= 1 / factor;
231 }
232 } else {
233 for (index_t i = 0; i < idx; ++i) {
234 y(i) *= factor;
235 ρ(i) *= 1 / factor;
236 }
237 }
238}
239
240} // namespace alpaqa
bool apply_masked(rvec q, real_t γ, crindexvec J) const
Apply the inverse Hessian approximation to the given vector q, applying only the columns and rows of ...
Definition lbfgs.tpp:196
bool update_sy(crvec s, crvec y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Update the inverse Hessian approximation using the new vectors sₖ = xₙₑₓₜ - xₖ and yₖ = pₙₑₓₜ - pₖ.
Definition lbfgs.tpp:60
static bool update_valid(const Params &params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definitene...
Definition lbfgs.tpp:12
bool apply_masked_impl(rvec q, real_t γ, const auto &J) const
Apply the inverse Hessian approximation to the given vector q, applying only the columns and rows of ...
Definition lbfgs.tpp:103
void resize(length_t n)
Re-allocate storage for a problem with a different size.
Definition lbfgs.tpp:213
void reset()
Throw away the approximation and all previous vectors s and y.
Definition lbfgs.tpp:207
bool update_sy_impl(const auto &s, const auto &y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Definition lbfgs.tpp:37
Sign
The sign of the vectors passed to the update method.
Definition lbfgs.hpp:124
bool apply(rvec q, real_t γ=-1) const
Apply the inverse Hessian approximation to the given vector q.
Definition lbfgs.tpp:74
void scale_y(real_t factor)
Scale the stored y vectors by the given factor.
Definition lbfgs.tpp:226
bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ, Sign sign=Sign::Positive, bool forced=false)
Update the inverse Hessian approximation using the new vectors xₙₑₓₜ and pₙₑₓₜ.
Definition lbfgs.tpp:65
real_t min_abs_s
Reject update if .
Definition lbfgs.hpp:52
CBFGSParams< config_t > cbfgs
Parameters in the cautious BFGS update condition.
Definition lbfgs.hpp:58
bool force_pos_def
If set to true, the inverse Hessian estimate should remain definite, i.e.
Definition lbfgs.hpp:65
real_t min_div_fac
Reject update if .
Definition lbfgs.hpp:49
typename Conf::real_t real_t
Definition config.hpp:86
typename Conf::index_t index_t
Definition config.hpp:104
typename Conf::length_t length_t
Definition config.hpp:103
constexpr const auto inf
Definition config.hpp:112
typename Conf::rvec rvec
Definition config.hpp:91
typename Conf::crvec crvec
Definition config.hpp:92
@ BasedOnCurvature
Initial inverse Hessian approximation is set to .
typename Conf::crindexvec crindexvec
Definition config.hpp:107
real_t ϵ
Set to zero to disable CBFGS check.
Definition lbfgs.hpp:21
void resize(length_t n, length_t history)
Re-allocate storage for a problem with a different size.
Definition lbfgs.tpp:221