alpaqa develop
Nonconvex constrained optimization
Loading...
Searching...
No Matches
lbfgs.tpp
Go to the documentation of this file.
1#pragma once
2
4
5#include <cmath>
6#include <limits>
7#include <stdexcept>
8
9namespace alpaqa {
10
11template <Config Conf, class Storage>
14 // Check if this L-BFGS update is accepted
15 if (sᵀs <= params.min_abs_s)
16 return false;
17 if (not std::isfinite(yᵀs))
18 return false;
19 real_t a_yᵀs = params.force_pos_def ? yᵀs : std::abs(yᵀs);
20 if (a_yᵀs <= params.min_div_fac * sᵀs)
21 return false;
22
23 // CBFGS condition: https://epubs.siam.org/doi/10.1137/S1052623499354242
24 if (params.cbfgs) {
25 const real_t α = params.cbfgs.α;
26 const real_t ϵ = params.cbfgs.ϵ;
27 // Condition: yᵀs / sᵀs >= ϵ ‖p‖^α
28 bool cbfgs_cond = a_yᵀs >= sᵀs * ϵ * std::pow(pᵀp, α / 2);
29 if (not cbfgs_cond)
30 return false;
31 }
32
33 return true;
34}
35
36template <Config Conf, class Storage>
37bool LBFGS<Conf, Storage>::update_sy_impl(const auto &s, const auto &y,
39 real_t yᵀs = y.dot(s);
40 real_t ρ = 1 / yᵀs;
41 if (not forced) {
42 real_t sᵀs = s.squaredNorm();
43 if (not update_valid(params, yᵀs, sᵀs, pₙₑₓₜᵀpₙₑₓₜ))
44 return false;
45 }
46
47 // Store the new s and y vectors
48 this->s(idx) = s;
49 this->y(idx) = y;
50 this->ρ(idx) = ρ;
51
52 // Increment the index in the circular buffer
53 idx = succ(idx);
54 full |= idx == 0;
55
56 return true;
57}
58
59template <Config Conf, class Storage>
64
65template <Config Conf, class Storage>
67 Sign sign, bool forced) {
68 const auto s = xₙₑₓₜ - xₖ;
69 const auto y = (sign == Sign::Positive) ? pₙₑₓₜ - pₖ : pₖ - pₙₑₓₜ;
70 real_t pₙₑₓₜᵀpₙₑₓₜ = params.cbfgs ? pₙₑₓₜ.squaredNorm() : 0;
71 return update_sy_impl(s, y, pₙₑₓₜᵀpₙₑₓₜ, forced);
72}
73
74template <Config Conf, class Storage>
76 // Only apply if we have previous vectors s and y
77 if (idx == 0 && not full)
78 return false;
79
80 // If the step size is negative, compute it as sᵀy/yᵀy
81 if (params.stepsize == LBFGSStepSize::BasedOnCurvature || γ < 0) {
82 auto new_idx = pred(idx);
83 real_t yᵀy = y(new_idx).squaredNorm();
84 γ = 1 / (ρ(new_idx) * yᵀy);
85 }
86
87 foreach_rev([&](index_t i) {
88 α(i) = ρ(i) * s(i).dot(q);
89 q -= α(i) * y(i);
90 });
91
92 // r ← H_0 q
93 q *= γ;
94
95 foreach_fwd([&](index_t i) {
96 real_t β = ρ(i) * y(i).dot(q);
97 q -= (β - α(i)) * s(i);
98 });
99
100 return true;
101}
102
103template <Config Conf, class Storage>
105 const auto &J) const {
106 // Only apply if we have previous vectors s and y
107 if (idx == 0 && not full)
108 return false;
109 const bool fullJ = q.size() == static_cast<index_t>(J.size());
110
111 // Use curvature to compute initial scale
112 if (params.stepsize == LBFGSStepSize::BasedOnCurvature)
113 γ = -1;
114
115 if (params.cbfgs)
116 throw std::invalid_argument("CBFGS check not supported when using "
117 "masked version of LBFGS::apply_masked()");
118
119 // Eigen 3.3.9 doesn't yet support indexing using a vector of indices
120 // so we'll have to do it manually.
121 // TODO: Abstract this away in an expression template / nullary expression?
122 // Or wait for Eigen update?
123 // Update: Eigen 3.4's indexing seems significantly slower, so the manual
124 // for loops stay for now.
125
126 // Dot product of two vectors, adding only the indices in set J
127 const auto dotJ = [&J, fullJ](const auto &a, const auto &b) {
128 if (fullJ) {
129 return a.dot(b);
130 } else {
131 real_t acc = 0;
132 for (auto j : J)
133 acc += a(j) * b(j);
134 return acc;
136 };
137 // y -= a x, scaling and subtracting only the indices in set J
138 const auto axmyJ = [&J, fullJ](real_t a, const auto &x, auto &y) {
139 if (fullJ) {
140 y -= a * x;
141 } else {
142 for (auto j : J)
143 y(j) -= a * x(j);
144 }
145 };
146 // x *= a, scaling only the indices in set J
147 const auto scalJ = [&J, fullJ](real_t a, auto &x) {
148 if (fullJ) {
149 x *= a;
150 } else {
151 for (auto j : J)
152 x(j) *= a;
154 };
155
156 foreach_rev([&](index_t i) {
157 // Recompute ρ, it depends on the index set J. Note that even if ρ was
158 // positive for the full vectors s and y, that's not necessarily the
159 // case for the smaller vectors s(J) and y(J).
160 real_t yᵀs = dotJ(s(i), y(i));
161 real_t sᵀs = dotJ(s(i), s(i));
162 ρ(i) = 1 / yᵀs;
163 // Check if we should include this pair of vectors
164 if (not update_valid(params, yᵀs, sᵀs, 0)) {
165 ρ(i) = NaN<config_t>;
166 return; // continue foreach
168
169 α(i) = ρ(i) * dotJ(s(i), q); // αᵢ = ρᵢ〈sᵢ, q〉
170 axmyJ(α(i), y(i), q); // q -= αᵢ yᵢ
171
172 if (γ < 0) {
173 // Compute step size based on most recent valid yᵀs/yᵀy
174 real_t yᵀy = dotJ(y(i), y(i));
175 γ = 1 / (ρ(i) * yᵀy);
176 }
177 });
178
179 // If all ρ == 0, fail
180 if (γ < 0)
181 return false;
182
183 // r ← H_0 q
184 scalJ(γ, q); // q *= γ
185
186 foreach_fwd([&](index_t i) {
187 if (std::isnan(ρ(i)))
188 return; // continue foreach
189
190 real_t β = ρ(i) * dotJ(y(i), q); // βᵢ = ρᵢ〈yᵢ, q〉
191 axmyJ(β - α(i), s(i), q); // q -= (βᵢ - αᵢ) sᵢ
192 });
193
194 return true;
195}
196
197template <Config Conf, class Storage>
199 return apply_masked_impl(q, γ, J);
200}
201
202template <Config Conf, class Storage>
204 const std::vector<index_t> &J) const {
205 return apply_masked_impl(q, γ, J);
206}
207
208template <Config Conf, class Storage>
210 idx = 0;
211 full = false;
212}
213
214template <Config Conf, class Storage>
216 if (params.memory < 1)
217 throw std::invalid_argument("LBFGS::Params::memory must be >= 1");
218 sto.resize(n, params.memory);
219 reset();
220}
221
222template <Config Conf>
224 sto.resize(n + 1, history * 2);
225}
226
227template <Config Conf, class Storage>
229 if (full) {
230 for (index_t i = 0; i < history(); ++i) {
231 y(i) *= factor;
232 ρ(i) *= 1 / factor;
233 }
234 } else {
235 for (index_t i = 0; i < idx; ++i) {
236 y(i) *= factor;
237 ρ(i) *= 1 / factor;
238 }
239 }
240}
241
242} // namespace alpaqa
bool apply_masked(rvec q, real_t γ, crindexvec J) const
Apply the inverse Hessian approximation to the given vector q, applying only the columns and rows of ...
Definition lbfgs.tpp:198
bool update_sy(crvec s, crvec y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Update the inverse Hessian approximation using the new vectors sₖ = xₙₑₓₜ - xₖ and yₖ = pₙₑₓₜ - pₖ.
Definition lbfgs.tpp:60
static bool update_valid(const Params &params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definitene...
Definition lbfgs.tpp:12
bool apply_masked_impl(rvec q, real_t γ, const auto &J) const
Apply the inverse Hessian approximation to the given vector q, applying only the columns and rows of ...
Definition lbfgs.tpp:104
void resize(length_t n)
Re-allocate storage for a problem with a different size.
Definition lbfgs.tpp:215
void reset()
Throw away the approximation and all previous vectors s and y.
Definition lbfgs.tpp:209
bool update_sy_impl(const auto &s, const auto &y, real_t pₙₑₓₜᵀpₙₑₓₜ, bool forced=false)
Definition lbfgs.tpp:37
Sign
The sign of the vectors passed to the update method.
Definition lbfgs.hpp:124
bool apply(rvec q, real_t γ=-1) const
Apply the inverse Hessian approximation to the given vector q.
Definition lbfgs.tpp:75
void scale_y(real_t factor)
Scale the stored y vectors by the given factor.
Definition lbfgs.tpp:228
bool update(crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ, Sign sign=Sign::Positive, bool forced=false)
Update the inverse Hessian approximation using the new vectors xₙₑₓₜ and pₙₑₓₜ.
Definition lbfgs.tpp:66
real_t min_abs_s
Reject update if .
Definition lbfgs.hpp:52
CBFGSParams< config_t > cbfgs
Parameters in the cautious BFGS update condition.
Definition lbfgs.hpp:58
bool force_pos_def
If set to true, the inverse Hessian estimate should remain definite, i.e.
Definition lbfgs.hpp:65
real_t min_div_fac
Reject update if .
Definition lbfgs.hpp:49
typename Conf::real_t real_t
Definition config.hpp:86
typename Conf::index_t index_t
Definition config.hpp:104
typename Conf::length_t length_t
Definition config.hpp:103
constexpr const auto inf
Definition config.hpp:112
typename Conf::rvec rvec
Definition config.hpp:91
typename Conf::crvec crvec
Definition config.hpp:92
@ BasedOnCurvature
Initial inverse Hessian approximation is set to .
typename Conf::crindexvec crindexvec
Definition config.hpp:107
real_t ϵ
Set to zero to disable CBFGS check.
Definition lbfgs.hpp:21
void resize(length_t n, length_t history)
Re-allocate storage for a problem with a different size.
Definition lbfgs.tpp:223