alpaqa 0.0.1
Nonconvex constrained optimization
lbfgs.hpp
Go to the documentation of this file.
1#pragma once
2
4#include <stdexcept>
5#include <type_traits>
6
7namespace alpaqa {
8
10 real_t pᵀp) {
11 // Smallest number we want to divide by without overflow
12 const real_t min_divisor = std::sqrt(std::numeric_limits<real_t>::min());
13
14 // Check if this L-BFGS update is accepted
15 if (not std::isfinite(yᵀs))
16 return false;
17 if (yᵀs < min_divisor)
18 return false;
19 if (sᵀs < min_divisor)
20 return false;
21
22 // CBFGS condition: https://epubs.siam.org/doi/10.1137/S1052623499354242
23 real_t α = params.cbfgs.α;
24 real_t ϵ = params.cbfgs.ϵ;
25 // Condition: yᵀs / sᵀs >= ϵ ‖p‖^α
26 bool cbfgs_cond = yᵀs / sᵀs >= ϵ * std::pow(pᵀp, α / 2);
27 if (not cbfgs_cond)
28 return false;
29
30 return true;
31}
32
33inline bool LBFGS::update(crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, Sign sign,
34 bool forced) {
35 const auto s = xₖ₊₁ - xₖ;
36 const auto y = sign == Sign::Positive ? pₖ₊₁ - pₖ : pₖ - pₖ₊₁;
37 real_t yᵀs = y.dot(s);
38 real_t ρ = 1 / yᵀs;
39 if (not forced) {
40 real_t sᵀs = s.squaredNorm();
41 real_t pᵀp = params.cbfgs.ϵ > 0 ? pₖ₊₁.squaredNorm() : 0;
42 if (not update_valid(params, yᵀs, sᵀs, pᵀp))
43 return false;
44 }
45
46 // Store the new s and y vectors
47 this->s(idx) = s;
48 this->y(idx) = y;
49 this->ρ(idx) = ρ;
50
51 // Increment the index in the circular buffer
52 idx = succ(idx);
53 full |= idx == 0;
54
55 return true;
56}
57
58template <class Vec>
59bool LBFGS::apply(Vec &&q, real_t γ) {
60 // Only apply if we have previous vectors s and y
61 if (idx == 0 && not full)
62 return false;
63
64 // If the step size is negative, compute it as sᵀy/yᵀy
65 if (γ < 0) {
66 auto new_idx = idx > 0 ? idx - 1 : history() - 1;
67 real_t yᵀy = y(new_idx).squaredNorm();
68 γ = 1. / (ρ(new_idx) * yᵀy);
69 }
70
71 auto update1 = [&](size_t i) {
72 α(i) = ρ(i) * (s(i).dot(q));
73 q -= α(i) * y(i);
74 };
75 if (idx)
76 for (size_t i = idx; i-- > 0;)
77 update1(i);
78 if (full)
79 for (size_t i = history(); i-- > idx;)
80 update1(i);
81
82 // r ← H₀ q
83 q *= γ;
84
85 auto update2 = [&](size_t i) {
86 real_t β = ρ(i) * (y(i).dot(q));
87 q += (α(i) - β) * s(i);
88 };
89 if (full)
90 for (size_t i = idx; i < history(); ++i)
91 update2(i);
92 for (size_t i = 0; i < idx; ++i)
93 update2(i);
94
95 return true;
96}
97
98template <class Vec, class IndexVec>
99bool LBFGS::apply(Vec &&q, real_t γ, const IndexVec &J) {
100 // Only apply if we have previous vectors s and y
101 if (idx == 0 && not full)
102 return false;
103 using Index = typename std::remove_reference_t<Vec>::Index;
104 bool fullJ = q.size() == Index(J.size());
105
106 // Eigen 3.3.9 doesn't yet support indexing using a vector of indices
107 // so we'll have to do it manually
108 // TODO: Abstract this away in an expression template / nullary expression?
109 // Or wait for Eigen update?
110
111 // Dot product of two vectors, adding only the indices in set J
112 auto dotJ = [&J, fullJ](const auto &a, const auto &b) {
113 if (fullJ) {
114 return a.dot(b);
115 } else {
116 real_t acc = 0;
117 for (auto j : J)
118 acc += a(j) * b(j);
119 return acc;
120 }
121 };
122
123 auto update1 = [&](size_t i) {
124 // Recompute ρ, it depends on the index set J. Note that even if ρ was
125 // positive for the full vectors s and y, that's not necessarily the
126 // case for the smaller vectors s(J) and y(J).
127 if (not fullJ)
128 ρ(i) = 1. / dotJ(s(i), y(i));
129
130 if (ρ(i) <= 0) // Reject negative ρ to ensure positive definiteness
131 return;
132
133 α(i) = ρ(i) * dotJ(s(i), q);
134 if (fullJ)
135 q -= α(i) * y(i);
136 else
137 for (auto j : J)
138 q(j) -= α(i) * y(i)(j);
139
140 if (γ < 0) {
141 // Compute step size based on most recent yᵀs/yᵀy > 0
142 real_t yᵀy = dotJ(y(i), y(i));
143 γ = 1. / (ρ(i) * yᵀy);
144 }
145 };
146 if (idx)
147 for (size_t i = idx; i-- > 0;)
148 update1(i);
149 if (full)
150 for (size_t i = history(); i-- > idx;)
151 update1(i);
152
153 // If all ρ <= 0, fail
154 if (γ < 0)
155 return false;
156
157 // r ← H₀ q
158 if (fullJ)
159 q *= γ;
160 else
161 for (auto j : J)
162 q(j) *= γ;
163
164 auto update2 = [&](size_t i) {
165 if (ρ(i) <= 0)
166 return;
167 real_t β = ρ(i) * dotJ(y(i), q);
168 if (fullJ)
169 q += (α(i) - β) * s(i);
170 else
171 for (auto j : J)
172 q(j) += (α(i) - β) * s(i)(j);
173 };
174 if (full)
175 for (size_t i = idx; i < history(); ++i)
176 update2(i);
177 for (size_t i = 0; i < idx; ++i)
178 update2(i);
179
180 return true;
181}
182
183inline void LBFGS::reset() {
184 idx = 0;
185 full = false;
186}
187
188inline void LBFGS::resize(size_t n) {
189 if (params.memory < 1)
190 throw std::invalid_argument("LBFGSParams::memory must be > 1");
191 sto.resize(n + 1, params.memory * 2);
192 reset();
193}
194
195inline void LBFGS::scale_y(real_t factor) {
196 if (full) {
197 for (size_t i = 0; i < history(); ++i) {
198 y(i) *= factor;
199 ρ(i) *= 1. / factor;
200 }
201 } else {
202 for (size_t i = 0; i < idx; ++i) {
203 y(i) *= factor;
204 ρ(i) *= 1. / factor;
205 }
206 }
207}
208
209inline void PANOCDirection<LBFGS>::initialize(crvec x₀, crvec x̂₀, crvec p₀,
210 crvec grad₀) {
211 lbfgs.resize(x₀.size());
212 (void)x̂₀;
213 (void)p₀;
214 (void)grad₀;
215}
216
217inline bool PANOCDirection<LBFGS>::update(crvec xₖ, crvec xₖ₊₁, crvec pₖ,
218 crvec pₖ₊₁, crvec grad_new,
219 const Box &C, real_t γ_new) {
220 (void)grad_new;
221 (void)C;
222 (void)γ_new;
223 return lbfgs.update(xₖ, xₖ₊₁, pₖ, pₖ₊₁, LBFGS::Sign::Negative);
224}
225
226inline bool PANOCDirection<LBFGS>::apply(crvec xₖ, crvec x̂ₖ, crvec pₖ, real_t γ,
227 rvec qₖ) {
228 (void)xₖ;
229 (void)x̂ₖ;
230 qₖ = pₖ;
231 return lbfgs.apply(qₖ, γ);
232}
233
234inline void PANOCDirection<LBFGS>::changed_γ(real_t γₖ, real_t old_γₖ) {
235 if (lbfgs.get_params().rescale_when_γ_changes)
236 lbfgs.scale_y(γₖ / old_γₖ);
237 else
238 lbfgs.reset();
239}
240
241inline void PANOCDirection<LBFGS>::reset() { lbfgs.reset(); }
242
243inline std::string PANOCDirection<LBFGS>::get_name() const {
244 return lbfgs.get_name();
245}
246
248 return lbfgs.get_params();
249}
250
251} // namespace alpaqa
size_t succ(size_t i) const
Get the next index in the circular buffer of previous s and y vectors.
Definition: decl/lbfgs.hpp:80
bool apply(Vec &&q, real_t γ)
Apply the inverse Hessian approximation to the given vector q.
Definition: lbfgs.hpp:59
bool update(crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, Sign sign, bool forced=false)
Update the inverse Hessian approximation using the new vectors xₖ₊₁ and pₖ₊₁.
Definition: lbfgs.hpp:33
void resize(size_t n)
Re-allocate storage for a problem with a different size.
Definition: lbfgs.hpp:188
size_t history() const
Get the number of previous vectors s and y stored in the buffer.
Definition: decl/lbfgs.hpp:78
auto s(size_t i)
Definition: decl/lbfgs.hpp:82
real_t & α(size_t i)
Definition: decl/lbfgs.hpp:88
static bool update_valid(LBFGSParams params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definitene...
Definition: lbfgs.hpp:9
auto y(size_t i)
Definition: decl/lbfgs.hpp:84
size_t n() const
Get the size of the s and y vectors in the buffer.
Definition: decl/lbfgs.hpp:76
void reset()
Throw away the approximation and all previous vectors s and y.
Definition: lbfgs.hpp:183
storage_t sto
Definition: decl/lbfgs.hpp:94
Sign
The sign of the vectors passed to the LBFGS::update method.
Definition: decl/lbfgs.hpp:35
void scale_y(real_t factor)
Scale the stored y vectors by the given factor.
Definition: lbfgs.hpp:195
real_t & ρ(size_t i)
Definition: decl/lbfgs.hpp:86
int n
Definition: test.py:40
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18
unsigned memory
Length of the history to keep.
Definition: decl/lbfgs.hpp:14
struct alpaqa::LBFGSParams::@0 cbfgs
Parameters in the cautious BFGS update condition.
double real_t
Default floating point type.
Definition: vec.hpp:8
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16
Parameters for the LBFGS and SpecializedLBFGS classes.
Definition: decl/lbfgs.hpp:12
static bool update(DirectionProviderT &dp, crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, crvec gradₖ₊₁, const Box &C, real_t γₖ₊₁)=delete
static bool apply(DirectionProviderT &dp, crvec xₖ, crvec x̂ₖ, crvec pₖ, real_t γ, rvec qₖ)=delete
Apply the direction estimation in the current point.
static void changed_γ(DirectionProviderT &dp, real_t γₖ, real_t old_γₖ)=delete
static void initialize(DirectionProviderT &dp, crvec x₀, crvec x̂₀, crvec p₀, crvec grad₀)=delete