alpaqa 0.0.1
Nonconvex constrained optimization
specialized-lbfgs.hpp
Go to the documentation of this file.
1#pragma once
2
6
7#include <cmath>
8
9namespace alpaqa {
10
11inline void SpecializedLBFGS::initialize(crvec x₀, crvec grad₀) {
12 idx = 0;
13 full = false;
14 x(0) = x₀;
15 g(0) = grad₀;
16}
17
18/// Standard L-BFGS update without changing the step size γ.
19inline bool SpecializedLBFGS::standard_update(crvec xₖ, crvec xₖ₊₁,
20 crvec pₖ, crvec pₖ₊₁,
21 crvec gradₖ₊₁) {
22 const auto s = xₖ₊₁ - xₖ;
23 const auto y = pₖ - pₖ₊₁;
24
25 real_t yᵀs = y.dot(s);
26 real_t sᵀs = s.squaredNorm();
27 real_t pᵀp = pₖ₊₁.squaredNorm();
28 real_t ρ = 1 / yᵀs;
29
30 if (not LBFGS::update_valid(params, yᵀs, sᵀs, pᵀp))
31 return false;
32
33 // Store the new s and y vectors
34 this->s(idx) = s;
35 this->y(idx) = y;
36 this->ρ(idx) = ρ;
37
38 // Store x and the gradient
39 this->x(succ(idx)) = xₖ₊₁;
40 this->g(succ(idx)) = gradₖ₊₁;
41
42 // Increment the index in the circular buffer
43 idx = succ(idx);
44 full |= idx == 0;
45
46 return true;
47}
48
49/// L-BFGS update when changing the step size γ, recomputing everything.
50inline bool SpecializedLBFGS::full_update(crvec xₖ, crvec xₖ₊₁,
51 crvec pₖ_old_γ, crvec pₖ₊₁,
52 crvec gradₖ₊₁, const Box &C,
53 real_t γ) {
54 auto &&sₖ = xₖ₊₁ - xₖ;
55 auto &&yₖ = this->w(); // temporary workspace
56 // Old pₖ is no longer valid, recompute with new γ
57 (void)pₖ_old_γ;
58 auto &&pₖ = this->p();
60 yₖ = pₖ - pₖ₊₁;
61
62 assert(x(idx) == xₖ);
63
64 real_t yᵀs = yₖ.dot(sₖ);
65 real_t sᵀs = sₖ.squaredNorm();
66 real_t pᵀp = pₖ₊₁.squaredNorm();
67 real_t ρₖ = 1 / yᵀs;
68
69 if (not LBFGS::update_valid(params, yᵀs, sᵀs, pᵀp))
70 return false;
71
72 // Recompute all residuals with new γ
73 // yₖ = pₖ - pₖ₊₁
74 // pₖ = Π(-γ∇ψ(xₖ), C - xₖ)
75 size_t endidx = full ? idx : pred(0);
76 for (size_t i = pred(idx); i != endidx; i = pred(i)) {
77 this->y(i) = -pₖ /* i+1 */;
78 pₖ = detail::projected_gradient_step(C, γ, this->x(i), this->g(i));
79 this->y(i) += pₖ /* i */;
80 }
81 // Store the new s and y vectors
82 this->s(idx) = sₖ;
83 this->y(idx) = yₖ;
84 this->ρ(idx) = ρₖ;
85
86 // Store x and the gradient
87 this->x(succ(idx)) = xₖ₊₁;
88 this->g(succ(idx)) = gradₖ₊₁;
89
90 // Increment the index in the circular buffer
91 idx = succ(idx);
92 full |= idx == 0;
93
94 return true;
95}
96
97inline bool SpecializedLBFGS::update(crvec xₖ, crvec xₖ₊₁,
98 crvec pₖ, crvec pₖ₊₁,
99 crvec gradₖ₊₁, const Box &C,
100 real_t γ) {
101 bool ret = (γ == old_γ || old_γ == 0)
102 ? standard_update(xₖ, xₖ₊₁, pₖ, pₖ₊₁, gradₖ₊₁)
103 : full_update(xₖ, xₖ₊₁, pₖ, pₖ₊₁, gradₖ₊₁, C, γ);
104 old_γ = γ;
105 return ret;
106}
107
108template <class Vec>
110 // TODO: dry, reuse standard LBFGS::apply
111 auto update1 = [&](size_t i) {
112 α(i) = ρ(i) * (s(i).dot(q));
113 q -= α(i) * y(i);
114 };
115 if (idx)
116 for (size_t i = idx; i-- > 0;)
117 update1(i);
118 if (full)
119 for (size_t i = history(); i-- > idx;)
120 update1(i);
121
122 // q = H₀ * q; // TODO: diagonal matrix H₀?
123
124 auto update2 = [&](size_t i) {
125 real_t β = ρ(i) * (y(i).dot(q));
126 q += (α(i) - β) * s(i);
127 };
128 if (full)
129 for (size_t i = idx; i < history(); ++i)
130 update2(i);
131 for (size_t i = 0; i < idx; ++i)
132 update2(i);
133}
134
135inline void SpecializedLBFGS::resize(size_t n, size_t history) {
136 sto.resize(n + 1, history * 4 + 2);
137 sto.fill(std::numeric_limits<real_t>::quiet_NaN());
138 idx = 0;
139 full = false;
140}
141
143 x(0) = x(idx);
144 g(0) = x(idx);
145 idx = 0;
146 full = false;
147}
148
149} // namespace alpaqa
150
152
153namespace alpaqa {
154
155template <>
157
158 static void initialize(SpecializedLBFGS &lbfgs, crvec x₀,
159 crvec x̂₀, crvec p₀, crvec grad₀) {
160 lbfgs.initialize(x₀, grad₀);
161 (void)x̂₀;
162 (void)p₀;
163 }
164
165 static bool update(SpecializedLBFGS &lbfgs, crvec xₖ, crvec xₖ₊₁,
166 crvec pₖ, crvec pₖ₊₁, crvec gradₖ₊₁,
167 const Box &C, real_t γ) {
168 return lbfgs.update(xₖ, xₖ₊₁, pₖ, pₖ₊₁, gradₖ₊₁, C, γ);
169 }
170
171 static bool apply(SpecializedLBFGS &lbfgs, crvec xₖ, crvec x̂ₖ,
172 crvec pₖ, real_t γ, rvec qₖ) {
173 (void)xₖ;
174 (void)x̂ₖ;
175 (void)γ; // TODO: add this parameter to SLBFGS
176 qₖ = pₖ;
177 lbfgs.apply(qₖ);
178 return true;
179 }
180
181 static void changed_γ(SpecializedLBFGS &lbfgs, real_t γₖ, real_t old_γₖ) {
182 (void)lbfgs;
183 (void)γₖ;
184 (void)old_γₖ;
185 }
186};
187
188} // namespace alpaqa
static bool update_valid(LBFGSParams params, real_t yᵀs, real_t sᵀs, real_t pᵀp)
Check if the new vectors s and y allow for a valid BFGS update that preserves the positive definitene...
Definition: lbfgs.hpp:9
Limited memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm that can handle updates of the γ p...
bool full_update(crvec xₖ, crvec xₖ₊₁, crvec pₖ_old_γ, crvec pₖ₊₁, crvec gradₖ₊₁, const Box &C, real_t γ)
L-BFGS update when changing the step size γ, recomputing everything.
size_t succ(size_t i) const
Get the next index in the circular buffer of previous s, y, x and g vectors.
void initialize(crvec x₀, crvec grad₀)
Initialize with the starting point x₀ and the gradient in that point.
size_t pred(size_t i) const
Get the previous index in the circular buffer of previous s, y, x and g vectors.
void apply(Vec &&q)
Apply the inverse Hessian approximation to the given vector q.
bool standard_update(crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, crvec gradₖ₊₁)
Standard L-BFGS update without changing the step size γ.
size_t history() const
Get the number of previous vectors s, y, x and g stored in the buffer.
bool update(crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, crvec gradₖ₊₁, const Box &C, real_t γ)
Update the inverse Hessian approximation using the new vectors xₖ₊₁ and pₖ₊₁.
void resize(size_t n, size_t history)
Re-allocate storage for a problem with a different size.
size_t n() const
Get the size of the s, y, x and g vectors in the buffer.
void reset()
Throw away the approximation and all previous vectors s and y.
auto projected_gradient_step(const Box &C, real_t γ, crvec x, crvec grad_ψ)
Projected gradient step.
int n
Definition: test.py:40
Eigen::Ref< const vec > crvec
Default type for immutable references to vectors.
Definition: vec.hpp:18
double real_t
Default floating point type.
Definition: vec.hpp:8
Eigen::Ref< vec > rvec
Default type for mutable references to vectors.
Definition: vec.hpp:16
static bool update(SpecializedLBFGS &lbfgs, crvec xₖ, crvec xₖ₊₁, crvec pₖ, crvec pₖ₊₁, crvec gradₖ₊₁, const Box &C, real_t γ)
static void initialize(SpecializedLBFGS &lbfgs, crvec x₀, crvec x̂₀, crvec p₀, crvec grad₀)
static bool apply(SpecializedLBFGS &lbfgs, crvec xₖ, crvec x̂ₖ, crvec pₖ, real_t γ, rvec qₖ)
static void changed_γ(SpecializedLBFGS &lbfgs, real_t γₖ, real_t old_γₖ)