alpaqa dll
Nonconvex constrained optimization
Loading...
Searching...
No Matches
structured-lbfgs.tpp
Go to the documentation of this file.
1#pragma once
2
5
6namespace alpaqa {
7
8template <Config Conf>
10 const Problem &problem, crvec y, crvec Σ, [[maybe_unused]] real_t γ_0,
11 [[maybe_unused]] crvec x_0, [[maybe_unused]] crvec x̂_0,
12 [[maybe_unused]] crvec p_0, [[maybe_unused]] crvec grad_ψx_0) {
13 if (!problem.provides_eval_inactive_indices_res_lna())
14 throw std::invalid_argument(
15 "Structured L-BFGS requires eval_inactive_indices_res_lna()");
16 if (direction_params.hessian_vec_factor != 0 &&
17 !direction_params.hessian_vec_finite_differences &&
18 !direction_params.full_augmented_hessian &&
19 !problem.provides_eval_lagrangian_hessian_product())
20 throw std::invalid_argument(
21 "Structured L-BFGS requires eval_lagrangian_hessian_product(). "
22 "Alternatively, set hessian_vec_factor = 0 or "
23 "hessian_vec_finite_differences = true.");
24 if (direction_params.hessian_vec_factor != 0 &&
25 !direction_params.hessian_vec_finite_differences &&
26 direction_params.full_augmented_hessian &&
27 !(problem.provides_eval_lagrangian_hessian_product() ||
28 problem.provides_eval_augmented_lagrangian_hessian_product()))
29 throw std::invalid_argument(
30 "Structured L-BFGS requires "
31 "_eval_augmented_lagrangian_hessian_product() or "
32 "eval_lagrangian_hessian_product(). Alternatively, set "
33 "hessian_vec_factor = 0 or hessian_vec_finite_differences = true.");
34 if (direction_params.hessian_vec_factor != 0 &&
35 !direction_params.hessian_vec_finite_differences &&
36 direction_params.full_augmented_hessian &&
37 !problem.provides_eval_augmented_lagrangian_hessian_product() &&
38 !(problem.provides_get_general_bounds() &&
39 problem.provides_eval_grad_gi()))
40 throw std::invalid_argument(
41 "Structured L-BFGS requires either "
42 "eval_augmented_lagrangian_hessian_product() or "
43 "get_general_bounds() and eval_grad_gi(). Alternatively, "
44 "set hessian_vec_factor = 0, hessian_vec_finite_differences = "
45 "true, or full_augmented_hessian = false.");
46 // Store references to problem and ALM variables
47 this->problem = &problem;
48 this->y.emplace(y);
49 this->Σ.emplace(Σ);
50 // Allocate workspaces
51 const auto n = problem.get_num_variables();
52 const auto m = problem.get_num_constraints();
53 lbfgs.resize(n);
54 J_sto.resize(n);
55 HqK.resize(n);
56 if (direction_params.hessian_vec_finite_differences) {
57 work_n.resize(n);
58 work_n2.resize(n);
59 work_m.resize(m);
60 } else if (direction_params.full_augmented_hessian) {
61 work_n.resize(n);
62 work_m.resize(m);
63 }
64}
65
66template <Config Conf>
68 [[maybe_unused]] crvec x̂ₖ, crvec pₖ,
69 crvec grad_ψxₖ, rvec qₖ) const {
70 const auto n = problem->get_num_variables();
71
72 // Find inactive indices J
73 auto nJ = problem->eval_inactive_indices_res_lna(γₖ, xₖ, grad_ψxₖ, J_sto);
74 auto J = J_sto.topRows(nJ);
75
76 // There are no inactive indices J
77 if (nJ == 0) {
78 // No free variables, no Newton step possible
79 return false; // Simply use the projection step
80 }
81 // There are inactive indices J
82 if (J.size() == n) { // There are no active indices K
83 // If all indices are free, we can use standard L-BFGS,
84 qₖ = (real_t(1) / γₖ) * pₖ;
85 return lbfgs.apply(qₖ, γₖ);
86 }
87 // There are active indices K
88 qₖ = pₖ;
89 if (direction_params.hessian_vec_factor != 0) {
90 qₖ(J).setZero();
91 approximate_hessian_vec_term(xₖ, grad_ψxₖ, qₖ, J);
92 // Compute right-hand side of 6.1c
93 qₖ(J) = (real_t(1) / γₖ) * pₖ(J) -
94 direction_params.hessian_vec_factor * HqK(J);
95 } else {
96 qₖ(J) = (real_t(1) / γₖ) * pₖ(J);
97 }
98 // If there are active indices, we need the specialized version
99 // that only applies L-BFGS to the inactive indices
100 bool success = lbfgs.apply_masked(qₖ, γₖ, J);
101 if (success)
102 return true;
103 // If L-BFGS application failed, qₖ(J) still contains
104 // -∇ψ(x)(J) - HqK(J) or -∇ψ(x)(J), which is not a valid step.
105 // A good alternative is to use H₀ = γI as an L-BFGS estimate.
106 // This seems to perform better in practice than just falling back to a
107 // projected gradient step.
108 switch (direction_params.failure_policy) {
111 if (nJ == n)
112 qₖ *= γₖ;
113 else
114 qₖ(J) *= γₖ;
115 return true;
116 default: return false;
117 }
118}
119
120template <Config Conf>
122 crvec xₖ, crvec grad_ψxₖ, rvec qₖ, crindexvec J) const {
123 const auto m = problem->get_num_constraints();
124 // Either compute the Hessian-vector product using finite differences
125 if (direction_params.hessian_vec_finite_differences) {
127 *problem, xₖ, *y, *Σ, grad_ψxₖ, qₖ, HqK, work_n, work_n2, work_m);
128 }
129 // Or using an exact AD
130 else {
131 if (!direction_params.full_augmented_hessian) {
132 // Compute the product with the Hessian of the Lagrangian
133 problem->eval_lagrangian_hessian_product(xₖ, *y, 1, qₖ, HqK);
134 } else {
135 if (problem->provides_eval_augmented_lagrangian_hessian_product()) {
136 // Compute the product with the Hessian of the augmented
137 // Lagrangian
138 problem->eval_augmented_lagrangian_hessian_product(xₖ, *y, *Σ,
139 1, qₖ, HqK);
140 } else {
141 // Compute the product with the Hessian of the Lagrangian
142 problem->eval_lagrangian_hessian_product(xₖ, *y, 1, qₖ, HqK);
143 // And then add the Hessian of the penalty terms, to get the
144 // Hessian of the full augmented Lagrangian (if required)
145 if (direction_params.full_augmented_hessian) {
146 assert(m == 0 || problem->provides_eval_grad_gi());
147 const auto &D = problem->get_general_bounds();
148 auto &g = work_m;
149 problem->eval_constraints(xₖ, g);
150 for (index_t i = 0; i < m; ++i) {
151 real_t ζ = g(i) + (*y)(i) / (*Σ)(i);
152 bool inactive = D.lower(i) < ζ && ζ < D.upper(i);
153 if (not inactive) {
154 problem->eval_grad_gi(xₖ, i, work_n);
155 auto t = (*Σ)(i)*work_n.dot(qₖ);
156 // TODO: the dot product is more work than
157 // strictly necessary (only over K)
158 for (auto j : J)
159 HqK(j) += work_n(j) * t;
160 }
161 }
162 }
163 }
164 }
165 }
166}
167
168} // namespace alpaqa
typename Conf::real_t real_t
Definition config.hpp:86
typename Conf::index_t index_t
Definition config.hpp:104
typename Conf::rvec rvec
Definition config.hpp:91
typename Conf::crvec crvec
Definition config.hpp:92
typename Conf::crindexvec crindexvec
Definition config.hpp:107
void initialize(const Problem &problem, crvec y, crvec Σ, real_t γ_0, crvec x_0, crvec x̂_0, crvec p_0, crvec grad_ψx_0)
void approximate_hessian_vec_term(crvec xₖ, crvec grad_ψxₖ, rvec qₖ, crindexvec J) const
bool apply(real_t γₖ, crvec xₖ, crvec x̂ₖ, crvec pₖ, crvec grad_ψxₖ, rvec qₖ) const
TypeErasedProblem< config_t > Problem
static void calc_augmented_lagrangian_hessian_prod_fd(const Problem &problem, crvec xₖ, crvec y, crvec Σ, crvec grad_ψ, crvec v, rvec Hv, rvec work_n1, rvec work_n2, rvec work_m)