alpaqa pantr
Nonconvex constrained optimization
Loading...
Searching...
No Matches
structured-lbfgs.tpp
Go to the documentation of this file.
#pragma once

// NOTE(review): the original include lines were lost in extraction; restored
// to the conventional header for this implementation (.tpp) file — confirm
// against the upstream alpaqa source tree.
#include <alpaqa/inner/directions/panoc/structured-lbfgs.hpp>

#include <cassert>
#include <stdexcept>
namespace alpaqa {

8template <Config Conf>
10 const Problem &problem, crvec y, crvec Σ, [[maybe_unused]] real_t γ_0,
11 [[maybe_unused]] crvec x_0, [[maybe_unused]] crvec x̂_0,
12 [[maybe_unused]] crvec p_0, [[maybe_unused]] crvec grad_ψx_0) {
14 throw std::invalid_argument(
15 "Structured L-BFGS requires eval_inactive_indices_res_lna()");
16 if (direction_params.hessian_vec &&
17 !direction_params.hessian_vec_finite_differences &&
18 !direction_params.full_augmented_hessian &&
20 throw std::invalid_argument(
21 "Structured L-BFGS requires eval_hess_L_prod(). Alternatively, set "
22 "hessian_vec = false or hessian_vec_finite_differences = true.");
23 if (direction_params.hessian_vec &&
24 !direction_params.hessian_vec_finite_differences &&
25 direction_params.full_augmented_hessian &&
26 !(problem.provides_eval_hess_L_prod() ||
28 throw std::invalid_argument(
29 "Structured L-BFGS requires _eval_hess_ψ_prod() or "
30 "eval_hess_L_prod(). Alternatively, set "
31 "hessian_vec = false or hessian_vec_finite_differences = true.");
32 if (direction_params.hessian_vec &&
33 !direction_params.hessian_vec_finite_differences &&
34 direction_params.full_augmented_hessian &&
35 !problem.provides_eval_hess_ψ_prod() &&
36 !(problem.provides_get_box_D() && problem.provides_eval_grad_gi()))
37 throw std::invalid_argument(
38 "Structured L-BFGS requires either eval_hess_ψ_prod() or "
39 "get_box_D() and eval_grad_gi(). Alternatively, set hessian_vec = "
40 "false, hessian_vec_finite_differences = true, or "
41 "full_augmented_hessian = false.");
42 // Store references to problem and ALM variables
43 this->problem = &problem;
44 this->y.emplace(y);
45 this->Σ.emplace(Σ);
46 // Allocate workspaces
47 const auto n = problem.get_n();
48 const auto m = problem.get_m();
49 lbfgs.resize(n);
50 J_sto.resize(n);
51 HqK.resize(n);
52 if (direction_params.hessian_vec_finite_differences) {
53 work_n.resize(n);
54 work_n2.resize(n);
55 work_m.resize(m);
56 } else if (direction_params.full_augmented_hessian) {
57 work_n.resize(n);
58 work_m.resize(m);
59 }
60}
61
62template <Config Conf>
64 [[maybe_unused]] crvec x̂ₖ, crvec pₖ,
65 crvec grad_ψxₖ, rvec qₖ) const {
66 const auto n = problem->get_n();
67
68 // Find inactive indices J
69 auto nJ = problem->eval_inactive_indices_res_lna(γₖ, xₖ, grad_ψxₖ, J_sto);
70 auto J = J_sto.topRows(nJ);
71
72 // There are no inactive indices J
73 if (nJ == 0) {
74 // No free variables, no Newton step possible
75 return false; // Simply use the projection step
76 }
77 // There are inactive indices J
78 if (J.size() == n) { // There are no active indices K
79 // If all indices are free, we can use standard L-BFGS,
80 qₖ = (real_t(1) / γₖ) * pₖ;
81 return lbfgs.apply(qₖ, γₖ);
82 }
83 // There are active indices K
84 qₖ = pₖ;
85 if (direction_params.hessian_vec)
86 qₖ(J).setZero();
87 else
88 qₖ(J) = (real_t(1) / γₖ) * pₖ(J);
89 if (direction_params.hessian_vec) {
90 approximate_hessian_vec_term(xₖ, grad_ψxₖ, qₖ, J);
91 // Compute right-hand side of 6.1c
92 qₖ(J) = (real_t(1) / γₖ) * pₖ(J) - HqK(J);
93 }
94
95 // If there are active indices, we need the specialized version
96 // that only applies L-BFGS to the inactive indices
97 bool success = lbfgs.apply_masked(qₖ, γₖ, J);
98 if (success)
99 return true;
100 // If L-BFGS application failed, qₖ(J) still contains
101 // -∇ψ(x)(J) - HqK(J) or -∇ψ(x)(J), which is not a valid step.
102 // A good alternative is to use H₀ = γI as an L-BFGS estimate.
103 // This seems to perform better in practice than just falling back to a
104 // projected gradient step.
105 switch (direction_params.failure_policy) {
106 case DirectionParams::FallbackToProjectedGradient: return success;
107 case DirectionParams::UseScaledLBFGSInput:
108 if (nJ == n)
109 qₖ *= γₖ;
110 else
111 qₖ(J) *= γₖ;
112 return true;
113 default: return false;
114 }
115}
116
117template <Config Conf>
119 crvec xₖ, crvec grad_ψxₖ, rvec qₖ, crindexvec J) const {
120 const auto m = problem->get_m();
121 // Either compute the Hessian-vector product using finite differences
122 if (direction_params.hessian_vec_finite_differences) {
123 Helpers::calc_augmented_lagrangian_hessian_prod_fd(
124 *problem, xₖ, *y, *Σ, grad_ψxₖ, qₖ, HqK, work_n, work_n2, work_m);
125 }
126 // Or using an exact AD
127 else {
128 if (!direction_params.full_augmented_hessian) {
129 // Compute the product with the Hessian of the Lagrangian
130 problem->eval_hess_L_prod(xₖ, *y, 1, qₖ, HqK);
131 } else {
132 if (problem->provides_eval_hess_ψ_prod()) {
133 // Compute the product with the Hessian of the augmented
134 // Lagrangian
135 problem->eval_hess_ψ_prod(xₖ, *y, *Σ, 1, qₖ, HqK);
136 } else {
137 // Compute the product with the Hessian of the Lagrangian
138 problem->eval_hess_L_prod(xₖ, *y, 1, qₖ, HqK);
139 // And then add the Hessian of the penalty terms, to get the
140 // Hessian of the full augmented Lagrangian (if required)
141 if (direction_params.full_augmented_hessian) {
142 assert(m == 0 || problem->provides_eval_grad_gi());
143 const auto &D = problem->get_box_D();
144 auto &g = work_m;
145 problem->eval_g(xₖ, g);
146 for (index_t i = 0; i < m; ++i) {
147 real_t ζ = g(i) + (*y)(i) / (*Σ)(i);
148 bool inactive =
149 D.lowerbound(i) < ζ && ζ < D.upperbound(i);
150 if (not inactive) {
151 problem->eval_grad_gi(xₖ, i, work_n);
152 auto t = (*Σ)(i)*work_n.dot(qₖ);
153 // TODO: the dot product is more work than
154 // strictly necessary (only over K)
155 for (auto j : J)
156 HqK(j) += work_n(j) * t;
157 }
158 }
159 }
160 }
161 }
162 }
163}
164
} // namespace alpaqa
bool provides_eval_hess_ψ_prod() const
Returns true if the problem provides an implementation of eval_hess_ψ_prod.
length_t get_n() const
[Required] Number of decision variables.
length_t get_m() const
[Required] Number of constraints.
bool provides_eval_inactive_indices_res_lna() const
Returns true if the problem provides an implementation of eval_inactive_indices_res_lna.
bool provides_eval_hess_L_prod() const
Returns true if the problem provides an implementation of eval_hess_L_prod.
bool provides_eval_grad_gi() const
Returns true if the problem provides an implementation of eval_grad_gi.
bool provides_get_box_D() const
Returns true if the problem provides an implementation of get_box_D.
typename Conf::real_t real_t
Definition: config.hpp:51
typename Conf::index_t index_t
Definition: config.hpp:63
typename Conf::rvec rvec
Definition: config.hpp:55
typename Conf::crvec crvec
Definition: config.hpp:56
typename Conf::crindexvec crindexvec
Definition: config.hpp:66
void initialize(const Problem &problem, crvec y, crvec Σ, real_t γ_0, crvec x_0, crvec x̂_0, crvec p_0, crvec grad_ψx_0)
void approximate_hessian_vec_term(crvec xₖ, crvec grad_ψxₖ, rvec qₖ, crindexvec J) const
bool apply(real_t γₖ, crvec xₖ, crvec x̂ₖ, crvec pₖ, crvec grad_ψxₖ, rvec qₖ) const