alpaqa 1.0.0a18
Nonconvex constrained optimization
Loading...
Searching...
No Matches
structured-lbfgs.tpp
Go to the documentation of this file.
1#pragma once
2
5
6namespace alpaqa {
7
8template <Config Conf>
// Set up the structured L-BFGS direction for a new outer (ALM) iteration:
// validate that the problem provides every evaluation required by the chosen
// direction parameters, store references to the problem and ALM variables,
// and allocate the workspaces used by apply() / approximate_hessian_vec_term().
// NOTE(review): this listing is a partial extraction — the function signature
// and several if-condition lines are missing; see the initialize()
// declaration at the bottom of the page for the full parameter list.
10 const Problem &problem, crvec y, crvec Σ, [[maybe_unused]] real_t γ_0,
// --- Capability checks ---
// Each check throws std::invalid_argument when the problem lacks an
// evaluation required by the configured direction parameters.
// 1) eval_inactive_indices_res_lna() is always required (the condition line
//    is not visible in this extraction).
14 throw std::invalid_argument(
15 "Structured L-BFGS requires eval_inactive_indices_res_lna()");
// 2) Exact Hessian-vector term on the Lagrangian only (hessian_vec_factor
//    nonzero, no finite differences, no full augmented Hessian) requires
//    eval_hess_L_prod().
16 if (direction_params.hessian_vec_factor != 0 &&
17 !direction_params.hessian_vec_finite_differences &&
18 !direction_params.full_augmented_hessian &&
20 throw std::invalid_argument(
21 "Structured L-BFGS requires eval_hess_L_prod(). Alternatively, set "
22 "hessian_vec_factor = 0 or hessian_vec_finite_differences = true.");
// 3) With the full augmented Hessian, either eval_hess_ψ_prod() or
//    eval_hess_L_prod() must be available.
// NOTE(review): the message below reads "_eval_hess_ψ_prod()" — the leading
// underscore looks like a typo; compare the message in check 4 below.
// (String left unchanged here; fixing it requires the full source.)
23 if (direction_params.hessian_vec_factor != 0 &&
24 !direction_params.hessian_vec_finite_differences &&
25 direction_params.full_augmented_hessian &&
26 !(problem.provides_eval_hess_L_prod() ||
28 throw std::invalid_argument(
29 "Structured L-BFGS requires _eval_hess_ψ_prod() or "
30 "eval_hess_L_prod(). Alternatively, set "
31 "hessian_vec_factor = 0 or hessian_vec_finite_differences = true.");
// 4) Falling back from eval_hess_ψ_prod() to eval_hess_L_prod() additionally
//    needs get_box_D() and eval_grad_gi() to add the penalty-term Hessian
//    (see approximate_hessian_vec_term()).
32 if (direction_params.hessian_vec_factor != 0 &&
33 !direction_params.hessian_vec_finite_differences &&
34 direction_params.full_augmented_hessian &&
35 !problem.provides_eval_hess_ψ_prod() &&
36 !(problem.provides_get_box_D() && problem.provides_eval_grad_gi()))
37 throw std::invalid_argument(
38 "Structured L-BFGS requires either eval_hess_ψ_prod() or "
39 "get_box_D() and eval_grad_gi(). Alternatively, set "
40 "hessian_vec_factor = 0, hessian_vec_finite_differences = true, or "
41 "full_augmented_hessian = false.");
42 // Store references to problem and ALM variables
43 this->problem = &problem;
44 this->y.emplace(y);
45 this->Σ.emplace(Σ);
46 // Allocate workspaces
47 const auto n = problem.get_n();
48 const auto m = problem.get_m();
49 lbfgs.resize(n);
50 J_sto.resize(n);
51 HqK.resize(n);
// Finite differencing needs two n-vectors and one m-vector of scratch space;
// the exact full augmented Hessian path needs one of each; otherwise none.
52 if (direction_params.hessian_vec_finite_differences) {
53 work_n.resize(n);
54 work_n2.resize(n);
55 work_m.resize(m);
56 } else if (direction_params.full_augmented_hessian) {
57 work_n.resize(n);
58 work_m.resize(m);
59 }
60}
61
62template <Config Conf>
// Compute a quasi-Newton step qₖ for the current iterate, applying L-BFGS
// only to the inactive (free) variable indices J. Returns true when qₖ holds
// a usable step, false to make the caller fall back to the projected-gradient
// step. NOTE(review): partial extraction — the start of the signature is
// missing; see the apply() declaration at the bottom of the page.
65 crvec grad_ψxₖ, rvec qₖ) const {
66 const auto n = problem->get_n();
67
68 // Find inactive indices J
69 auto nJ = problem->eval_inactive_indices_res_lna(γₖ, xₖ, grad_ψxₖ, J_sto);
70 auto J = J_sto.topRows(nJ);
71
72 // There are no inactive indices J
73 if (nJ == 0) {
74 // No free variables, no Newton step possible
75 return false; // Simply use the projection step
76 }
77 // There are inactive indices J
78 if (J.size() == n) { // There are no active indices K
79 // If all indices are free, we can use standard L-BFGS,
// seeded with pₖ/γₖ (per the comment near the failure handling below, this
// is −∇ψ(x) on the free variables).
80 qₖ = (real_t(1) / γₖ) * pₖ;
81 return lbfgs.apply(qₖ, γₖ);
82 }
83 // There are active indices K
84 qₖ = pₖ;
85 if (direction_params.hessian_vec_factor != 0) {
// Zero the free components first, so the Hessian-vector product below only
// involves the active-set components of the step.
86 qₖ(J).setZero();
87 approximate_hessian_vec_term(xₖ, grad_ψxₖ, qₖ, J);
88 // Compute right-hand side of 6.1c
89 qₖ(J) = (real_t(1) / γₖ) * pₖ(J) -
90 direction_params.hessian_vec_factor * HqK(J);
91 } else {
92 qₖ(J) = (real_t(1) / γₖ) * pₖ(J);
93 }
94 // If there are active indices, we need the specialized version
95 // that only applies L-BFGS to the inactive indices
96 bool success = lbfgs.apply_masked(qₖ, γₖ, J);
97 if (success)
98 return true;
99 // If L-BFGS application failed, qₖ(J) still contains
100 // -∇ψ(x)(J) - HqK(J) or -∇ψ(x)(J), which is not a valid step.
101 // A good alternative is to use H₀ = γI as an L-BFGS estimate.
102 // This seems to perform better in practice than just falling back to a
103 // projected gradient step.
104 switch (direction_params.failure_policy) {
// success is false here, so this returns false (use projected gradient).
105 case DirectionParams::FallbackToProjectedGradient: return success;
106 case DirectionParams::UseScaledLBFGSInput:
// Scale the right-hand side by γ, i.e. apply H₀ = γI in place of L-BFGS.
107 if (nJ == n)
108 qₖ *= γₖ;
109 else
110 qₖ(J) *= γₖ;
111 return true;
112 default: return false;
113 }
114}
115
116template <Config Conf>
// Compute HqK ≈ H·qₖ, the product of (an approximation of) the Hessian of the
// (augmented) Lagrangian with qₖ, using the strategy selected by the
// direction parameters: finite differences, exact Hessian of ψ, or exact
// Hessian of L plus the penalty-term Hessian restricted to the indices in J.
// NOTE(review): partial extraction — the signature is missing; see the
// declaration approximate_hessian_vec_term(crvec xₖ, crvec grad_ψxₖ, rvec qₖ,
// crindexvec J) at the bottom of the page.
119 const auto m = problem->get_m();
120 // Either compute the Hessian-vector product using finite differences
121 if (direction_params.hessian_vec_finite_differences) {
122 Helpers::calc_augmented_lagrangian_hessian_prod_fd(
123 *problem, xₖ, *y, *Σ, grad_ψxₖ, qₖ, HqK, work_n, work_n2, work_m);
124 }
125 // Or using an exact AD
126 else {
127 if (!direction_params.full_augmented_hessian) {
128 // Compute the product with the Hessian of the Lagrangian
129 problem->eval_hess_L_prod(xₖ, *y, 1, qₖ, HqK);
130 } else {
131 if (problem->provides_eval_hess_ψ_prod()) {
132 // Compute the product with the Hessian of the augmented
133 // Lagrangian
134 problem->eval_hess_ψ_prod(xₖ, *y, *Σ, 1, qₖ, HqK);
135 } else {
136 // Compute the product with the Hessian of the Lagrangian
137 problem->eval_hess_L_prod(xₖ, *y, 1, qₖ, HqK);
138 // And then add the Hessian of the penalty terms, to get the
139 // Hessian of the full augmented Lagrangian (if required)
// NOTE(review): this inner check is always true — the enclosing else-branch
// is only reached when full_augmented_hessian is set. Harmless redundancy.
140 if (direction_params.full_augmented_hessian) {
141 assert(m == 0 || problem->provides_eval_grad_gi());
142 const auto &D = problem->get_box_D();
143 auto &g = work_m;
144 problem->eval_g(xₖ, g);
// For each constraint i whose shifted value ζ lies outside the box D (i.e.
// the constraint is active), add Σᵢ (∇gᵢᵀ qₖ) ∇gᵢ to HqK, restricted to the
// free variable indices J.
145 for (index_t i = 0; i < m; ++i) {
146 real_t ζ = g(i) + (*y)(i) / (*Σ)(i);
147 bool inactive =
148 D.lowerbound(i) < ζ && ζ < D.upperbound(i);
149 if (not inactive) {
150 problem->eval_grad_gi(xₖ, i, work_n);
151 auto t = (*Σ)(i)*work_n.dot(qₖ);
152 // TODO: the dot product is more work than
153 // strictly necessary (only over K)
154 for (auto j : J)
155 HqK(j) += work_n(j) * t;
156 }
157 }
158 }
159 }
160 }
161 }
162}
163
164} // namespace alpaqa
bool provides_eval_hess_ψ_prod() const
Returns true if the problem provides an implementation of eval_hess_ψ_prod.
length_t get_n() const
[Required] Number of decision variables.
length_t get_m() const
[Required] Number of constraints.
bool provides_eval_inactive_indices_res_lna() const
Returns true if the problem provides an implementation of eval_inactive_indices_res_lna.
bool provides_eval_hess_L_prod() const
Returns true if the problem provides an implementation of eval_hess_L_prod.
bool provides_eval_grad_gi() const
Returns true if the problem provides an implementation of eval_grad_gi.
bool provides_get_box_D() const
Returns true if the problem provides an implementation of get_box_D.
typename Conf::real_t real_t
Definition config.hpp:86
typename Conf::index_t index_t
Definition config.hpp:104
constexpr const auto inf
Definition config.hpp:112
typename Conf::rvec rvec
Definition config.hpp:91
typename Conf::crvec crvec
Definition config.hpp:92
typename Conf::crindexvec crindexvec
Definition config.hpp:107
void initialize(const Problem &problem, crvec y, crvec Σ, real_t γ_0, crvec x_0, crvec x̂_0, crvec p_0, crvec grad_ψx_0)
void approximate_hessian_vec_term(crvec xₖ, crvec grad_ψxₖ, rvec qₖ, crindexvec J) const
bool apply(real_t γₖ, crvec xₖ, crvec x̂ₖ, crvec pₖ, crvec grad_ψxₖ, rvec qₖ) const