Nonconvex constrained optimization
Loading...
Searching...
No Matches
newton-tr.hpp
Go to the documentation of this file.
1#pragma once
2
7#include <cmath>
8#include <limits>
9#include <optional>
10#include <stdexcept>
11
12namespace alpaqa {
13
14/// Parameters for the @ref NewtonTRDirection class.
15/// @ingroup grp_Parameters
16template <Config Conf>
19 /// The factor in front of the term @f$ \langle H_{\mathcal{JK}}
20 /// d_{\mathcal {K}}, d_{\mathcal{J}} \rangle @f$ in equation (9) in
21 /// @cite bodard2023pantr.
22 /// Set it to zero to leave out that term (this usually only slightly
23 /// increases the number of iterations, and eliminates one Hessian-vector
24 /// product per iteration, improving the overall runtime).
26 /// Use finite differences to compute Hessian-vector products.
27 bool finite_diff = false;
28 /// Size of the perturbation for the finite differences computation.
29 /// Multiplied by @f$ 1+\|x\| @f$.
31 std::sqrt(std::numeric_limits<real_t>::epsilon());
32};
33
34/// @ingroup grp_DirectionProviders
35template <Config Conf>
38
42
47
48 NewtonTRDirection() = default;
50 : steihaug(params.accelerator), direction_params(params.direction) {}
52 const DirectionParams &directionparams = {})
53 : steihaug(params), direction_params(directionparams) {}
54
55 /// @see @ref PANTRDirection::initialize
56 void initialize(const Problem &problem, [[maybe_unused]] crvec y,
57 [[maybe_unused]] crvec Σ, [[maybe_unused]] real_t γ_0,
58 [[maybe_unused]] crvec x_0, [[maybe_unused]] crvec x̂_0,
59 [[maybe_unused]] crvec p_0,
60 [[maybe_unused]] crvec grad_ψx_0) {
61 if (!direction_params.finite_diff &&
62 !problem.provides_eval_augmented_lagrangian_hessian_product() &&
63 !(problem.provides_eval_lagrangian_hessian_product() &&
64 problem.get_num_constraints() == 0))
65 throw std::invalid_argument(
66 "NewtonTR without finite differences requires "
67 "Problem::eval_augmented_lagrangian_hessian_product()");
68 if (!problem.provides_eval_inactive_indices_res_lna())
69 throw std::invalid_argument(
70 "NewtonTR requires "
71 "Problem::eval_inactive_indices_res_lna()");
72 // Store references to problem and ALM variables
73 this->problem = &problem;
74 this->y.emplace(y);
75 this->Σ.emplace(Σ);
76 // Resize workspaces
77 const auto n = problem.get_num_variables(),
78 m = problem.get_num_constraints();
79 JK_sto.resize(n);
80 rJ_sto.resize(n);
81 qJ_sto.resize(n);
82 work.resize(n);
83 work_2.resize(n);
84 steihaug.resize(n);
85 if (direction_params.finite_diff) {
86 work_n_fd.resize(n);
87 work_m_fd.resize(m);
88 }
89 }
90
91 /// @see @ref PANTRDirection::has_initial_direction
92 bool has_initial_direction() const { return true; }
93
94 /// @see @ref PANTRDirection::update
95 bool update([[maybe_unused]] real_t γₖ, [[maybe_unused]] real_t γₙₑₓₜ,
96 [[maybe_unused]] crvec xₖ, [[maybe_unused]] crvec xₙₑₓₜ,
97 [[maybe_unused]] crvec pₖ, [[maybe_unused]] crvec pₙₑₓₜ,
98 [[maybe_unused]] crvec grad_ψxₖ,
99 [[maybe_unused]] crvec grad_ψxₙₑₓₜ) {
100 return true;
101 }
102
103 /// @see @ref PANTRDirection::apply
104 real_t apply([[maybe_unused]] real_t γₖ, [[maybe_unused]] crvec xₖ,
105 [[maybe_unused]] crvec x̂ₖ, crvec pₖ,
106 [[maybe_unused]] crvec grad_ψxₖ, real_t radius,
107 rvec qₖ) const {
108
109 if (!std::isfinite(radius))
110 throw std::logic_error("Invalid trust radius");
111 if (radius < std::numeric_limits<real_t>::epsilon())
112 throw std::logic_error("Trust radius too small");
113
114 // Newton with exact Hessian
115
116 // Find inactive and active constraints
117 const auto n = problem->get_num_variables();
118 index_t nJ =
119 problem->eval_inactive_indices_res_lna(γₖ, xₖ, grad_ψxₖ, JK_sto);
120 crindexvec J = JK_sto.topRows(nJ);
121 rindexvec K = JK_sto.bottomRows(n - nJ);
123 auto rJ = rJ_sto.topRows(nJ);
124 auto qJ = qJ_sto.topRows(nJ);
125 rJ = (-real_t(1) / γₖ) * pₖ(J);
126 qₖ(K) = pₖ(K);
127 qₖ(J).setZero();
128 real_t norm_qK_sq = pₖ(K).squaredNorm();
129
130 // Hessian-vector term
131 if (direction_params.hessian_vec_factor != 0) {
132 if (direction_params.finite_diff) {
133 real_t ε =
134 (1 + xₖ(J).norm()) * direction_params.finite_diff_stepsize;
135 /// TODO: use a better rule to determine the step size
136 work = xₖ + ε * qₖ;
137 problem->eval_augmented_lagrangian_gradient(
139 rJ.noalias() += (work_2 - grad_ψxₖ)(J) *
140 (direction_params.hessian_vec_factor / ε);
141 } else {
142 problem->eval_augmented_lagrangian_hessian_product(xₖ, *y, *Σ,
143 1, qₖ, work);
144 rJ.noalias() += work(J) * direction_params.hessian_vec_factor;
145 }
146 }
147
148 // Hessian-vector product on subset J
149 auto hess_vec_mult = [&](crvec p, rvec Bp) {
150 if (direction_params.finite_diff) {
151 real_t ε =
152 (1 + xₖ(J).norm()) * direction_params.finite_diff_stepsize;
153 /// TODO: use a better rule to determine the step size
154 work = xₖ;
155 work(J) += ε * p;
156 problem->eval_augmented_lagrangian_gradient(
158 Bp.topRows(nJ) = (work_2 - grad_ψxₖ)(J) / ε;
159 } else {
160 work.setZero();
161 work(J) = p;
162 problem->eval_augmented_lagrangian_hessian_product(
163 xₖ, *y, *Σ, 1, work, work_2);
164 Bp.topRows(nJ) = work_2(J);
165 }
166 };
167
168 // Steihaug conjugate gradients
169 real_t qJ_model = steihaug.solve(rJ, hess_vec_mult, radius, qJ);
170 qₖ(J) = qJ;
171 return qJ_model - norm_qK_sq / (2 * γₖ);
172 }
173
174 /// @see @ref PANTRDirection::changed_γ
175 void changed_γ([[maybe_unused]] real_t γₖ, [[maybe_unused]] real_t old_γₖ) {
176 }
177
178 /// @see @ref PANTRDirection::reset
179 void reset() {}
180
181 /// @see @ref PANTRDirection::get_name
182 std::string get_name() const {
183 return "NewtonTRDirection<" + std::string(config_t::get_name()) + '>';
184 }
185
186 auto get_params() const {
187 return std::tie(steihaug.params, direction_params);
188 }
189
192 const Problem *problem = nullptr;
193#ifndef _WIN32
194 std::optional<crvec> y = std::nullopt;
195 std::optional<crvec> Σ = std::nullopt;
196#else
197 std::optional<vec> y = std::nullopt;
198 std::optional<vec> Σ = std::nullopt;
199#endif
201 mutable vec rJ_sto;
202 mutable vec qJ_sto;
204};
205
206} // namespace alpaqa
The main polymorphic minimization problem interface.
#define USING_ALPAQA_CONFIG(Conf)
Definition config.hpp:77
Parameters for the NewtonTRDirection class.
Definition newton-tr.hpp:17
Parameters for SteihaugCG.
typename Conf::indexvec indexvec
Definition config.hpp:105
typename Conf::real_t real_t
Definition config.hpp:86
typename Conf::rindexvec rindexvec
Definition config.hpp:106
typename Conf::index_t index_t
Definition config.hpp:104
typename Conf::rvec rvec
Definition config.hpp:91
typename Conf::crvec crvec
Definition config.hpp:92
typename Conf::vec vec
Definition config.hpp:88
typename Conf::crindexvec crindexvec
Definition config.hpp:107
std::string get_name() const
void changed_γ(real_t γₖ, real_t old_γₖ)
SteihaugCG< config_t > steihaug
real_t apply(real_t γₖ, crvec xₖ, crvec x̂ₖ, crvec pₖ, crvec grad_ψxₖ, real_t radius, rvec qₖ) const
NewtonTRDirection(const Params &params)
Definition newton-tr.hpp:49
NewtonTRDirection(const AcceleratorParams &params, const DirectionParams &directionparams={})
Definition newton-tr.hpp:51
DirectionParams direction_params
bool update(real_t γₖ, real_t γₙₑₓₜ, crvec xₖ, crvec xₙₑₓₜ, crvec pₖ, crvec pₙₑₓₜ, crvec grad_ψxₖ, crvec grad_ψxₙₑₓₜ)
Definition newton-tr.hpp:95
std::optional< crvec > y
void initialize(const Problem &problem, crvec y, crvec Σ, real_t γ_0, crvec x_0, crvec x̂_0, crvec p_0, crvec grad_ψx_0)
Definition newton-tr.hpp:56
const Problem * problem
SteihaugCGParams< config_t > AcceleratorParams
Definition newton-tr.hpp:40
bool has_initial_direction() const
Definition newton-tr.hpp:92
NewtonTRDirectionParams< config_t > DirectionParams
Definition newton-tr.hpp:41
TypeErasedProblem< config_t > Problem
Definition newton-tr.hpp:39
std::optional< crvec > Σ
Steihaug conjugate gradients procedure based on https://github.com/scipy/scipy/blob/583e70a50573169fc...
static void compute_complement(std::span< const index_t > in, std::span< index_t > out)
Definition index-set.hpp:37