alpaqa 1.0.0a17
Nonconvex constrained optimization
Loading...
Searching...
No Matches
steihaugcg.hpp
Go to the documentation of this file.
1#pragma once
2
5#include <cmath>
6
7namespace alpaqa {
8
9/// Parameters for @ref SteihaugCG.
10/// @ingroup grp_Parameters
11template <Config Conf>
14 /// Determines the tolerance for termination of the algorithm.
15 /// It terminates if the norm of the residual @f$ r = -g - Hq @f$ is smaller
16 /// than the tolerance @f[
17 /// \mathrm{tolerance} = \min\left(
18 /// \mathrm{tol\_max},\; \mathrm{tol\_scale}\cdot \|g\|\cdot
19 /// \min\left(\mathrm{tol\_scale\_root},\; \sqrt{\|g\|}\right)
20 /// \right)
21 /// @f]
23 /// Determines the tolerance for termination of the algorithm.
24 /// See @ref tol_scale.
26 /// Determines the tolerance for termination of the algorithm.
27 /// Prevents the use of huge tolerances if the gradient norm is still large.
28 /// See @ref tol_scale.
30 /// Limit the number of CG iterations to @f$ \lfloor n \cdot
31 /// \mathrm{max\_iter\_factor} \rceil @f$, where @f$ n @f$ is the number
32 /// of free variables of the problem.
34};
35
36/// Steihaug conjugate gradients procedure based on
37/// https://github.com/scipy/scipy/blob/583e70a50573169fc352b5dc6d94588a97c7389a/scipy/optimize/_trustregion_ncg.py#L44
38template <Config Conf>
39struct SteihaugCG {
41
44
45 SteihaugCG() = default;
47
48 mutable vec z, r, d, Bd, work_eval;
49
50 void resize(length_t n) {
51 z.resize(n);
52 r.resize(n);
53 d.resize(n);
54 Bd.resize(n);
55 work_eval.resize(n);
56 }
57
58 template <class HessFun>
59 real_t solve(const auto &grad, const HessFun &hess_prod,
61 length_t n = grad.size();
62 // get the norm of jacobian and define the origin
63 auto v = [n](auto &v) { return v.topRows(n); };
64 auto z = v(this->z), r = v(this->r), d = v(this->d), Bd = v(this->Bd);
65 auto g = v(grad);
66 auto s = v(step);
67 // init the state for the first iteration
68 z.setZero();
69 r = g;
70 d = -r;
71 real_t r_sq = r.squaredNorm();
72 real_t grad_mag = g.norm();
73
74 // define a default tolerance
75 real_t tolerance =
77 std::fmin(params.tol_scale_root,
78 std::sqrt(grad_mag)));
79
80 // Workspaces and function evaluation
81 auto eval = [&](crvec p) {
83 return p.dot(g) + real_t(0.5) * p.dot(v(work_eval));
84 };
85
86 // Search for the min of the approximation of the objective function.
87 index_t i = 0;
88 const auto max_iter = static_cast<index_t>(
89 std::round(static_cast<real_t>(n) * params.max_iter_factor));
90 while (true) {
91 // do an iteration
92 hess_prod(d, Bd);
93 real_t dBd = d.dot(Bd);
94 if (dBd <= 0) {
95 // Look at the two boundary points.
96 // Find both values of t to get the boundary points such that
97 // ||z + t d|| == trust_radius
98 // and then choose the one with the predicted min value.
99 auto [ta, tb] =
101 auto &pa = r; // Reuse storage
102 auto &pb = d; // Reuse storage
103 pa = z + ta * d;
104 pb = z + tb * d;
105 real_t q_a = eval(pa), q_b = eval(pb);
106 real_t q_min = std::fmin(q_a, q_b);
107 if (q_a == q_min) {
108 s = pa;
109 return q_a;
110 } else {
111 s = pb;
112 return q_b;
113 }
114 }
115
116 real_t alpha = r_sq / dBd;
117 if (!std::isfinite(alpha)) {
118 s.setConstant(NaN<config_t>);
119 return NaN<config_t>;
120 }
121 s = z + alpha * d;
122 if (s.norm() >= trust_radius) {
123 // Find t >= 0 to get the boundary point such that
124 // ||z + t d|| == trust_radius
125 auto [ta, tb] =
127 s = z + tb * d;
128 return eval(s);
129 }
130 r += alpha * Bd;
131 real_t r_next_sq = r.squaredNorm();
132 real_t r_next = std::sqrt(r_next_sq);
134 return eval(s);
136 r_sq = r_next_sq;
137 d = beta_next * d - r;
138 z = s;
139 ++i;
140 }
141 }
142
143 /// Solve the scalar quadratic equation ||z + t d|| == trust_radius.
144 /// This is like a line-sphere intersection.
145 /// Return the two values of t, sorted from low to high.
148 real_t a = d.squaredNorm();
149 real_t b = 2 * z.dot(d);
150 real_t c = z.squaredNorm() - trust_radius * trust_radius;
151 real_t sqrt_discriminant = std::sqrt(b * b - 4 * a * c);
152
153 // The following calculation is mathematically
154 // equivalent to:
155 // ta = (-b - sqrt_discriminant) / (2*a)
156 // tb = (-b + sqrt_discriminant) / (2*a)
157 // but produce smaller round off errors.
158 // Look at Matrix Computation p.97
159 // for a better justification.
160 real_t aux = b + std::copysign(sqrt_discriminant, b);
161 real_t ta = -aux / (2 * a);
162 real_t tb = -2 * c / aux;
163 return std::make_tuple(std::fmin(ta, tb), std::fmax(ta, tb));
164 }
165};
166
167} // namespace alpaqa
#define USING_ALPAQA_CONFIG(Conf)
Definition config.hpp:77
real_t tol_scale_root
Determines the tolerance for termination of the algorithm.
real_t tol_max
Determines the tolerance for termination of the algorithm.
real_t max_iter_factor
Limit the number of CG iterations to , where is the number of free variables of the problem.
real_t tol_scale
Determines the tolerance for termination of the algorithm.
Parameters for SteihaugCG.
typename Conf::real_t real_t
Definition config.hpp:86
typename Conf::index_t index_t
Definition config.hpp:104
typename Conf::length_t length_t
Definition config.hpp:103
constexpr const auto inf
Definition config.hpp:112
typename Conf::rvec rvec
Definition config.hpp:91
typename Conf::crvec crvec
Definition config.hpp:92
typename Conf::vec vec
Definition config.hpp:88
Steihaug conjugate gradients procedure based on https://github.com/scipy/scipy/blob/583e70a50573169fc...
SteihaugCG()=default
SteihaugCG(const Params &params)
void resize(length_t n)
real_t solve(const auto &grad, const HessFun &hess_prod, real_t trust_radius, rvec step) const
static auto get_boundaries_intersections(crvec z, crvec d, real_t trust_radius)
Solve the scalar quadratic equation ||z + t d|| == trust_radius.