alpaqa 1.0.0a16
Nonconvex constrained optimization
steihaugcg.hpp
#pragma once

#include <alpaqa/config/config.hpp>
#include <cmath>
#include <tuple>

namespace alpaqa {

/// Parameters for @ref SteihaugCG.
/// @ingroup grp_Parameters
template <Config Conf>
struct SteihaugCGParams {
    USING_ALPAQA_CONFIG(Conf);
    /// Determines the tolerance for termination of the algorithm.
    /// It terminates if the norm of the residual @f$ r = -g - Hq @f$ is smaller
    /// than the tolerance @f[
    ///     \mathrm{tolerance} = \min\left(
    ///         \mathrm{tol\_max},\; \mathrm{tol\_scale}\cdot \|g\|\cdot
    ///         \min\left(\mathrm{tol\_scale\_root},\; \sqrt{\|g\|}\right)
    ///     \right)
    /// @f]
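    /// For example, with @f$ \mathrm{tol\_scale} = 10^{-2} @f$,
    /// @f$ \mathrm{tol\_scale\_root} = 1 @f$, @f$ \mathrm{tol\_max} = 10^{-1} @f$
    /// and @f$ \|g\| = 4 @f$ (illustrative values, not defaults), this gives
    /// @f$ \min\left(10^{-1},\; 10^{-2}\cdot 4\cdot\min(1, 2)\right) = 0.04 @f$.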
    real_t tol_scale;
    /// Determines the tolerance for termination of the algorithm.
    /// See @ref tol_scale.
    real_t tol_scale_root;
    /// Determines the tolerance for termination of the algorithm.
    /// Prevents the use of huge tolerances if the gradient norm is still large.
    /// See @ref tol_scale.
    real_t tol_max = inf<config_t>;
    /// Limit the number of CG iterations to @f$ \lfloor n \cdot
    /// \mathrm{max\_iter\_factor} \rceil @f$, where @f$ n @f$ is the number
    /// of free variables of the problem.
    real_t max_iter_factor;
};

/// Steihaug conjugate gradients procedure based on
/// https://github.com/scipy/scipy/blob/583e70a50573169fc352b5dc6d94588a97c7389a/scipy/optimize/_trustregion_ncg.py#L44
template <Config Conf>
struct SteihaugCG {
    USING_ALPAQA_CONFIG(Conf);

    using Params = SteihaugCGParams<config_t>;
    Params params;

    SteihaugCG() = default;
    SteihaugCG(const Params &params) : params{params} {}

    mutable vec z, r, d, Bd, work_eval;

    void resize(length_t n) {
        z.resize(n);
        r.resize(n);
        d.resize(n);
        Bd.resize(n);
        work_eval.resize(n);
    }

    template <class HessFun>
    real_t solve(const auto &grad, const HessFun &hess_prod,
                 real_t trust_radius, rvec step) const {
        length_t n = grad.size();
        // Create length-n views of the workspace vectors.
        auto v = [n](auto &v) { return v.topRows(n); };
        auto z = v(this->z), r = v(this->r), d = v(this->d), Bd = v(this->Bd);
        auto g = v(grad);
        auto s = v(step);
        // init the state for the first iteration
        z.setZero();
        r = g;
        d = -r;
        real_t r_sq = r.squaredNorm();
        real_t grad_mag = g.norm();

        // define a default tolerance
        real_t tolerance =
            std::fmin(params.tol_max,
                      params.tol_scale * grad_mag *
                          std::fmin(params.tol_scale_root, std::sqrt(grad_mag)));

        // Workspace and evaluation of the quadratic model q(p) = g'p + 0.5 p'Hp
        auto eval = [&](crvec p) {
            hess_prod(p, work_eval);
            return p.dot(g) + real_t(0.5) * p.dot(v(work_eval));
        };

        // Search for the min of the approximation of the objective function.
        index_t i = 0;
        const auto max_iter = static_cast<index_t>(
            std::round(static_cast<real_t>(n) * params.max_iter_factor));
        while (true) {
            // do an iteration
            hess_prod(d, Bd);
            real_t dBd = d.dot(Bd);
            if (dBd <= 0) {
                // Look at the two boundary points.
                // Find both values of t to get the boundary points such that
                // ||z + t d|| == trust_radius
                // and then choose the one with the predicted min value.
                auto [ta, tb] =
                    get_boundaries_intersections(z, d, trust_radius);
                auto &pa = r; // Reuse storage
                auto &pb = d; // Reuse storage
                pa = z + ta * d;
                pb = z + tb * d;
                real_t q_a = eval(pa), q_b = eval(pb);
                real_t q_min = std::fmin(q_a, q_b);
                if (q_a == q_min) {
                    s = pa;
                    return q_a;
                } else {
                    s = pb;
                    return q_b;
                }
            }

            real_t alpha = r_sq / dBd;
            s = z + alpha * d;
            if (s.norm() >= trust_radius) {
                // Find t >= 0 to get the boundary point such that
                // ||z + t d|| == trust_radius
                auto [ta, tb] =
                    get_boundaries_intersections(z, d, trust_radius);
                s = z + tb * d;
                return eval(s);
            }
            r += alpha * Bd;
            real_t r_next_sq = r.squaredNorm();
            if (std::sqrt(r_next_sq) < tolerance || i > max_iter)
                return eval(s);
            real_t beta_next = r_next_sq / r_sq;
            r_sq = r_next_sq;
            d = beta_next * d - r;
            z = s;
            ++i;
        }
    }

    /// Solve the scalar quadratic equation ||z + t d|| == trust_radius.
    /// This is like a line-sphere intersection.
    /// Return the two values of t, sorted from low to high.
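    /// For example, with @f$ z = 0 @f$ and @f$ \|d\| = 1 @f$, the two
    /// solutions are @f$ t = \pm\,\mathrm{trust\_radius} @f$.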
    static auto get_boundaries_intersections(crvec z, crvec d,
                                             real_t trust_radius) {
        real_t a = d.squaredNorm();
        real_t b = 2 * z.dot(d);
        real_t c = z.squaredNorm() - trust_radius * trust_radius;
        real_t sqrt_discriminant = std::sqrt(b * b - 4 * a * c);

        // The following calculation is mathematically
        // equivalent to:
        //   ta = (-b - sqrt_discriminant) / (2*a)
        //   tb = (-b + sqrt_discriminant) / (2*a)
        // but produces smaller round-off errors.
        // See Matrix Computations, p. 97,
        // for a better justification.
        real_t aux = b + std::copysign(sqrt_discriminant, b);
        real_t ta = -aux / (2 * a);
        real_t tb = -2 * c / aux;
        return std::make_tuple(std::fmin(ta, tb), std::fmax(ta, tb));
    }
};

} // namespace alpaqa
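
Below is a minimal usage sketch, not part of the header above. It assumes the header is reachable as <alpaqa/accelerators/steihaugcg.hpp> and that the usual double-precision configuration alpaqa::EigenConfigd is used; the 2x2 quadratic model, the hess_prod lambda, and the parameter values are purely illustrative.

#include <alpaqa/accelerators/steihaugcg.hpp>

#include <Eigen/Core>
#include <iostream>

int main() {
    USING_ALPAQA_CONFIG(alpaqa::EigenConfigd);

    // Illustrative quadratic model q(p) = g'p + 0.5 p'Hp in two variables.
    const length_t n = 2;
    mat H(n, n);
    H << 4, 1,
         1, 3;
    vec g(n);
    g << -1, -2;

    // Configure and allocate the solver (parameters set explicitly here).
    alpaqa::SteihaugCG<config_t> cg;
    cg.params.tol_scale       = 1e-2;
    cg.params.tol_scale_root  = 1;
    cg.params.max_iter_factor = 2;
    cg.resize(n);

    // Hessian-vector product callback: writes H * p into the output argument.
    auto hess_prod = [&](crvec p, rvec Hp) { Hp = H * p; };

    // Minimize the quadratic model inside a trust region of radius 1.
    vec step(n);
    real_t trust_radius = 1;
    real_t model_value  = cg.solve(g, hess_prod, trust_radius, step);
    std::cout << "step = " << step.transpose()
              << ", model value = " << model_value << '\n';
}

As the listing shows, solve() writes the computed step into its last argument and returns the value of the quadratic model at that step, which a trust-region caller can use for its acceptance test.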