isce3 0.25.0
Loading...
Searching...
No Matches
RootFind1dBracket.icc
1#pragma once
2#include <cmath>
3#include <cstdio>
4#include <limits>
5
6#include <isce3/core/Common.h>
7
8namespace isce3 { namespace math {
9
10// Use the commmented macro to debug root-finding algorithms.
11#ifndef ISCE3_DEBUG_ROOT
12// #define ISCE3_DEBUG_ROOT(x) x
13#define ISCE3_DEBUG_ROOT(x)
14#endif
15
16template<typename T>
17CUDA_HOSTDEV inline bool opposite_sign(T a, T b)
18{
19 return std::signbit(a) ^ std::signbit(b);
20}
21
22template<typename T>
23inline constexpr T epsilon = std::numeric_limits<T>::epsilon();
24
25template<typename T>
26CUDA_HOSTDEV inline int count_bisection_steps(T a, T b, T tol)
27{
28 return static_cast<int>(std::ceil(std::log2(std::abs((a - b) / tol))));
29}
30
31/* Excuse the verbose comments. I started with the cyrptic Fortran
32 * implementation on netlib.org before finding Brent's book with lots of
33 * description and a clearer ALGOL implementation.
34 *
35 * Brent's algorithm is a little ad-hoc in how it chooses between bisection,
36 * linear, and quadratic steps. Its close attention to finite precision is nice.
37 * Looking at other bracketing methods,
38 *
39 * Chandrupatla has a more understandible step selection criterion.
40 * T.R. Chandrupatla, "A new hybrid quadratic/bisection algorithm for
41 * finding the zero of a nonlinear function without using derivatives,"
42 * Advances in Engineering Software, Vol. 28, No. 3, 1997, pp 145-149.
43 * https://doi.org/10.1016/S0965-9978(96)00051-8.
44 *
45 * TOMS748 improves convergence slightly with the option for a cubic step.
46 * G. E. Alefeld, F. A. Potra, and Yixun Shi. 1995. Algorithm 748: enclosing
47 * zeros of continuous functions. ACM Trans. Math. Softw. 21, 3 (Sept.
48 * 1995), 327–344. https://doi.org/10.1145/210089.210111
49 *
50 * ITP is an interesting new method worth attention.
51 * I. F. D. Oliveira and R. H. C. Takahashi. 2020. An Enhancement of the
52 * Bisection Method Average Performance Preserving Minmax Optimality. ACM
53 * Trans. Math. Softw. 47, 1, Article 5 (January 2021), 24 pages.
54 * https://doi.org/10.1145/3423597
55 */
56template<typename T, typename Func>
57CUDA_HOSTDEV isce3::error::ErrorCode find_zero_brent(
58 T a, T b, Func f, const T tol, T* root)
59{
60 using namespace isce3::error;
61 using std::abs;
62 if (root == nullptr) {
63 return ErrorCode::NullDereference;
64 }
65 // tol == 0 is okay since we handle relative precision explicitly.
66 if (tol < T(0)) {
67 return ErrorCode::InvalidTolerance;
68 }
69 // a, b, and c are locations on the x-axis
70 // b is the current best estimate of the root.
71 // a is the previous estimate.
72 // The current interval is [b,c] or [c,b] (sorted by function value)
73 // We may have the case a == c.
74 // d and e are the previous two steps
75 // p and q are numerator/denominator of interpolation step.
76 T c, d, e, fa, fb, fc, p, q, r, s, tol1;
77 ISCE3_DEBUG_ROOT(const char* step_type;)
78
79 fa = f(a);
80 if (fa == T(0)) {
81 *root = a;
82 return ErrorCode::Success;
83 }
84 fb = f(b);
85 if (fb == T(0)) {
86 *root = b;
87 return ErrorCode::Success;
88 }
89 // Check that f(a) and f(b) have different signs
90 // so any continuous function f has a zero in interval [a,b]
91 if (!opposite_sign(fa, fb)) {
92 return ErrorCode::InvalidInterval;
93 }
94
95 c = a;
96 fc = fa;
97 e = d = b - a;
98
99 ISCE3_DEBUG_ROOT(printf("b,a,step_type,x,f(x)\n");)
100
101 // We could replace this for loop with a while(1) since convergence is
102 // guaranteed in N^2 steps where N is the number of bisection steps.
103 // However maybe we should defend against weird cases like non-deterministic
104 // functions. If so, we may as well also limit effort to the practical
105 // limit 3*N since further iterations are inefficient.
106 tol1 = tol > T(0) ? tol : epsilon<T>;
107 const int maxiter = 3 * count_bisection_steps(a, b, tol1);
108 for (int i = 0; i < maxiter; ++i) {
109 // Sort so that b is always the best guess based on the heuristic that
110 // the point closer to the x-axis is the better guess.
111 // That is, if b isn't best then swap b & c using a as a temporary,
112 // which excludes the possibility of a quadratic step.
113 if (abs(fc) < abs(fb)) {
114 a = b;
115 b = c;
116 c = a;
117 fa = fb;
118 fb = fc;
119 fc = fa;
120 }
121 // tol1 is the minimum valid step size, considering both machine
122 // precision and the requested tolerance.
123 tol1 = 2 * epsilon<T> * abs(b) + T(0.5) * tol;
124 // xm is the bisection step from b to the midpoint of the interval.
125 const T xm = T(0.5) * (c - b);
126 // We're done if interval is small enough or f(b) is exactly zero.
127 // Return b (not b + xm) assuming that by this point we've reached
128 // superlinear convergence.
129 if ((abs(xm) <= tol1) || (fb == T(0))) {
130 *root = b;
131 return ErrorCode::Success;
132 }
133 // see if a bisection is forced
134 // That occurs when the 2nd to last step was too small (e.g., a nudge)
135 // or the solution didn't improve on the last iteration.
136 if ((abs(e) < tol1) || (abs(fa) <= abs(fb))) {
137 ISCE3_DEBUG_ROOT(
138 if (abs(e) < tol1) { step_type = "bisect1"; } else {
139 step_type = "bisect2";
140 })
141 e = d = xm;
142 } else {
143 // Since we haven't found the root yet, we know fa > 0 so division
144 // is safe.
145 s = fb / fa;
146 if (a == c) {
147 // linear interpolation
148 // Only valid interp when cache has only two distinct values.
149 ISCE3_DEBUG_ROOT(step_type = "linear";)
150 p = 2 * xm * s;
151 q = T(1) - s;
152 } else {
153 // inverse quadratic interpolation
154 ISCE3_DEBUG_ROOT(step_type = "quadratic";)
155 q = fa / fc;
156 r = fb / fc;
157 p = s * (2 * xm * q * (q - r) - (b - a) * (r - T(1)));
158 q = (q - T(1)) * (r - T(1)) * (s - T(1));
159 }
160 // Ensure positive p to save an abs() call.
161 if (p > T(0)) {
162 q = -q;
163 } else {
164 p = -p;
165 }
166 s = e;
167 e = d;
168 // We want to avoid overflow and division by zero computing the
169 // ratio p/q, and we must require the step won't exceed the
170 // interval. The 3/2 factor (which bounds the step to 3/4 of the
171 // interval) further ensures that the inverse quadratic is single-
172 // valued on the interval.
173 // The other condition makes sure that the interval is halved in at
174 // worst every other iteration.
175 if (((2 * p) >= (3 * xm * q - abs(tol1 * q))) ||
176 (p >= abs(T(0.5) * s * q))) {
177 // Fall back to bisection step.
178 ISCE3_DEBUG_ROOT(
179 if ((2 * p) >= (3 * xm * q - abs(tol1 * q))) {
180 step_type = "bisect3";
181 } else { step_type = "bisect4"; })
182 e = d = xm;
183 } else {
184 // Use the interpolation step (either linear or quadratic).
185 d = p / q;
186 }
187 }
188 a = b;
189 fa = fb;
190 // Make sure we step at least as large as the tolerance.
191 if (abs(d) <= tol1) {
192 if (xm <= T(0)) {
193 ISCE3_DEBUG_ROOT(step_type = "nudge_left";)
194 b = b - tol1;
195 } else {
196 ISCE3_DEBUG_ROOT(step_type = "nudge_right";)
197 b = b + tol1;
198 }
199 } else {
200 b = b + d;
201 }
202 fb = f(b);
203 ISCE3_DEBUG_ROOT(
204 printf("%.17g,%.17g,%s,%.17g,%.17g\n", a, c, step_type, b, fb);)
205 // Make sure to set c such that [b,c] encloses the root. Note that
206 // when new guess has opposite sign from old guess we exclude a
207 // quadratic step on the next iteration.
208 if (!opposite_sign(fb, fc)) {
209 c = a;
210 fc = fa;
211 e = d = b - a;
212 }
213 }
214 *root = b;
215 return ErrorCode::FailedToConverge;
216}
217
218// Good old bisection. Only one branch likely to diverge, which may be of
219// interest on the GPU.
220
221template<typename T, typename Func>
222CUDA_HOSTDEV isce3::error::ErrorCode find_zero_bisection_iter(
223 T a, T b, Func f, int niter, T* root)
224{
225 using namespace isce3::error;
226 if (root == nullptr) {
227 return ErrorCode::NullDereference;
228 }
229 if (niter < 0) {
230 return ErrorCode::InvalidTolerance;
231 }
232 T midpoint, fa = f(a), fb = f(b);
233 if (fa == T(0)) {
234 *root = a;
235 return ErrorCode::Success;
236 }
237 fb = f(b);
238 if (fb == T(0)) {
239 *root = b;
240 return ErrorCode::Success;
241 }
242 if (!opposite_sign(fa, fb)) {
243 return ErrorCode::InvalidInterval;
244 }
245 ISCE3_DEBUG_ROOT(printf("b,a,step_type,x,f(x)\n");)
246 for (int i = 0; i < niter; ++i) {
247 midpoint = (a + b) / 2;
248 const T fmid = f(midpoint);
249 if (fmid == T(0)) break;
250 ISCE3_DEBUG_ROOT(printf(
251 "%.17g,%.17g,bisect,%.17g,%.17g\n", a, b, midpoint, fmid);)
252 if (opposite_sign(fa, fmid)) {
253 b = midpoint;
254 fb = fmid;
255 } else {
256 a = midpoint;
257 fa = fmid;
258 }
259 }
260 *root = midpoint;
261 return ErrorCode::Success;
262}
263
264
265template<typename T, typename Func>
266CUDA_HOSTDEV isce3::error::ErrorCode find_zero_bisection(
267 T a, T b, Func f, T tol, T* root)
268{
269 // tol == 0 NOT okay since we're using it to compute a fixed number of
270 // bisection iterations and allowing some wasted effort if we refine the
271 // interval to within relative tolerance before ending.
272 if (tol <= T(0)) {
273 return isce3::error::ErrorCode::InvalidTolerance;
274 }
275 const int niter = count_bisection_steps(a, b, tol);
276 ISCE3_DEBUG_ROOT(printf("bisection steps = %d\n", niter);)
277 return find_zero_bisection_iter(a, b, f, niter, root);
278}
279
280}} // namespace isce3::math
base interpolator is an abstract base class
Definition BinarySearchFunc.cpp:5

Generated for ISCE3.0 by doxygen 1.13.2.