blob: a8199623912d50d3439cb11e59c0c9a7712d125d
1 | /* |
2 | * erf function: Copyright (c) 2006 John Maddock |
3 | * This file is part of FFmpeg. |
4 | * |
5 | * FFmpeg is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU Lesser General Public |
7 | * License as published by the Free Software Foundation; either |
8 | * version 2.1 of the License, or (at your option) any later version. |
9 | * |
10 | * FFmpeg is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * Lesser General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU Lesser General Public |
16 | * License along with FFmpeg; if not, write to the Free Software |
17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | */ |
19 | |
20 | /** |
21 | * @file |
22 | * Replacements for frequently missing libm functions |
23 | */ |
24 | |
25 | #ifndef AVUTIL_LIBM_H |
26 | #define AVUTIL_LIBM_H |
27 | |
28 | #include <math.h> |
29 | #include "config.h" |
30 | #include "attributes.h" |
31 | #include "intfloat.h" |
32 | #include "mathematics.h" |
33 | |
34 | #if HAVE_MIPSFPU && HAVE_INLINE_ASM |
35 | #include "libavutil/mips/libm_mips.h" |
36 | #endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM*/ |
37 | |
38 | #if !HAVE_ATANF |
39 | #undef atanf |
40 | #define atanf(x) ((float)atan(x)) |
41 | #endif /* HAVE_ATANF */ |
42 | |
43 | #if !HAVE_ATAN2F |
44 | #undef atan2f |
45 | #define atan2f(y, x) ((float)atan2(y, x)) |
46 | #endif /* HAVE_ATAN2F */ |
47 | |
48 | #if !HAVE_POWF |
49 | #undef powf |
50 | #define powf(x, y) ((float)pow(x, y)) |
51 | #endif /* HAVE_POWF */ |
52 | |
53 | #if !HAVE_CBRT |
54 | static av_always_inline double cbrt(double x) |
55 | { |
56 | return x < 0 ? -pow(-x, 1.0 / 3.0) : pow(x, 1.0 / 3.0); |
57 | } |
58 | #endif /* HAVE_CBRT */ |
59 | |
60 | #if !HAVE_CBRTF |
61 | static av_always_inline float cbrtf(float x) |
62 | { |
63 | return x < 0 ? -powf(-x, 1.0 / 3.0) : powf(x, 1.0 / 3.0); |
64 | } |
65 | #endif /* HAVE_CBRTF */ |
66 | |
67 | #if !HAVE_COPYSIGN |
68 | static av_always_inline double copysign(double x, double y) |
69 | { |
70 | uint64_t vx = av_double2int(x); |
71 | uint64_t vy = av_double2int(y); |
72 | return av_int2double((vx & UINT64_C(0x7fffffffffffffff)) | (vy & UINT64_C(0x8000000000000000))); |
73 | } |
74 | #endif /* HAVE_COPYSIGN */ |
75 | |
76 | #if !HAVE_COSF |
77 | #undef cosf |
78 | #define cosf(x) ((float)cos(x)) |
79 | #endif /* HAVE_COSF */ |
80 | |
81 | #if !HAVE_ERF |
82 | static inline double ff_eval_poly(const double *coeff, int size, double x) { |
83 | double sum = coeff[size-1]; |
84 | int i; |
85 | for (i = size-2; i >= 0; --i) { |
86 | sum *= x; |
87 | sum += coeff[i]; |
88 | } |
89 | return sum; |
90 | } |
91 | |
92 | /** |
93 | * erf function |
94 | * Algorithm taken from the Boost project, source: |
95 | * http://www.boost.org/doc/libs/1_46_1/boost/math/special_functions/erf.hpp |
96 | * Use, modification and distribution are subject to the |
97 | * Boost Software License, Version 1.0 (see notice below). |
98 | * Boost Software License - Version 1.0 - August 17th, 2003 |
99 | Permission is hereby granted, free of charge, to any person or organization |
100 | obtaining a copy of the software and accompanying documentation covered by |
101 | this license (the "Software") to use, reproduce, display, distribute, |
102 | execute, and transmit the Software, and to prepare derivative works of the |
103 | Software, and to permit third-parties to whom the Software is furnished to |
104 | do so, all subject to the following: |
105 | |
106 | The copyright notices in the Software and this entire statement, including |
107 | the above license grant, this restriction and the following disclaimer, |
108 | must be included in all copies of the Software, in whole or in part, and |
109 | all derivative works of the Software, unless such copies or derivative |
110 | works are solely in the form of machine-executable object code generated by |
111 | a source language processor. |
112 | |
113 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
114 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
115 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT |
116 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE |
117 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, |
118 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
119 | DEALINGS IN THE SOFTWARE. |
120 | */ |
121 | static inline double erf(double z) |
122 | { |
123 | #ifndef FF_ARRAY_ELEMS |
124 | #define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0])) |
125 | #endif |
126 | double result; |
127 | |
128 | /* handle the symmetry: erf(-x) = -erf(x) */ |
129 | if (z < 0) |
130 | return -erf(-z); |
131 | |
132 | /* branch based on range of z, and pick appropriate approximation */ |
133 | if (z == 0) |
134 | return 0; |
135 | else if (z < 1e-10) |
136 | return z * 1.125 + z * 0.003379167095512573896158903121545171688; |
137 | else if (z < 0.5) { |
138 | // Maximum Deviation Found: 1.561e-17 |
139 | // Expected Error Term: 1.561e-17 |
140 | // Maximum Relative Change in Control Points: 1.155e-04 |
141 | // Max Error found at double precision = 2.961182e-17 |
142 | |
143 | static const double y = 1.044948577880859375; |
144 | static const double p[] = { |
145 | 0.0834305892146531832907, |
146 | -0.338165134459360935041, |
147 | -0.0509990735146777432841, |
148 | -0.00772758345802133288487, |
149 | -0.000322780120964605683831, |
150 | }; |
151 | static const double q[] = { |
152 | 1, |
153 | 0.455004033050794024546, |
154 | 0.0875222600142252549554, |
155 | 0.00858571925074406212772, |
156 | 0.000370900071787748000569, |
157 | }; |
158 | double zz = z * z; |
159 | return z * (y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), zz) / ff_eval_poly(q, FF_ARRAY_ELEMS(q), zz)); |
160 | } |
161 | /* here onwards compute erfc */ |
162 | else if (z < 1.5) { |
163 | // Maximum Deviation Found: 3.702e-17 |
164 | // Expected Error Term: 3.702e-17 |
165 | // Maximum Relative Change in Control Points: 2.845e-04 |
166 | // Max Error found at double precision = 4.841816e-17 |
167 | static const double y = 0.405935764312744140625; |
168 | static const double p[] = { |
169 | -0.098090592216281240205, |
170 | 0.178114665841120341155, |
171 | 0.191003695796775433986, |
172 | 0.0888900368967884466578, |
173 | 0.0195049001251218801359, |
174 | 0.00180424538297014223957, |
175 | }; |
176 | static const double q[] = { |
177 | 1, |
178 | 1.84759070983002217845, |
179 | 1.42628004845511324508, |
180 | 0.578052804889902404909, |
181 | 0.12385097467900864233, |
182 | 0.0113385233577001411017, |
183 | 0.337511472483094676155e-5, |
184 | }; |
185 | result = y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), z - 0.5) / ff_eval_poly(q, FF_ARRAY_ELEMS(q), z - 0.5); |
186 | result *= exp(-z * z) / z; |
187 | return 1 - result; |
188 | } |
189 | else if (z < 2.5) { |
190 | // Max Error found at double precision = 6.599585e-18 |
191 | // Maximum Deviation Found: 3.909e-18 |
192 | // Expected Error Term: 3.909e-18 |
193 | // Maximum Relative Change in Control Points: 9.886e-05 |
194 | static const double y = 0.50672817230224609375; |
195 | static const double p[] = { |
196 | -0.0243500476207698441272, |
197 | 0.0386540375035707201728, |
198 | 0.04394818964209516296, |
199 | 0.0175679436311802092299, |
200 | 0.00323962406290842133584, |
201 | 0.000235839115596880717416, |
202 | }; |
203 | static const double q[] = { |
204 | 1, |
205 | 1.53991494948552447182, |
206 | 0.982403709157920235114, |
207 | 0.325732924782444448493, |
208 | 0.0563921837420478160373, |
209 | 0.00410369723978904575884, |
210 | }; |
211 | result = y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), z - 1.5) / ff_eval_poly(q, FF_ARRAY_ELEMS(q), z - 1.5); |
212 | result *= exp(-z * z) / z; |
213 | return 1 - result; |
214 | } |
215 | else if (z < 4.5) { |
216 | // Maximum Deviation Found: 1.512e-17 |
217 | // Expected Error Term: 1.512e-17 |
218 | // Maximum Relative Change in Control Points: 2.222e-04 |
219 | // Max Error found at double precision = 2.062515e-17 |
220 | static const double y = 0.5405750274658203125; |
221 | static const double p[] = { |
222 | 0.00295276716530971662634, |
223 | 0.0137384425896355332126, |
224 | 0.00840807615555585383007, |
225 | 0.00212825620914618649141, |
226 | 0.000250269961544794627958, |
227 | 0.113212406648847561139e-4, |
228 | }; |
229 | static const double q[] = { |
230 | 1, |
231 | 1.04217814166938418171, |
232 | 0.442597659481563127003, |
233 | 0.0958492726301061423444, |
234 | 0.0105982906484876531489, |
235 | 0.000479411269521714493907, |
236 | }; |
237 | result = y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), z - 3.5) / ff_eval_poly(q, FF_ARRAY_ELEMS(q), z - 3.5); |
238 | result *= exp(-z * z) / z; |
239 | return 1 - result; |
240 | } |
241 | /* differ from Boost here, the claim of underflow of erfc(x) past 5.8 is |
242 | * slightly incorrect, change to 5.92 |
243 | * (really somewhere between 5.9125 and 5.925 is when it saturates) */ |
244 | else if (z < 5.92) { |
245 | // Max Error found at double precision = 2.997958e-17 |
246 | // Maximum Deviation Found: 2.860e-17 |
247 | // Expected Error Term: 2.859e-17 |
248 | // Maximum Relative Change in Control Points: 1.357e-05 |
249 | static const double y = 0.5579090118408203125; |
250 | static const double p[] = { |
251 | 0.00628057170626964891937, |
252 | 0.0175389834052493308818, |
253 | -0.212652252872804219852, |
254 | -0.687717681153649930619, |
255 | -2.5518551727311523996, |
256 | -3.22729451764143718517, |
257 | -2.8175401114513378771, |
258 | }; |
259 | static const double q[] = { |
260 | 1, |
261 | 2.79257750980575282228, |
262 | 11.0567237927800161565, |
263 | 15.930646027911794143, |
264 | 22.9367376522880577224, |
265 | 13.5064170191802889145, |
266 | 5.48409182238641741584, |
267 | }; |
268 | result = y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), 1 / z) / ff_eval_poly(q, FF_ARRAY_ELEMS(q), 1 / z); |
269 | result *= exp(-z * z) / z; |
270 | return 1 - result; |
271 | } |
272 | /* handle the nan case, but don't use isnan for max portability */ |
273 | else if (z != z) |
274 | return z; |
275 | /* finally return saturated result */ |
276 | else |
277 | return 1; |
278 | } |
279 | #endif /* HAVE_ERF */ |
280 | |
281 | #if !HAVE_EXPF |
282 | #undef expf |
283 | #define expf(x) ((float)exp(x)) |
284 | #endif /* HAVE_EXPF */ |
285 | |
286 | #if !HAVE_EXP2 |
287 | #undef exp2 |
288 | #define exp2(x) exp((x) * M_LN2) |
289 | #endif /* HAVE_EXP2 */ |
290 | |
291 | #if !HAVE_EXP2F |
292 | #undef exp2f |
293 | #define exp2f(x) ((float)exp2(x)) |
294 | #endif /* HAVE_EXP2F */ |
295 | |
296 | #if !HAVE_ISINF |
297 | #undef isinf |
298 | /* Note: these do not follow the BSD/Apple/GNU convention of returning -1 for |
299 | -Inf, +1 for Inf, 0 otherwise, but merely follow the POSIX/ISO mandated spec of |
300 | returning a non-zero value for +/-Inf, 0 otherwise. */ |
301 | static av_always_inline av_const int avpriv_isinff(float x) |
302 | { |
303 | uint32_t v = av_float2int(x); |
304 | if ((v & 0x7f800000) != 0x7f800000) |
305 | return 0; |
306 | return !(v & 0x007fffff); |
307 | } |
308 | |
309 | static av_always_inline av_const int avpriv_isinf(double x) |
310 | { |
311 | uint64_t v = av_double2int(x); |
312 | if ((v & 0x7ff0000000000000) != 0x7ff0000000000000) |
313 | return 0; |
314 | return !(v & 0x000fffffffffffff); |
315 | } |
316 | |
317 | #define isinf(x) \ |
318 | (sizeof(x) == sizeof(float) \ |
319 | ? avpriv_isinff(x) \ |
320 | : avpriv_isinf(x)) |
321 | #endif /* HAVE_ISINF */ |
322 | |
323 | #if !HAVE_ISNAN |
324 | static av_always_inline av_const int avpriv_isnanf(float x) |
325 | { |
326 | uint32_t v = av_float2int(x); |
327 | if ((v & 0x7f800000) != 0x7f800000) |
328 | return 0; |
329 | return v & 0x007fffff; |
330 | } |
331 | |
332 | static av_always_inline av_const int avpriv_isnan(double x) |
333 | { |
334 | uint64_t v = av_double2int(x); |
335 | if ((v & 0x7ff0000000000000) != 0x7ff0000000000000) |
336 | return 0; |
337 | return (v & 0x000fffffffffffff) && 1; |
338 | } |
339 | |
340 | #define isnan(x) \ |
341 | (sizeof(x) == sizeof(float) \ |
342 | ? avpriv_isnanf(x) \ |
343 | : avpriv_isnan(x)) |
344 | #endif /* HAVE_ISNAN */ |
345 | |
346 | #if !HAVE_ISFINITE |
347 | static av_always_inline av_const int avpriv_isfinitef(float x) |
348 | { |
349 | uint32_t v = av_float2int(x); |
350 | return (v & 0x7f800000) != 0x7f800000; |
351 | } |
352 | |
353 | static av_always_inline av_const int avpriv_isfinite(double x) |
354 | { |
355 | uint64_t v = av_double2int(x); |
356 | return (v & 0x7ff0000000000000) != 0x7ff0000000000000; |
357 | } |
358 | |
359 | #define isfinite(x) \ |
360 | (sizeof(x) == sizeof(float) \ |
361 | ? avpriv_isfinitef(x) \ |
362 | : avpriv_isfinite(x)) |
363 | #endif /* HAVE_ISFINITE */ |
364 | |
365 | #if !HAVE_HYPOT |
366 | static inline av_const double hypot(double x, double y) |
367 | { |
368 | double ret, temp; |
369 | x = fabs(x); |
370 | y = fabs(y); |
371 | |
372 | if (isinf(x) || isinf(y)) |
373 | return av_int2double(0x7ff0000000000000); |
374 | if (x == 0 || y == 0) |
375 | return x + y; |
376 | if (x < y) { |
377 | temp = x; |
378 | x = y; |
379 | y = temp; |
380 | } |
381 | |
382 | y = y/x; |
383 | return x*sqrt(1 + y*y); |
384 | } |
385 | #endif /* HAVE_HYPOT */ |
386 | |
387 | #if !HAVE_LDEXPF |
388 | #undef ldexpf |
389 | #define ldexpf(x, exp) ((float)ldexp(x, exp)) |
390 | #endif /* HAVE_LDEXPF */ |
391 | |
392 | #if !HAVE_LLRINT |
393 | #undef llrint |
394 | #define llrint(x) ((long long)rint(x)) |
395 | #endif /* HAVE_LLRINT */ |
396 | |
397 | #if !HAVE_LLRINTF |
398 | #undef llrintf |
399 | #define llrintf(x) ((long long)rint(x)) |
400 | #endif /* HAVE_LLRINT */ |
401 | |
402 | #if !HAVE_LOG2 |
403 | #undef log2 |
404 | #define log2(x) (log(x) * 1.44269504088896340736) |
405 | #endif /* HAVE_LOG2 */ |
406 | |
407 | #if !HAVE_LOG2F |
408 | #undef log2f |
409 | #define log2f(x) ((float)log2(x)) |
410 | #endif /* HAVE_LOG2F */ |
411 | |
412 | #if !HAVE_LOG10F |
413 | #undef log10f |
414 | #define log10f(x) ((float)log10(x)) |
415 | #endif /* HAVE_LOG10F */ |
416 | |
417 | #if !HAVE_SINF |
418 | #undef sinf |
419 | #define sinf(x) ((float)sin(x)) |
420 | #endif /* HAVE_SINF */ |
421 | |
422 | #if !HAVE_RINT |
423 | static inline double rint(double x) |
424 | { |
425 | return x >= 0 ? floor(x + 0.5) : ceil(x - 0.5); |
426 | } |
427 | #endif /* HAVE_RINT */ |
428 | |
429 | #if !HAVE_LRINT |
430 | static av_always_inline av_const long int lrint(double x) |
431 | { |
432 | return rint(x); |
433 | } |
434 | #endif /* HAVE_LRINT */ |
435 | |
436 | #if !HAVE_LRINTF |
437 | static av_always_inline av_const long int lrintf(float x) |
438 | { |
439 | return (int)(rint(x)); |
440 | } |
441 | #endif /* HAVE_LRINTF */ |
442 | |
443 | #if !HAVE_ROUND |
444 | static av_always_inline av_const double round(double x) |
445 | { |
446 | return (x > 0) ? floor(x + 0.5) : ceil(x - 0.5); |
447 | } |
448 | #endif /* HAVE_ROUND */ |
449 | |
450 | #if !HAVE_ROUNDF |
451 | static av_always_inline av_const float roundf(float x) |
452 | { |
453 | return (x > 0) ? floor(x + 0.5) : ceil(x - 0.5); |
454 | } |
455 | #endif /* HAVE_ROUNDF */ |
456 | |
457 | #if !HAVE_TRUNC |
458 | static av_always_inline av_const double trunc(double x) |
459 | { |
460 | return (x > 0) ? floor(x) : ceil(x); |
461 | } |
462 | #endif /* HAVE_TRUNC */ |
463 | |
464 | #if !HAVE_TRUNCF |
465 | static av_always_inline av_const float truncf(float x) |
466 | { |
467 | return (x > 0) ? floor(x) : ceil(x); |
468 | } |
469 | #endif /* HAVE_TRUNCF */ |
470 | |
471 | #endif /* AVUTIL_LIBM_H */ |
472 |