/*
 * tanh.h
 * The basic idea is to exploit Padé approximants (ratios of polynomials).
 * Implemented by Manuel Schiller for LHCb.
 *
 * Created on: Sep 23, 2017
 * Author: Paul Seyfert, Manuel Schiller
 */
9 |
|
---|
/*
 * VDT is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
24 |
|
---|
25 | #ifndef TANH_H_
|
---|
26 | #define TANH_H_
|
---|
27 |
|
---|
28 | #include "vdtcore_common.h"
|
---|
29 |
|
---|
30 | namespace vdt{
|
---|
31 |
|
---|
32 |
|
---|
33 |
|
---|
34 |
|
---|
/// Fast tanh implementation, double precision.
///
/// The argument is scaled down by a factor of 8, a short Padé approximant
/// (degree 7 over degree 6) is evaluated at the reduced argument, and the
/// identity tanh(2y) = 2 tanh(y) / (1 + tanh^2(y)) is then applied three
/// times to recover the value at the original argument.
///
/// @param x argument
/// @return approximation of tanh(x); exactly +1 or -1 (carrying the sign of
///         x) when |x| > 20
inline double fast_tanh(double x){
    // for very large |x| > 20, tanh(x) is x/|x| anyway (at least to double
    // precision), so the arithmetic below is skipped
    //
    // NB: branch-free code takes longer to execute
    if (std::abs(x) > 20.) return std::copysign(1., x);

    // reduced argument y = x / 8, i.e. three "argument halvings"
    const double y = x * 0.125;
    const double y2 = y * y;

    // Padé approximant of tanh(y): y * P(y^2) / Q(y^2), both polynomials in
    // Horner form
    const double numer = 135135 + y2 * (17325 + y2 * ( 378 + y2 * 1));
    const double denom = 135135 + y2 * (62370 + y2 * (3150 + y2 * 28));
    double t = y * numer / denom;

    // undo the three halvings: each pass maps tanh(y) to tanh(2y)
    for (int pass = 0; pass < 3; ++pass) t = 2 * t / (t * t + 1);
    return t;
}
55 |
|
---|
56 | //------------------------------------------------------------------------------
|
---|
/// Fast tanh implementation, single precision.
///
/// Same strategy as the double precision version: the argument is scaled
/// down by a factor of 8, a Padé approximant is evaluated there, and the
/// identity tanh(2y) = 2 tanh(y) / (1 + tanh^2(y)) is applied three times.
/// An even shorter approximant (degree 3 over degree 2) is sufficient for
/// float.
///
/// @param x argument
/// @return approximation of tanh(x); exactly +1 or -1 (carrying the sign of
///         x) when |x| > 9.1
inline float fast_tanhf( float x ) {
    // beyond the cutoff the result is just the sign of x, so the arithmetic
    // below is skipped
    //
    // NB: branch-free code takes longer to execute
    if (std::abs(x) > 9.1f) return std::copysign(1.f, x);

    // reduced argument y = x / 8, i.e. three "argument halvings"
    const float y = x * 0.125f;
    const float y2 = y * y;

    // Padé approximant of tanh(y): y * (y^2 + 15) / (6 y^2 + 15)
    float t = y * (y2 + 15) / (6 * y2 + 15);

    // undo the three halvings: each pass maps tanh(y) to tanh(2y)
    for (int pass = 0; pass < 3; ++pass) t = 2 * t / (t * t + 1);
    return t;
}
71 |
|
---|
72 | //------------------------------------------------------------------------------
|
---|
73 | // Vector signatures
|
---|
74 |
|
---|
75 | }// end of vdt
|
---|
76 |
|
---|
77 | #endif // end of tanh
|
---|