/*
 * tanh.h
 * The basic idea is to exploit Pade polynomials.
 * Implemented by Manuel Schiller for LHCb.
 *
 * Created on: Sep 23, 2017
 * Author: Paul Seyfert, Manuel Schiller
 */
| 9 |
|
---|
/*
 * VDT is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
| 24 |
|
---|
| 25 | #ifndef TANH_H_
|
---|
| 26 | #define TANH_H_
|
---|
| 27 |
|
---|
| 28 | #include "vdtcore_common.h"
|
---|
| 29 |
|
---|
namespace vdt{

/// Fast tanh implementation, double precision.
///
/// The argument is scaled down by a factor of 8, a short Padé approximation
/// (odd degree-7 numerator over even degree-6 denominator) is evaluated at
/// the scaled argument, and the identity
///   tanh(2t) = 2 tanh(t) / (1 + tanh^2(t))
/// is then applied three times to get back to the original argument.
///
/// @param x argument
/// @return approximation of tanh(x); exactly +1 or -1 (carrying the sign of
///         x) whenever |x| > 20
inline double fast_tanh(double x){
  // for very large |x| > 20, tanh(x) is x/|x| anyway (at least to double
  // precision)
  //
  // NB: branch-free code takes longer to execute
  if (std::abs(x) > 20.) return std::copysign(1., x);

  // work at x/8, where a very short Padé approximation is sufficient
  const double scaled = x * 0.125;
  const double scaled2 = scaled * scaled;
  const double numer = 135135. + scaled2 * (17325. + scaled2 * ( 378. + scaled2));
  const double denom = 135135. + scaled2 * (62370. + scaled2 * (3150. + scaled2 * 28.));

  // t ~ tanh(x/8); every line below doubles the argument once:
  // x/8 -> x/4 -> x/2 -> x
  double t = scaled * numer / denom;
  t = 2. * t / (t * t + 1.);
  t = 2. * t / (t * t + 1.);
  return 2. * t / (t * t + 1.);
}

//------------------------------------------------------------------------------
/// Fast tanh implementation, single precision.
///
/// Same strategy as the double precision version: evaluate a Padé
/// approximation at x/8 and apply tanh(2t) = 2 tanh(t) / (1 + tanh^2(t))
/// three times. An even shorter approximation (degree 3 over degree 2) is
/// used for float.
///
/// @param x argument
/// @return approximation of tanh(x); exactly +1 or -1 (carrying the sign of
///         x) whenever |x| > 9.1
inline float fast_tanhf( float x ) {
  // NB: branch-free code takes longer to execute
  if (std::abs(x) > 9.1f) return std::copysign(1.f, x);

  const float scaled = x * 0.125f;
  const float scaled2 = scaled * scaled;

  // t ~ tanh(x/8), followed by three argument doublings
  float t = scaled * (scaled2 + 15.f) / (6.f * scaled2 + 15.f);
  t = 2.f * t / (t * t + 1.f);
  t = 2.f * t / (t * t + 1.f);
  return 2.f * t / (t * t + 1.f);
}

//------------------------------------------------------------------------------
// Vector signatures

}// end of vdt
| 76 |
|
---|
| 77 | #endif // end of tanh
|
---|