1 | /*
|
---|
 * asin.h
|
---|
3 | * The basic idea is to exploit Pade' polynomials.
|
---|
4 | * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
|
---|
5 | * moshier@na-net.ornl.gov) as well as actual code.
|
---|
6 | * The Cephes library can be found here: http://www.netlib.org/cephes/
|
---|
7 | *
|
---|
8 | * Created on: Jun 23, 2012
|
---|
9 | * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
|
---|
10 | */
|
---|
11 |
|
---|
12 | /*
|
---|
13 | * VDT is free software: you can redistribute it and/or modify
|
---|
14 | * it under the terms of the GNU Lesser Public License as published by
|
---|
15 | * the Free Software Foundation, either version 3 of the License, or
|
---|
16 | * (at your option) any later version.
|
---|
17 | *
|
---|
18 | * This program is distributed in the hope that it will be useful,
|
---|
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
21 | * GNU Lesser Public License for more details.
|
---|
22 | *
|
---|
23 | * You should have received a copy of the GNU Lesser Public License
|
---|
24 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
|
---|
25 | */
|
---|
26 |
|
---|
27 | #ifndef ASIN_H_
|
---|
28 | #define ASIN_H_
|
---|
29 |
|
---|
30 | #include "vdtcore_common.h"
|
---|
31 |
|
---|
namespace vdt{

namespace details{

// Coefficient tables for the rational approximations used by fast_asin.
// In every table the first entry multiplies the highest power of the argument.

// Numerator (getRX) used by fast_asin on the argument 1 - |x|.
const double RX1asin = 2.967721961301243206100E-3;
const double RX2asin = -5.634242780008963776856E-1;
const double RX3asin = 6.968710824104713396794E0;
const double RX4asin = -2.556901049652824852289E1;
const double RX5asin = 2.853665548261061424989E1;

// Denominator (getSX) paired with getRX; its leading coefficient is an implicit 1.
const double SX1asin = -2.194779531642920639778E1;
const double SX2asin = 1.470656354026814941758E2;
const double SX3asin = -3.838770957603691357202E2;
const double SX4asin = 3.424398657913078477438E2;

// Numerator (getPX) used by fast_asin on the argument x*x.
const double PX1asin = 4.253011369004428248960E-3;
const double PX2asin = -6.019598008014123785661E-1;
const double PX3asin = 5.444622390564711410273E0;
const double PX4asin = -1.626247967210700244449E1;
const double PX5asin = 1.956261983317594739197E1;
const double PX6asin = -8.198089802484824371615E0;

// Denominator (getQX) paired with getPX; its leading coefficient is an implicit 1.
const double QX1asin = -1.474091372988853791896E1;
const double QX2asin = 7.049610280856842141659E1;
const double QX3asin = -1.471791292232726029859E2;
const double QX4asin = 1.395105614657485689735E2;
const double QX5asin = -4.918853881490881290097E1;

/// Horner evaluation of
/// RX1asin*x^4 + RX2asin*x^3 + RX3asin*x^2 + RX4asin*x + RX5asin.
inline double getRX(const double x){
  const double lower[] = {RX2asin, RX3asin, RX4asin, RX5asin};
  const unsigned count = sizeof(lower)/sizeof(lower[0]);
  double acc = RX1asin;
  for (unsigned i = 0; i < count; ++i){
    // Multiply and add are kept as two statements on purpose: this is the
    // exact operation sequence of the unrolled form.
    acc *= x;
    acc += lower[i];
  }
  return acc;
}

/// Horner evaluation of the monic polynomial
/// x^4 + SX1asin*x^3 + SX2asin*x^2 + SX3asin*x + SX4asin.
inline double getSX(const double x){
  const double lower[] = {SX2asin, SX3asin, SX4asin};
  const unsigned count = sizeof(lower)/sizeof(lower[0]);
  // The leading coefficient is 1, so the recurrence starts from x itself.
  double acc = x;
  acc += SX1asin;
  for (unsigned i = 0; i < count; ++i){
    acc *= x;
    acc += lower[i];
  }
  return acc;
}

/// Horner evaluation of
/// PX1asin*x^5 + PX2asin*x^4 + PX3asin*x^3 + PX4asin*x^2 + PX5asin*x + PX6asin.
inline double getPX(const double x){
  const double lower[] = {PX2asin, PX3asin, PX4asin, PX5asin, PX6asin};
  const unsigned count = sizeof(lower)/sizeof(lower[0]);
  double acc = PX1asin;
  for (unsigned i = 0; i < count; ++i){
    acc *= x;
    acc += lower[i];
  }
  return acc;
}

/// Horner evaluation of the monic polynomial
/// x^5 + QX1asin*x^4 + QX2asin*x^3 + QX3asin*x^2 + QX4asin*x + QX5asin.
inline double getQX(const double x){
  const double lower[] = {QX2asin, QX3asin, QX4asin, QX5asin};
  const unsigned count = sizeof(lower)/sizeof(lower[0]);
  // The leading coefficient is 1, so the recurrence starts from x itself.
  double acc = x;
  acc += QX1asin;
  for (unsigned i = 0; i < count; ++i){
    acc *= x;
    acc += lower[i];
  }
  return acc;
}

} // details namespace

} // vdt namespace
|
---|
115 |
|
---|
116 | namespace vdt{
|
---|
117 |
|
---|
118 | // asin double precision --------------------------------------------------------
|
---|
119 | /// Double Precision asin
|
---|
120 | inline double fast_asin(double x){
|
---|
121 |
|
---|
122 | const uint64_t sign_mask = details::getSignMask(x);
|
---|
123 | x = std::fabs(x);
|
---|
124 | const double a = x;
|
---|
125 |
|
---|
126 |
|
---|
127 | double zz = 1.0 - a;
|
---|
128 | double px = details::getRX(zz);
|
---|
129 | double qx = details::getSX(zz);
|
---|
130 |
|
---|
131 | const double p = zz * px/qx;
|
---|
132 |
|
---|
133 | zz = std::sqrt(zz+zz);
|
---|
134 | double z = details::PIO4 - zz;
|
---|
135 | zz = zz * p - details::MOREBITS;
|
---|
136 | z -= zz;
|
---|
137 | z += details::PIO4;
|
---|
138 |
|
---|
139 | if( a < 0.625 ){
|
---|
140 | zz = a * a;
|
---|
141 | px = details::getPX(zz);
|
---|
142 | qx = details::getQX(zz);
|
---|
143 | z = zz*px/qx;
|
---|
144 | z = a * z + a;
|
---|
145 | }
|
---|
146 |
|
---|
147 |
|
---|
148 | // Linear approx, not sooo needed but seable. Price is cheap though
|
---|
149 | double res = a < 1e-8? a : z ;
|
---|
150 | // Restore Sign
|
---|
151 | return details::dpORuint64(res,sign_mask);
|
---|
152 |
|
---|
153 | }
|
---|
154 |
|
---|
155 | //------------------------------------------------------------------------------
|
---|
156 | /// Single Precision asin
|
---|
157 | inline float fast_asinf(float x){
|
---|
158 |
|
---|
159 |
|
---|
160 | uint32_t flag=0;
|
---|
161 |
|
---|
162 | const uint32_t sign_mask = details::getSignMask(x);
|
---|
163 | const float a = std::fabs(x);
|
---|
164 |
|
---|
165 | float z;
|
---|
166 | if( a > 0.5f )
|
---|
167 | {
|
---|
168 | z = 0.5f * (1.0f - a);
|
---|
169 | x = sqrtf( z );
|
---|
170 | flag = 1;
|
---|
171 | }
|
---|
172 | else
|
---|
173 | {
|
---|
174 | x = a;
|
---|
175 | z = x * x;
|
---|
176 | }
|
---|
177 |
|
---|
178 | z = (((( 4.2163199048E-2f * z
|
---|
179 | + 2.4181311049E-2f) * z
|
---|
180 | + 4.5470025998E-2f) * z
|
---|
181 | + 7.4953002686E-2f) * z
|
---|
182 | + 1.6666752422E-1f) * z * x
|
---|
183 | + x;
|
---|
184 |
|
---|
185 | // if( flag != 0 )
|
---|
186 | // {
|
---|
187 | // z = z + z;
|
---|
188 | // z = PIO2F - z;
|
---|
189 | // }
|
---|
190 |
|
---|
191 | // No branch with the two coefficients
|
---|
192 |
|
---|
193 | float tmp = z + z;
|
---|
194 | tmp = details::PIO2F - tmp;
|
---|
195 |
|
---|
196 | // Linear approx, not sooo needed but seable. Price is cheap though
|
---|
197 | float res = a < 1e-4f? a : tmp * flag + (1-flag) * z ;
|
---|
198 |
|
---|
199 | // Restore Sign
|
---|
200 | return details::spORuint32(res,sign_mask);
|
---|
201 |
|
---|
202 | }
|
---|
203 |
|
---|
204 | //------------------------------------------------------------------------------
|
---|
// The acos functions are defined in this file as well
|
---|
206 |
|
---|
207 | inline double fast_acos( double x ){return details::PIO2 - fast_asin(x);}
|
---|
208 |
|
---|
209 | //------------------------------------------------------------------------------
|
---|
210 |
|
---|
211 | inline float fast_acosf( float x ){return details::PIO2F - fast_asinf(x);}
|
---|
212 |
|
---|
213 | //------------------------------------------------------------------------------
|
---|
214 |
|
---|
215 | } //vdt namespace
|
---|
216 |
|
---|
217 | #endif /* ASIN_H_ */
|
---|