[16269] | 1 | /*
|
---|
| 2 | * asin.h
|
---|
| 3 | * The basic idea is to exploit Pade' polynomials.
|
---|
| 4 | * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
|
---|
| 5 | * moshier@na-net.ornl.gov) as well as actual code.
|
---|
| 6 | * The Cephes library can be found here: http://www.netlib.org/cephes/
|
---|
| 7 | *
|
---|
| 8 | * Created on: Jun 23, 2012
|
---|
| 9 | * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
|
---|
| 10 | */
|
---|
| 11 |
|
---|
| 12 | /*
|
---|
| 13 | * VDT is free software: you can redistribute it and/or modify
|
---|
| 14 | * it under the terms of the GNU Lesser Public License as published by
|
---|
| 15 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 16 | * (at your option) any later version.
|
---|
| 17 | *
|
---|
| 18 | * This program is distributed in the hope that it will be useful,
|
---|
| 19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 21 | * GNU Lesser Public License for more details.
|
---|
| 22 | *
|
---|
| 23 | * You should have received a copy of the GNU Lesser Public License
|
---|
| 24 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 25 | */
|
---|
| 26 |
|
---|
| 27 | #ifndef ASIN_H_
|
---|
| 28 | #define ASIN_H_
|
---|
| 29 |
|
---|
| 30 | #include "vdtcore_common.h"
|
---|
| 31 |
|
---|
namespace vdt{

namespace details{

// Coefficients of the rational function R(z)/S(z) that fast_asin evaluates
// at z = 1 - |x|. R has degree 4; S has degree 4 with an implicit leading
// coefficient of 1. Listed from the highest power down to the constant term.
const double RX1asin = 2.967721961301243206100E-3;
const double RX2asin = -5.634242780008963776856E-1;
const double RX3asin = 6.968710824104713396794E0;
const double RX4asin = -2.556901049652824852289E1;
const double RX5asin = 2.853665548261061424989E1;

const double SX1asin = -2.194779531642920639778E1;
const double SX2asin = 1.470656354026814941758E2;
const double SX3asin = -3.838770957603691357202E2;
const double SX4asin = 3.424398657913078477438E2;

// Coefficients of the rational function P(z)/Q(z) that fast_asin evaluates
// at z = x*x when |x| < 0.625. P has degree 5; Q has degree 5 with an
// implicit leading coefficient of 1.
const double PX1asin = 4.253011369004428248960E-3;
const double PX2asin = -6.019598008014123785661E-1;
const double PX3asin = 5.444622390564711410273E0;
const double PX4asin = -1.626247967210700244449E1;
const double PX5asin = 1.956261983317594739197E1;
const double PX6asin = -8.198089802484824371615E0;

const double QX1asin = -1.474091372988853791896E1;
const double QX2asin = 7.049610280856842141659E1;
const double QX3asin = -1.471791292232726029859E2;
const double QX4asin = 1.395105614657485689735E2;
const double QX5asin = -4.918853881490881290097E1;

/// Evaluates R(x) = RX1*x^4 + RX2*x^3 + RX3*x^2 + RX4*x + RX5 by Horner's
/// scheme. Each step is one add followed by one multiply, in the same order
/// as a "multiply, then add the next coefficient" chain.
inline double getRX(const double x){
  double acc = RX1asin * x;
  acc = (acc + RX2asin) * x;
  acc = (acc + RX3asin) * x;
  acc = (acc + RX4asin) * x;
  return acc + RX5asin;
}

/// Evaluates the monic polynomial
/// S(x) = x^4 + SX1*x^3 + SX2*x^2 + SX3*x + SX4 by Horner's scheme.
inline double getSX(const double x){
  double acc = (x + SX1asin) * x;
  acc = (acc + SX2asin) * x;
  acc = (acc + SX3asin) * x;
  return acc + SX4asin;
}

/// Evaluates P(x) = PX1*x^5 + PX2*x^4 + PX3*x^3 + PX4*x^2 + PX5*x + PX6
/// by Horner's scheme.
inline double getPX(const double x){
  double acc = PX1asin * x;
  acc = (acc + PX2asin) * x;
  acc = (acc + PX3asin) * x;
  acc = (acc + PX4asin) * x;
  acc = (acc + PX5asin) * x;
  return acc + PX6asin;
}

/// Evaluates the monic polynomial
/// Q(x) = x^5 + QX1*x^4 + QX2*x^3 + QX3*x^2 + QX4*x + QX5 by Horner's scheme.
inline double getQX(const double x){
  double acc = (x + QX1asin) * x;
  acc = (acc + QX2asin) * x;
  acc = (acc + QX3asin) * x;
  acc = (acc + QX4asin) * x;
  return acc + QX5asin;
}

} // namespace details

} // namespace vdt
|
---|
| 115 |
|
---|
| 116 | namespace vdt{
|
---|
| 117 |
|
---|
| 118 | // asin double precision --------------------------------------------------------
|
---|
| 119 | /// Double Precision asin
|
---|
| 120 | inline double fast_asin(double x){
|
---|
| 121 |
|
---|
| 122 | const uint64_t sign_mask = details::getSignMask(x);
|
---|
| 123 | x = std::fabs(x);
|
---|
| 124 | const double a = x;
|
---|
| 125 |
|
---|
| 126 |
|
---|
| 127 | double zz = 1.0 - a;
|
---|
| 128 | double px = details::getRX(zz);
|
---|
| 129 | double qx = details::getSX(zz);
|
---|
| 130 |
|
---|
| 131 | const double p = zz * px/qx;
|
---|
| 132 |
|
---|
| 133 | zz = std::sqrt(zz+zz);
|
---|
| 134 | double z = details::PIO4 - zz;
|
---|
| 135 | zz = zz * p - details::MOREBITS;
|
---|
| 136 | z -= zz;
|
---|
| 137 | z += details::PIO4;
|
---|
| 138 |
|
---|
| 139 | if( a < 0.625 ){
|
---|
| 140 | zz = a * a;
|
---|
| 141 | px = details::getPX(zz);
|
---|
| 142 | qx = details::getQX(zz);
|
---|
| 143 | z = zz*px/qx;
|
---|
| 144 | z = a * z + a;
|
---|
| 145 | }
|
---|
| 146 |
|
---|
| 147 |
|
---|
| 148 | // Linear approx, not sooo needed but seable. Price is cheap though
|
---|
| 149 | double res = a < 1e-8? a : z ;
|
---|
| 150 | // Restore Sign
|
---|
| 151 | return details::dpORuint64(res,sign_mask);
|
---|
| 152 |
|
---|
| 153 | }
|
---|
| 154 |
|
---|
| 155 | //------------------------------------------------------------------------------
|
---|
| 156 | /// Single Precision asin
|
---|
| 157 | inline float fast_asinf(float x){
|
---|
| 158 |
|
---|
| 159 |
|
---|
| 160 | uint32_t flag=0;
|
---|
| 161 |
|
---|
| 162 | const uint32_t sign_mask = details::getSignMask(x);
|
---|
| 163 | const float a = std::fabs(x);
|
---|
| 164 |
|
---|
| 165 | float z;
|
---|
| 166 | if( a > 0.5f )
|
---|
| 167 | {
|
---|
| 168 | z = 0.5f * (1.0f - a);
|
---|
| 169 | x = sqrtf( z );
|
---|
| 170 | flag = 1;
|
---|
| 171 | }
|
---|
| 172 | else
|
---|
| 173 | {
|
---|
| 174 | x = a;
|
---|
| 175 | z = x * x;
|
---|
| 176 | }
|
---|
| 177 |
|
---|
| 178 | z = (((( 4.2163199048E-2f * z
|
---|
| 179 | + 2.4181311049E-2f) * z
|
---|
| 180 | + 4.5470025998E-2f) * z
|
---|
| 181 | + 7.4953002686E-2f) * z
|
---|
| 182 | + 1.6666752422E-1f) * z * x
|
---|
| 183 | + x;
|
---|
| 184 |
|
---|
| 185 | // if( flag != 0 )
|
---|
| 186 | // {
|
---|
| 187 | // z = z + z;
|
---|
| 188 | // z = PIO2F - z;
|
---|
| 189 | // }
|
---|
| 190 |
|
---|
| 191 | // No branch with the two coefficients
|
---|
| 192 |
|
---|
| 193 | float tmp = z + z;
|
---|
| 194 | tmp = details::PIO2F - tmp;
|
---|
| 195 |
|
---|
| 196 | // Linear approx, not sooo needed but seable. Price is cheap though
|
---|
| 197 | float res = a < 1e-4f? a : tmp * flag + (1-flag) * z ;
|
---|
| 198 |
|
---|
| 199 | // Restore Sign
|
---|
| 200 | return details::spORuint32(res,sign_mask);
|
---|
| 201 |
|
---|
| 202 | }
|
---|
| 203 |
|
---|
| 204 | //------------------------------------------------------------------------------
|
---|
| 205 | // The cos is in this file as well
|
---|
| 206 |
|
---|
| 207 | inline double fast_acos( double x ){return details::PIO2 - fast_asin(x);}
|
---|
| 208 |
|
---|
| 209 | //------------------------------------------------------------------------------
|
---|
| 210 |
|
---|
| 211 | inline float fast_acosf( float x ){return details::PIO2F - fast_asinf(x);}
|
---|
| 212 |
|
---|
| 213 | //------------------------------------------------------------------------------
|
---|
| 214 |
|
---|
| 215 | } //vdt namespace
|
---|
| 216 |
|
---|
| 217 | #endif /* ASIN_H_ */
|
---|