diyfp.h
Engine/source/persistence/rapidjson/internal/diyfp.h
Classes:
class
Namespaces:
namespace
Detailed Description
1 2// Tencent is pleased to support the open source community by making RapidJSON available. 3// 4// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 5// 6// Licensed under the MIT License (the "License"); you may not use this file except 7// in compliance with the License. You may obtain a copy of the License at 8// 9// http://opensource.org/licenses/MIT 10// 11// Unless required by applicable law or agreed to in writing, software distributed 12// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 13// CONDITIONS OF ANY KIND, either express or implied. See the License for the 14// specific language governing permissions and limitations under the License. 15 16// This is a C++ header-only implementation of Grisu2 algorithm from the publication: 17// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with 18// integers." ACM Sigplan Notices 45.6 (2010): 233-243. 19 20#ifndef RAPIDJSON_DIYFP_H_ 21#define RAPIDJSON_DIYFP_H_ 22 23#include "../rapidjson.h" 24#include <limits> 25 26#if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER) 27#include <intrin.h> 28#pragma intrinsic(_BitScanReverse64) 29#pragma intrinsic(_umul128) 30#endif 31 32RAPIDJSON_NAMESPACE_BEGIN 33namespace internal { 34 35#ifdef __GNUC__ 36RAPIDJSON_DIAG_PUSH 37RAPIDJSON_DIAG_OFF(effc++) 38#endif 39 40#ifdef __clang__ 41RAPIDJSON_DIAG_PUSH 42RAPIDJSON_DIAG_OFF(padded) 43#endif 44 45struct DiyFp { 46 DiyFp() : f(), e() {} 47 48 DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {} 49 50 explicit DiyFp(double d) { 51 union { 52 double d; 53 uint64_t u64; 54 } u = { d }; 55 56 int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize); 57 uint64_t significand = (u.u64 & kDpSignificandMask); 58 if (biased_e != 0) { 59 f = significand + kDpHiddenBit; 60 e = biased_e - kDpExponentBias; 61 } 62 else { 63 f = significand; 64 e = kDpMinExponent + 1; 65 } 66 } 67 68 DiyFp operator-(const DiyFp& rhs) const { 69 return DiyFp(f - rhs.f, e); 70 } 71 72 DiyFp operator*(const DiyFp& rhs) const { 73#if defined(_MSC_VER) && defined(_M_AMD64) 74 uint64_t h; 75 uint64_t l = _umul128(f, rhs.f, &h); 76 if (l & (uint64_t(1) << 63)) // rounding 77 h++; 78 return DiyFp(h, e + rhs.e + 64); 79#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__) 80 __extension__ typedef unsigned __int128 uint128; 81 uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f); 82 uint64_t h = static_cast<uint64_t>(p >> 64); 83 uint64_t l = static_cast<uint64_t>(p); 84 if (l & (uint64_t(1) << 63)) // rounding 85 h++; 86 return DiyFp(h, e + rhs.e + 64); 87#else 88 const uint64_t M32 = 0xFFFFFFFF; 89 const uint64_t a = f >> 32; 90 const uint64_t b = f & M32; 91 const uint64_t c = rhs.f >> 32; 92 const uint64_t d = rhs.f & M32; 93 const uint64_t ac = a * c; 94 const uint64_t bc = b * c; 95 const uint64_t ad = a * d; 96 const uint64_t bd = b * d; 97 uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32); 98 tmp += 1U << 31; /// mult_round 99 return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64); 100#endif 101 } 102 103 DiyFp Normalize() const { 104 RAPIDJSON_ASSERT(f != 0); // https://stackoverflow.com/a/26809183/291737 105#if defined(_MSC_VER) && defined(_M_AMD64) 106 unsigned long index; 107 _BitScanReverse64(&index, f); 108 return DiyFp(f << (63 - index), e - (63 - index)); 109#elif defined(__GNUC__) && __GNUC__ >= 4 110 int s = __builtin_clzll(f); 111 return DiyFp(f << s, e - s); 112#else 113 DiyFp res = *this; 114 while (!(res.f & (static_cast<uint64_t>(1) << 63))) { 115 res.f <<= 1; 116 res.e--; 117 } 118 return res; 119#endif 120 } 121 122 DiyFp NormalizeBoundary() const { 123 DiyFp res = *this; 124 while (!(res.f & (kDpHiddenBit << 1))) { 125 res.f <<= 1; 126 res.e--; 127 } 128 res.f <<= (kDiySignificandSize - kDpSignificandSize - 2); 129 res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2); 130 return res; 131 } 132 133 void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const { 134 DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary(); 135 DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1); 136 mi.f <<= mi.e - pl.e; 137 mi.e = pl.e; 138 *plus = pl; 139 *minus = mi; 140 } 141 142 double ToDouble() const { 143 union { 144 double d; 145 uint64_t u64; 146 }u; 147 RAPIDJSON_ASSERT(f <= kDpHiddenBit + kDpSignificandMask); 148 if (e < kDpDenormalExponent) { 149 // Underflow. 150 return 0.0; 151 } 152 if (e >= kDpMaxExponent) { 153 // Overflow. 154 return std::numeric_limits<double>::infinity(); 155 } 156 const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 : 157 static_cast<uint64_t>(e + kDpExponentBias); 158 u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize); 159 return u.d; 160 } 161 162 static const int kDiySignificandSize = 64; 163 static const int kDpSignificandSize = 52; 164 static const int kDpExponentBias = 0x3FF + kDpSignificandSize; 165 static const int kDpMaxExponent = 0x7FF - kDpExponentBias; 166 static const int kDpMinExponent = -kDpExponentBias; 167 static const int kDpDenormalExponent = -kDpExponentBias + 1; 168 static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000); 169 static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF); 170 static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000); 171 172 uint64_t f; 173 int e; 174}; 175 176inline DiyFp GetCachedPowerByIndex(size_t index) { 177 // 10^-348, 10^-340, ..., 10^340 178 static const uint64_t kCachedPowers_F[] = { 179 RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76), 180 RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea), 181 RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df), 182 RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f), 183 RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c), 184 RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5), 185 RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d), 186 RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637), 187 RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7), 188 RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5), 189 RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b), 190 RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996), 191 RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6), 192 RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8), 193 RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053), 194 RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd), 195 RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94), 196 RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b), 197 RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac), 198 RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3), 199 RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb), 200 RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c), 201 RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000), 202 RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984), 203 RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70), 204 RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245), 205 RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8), 206 RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a), 207 RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea), 208 RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85), 209 RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2), 210 RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3), 211 RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25), 212 RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece), 213 RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5), 214 RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a), 215 RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c), 216 RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a), 217 RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129), 218 RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429), 219 RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d), 220 RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841), 221 RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9), 222 RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b) 223 }; 224 static const int16_t kCachedPowers_E[] = { 225 -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, 226 -954, -927, -901, -874, -847, -821, -794, -768, -741, -715, 227 -688, -661, -635, -608, -582, -555, -529, -502, -475, -449, 228 -422, -396, -369, -343, -316, -289, -263, -236, -210, -183, 229 -157, -130, -103, -77, -50, -24, 3, 30, 56, 83, 230 109, 136, 162, 189, 216, 242, 269, 295, 322, 348, 231 375, 402, 428, 455, 481, 508, 534, 561, 588, 614, 232 641, 667, 694, 720, 747, 774, 800, 827, 853, 880, 233 907, 933, 960, 986, 1013, 1039, 1066 234 }; 235 RAPIDJSON_ASSERT(index < 87); 236 return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]); 237} 238 239inline DiyFp GetCachedPower(int e, int* K) { 240 241 //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374; 242 double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive 243 int k = static_cast<int>(dk); 244 if (dk - k > 0.0) 245 k++; 246 247 unsigned index = static_cast<unsigned>((k >> 3) + 1); 248 *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table 249 250 return GetCachedPowerByIndex(index); 251} 252 253inline DiyFp GetCachedPower10(int exp, int *outExp) { 254 RAPIDJSON_ASSERT(exp >= -348); 255 unsigned index = static_cast<unsigned>(exp + 348) / 8u; 256 *outExp = -348 + static_cast<int>(index) * 8; 257 return GetCachedPowerByIndex(index); 258} 259 260#ifdef __GNUC__ 261RAPIDJSON_DIAG_POP 262#endif 263 264#ifdef __clang__ 265RAPIDJSON_DIAG_POP 266RAPIDJSON_DIAG_OFF(padded) 267#endif 268 269} // namespace internal 270RAPIDJSON_NAMESPACE_END 271 272#endif // RAPIDJSON_DIYFP_H_ 273