diyfp.h

Engine/source/persistence/rapidjson/internal/diyfp.h

More...

Classes:

Namespaces:

namespace

Detailed Description

  1
  2// Tencent is pleased to support the open source community by making RapidJSON available.
  3//
  4// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
  5//
  6// Licensed under the MIT License (the "License"); you may not use this file except
  7// in compliance with the License. You may obtain a copy of the License at
  8//
  9// http://opensource.org/licenses/MIT
 10//
 11// Unless required by applicable law or agreed to in writing, software distributed
 12// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 13// CONDITIONS OF ANY KIND, either express or implied. See the License for the
 14// specific language governing permissions and limitations under the License.
 15
 16// This is a C++ header-only implementation of Grisu2 algorithm from the publication:
 17// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
 18// integers." ACM Sigplan Notices 45.6 (2010): 233-243.
 19
 20#ifndef RAPIDJSON_DIYFP_H_
 21#define RAPIDJSON_DIYFP_H_
 22
 23#include "../rapidjson.h"
 24#include <limits>
 25
 26#if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
 27#include <intrin.h>
 28#pragma intrinsic(_BitScanReverse64)
 29#pragma intrinsic(_umul128)
 30#endif
 31
 32RAPIDJSON_NAMESPACE_BEGIN
 33namespace internal {
 34
 35#ifdef __GNUC__
 36RAPIDJSON_DIAG_PUSH
 37RAPIDJSON_DIAG_OFF(effc++)
 38#endif
 39
 40#ifdef __clang__
 41RAPIDJSON_DIAG_PUSH
 42RAPIDJSON_DIAG_OFF(padded)
 43#endif
 44
 45struct DiyFp {
 46    DiyFp() : f(), e() {}
 47
 48    DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
 49
 50    explicit DiyFp(double d) {
 51        union {
 52            double d;
 53            uint64_t u64;
 54        } u = { d };
 55
 56        int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
 57        uint64_t significand = (u.u64 & kDpSignificandMask);
 58        if (biased_e != 0) {
 59            f = significand + kDpHiddenBit;
 60            e = biased_e - kDpExponentBias;
 61        }
 62        else {
 63            f = significand;
 64            e = kDpMinExponent + 1;
 65        }
 66    }
 67
 68    DiyFp operator-(const DiyFp& rhs) const {
 69        return DiyFp(f - rhs.f, e);
 70    }
 71
 72    DiyFp operator*(const DiyFp& rhs) const {
 73#if defined(_MSC_VER) && defined(_M_AMD64)
 74        uint64_t h;
 75        uint64_t l = _umul128(f, rhs.f, &h);
 76        if (l & (uint64_t(1) << 63)) // rounding
 77            h++;
 78        return DiyFp(h, e + rhs.e + 64);
 79#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
 80        __extension__ typedef unsigned __int128 uint128;
 81        uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
 82        uint64_t h = static_cast<uint64_t>(p >> 64);
 83        uint64_t l = static_cast<uint64_t>(p);
 84        if (l & (uint64_t(1) << 63)) // rounding
 85            h++;
 86        return DiyFp(h, e + rhs.e + 64);
 87#else
 88        const uint64_t M32 = 0xFFFFFFFF;
 89        const uint64_t a = f >> 32;
 90        const uint64_t b = f & M32;
 91        const uint64_t c = rhs.f >> 32;
 92        const uint64_t d = rhs.f & M32;
 93        const uint64_t ac = a * c;
 94        const uint64_t bc = b * c;
 95        const uint64_t ad = a * d;
 96        const uint64_t bd = b * d;
 97        uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
 98        tmp += 1U << 31;  /// mult_round
 99        return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
100#endif
101    }
102
103    DiyFp Normalize() const {
104        RAPIDJSON_ASSERT(f != 0); // https://stackoverflow.com/a/26809183/291737
105#if defined(_MSC_VER) && defined(_M_AMD64)
106        unsigned long index;
107        _BitScanReverse64(&index, f);
108        return DiyFp(f << (63 - index), e - (63 - index));
109#elif defined(__GNUC__) && __GNUC__ >= 4
110        int s = __builtin_clzll(f);
111        return DiyFp(f << s, e - s);
112#else
113        DiyFp res = *this;
114        while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
115            res.f <<= 1;
116            res.e--;
117        }
118        return res;
119#endif
120    }
121
122    DiyFp NormalizeBoundary() const {
123        DiyFp res = *this;
124        while (!(res.f & (kDpHiddenBit << 1))) {
125            res.f <<= 1;
126            res.e--;
127        }
128        res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
129        res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
130        return res;
131    }
132
133    void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
134        DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
135        DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
136        mi.f <<= mi.e - pl.e;
137        mi.e = pl.e;
138        *plus = pl;
139        *minus = mi;
140    }
141
142    double ToDouble() const {
143        union {
144            double d;
145            uint64_t u64;
146        }u;
147        RAPIDJSON_ASSERT(f <= kDpHiddenBit + kDpSignificandMask);
148        if (e < kDpDenormalExponent) {
149            // Underflow.
150            return 0.0;
151        }
152        if (e >= kDpMaxExponent) {
153            // Overflow.
154            return std::numeric_limits<double>::infinity();
155        }
156        const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
157            static_cast<uint64_t>(e + kDpExponentBias);
158        u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
159        return u.d;
160    }
161
162    static const int kDiySignificandSize = 64;
163    static const int kDpSignificandSize = 52;
164    static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
165    static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
166    static const int kDpMinExponent = -kDpExponentBias;
167    static const int kDpDenormalExponent = -kDpExponentBias + 1;
168    static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
169    static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
170    static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
171
172    uint64_t f;
173    int e;
174};
175
176inline DiyFp GetCachedPowerByIndex(size_t index) {
177    // 10^-348, 10^-340, ..., 10^340
178    static const uint64_t kCachedPowers_F[] = {
179        RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
180        RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
181        RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
182        RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
183        RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
184        RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
185        RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
186        RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
187        RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
188        RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
189        RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
190        RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
191        RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
192        RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
193        RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
194        RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
195        RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
196        RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
197        RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
198        RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
199        RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
200        RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
201        RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
202        RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
203        RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
204        RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
205        RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
206        RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
207        RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
208        RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
209        RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
210        RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
211        RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
212        RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
213        RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
214        RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
215        RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
216        RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
217        RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
218        RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
219        RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
220        RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
221        RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
222        RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
223    };
224    static const int16_t kCachedPowers_E[] = {
225        -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007,  -980,
226        -954,  -927,  -901,  -874,  -847,  -821,  -794,  -768,  -741,  -715,
227        -688,  -661,  -635,  -608,  -582,  -555,  -529,  -502,  -475,  -449,
228        -422,  -396,  -369,  -343,  -316,  -289,  -263,  -236,  -210,  -183,
229        -157,  -130,  -103,   -77,   -50,   -24,     3,    30,    56,    83,
230        109,   136,   162,   189,   216,   242,   269,   295,   322,   348,
231        375,   402,   428,   455,   481,   508,   534,   561,   588,   614,
232        641,   667,   694,   720,   747,   774,   800,   827,   853,   880,
233        907,   933,   960,   986,  1013,  1039,  1066
234    };
235    RAPIDJSON_ASSERT(index < 87);
236    return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
237}
238
239inline DiyFp GetCachedPower(int e, int* K) {
240
241    //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
242    double dk = (-61 - e) * 0.30102999566398114 + 347;  // dk must be positive, so can do ceiling in positive
243    int k = static_cast<int>(dk);
244    if (dk - k > 0.0)
245        k++;
246
247    unsigned index = static_cast<unsigned>((k >> 3) + 1);
248    *K = -(-348 + static_cast<int>(index << 3));    // decimal exponent no need lookup table
249
250    return GetCachedPowerByIndex(index);
251}
252
253inline DiyFp GetCachedPower10(int exp, int *outExp) {
254    RAPIDJSON_ASSERT(exp >= -348);
255    unsigned index = static_cast<unsigned>(exp + 348) / 8u;
256    *outExp = -348 + static_cast<int>(index) * 8;
257    return GetCachedPowerByIndex(index);
258}
259
260#ifdef __GNUC__
261RAPIDJSON_DIAG_POP
262#endif
263
264#ifdef __clang__
265RAPIDJSON_DIAG_POP
266RAPIDJSON_DIAG_OFF(padded)
267#endif
268
269} // namespace internal
270RAPIDJSON_NAMESPACE_END
271
272#endif // RAPIDJSON_DIYFP_H_
273