Elaztek Developer Hub
Blamite Game Engine - blam!  00398.09.22.23.2015.blamite
The core library for the Blamite Game Engine.
diyfp.h
Go to the documentation of this file.
1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
16 // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
17 // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
18 
19 #ifndef RAPIDJSON_DIYFP_H_
20 #define RAPIDJSON_DIYFP_H_
21 
22 #include "../rapidjson.h"
23 #include "clzll.h"
24 #include <limits>
25 
26 #if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
27 #include <intrin.h>
28 #if !defined(_ARM64EC_)
29 #pragma intrinsic(_umul128)
30 #else
31 #pragma comment(lib,"softintrin")
32 #endif
33 #endif
34 
36 namespace internal {
37 
38 #ifdef __GNUC__
39 RAPIDJSON_DIAG_PUSH
40 RAPIDJSON_DIAG_OFF(effc++)
41 #endif
42 
43 #ifdef __clang__
44 RAPIDJSON_DIAG_PUSH
45 RAPIDJSON_DIAG_OFF(padded)
46 #endif
47 
48 struct DiyFp {
49  DiyFp() : f(), e() {}
50 
51  DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
52 
53  explicit DiyFp(double d) {
54  union {
55  double d;
56  uint64_t u64;
57  } u = { d };
58 
59  int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
60  uint64_t significand = (u.u64 & kDpSignificandMask);
61  if (biased_e != 0) {
62  f = significand + kDpHiddenBit;
63  e = biased_e - kDpExponentBias;
64  }
65  else {
66  f = significand;
67  e = kDpMinExponent + 1;
68  }
69  }
70 
71  DiyFp operator-(const DiyFp& rhs) const {
72  return DiyFp(f - rhs.f, e);
73  }
74 
75  DiyFp operator*(const DiyFp& rhs) const {
76 #if defined(_MSC_VER) && defined(_M_AMD64)
77  uint64_t h;
78  uint64_t l = _umul128(f, rhs.f, &h);
79  if (l & (uint64_t(1) << 63)) // rounding
80  h++;
81  return DiyFp(h, e + rhs.e + 64);
82 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
83  __extension__ typedef unsigned __int128 uint128;
84  uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
85  uint64_t h = static_cast<uint64_t>(p >> 64);
86  uint64_t l = static_cast<uint64_t>(p);
87  if (l & (uint64_t(1) << 63)) // rounding
88  h++;
89  return DiyFp(h, e + rhs.e + 64);
90 #else
91  const uint64_t M32 = 0xFFFFFFFF;
92  const uint64_t a = f >> 32;
93  const uint64_t b = f & M32;
94  const uint64_t c = rhs.f >> 32;
95  const uint64_t d = rhs.f & M32;
96  const uint64_t ac = a * c;
97  const uint64_t bc = b * c;
98  const uint64_t ad = a * d;
99  const uint64_t bd = b * d;
100  uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
101  tmp += 1U << 31;
102  return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
103 #endif
104  }
105 
106  DiyFp Normalize() const {
107  int s = static_cast<int>(clzll(f));
108  return DiyFp(f << s, e - s);
109  }
110 
112  DiyFp res = *this;
113  while (!(res.f & (kDpHiddenBit << 1))) {
114  res.f <<= 1;
115  res.e--;
116  }
117  res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
118  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
119  return res;
120  }
121 
122  void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
123  DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
124  DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
125  mi.f <<= mi.e - pl.e;
126  mi.e = pl.e;
127  *plus = pl;
128  *minus = mi;
129  }
130 
131  double ToDouble() const {
132  union {
133  double d;
134  uint64_t u64;
135  }u;
137  if (e < kDpDenormalExponent) {
138  // Underflow.
139  return 0.0;
140  }
141  if (e >= kDpMaxExponent) {
142  // Overflow.
143  return std::numeric_limits<double>::infinity();
144  }
145  const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
146  static_cast<uint64_t>(e + kDpExponentBias);
147  u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
148  return u.d;
149  }
150 
151  static const int kDiySignificandSize = 64;
152  static const int kDpSignificandSize = 52;
153  static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
154  static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
155  static const int kDpMinExponent = -kDpExponentBias;
156  static const int kDpDenormalExponent = -kDpExponentBias + 1;
157  static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
158  static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
159  static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
160 
162  int e;
163 };
164 
165 inline DiyFp GetCachedPowerByIndex(size_t index) {
166  // 10^-348, 10^-340, ..., 10^340
167  static const uint64_t kCachedPowers_F[] = {
168  RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
169  RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
170  RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
171  RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
172  RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
173  RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
174  RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
175  RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
176  RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
177  RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
178  RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
179  RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
180  RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
181  RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
182  RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
183  RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
184  RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
185  RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
186  RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
187  RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
188  RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
189  RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
190  RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
191  RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
192  RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
193  RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
194  RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
195  RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
196  RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
197  RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
198  RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
199  RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
200  RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
201  RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
202  RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
203  RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
204  RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
205  RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
206  RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
207  RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
208  RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
209  RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
210  RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
211  RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
212  };
213  static const int16_t kCachedPowers_E[] = {
214  -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
215  -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
216  -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
217  -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
218  -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
219  109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
220  375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
221  641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
222  907, 933, 960, 986, 1013, 1039, 1066
223  };
224  RAPIDJSON_ASSERT(index < 87);
225  return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
226 }
227 
228 inline DiyFp GetCachedPower(int e, int* K) {
229 
230  //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
231  double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
232  int k = static_cast<int>(dk);
233  if (dk - k > 0.0)
234  k++;
235 
236  unsigned index = static_cast<unsigned>((k >> 3) + 1);
237  *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
238 
239  return GetCachedPowerByIndex(index);
240 }
241 
242 inline DiyFp GetCachedPower10(int exp, int *outExp) {
243  RAPIDJSON_ASSERT(exp >= -348);
244  unsigned index = static_cast<unsigned>(exp + 348) / 8u;
245  *outExp = -348 + static_cast<int>(index) * 8;
246  return GetCachedPowerByIndex(index);
247 }
248 
249 #ifdef __GNUC__
250 RAPIDJSON_DIAG_POP
251 #endif
252 
253 #ifdef __clang__
254 RAPIDJSON_DIAG_POP
255 RAPIDJSON_DIAG_OFF(padded)
256 #endif
257 
258 } // namespace internal
260 
261 #endif // RAPIDJSON_DIYFP_H_
RAPIDJSON_NAMESPACE_END
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition: rapidjson.h:124
internal::DiyFp::kDpSignificandMask
static const uint64_t kDpSignificandMask
Definition: diyfp.h:158
internal::DiyFp::DiyFp
DiyFp(uint64_t fp, int exp)
Definition: diyfp.h:51
internal::DiyFp::ToDouble
double ToDouble() const
Definition: diyfp.h:131
internal::DiyFp::kDpMaxExponent
static const int kDpMaxExponent
Definition: diyfp.h:154
internal::DiyFp::operator-
DiyFp operator-(const DiyFp &rhs) const
Definition: diyfp.h:71
RAPIDJSON_NAMESPACE_BEGIN
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition: rapidjson.h:121
internal::DiyFp::kDiySignificandSize
static const int kDiySignificandSize
Definition: diyfp.h:151
internal::DiyFp::kDpExponentMask
static const uint64_t kDpExponentMask
Definition: diyfp.h:157
internal::DiyFp::NormalizeBoundary
DiyFp NormalizeBoundary() const
Definition: diyfp.h:111
clzll.h
internal::DiyFp::kDpSignificandSize
static const int kDpSignificandSize
Definition: diyfp.h:152
internal::clzll
uint32_t clzll(uint64_t x)
Definition: clzll.h:32
internal::DiyFp
Definition: diyfp.h:48
internal::GetCachedPower
DiyFp GetCachedPower(int e, int *K)
Definition: diyfp.h:228
uint64_t
unsigned long long uint64_t
Definition: stdint.h:18
internal::DiyFp::DiyFp
DiyFp(double d)
Definition: diyfp.h:53
RAPIDJSON_ASSERT
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:437
internal::DiyFp::Normalize
DiyFp Normalize() const
Definition: diyfp.h:106
internal::GetCachedPowerByIndex
DiyFp GetCachedPowerByIndex(size_t index)
Definition: diyfp.h:165
internal::DiyFp::kDpMinExponent
static const int kDpMinExponent
Definition: diyfp.h:155
internal::DiyFp::kDpExponentBias
static const int kDpExponentBias
Definition: diyfp.h:153
a
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1249
internal::DiyFp::operator*
DiyFp operator*(const DiyFp &rhs) const
Definition: diyfp.h:75
internal::DiyFp::NormalizedBoundaries
void NormalizedBoundaries(DiyFp *minus, DiyFp *plus) const
Definition: diyfp.h:122
internal::DiyFp::kDpHiddenBit
static const uint64_t kDpHiddenBit
Definition: diyfp.h:159
internal::DiyFp::kDpDenormalExponent
static const int kDpDenormalExponent
Definition: diyfp.h:156
int16_t
short int16_t
Definition: stdint.h:12
internal
Definition: allocators.h:422
internal::GetCachedPower10
DiyFp GetCachedPower10(int exp, int *outExp)
Definition: diyfp.h:242
internal::DiyFp::DiyFp
DiyFp()
Definition: diyfp.h:49
RAPIDJSON_UINT64_C2
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition: rapidjson.h:320
internal::DiyFp::e
int e
Definition: diyfp.h:162
internal::DiyFp::f
uint64_t f
Definition: diyfp.h:161