/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/rapidjson/internal/diyfp.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/rapidjson/internal/diyfp.h Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/rapidjson/internal/diyfp.h Source File
diyfp.h
Go to the documentation of this file.
1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
16 // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
17 // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
18 
19 #ifndef RAPIDJSON_DIYFP_H_
20 #define RAPIDJSON_DIYFP_H_
21 
22 #include "../rapidjson.h"
23 #include "clzll.h"
24 #include <limits>
25 
26 #if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
27 #include <intrin.h>
28 #if !defined(_ARM64EC_)
29 #pragma intrinsic(_umul128)
30 #else
31 #pragma comment(lib, "softintrin")
32 #endif
33 #endif
34 
36 namespace internal {
37 
38 #ifdef __GNUC__
39 RAPIDJSON_DIAG_PUSH
40 RAPIDJSON_DIAG_OFF(effc++)
41 #endif
42 
43 #ifdef __clang__
44 RAPIDJSON_DIAG_PUSH
45 RAPIDJSON_DIAG_OFF(padded)
46 #endif
47 
48 struct DiyFp
49 {
50  DiyFp() : f(), e() {}
51 
52  DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
53 
54  explicit DiyFp(double d)
55  {
56  union
57  {
58  double d;
59  uint64_t u64;
60  } u = {d};
61 
62  int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
63  uint64_t significand = (u.u64 & kDpSignificandMask);
64  if(biased_e != 0)
65  {
66  f = significand + kDpHiddenBit;
67  e = biased_e - kDpExponentBias;
68  }
69  else
70  {
71  f = significand;
72  e = kDpMinExponent + 1;
73  }
74  }
75 
76  DiyFp operator-(const DiyFp& rhs) const { return DiyFp(f - rhs.f, e); }
77 
78  DiyFp operator*(const DiyFp& rhs) const
79  {
80 #if defined(_MSC_VER) && defined(_M_AMD64)
81  uint64_t h;
82  uint64_t l = _umul128(f, rhs.f, &h);
83  if(l & (uint64_t(1) << 63)) // rounding
84  h++;
85  return DiyFp(h, e + rhs.e + 64);
86 #elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && \
87  defined(__x86_64__)
88  __extension__ typedef unsigned __int128 uint128;
89  uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
90  uint64_t h = static_cast<uint64_t>(p >> 64);
91  uint64_t l = static_cast<uint64_t>(p);
92  if(l & (uint64_t(1) << 63)) // rounding
93  h++;
94  return DiyFp(h, e + rhs.e + 64);
95 #else
96  const uint64_t M32 = 0xFFFFFFFF;
97  const uint64_t a = f >> 32;
98  const uint64_t b = f & M32;
99  const uint64_t c = rhs.f >> 32;
100  const uint64_t d = rhs.f & M32;
101  const uint64_t ac = a * c;
102  const uint64_t bc = b * c;
103  const uint64_t ad = a * d;
104  const uint64_t bd = b * d;
105  uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
106  tmp += 1U << 31;
107  return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
108 #endif
109  }
110 
111  DiyFp Normalize() const
112  {
113  int s = static_cast<int>(clzll(f));
114  return DiyFp(f << s, e - s);
115  }
116 
118  {
119  DiyFp res = *this;
120  while(!(res.f & (kDpHiddenBit << 1)))
121  {
122  res.f <<= 1;
123  res.e--;
124  }
125  res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
126  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
127  return res;
128  }
129 
131  {
132  DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
133  DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
134  mi.f <<= mi.e - pl.e;
135  mi.e = pl.e;
136  *plus = pl;
137  *minus = mi;
138  }
139 
140  double ToDouble() const
141  {
142  union
143  {
144  double d;
145  uint64_t u64;
146  } u;
148  if(e < kDpDenormalExponent)
149  {
150  // Underflow.
151  return 0.0;
152  }
153  if(e >= kDpMaxExponent)
154  {
155  // Overflow.
156  return std::numeric_limits<double>::infinity();
157  }
158  const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0)
159  ? 0
160  : static_cast<uint64_t>(e + kDpExponentBias);
161  u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
162  return u.d;
163  }
164 
165  static const int kDiySignificandSize = 64;
166  static const int kDpSignificandSize = 52;
167  static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
168  static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
169  static const int kDpMinExponent = -kDpExponentBias;
170  static const int kDpDenormalExponent = -kDpExponentBias + 1;
171  static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
172  static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
173  static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
174 
176  int e;
177 };
178 
179 inline DiyFp GetCachedPowerByIndex(size_t index)
180 {
181  // 10^-348, 10^-340, ..., 10^340
182  static const uint64_t kCachedPowers_F[] = {
183  RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
184  RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
185  RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
186  RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
187  RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
188  RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
189  RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
190  RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
191  RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
192  RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
193  RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
194  RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
195  RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
196  RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
197  RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
198  RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
199  RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
200  RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
201  RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
202  RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
203  RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
204  RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
205  RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
206  RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
207  RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
208  RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
209  RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
210  RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
211  RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
212  RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
213  RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
214  RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
215  RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
216  RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
217  RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
218  RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
219  RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
220  RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
221  RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
222  RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
223  RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
224  RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
225  RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
226  RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)};
227  static const int16_t kCachedPowers_E[] = {
228  -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, -927, -901,
229  -874, -847, -821, -794, -768, -741, -715, -688, -661, -635, -608, -582, -555,
230  -529, -502, -475, -449, -422, -396, -369, -343, -316, -289, -263, -236, -210,
231  -183, -157, -130, -103, -77, -50, -24, 3, 30, 56, 83, 109, 136,
232  162, 189, 216, 242, 269, 295, 322, 348, 375, 402, 428, 455, 481,
233  508, 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, 827,
234  853, 880, 907, 933, 960, 986, 1013, 1039, 1066};
235  RAPIDJSON_ASSERT(index < 87);
236  return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
237 }
238 
239 inline DiyFp GetCachedPower(int e, int* K)
240 {
241 
242  // int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
243  double dk =
244  (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
245  int k = static_cast<int>(dk);
246  if(dk - k > 0.0)
247  k++;
248 
249  unsigned index = static_cast<unsigned>((k >> 3) + 1);
250  *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
251 
252  return GetCachedPowerByIndex(index);
253 }
254 
255 inline DiyFp GetCachedPower10(int exp, int* outExp)
256 {
257  RAPIDJSON_ASSERT(exp >= -348);
258  unsigned index = static_cast<unsigned>(exp + 348) / 8u;
259  *outExp = -348 + static_cast<int>(index) * 8;
260  return GetCachedPowerByIndex(index);
261 }
262 
263 #ifdef __GNUC__
264 RAPIDJSON_DIAG_POP
265 #endif
266 
267 #ifdef __clang__
268 RAPIDJSON_DIAG_POP
269 RAPIDJSON_DIAG_OFF(padded)
270 #endif
271 
272 } // namespace internal
274 
275 #endif // RAPIDJSON_DIYFP_H_
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:451
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition: rapidjson.h:121
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition: rapidjson.h:124
__host__ T exp(T x)
Definition: math_v2.hpp:391
__host__ __device__ minus() -> minus< void, void >
FIXME: create macro to replace 'host device' and nothing more.
__host__ __device__ plus() -> plus< void, void >
FIXME: create macro to replace 'host device' and nothing more.
Definition: allocators.h:459
DiyFp GetCachedPowerByIndex(size_t index)
Definition: diyfp.h:179
DiyFp GetCachedPower10(int exp, int *outExp)
Definition: diyfp.h:255
DiyFp GetCachedPower(int e, int *K)
Definition: diyfp.h:239
uint32_t clzll(uint64_t x)
Definition: clzll.h:32
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1517
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition: rapidjson.h:326
signed short int16_t
Definition: stdint.h:122
unsigned __int64 uint64_t
Definition: stdint.h:136
Definition: diyfp.h:49
static const int kDpSignificandSize
Definition: diyfp.h:166
uint64_t f
Definition: diyfp.h:175
static const int kDpExponentBias
Definition: diyfp.h:167
DiyFp NormalizeBoundary() const
Definition: diyfp.h:117
static const uint64_t kDpHiddenBit
Definition: diyfp.h:173
static const int kDpMaxExponent
Definition: diyfp.h:168
static const uint64_t kDpSignificandMask
Definition: diyfp.h:172
DiyFp operator*(const DiyFp &rhs) const
Definition: diyfp.h:78
static const int kDpDenormalExponent
Definition: diyfp.h:170
DiyFp(uint64_t fp, int exp)
Definition: diyfp.h:52
static const int kDpMinExponent
Definition: diyfp.h:169
DiyFp operator-(const DiyFp &rhs) const
Definition: diyfp.h:76
DiyFp Normalize() const
Definition: diyfp.h:111
static const uint64_t kDpExponentMask
Definition: diyfp.h:171
static const int kDiySignificandSize
Definition: diyfp.h:165
double ToDouble() const
Definition: diyfp.h:140
DiyFp(double d)
Definition: diyfp.h:54
void NormalizedBoundaries(DiyFp *minus, DiyFp *plus) const
Definition: diyfp.h:130
DiyFp()
Definition: diyfp.h:50
int e
Definition: diyfp.h:176