/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_xorwow.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_xorwow.h Source File#

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_xorwow.h Source File
rocrand_xorwow.h
1 // Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCRAND_XORWOW_H_
22 #define ROCRAND_XORWOW_H_
23 
24 #include "rocrand/rocrand_common.h"
25 #include "rocrand/rocrand_xorwow_precomputed.h"
26 
27 #include <hip/hip_runtime.h>
28 
37  #define ROCRAND_XORWOW_DEFAULT_SEED 0ULL // end of group rocranddevice
39 
40 namespace rocrand_device {
41 namespace detail {
42 
43 __forceinline__ __device__ __host__ void copy_vec(unsigned int* dst, const unsigned int* src)
44 {
45  for (int i = 0; i < XORWOW_N; i++)
46  {
47  dst[i] = src[i];
48  }
49 }
50 
51 __forceinline__ __device__ __host__ void mul_mat_vec_inplace(const unsigned int* m, unsigned int* v)
52 {
53  unsigned int r[XORWOW_N] = { 0 };
54  for (int ij = 0; ij < XORWOW_N * XORWOW_M; ij++)
55  {
56  const int i = ij / XORWOW_M;
57  const int j = ij % XORWOW_M;
58  const unsigned int b = (v[i] & (1U << j)) ? 0xffffffff : 0x0;
59  for (int k = 0; k < XORWOW_N; k++)
60  {
61  r[k] ^= b & m[i * XORWOW_M * XORWOW_N + j * XORWOW_N + k];
62  }
63  }
64  copy_vec(v, r);
65 }
66 
67 } // end detail namespace
68 
69 class xorwow_engine
70 {
71 public:
72  struct xorwow_state
73  {
74  // Weyl sequence value
75  unsigned int d;
76 
77  #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
78  // The Box–Muller transform requires two inputs to convert uniformly
79  // distributed real values [0; 1] to normally distributed real values
80  // (with mean = 0, and stddev = 1). Often user wants only one
81  // normally distributed number, to save performance and random
82  // numbers the 2nd value is saved for future requests.
83  unsigned int boxmuller_float_state; // is there a float in boxmuller_float
84  unsigned int boxmuller_double_state; // is there a double in boxmuller_double
85  float boxmuller_float; // normally distributed float
86  double boxmuller_double; // normally distributed double
87  #endif
88 
89  // Xorshift values (160 bits)
90  unsigned int x[5];
91  };
92 
93  __forceinline__ __device__ __host__ xorwow_engine()
94  : xorwow_engine(ROCRAND_XORWOW_DEFAULT_SEED, 0, 0)
95  {}
96 
102  __forceinline__ __device__ __host__ xorwow_engine(const unsigned long long seed,
103  const unsigned long long subsequence,
104  const unsigned long long offset)
105  {
106  m_state.x[0] = 123456789U;
107  m_state.x[1] = 362436069U;
108  m_state.x[2] = 521288629U;
109  m_state.x[3] = 88675123U;
110  m_state.x[4] = 5783321U;
111 
112  m_state.d = 6615241U;
113 
114  // Constants are arbitrary prime numbers
115  const unsigned int s0 = static_cast<unsigned int>(seed) ^ 0x2c7f967fU;
116  const unsigned int s1 = static_cast<unsigned int>(seed >> 32) ^ 0xa03697cbU;
117  const unsigned int t0 = 1228688033U * s0;
118  const unsigned int t1 = 2073658381U * s1;
119  m_state.x[0] += t0;
120  m_state.x[1] ^= t0;
121  m_state.x[2] += t1;
122  m_state.x[3] ^= t1;
123  m_state.x[4] += t0;
124  m_state.d += t1 + t0;
125 
126  discard_subsequence(subsequence);
127  discard(offset);
128 
129  #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
130  m_state.boxmuller_float_state = 0;
131  m_state.boxmuller_double_state = 0;
132  #endif
133  }
134 
136  __forceinline__ __device__ __host__ void discard(unsigned long long offset)
137  {
138  #ifdef __HIP_DEVICE_COMPILE__
139  jump(offset, d_xorwow_jump_matrices);
140  #else
141  jump(offset, h_xorwow_jump_matrices);
142  #endif
143 
144  // Apply n steps to Weyl sequence value as well
145  m_state.d += static_cast<unsigned int>(offset) * 362437;
146  }
147 
150  __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence)
151  {
152  // Discard n * 2^67 samples
153  #ifdef __HIP_DEVICE_COMPILE__
154  jump(subsequence, d_xorwow_sequence_jump_matrices);
155  #else
156  jump(subsequence, h_xorwow_sequence_jump_matrices);
157  #endif
158 
159  // d has the same value because 2^67 is divisible by 2^32 (d is 32-bit)
160  }
161 
162  __forceinline__ __device__ __host__ unsigned int operator()()
163  {
164  return next();
165  }
166 
167  __forceinline__ __device__ __host__ unsigned int next()
168  {
169  const unsigned int t = m_state.x[0] ^ (m_state.x[0] >> 2);
170  m_state.x[0] = m_state.x[1];
171  m_state.x[1] = m_state.x[2];
172  m_state.x[2] = m_state.x[3];
173  m_state.x[3] = m_state.x[4];
174  m_state.x[4] = (m_state.x[4] ^ (m_state.x[4] << 4)) ^ (t ^ (t << 1));
175 
176  m_state.d += 362437;
177 
178  return m_state.d + m_state.x[4];
179  }
180 
181 protected:
182  __forceinline__ __device__ __host__ void
183  jump(unsigned long long v,
184  const unsigned int jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE])
185  {
186  // x~(n + v) = (A^v mod m)x~n mod m
187  // The matrix (A^v mod m) can be precomputed for selected values of v.
188  //
189  // For XORWOW_JUMP_LOG2 = 2
190  // xorwow_jump_matrices contains precomputed matrices:
191  // A^1, A^4, A^16...
192  //
193  // For XORWOW_JUMP_LOG2 = 2 and XORWOW_SEQUENCE_JUMP_LOG2 = 67
194  // xorwow_sequence_jump_matrices contains precomputed matrices:
195  // A^(1 * 2^67), A^(4 * 2^67), A^(16 * 2^67)...
196  //
197  // Intermediate powers can be calculated as multiplication of the powers above.
198 
199  unsigned int mi = 0;
200  while (v > 0)
201  {
202  const unsigned int is = static_cast<unsigned int>(v) & ((1 << XORWOW_JUMP_LOG2) - 1);
203  for (unsigned int i = 0; i < is; i++)
204  {
205  detail::mul_mat_vec_inplace(jump_matrices[mi], m_state.x);
206  }
207  mi++;
208  v >>= XORWOW_JUMP_LOG2;
209  }
210  }
211 
212 protected:
213  // State
214  xorwow_state m_state;
215 
216  #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
217  friend struct detail::engine_boxmuller_helper<xorwow_engine>;
218  #endif
219 
220 }; // xorwow_engine class
221 
222 } // end namespace rocrand_device
223 
230 typedef rocrand_device::xorwow_engine rocrand_state_xorwow;
232 
244 __forceinline__ __device__ __host__
245 void rocrand_init(const unsigned long long seed,
246  const unsigned long long subsequence,
247  const unsigned long long offset,
248  rocrand_state_xorwow* state)
249 {
250  *state = rocrand_state_xorwow(seed, subsequence, offset);
251 }
252 
265 __forceinline__ __device__ __host__
266 unsigned int rocrand(rocrand_state_xorwow* state)
267 {
268  return state->next();
269 }
270 
279 __forceinline__ __device__ __host__
280 void skipahead(unsigned long long offset, rocrand_state_xorwow* state)
281 {
282  return state->discard(offset);
283 }
284 
294 __forceinline__ __device__ __host__
295 void skipahead_subsequence(unsigned long long subsequence, rocrand_state_xorwow* state)
296 {
297  return state->discard_subsequence(subsequence);
298 }
299 
309 __forceinline__ __device__ __host__
310 void skipahead_sequence(unsigned long long sequence, rocrand_state_xorwow* state)
311 {
312  return state->discard_subsequence(sequence);
313 }
314  // end of group rocranddevice
316 
317 #endif // ROCRAND_XORWOW_H_
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_xorwow *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_xorwow.h:266
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, rocrand_state_xorwow *state)
Updates XORWOW state to skip ahead by `sequence` sequences.
Definition: rocrand_xorwow.h:310
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_xorwow *state)
Updates XORWOW state to skip ahead by `offset` elements.
Definition: rocrand_xorwow.h:280
__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_xorwow *state)
Initialize XORWOW state.
Definition: rocrand_xorwow.h:245
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_xorwow *state)
Updates XORWOW state to skip ahead by `subsequence` subsequences.
Definition: rocrand_xorwow.h:295
#define ROCRAND_XORWOW_DEFAULT_SEED
Default seed for XORWOW PRNG.
Definition: rocrand_xorwow.h:37