21 #ifndef ROCRAND_XORWOW_H_
22 #define ROCRAND_XORWOW_H_
24 #include "rocrand/rocrand_common.h"
25 #include "rocrand/rocrand_xorwow_precomputed.h"
27 #include <hip/hip_runtime.h>
37 #define ROCRAND_XORWOW_DEFAULT_SEED 0ULL
40 namespace rocrand_device {
43 __forceinline__ __device__ __host__
void copy_vec(
unsigned int* dst,
const unsigned int* src)
45 for (
int i = 0; i < XORWOW_N; i++)
51 __forceinline__ __device__ __host__
void mul_mat_vec_inplace(
const unsigned int* m,
unsigned int* v)
53 unsigned int r[XORWOW_N] = { 0 };
54 for (
int ij = 0; ij < XORWOW_N * XORWOW_M; ij++)
56 const int i = ij / XORWOW_M;
57 const int j = ij % XORWOW_M;
58 const unsigned int b = (v[i] & (1U << j)) ? 0xffffffff : 0x0;
59 for (
int k = 0; k < XORWOW_N; k++)
61 r[k] ^= b & m[i * XORWOW_M * XORWOW_N + j * XORWOW_N + k];
77 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
83 unsigned int boxmuller_float_state;
84 unsigned int boxmuller_double_state;
85 float boxmuller_float;
86 double boxmuller_double;
93 __forceinline__ __device__ __host__ xorwow_engine()
102 __forceinline__ __device__ __host__ xorwow_engine(
const unsigned long long seed,
103 const unsigned long long subsequence,
104 const unsigned long long offset)
106 m_state.x[0] = 123456789U;
107 m_state.x[1] = 362436069U;
108 m_state.x[2] = 521288629U;
109 m_state.x[3] = 88675123U;
110 m_state.x[4] = 5783321U;
112 m_state.d = 6615241U;
115 const unsigned int s0 =
static_cast<unsigned int>(seed) ^ 0x2c7f967fU;
116 const unsigned int s1 =
static_cast<unsigned int>(seed >> 32) ^ 0xa03697cbU;
117 const unsigned int t0 = 1228688033U * s0;
118 const unsigned int t1 = 2073658381U * s1;
124 m_state.d += t1 + t0;
126 discard_subsequence(subsequence);
129 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
130 m_state.boxmuller_float_state = 0;
131 m_state.boxmuller_double_state = 0;
136 __forceinline__ __device__ __host__
void discard(
unsigned long long offset)
138 #ifdef __HIP_DEVICE_COMPILE__
139 jump(offset, d_xorwow_jump_matrices);
141 jump(offset, h_xorwow_jump_matrices);
145 m_state.d +=
static_cast<unsigned int>(offset) * 362437;
150 __forceinline__ __device__ __host__
void discard_subsequence(
unsigned long long subsequence)
153 #ifdef __HIP_DEVICE_COMPILE__
154 jump(subsequence, d_xorwow_sequence_jump_matrices);
156 jump(subsequence, h_xorwow_sequence_jump_matrices);
162 __forceinline__ __device__ __host__
unsigned int operator()()
167 __forceinline__ __device__ __host__
unsigned int next()
169 const unsigned int t = m_state.x[0] ^ (m_state.x[0] >> 2);
170 m_state.x[0] = m_state.x[1];
171 m_state.x[1] = m_state.x[2];
172 m_state.x[2] = m_state.x[3];
173 m_state.x[3] = m_state.x[4];
174 m_state.x[4] = (m_state.x[4] ^ (m_state.x[4] << 4)) ^ (t ^ (t << 1));
178 return m_state.d + m_state.x[4];
182 __forceinline__ __device__ __host__
void
183 jump(
unsigned long long v,
184 const unsigned int jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE])
202 const unsigned int is =
static_cast<unsigned int>(v) & ((1 << XORWOW_JUMP_LOG2) - 1);
203 for (
unsigned int i = 0; i < is; i++)
205 detail::mul_mat_vec_inplace(jump_matrices[mi], m_state.x);
208 v >>= XORWOW_JUMP_LOG2;
214 xorwow_state m_state;
216 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
217 friend struct detail::engine_boxmuller_helper<xorwow_engine>;
230 typedef rocrand_device::xorwow_engine rocrand_state_xorwow;
244 __forceinline__ __device__ __host__
246 const unsigned long long subsequence,
247 const unsigned long long offset,
248 rocrand_state_xorwow* state)
250 *state = rocrand_state_xorwow(seed, subsequence, offset);
265 __forceinline__ __device__ __host__
266 unsigned int rocrand(rocrand_state_xorwow* state)
268 return state->next();
279 __forceinline__ __device__ __host__
280 void skipahead(
unsigned long long offset, rocrand_state_xorwow* state)
282 return state->discard(offset);
294 __forceinline__ __device__ __host__
297 return state->discard_subsequence(subsequence);
309 __forceinline__ __device__ __host__
312 return state->discard_subsequence(sequence);
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_xorwow *state)
Returns a uniformly distributed random unsigned int value from the range [0, 2^32 - 1].
Definition: rocrand_xorwow.h:266
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by the given number of sequences (the sequence argument).
Definition: rocrand_xorwow.h:310
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by the given number of elements (the offset argument).
Definition: rocrand_xorwow.h:280
__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_xorwow *state)
Initializes the XORWOW state from the given seed, subsequence, and offset.
Definition: rocrand_xorwow.h:245
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by the given number of subsequences (the subsequence argument).
Definition: rocrand_xorwow.h:295
#define ROCRAND_XORWOW_DEFAULT_SEED
Default seed for the XORWOW PRNG (0ULL).
Definition: rocrand_xorwow.h:37