21 #ifndef ROCRAND_LFSR113_H_
22 #define ROCRAND_LFSR113_H_
24 #include "rocrand/rocrand_lfsr113_precomputed.h"
26 #include <hip/hip_runtime.h>
34 #define ROCRAND_LFSR113_DEFAULT_SEED_X 2
37 #define ROCRAND_LFSR113_DEFAULT_SEED_Y 8
40 #define ROCRAND_LFSR113_DEFAULT_SEED_Z 16
43 #define ROCRAND_LFSR113_DEFAULT_SEED_W 128
46 namespace rocrand_device
// Multiplies the bit-vector stored in the four 32-bit words of *z by the bit-matrix m,
// with XOR acting as addition: for every set bit of the input, one LFSR113_N-word row
// of m is XOR-ed into the accumulator r.
//   m  flat table read as m[i * LFSR113_M * LFSR113_N + j * LFSR113_N + k], where i is the
//      input word index, j the bit index inside that word and k the output word index.
//   z  input vector (x, y, z, w).
// NOTE(review): the write-back of r into *z is not visible in this listing; the "inplace"
// name suggests it follows the loops — confirm against the full header.
51 __forceinline__ __device__ __host__
void mul_mat_vec_inplace(
const unsigned int* m, uint4* z)
// Copy the input words into an array so they can be selected by index i.
53 unsigned int v[4] = {z->x, z->y, z->z, z->w};
// Accumulator for the product, starts as all zeros.
54 unsigned int r[LFSR113_N] = {0};
// Single flattened loop over every (word i, bit j) pair of the input.
// NOTE(review): v has four entries and the shift below is on a 32-bit value, so this
// presumably relies on LFSR113_N == 4 and LFSR113_M == 32 — confirm where they are defined.
55 for(
int ij = 0; ij < LFSR113_N * LFSR113_M; ij++)
57 const int i = ij / LFSR113_M;
58 const int j = ij % LFSR113_M;
// b is an all-ones mask when bit j of word i is set and zero otherwise, so the inner
// loop can apply the row unconditionally instead of branching.
59 const unsigned int b = (v[i] & (1U << j)) ? 0xffffffff : 0x0;
60 for(
int k = 0; k < LFSR113_N; k++)
62 r[k] ^= b & m[i * LFSR113_M * LFSR113_N + j * LFSR113_N + k];
// Constructs the engine by forwarding all three arguments to seed().
//   seed         four-word seed value.
//   subsequence  subsequence to start in, defaults to 0.
//   offset       offset within the subsequence, defaults to 0.
// NOTE(review): the default argument of `seed` is elided in this listing — presumably it is
// built from the ROCRAND_LFSR113_DEFAULT_SEED_{X,Y,Z,W} macros; confirm.
87 __forceinline__ __device__ __host__ lfsr113_engine(
const uint4 seed
92 const unsigned int subsequence = 0,
93 const unsigned long long offset = 0)
95 this->seed(seed, subsequence, offset);
// Re-seeds the engine: stores seed_value, resets the working state from it and then
// skips ahead by `subsequence` subsequences.
//   seed_value   four-word seed, kept in m_state.subsequence.
//   subsequence  number of subsequences to skip.
//   offset       defaults to 0.
// NOTE(review): no use of `offset` is visible in this listing — confirm that the full
// source applies it (e.g. through discard(offset)); otherwise the parameter is ignored.
// NOTE(review): `subsequence` is unsigned long long here, but discard_subsequence() takes
// unsigned int, so the upper 32 bits are dropped at that call.
103 __forceinline__ __device__ __host__
void seed(uint4 seed_value,
104 const unsigned long long subsequence,
105 const unsigned long long offset = 0)
// Remember the seed so reset_start_subsequence() can restore the state from it.
107 m_state.subsequence = seed_value;
109 reset_start_subsequence();
110 discard_subsequence(subsequence);
115 __forceinline__ __device__ __host__
void discard()
// Advances the state by `offset` using precomputed jump matrices, via jump().
// Under __HIP_DEVICE_COMPILE__ the d_lfsr113_jump_matrices table is passed; the second
// call passes h_lfsr113_jump_matrices (the #else / #endif lines separating the two calls
// are not shown in this listing).
121 __forceinline__ __device__ __host__
void discard(
unsigned long long offset)
123 #ifdef __HIP_DEVICE_COMPILE__
124 jump(offset, d_lfsr113_jump_matrices);
126 jump(offset, h_lfsr113_jump_matrices);
// Advances the state by `subsequence` subsequences via jump(), using the sequence jump
// matrices instead of the per-element ones used by discard(offset).
// Under __HIP_DEVICE_COMPILE__ the d_lfsr113_sequence_jump_matrices table is passed; the
// second call passes h_lfsr113_sequence_jump_matrices (the #else / #endif lines are not
// shown in this listing).
// NOTE(review): the parameter is a 32-bit unsigned int, while seed() accepts a 64-bit
// subsequence and passes it here.
132 __forceinline__ __device__ __host__
void discard_subsequence(
unsigned int subsequence)
135 #ifdef __HIP_DEVICE_COMPILE__
136 jump(subsequence, d_lfsr113_sequence_jump_matrices);
138 jump(subsequence, h_lfsr113_sequence_jump_matrices);
142 __forceinline__ __device__ __host__
unsigned int operator()()
// Advances the generator by one step and returns the next 32-bit output.
// Each of the four state words is updated independently with its own shift/mask/XOR
// recurrence; the returned value is the XOR of the four updated words.
// NOTE(review): the shift and mask constants look like those of L'Ecuyer's LFSR113
// generator — confirm against the reference. The declaration of the temporary `b` is not
// visible in this listing.
147 __forceinline__ __device__ __host__
unsigned int next()
// Component x: mask 4294967294U == 0xFFFFFFFE (clears the lowest bit).
151 b = (((m_state.z.x << 6) ^ m_state.z.x) >> 13);
152 m_state.z.x = (((m_state.z.x & 4294967294U) << 18) ^ b);
// Component y: mask 4294967288U == 0xFFFFFFF8 (clears the lowest 3 bits).
154 b = (((m_state.z.y << 2) ^ m_state.z.y) >> 27);
155 m_state.z.y = (((m_state.z.y & 4294967288U) << 2) ^ b);
// Component z: mask 4294967280U == 0xFFFFFFF0 (clears the lowest 4 bits).
157 b = (((m_state.z.z << 13) ^ m_state.z.z) >> 21);
158 m_state.z.z = (((m_state.z.z & 4294967280U) << 7) ^ b);
// Component w: mask 4294967168U == 0xFFFFFF80 (clears the lowest 7 bits).
160 b = (((m_state.z.w << 3) ^ m_state.z.w) >> 12);
161 m_state.z.w = (((m_state.z.w & 4294967168U) << 13) ^ b);
// Combine the four components into the output word.
163 return (m_state.z.x ^ m_state.z.y ^ m_state.z.z ^ m_state.z.w);
// Resets the working state m_state.z to the value stored in m_state.subsequence, copying
// the four words one by one. seed() stores the seed value there before calling this.
168 __forceinline__ __device__ __host__
void reset_start_subsequence()
170 m_state.z.x = m_state.subsequence.x;
171 m_state.z.y = m_state.subsequence.y;
172 m_state.z.z = m_state.subsequence.z;
173 m_state.z.w = m_state.subsequence.w;
177 __forceinline__ __device__ __host__
void discard_state()
// Advances m_state.z by a distance encoded in v, using a table of precomputed matrices.
//   v              jump distance, consumed LFSR113_JUMP_LOG2 bits at a time.
//   jump_matrices  LFSR113_JUMP_MATRICES matrices of LFSR113_SIZE words each.
// NOTE(review): the enclosing loop and the declaration/update of `mi` are not visible in
// this listing; presumably the visible statements repeat while v is non-zero with `mi`
// selecting the next matrix each round — confirm against the full header.
182 __forceinline__ __device__ __host__
void
183 jump(
unsigned long long v,
184 const unsigned int (&jump_matrices)[LFSR113_JUMP_MATRICES][LFSR113_SIZE])
// `is` is the lowest LFSR113_JUMP_LOG2 bits of v: how many times matrix `mi` is applied.
202 const unsigned int is =
static_cast<unsigned int>(v) & ((1 << LFSR113_JUMP_LOG2) - 1);
203 for(
unsigned int i = 0; i < is; i++)
205 detail::mul_mat_vec_inplace(jump_matrices[mi], &m_state.z);
// Drop the bits that were just consumed.
208 v >>= LFSR113_JUMP_LOG2;
213 lfsr113_state m_state;
225 typedef rocrand_device::lfsr113_engine rocrand_state_lfsr113;
// Initializes *state from a seed and a subsequence by assigning a newly constructed
// rocrand_state_lfsr113 to it; the constructor's offset argument is left at its default of 0.
//   seed         four-word seed value.
//   subsequence  subsequence to start in.
//   state        output, overwritten with the new engine state.
238 __forceinline__ __device__ __host__
239 void rocrand_init(
const uint4 seed,
const unsigned int subsequence, rocrand_state_lfsr113* state)
241 *state = rocrand_state_lfsr113(seed, subsequence);
255 __forceinline__ __device__ __host__
257 const unsigned int subsequence,
258 const unsigned long long offset,
259 rocrand_state_lfsr113* state)
261 *state = rocrand_state_lfsr113(seed, subsequence, offset);
// Returns the next 32-bit value from the generator by calling state->next(), which also
// advances *state by one step.
//   state  engine state to draw from; modified.
276 __forceinline__ __device__ __host__
277 unsigned int rocrand(rocrand_state_lfsr113* state)
279 return state->next();
// Skips *state ahead by `offset` by forwarding to state->discard(offset).
//   offset  distance to skip.
//   state   engine state to advance; modified.
290 __forceinline__ __device__ __host__
291 void skipahead(
unsigned long long offset, rocrand_state_lfsr113* state)
// discard() returns void, so this `return` only forwards the call.
293 return state->discard(offset);
305 __forceinline__ __device__ __host__
308 return state->discard_subsequence(subsequence);
320 __forceinline__ __device__ __host__
323 return state->discard_subsequence(sequence);
__forceinline__ __device__ __host__ void rocrand_init(const uint4 seed, const unsigned int subsequence, rocrand_state_lfsr113 *state)
Initializes LFSR113 state.
Definition: rocrand_lfsr113.h:239
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_lfsr113 *state)
Updates the LFSR113 state to skip ahead by `offset` elements.
Definition: rocrand_lfsr113.h:291
#define ROCRAND_LFSR113_DEFAULT_SEED_Y
Default Y seed for LFSR113 PRNG.
Definition: rocrand_lfsr113.h:37
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned int sequence, rocrand_state_lfsr113 *state)
Updates the LFSR113 state to skip ahead by `sequence` sequences.
Definition: rocrand_lfsr113.h:321
#define ROCRAND_LFSR113_DEFAULT_SEED_W
Default W seed for LFSR113 PRNG.
Definition: rocrand_lfsr113.h:43
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned int subsequence, rocrand_state_lfsr113 *state)
Updates the LFSR113 state to skip ahead by `subsequence` subsequences.
Definition: rocrand_lfsr113.h:306
#define ROCRAND_LFSR113_DEFAULT_SEED_Z
Default Z seed for LFSR113 PRNG.
Definition: rocrand_lfsr113.h:40
#define ROCRAND_LFSR113_DEFAULT_SEED_X
Default X seed for LFSR113 PRNG.
Definition: rocrand_lfsr113.h:34
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_lfsr113 *state)
Returns a uniformly distributed random unsigned int value from the range [0, 2^32 - 1].
Definition: rocrand_lfsr113.h:277