/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_lfsr113.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_lfsr113.h Source File#

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_lfsr113.h Source File
rocrand_lfsr113.h
1 // Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCRAND_LFSR113_H_
22 #define ROCRAND_LFSR113_H_
23 
24 #include "rocrand/rocrand_lfsr113_precomputed.h"
25 
26 #include <hip/hip_runtime.h>
27 
/// \def ROCRAND_LFSR113_DEFAULT_SEED_X
/// \brief Default X seed for LFSR113 PRNG.
#define ROCRAND_LFSR113_DEFAULT_SEED_X 2
/// \def ROCRAND_LFSR113_DEFAULT_SEED_Y
/// \brief Default Y seed for LFSR113 PRNG.
#define ROCRAND_LFSR113_DEFAULT_SEED_Y 8
/// \def ROCRAND_LFSR113_DEFAULT_SEED_Z
/// \brief Default Z seed for LFSR113 PRNG.
#define ROCRAND_LFSR113_DEFAULT_SEED_Z 16
/// \def ROCRAND_LFSR113_DEFAULT_SEED_W
/// \brief Default W seed for LFSR113 PRNG.
#define ROCRAND_LFSR113_DEFAULT_SEED_W 128
// end of group rocranddevice
45 
46 namespace rocrand_device
47 {
48 namespace detail
49 {
50 
51 __forceinline__ __device__ __host__ void mul_mat_vec_inplace(const unsigned int* m, uint4* z)
52 {
53  unsigned int v[4] = {z->x, z->y, z->z, z->w};
54  unsigned int r[LFSR113_N] = {0};
55  for(int ij = 0; ij < LFSR113_N * LFSR113_M; ij++)
56  {
57  const int i = ij / LFSR113_M;
58  const int j = ij % LFSR113_M;
59  const unsigned int b = (v[i] & (1U << j)) ? 0xffffffff : 0x0;
60  for(int k = 0; k < LFSR113_N; k++)
61  {
62  r[k] ^= b & m[i * LFSR113_M * LFSR113_N + j * LFSR113_N + k];
63  }
64  }
65  // Copy result into z
66  z->x = r[0];
67  z->y = r[1];
68  z->z = r[2];
69  z->w = r[3];
70 }
71 } // end namespace detail
72 
73 class lfsr113_engine
74 {
75 public:
76  struct lfsr113_state
77  {
78  uint4 z;
79  uint4 subsequence;
80  };
81 
87  __forceinline__ __device__ __host__ lfsr113_engine(const uint4 seed
92  const unsigned int subsequence = 0,
93  const unsigned long long offset = 0)
94  {
95  this->seed(seed, subsequence, offset);
96  }
97 
103  __forceinline__ __device__ __host__ void seed(uint4 seed_value,
104  const unsigned long long subsequence,
105  const unsigned long long offset = 0)
106  {
107  m_state.subsequence = seed_value;
108 
109  reset_start_subsequence();
110  discard_subsequence(subsequence);
111  discard(offset);
112  }
113 
115  __forceinline__ __device__ __host__ void discard()
116  {
117  discard_state();
118  }
119 
121  __forceinline__ __device__ __host__ void discard(unsigned long long offset)
122  {
123 #ifdef __HIP_DEVICE_COMPILE__
124  jump(offset, d_lfsr113_jump_matrices);
125 #else
126  jump(offset, h_lfsr113_jump_matrices);
127 #endif
128  }
129 
132  __forceinline__ __device__ __host__ void discard_subsequence(unsigned int subsequence)
133  {
134 // Discard n * 2^55 samples
135 #ifdef __HIP_DEVICE_COMPILE__
136  jump(subsequence, d_lfsr113_sequence_jump_matrices);
137 #else
138  jump(subsequence, h_lfsr113_sequence_jump_matrices);
139 #endif
140  }
141 
142  __forceinline__ __device__ __host__ unsigned int operator()()
143  {
144  return next();
145  }
146 
147  __forceinline__ __device__ __host__ unsigned int next()
148  {
149  unsigned int b;
150 
151  b = (((m_state.z.x << 6) ^ m_state.z.x) >> 13);
152  m_state.z.x = (((m_state.z.x & 4294967294U) << 18) ^ b);
153 
154  b = (((m_state.z.y << 2) ^ m_state.z.y) >> 27);
155  m_state.z.y = (((m_state.z.y & 4294967288U) << 2) ^ b);
156 
157  b = (((m_state.z.z << 13) ^ m_state.z.z) >> 21);
158  m_state.z.z = (((m_state.z.z & 4294967280U) << 7) ^ b);
159 
160  b = (((m_state.z.w << 3) ^ m_state.z.w) >> 12);
161  m_state.z.w = (((m_state.z.w & 4294967168U) << 13) ^ b);
162 
163  return (m_state.z.x ^ m_state.z.y ^ m_state.z.z ^ m_state.z.w);
164  }
165 
166 protected:
168  __forceinline__ __device__ __host__ void reset_start_subsequence()
169  {
170  m_state.z.x = m_state.subsequence.x;
171  m_state.z.y = m_state.subsequence.y;
172  m_state.z.z = m_state.subsequence.z;
173  m_state.z.w = m_state.subsequence.w;
174  }
175 
176  // Advances the internal state to the next state.
177  __forceinline__ __device__ __host__ void discard_state()
178  {
179  this->next();
180  }
181 
182  __forceinline__ __device__ __host__ void
183  jump(unsigned long long v,
184  const unsigned int (&jump_matrices)[LFSR113_JUMP_MATRICES][LFSR113_SIZE])
185  {
186  // x~(n + v) = (A^v mod m)x~n mod m
187  // The matrix (A^v mod m) can be precomputed for selected values of v.
188  //
189  // For LFSR113_JUMP_LOG2 = 2
190  // lfsr113_jump_matrices contains precomputed matrices:
191  // A^1, A^4, A^16...
192  //
193  // For LFSR113_JUMP_LOG2 = 2 and LFSR113_SEQUENCE_JUMP_LOG2 = 55
194  // lfsr113_sequence_jump_matrices contains precomputed matrices:
195  // A^(1 * 2^55), A^(4 * 2^55), A^(16 * 2^55)...
196  //
197  // Intermediate powers can be calculated as multiplication of the powers above.
198 
199  unsigned int mi = 0;
200  while(v > 0)
201  {
202  const unsigned int is = static_cast<unsigned int>(v) & ((1 << LFSR113_JUMP_LOG2) - 1);
203  for(unsigned int i = 0; i < is; i++)
204  {
205  detail::mul_mat_vec_inplace(jump_matrices[mi], &m_state.z);
206  }
207  mi++;
208  v >>= LFSR113_JUMP_LOG2;
209  }
210  }
211 
212 protected:
213  lfsr113_state m_state;
214 
215 }; // lfsr113_engine class
216 
217 } // end namespace rocrand_device
218 
225 typedef rocrand_device::lfsr113_engine rocrand_state_lfsr113;
227 
238 __forceinline__ __device__ __host__
239 void rocrand_init(const uint4 seed, const unsigned int subsequence, rocrand_state_lfsr113* state)
240 {
241  *state = rocrand_state_lfsr113(seed, subsequence);
242 }
243 
255 __forceinline__ __device__ __host__
256 void rocrand_init(const uint4 seed,
257  const unsigned int subsequence,
258  const unsigned long long offset,
259  rocrand_state_lfsr113* state)
260 {
261  *state = rocrand_state_lfsr113(seed, subsequence, offset);
262 }
263 
276 __forceinline__ __device__ __host__
277 unsigned int rocrand(rocrand_state_lfsr113* state)
278 {
279  return state->next();
280 }
281 
290 __forceinline__ __device__ __host__
291 void skipahead(unsigned long long offset, rocrand_state_lfsr113* state)
292 {
293  return state->discard(offset);
294 }
295 
305 __forceinline__ __device__ __host__
306 void skipahead_subsequence(unsigned int subsequence, rocrand_state_lfsr113* state)
307 {
308  return state->discard_subsequence(subsequence);
309 }
310 
320 __forceinline__ __device__ __host__
321 void skipahead_sequence(unsigned int sequence, rocrand_state_lfsr113* state)
322 {
323  return state->discard_subsequence(sequence);
324 }
325  // end of group rocranddevice
327 
328 #endif // ROCRAND_LFSR113_H_
__forceinline__ __device__ __host__ void rocrand_init(const uint4 seed, const unsigned int subsequence, rocrand_state_lfsr113 *state)
Initializes LFSR113 state.
Definition: rocrand_lfsr113.h:239
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_lfsr113 *state)
Updates the LFSR113 state to skip ahead by `offset` elements.
Definition: rocrand_lfsr113.h:291
#define ROCRAND_LFSR113_DEFAULT_SEED_Y
Default Y seed for LFSR113 PRNG.
Definition: rocrand_lfsr113.h:37
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned int sequence, rocrand_state_lfsr113 *state)
Updates the LFSR113 state to skip ahead by `sequence` sequences.
Definition: rocrand_lfsr113.h:321
#define ROCRAND_LFSR113_DEFAULT_SEED_W
Default W seed for LFSR113 PRNG.
Definition: rocrand_lfsr113.h:43
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned int subsequence, rocrand_state_lfsr113 *state)
Updates the LFSR113 state to skip ahead by `subsequence` subsequences.
Definition: rocrand_lfsr113.h:306
#define ROCRAND_LFSR113_DEFAULT_SEED_Z
Default Z seed for LFSR113 PRNG.
Definition: rocrand_lfsr113.h:40
#define ROCRAND_LFSR113_DEFAULT_SEED_X
Default X seed for LFSR113 PRNG.
Definition: rocrand_lfsr113.h:34
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_lfsr113 *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_lfsr113.h:277