/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_mtgp32.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_mtgp32.h Source File#

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_mtgp32.h Source File
rocrand_mtgp32.h
1 // Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 /*
22  * Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
23  * University. All rights reserved.
24  * Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
25  * University and University of Tokyo. All rights reserved.
26  *
27  * Redistribution and use in source and binary forms, with or without
28  * modification, are permitted provided that the following conditions are
29  * met:
30  *
31  * * Redistributions of source code must retain the above copyright
32  * notice, this list of conditions and the following disclaimer.
33  * * Redistributions in binary form must reproduce the above
34  * copyright notice, this list of conditions and the following
35  * disclaimer in the documentation and/or other materials provided
36  * with the distribution.
37  * * Neither the name of the Hiroshima University nor the names of
38  * its contributors may be used to endorse or promote products
39  * derived from this software without specific prior written
40  * permission.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53  */
54 
55 #ifndef ROCRAND_MTGP32_H_
56 #define ROCRAND_MTGP32_H_
57 
58 #include "rocrand/rocrand.h"
59 
60 #include <hip/hip_runtime.h>
61 
62 #include <stdlib.h>
63 #include <string.h>
64 
65 #define MTGP_MEXP 11213
66 #define MTGP_N 351
67 #define MTGP_FLOOR_2P 256
68 #define MTGP_CEIL_2P 512
69 #define MTGP_TN MTGP_FLOOR_2P
70 #define MTGP_LS (MTGP_TN * 3)
71 #define MTGP_BN_MAX 512
72 #define MTGP_TS 16
73 #define MTGP_STATE 1024
74 #define MTGP_MASK 1023
75 
76 // Source: https://github.com/MersenneTwister-Lab/MTGP/blob/master/mtgp32-fast.h
98  int mexp;
99  int pos;
100  int sh1;
101  int sh2;
102  uint32_t tbl[16];
103  uint32_t tmp_tbl[16];
104  uint32_t flt_tmp_tbl[16];
105  uint32_t mask;
106  unsigned char poly_sha1[21];
107 };
108 
109 namespace rocrand_device {
110 
111 struct mtgp32_params
112 {
113  unsigned int pos_tbl[MTGP_BN_MAX];
114  unsigned int param_tbl[MTGP_BN_MAX][MTGP_TS];
115  unsigned int temper_tbl[MTGP_BN_MAX][MTGP_TS];
116  unsigned int single_temper_tbl[MTGP_BN_MAX][MTGP_TS];
117  unsigned int sh1_tbl[MTGP_BN_MAX];
118  unsigned int sh2_tbl[MTGP_BN_MAX];
119  unsigned int mask[1];
120 };
121 
122 typedef mtgp32_params_fast_t mtgp32_fast_params;
123 
124 struct mtgp32_state
125 {
126  int offset;
127  int id;
128  unsigned int status[MTGP_STATE];
129 };
130 
131 inline
132 void rocrand_mtgp32_init_state(unsigned int array[],
133  const mtgp32_fast_params *para, unsigned int seed)
134 {
135  int i;
136  int size = para->mexp / 32 + 1;
137  unsigned int hidden_seed;
138  unsigned int tmp;
139  hidden_seed = para->tbl[4] ^ (para->tbl[8] << 16);
140  tmp = hidden_seed;
141  tmp += tmp >> 16;
142  tmp += tmp >> 8;
143  memset(array, tmp & 0xff, sizeof(unsigned int) * size);
144  array[0] = seed;
145  array[1] = hidden_seed;
146  for (i = 1; i < size; i++)
147  array[i] ^= (1812433253) * (array[i - 1] ^ (array[i - 1] >> 30)) + i;
148 }
149 
150 class mtgp32_engine
151 {
152 public:
153  __forceinline__ __device__ __host__
154  // Initialization is not supported for __shared__ variables
155  mtgp32_engine() // cppcheck-suppress uninitMemberVar
156  {
157 
158  }
159 
160  __forceinline__ __device__ __host__ mtgp32_engine(const mtgp32_state& m_state,
161  const mtgp32_params* params,
162  int bid)
163  {
164  this->m_state = m_state;
165  pos_tbl = params->pos_tbl[bid];
166  sh1_tbl = params->sh1_tbl[bid];
167  sh2_tbl = params->sh2_tbl[bid];
168  mask = params->mask[0];
169  for (int j = 0; j < MTGP_TS; j++) {
170  param_tbl[j] = params->param_tbl[bid][j];
171  temper_tbl[j] = params->temper_tbl[bid][j];
172  single_temper_tbl[j] = params->single_temper_tbl[bid][j];
173  }
174  }
175 
176  __forceinline__ __device__ __host__ void copy(const mtgp32_engine* m_engine)
177  {
178 #if defined(__HIP_DEVICE_COMPILE__)
179  const unsigned int thread_id = threadIdx.x;
180  for(int i = thread_id; i < MTGP_STATE; i += blockDim.x)
181  m_state.status[i] = m_engine->m_state.status[i];
182 
183  if (thread_id == 0)
184  {
185  m_state.offset = m_engine->m_state.offset;
186  m_state.id = m_engine->m_state.id;
187  pos_tbl = m_engine->pos_tbl;
188  sh1_tbl = m_engine->sh1_tbl;
189  sh2_tbl = m_engine->sh2_tbl;
190  mask = m_engine->mask;
191  }
192  if (thread_id < MTGP_TS)
193  {
194  param_tbl[thread_id] = m_engine->param_tbl[thread_id];
195  temper_tbl[thread_id] = m_engine->temper_tbl[thread_id];
196  single_temper_tbl[thread_id] = m_engine->single_temper_tbl[thread_id];
197  }
198  __syncthreads();
199 #else
200  this->m_state = m_engine->m_state;
201  pos_tbl = m_engine->pos_tbl;
202  sh1_tbl = m_engine->sh1_tbl;
203  sh2_tbl = m_engine->sh2_tbl;
204  mask = m_engine->mask;
205  for (int j = 0; j < MTGP_TS; j++) {
206  param_tbl[j] = m_engine->param_tbl[j];
207  temper_tbl[j] = m_engine->temper_tbl[j];
208  single_temper_tbl[j] = m_engine->single_temper_tbl[j];
209  }
210 #endif
211  }
212 
213  __forceinline__ __device__ __host__ void set_params(mtgp32_params* params)
214  {
215  pos_tbl = params->pos_tbl[m_state.id];
216  sh1_tbl = params->sh1_tbl[m_state.id];
217  sh2_tbl = params->sh2_tbl[m_state.id];
218  mask = params->mask[0];
219  for (int j = 0; j < MTGP_TS; j++) {
220  param_tbl[j] = params->param_tbl[m_state.id][j];
221  temper_tbl[j] = params->temper_tbl[m_state.id][j];
222  single_temper_tbl[j] = params->single_temper_tbl[m_state.id][j];
223  }
224  }
225 
226  __forceinline__ __device__ __host__ unsigned int operator()()
227  {
228  return this->next();
229  }
230 
231  __forceinline__ __device__ __host__ unsigned int next()
232  {
233 #ifdef __HIP_DEVICE_COMPILE__
234  unsigned int o = next_thread(threadIdx.x);
235  __syncthreads();
236  if(threadIdx.x == 0)
237  {
238  m_state.offset = (m_state.offset + blockDim.x) & MTGP_MASK;
239  }
240  __syncthreads();
241  return o;
242 #else
243  return 0;
244 #endif
245  }
246 
247  __forceinline__ __device__ __host__ unsigned int next_single()
248  {
249 #if defined(__HIP_DEVICE_COMPILE__)
250  unsigned int t = threadIdx.x;
251  unsigned int d = blockDim.x;
252  int pos = pos_tbl;
253  unsigned int r;
254  unsigned int o;
255 
256  r = para_rec(m_state.status[(t + m_state.offset) & MTGP_MASK],
257  m_state.status[(t + m_state.offset + 1) & MTGP_MASK],
258  m_state.status[(t + m_state.offset + pos) & MTGP_MASK]);
259  m_state.status[(t + m_state.offset + MTGP_N) & MTGP_MASK] = r;
260 
261  o = temper_single(r, m_state.status[(t + m_state.offset + pos - 1) & MTGP_MASK]);
262  __syncthreads();
263  if (t == 0)
264  m_state.offset = (m_state.offset + d) & MTGP_MASK;
265  __syncthreads();
266  return o;
267 #else
268  return 0;
269 #endif
270  }
271 
272 private:
273  __forceinline__ __device__ __host__ unsigned int
274  para_rec(unsigned int X1, unsigned int X2, unsigned int Y) const
275  {
276  unsigned int X = (X1 & mask) ^ X2;
277  unsigned int MAT;
278 
279  X ^= X << sh1_tbl;
280  Y = X ^ (Y >> sh2_tbl);
281  MAT = param_tbl[Y & 0x0f];
282  return Y ^ MAT;
283  }
284 
285  __forceinline__ __device__ __host__ unsigned int temper(unsigned int V, unsigned int T) const
286  {
287  unsigned int MAT;
288 
289  T ^= T >> 16;
290  T ^= T >> 8;
291  MAT = temper_tbl[T & 0x0f];
292  return V ^ MAT;
293  }
294 
295  __forceinline__ __device__ __host__ unsigned int temper_single(unsigned int V,
296  unsigned int T) const
297  {
298  unsigned int MAT;
299  unsigned int r;
300 
301  T ^= T >> 16;
302  T ^= T >> 8;
303  MAT = single_temper_tbl[T & 0x0f];
304  r = (V >> 9) ^ MAT;
305  return r;
306  }
307 
308 protected:
311  __forceinline__ __device__ __host__ unsigned int next_thread(unsigned int thread_idx)
312  {
313  const unsigned int r
314  = para_rec(m_state.status[(thread_idx + m_state.offset) & MTGP_MASK],
315  m_state.status[(thread_idx + m_state.offset + 1) & MTGP_MASK],
316  m_state.status[(thread_idx + m_state.offset + pos_tbl) & MTGP_MASK]);
317  m_state.status[(thread_idx + m_state.offset + MTGP_N) & MTGP_MASK] = r;
318  return temper(r, m_state.status[(thread_idx + m_state.offset + pos_tbl - 1) & MTGP_MASK]);
319  }
320 
321 public:
322  // State
323  mtgp32_state m_state;
324  // Parameters
325  unsigned int pos_tbl;
326  unsigned int param_tbl[MTGP_TS];
327  unsigned int temper_tbl[MTGP_TS];
328  unsigned int sh1_tbl;
329  unsigned int sh2_tbl;
330  unsigned int single_temper_tbl[MTGP_TS];
331  unsigned int mask;
332 
333 }; // mtgp32_engine class
334 
335 } // end namespace rocrand_device
336 
343 typedef rocrand_device::mtgp32_engine rocrand_state_mtgp32;
344 typedef rocrand_device::mtgp32_state mtgp32_state;
345 typedef rocrand_device::mtgp32_fast_params mtgp32_fast_params;
346 typedef rocrand_device::mtgp32_params mtgp32_params;
348 
364 __host__
365 inline rocrand_status rocrand_make_state_mtgp32(rocrand_state_mtgp32* state,
366  mtgp32_fast_params params[],
367  int n,
368  unsigned long long seed)
369 {
370  int i;
371  rocrand_state_mtgp32 * h_state = (rocrand_state_mtgp32 *) malloc(sizeof(rocrand_state_mtgp32) * n);
372  seed = seed ^ (seed >> 32);
373 
374  if (h_state == NULL)
376 
377  for (i = 0; i < n; i++) {
378  rocrand_device::rocrand_mtgp32_init_state(&(h_state[i].m_state.status[0]), &params[i], (unsigned int)seed + i + 1);
379  h_state[i].m_state.offset = 0;
380  h_state[i].m_state.id = i;
381  h_state[i].pos_tbl = params[i].pos;
382  h_state[i].sh1_tbl = params[i].sh1;
383  h_state[i].sh2_tbl = params[i].sh2;
384  h_state[i].mask = params[0].mask;
385  for (int j = 0; j < MTGP_TS; j++) {
386  h_state[i].param_tbl[j] = params[i].tbl[j];
387  h_state[i].temper_tbl[j] = params[i].tmp_tbl[j];
388  h_state[i].single_temper_tbl[j] = params[i].flt_tmp_tbl[j];
389  }
390  }
391 
392  const hipError_t error
393  = hipMemcpy(state, h_state, sizeof(rocrand_state_mtgp32) * n, hipMemcpyDefault);
394  free(h_state);
395 
396  if(error != hipSuccess)
398 
399  return ROCRAND_STATUS_SUCCESS;
400 }
401 
418 __host__
419 inline rocrand_status rocrand_make_constant(const mtgp32_fast_params params[], mtgp32_params* p)
420 {
421  const int block_num = MTGP_BN_MAX;
422  const int size1 = sizeof(uint32_t) * block_num;
423  const int size2 = sizeof(uint32_t) * block_num * MTGP_TS;
424  uint32_t *h_pos_tbl;
425  uint32_t *h_sh1_tbl;
426  uint32_t *h_sh2_tbl;
427  uint32_t *h_param_tbl;
428  uint32_t *h_temper_tbl;
429  uint32_t *h_single_temper_tbl;
430  uint32_t *h_mask;
431  h_pos_tbl = (uint32_t *)malloc(size1);
432  h_sh1_tbl = (uint32_t *)malloc(size1);
433  h_sh2_tbl = (uint32_t *)malloc(size1);
434  h_param_tbl = (uint32_t *)malloc(size2);
435  h_temper_tbl = (uint32_t *)malloc(size2);
436  h_single_temper_tbl = (uint32_t *)malloc(size2);
437  h_mask = (uint32_t *)malloc(sizeof(uint32_t));
439 
440  if (h_pos_tbl == NULL || h_sh1_tbl == NULL || h_sh2_tbl == NULL
441  || h_param_tbl == NULL || h_temper_tbl == NULL || h_single_temper_tbl == NULL
442  || h_mask == NULL) {
443  printf("failure in allocating host memory for constant table.\n");
445  }
446  else {
447  h_mask[0] = params[0].mask;
448  for (int i = 0; i < block_num; i++) {
449  h_pos_tbl[i] = params[i].pos;
450  h_sh1_tbl[i] = params[i].sh1;
451  h_sh2_tbl[i] = params[i].sh2;
452  for (int j = 0; j < MTGP_TS; j++) {
453  h_param_tbl[i * MTGP_TS + j] = params[i].tbl[j];
454  h_temper_tbl[i * MTGP_TS + j] = params[i].tmp_tbl[j];
455  h_single_temper_tbl[i * MTGP_TS + j] = params[i].flt_tmp_tbl[j];
456  }
457  }
458 
459  if (hipMemcpy(p->pos_tbl, h_pos_tbl, size1, hipMemcpyHostToDevice) != hipSuccess)
461  if (hipMemcpy(p->sh1_tbl, h_sh1_tbl, size1, hipMemcpyHostToDevice) != hipSuccess)
463  if (hipMemcpy(p->sh2_tbl, h_sh2_tbl, size1, hipMemcpyHostToDevice) != hipSuccess)
465  if (hipMemcpy(p->param_tbl, h_param_tbl, size2, hipMemcpyHostToDevice) != hipSuccess)
467  if (hipMemcpy(p->temper_tbl, h_temper_tbl, size2, hipMemcpyHostToDevice) != hipSuccess)
469  if (hipMemcpy(p->single_temper_tbl, h_single_temper_tbl, size2, hipMemcpyHostToDevice) != hipSuccess)
471  if (hipMemcpy(p->mask, h_mask, sizeof(unsigned int), hipMemcpyHostToDevice) != hipSuccess)
473  }
474 
475  free(h_pos_tbl);
476  free(h_sh1_tbl);
477  free(h_sh2_tbl);
478  free(h_param_tbl);
479  free(h_temper_tbl);
480  free(h_single_temper_tbl);
481  free(h_mask);
482 
483  return status;
484 }
485 
498 __forceinline__ __device__
499 unsigned int rocrand(rocrand_state_mtgp32* state)
500 {
501  return state->next();
502 }
503 
535 __forceinline__ __device__
536 void rocrand_mtgp32_block_copy(rocrand_state_mtgp32* src, rocrand_state_mtgp32* dest)
537 {
538  dest->copy(src);
539 }
540 
547 __forceinline__ __device__
548 void rocrand_mtgp32_set_params(rocrand_state_mtgp32* state, mtgp32_params* params)
549 {
550  state->set_params(params);
551 }
552  // end of group rocranddevice
554 
555 #endif // ROCRAND_MTGP32_H_
__forceinline__ __device__ void rocrand_mtgp32_block_copy(rocrand_state_mtgp32 *src, rocrand_state_mtgp32 *dest)
Copies MTGP32 state to another state using block of threads.
Definition: rocrand_mtgp32.h:536
__host__ rocrand_status rocrand_make_state_mtgp32(rocrand_state_mtgp32 *state, mtgp32_fast_params params[], int n, unsigned long long seed)
Initializes MTGP32 states.
Definition: rocrand_mtgp32.h:365
__forceinline__ __device__ unsigned int rocrand(rocrand_state_mtgp32 *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_mtgp32.h:499
__host__ rocrand_status rocrand_make_constant(const mtgp32_fast_params params[], mtgp32_params *p)
Loads parameters for MTGP32.
Definition: rocrand_mtgp32.h:419
__forceinline__ __device__ void rocrand_mtgp32_set_params(rocrand_state_mtgp32 *state, mtgp32_params *params)
Changes parameters of a MTGP32 state.
Definition: rocrand_mtgp32.h:548
rocrand_status
rocRAND function call status type
Definition: rocrand.h:61
@ ROCRAND_STATUS_SUCCESS
No errors.
Definition: rocrand.h:62
@ ROCRAND_STATUS_ALLOCATION_FAILED
Memory allocation failed during execution.
Definition: rocrand.h:65
Definition: rocrand_mtgp32.h:97
uint32_t tmp_tbl[16]
Definition: rocrand_mtgp32.h:103
int pos
Definition: rocrand_mtgp32.h:99
int mexp
Definition: rocrand_mtgp32.h:98
int sh2
Definition: rocrand_mtgp32.h:101
int sh1
Definition: rocrand_mtgp32.h:100
uint32_t mask
Definition: rocrand_mtgp32.h:105
unsigned char poly_sha1[21]
Definition: rocrand_mtgp32.h:106
uint32_t tbl[16]
Definition: rocrand_mtgp32.h:102
uint32_t flt_tmp_tbl[16]
Definition: rocrand_mtgp32.h:104