/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/workgroup_barrier.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/workgroup_barrier.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/workgroup_barrier.hpp Source File
workgroup_barrier.hpp
Go to the documentation of this file.
1 #pragma once
2 #include <hip/hip_runtime.h>
3 #include <stdint.h>
4 
5 namespace ck {
7 {
8  __device__ workgroup_barrier(uint32_t* ptr) : base_ptr(ptr) {}
9 
10  __device__ uint32_t ld(uint32_t offset)
11  {
12 #if 0
14  amdgcn_make_buffer_resource(base_ptr),
15  0,
16  offset,
17  AMDGCN_BUFFER_GLC);
18  union cvt {
19  float f32;
20  uint32_t u32;
21  };
22  cvt x;
23  x.f32 = d;
24  return x.u32;
25 #endif
26  return __atomic_load_n(base_ptr + offset, __ATOMIC_RELAXED);
27  }
28 
29  __device__ void wait_eq(uint32_t offset, uint32_t value)
30  {
31  if(threadIdx.x == 0)
32  {
33  while(ld(offset) != value) {}
34  }
35  __syncthreads();
36  }
37 
38  __device__ void wait_lt(uint32_t offset, uint32_t value)
39  {
40  if(threadIdx.x == 0)
41  {
42  while(ld(offset) < value) {}
43  }
44  __syncthreads();
45  }
46 
47  __device__ void wait_set(uint32_t offset, uint32_t compare, uint32_t value)
48  {
49  if(threadIdx.x == 0)
50  {
51  while(atomicCAS(base_ptr + offset, compare, value) != compare) {}
52  }
53  __syncthreads();
54  }
55 
56  // enter critical zoon, assume buffer is zero when launch kernel
57  __device__ void aquire(uint32_t offset) { wait_set(offset, 0, 1); }
58 
59  // exit critical zoon, assume buffer is zero when launch kernel
60  __device__ void release(uint32_t offset) { wait_set(offset, 1, 0); }
61 
62  __device__ void inc(uint32_t offset)
63  {
64  __syncthreads();
65  if(threadIdx.x == 0)
66  {
67  atomicAdd(base_ptr + offset, 1);
68  }
69  }
70 
72 };
73 } // namespace ck
Definition: ck.hpp:267
__device__ float llvm_amdgcn_raw_buffer_load_fp32(int32x4_t srsrc, index_t voffset, index_t soffset, index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load.f32")
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1350
unsigned int uint32_t
Definition: stdint.h:126
Definition: workgroup_barrier.hpp:7
__device__ void wait_lt(uint32_t offset, uint32_t value)
Definition: workgroup_barrier.hpp:38
__device__ void wait_set(uint32_t offset, uint32_t compare, uint32_t value)
Definition: workgroup_barrier.hpp:47
uint32_t * base_ptr
Definition: workgroup_barrier.hpp:71
__device__ void release(uint32_t offset)
Definition: workgroup_barrier.hpp:60
__device__ workgroup_barrier(uint32_t *ptr)
Definition: workgroup_barrier.hpp:8
__device__ void inc(uint32_t offset)
Definition: workgroup_barrier.hpp:62
__device__ void aquire(uint32_t offset)
Definition: workgroup_barrier.hpp:57
__device__ void wait_eq(uint32_t offset, uint32_t value)
Definition: workgroup_barrier.hpp:29
__device__ uint32_t ld(uint32_t offset)
Definition: workgroup_barrier.hpp:10