15 template <
typename,
bool>
21 using type = std::underlying_type_t<T>;
73 return __builtin_amdgcn_readfirstlane(threadIdx.x /
get_warp_size());
82 #if CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
88 __builtin_amdgcn_s_waitcnt(0xc07f);
89 __builtin_amdgcn_s_barrier();
98 asm volatile(
"s_wait_loadcnt %0 \n"
99 "s_barrier_signal -1 \n"
105 asm volatile(
"s_waitcnt vmcnt(%0) \n"
116 s_waitcnt vmcnt(0) \n \
117 s_waitcnt lgkmcnt(0) \n \
125 asm volatile(
"s_nop %0" : :
"n"(cnt) :);
127 __builtin_amdgcn_sched_barrier(cnt);
131 #define CK_CONSTANT_ADDRESS_SPACE \
132 __attribute__((address_space( \
133 static_cast<safe_underlying_type_t<address_space_enum>>(address_space_enum::constant))))
135 template <
typename T>
140 #pragma clang diagnostic push
141 #pragma clang diagnostic ignored "-Wold-style-cast"
143 #pragma clang diagnostic pop
146 template <
typename T>
151 #pragma clang diagnostic push
152 #pragma clang diagnostic ignored "-Wold-style-cast"
154 #pragma clang diagnostic pop
#define CK_CONSTANT_ADDRESS_SPACE
Definition: arch.hpp:131
#define CK_TILE_DEVICE
Definition: config.hpp:40
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:41
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE index_t get_warp_size()
Definition: arch.hpp:51
CK_TILE_DEVICE index_t get_lane_id()
Definition: arch.hpp:69
CK_TILE_DEVICE void atomic_add(X *p_dst, const X &x)
memory_operation_enum
Definition: arch.hpp:44
CK_TILE_HOST_DEVICE T add(const T &a, const T &b)
Definition: generic_memory_space_atomic.hpp:12
CK_TILE_DEVICE index_t get_block_1d_id()
Definition: arch.hpp:66
CK_TILE_DEVICE void block_sync_lds()
Definition: arch.hpp:80
int32_t index_t
Definition: integer.hpp:9
__host__ __device__ T CK_CONSTANT_ADDRESS_SPACE * cast_pointer_to_constant_address_space(T *p)
Definition: arch.hpp:147
CK_TILE_DEVICE void block_sync_lds_direct_load()
Definition: arch.hpp:113
CK_TILE_DEVICE void s_nop(index_t cnt=0)
Definition: arch.hpp:122
CK_TILE_DEVICE index_t get_thread_local_1d_id()
Definition: arch.hpp:62
CK_TILE_DEVICE index_t get_block_size()
Definition: arch.hpp:59
CK_TILE_DEVICE index_t get_warp_id()
Definition: arch.hpp:71
__device__ T * cast_pointer_to_generic_address_space(T CK_CONSTANT_ADDRESS_SPACE *p)
Definition: arch.hpp:136
CK_TILE_DEVICE index_t get_thread_id()
Definition: arch.hpp:76
CK_TILE_DEVICE index_t get_thread_global_1d_id()
Definition: arch.hpp:64
address_space_enum
Definition: arch.hpp:34
CK_TILE_DEVICE index_t get_block_id()
Definition: arch.hpp:78
CK_TILE_DEVICE index_t get_grid_size()
Definition: arch.hpp:57
CK_TILE_DEVICE void block_sync_load_raw(index_t cnt=0)
Definition: arch.hpp:95
typename safe_underlying_type< T, std::is_enum< T >::value >::type safe_underlying_type_t
Definition: arch.hpp:31
void type
Definition: arch.hpp:27
std::underlying_type_t< T > type
Definition: arch.hpp:21