/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/arch/arch.hpp File Reference

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/arch/arch.hpp File Reference#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/arch/arch.hpp File Reference
arch.hpp File Reference

Go to the source code of this file.

Classes

struct  gfx11_t
 
struct  gfx12_t
 

Macros

#define CK_TILE_S_CNT_MAX   0b1100'1111'0111'1111#define CK_TILE_VMCNT(cnt) \ ([]() { static_assert(!((cnt) >> 6), "VMCNT only has 6 bits"); }(), \ ((cnt) & 0b1111) | (((cnt) & 0b110000) << 10))#define CK_TILE_EXPCNT(cnt) \ ([]() { static_assert(!((cnt) >> 3), "EXP only has 3 bits"); }(), ((cnt) << 4))#define CK_TILE_LGKMCNT(cnt) \ ([]() { static_assert(!((cnt) >> 4), "LGKM only has 4 bits"); }(), ((cnt) << 8))namespace ck_tile {template <typename, bool>struct safe_underlying_type;template <typename T>struct safe_underlying_type<T, true>{ using type = std::underlying_type_t<T>;};template <typename T>struct safe_underlying_type<T, false>{ using type = void;};template <typename T>using safe_underlying_type_t = typename safe_underlying_type<T, std::is_enum<T>::value>::type;enum struct address_space_enum : std::uint16_t{ generic = 0, global, lds, sgpr, constant, vgpr};enum struct memory_operation_enum : std::uint16_t{ set = 0, atomic_add, atomic_max, add};CK_TILE_HOST_DEVICE constexpr index_t get_warp_size(){#if defined(__GFX9__) || !defined(__HIP_DEVICE_COMPILE__) return 64;#else return 32;#endif}CK_TILE_HOST bool is_wave32(){ hipDeviceProp_t props{}; int device; auto status = hipGetDevice(&device); if(status != hipSuccess) { return false; } status = hipGetDeviceProperties(&props, device); if(status != hipSuccess) { return false; } return props.major > 9;}CK_TILE_DEVICE index_t get_grid_size() { return gridDim.x; }CK_TILE_DEVICE index_t get_block_size() { return blockDim.x; }CK_TILE_DEVICE index_t get_thread_local_1d_id() { return threadIdx.x; }CK_TILE_DEVICE index_t get_thread_global_1d_id() { return blockIdx.x * blockDim.x + threadIdx.x; }CK_TILE_DEVICE index_t get_block_1d_id() { return blockIdx.x; }CK_TILE_DEVICE index_t get_lane_id() { return __lane_id(); }template <bool ReturnSgpr = true>CK_TILE_DEVICE index_t get_warp_id(bool_constant<ReturnSgpr> = {}){ const index_t warp_id = threadIdx.x / get_warp_size(); if constexpr(ReturnSgpr) { return __builtin_amdgcn_readfirstlane(warp_id); } else { return warp_id; }}CK_TILE_DEVICE index_t get_thread_id() { return threadIdx.x; }CK_TILE_DEVICE index_t get_block_id() { return blockIdx.x; }CK_TILE_DEVICE void block_sync_load_raw(index_t cnt = 0){#ifdef __gfx12__ asm volatile("s_wait_loadcnt %0 \n" "s_barrier_signal -1 \n" "s_barrier_wait -1" : : "n"(cnt) : "memory");#else asm volatile("s_waitcnt vmcnt(%0) \n" "s_barrier" : : "n"(cnt) : "memory");#endif}struct waitcnt_arg{ CK_TILE_DEVICE static constexpr index_t MAX = 0b11'00'1111'0'111'1111;
 
#define CK_CONSTANT_ADDRESS_SPACE
 

Functions

template<index_t vmcnt = waitcnt_arg::kMaxVmCnt, index_t expcnt = waitcnt_arg::kMaxExpCnt, index_t lgkmcnt = waitcnt_arg::kMaxLgkmCnt>
CK_TILE_DEVICE void s_waitcnt ()
 
template<index_t vmcnt = waitcnt_arg::kMaxVmCnt, index_t expcnt = waitcnt_arg::kMaxExpCnt, index_t lgkmcnt = waitcnt_arg::kMaxLgkmCnt>
CK_TILE_DEVICE void s_waitcnt_barrier ()
 
template<index_t lgkmcnt = 0>
CK_TILE_DEVICE void block_sync_lds ()
 
template<index_t vmcnt = 0>
CK_TILE_DEVICE void block_sync_lds_direct_load ()
 
CK_TILE_DEVICE void s_nop (index_t cnt=0)
 
template<typename T >
__device__ T * cast_pointer_to_generic_address_space (T CK_CONSTANT_ADDRESS_SPACE *p)
 
template<typename T >
__host__ __device__ T CK_CONSTANT_ADDRESS_SPACEcast_pointer_to_constant_address_space (T *p)
 
constexpr CK_TILE_HOST_DEVICE index_t get_smem_capacity ()
 
constexpr CK_TILE_HOST_DEVICE const char * address_space_to_string (address_space_enum addr_space)
 Helper function to convert address space enum to string. More...
 

Macro Definition Documentation

◆ CK_CONSTANT_ADDRESS_SPACE

#define CK_CONSTANT_ADDRESS_SPACE
Value:
__attribute__((address_space( \
static_cast<safe_underlying_type_t<address_space_enum>>(address_space_enum::constant))))

◆ CK_TILE_S_CNT_MAX

#define CK_TILE_S_CNT_MAX   0b1100'1111'0111'1111#define CK_TILE_VMCNT(cnt) \ ([]() { static_assert(!((cnt) >> 6), "VMCNT only has 6 bits"); }(), \ ((cnt) & 0b1111) | (((cnt) & 0b110000) << 10))#define CK_TILE_EXPCNT(cnt) \ ([]() { static_assert(!((cnt) >> 3), "EXP only has 3 bits"); }(), ((cnt) << 4))#define CK_TILE_LGKMCNT(cnt) \ ([]() { static_assert(!((cnt) >> 4), "LGKM only has 4 bits"); }(), ((cnt) << 8))namespace ck_tile {template <typename, bool>struct safe_underlying_type;template <typename T>struct safe_underlying_type<T, true>{ using type = std::underlying_type_t<T>;};template <typename T>struct safe_underlying_type<T, false>{ using type = void;};template <typename T>using safe_underlying_type_t = typename safe_underlying_type<T, std::is_enum<T>::value>::type;enum struct address_space_enum : std::uint16_t{ generic = 0, global, lds, sgpr, constant, vgpr};enum struct memory_operation_enum : std::uint16_t{ set = 0, atomic_add, atomic_max, add};CK_TILE_HOST_DEVICE constexpr index_t get_warp_size(){#if defined(__GFX9__) || !defined(__HIP_DEVICE_COMPILE__) return 64;#else return 32;#endif}CK_TILE_HOST bool is_wave32(){ hipDeviceProp_t props{}; int device; auto status = hipGetDevice(&device); if(status != hipSuccess) { return false; } status = hipGetDeviceProperties(&props, device); if(status != hipSuccess) { return false; } return props.major > 9;}CK_TILE_DEVICE index_t get_grid_size() { return gridDim.x; }CK_TILE_DEVICE index_t get_block_size() { return blockDim.x; }CK_TILE_DEVICE index_t get_thread_local_1d_id() { return threadIdx.x; }CK_TILE_DEVICE index_t get_thread_global_1d_id() { return blockIdx.x * blockDim.x + threadIdx.x; }CK_TILE_DEVICE index_t get_block_1d_id() { return blockIdx.x; }CK_TILE_DEVICE index_t get_lane_id() { return __lane_id(); }template <bool ReturnSgpr = true>CK_TILE_DEVICE index_t get_warp_id(bool_constant<ReturnSgpr> = {}){ const index_t warp_id = threadIdx.x / get_warp_size(); if constexpr(ReturnSgpr) { return __builtin_amdgcn_readfirstlane(warp_id); } else { return warp_id; }}CK_TILE_DEVICE index_t get_thread_id() { return threadIdx.x; }CK_TILE_DEVICE index_t get_block_id() { return blockIdx.x; }CK_TILE_DEVICE void block_sync_load_raw(index_t cnt = 0){#ifdef __gfx12__ asm volatile("s_wait_loadcnt %0 \n" "s_barrier_signal -1 \n" "s_barrier_wait -1" : : "n"(cnt) : "memory");#else asm volatile("s_waitcnt vmcnt(%0) \n" "s_barrier" : : "n"(cnt) : "memory");#endif}struct waitcnt_arg{ CK_TILE_DEVICE static constexpr index_t MAX = 0b11'00'1111'0'111'1111;

Function Documentation

◆ address_space_to_string()

constexpr CK_TILE_HOST_DEVICE const char* address_space_to_string ( address_space_enum  addr_space)
constexpr

Helper function to convert address space enum to string.

◆ block_sync_lds()

template<index_t lgkmcnt = 0>
CK_TILE_DEVICE void block_sync_lds ( )

◆ block_sync_lds_direct_load()

template<index_t vmcnt = 0>
CK_TILE_DEVICE void block_sync_lds_direct_load ( )

◆ cast_pointer_to_constant_address_space()

template<typename T >
__host__ __device__ T CK_CONSTANT_ADDRESS_SPACE* cast_pointer_to_constant_address_space ( T *  p)

◆ cast_pointer_to_generic_address_space()

template<typename T >
__device__ T* cast_pointer_to_generic_address_space ( T CK_CONSTANT_ADDRESS_SPACE p)

◆ get_smem_capacity()

constexpr CK_TILE_HOST_DEVICE index_t get_smem_capacity ( )
constexpr

◆ s_nop()

CK_TILE_DEVICE void s_nop ( index_t  cnt = 0)

◆ s_waitcnt()

template<index_t vmcnt = waitcnt_arg::kMaxVmCnt, index_t expcnt = waitcnt_arg::kMaxExpCnt, index_t lgkmcnt = waitcnt_arg::kMaxLgkmCnt>
CK_TILE_DEVICE void s_waitcnt ( )

◆ s_waitcnt_barrier()

template<index_t vmcnt = waitcnt_arg::kMaxVmCnt, index_t expcnt = waitcnt_arg::kMaxExpCnt, index_t lgkmcnt = waitcnt_arg::kMaxLgkmCnt>
CK_TILE_DEVICE void s_waitcnt_barrier ( )