/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/config.hpp File Reference

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/config.hpp File Reference

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/config.hpp File Reference
config.hpp File Reference
#include "hip/hip_version.h"
#include "hip/hip_runtime.h"
#include "hip/hip_fp16.h"

Go to the source code of this file.

Macros

#define CK_TILE_HOST   inline
 
#define CK_TILE_DEVICE   inline
 
#define CK_TILE_HOST_DEVICE   inline
 
#define CK_TILE_DEVICE_EXTERN
 
#define CK_TILE_HOST_DEVICE_EXTERN
 
#define CK_TILE_GENERIC_ADDR
 
#define CK_TILE_GLOBAL_ADDR
 
#define CK_TILE_LDS_ADDR
 
#define CK_TILE_BUF_RES_ADDR
 
#define CK_TILE_USE_CUSTOM_DATA_TYPE   0
 
#define CK_TILE_FLOAT_TO_BFLOAT16_STANDARD   0
 
#define CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE_WITH_NAN   1
 
#define CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE   2
 
#define CK_TILE_FLOAT_TO_BFLOAT16_STANDARD_ASM   3
 
#define CK_TILE_FLOAT_TO_BFLOAT16_RTA_ASM   4
 
#define CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT   CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE
 
#define CK_TILE_FLOAT_TO_FP8_STANDARD   0
 
#define CK_TILE_FLOAT_TO_FP8_STOCHASTIC   1
 
#define CK_TILE_FLOAT_TO_FP8_DEFAULT   CK_TILE_FLOAT_TO_FP8_STANDARD
 
#define CK_TILE_STATICALLY_INDEXED_ARRAY_USE_ARRAY   0
 
#define CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE   1
 
#define CK_TILE_STATICALLY_INDEXED_ARRAY_DEFAULT   CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE
 
#define CK_TILE_THREAD_BUFFER_USE_ARRAY   0
 
#define CK_TILE_THREAD_BUFFER_USE_TUPLE   1
 
#define CK_TILE_THREAD_BUFFER_DEFAULT   CK_TILE_THREAD_BUFFER_USE_ARRAY
 
#define CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST   0
 
#define CK_TILE_USE_LAUNCH_BOUNDS   1
 
#define CK_TILE_TIME_KERNEL   1
 
#define CK_TILE_MAX_THREAD_PER_BLOCK   256
 
#define CK_TILE_MIN_BLOCK_PER_CU   2
 
#define CK_TILE_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK   0
 
#define CK_TILE_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK   1
 
#define CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK   1
 
#define CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK   1
 
#define CK_TILE_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM   1
 
#define CK_TILE_USE_AMD_BUFFER_LOAD   1
 
#define CK_TILE_USE_AMD_BUFFER_STORE   1
 
#define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER   1
 
#define CK_TILE_USE_PK4_LAYOUT_SHUFFLE   1
 
#define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT   1
 
#define CK_TILE_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64   0
 
#define CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS   0
 
#define CK_TILE_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE   1
 
#define CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE   0
 
#define CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE   0
 
#define CK_TILE_USE_LLVM_BUILTIN_BF16   0
 
#define CK_TILE_DEBUG_LOG   0
 
#define CK_TILE_BUFFER_RESOURCE_3RD_DWORD   0xffffffff
 
#define CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM   1
 
#define CK_TILE_USE_SUBDWORD_TILE_CAST   0
 
#define CK_TILE_USE_PK_FP16_TILE_CAST   0
 
#define CK_TILE_FMHA_FWD_FAST_EXP2   0
 
#define CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN   0
 
#define CK_TILE_BUFFER_LOAD_RAW_BF16_WA   1
 
#define CK_TILE_WORKAROUND_SWDEV_383542   1
 
#define CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID   1
 
#define CK_TILE_USE_OCP_FP8   0
 
#define CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN   0
 
#define CK_TILE_WA_ISSUE_2028   0
 
#define CK_TILE_ENC_SUPPORT_Y_TO_R   0
 

Macro Definition Documentation

◆ CK_TILE_BUF_RES_ADDR

#define CK_TILE_BUF_RES_ADDR

◆ CK_TILE_BUFFER_LOAD_RAW_BF16_WA

#define CK_TILE_BUFFER_LOAD_RAW_BF16_WA   1

◆ CK_TILE_BUFFER_RESOURCE_3RD_DWORD

#define CK_TILE_BUFFER_RESOURCE_3RD_DWORD   0xffffffff

◆ CK_TILE_DEBUG_LOG

#define CK_TILE_DEBUG_LOG   0

◆ CK_TILE_DEVICE

#define CK_TILE_DEVICE   inline

◆ CK_TILE_DEVICE_EXTERN

#define CK_TILE_DEVICE_EXTERN

◆ CK_TILE_ENC_SUPPORT_Y_TO_R

#define CK_TILE_ENC_SUPPORT_Y_TO_R   0

◆ CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM

#define CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM   1

◆ CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK

#define CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK   1

◆ CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK

#define CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK   1

◆ CK_TILE_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK

#define CK_TILE_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK   0

◆ CK_TILE_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK

#define CK_TILE_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK   1

◆ CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS

#define CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS   0

◆ CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT

#define CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT   CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE

◆ CK_TILE_FLOAT_TO_BFLOAT16_RTA_ASM

#define CK_TILE_FLOAT_TO_BFLOAT16_RTA_ASM   4

◆ CK_TILE_FLOAT_TO_BFLOAT16_STANDARD

#define CK_TILE_FLOAT_TO_BFLOAT16_STANDARD   0

◆ CK_TILE_FLOAT_TO_BFLOAT16_STANDARD_ASM

#define CK_TILE_FLOAT_TO_BFLOAT16_STANDARD_ASM   3

◆ CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE

#define CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE   2

◆ CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE_WITH_NAN

#define CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE_WITH_NAN   1

◆ CK_TILE_FLOAT_TO_FP8_DEFAULT

#define CK_TILE_FLOAT_TO_FP8_DEFAULT   CK_TILE_FLOAT_TO_FP8_STANDARD

◆ CK_TILE_FLOAT_TO_FP8_STANDARD

#define CK_TILE_FLOAT_TO_FP8_STANDARD   0

◆ CK_TILE_FLOAT_TO_FP8_STOCHASTIC

#define CK_TILE_FLOAT_TO_FP8_STOCHASTIC   1

◆ CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN

#define CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN   0

◆ CK_TILE_FMHA_FWD_FAST_EXP2

#define CK_TILE_FMHA_FWD_FAST_EXP2   0

◆ CK_TILE_GENERIC_ADDR

#define CK_TILE_GENERIC_ADDR

◆ CK_TILE_GLOBAL_ADDR

#define CK_TILE_GLOBAL_ADDR

◆ CK_TILE_HOST

#define CK_TILE_HOST   inline

◆ CK_TILE_HOST_DEVICE

#define CK_TILE_HOST_DEVICE   inline

◆ CK_TILE_HOST_DEVICE_EXTERN

#define CK_TILE_HOST_DEVICE_EXTERN

◆ CK_TILE_LDS_ADDR

#define CK_TILE_LDS_ADDR

◆ CK_TILE_MAX_THREAD_PER_BLOCK

#define CK_TILE_MAX_THREAD_PER_BLOCK   256

◆ CK_TILE_MIN_BLOCK_PER_CU

#define CK_TILE_MIN_BLOCK_PER_CU   2

◆ CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID

#define CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID   1

◆ CK_TILE_STATICALLY_INDEXED_ARRAY_DEFAULT

#define CK_TILE_STATICALLY_INDEXED_ARRAY_DEFAULT   CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE

◆ CK_TILE_STATICALLY_INDEXED_ARRAY_USE_ARRAY

#define CK_TILE_STATICALLY_INDEXED_ARRAY_USE_ARRAY   0

◆ CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE

#define CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE   1

◆ CK_TILE_THREAD_BUFFER_DEFAULT

#define CK_TILE_THREAD_BUFFER_DEFAULT   CK_TILE_THREAD_BUFFER_USE_ARRAY

◆ CK_TILE_THREAD_BUFFER_USE_ARRAY

#define CK_TILE_THREAD_BUFFER_USE_ARRAY   0

◆ CK_TILE_THREAD_BUFFER_USE_TUPLE

#define CK_TILE_THREAD_BUFFER_USE_TUPLE   1

◆ CK_TILE_TIME_KERNEL

#define CK_TILE_TIME_KERNEL   1

◆ CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST

#define CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST   0

◆ CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT

#define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT   1

◆ CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER

#define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER   1

◆ CK_TILE_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64

#define CK_TILE_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64   0

◆ CK_TILE_USE_AMD_BUFFER_LOAD

#define CK_TILE_USE_AMD_BUFFER_LOAD   1

◆ CK_TILE_USE_AMD_BUFFER_STORE

#define CK_TILE_USE_AMD_BUFFER_STORE   1

◆ CK_TILE_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM

#define CK_TILE_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM   1

◆ CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN

#define CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN   0

◆ CK_TILE_USE_CUSTOM_DATA_TYPE

#define CK_TILE_USE_CUSTOM_DATA_TYPE   0

◆ CK_TILE_USE_LAUNCH_BOUNDS

#define CK_TILE_USE_LAUNCH_BOUNDS   1

◆ CK_TILE_USE_LLVM_BUILTIN_BF16

#define CK_TILE_USE_LLVM_BUILTIN_BF16   0

◆ CK_TILE_USE_OCP_FP8

#define CK_TILE_USE_OCP_FP8   0

◆ CK_TILE_USE_PK4_LAYOUT_SHUFFLE

#define CK_TILE_USE_PK4_LAYOUT_SHUFFLE   1

◆ CK_TILE_USE_PK_FP16_TILE_CAST

#define CK_TILE_USE_PK_FP16_TILE_CAST   0

◆ CK_TILE_USE_SUBDWORD_TILE_CAST

#define CK_TILE_USE_SUBDWORD_TILE_CAST   0

◆ CK_TILE_WA_ISSUE_2028

#define CK_TILE_WA_ISSUE_2028   0

◆ CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE

#define CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE   0

◆ CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE

#define CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE   0

◆ CK_TILE_WORKAROUND_SWDEV_383542

#define CK_TILE_WORKAROUND_SWDEV_383542   1

◆ CK_TILE_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE

#define CK_TILE_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE   1