#include "ck/config.h"
#include <stdint.h>
#include "hip/hip_runtime.h"
#include "hip/hip_fp16.h"
 
Go to the source code of this file.
◆ CK_BUFFER_RESOURCE_3RD_DWORD
      
        
          | #define CK_BUFFER_RESOURCE_3RD_DWORD   -1 | 
        
      
 
 
◆ CK_BUILD_DEPRECATED
      
        
          | #define CK_BUILD_DEPRECATED   1 | 
        
      
 
 
◆ CK_CONSTANT_ADDRESS_SPACE
      
        
          | #define CK_CONSTANT_ADDRESS_SPACE   __attribute__((address_space(4))) | 
        
      
 
 
◆ CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
      
        
          | #define CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM   1 | 
        
      
 
 
◆ CK_EXPERIMENTAL_DEFAULT_TO_INTER_WAVE_SCHEDULING
      
        
          | #define CK_EXPERIMENTAL_DEFAULT_TO_INTER_WAVE_SCHEDULING   0 | 
        
      
 
 
◆ CK_EXPERIMENTAL_INTER_WAVE_INSTANCES
      
        
          | #define CK_EXPERIMENTAL_INTER_WAVE_INSTANCES   1 | 
        
      
 
 
◆ CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING
      
        
          | #define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING   1 | 
        
      
 
 
◆ CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS
      
        
          | #define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS   1 | 
        
      
 
 
◆ CK_EXPERIMENTAL_MERGE_USE_MAGIC_DIVISION
      
        
          | #define CK_EXPERIMENTAL_MERGE_USE_MAGIC_DIVISION   1 | 
        
      
 
 
◆ CK_EXPERIMENTAL_PIPELINE_V2_IGLP_OPT
      
        
          | #define CK_EXPERIMENTAL_PIPELINE_V2_IGLP_OPT   0 | 
        
      
 
 
◆ CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES
      
        
          | #define CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES   1 | 
        
      
 
 
◆ CK_EXPERIMENTAL_STATIC_TENSOR_DESCRIPTOR
      
        
          | #define CK_EXPERIMENTAL_STATIC_TENSOR_DESCRIPTOR   0 | 
        
      
 
 
◆ CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK
      
        
          | #define CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK   1 | 
        
      
 
 
◆ CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK
      
        
          | #define CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK   1 | 
        
      
 
 
◆ CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK
      
        
          | #define CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK   0 | 
        
      
 
 
◆ CK_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK
      
        
          | #define CK_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK   1 | 
        
      
 
 
◆ CK_EXPERIMENTAL_USE_DYNAMICALLY_INDEXED_MULTI_INDEX
      
        
          | #define CK_EXPERIMENTAL_USE_DYNAMICALLY_INDEXED_MULTI_INDEX   0 | 
        
      
 
 
◆ CK_EXPERIMENTAL_USE_IN_REGISTER_SUB_DWORD_TRANSPOSE
      
        
          | #define CK_EXPERIMENTAL_USE_IN_REGISTER_SUB_DWORD_TRANSPOSE   1 | 
        
      
 
 
◆ CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST
      
        
          | #define CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST   1 | 
        
      
 
 
◆ CK_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS
      
        
          | #define CK_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS   0 | 
        
      
 
 
◆ CK_GFX90A_DENORM_WORKAROUND [1/2]
      
        
          | #define CK_GFX90A_DENORM_WORKAROUND   0 | 
        
      
 
 
◆ CK_GFX90A_DENORM_WORKAROUND [2/2]
      
        
          | #define CK_GFX90A_DENORM_WORKAROUND   0 | 
        
      
 
 
◆ CK_HACK_MERGE_CALCULATE_IDX_DIFF_LOW_CONST_USE_AMD_GCN_READ_FIRST_LANE
      
        
          | #define CK_HACK_MERGE_CALCULATE_IDX_DIFF_LOW_CONST_USE_AMD_GCN_READ_FIRST_LANE   0 | 
        
      
 
 
◆ CK_MAX_THREAD_PER_BLOCK
      
        
          | #define CK_MAX_THREAD_PER_BLOCK   256 | 
        
      
 
 
◆ CK_MIN_BLOCK_PER_CU
      
        
          | #define CK_MIN_BLOCK_PER_CU   2 | 
        
      
 
 
◆ CK_TIME_KERNEL
      
        
          (No definition is shown for this macro in this listing; consult the header itself for its value.)
        
      
 
 
◆ CK_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT
      
        
          | #define CK_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT   1 | 
        
      
 
 
◆ CK_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER
      
        
          | #define CK_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER   1 | 
        
      
 
 
◆ CK_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64
      
        
          | #define CK_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64   0 | 
        
      
 
 
◆ CK_USE_AMD_BUFFER_LOAD
      
        
          | #define CK_USE_AMD_BUFFER_LOAD   1 | 
        
      
 
 
◆ CK_USE_AMD_BUFFER_STORE
      
        
          | #define CK_USE_AMD_BUFFER_STORE   1 | 
        
      
 
 
◆ CK_USE_AMD_INLINE_ASM
      
        
          | #define CK_USE_AMD_INLINE_ASM   1 | 
        
      
 
 
◆ CK_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM
      
        
          | #define CK_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM   0 | 
        
      
 
 
◆ CK_USE_AMD_MFMA
      
        
          (No definition is shown for this macro in this listing; consult the header itself for its value.)
        
      
 
 
◆ CK_USE_AMD_V_DOT_DPP8_INLINE_ASM
      
        
          | #define CK_USE_AMD_V_DOT_DPP8_INLINE_ASM   1 | 
        
      
 
 
◆ CK_USE_AMD_V_DOT_INLINE_ASM
      
        
          | #define CK_USE_AMD_V_DOT_INLINE_ASM   0 | 
        
      
 
 
◆ CK_USE_AMD_V_MAC_INLINE_ASM
      
        
          | #define CK_USE_AMD_V_MAC_INLINE_ASM   1 | 
        
      
 
 
◆ CK_USE_LAUNCH_BOUNDS
      
        
          | #define CK_USE_LAUNCH_BOUNDS   1 | 
        
      
 
 
◆ CK_USE_PK4_LAYOUT_SHUFFLE
      
        
          | #define CK_USE_PK4_LAYOUT_SHUFFLE   1 | 
        
      
 
 
◆ CK_USE_RNE_BF16_CONVERSION
      
        
          | #define CK_USE_RNE_BF16_CONVERSION   1 | 
        
      
 
 
◆ CK_USE_SR_F4_CONVERSION
      
        
          | #define CK_USE_SR_F4_CONVERSION   0 | 
        
      
 
 
◆ CK_USE_SR_F6_CONVERSION
      
        
          | #define CK_USE_SR_F6_CONVERSION   0 | 
        
      
 
 
◆ CK_USE_SR_F8_CONVERSION
      
        
          | #define CK_USE_SR_F8_CONVERSION   0 | 
        
      
 
 
◆ CK_USE_WAVES_PER_EU
      
        
          | #define CK_USE_WAVES_PER_EU   0 | 
        
      
 
 
◆ CK_WAVELET_MAX_THREAD_PER_BLOCK
      
        
          | #define CK_WAVELET_MAX_THREAD_PER_BLOCK   512 | 
        
      
 
 
◆ CK_WAVELET_MIN_BLOCK_PER_CU
      
        
          | #define CK_WAVELET_MIN_BLOCK_PER_CU   2 | 
        
      
 
 
◆ CK_WORKAROUND_BF16_TO_FP8_CONVERSION
      
        
          | #define CK_WORKAROUND_BF16_TO_FP8_CONVERSION   1 | 
        
      
 
 
◆ CK_WORKAROUND_FP16_TO_FP8_CONVERSION
      
        
          | #define CK_WORKAROUND_FP16_TO_FP8_CONVERSION   1 | 
        
      
 
 
◆ CK_WORKAROUND_SWDEV_275126
      
        
          | #define CK_WORKAROUND_SWDEV_275126   1 | 
        
      
 
 
◆ CK_WORKAROUND_SWDEV_325164
      
        
          | #define CK_WORKAROUND_SWDEV_325164   0 | 
        
      
 
 
◆ CK_WORKAROUND_SWDEV_383542
      
        
          | #define CK_WORKAROUND_SWDEV_383542   1 | 
        
      
 
 
◆ CK_WORKAROUND_SWDEV_388832
      
        
          | #define CK_WORKAROUND_SWDEV_388832   1 | 
        
      
 
 
◆ CK_WORKAROUND_SWDEV_XXXXXX_INT8_BUFFER_LOAD_STORE_ISSUE
      
        
          | #define CK_WORKAROUND_SWDEV_XXXXXX_INT8_BUFFER_LOAD_STORE_ISSUE   1 | 
        
      
 
 
◆ CK_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE
      
        
          | #define CK_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE   1 |