/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/amd_buffer_addressing_builtins.hpp File Reference
amd_buffer_addressing_builtins.hpp File Reference
#include "data_type.hpp"
Go to the source code of this file.
Classes | |
union | ck::BufferResource< T > |
Namespaces | |
ck | |
Enumerations | |
enum class | ck::AmdBufferCoherenceEnum { ck::DefaultCoherence = 0 , ck::GLC = 1 , ck::SLC = 2 , ck::GLC_SLC = 3 , ck::WAVE_NT0 = 0 , ck::WAVE_NT1 = 2 , ck::GROUP_NT0 = 1 , ck::GROUP_NT1 = 3 , ck::DEVICE_NT0 = 8 , ck::DEVICE_NT1 = 10 , ck::SYSTEM_NT0 = 9 , ck::SYSTEM_NT1 = 11 } |
Functions | |
template<typename T > | |
__device__ int32x4_t | ck::make_wave_buffer_resource (T *p_wave, index_t element_space_size) |
template<typename T > | |
__device__ int32x4_t | ck::make_wave_buffer_resource_with_default_range (T *p_wave) |
template<typename T > | |
__device__ __amdgpu_buffer_rsrc_t | ck::make_wave_buffer_resource_new (T *p_wave, index_t element_space_size) |
template<typename T > | |
__device__ __amdgpu_buffer_rsrc_t | ck::make_wave_buffer_resource_with_default_range_new (T *p_wave) |
__device__ half2_t | ck::llvm_amdgcn_raw_buffer_atomic_add_fp16x2 (half2_t vdata, int32x4_t rsrc, index_t voffset, index_t soffset, index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.atomic.fadd.v2f16") |
__device__ int32_t | ck::llvm_amdgcn_raw_buffer_atomic_add_i32 (int32_t vdata, int32x4_t rsrc, index_t voffset, index_t soffset, index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.atomic.add.i32") |
__device__ float | ck::llvm_amdgcn_raw_buffer_atomic_add_fp32 (float vdata, int32x4_t rsrc, index_t voffset, index_t soffset, index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.atomic.fadd.f32") |
__device__ double | ck::llvm_amdgcn_raw_buffer_atomic_max_fp64 (double vdata, int32x4_t rsrc, int voffset, int soffset, int glc_slc) __asm("llvm.amdgcn.raw.buffer.atomic.fmax.f64") |
template<index_t N, AmdBufferCoherenceEnum coherence = AmdBufferCoherenceEnum::DefaultCoherence> | |
__device__ vector_type< int8_t, N >::type | ck::amd_buffer_load_impl_raw (__amdgpu_buffer_rsrc_t src_wave_buffer_resource, index_t src_thread_addr_offset, index_t src_wave_addr_offset) |
template<typename T , index_t N, AmdBufferCoherenceEnum coherence = AmdBufferCoherenceEnum::DefaultCoherence> | |
__device__ vector_type< T, N >::type | ck::amd_buffer_load_impl (__amdgpu_buffer_rsrc_t src_wave_buffer_resource, index_t src_thread_addr_offset, index_t src_wave_addr_offset) |
template<index_t N, AmdBufferCoherenceEnum coherence = AmdBufferCoherenceEnum::DefaultCoherence> | |
__device__ void | ck::amd_buffer_store_impl_raw (const typename vector_type< int8_t, N >::type src_thread_data, __amdgpu_buffer_rsrc_t dst_wave_buffer_resource, index_t dst_thread_addr_offset, index_t dst_wave_addr_offset) |
template<typename T , index_t N, AmdBufferCoherenceEnum coherence = AmdBufferCoherenceEnum::DefaultCoherence> | |
__device__ void | ck::amd_buffer_store_impl (const typename vector_type< T, N >::type src_thread_data, __amdgpu_buffer_rsrc_t dst_wave_buffer_resource, index_t dst_thread_addr_offset, index_t dst_wave_addr_offset) |
template<typename T , index_t N> | |
__device__ void | ck::amd_global_atomic_add_impl (const typename vector_type< T, N >::type src_thread_data, T *addr) |
template<typename T , index_t N> | |
__device__ void | ck::amd_buffer_atomic_add_impl (const typename vector_type< T, N >::type src_thread_data, int32x4_t dst_wave_buffer_resource, index_t dst_thread_addr_offset, index_t dst_wave_addr_offset) |
template<typename T , index_t N> | |
__device__ void | ck::amd_buffer_atomic_max_impl (const typename vector_type< T, N >::type src_thread_data, int32x4_t dst_wave_buffer_resource, index_t dst_thread_addr_offset, index_t dst_wave_addr_offset) |
template<typename T , index_t N, AmdBufferCoherenceEnum coherence = AmdBufferCoherenceEnum::DefaultCoherence> | |
__device__ vector_type_maker< T, N >::type::type | ck::amd_buffer_load_invalid_element_return_zero (const T *p_src_wave, index_t src_thread_element_offset, bool src_thread_element_valid, index_t src_element_space_size) |
template<typename T , index_t N, AmdBufferCoherenceEnum coherence = AmdBufferCoherenceEnum::DefaultCoherence> | |
__device__ vector_type_maker< T, N >::type::type | ck::amd_buffer_load_invalid_element_return_customized_value (const T *p_src_wave, index_t src_thread_element_offset, bool src_thread_element_valid, index_t src_element_space_size, T customized_value) |
template<typename T , index_t N, AmdBufferCoherenceEnum coherence = AmdBufferCoherenceEnum::DefaultCoherence> | |
__device__ void | ck::amd_buffer_store (const typename vector_type_maker< T, N >::type::type src_thread_data, T *p_dst_wave, const index_t dst_thread_element_offset, const bool dst_thread_element_valid, const index_t dst_element_space_size) |
template<typename T , index_t N> | |
__device__ void | ck::amd_buffer_atomic_add (const typename vector_type_maker< T, N >::type::type src_thread_data, T *p_dst_wave, const index_t dst_thread_element_offset, const bool dst_thread_element_valid, const index_t dst_element_space_size) |
template<typename T , index_t N> | |
__device__ void | ck::amd_buffer_atomic_max (const typename vector_type_maker< T, N >::type::type src_thread_data, T *p_dst_wave, const index_t dst_thread_element_offset, const bool dst_thread_element_valid, const index_t dst_element_space_size) |
__device__ void | ck::llvm_amdgcn_raw_buffer_load_lds (int32x4_t rsrc, uint32_t *lds_ptr, index_t size, index_t voffset, index_t soffset, index_t offset, index_t aux) __asm("llvm.amdgcn.raw.buffer.load.lds") |
template<typename T , index_t NumElemsPerThread> | |
__device__ void | ck::amd_direct_load_global_to_lds (const T *global_base_ptr, const index_t global_offset, T *lds_base_ptr, const index_t lds_offset, const bool is_valid, const index_t src_element_space_size) |