#include <naive_attention.hpp>
◆ OAccType
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::OAccType = float
◆ p_vec_type
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::p_vec_type = ext_vector_t<PType, 16 / sizeof(PType)>
◆ PType
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::PType = VType
◆ QCompute
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::QCompute = KType
◆ QuantComputeType
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::QuantComputeType = float
◆ SoftmaxType
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::SoftmaxType = float
◆ naive_attention_fwd_kernel()
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
__host__ __device__ ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::naive_attention_fwd_kernel()
[inline]
◆ cross_wave_reduce()
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
template<typename T , typename F >
constexpr __device__ T ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::cross_wave_reduce(T local, F reduce_f, T * smem)
[inline, constexpr]
◆ get_block_size()
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
__device__ static constexpr __host__ int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::get_block_size()
[inline, static, constexpr]
◆ get_grid_size()
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
static __host__ dim3 ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::get_grid_size(naive_attention_fwd_args args)
[inline, static]
◆ operator()()
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
__device__ void ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::operator()(naive_attention_fwd_args args)
[inline]
◆ wave_reduce()
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
template<typename T , typename F >
constexpr __device__ T ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::wave_reduce(T local, F reduce_f)
[inline, constexpr]
◆ is_kvcache_fp8
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
constexpr bool ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::is_kvcache_fp8
[static, constexpr]
Initial value:
= std::is_same_v<KType, fp8_t> && std::is_same_v<VType, fp8_t>
◆ is_kvcache_i8
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
constexpr bool ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::is_kvcache_i8
[static, constexpr]
Initial value:
= std::is_same_v<KType, int8_t> && std::is_same_v<VType, int8_t>
◆ kBlockSize
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
constexpr int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::kBlockSize = 256
[static, constexpr]
◆ p_vec_elem
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
constexpr int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::p_vec_elem = vector_traits<p_vec_type>::vector_size
[static, constexpr]
◆ v_per_token_quant_group_size
template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
constexpr int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::v_per_token_quant_group_size = 64
[static, constexpr]
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ref/naive_attention.hpp