naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits > Struct Template Reference

naive_attention_fwd_kernel&lt; QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits &gt; Struct Template Reference#

Composable Kernel: ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits > Struct Template Reference
ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits > Struct Template Reference

#include <naive_attention.hpp>

Classes

struct  addresser
 
struct  kvscale_addresser
 
struct  page_addresser
 
struct  scale_max
 
struct  scale_max< fp8_t >
 
struct  scale_max< int8_t >
 

Public Types

using SoftmaxType = float
 
using QuantComputeType = float
 
using QCompute = KType
 
using PType = VType
 
using OAccType = float
 
using p_vec_type = ext_vector_t< PType, 16/sizeof(PType)>
 

Public Member Functions

__host__ __device__ naive_attention_fwd_kernel ()
 
template<typename T , typename F >
constexpr __device__ T wave_reduce (T local, F reduce_f)
 
template<typename T , typename F >
constexpr __device__ T cross_wave_reduce (T local, F reduce_f, T *smem)
 
__device__ void operator() (naive_attention_fwd_args args)
 

Static Public Member Functions

__device__ static constexpr __host__ int get_block_size ()
 
static __host__ dim3 get_grid_size (naive_attention_fwd_args args)
 

Static Public Attributes

static constexpr bool is_kvcache_i8
 
static constexpr bool is_kvcache_fp8
 
static constexpr int v_per_token_quant_group_size = 64
 
static constexpr int kBlockSize = 256
 
static constexpr int p_vec_elem = vector_traits<p_vec_type>::vector_size
 

Member Typedef Documentation

◆ OAccType

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::OAccType = float

◆ p_vec_type

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::p_vec_type = ext_vector_t<PType, 16 / sizeof(PType)>

◆ PType

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::PType = VType

◆ QCompute

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::QCompute = KType

◆ QuantComputeType

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::QuantComputeType = float

◆ SoftmaxType

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::SoftmaxType = float

Constructor & Destructor Documentation

◆ naive_attention_fwd_kernel()

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
__host__ __device__ ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::naive_attention_fwd_kernel ( )
inline

Member Function Documentation

◆ cross_wave_reduce()

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
template<typename T , typename F >
constexpr __device__ T ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::cross_wave_reduce ( local,
reduce_f,
T *  smem 
)
inlineconstexpr

◆ get_block_size()

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
__device__ static constexpr __host__ int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::get_block_size ( )
inlinestaticconstexpr

◆ get_grid_size()

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
static __host__ dim3 ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::get_grid_size ( naive_attention_fwd_args  args)
inlinestatic

◆ operator()()

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
__device__ void ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::operator() ( naive_attention_fwd_args  args)
inline

◆ wave_reduce()

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
template<typename T , typename F >
constexpr __device__ T ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::wave_reduce ( local,
reduce_f 
)
inlineconstexpr

Member Data Documentation

◆ is_kvcache_fp8

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
constexpr bool ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::is_kvcache_fp8
staticconstexpr
Initial value:
=
std::is_same_v<KType, fp8_t> && std::is_same_v<VType, fp8_t>

◆ is_kvcache_i8

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
constexpr bool ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::is_kvcache_i8
staticconstexpr
Initial value:
=
std::is_same_v<KType, int8_t> && std::is_same_v<VType, int8_t>

◆ kBlockSize

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
constexpr int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::kBlockSize = 256
staticconstexpr

◆ p_vec_elem

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
constexpr int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::p_vec_elem = vector_traits<p_vec_type>::vector_size
staticconstexpr

◆ v_per_token_quant_group_size

template<typename QType , typename KType , typename VType , typename OType , typename AccType , typename KVScaleType , naive_attention_layout_enum QLayout, naive_attention_layout_enum KLayout, naive_attention_layout_enum VLayout, naive_attention_layout_enum OLayout, naive_attention_layout_enum KScaleLayout, naive_attention_layout_enum VScaleLayout, typename Traits >
constexpr int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::v_per_token_quant_group_size = 64
staticconstexpr

The documentation for this struct was generated from the following file:
  • /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ref/naive_attention.hpp