detail Namespace Reference

detail Namespace Reference

Composable Kernel: ck_tile::detail Namespace Reference
ck_tile::detail Namespace Reference

Classes

struct  pick_sequence_elements_by_mask_impl
 
struct  pick_sequence_elements_by_mask_impl< WorkSeq, sequence<>, sequence<> >
 
struct  modify_sequence_elements_by_ids_impl
 
struct  modify_sequence_elements_by_ids_impl< WorkSeq, sequence<>, sequence<> >
 
struct  sorted_sequence_histogram
 
struct  sorted_sequence_histogram< h_idx, sequence< x, xs... >, sequence< r, rs... > >
 
struct  sorted_sequence_histogram< h_idx, sequence< x >, sequence< r, rs... > >
 
struct  is_similiar_distributed_tensor
 
struct  is_similiar_distributed_tensor< static_distributed_tensor< TypeX, DistX >, static_distributed_tensor< TypeY, DistY > >
 
struct  tile_distribution_detail
 
struct  swallow
 
struct  static_for_impl
 
struct  static_for_impl< sequence< Is... > >
 
struct  applier
 
struct  static_ford_impl
 
struct  static_ford_impl< sequence<>, Orders >
 
struct  unpack_impl
 
struct  unpack_impl< sequence< Is... > >
 
struct  unpack2_impl
 
struct  unpack2_impl< sequence< Is... >, sequence< Js... > >
 
struct  static_uford_impl
 
struct  static_uford_impl< sequence<>, sequence<>, Orders >
 
struct  static_uford_one_shot_impl
 
struct  static_uford_one_shot_impl< sequence<>, sequence<>, Orders >
 
struct  ignore_t
 
struct  detector
 
struct  detector< Default, std::void_t< Op< Args... > >, Op, Args... >
 
struct  tuple_element_or_default_dispatch
 
struct  tuple_element_or_default_dispatch< true, Idx, Tuple, DefaultType >
 
struct  log2
 
struct  log2< 4 >
 
struct  log2< 8 >
 
struct  log2< 16 >
 
struct  log2< 32 >
 
struct  log2< 64 >
 
struct  log2< 128 >
 

Typedefs

template<int32_t Size>
using make_applier = __make_integer_seq< applier, index_t, Size >
 

Functions

template<typename F , typename X , index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto transform_tuples_impl (F f, const X &x, sequence< Is... >)
 
template<typename F , typename X , typename Y , index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto transform_tuples_impl (F f, const X &x, const Y &y, sequence< Is... >)
 
template<typename F , typename X , typename Y , typename Z , index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto transform_tuples_impl (F f, const X &x, const Y &y, const Z &z, sequence< Is... >)
 
template<typename F , typename Tuple , index_t... Is>
constexpr decltype(auto) apply_impl (F &&f, Tuple &&t, sequence< Is... >)
 
template<typename F , typename X , index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto embed_tuples_impl (F f, const X &x, sequence< Is... >)
 
template<typename OutTensor , typename InTensor >
CK_TILE_DEVICE void shuffle_tile_impl_in_thread (OutTensor &out_tensor, const InTensor &in_tensor)
 
template<typename Lengths , typename Strides , index_t I, typename AccOld >
constexpr CK_TILE_HOST_DEVICE auto calculate_element_space_size_impl (const Lengths &lengths, const Strides &strides, number< I > i, AccOld acc_old)
 
template<typename Distribution >
CK_TILE_HOST_DEVICE auto get_partition_index (Distribution)
 
template<index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto make_tile_distributed_span (sequence< Is... >)
 
template<index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto make_tile_distributed_index (sequence< Is... >)
 
template<index_t NDimMax>
constexpr CK_TILE_HOST_DEVICE auto make_sequential_index (index_t ibegin, index_t iend)
 
template<typename StaticTileDistributionEncoding_ >
constexpr CK_TILE_HOST_DEVICE auto make_adaptor_encoding_for_tile_distribution (StaticTileDistributionEncoding_)
 
template<typename Distribution , index_t... XSliceBegins, index_t... XSliceEnds>
constexpr CK_TILE_HOST_DEVICE auto slice_distribution_from_x (Distribution, sequence< XSliceBegins... > x_slice_begins, sequence< XSliceEnds... > x_slice_ends)
 
template<typename OuterDstr , typename InnerDstr >
constexpr CK_TILE_HOST_DEVICE auto make_embed_tile_distribution_encoding (OuterDstr, InnerDstr)
 
template<typename InDstr , index_t... InReduceDimXs>
constexpr CK_TILE_HOST_DEVICE auto make_reduce_tile_distribution_encoding_impl (InDstr, sequence< InReduceDimXs... > reduce_dim_xs_in)
 
template<typename InDstr , index_t... InReduceDimXs>
constexpr CK_TILE_HOST_DEVICE auto make_reduce_tile_distribution_encoding (InDstr, sequence< InReduceDimXs... > reduce_dim_xs_in)
 
template<typename OutTensor , typename InTensor >
CK_TILE_DEVICE void transpose_tile2d_impl_in_thread (OutTensor &out_tensor, const InTensor &in_tensor)
 
CK_TILE_DEVICE float fma_impl_vsv (float a, float b, float c)
 
CK_TILE_DEVICE float add_impl_vv (float lhs, float rhs)
 
CK_TILE_DEVICE fp16x2_t cvt_pk_fp16_f32 (float a, float b)
 
CK_TILE_DEVICE bf16x2_t cvt_pk_bf16_f32 (float a, float b)
 
CK_TILE_DEVICE fp32x2_t pk_mul_f32 (fp32x2_t lhs, fp32x2_t rhs)
 

Variables

template<typename X , typename Y >
constexpr bool is_similiar_distributed_tensor_v
 

Typedef Documentation

◆ make_applier

template<int32_t Size>
using ck_tile::detail::make_applier = __make_integer_seq<applier, index_t, Size>

Function Documentation

◆ add_impl_vv()

CK_TILE_DEVICE float ck_tile::detail::add_impl_vv ( float  lhs,
float  rhs 
)

◆ apply_impl()

template<typename F , typename Tuple , index_t... Is>
constexpr decltype(auto) ck_tile::detail::apply_impl ( F &&  f,
Tuple &&  t,
sequence< Is... >   
)
constexpr

◆ calculate_element_space_size_impl()

template<typename Lengths , typename Strides , index_t I, typename AccOld >
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::calculate_element_space_size_impl ( const Lengths &  lengths,
const Strides &  strides,
number< I >  i,
AccOld  acc_old 
)
constexpr

◆ cvt_pk_bf16_f32()

CK_TILE_DEVICE bf16x2_t ck_tile::detail::cvt_pk_bf16_f32 ( float  a,
float  b 
)

◆ cvt_pk_fp16_f32()

CK_TILE_DEVICE fp16x2_t ck_tile::detail::cvt_pk_fp16_f32 ( float  a,
float  b 
)

◆ embed_tuples_impl()

template<typename F , typename X , index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::embed_tuples_impl ( F  f,
const X &  x,
sequence< Is... >   
)
constexpr

◆ fma_impl_vsv()

CK_TILE_DEVICE float ck_tile::detail::fma_impl_vsv ( float  a,
float  b,
float  c 
)

◆ get_partition_index()

template<typename Distribution >
CK_TILE_HOST_DEVICE auto ck_tile::detail::get_partition_index ( Distribution  )

◆ make_adaptor_encoding_for_tile_distribution()

template<typename StaticTileDistributionEncoding_ >
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_adaptor_encoding_for_tile_distribution ( StaticTileDistributionEncoding_  )
constexpr

◆ make_embed_tile_distribution_encoding()

template<typename OuterDstr , typename InnerDstr >
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_embed_tile_distribution_encoding ( OuterDstr  ,
InnerDstr   
)
constexpr

◆ make_reduce_tile_distribution_encoding()

template<typename InDstr , index_t... InReduceDimXs>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_reduce_tile_distribution_encoding ( InDstr  ,
sequence< InReduceDimXs... >  reduce_dim_xs_in 
)
constexpr

◆ make_reduce_tile_distribution_encoding_impl()

template<typename InDstr , index_t... InReduceDimXs>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_reduce_tile_distribution_encoding_impl ( InDstr  ,
sequence< InReduceDimXs... >  reduce_dim_xs_in 
)
constexpr

◆ make_sequential_index()

template<index_t NDimMax>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_sequential_index ( index_t  ibegin,
index_t  iend 
)
constexpr

◆ make_tile_distributed_index()

template<index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_tile_distributed_index ( sequence< Is... >  )
constexpr

◆ make_tile_distributed_span()

template<index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_tile_distributed_span ( sequence< Is... >  )
constexpr

◆ pk_mul_f32()

CK_TILE_DEVICE fp32x2_t ck_tile::detail::pk_mul_f32 ( fp32x2_t  lhs,
fp32x2_t  rhs 
)

◆ shuffle_tile_impl_in_thread()

template<typename OutTensor , typename InTensor >
CK_TILE_DEVICE void ck_tile::detail::shuffle_tile_impl_in_thread ( OutTensor &  out_tensor,
const InTensor &  in_tensor 
)

◆ slice_distribution_from_x()

template<typename Distribution , index_t... XSliceBegins, index_t... XSliceEnds>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::slice_distribution_from_x ( Distribution  ,
sequence< XSliceBegins... >  x_slice_begins,
sequence< XSliceEnds... >  x_slice_ends 
)
constexpr

◆ transform_tuples_impl() [1/3]

template<typename F , typename X , typename Y , typename Z , index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::transform_tuples_impl ( F  f,
const X &  x,
const Y &  y,
const Z &  z,
sequence< Is... >   
)
constexpr

◆ transform_tuples_impl() [2/3]

template<typename F , typename X , typename Y , index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::transform_tuples_impl ( F  f,
const X &  x,
const Y &  y,
sequence< Is... >   
)
constexpr

◆ transform_tuples_impl() [3/3]

template<typename F , typename X , index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::transform_tuples_impl ( F  f,
const X &  x,
sequence< Is... >   
)
constexpr

◆ transpose_tile2d_impl_in_thread()

template<typename OutTensor , typename InTensor >
CK_TILE_DEVICE void ck_tile::detail::transpose_tile2d_impl_in_thread ( OutTensor &  out_tensor,
const InTensor &  in_tensor 
)

Variable Documentation

◆ is_similiar_distributed_tensor_v

template<typename X , typename Y >
constexpr bool ck_tile::detail::is_similiar_distributed_tensor_v
inline constexpr
Initial value:
= is_similiar_distributed_tensor<X, Y>::value