detail Namespace Reference
Typedefs | |
template<int32_t Size> | |
using | make_applier = __make_integer_seq< applier, index_t, Size > |
Functions | |
template<typename F , typename X , index_t... Is> | |
constexpr CK_TILE_HOST_DEVICE auto | transform_tuples_impl (F f, const X &x, sequence< Is... >) |
template<typename F , typename X , typename Y , index_t... Is> | |
constexpr CK_TILE_HOST_DEVICE auto | transform_tuples_impl (F f, const X &x, const Y &y, sequence< Is... >) |
template<typename F , typename X , typename Y , typename Z , index_t... Is> | |
constexpr CK_TILE_HOST_DEVICE auto | transform_tuples_impl (F f, const X &x, const Y &y, const Z &z, sequence< Is... >) |
template<typename F , typename Tuple , index_t... Is> | |
constexpr decltype(auto) | apply_impl (F &&f, Tuple &&t, sequence< Is... >) |
template<typename F , typename X , index_t... Is> | |
constexpr CK_TILE_HOST_DEVICE auto | embed_tuples_impl (F f, const X &x, sequence< Is... >) |
template<typename OutTensor , typename InTensor > | |
CK_TILE_DEVICE void | shuffle_tile_impl_in_thread (OutTensor &out_tensor, const InTensor &in_tensor) |
template<typename Lengths , typename Strides , index_t I, typename AccOld > | |
constexpr CK_TILE_HOST_DEVICE auto | calculate_element_space_size_impl (const Lengths &lengths, const Strides &strides, number< I > i, AccOld acc_old) |
template<typename Distribution > | |
CK_TILE_HOST_DEVICE auto | get_partition_index (Distribution) |
template<index_t... Is> | |
constexpr CK_TILE_HOST_DEVICE auto | make_tile_distributed_span (sequence< Is... >) |
template<index_t... Is> | |
constexpr CK_TILE_HOST_DEVICE auto | make_tile_distributed_index (sequence< Is... >) |
template<index_t NDimMax> | |
constexpr CK_TILE_HOST_DEVICE auto | make_sequential_index (index_t ibegin, index_t iend) |
template<typename StaticTileDistributionEncoding_ > | |
constexpr CK_TILE_HOST_DEVICE auto | make_adaptor_encoding_for_tile_distribution (StaticTileDistributionEncoding_) |
template<typename Distribution , index_t... XSliceBegins, index_t... XSliceEnds> | |
constexpr CK_TILE_HOST_DEVICE auto | slice_distribution_from_x (Distribution, sequence< XSliceBegins... > x_slice_begins, sequence< XSliceEnds... > x_slice_ends) |
template<typename OuterDstr , typename InnerDstr > | |
constexpr CK_TILE_HOST_DEVICE auto | make_embed_tile_distribution_encoding (OuterDstr, InnerDstr) |
template<typename InDstr , index_t... InReduceDimXs> | |
constexpr CK_TILE_HOST_DEVICE auto | make_reduce_tile_distribution_encoding_impl (InDstr, sequence< InReduceDimXs... > reduce_dim_xs_in) |
template<typename InDstr , index_t... InReduceDimXs> | |
constexpr CK_TILE_HOST_DEVICE auto | make_reduce_tile_distribution_encoding (InDstr, sequence< InReduceDimXs... > reduce_dim_xs_in) |
template<typename OutTensor , typename InTensor > | |
CK_TILE_DEVICE void | transpose_tile2d_impl_in_thread (OutTensor &out_tensor, const InTensor &in_tensor) |
CK_TILE_DEVICE float | fma_impl_vsv (float a, float b, float c) |
CK_TILE_DEVICE float | add_impl_vv (float lhs, float rhs) |
CK_TILE_DEVICE fp16x2_t | cvt_pk_fp16_f32 (float a, float b) |
CK_TILE_DEVICE bf16x2_t | cvt_pk_bf16_f32 (float a, float b) |
CK_TILE_DEVICE fp32x2_t | pk_mul_f32 (fp32x2_t lhs, fp32x2_t rhs) |
Variables | |
template<typename X , typename Y > | |
constexpr bool | is_similiar_distributed_tensor_v |
Typedef Documentation
◆ make_applier
template<int32_t Size>
using ck_tile::detail::make_applier = __make_integer_seq<applier, index_t, Size>
Function Documentation
◆ add_impl_vv()
CK_TILE_DEVICE float ck_tile::detail::add_impl_vv | ( | float | lhs, |
float | rhs | ||
) |
◆ apply_impl()
template<typename F , typename Tuple , index_t... Is>
constexpr decltype(auto) ck_tile::detail::apply_impl (F &&f, Tuple &&t, sequence< Is... >)
◆ calculate_element_space_size_impl()
template<typename Lengths , typename Strides , index_t I, typename AccOld >
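constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::calculate_element_space_size_impl (const Lengths &lengths, const Strides &strides, number< I > i, AccOld acc_old)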
◆ cvt_pk_bf16_f32()
CK_TILE_DEVICE bf16x2_t ck_tile::detail::cvt_pk_bf16_f32 | ( | float | a, |
float | b | ||
) |
◆ cvt_pk_fp16_f32()
CK_TILE_DEVICE fp16x2_t ck_tile::detail::cvt_pk_fp16_f32 | ( | float | a, |
float | b | ||
) |
◆ embed_tuples_impl()
template<typename F , typename X , index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::embed_tuples_impl (F f, const X &x, sequence< Is... >)
◆ fma_impl_vsv()
CK_TILE_DEVICE float ck_tile::detail::fma_impl_vsv | ( | float | a, |
float | b, | ||
float | c | ||
) |
◆ get_partition_index()
template<typename Distribution >
CK_TILE_HOST_DEVICE auto ck_tile::detail::get_partition_index | ( | Distribution | ) |
◆ make_adaptor_encoding_for_tile_distribution()
template<typename StaticTileDistributionEncoding_ >
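constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_adaptor_encoding_for_tile_distribution (StaticTileDistributionEncoding_)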
◆ make_embed_tile_distribution_encoding()
template<typename OuterDstr , typename InnerDstr >
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_embed_tile_distribution_encoding (OuterDstr, InnerDstr)
◆ make_reduce_tile_distribution_encoding()
template<typename InDstr , index_t... InReduceDimXs>
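constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_reduce_tile_distribution_encoding (InDstr, sequence< InReduceDimXs... > reduce_dim_xs_in)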
◆ make_reduce_tile_distribution_encoding_impl()
template<typename InDstr , index_t... InReduceDimXs>
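constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_reduce_tile_distribution_encoding_impl (InDstr, sequence< InReduceDimXs... > reduce_dim_xs_in)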
◆ make_sequential_index()
template<index_t NDimMax>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_sequential_index (index_t ibegin, index_t iend)
◆ make_tile_distributed_index()
template<index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_tile_distributed_index (sequence< Is... >)
◆ make_tile_distributed_span()
template<index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::make_tile_distributed_span (sequence< Is... >)
◆ pk_mul_f32()
CK_TILE_DEVICE fp32x2_t ck_tile::detail::pk_mul_f32 | ( | fp32x2_t | lhs, |
fp32x2_t | rhs | ||
) |
◆ shuffle_tile_impl_in_thread()
template<typename OutTensor , typename InTensor >
CK_TILE_DEVICE void ck_tile::detail::shuffle_tile_impl_in_thread | ( | OutTensor & | out_tensor, |
const InTensor & | in_tensor | ||
) |
◆ slice_distribution_from_x()
template<typename Distribution , index_t... XSliceBegins, index_t... XSliceEnds>
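constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::slice_distribution_from_x (Distribution, sequence< XSliceBegins... > x_slice_begins, sequence< XSliceEnds... > x_slice_ends)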
◆ transform_tuples_impl() [1/3]
template<typename F , typename X , typename Y , typename Z , index_t... Is>
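constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::transform_tuples_impl (F f, const X &x, const Y &y, const Z &z, sequence< Is... >)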
◆ transform_tuples_impl() [2/3]
template<typename F , typename X , typename Y , index_t... Is>
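constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::transform_tuples_impl (F f, const X &x, const Y &y, sequence< Is... >)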
◆ transform_tuples_impl() [3/3]
template<typename F , typename X , index_t... Is>
constexpr CK_TILE_HOST_DEVICE auto ck_tile::detail::transform_tuples_impl (F f, const X &x, sequence< Is... >)
◆ transpose_tile2d_impl_in_thread()
template<typename OutTensor , typename InTensor >
CK_TILE_DEVICE void ck_tile::detail::transpose_tile2d_impl_in_thread | ( | OutTensor & | out_tensor, |
const InTensor & | in_tensor | ||
) |
Variable Documentation
◆ is_similiar_distributed_tensor_v
template<typename X , typename Y >
inline constexpr bool ck_tile::detail::is_similiar_distributed_tensor_v
Initial value:
= is_similiar_distributed_tensor<X, Y>::value