/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/epilogue/cshuffle_epilogue.hpp Source File
cshuffle_epilogue.hpp
Go to the documentation of this file.
493 // TODO: Check if there would be nicer ways to overload rather than with EmptyScale or nullptr_t
constexpr CK_TILE_HOST_DEVICE auto make_embed_tile_distribution_encoding(OuterDstr, InnerDstr)
Definition: tile_distribution_encoding.hpp:457
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE auto make_naive_tensor_descriptor(const tuple< Lengths... > &lengths, const tuple< Strides... > &strides, number< GuaranteedLastDimensionVectorLength >=number<-1 >{}, number< GuaranteedLastDimensionVectorStride >=number<-1 >{})
Definition: tensor_descriptor.hpp:274
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition: tile_elementwise.hpp:23
auto concat(const Ts &... xs) -> std::enable_if_t<!AllConvertibleToStringView< Ts... >, std::string >
Definition: concat.hpp:43
constexpr CK_TILE_HOST_DEVICE auto generate_tie(F &&f, number< N >)
Definition: tuple.hpp:435
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition: type_traits.hpp:21
typename impl::warp_gemm_dispatcher::Dispatcher< AType, BType, AccType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess >::Type WarpGemmDispatcher
Definition: warp_gemm_dispatcher.hpp:176
CK_TILE_DEVICE auto tile_elementwise_inout_unpack(const InElementFunc &in_element_func, const Tuple &t, std::index_sequence< I... >)
Template function that "unpacks" a tuple and applies an element-wise operation.
Definition: tile_elementwise.hpp:71
CK_TILE_DEVICE void shuffle_tile(OutTensor &out, const InTensor &in)
Definition: shuffle_tile.hpp:154
constexpr CK_TILE_HOST_DEVICE auto to_sequence(tuple< number< Is >... >)
Definition: sequence.hpp:1066
constexpr CK_TILE_HOST_DEVICE auto merge_sequences(Seqs...)
Definition: sequence.hpp:837
constexpr CK_TILE_DEVICE auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition: null_tile_window.hpp:75
typename detail::detector< nonesuch, void, Op, Args... >::value_t is_detected
Definition: type_traits.hpp:67
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition: null_tile_window.hpp:95
constexpr CK_TILE_HOST_DEVICE auto generate_tuple(F &&f, number< N >)
Definition: tuple.hpp:429
constexpr CK_TILE_HOST_DEVICE auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:360
constexpr CK_TILE_HOST_DEVICE auto concat_tuple_of_reference(const tuple< X &... > &tx, const tuple< Y &... > &ty)
Definition: tuple.hpp:443
CK_TILE_DEVICE void update_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition: update_tile.hpp:22
CK_TILE_DEVICE void store_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition: store_tile.hpp:24
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:36
constexpr CK_TILE_HOST_DEVICE auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition: tile_distribution.hpp:495
typename uniform_sequence_gen< NSize, I >::type uniform_sequence_gen_t
Definition: sequence.hpp:1037
typename tuple_element< I, TTuple >::type tuple_element_t
Definition: tuple.hpp:208
typename conditional< predicate, X, Y >::type conditional_t
Definition: functional.hpp:115
Definition: cshuffle_epilogue.hpp:495
typename T::DataType DataType
Definition: cshuffle_epilogue.hpp:507
Definition: cshuffle_epilogue.hpp:500
float DataType
Definition: cshuffle_epilogue.hpp:501
Definition: cshuffle_epilogue.hpp:72
static constexpr index_t kBlockSize
Definition: cshuffle_epilogue.hpp:108
CK_TILE_DEVICE void scale_tile(LdsTile &lds_tile, ScaleM &scale_m_window, ScaleN &scale_n_window)
Definition: cshuffle_epilogue.hpp:376
std::conditional_t< ADataTypeIsTuple, remove_cvref_t< AsDataType >, remove_cvref_t< tuple< AsDataType > >> AsDataTypeTuple
Definition: cshuffle_epilogue.hpp:86
static constexpr index_t NRepeat
Definition: cshuffle_epilogue.hpp:126
CK_TILE_DEVICE void slice_acc_tile(const OAccTile &o_acc_tile, LdsTile &lds_tile)
Definition: cshuffle_epilogue.hpp:417
static constexpr CK_TILE_HOST_DEVICE auto MakeLdsBlockDescriptor()
Definition: cshuffle_epilogue.hpp:297
static constexpr index_t MRepeat
Definition: cshuffle_epilogue.hpp:125
typename WG::CWarpTensor CWarpTensor
Definition: cshuffle_epilogue.hpp:290
typename WG::CWarpDstrEncoding CWarpDstrEncoding
Definition: cshuffle_epilogue.hpp:291
remove_cvref_t< typename Problem::AsDataType > AsDataType
Definition: cshuffle_epilogue.hpp:74
remove_cvref_t< Problem_ > Problem
Definition: cshuffle_epilogue.hpp:73
static constexpr index_t MPerXdl
Definition: cshuffle_epilogue.hpp:113
static constexpr bool FixedVectorSize
Definition: cshuffle_epilogue.hpp:117
std::conditional_t< std::is_same_v< BDataType, pk_int4_t >||std::is_same_v< BDataType, pk_fp4_t >||std::is_same_v< BDataType, pk_fp4_raw_t >, ADataType, BDataType > BTypeToUse
Definition: cshuffle_epilogue.hpp:104
static constexpr CK_TILE_HOST_DEVICE index_t GetVectorSizeD(number< I > index)
Get the vector store size for Di tensor.
Definition: cshuffle_epilogue.hpp:185
remove_cvref_t< typename Problem::ODataType > ODataType
Definition: cshuffle_epilogue.hpp:77
CK_TILE_DEVICE void store_to_dram(OutDramWindow &out_dram_window, const COutTensor &c_out_tensor)
Definition: cshuffle_epilogue.hpp:458
static constexpr bool ADataTypeIsTuple
Definition: cshuffle_epilogue.hpp:81
static constexpr index_t kNPerBlock
Definition: cshuffle_epilogue.hpp:110
static constexpr index_t BlockedXDLN_PerWarp
Definition: cshuffle_epilogue.hpp:119
static constexpr CK_TILE_HOST_DEVICE auto AlignShuffleTileWithSmem()
Shuffle tile configuration parameters check and alignment.
Definition: cshuffle_epilogue.hpp:213
remove_cvref_t< typename Problem::ELayout > ELayout
Definition: cshuffle_epilogue.hpp:106
static constexpr bool TiledMMAPermuteN
Definition: cshuffle_epilogue.hpp:118
static constexpr bool BDataTypeIsTuple
Definition: cshuffle_epilogue.hpp:82
remove_cvref_t< std::tuple_element_t< number< 0 >{}, BsDataTypeTuple > > BDataType
Definition: cshuffle_epilogue.hpp:93
remove_cvref_t< typename Problem::DsLayout > DsLayout
Definition: cshuffle_epilogue.hpp:79
static constexpr CK_TILE_DEVICE auto MakeLdsDistributionEncode()
Definition: cshuffle_epilogue.hpp:319
static constexpr index_t MPerIteration
Definition: cshuffle_epilogue.hpp:122
static constexpr auto MNPerIterationShuffle
Definition: cshuffle_epilogue.hpp:270
static constexpr index_t isCTransposed
Definition: cshuffle_epilogue.hpp:116
static constexpr CK_TILE_HOST_DEVICE index_t GetSmemSize()
Definition: cshuffle_epilogue.hpp:368
CK_TILE_DEVICE void apply_d_tensors(DramWindows &d_dram_windows, COutTensor &c_out_tensor)
Definition: cshuffle_epilogue.hpp:444
static constexpr CK_TILE_HOST_DEVICE index_t GetVectorSizeC()
Get the vector store size for C tensor.
Definition: cshuffle_epilogue.hpp:156
static constexpr index_t VectorSizeC
Definition: cshuffle_epilogue.hpp:121
remove_cvref_t< typename Problem::DsDataType > DsDataType
Definition: cshuffle_epilogue.hpp:78
CK_TILE_DEVICE void move_windows(OutDramWindow &out_dram_window, DDramWindows &d_dram_windows)
Move both the output and D tensor windows for the next access.
Definition: cshuffle_epilogue.hpp:476
remove_cvref_t< typename Problem::CDElementwise > CDElementwise
Definition: cshuffle_epilogue.hpp:107
static CK_TILE_HOST const std::string GetName()
Definition: cshuffle_epilogue.hpp:135
static constexpr index_t NPerIterationShuffle
Definition: cshuffle_epilogue.hpp:279
std::conditional_t< std::is_same_v< ADataType, pk_int4_t >||std::is_same_v< ADataType, pk_fp4_t >, BDataType, ADataType > ATypeToUse
Definition: cshuffle_epilogue.hpp:98
remove_cvref_t< typename Problem::AccDataType > AccDataType
Definition: cshuffle_epilogue.hpp:76
CK_TILE_DEVICE auto operator()(ODramWindow &out_dram_window, const OAccTile &o_acc_tile, const DsDramWindows &ds_dram_windows, void *, const ScaleM &scale_m={}, const ScaleN &scale_n={})
Definition: cshuffle_epilogue.hpp:517
static constexpr index_t NumDTensor
Definition: cshuffle_epilogue.hpp:124
static constexpr index_t KPerXdl
Definition: cshuffle_epilogue.hpp:115
CK_TILE_DEVICE auto operator()(ODramWindow &out_dram_window, const OAccTile &o_acc_tile, const DsDramWindows &ds_dram_windows, void *p_smem, const ScaleM &scale_m={}, const ScaleN &scale_n={})
Definition: cshuffle_epilogue.hpp:659
static constexpr bool DoubleSmemBuffer
Definition: cshuffle_epilogue.hpp:120
static constexpr index_t NumMXdlPerWavePerShuffle
Definition: cshuffle_epilogue.hpp:266
std::conditional_t< BDataTypeIsTuple, remove_cvref_t< BsDataType >, remove_cvref_t< tuple< BsDataType > >> BsDataTypeTuple
Definition: cshuffle_epilogue.hpp:90
static constexpr index_t NumNXdlPerWavePerShuffle
Definition: cshuffle_epilogue.hpp:267
remove_cvref_t< typename Problem::BsDataType > BsDataType
Definition: cshuffle_epilogue.hpp:75
WarpGemmDispatcher< ATypeToUse, BTypeToUse, AccDataType, MPerXdl, NPerXdl, KPerXdl, isCTransposed > WG
Definition: cshuffle_epilogue.hpp:287
static constexpr index_t MPerIterationShuffle
Definition: cshuffle_epilogue.hpp:278
CK_TILE_DEVICE void cast_lds_tile(LdsTile &lds_tile, InLdsWindow &in_lds_window)
Definition: cshuffle_epilogue.hpp:436
CK_TILE_DEVICE CShuffleEpilogue(CDElementwise elfunc=CDElementwise{})
Definition: cshuffle_epilogue.hpp:130
static constexpr auto shuffle_tile_tuple
Shuffle tile configuration parameters.
Definition: cshuffle_epilogue.hpp:234
static constexpr index_t kMPerBlock
Definition: cshuffle_epilogue.hpp:109
static constexpr index_t NPerIteration
Definition: cshuffle_epilogue.hpp:123
static constexpr index_t NPerXdl
Definition: cshuffle_epilogue.hpp:114
typename WG::CWarpDstr CWarpDstr
Definition: cshuffle_epilogue.hpp:289
remove_cvref_t< std::tuple_element_t< number< 0 >{}, AsDataTypeTuple > > ADataType
Definition: cshuffle_epilogue.hpp:92
Definition: cshuffle_epilogue.hpp:40
remove_cvref_t< BsDataType_ > BsDataType
Definition: cshuffle_epilogue.hpp:42
remove_cvref_t< ODataType_ > ODataType
Definition: cshuffle_epilogue.hpp:44
static constexpr index_t isCTransposed
Definition: cshuffle_epilogue.hpp:57
static constexpr index_t MWave
Definition: cshuffle_epilogue.hpp:52
static constexpr index_t BlockedXDLN_PerWarp
Definition: cshuffle_epilogue.hpp:60
static constexpr bool DoubleSmemBuffer
Definition: cshuffle_epilogue.hpp:61
static constexpr index_t VectorSizeC
Definition: cshuffle_epilogue.hpp:59
static constexpr index_t NWave
Definition: cshuffle_epilogue.hpp:53
static constexpr index_t KPerXdl
Definition: cshuffle_epilogue.hpp:56
static constexpr index_t kBlockSize
Definition: cshuffle_epilogue.hpp:49
static constexpr bool FixedVectorSize
Definition: cshuffle_epilogue.hpp:58
static constexpr index_t kMPerBlock
Definition: cshuffle_epilogue.hpp:50
static constexpr index_t MPerXdl
Definition: cshuffle_epilogue.hpp:54
remove_cvref_t< CDElementwise_ > CDElementwise
Definition: cshuffle_epilogue.hpp:48
remove_cvref_t< AsDataType_ > AsDataType
Definition: cshuffle_epilogue.hpp:41
static constexpr bool TiledMMAPermuteN
Definition: cshuffle_epilogue.hpp:62
static constexpr index_t NPerXdl
Definition: cshuffle_epilogue.hpp:55
remove_cvref_t< DsLayout_ > DsLayout
Definition: cshuffle_epilogue.hpp:46
remove_cvref_t< DsDataType_ > DsDataType
Definition: cshuffle_epilogue.hpp:45
remove_cvref_t< ELayout_ > ELayout
Definition: cshuffle_epilogue.hpp:47
remove_cvref_t< AccDataType_ > AccDataType
Definition: cshuffle_epilogue.hpp:43
static constexpr index_t kNumWaveGroups
Definition: cshuffle_epilogue.hpp:63
static constexpr index_t kNPerBlock
Definition: cshuffle_epilogue.hpp:51
static constexpr index_t NumDTensor
Definition: cshuffle_epilogue.hpp:64
Definition: integral_constant.hpp:13
Definition: unary_element_wise_operation.hpp:503
Definition: type_traits.hpp:115
Definition: sequence.hpp:49
Definition: space_filling_curve.hpp:20
static constexpr CK_TILE_HOST_DEVICE auto get_forward_step(number< AccessIdx1d >)
Definition: space_filling_curve.hpp:70
static constexpr CK_TILE_HOST_DEVICE auto get_index(number< AccessIdx1d >)
Definition: space_filling_curve.hpp:158
static constexpr CK_TILE_HOST_DEVICE index_t get_num_of_access()
Definition: space_filling_curve.hpp:46
Definition: functional.hpp:43
Definition: tile_distribution_encoding.hpp:26
Definition: tuple.hpp:192