/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/epilogue/chainer/cshuffle_epilogue_chainer_ops.hpp Source File
cshuffle_epilogue_chainer_ops.hpp
Go to the documentation of this file.
constexpr CK_TILE_HOST_DEVICE auto make_embed_tile_distribution_encoding(OuterDstr, InnerDstr)
Definition: tile_distribution_encoding.hpp:457
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE auto make_naive_tensor_descriptor(const tuple< Lengths... > &lengths, const tuple< Strides... > &strides, number< GuaranteedLastDimensionVectorLength >=number<-1 >{}, number< GuaranteedLastDimensionVectorStride >=number<-1 >{})
Definition: tensor_descriptor.hpp:274
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition: tile_elementwise.hpp:23
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition: type_traits.hpp:21
typename impl::warp_gemm_dispatcher::Dispatcher< AType, BType, AccType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess >::Type WarpGemmDispatcher
Definition: warp_gemm_dispatcher.hpp:176
constexpr CK_TILE_HOST_DEVICE auto to_sequence(tuple< number< Is >... >)
Definition: sequence.hpp:1066
constexpr CK_TILE_HOST_DEVICE auto merge_sequences(Seqs...)
Definition: sequence.hpp:837
constexpr CK_TILE_DEVICE auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition: null_tile_window.hpp:75
typename detail::detector< nonesuch, void, Op, Args... >::value_t is_detected
Definition: type_traits.hpp:67
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition: null_tile_window.hpp:95
constexpr CK_TILE_HOST_DEVICE auto generate_tuple(F &&f, number< N >)
Definition: tuple.hpp:429
constexpr CK_TILE_HOST_DEVICE auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:360
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:36
constexpr CK_TILE_HOST_DEVICE auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition: tile_distribution.hpp:495
typename uniform_sequence_gen< NSize, I >::type uniform_sequence_gen_t
Definition: sequence.hpp:1037
typename tuple_element< I, TTuple >::type tuple_element_t
Definition: tuple.hpp:208
typename conditional< predicate, X, Y >::type conditional_t
Definition: functional.hpp:115
Context structure for CShuffle epilogue operations.
Definition: cshuffle_epilogue_chainer_ops.hpp:447
OutTileType out_tile
Definition: cshuffle_epilogue_chainer_ops.hpp:453
LdsBlockType lds_block
Definition: cshuffle_epilogue_chainer_ops.hpp:449
LdsWriteWindowType lds_write_window
Definition: cshuffle_epilogue_chainer_ops.hpp:450
LdsReadWindowType lds_read_window
Definition: cshuffle_epilogue_chainer_ops.hpp:451
WorkingTileType working_tile
Definition: cshuffle_epilogue_chainer_ops.hpp:448
AuxWindowsType aux_windows
Definition: cshuffle_epilogue_chainer_ops.hpp:452
Definition: cshuffle_epilogue_chainer_ops.hpp:181
static constexpr index_t NumDTensor
Definition: cshuffle_epilogue_chainer_ops.hpp:227
static constexpr index_t kBlockSize
Definition: cshuffle_epilogue_chainer_ops.hpp:212
static constexpr index_t kNPerBlock
Definition: cshuffle_epilogue_chainer_ops.hpp:214
remove_cvref_t< typename Problem::ODataType > ODataType
Definition: cshuffle_epilogue_chainer_ops.hpp:186
std::conditional_t< ADataTypeIsTuple, remove_cvref_t< AsDataType >, remove_cvref_t< tuple< AsDataType > >> AsDataTypeTuple
Definition: cshuffle_epilogue_chainer_ops.hpp:195
std::conditional_t< BDataTypeIsTuple, remove_cvref_t< BsDataType >, remove_cvref_t< tuple< BsDataType > >> BsDataTypeTuple
Definition: cshuffle_epilogue_chainer_ops.hpp:199
static constexpr index_t BlockedXDLN_PerWarp
Definition: cshuffle_epilogue_chainer_ops.hpp:223
static constexpr index_t NumMXdlPerWavePerShuffle
Definition: cshuffle_epilogue_chainer_ops.hpp:326
static constexpr index_t NWave
Definition: cshuffle_epilogue_chainer_ops.hpp:216
CK_TILE_DEVICE auto operator()([[maybe_unused]] OutDramWindow &out_window, [[maybe_unused]] const AccTile &acc_tile, const DsDramWindows &ds_windows, void *p_smem)
Definition: cshuffle_epilogue_chainer_ops.hpp:457
static constexpr index_t MWave
Definition: cshuffle_epilogue_chainer_ops.hpp:215
static constexpr CK_TILE_HOST_DEVICE auto MakeLdsBlockDescriptor()
Definition: cshuffle_epilogue_chainer_ops.hpp:357
static constexpr auto shuffle_tile_tuple
Shuffle tile configuration parameters.
Definition: cshuffle_epilogue_chainer_ops.hpp:299
static constexpr index_t isCTransposed
Definition: cshuffle_epilogue_chainer_ops.hpp:220
remove_cvref_t< typename Problem::ELayout > ELayout
Definition: cshuffle_epilogue_chainer_ops.hpp:209
std::conditional_t< std::is_same_v< BDataType, pk_int4_t >, ADataType, BDataType > BTypeToUse
Definition: cshuffle_epilogue_chainer_ops.hpp:208
remove_cvref_t< typename Problem::AccDataType > AccDataType
Definition: cshuffle_epilogue_chainer_ops.hpp:185
static constexpr bool FixedVectorSize
Definition: cshuffle_epilogue_chainer_ops.hpp:221
static constexpr auto MNPerIterationShuffle
Definition: cshuffle_epilogue_chainer_ops.hpp:330
static constexpr CK_TILE_DEVICE auto MakeLdsDistributionEncode()
Definition: cshuffle_epilogue_chainer_ops.hpp:379
static constexpr CK_TILE_HOST_DEVICE index_t GetSmemSize()
Definition: cshuffle_epilogue_chainer_ops.hpp:412
static constexpr index_t VectorSizeC
Definition: cshuffle_epilogue_chainer_ops.hpp:224
remove_cvref_t< std::tuple_element_t< number< 0 >{}, AsDataTypeTuple > > ADataType
Definition: cshuffle_epilogue_chainer_ops.hpp:201
remove_cvref_t< std::tuple_element_t< number< 0 >{}, BsDataTypeTuple > > BDataType
Definition: cshuffle_epilogue_chainer_ops.hpp:202
static constexpr index_t NPerXdl
Definition: cshuffle_epilogue_chainer_ops.hpp:218
static constexpr bool ADataTypeIsTuple
Definition: cshuffle_epilogue_chainer_ops.hpp:190
static constexpr memory_operation_enum MemoryOperation
Definition: cshuffle_epilogue_chainer_ops.hpp:211
static constexpr index_t MPerIterationShuffle
Definition: cshuffle_epilogue_chainer_ops.hpp:338
remove_cvref_t< typename Problem::DsDataType > DsDataType
Definition: cshuffle_epilogue_chainer_ops.hpp:187
typename WG::CWarpDstr CWarpDstr
Definition: cshuffle_epilogue_chainer_ops.hpp:349
static constexpr index_t NumNXdlPerWavePerShuffle
Definition: cshuffle_epilogue_chainer_ops.hpp:327
static constexpr index_t MPerIteration
Definition: cshuffle_epilogue_chainer_ops.hpp:225
remove_cvref_t< typename Problem::AsDataType > AsDataType
Definition: cshuffle_epilogue_chainer_ops.hpp:183
remove_cvref_t< Problem_ > Problem
Definition: cshuffle_epilogue_chainer_ops.hpp:182
std::conditional_t< std::is_same_v< ADataType, pk_int4_t >, BDataType, ADataType > ATypeToUse
Definition: cshuffle_epilogue_chainer_ops.hpp:205
remove_cvref_t< typename Problem::CDElementwise > CDElementwise
Definition: cshuffle_epilogue_chainer_ops.hpp:210
static constexpr index_t MPerXdl
Definition: cshuffle_epilogue_chainer_ops.hpp:217
static constexpr index_t KPerXdl
Definition: cshuffle_epilogue_chainer_ops.hpp:219
static constexpr bool TiledMMAPermuteN
Definition: cshuffle_epilogue_chainer_ops.hpp:222
remove_cvref_t< typename Problem::DsLayout > DsLayout
Definition: cshuffle_epilogue_chainer_ops.hpp:188
static constexpr index_t NPerIterationShuffle
Definition: cshuffle_epilogue_chainer_ops.hpp:339
static constexpr index_t kMPerBlock
Definition: cshuffle_epilogue_chainer_ops.hpp:213
WarpGemmDispatcher< ATypeToUse, BTypeToUse, AccDataType, MPerXdl, NPerXdl, KPerXdl, isCTransposed > WG
Definition: cshuffle_epilogue_chainer_ops.hpp:347
typename WG::CWarpDstrEncoding CWarpDstrEncoding
Definition: cshuffle_epilogue_chainer_ops.hpp:351
static constexpr index_t NPerIteration
Definition: cshuffle_epilogue_chainer_ops.hpp:226
static constexpr bool BDataTypeIsTuple
Definition: cshuffle_epilogue_chainer_ops.hpp:191
typename WG::CWarpTensor CWarpTensor
Definition: cshuffle_epilogue_chainer_ops.hpp:350
remove_cvref_t< typename Problem::BsDataType > BsDataType
Definition: cshuffle_epilogue_chainer_ops.hpp:184
static constexpr CK_TILE_HOST_DEVICE index_t GetVectorSizeD(number< I > index)
Get the vector store size for Di tensor.
Definition: cshuffle_epilogue_chainer_ops.hpp:271
static constexpr CK_TILE_HOST_DEVICE index_t GetVectorSizeC()
Get the vector store size for C tensor.
Definition: cshuffle_epilogue_chainer_ops.hpp:242
Problem configuration for CShuffle epilogue chainer operations.
Definition: cshuffle_epilogue_chainer_ops.hpp:149
static constexpr index_t VectorSizeC
Definition: cshuffle_epilogue_chainer_ops.hpp:169
static constexpr index_t isCTransposed
Definition: cshuffle_epilogue_chainer_ops.hpp:166
remove_cvref_t< ODataType_ > ODataType
Definition: cshuffle_epilogue_chainer_ops.hpp:153
static constexpr index_t MWave
Definition: cshuffle_epilogue_chainer_ops.hpp:161
remove_cvref_t< AsDataType_ > AsDataType
Definition: cshuffle_epilogue_chainer_ops.hpp:150
remove_cvref_t< CDElementwise_ > CDElementwise
Definition: cshuffle_epilogue_chainer_ops.hpp:157
static constexpr index_t MPerXdl
Definition: cshuffle_epilogue_chainer_ops.hpp:163
static constexpr index_t KPerXdl
Definition: cshuffle_epilogue_chainer_ops.hpp:165
static constexpr index_t NWave
Definition: cshuffle_epilogue_chainer_ops.hpp:162
static constexpr memory_operation_enum MemoryOperation
Definition: cshuffle_epilogue_chainer_ops.hpp:167
remove_cvref_t< AccDataType_ > AccDataType
Definition: cshuffle_epilogue_chainer_ops.hpp:152
static constexpr index_t BlockedXDLN_PerWarp
Definition: cshuffle_epilogue_chainer_ops.hpp:170
static constexpr index_t kNumWaveGroups
Definition: cshuffle_epilogue_chainer_ops.hpp:172
static constexpr bool FixedVectorSize
Definition: cshuffle_epilogue_chainer_ops.hpp:168
static constexpr index_t NPerXdl
Definition: cshuffle_epilogue_chainer_ops.hpp:164
static constexpr index_t kNPerBlock
Definition: cshuffle_epilogue_chainer_ops.hpp:160
remove_cvref_t< DsLayout_ > DsLayout
Definition: cshuffle_epilogue_chainer_ops.hpp:155
remove_cvref_t< BsDataType_ > BsDataType
Definition: cshuffle_epilogue_chainer_ops.hpp:151
static constexpr index_t kBlockSize
Definition: cshuffle_epilogue_chainer_ops.hpp:158
remove_cvref_t< DsDataType_ > DsDataType
Definition: cshuffle_epilogue_chainer_ops.hpp:154
static constexpr index_t kMPerBlock
Definition: cshuffle_epilogue_chainer_ops.hpp:159
static constexpr index_t NumDTensor
Definition: cshuffle_epilogue_chainer_ops.hpp:173
static constexpr bool TiledMMAPermuteN
Definition: cshuffle_epilogue_chainer_ops.hpp:171
remove_cvref_t< ELayout_ > ELayout
Definition: cshuffle_epilogue_chainer_ops.hpp:156
Scale working tile using tensor windows (CShuffle-specific).
Definition: cshuffle_epilogue_chainer_ops.hpp:79
CK_TILE_DEVICE void operator()([[maybe_unused]] OutWindow &out_window, [[maybe_unused]] const AccTile &acc_tile, [[maybe_unused]] const AuxWindows &aux_windows, [[maybe_unused]] void *p_smem, IAccess iAccess, Context &context, const ScaleRowTensor &scale_row_tensor, const ScaleColTensor &scale_col_tensor)
Definition: cshuffle_epilogue_chainer_ops.hpp:87
Slice accumulator tile for CShuffle epilogue.
Definition: cshuffle_epilogue_chainer_ops.hpp:39
CK_TILE_DEVICE void operator()([[maybe_unused]] OutWindow &out_window, const AccTile &acc_tile, [[maybe_unused]] const AuxWindows &aux_windows, [[maybe_unused]] void *p_smem, IAccess iAccess, Context &context)
Definition: cshuffle_epilogue_chainer_ops.hpp:45
Definition: integral_constant.hpp:13
Definition: unary_element_wise_operation.hpp:509
Definition: sequence.hpp:49
Definition: space_filling_curve.hpp:20
Class creating 2D static tile distribution with different load/store patterns.
Definition: static_encoding_pattern.hpp:130
Definition: tile_distribution_encoding.hpp:26
Definition: tuple.hpp:192