/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/tensor_operation/gpu/grid/epilogue_cshuffle_v3_wmma_base.hpp Source File#
epilogue_cshuffle_v3_wmma_base.hpp
Go to the documentation of this file.
81 constexpr auto c_block_desc_mrepeat_mwave_msubgroup_nrepeat_nwave_nthreadpersubgroup_maccvgprs_tmp =
121 constexpr auto c_block_desc_mrepeat_mwave_msubgroup_nrepeat_nwave_nthreadpersubgroup_maccvgprs_tmp =
ck_tile::element_wise::PassThrough PassThrough
Definition: grouped_convolution_utils.hpp:47
Definition: ck.hpp:268
__host__ constexpr __device__ auto make_multi_index(Xs &&... xs)
Definition: array_multi_index.hpp:15
typename uniform_sequence_gen< NSize, I >::type uniform_sequence_gen_t
Definition: sequence.hpp:928
__host__ constexpr __device__ auto generate_tuple(F &&f, Number< N >)
Definition: tuple_helper.hpp:21
__host__ constexpr __device__ auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition: tensor_descriptor_helper.hpp:101
__host__ constexpr __device__ auto make_merge_transform(const LowLengths &low_lengths)
Definition: multi_index_transform_helper.hpp:55
__host__ constexpr __device__ auto make_single_stage_tensor_adaptor(const Transforms &transforms, LowerDimensionOldTopIdss, UpperDimensionNewTopIdss)
Definition: tensor_adaptor.hpp:425
__host__ constexpr __device__ auto make_freeze_transform(const LowerIndex &low_idx)
Definition: multi_index_transform_helper.hpp:151
__host__ constexpr __device__ auto container_concat(const X &x, const Ys &... ys)
Definition: container_helper.hpp:320
typename sequence_merge< Sx, Sy >::type sequence_merge_t
Definition: sequence.hpp:925
__host__ constexpr __device__ auto make_unmerge_transform(const UpLengths &up_lengths, integral_constant< bool, Use24BitIntegerCalculation >=integral_constant< bool, false >{})
Definition: multi_index_transform_helper.hpp:90
__host__ constexpr __device__ auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition: tensor_descriptor.hpp:319
Definition: epilogue_cshuffle_v3_wmma_base.hpp:29
static constexpr auto I6
Definition: epilogue_cshuffle_v3_wmma_base.hpp:36
static constexpr auto I2
Definition: epilogue_cshuffle_v3_wmma_base.hpp:32
static constexpr auto I4
Definition: epilogue_cshuffle_v3_wmma_base.hpp:34
static constexpr auto I5
Definition: epilogue_cshuffle_v3_wmma_base.hpp:35
static constexpr index_t NumDTensor
Definition: epilogue_cshuffle_v3_wmma_base.hpp:38
static constexpr auto I0
Definition: epilogue_cshuffle_v3_wmma_base.hpp:30
static constexpr auto I3
Definition: epilogue_cshuffle_v3_wmma_base.hpp:33
static __device__ auto GetLDSToVmemEpilogueDescriptor(CDsDescRefs &c_ds_desc_refs, EGridDesc &e_grid_desc_mblock_mperblock_nblock_nperblock, CDEElementwiseOperation &cde_element_op, const index_t &block_m_id, const index_t &block_n_id)
Definition: epilogue_cshuffle_v3_wmma_base.hpp:204
static constexpr __device__ auto GetCShuffleBlockDescriptor_MShRepeat_MPerShRepeat_NShRepeat_NPerShRepeat()
Definition: epilogue_cshuffle_v3_wmma_base.hpp:63
static constexpr auto I1
Definition: epilogue_cshuffle_v3_wmma_base.hpp:31
static __device__ auto GetVgprToLDSEpilogueDescriptor()
Definition: epilogue_cshuffle_v3_wmma_base.hpp:118
static constexpr __device__ auto GetCShuffleLDSDescriptor()
Definition: epilogue_cshuffle_v3_wmma_base.hpp:78
static constexpr auto EShuffleBlockTransferScalarPerVector
Definition: epilogue_cshuffle_v3_wmma_base.hpp:39
Definition: sequence.hpp:43
Definition: tensor_space_filling_curve.hpp:20
Definition: thread_group.hpp:12
Definition: thread_group_tensor_slice_transfer_v7r3.hpp:48
Definition: threadwise_tensor_slice_transfer.hpp:39
Definition: tuple.hpp:117
Definition: integral_constant.hpp:20
Definition: unary_element_wise_operation.hpp:340