/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp Source File#
blockwise_gemm_dl_v2r3.hpp
Go to the documentation of this file.
Definition: ck.hpp:267
__host__ constexpr __device__ auto make_multi_index(Xs &&... xs)
Definition: array_multi_index.hpp:15
__host__ constexpr __device__ auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition: tensor_descriptor_helper.hpp:101
__host__ constexpr __device__ auto make_merge_transform(const LowLengths &low_lengths)
Definition: multi_index_transform_helper.hpp:55
__host__ constexpr __device__ auto make_single_stage_tensor_adaptor(const Transforms &transforms, LowerDimensionOldTopIdss, UpperDimensionNewTopIdss)
Definition: tensor_adaptor.hpp:425
__host__ constexpr __device__ auto make_pass_through_transform(const LowLength &low_length)
Definition: multi_index_transform_helper.hpp:12
__host__ constexpr __device__ auto make_unmerge_transform(const UpLengths &up_lengths, integral_constant< bool, Use24BitIntegerCalculation >=integral_constant< bool, false >{})
Definition: multi_index_transform_helper.hpp:90
__host__ constexpr __device__ auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition: tensor_descriptor.hpp:319
__host__ constexpr __device__ auto chain_tensor_adaptors(const TensorAdaptor0 &adaptor0, const TensorAdaptor1 &adaptor1)
Definition: tensor_adaptor.hpp:245
Definition: array.hpp:14
Definition: blockwise_gemm_dl_v2r3.hpp:47
static constexpr auto I0
Definition: blockwise_gemm_dl_v2r3.hpp:52
static constexpr index_t BK0
Definition: blockwise_gemm_dl_v2r3.hpp:57
static constexpr auto I2
Definition: blockwise_gemm_dl_v2r3.hpp:54
MultiIndex< 4 > CIndex
Definition: blockwise_gemm_dl_v2r3.hpp:50
static constexpr index_t BM0
Definition: blockwise_gemm_dl_v2r3.hpp:74
__device__ BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2()
Definition: blockwise_gemm_dl_v2r3.hpp:153
static constexpr index_t BM101
Definition: blockwise_gemm_dl_v2r3.hpp:65
static constexpr index_t BM
Definition: blockwise_gemm_dl_v2r3.hpp:59
static constexpr index_t BK1
Definition: blockwise_gemm_dl_v2r3.hpp:58
static constexpr index_t BN101
Definition: blockwise_gemm_dl_v2r3.hpp:66
static __device__ CIndex CalculateCThreadOriginOnBlock_BM0_BM1_BN0_BN1(index_t thread_id)
Definition: blockwise_gemm_dl_v2r3.hpp:184
static constexpr index_t BM1
Definition: blockwise_gemm_dl_v2r3.hpp:71
__host__ static constexpr __device__ auto MakeABlockDescriptor_BK0_BM0_BM1_BK1(const ABlockDesc_BK0_BM_BK1 &a_block_desc_bk0_bm_bk1)
Definition: blockwise_gemm_dl_v2r3.hpp:78
__device__ void Run(const CThreadDesc_BM0_BM11_BN0_BN11 &, const ABlockBuffer &a_block_buf, const BBlockBuffer &b_block_buf, CThreadBuffer &c_thread_buf) const
Definition: blockwise_gemm_dl_v2r3.hpp:212
static constexpr index_t BN1
Definition: blockwise_gemm_dl_v2r3.hpp:72
__host__ static constexpr __device__ auto MakeCBlockAdaptor_BM0_BM100_BM101_BM11_BN0_BN100_BN101_BN11_To_BM0_BM1_BN0_BN1()
Definition: blockwise_gemm_dl_v2r3.hpp:123
static constexpr index_t BN0
Definition: blockwise_gemm_dl_v2r3.hpp:75
__host__ static constexpr __device__ auto MakeCBlockAdaptor_BM0_BM100_BM101_BM11_BN0_BN100_BN101_BN11_To_BM_BN()
Definition: blockwise_gemm_dl_v2r3.hpp:106
static constexpr auto I1
Definition: blockwise_gemm_dl_v2r3.hpp:53
static constexpr auto a_block_desc_bk0_bm0_bm1_bk1_
Definition: blockwise_gemm_dl_v2r3.hpp:146
static constexpr index_t BN11
Definition: blockwise_gemm_dl_v2r3.hpp:69
static constexpr index_t BN100
Definition: blockwise_gemm_dl_v2r3.hpp:63
static constexpr index_t BM11
Definition: blockwise_gemm_dl_v2r3.hpp:68
__host__ static constexpr __device__ auto MakeBBlockDescriptor_BK0_BN0_BN1_BK1(const BBlockDesc_BK0_BN_BK1 &b_block_desc_bk0_bn_bk1)
Definition: blockwise_gemm_dl_v2r3.hpp:92
__host__ static constexpr __device__ auto GetCThreadTensorLengths_BM0_BM1_BN0_BN1()
Definition: blockwise_gemm_dl_v2r3.hpp:141
static constexpr index_t BN
Definition: blockwise_gemm_dl_v2r3.hpp:60
static constexpr auto b_block_desc_bk0_bn0_bn1_bk1_
Definition: blockwise_gemm_dl_v2r3.hpp:149
static constexpr auto I3
Definition: blockwise_gemm_dl_v2r3.hpp:55
static constexpr index_t BM100
Definition: blockwise_gemm_dl_v2r3.hpp:62
Definition: sequence.hpp:43
Definition: threadwise_contraction_dl.hpp:130
__device__ void Run(const SrcDesc &, const SrcRefToOriginDisplacement &, const SrcBuffer &src_buf, const DstDesc &, const DstOriginIdx &, DstBuffer &dst_buf) const
Definition: threadwise_tensor_slice_transfer_v4r1.hpp:62
Definition: integral_constant.hpp:20
Definition: functional2.hpp:33