/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_base.hpp Source File
blockwise_gemm_pipeline_xdlops_base.hpp
Go to the documentation of this file.
Definition: ck.hpp:267
__host__ constexpr __device__ auto make_multi_index(Xs &&... xs)
Definition: array_multi_index.hpp:15
__host__ constexpr __device__ auto make_naive_tensor_descriptor(const Tuple< Lengths... > &lengths, const Tuple< Strides... > &strides)
Definition: tensor_descriptor_helper.hpp:49
__host__ constexpr __device__ auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition: tensor_descriptor_helper.hpp:101
__host__ constexpr __device__ auto make_merge_transform(const LowLengths &low_lengths)
Definition: multi_index_transform_helper.hpp:55
__host__ constexpr __device__ auto make_single_stage_tensor_adaptor(const Transforms &transforms, LowerDimensionOldTopIdss, UpperDimensionNewTopIdss)
Definition: tensor_adaptor.hpp:425
__host__ constexpr __device__ auto make_pass_through_transform(const LowLength &low_length)
Definition: multi_index_transform_helper.hpp:12
__host__ constexpr __device__ auto make_unmerge_transform(const UpLengths &up_lengths, integral_constant< bool, Use24BitIntegerCalculation >=integral_constant< bool, false >{})
Definition: multi_index_transform_helper.hpp:90
__host__ constexpr __device__ auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition: tensor_descriptor.hpp:319
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:35
StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, AccDataType, MRepeat *NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > c_thread_buf_
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:106
static constexpr index_t NWaves
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:45
__host__ __device__ BlockwiseGemmXdlops_pipeline_base(Tuple4 a_origin=CalculateAThreadOriginDataIndex(), Tuple4 b_origin=CalculateBThreadOriginDataIndex())
Constructor for BlockwiseGemmXdlops_pipeline_base.
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:219
static constexpr index_t MWaves
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:44
static constexpr index_t A_K0
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:48
__host__ static constexpr __device__ auto GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:277
__host__ static constexpr __device__ auto GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:291
static constexpr auto c_thread_desc_
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:375
static constexpr auto xdlops_gemm
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:54
static __device__ auto CalculateBThreadOriginDataIndex()
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:144
static constexpr BMmaTileDesc b_block_desc_n0_n1_n2_k
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:357
static __device__ auto GetWaveIdx()
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:110
static constexpr index_t KGroup
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:64
static constexpr auto I1
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:37
__host__ static constexpr __device__ auto GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:236
static constexpr index_t AMmaKStride
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:57
BThreadCopy b_thread_copy_
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:399
static __device__ auto CalculateAThreadOriginDataIndex()
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:122
static __device__ auto CalculateAThreadOriginDataIndex6D()
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:133
static constexpr index_t WaveSize
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:46
static constexpr index_t B_K1
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:51
ThisThreadBlock< BlockSize > ThisThreadBlock
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:41
__host__ static constexpr __device__ auto MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2(const CGridDesc_G_M_N &c_grid_desc_g_m_n)
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:338
__host__ static constexpr __device__ auto MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(const CGridDesc_M_N &c_grid_desc_m_n)
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:321
static constexpr index_t KPerInnerLoop
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:62
static constexpr auto I0
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:36
__host__ static constexpr __device__ auto GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:250
static __device__ auto CalculateCThreadOriginDataIndex(Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:157
static __device__ auto CalculateCThreadOriginDataIndex8D(Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:186
__host__ static constexpr __device__ auto GetCThreadDesc()
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:355
static constexpr auto a_thread_desc_
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:363
static constexpr index_t KRepeat
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:61
static constexpr AMmaTileDesc a_block_desc_m0_m1_m2_k
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:356
__host__ static constexpr __device__ auto GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:304
static constexpr auto b_thread_desc_
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:369
static constexpr auto I2
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:38
static constexpr auto I3
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:39
static constexpr index_t A_K1
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:50
static constexpr index_t BMmaKStride
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:58
decltype(CalculateAThreadOriginDataIndex()) Tuple4
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:199
AThreadCopy a_thread_copy_
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:398
static constexpr index_t KPerThread
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:60
__host__ static constexpr __device__ auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:263
__host__ constexpr __device__ auto & GetCThreadBuffer()
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:108
static constexpr index_t B_K0
Definition: blockwise_gemm_pipeline_xdlops_base.hpp:49
Definition: blockwise_gemm_pipeline_xdlops.hpp:34
Definition: sequence.hpp:43
Definition: static_buffer.hpp:75
static __device__ index_t GetThreadId()
Definition: thread_group.hpp:19
static constexpr __device__ index_t GetNumOfThread()
Definition: thread_group.hpp:15
Definition: xdlops_gemm.hpp:1711
Definition: integral_constant.hpp:20