/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_base.hpp Source File
blockwise_gemm_pipeline_wmmaops_base.hpp
Go to the documentation of this file.
Definition: ck.hpp:270
typename detail::StaticallyIndexedArrayImpl< T, N >::type StaticallyIndexedArray
Definition: statically_indexed_array.hpp:45
__host__ constexpr __device__ auto make_multi_index(Xs &&... xs)
Definition: array_multi_index.hpp:15
__host__ constexpr __device__ auto make_naive_tensor_descriptor(const Tuple< Lengths... > &lengths, const Tuple< Strides... > &strides)
Definition: tensor_descriptor_helper.hpp:49
__host__ constexpr __device__ auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition: tensor_descriptor_helper.hpp:101
__host__ constexpr __device__ auto make_merge_transform(const LowLengths &low_lengths)
Definition: multi_index_transform_helper.hpp:55
__host__ constexpr __device__ auto make_single_stage_tensor_adaptor(const Transforms &transforms, LowerDimensionOldTopIdss, UpperDimensionNewTopIdss)
Definition: tensor_adaptor.hpp:425
__host__ constexpr __device__ auto make_unmerge_transform(const UpLengths &up_lengths, integral_constant< bool, Use24BitIntegerCalculation >=integral_constant< bool, false >{})
Definition: multi_index_transform_helper.hpp:90
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:123
__device__ void GlobalLoad(bool cond)
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:147
StaticallyIndexedArray< ThreadStaticBuffer, Number< NumberOfBuffers >{}> scale_thread_bufs
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:175
static constexpr index_t num_slice_k
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:135
GridDesc scale_grid_desc
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:173
static constexpr index_t reg_size_per_wmma
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:136
static constexpr index_t num_slice_mn
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:134
GridBuffer scale_grid_buf
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:174
static constexpr auto scale_thread_desc
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:138
static constexpr auto scale_thread_copy_step
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:140
ThreadCopy scale_thread_copy
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:172
__device__ ABScale(GridDesc scale_grid_desc_, ThreadCopy scale_thread_copy_, GridBuffer scale_grid_buf_)
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:124
static constexpr index_t num_scale_k_block
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:131
static constexpr index_t num_scale_krepeat
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:132
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:180
__device__ void Load(AScaleStruct &a_scale_struct, BScaleStruct &b_scale_struct)
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:198
__device__ void Clear()
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:221
__device__ void UpdateCThreadBuf(CThreadBuf &c_thread_buf)
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:230
decltype(c_scale_thread_desc) CScaleThreadDesc
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:191
decltype(make_static_buffer< AddressSpaceEnum::Vgpr, AccDataType >(c_scale_thread_desc.GetElementSpaceSize())) ThreadStaticBuffer
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:196
__device__ CScale()
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:181
StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, AccDataType, 1, reg_size_per_wmma, true > c_thread_buf_per_scale
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:250
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:102
__device__ void GlobalLoad(bool cond)
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:105
__device__ Empty()
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:103
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:36
static constexpr auto I2
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:39
__host__ static constexpr __device__ auto GetCThreadDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs()
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:384
static constexpr index_t NWaves
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:49
decltype(CalculateAThreadOriginDataIndex()) Tuple7
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:329
static constexpr BWmmaTileDesc b_block_desc_k0_n0_n1_n2_k1
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:422
static constexpr index_t KPerThread
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:67
static constexpr index_t MWaves
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:48
BThreadCopy b_thread_copy_
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:484
static constexpr index_t A_KRow
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:55
ThisThreadBlock< BlockSize > ThisThreadBlock
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:44
static constexpr auto WmmaK
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:75
static __device__ auto CalculateAThreadOriginDataIndex()
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:267
__host__ static constexpr __device__ auto GetCBlockDescriptor_MRepeat_MWave_MSubGroup_NRepeat_NWave_NThreadPerSubGroup_MAccVgprs()
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:404
static __device__ auto CalculateCThreadOriginDataIndex(Number< m0 >, Number< n0 >)
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:302
StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, AccDataType, MRepeat *NRepeat, wmma_gemm.GetRegSizePerWmma(), true > c_thread_buf_
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:99
static constexpr auto I5
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:41
static constexpr auto I3
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:40
static constexpr index_t WaveSize
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:46
__host__ static constexpr __device__ auto GetCThreadDescriptor_MRepeat_MWave_MThreadPerSubGroup_NRepeat_NWave_NSubGroup_NAccVgprs()
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:367
static __device__ auto GetWaveIdx()
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:255
__host__ __device__ BlockwiseGemmWmmaops_pipeline_base(Tuple7 a_origin=CalculateAThreadOriginDataIndex(), Tuple7 b_origin=CalculateBThreadOriginDataIndex())
Constructor for BlockwiseGemmWmmaops_pipeline_base.
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:349
__host__ constexpr __device__ auto & GetCThreadBuffer()
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:253
static __device__ auto CalculateBThreadOriginDataIndex()
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:284
AThreadCopy a_thread_copy_
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:483
static constexpr index_t B_KRow
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:56
static constexpr index_t B_K1
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:69
static constexpr index_t A_K1
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:68
static constexpr auto I6
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:42
static constexpr index_t KRepeat
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:73
static constexpr AWmmaTileDesc a_block_desc_k0_m0_m1_m2_k1
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:421
static constexpr auto I1
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:38
static constexpr auto I0
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:37
static constexpr auto wmma_gemm
Definition: blockwise_gemm_pipeline_wmmaops_base.hpp:59
Definition: blockwise_gemm_pipeline_wmmaops.hpp:26
Definition: sequence.hpp:43
Definition: static_buffer.hpp:75
Definition: wmma_gemm.hpp:675
Definition: integral_constant.hpp:20
Definition: functional2.hpp:33