// Run: static __device__ function template that drives two block-wise copy
// objects (A and B operands) and a block-wise GEMM object. The visible calls
// are, in order: read A/B from the grid descriptors/buffers, advance both
// source windows, write A/B into the block buffers, read A/B again, run the
// GEMM on the block buffers accumulating into c_thread_buf, advance and write
// again, then run the GEMM one final time.
//
// NOTE(review): this text carries the line numbers of a rendered listing as
// leading tokens, and those numbers skip values (e.g. 20->22, 23->26, 45->49,
// 53->59, 60->66, 77->84). Lines of the original are therefore missing here:
//   * ABlockDesc, AGridBuffer, BGridDesc, BBlockDesc and BGridBuffer are used
//     in the parameter list but are not declared in the visible template
//     parameter list;
//   * the parameter list is never closed and no function braces are visible;
//     a cross-reference for this function elsewhere in the file shows a
//     trailing `index_t num_loop` parameter that does not appear below;
//   * presumably the skipped lines hold the loop over num_loop and the
//     block_sync_lds() calls — TODO confirm against the original header
//     before relying on the statement order shown here.
18 template <
typename AGridDesc,
20 typename ABlockTransfer,
22 typename ABlockBuffer,
23 typename ABlockTransferStep,
26 typename BBlockTransfer,
28 typename BBlockBuffer,
29 typename BBlockTransferStep,
30 typename BlockwiseGemm,
31 typename CThreadBuffer>
32 __device__
static void Run(
// A operand: source descriptor/buffer, destination descriptor/buffer, the
// copy object that moves data between them, and the per-step window offset.
const AGridDesc& a_grid_desc,
33 const ABlockDesc& a_block_desc,
34 ABlockTransfer& a_blockwise_copy,
35 const AGridBuffer& a_grid_buf,
36 ABlockBuffer& a_block_buf,
37 const ABlockTransferStep& a_block_copy_step,
// B operand: same set of arguments as for A.
38 const BGridDesc& b_grid_desc,
39 const BBlockDesc& b_block_desc,
40 BBlockTransfer& b_blockwise_copy,
41 const BGridBuffer& b_grid_buf,
42 BBlockBuffer& b_block_buf,
43 const BBlockTransferStep& b_block_copy_step,
// GEMM object and the accumulator it updates.
44 const BlockwiseGemm& blockwise_gemm,
45 CThreadBuffer& c_thread_buf,
// First read of A and B through the copy objects from the grid-side buffers.
49 a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
50 b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf);
// Advance both source windows by their copy step so the next RunRead
// addresses the following slice.
52 a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step);
53 b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step);
// Write what was read into the block-side buffers.
// NOTE(review): listing lines 54-58 are missing between the window move and
// this write — confirm what they contain.
59 a_blockwise_copy.RunWrite(a_block_desc, a_block_buf);
60 b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);
// Second read of A and B, issued before the GEMM below consumes the block
// buffers written above.
// NOTE(review): listing lines 61-65 (before this read) and 67 (between the
// A and B reads) are missing; presumably loop control and/or a barrier —
// TODO confirm.
66 a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
68 b_blockwise_copy.RunRead(b_grid_desc, b_grid_buf);
// GEMM on the current block buffers, accumulating into c_thread_buf.
70 blockwise_gemm.Run(a_block_buf, b_block_buf, c_thread_buf);
// Advance the source windows again and overwrite the block buffers with the
// data read above.
// NOTE(review): listing lines 71-73 are missing between the GEMM and these
// writes; the same block buffers are read by the GEMM and then rewritten, so
// presumably a synchronization sits there — TODO confirm.
74 a_blockwise_copy.MoveSrcSliceWindow(a_grid_desc, a_block_copy_step);
75 b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step);
76 a_blockwise_copy.RunWrite(a_block_desc, a_block_buf);
77 b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);
// Final GEMM on the last contents written to the block buffers.
// NOTE(review): listing lines 78-83 are missing before this call.
84 blockwise_gemm.Run(a_block_buf, b_block_buf, c_thread_buf);
int32_t index_t
Definition: ck.hpp:298
__device__ void block_sync_lds()
Definition: synchronization.hpp:10
Definition: gridwise_gemm_pipeline_v3.hpp:11
static __device__ void Run(const AGridDesc &a_grid_desc, const ABlockDesc &a_block_desc, ABlockTransfer &a_blockwise_copy, const AGridBuffer &a_grid_buf, ABlockBuffer &a_block_buf, const ABlockTransferStep &a_block_copy_step, const BGridDesc &b_grid_desc, const BBlockDesc &b_block_desc, BBlockTransfer &b_blockwise_copy, const BGridBuffer &b_grid_buf, BBlockBuffer &b_block_buf, const BBlockTransferStep &b_block_copy_step, const BlockwiseGemm &blockwise_gemm, CThreadBuffer &c_thread_buf, index_t num_loop)
Definition: gridwise_gemm_pipeline_v3.hpp:32
__host__ static constexpr __device__ bool IsSupported(index_t)
Definition: gridwise_gemm_pipeline_v3.hpp:12