#include <streamk_gemm_kernel.hpp>
|
static CK_TILE_HOST const std::string | GetName () |
|
static CK_TILE_HOST auto | GridSize (const TilePartitioner &tile_partitioner) -> dim3 |
| Compute the grid size for the Stream-K kernel using the tile_partitioner. More...
|
|
static CK_TILE_HOST auto | MaxOccupancyGridSize (const stream_config &s) -> dim3 |
| Get the maximum occupancy grid size for the persistent kernel on the current device. More...
|
|
static constexpr CK_TILE_HOST auto | BlockSize () -> dim3 |
|
static CK_TILE_HOST StreamKKernelArgs | MakeKernelArgs (const StreamKHostArgs &host_args, int num_cu=NumCU(), int occupancy=Occupancy()) |
| Constructs kernel arguments for the Stream-K kernel. More...
|
|
template<bool UseDefaultScheduler = true> |
static CK_TILE_DEVICE void | RunGemm (const std::array< const ADataType *, UniversalGemmKernel::NumATensor > &as_ptr, const std::array< const BDataType *, UniversalGemmKernel::NumBTensor > &bs_ptr, const std::array< const void *, UniversalGemmKernel::NumDTensor > &ds_ptr, CDataType *c_ptr, void *smem_ptr_0, const typename UniversalGemmKernel::KernelArgs &kargs, const index_t num_loop, const index_t block_idx_m, const index_t block_idx_n, const index_t k_size) |
|
static CK_TILE_HOST bool | IsSupportedArgument (const StreamKKernelArgs &kargs) |
|
static CK_TILE_HOST uint32_t | GetWorkSpaceSize (const StreamKKernelArgs &kargs) |
| Computes the buffer size needed to store accumulation results for Stream-K. More...
|
|
static CK_TILE_HOST void | SetWorkSpacePointer (StreamKKernelArgs &kargs, void *workspace_ptr) |
| Sets the kargs' current workspace_ptr to the given workspace_ptr. More...
|
|
◆ ADataType
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
Specify the data type configurations for A, B, and C.
◆ ALayout
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
Specify the layout configurations for A, B, and C.
◆ BDataType
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ BLayout
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ CDataType
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ CLayout
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ EpiloguePipeline
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ GemmPipeline
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ Kernel
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ KernelArgs
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ TilePartitioner
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ UniversalGemmKernel
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ BlockSize()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ GetName()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ GetWorkSpaceSize()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
Computes the buffer size needed to store accumulation results for Stream-K.
- Returns
- The buffer size needed.
◆ GridSize()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
Compute the grid size for the Stream-K kernel using the tile_partitioner.
- Returns
- The grid size.
◆ IsSupportedArgument()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
◆ MakeKernelArgs()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
Constructs kernel arguments for the Stream-K kernel.
- Parameters
- host_args: Stream-K host arguments.
- num_cu: Number of compute units (CUs). The default is the number of CUs on the device. The caller may select their own to assist with test reproducibility, etc.
- occupancy: The maximum number of active blocks per CU for this kernel. The default is the value returned by Occupancy(). The caller may select their own to assist with test reproducibility, etc.
- Returns
- The kernel arguments for Stream-K.
◆ MaxOccupancyGridSize()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
Get the maximum occupancy grid size for the persistent kernel on the current device.
- Returns
- The maximum occupancy grid size.
- Note
- This function queries the maximum occupancy of the kernel using hipOccupancyMaxActiveBlocksPerMultiprocessor.
◆ operator()()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
Entry point for the Stream-K Kernel, performing the main Stream-K loop.
◆ RunGemm()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
template<bool UseDefaultScheduler = true>
static CK_TILE_DEVICE void ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::RunGemm(
    const std::array< const ADataType *, UniversalGemmKernel::NumATensor > & as_ptr,
    const std::array< const BDataType *, UniversalGemmKernel::NumBTensor > & bs_ptr,
    const std::array< const void *, UniversalGemmKernel::NumDTensor > & ds_ptr,
    CDataType * c_ptr,
    void * smem_ptr_0,
    const typename UniversalGemmKernel::KernelArgs & kargs,
    const index_t num_loop,
    const index_t block_idx_m,
    const index_t block_idx_n,
    const index_t k_size )
inline static
◆ SetWorkSpacePointer()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
Sets the kargs' current workspace_ptr to the given workspace_ptr.
- Note
- Assumes that the given workspace_ptr points to allocated device memory.
◆ kBlockSize
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/kernel/streamk_gemm_kernel.hpp