StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference
#include <streamk_gemm_kernel.hpp>
Classes | |
struct | StreamKKernelArgs |
ALayout and ADataType are expected to be scalars, not a tuple. More... | |
Public Types | |
using | UniversalGemmKernel = UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > |
Inject the UniversalGemmKernel base class to support execution of all necessary functions. More... | |
using | TilePartitioner = remove_cvref_t< TilePartitioner_ > |
using | GemmPipeline = remove_cvref_t< GemmPipeline_ > |
using | EpiloguePipeline = remove_cvref_t< EpiloguePipeline_ > |
using | ALayout = remove_cvref_t< typename GemmPipeline::ALayout > |
Specify the layout configurations for A, B, and C. More... | |
using | BLayout = remove_cvref_t< typename GemmPipeline::BLayout > |
using | CLayout = remove_cvref_t< typename GemmPipeline::CLayout > |
using | ADataType = remove_cvref_t< typename GemmPipeline::ADataType > |
Specify the data type configurations for A, B, and C. More... | |
using | BDataType = remove_cvref_t< typename GemmPipeline::BDataType > |
using | CDataType = remove_cvref_t< typename EpiloguePipeline::ODataType > |
using | KernelArgs = StreamKKernelArgs |
using | Kernel = StreamKKernel< TilePartitioner, GemmPipeline, EpiloguePipeline > |
Public Member Functions | |
CK_TILE_DEVICE void | operator() (StreamKKernelArgs) const |
Static Public Member Functions | |
static CK_TILE_HOST const std::string | GetName () |
static CK_TILE_HOST auto | GridSize (const TilePartitioner &tile_partitioner) -> dim3 |
Compute the grid size for the Stream K kernel using the tile_partitioner. More... | |
static CK_TILE_HOST auto | MaxOccupancyGridSize (const stream_config &s) -> dim3 |
Get the maximum occupancy grid size for the persistent kernel on the current device. More... | |
static constexpr CK_TILE_HOST auto | BlockSize () -> dim3 |
static CK_TILE_HOST StreamKKernelArgs | MakeKernelArgs (const StreamKHostArgs &host_args) |
static CK_TILE_HOST bool | IsSupportedArgument (const typename UniversalGemmKernel::KernelArgs &kargs) |
static CK_TILE_HOST uint32_t | GetWorkSpaceSize (const StreamKKernelArgs &kargs) |
Computes the buffer size needed to store accumulation results for Stream K. More... | |
static CK_TILE_HOST void | SetWorkSpacePointer (StreamKKernelArgs &kargs, void *workspace_ptr) |
Sets the kargs' current workspace_ptr to the given workspace_ptr. More... | |
Static Public Attributes | |
static constexpr index_t | kBlockSize = UniversalGemmKernel::kBlockSize |
Member Typedef Documentation
◆ ADataType
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::ADataType = remove_cvref_t<typename GemmPipeline::ADataType> |
Specify the data type configurations for A, B, and C.
◆ ALayout
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::ALayout = remove_cvref_t<typename GemmPipeline::ALayout> |
Specify the layout configurations for A, B, and C.
◆ BDataType
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BDataType = remove_cvref_t<typename GemmPipeline::BDataType> |
◆ BLayout
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BLayout = remove_cvref_t<typename GemmPipeline::BLayout> |
◆ CDataType
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::CDataType = remove_cvref_t<typename EpiloguePipeline::ODataType> |
◆ CLayout
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::CLayout = remove_cvref_t<typename GemmPipeline::CLayout> |
◆ EpiloguePipeline
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::EpiloguePipeline = remove_cvref_t<EpiloguePipeline_> |
◆ GemmPipeline
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmPipeline = remove_cvref_t<GemmPipeline_> |
◆ Kernel
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::Kernel = StreamKKernel<TilePartitioner, GemmPipeline, EpiloguePipeline> |
◆ KernelArgs
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::KernelArgs = StreamKKernelArgs |
◆ TilePartitioner
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::TilePartitioner = remove_cvref_t<TilePartitioner_> |
◆ UniversalGemmKernel
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::UniversalGemmKernel = UniversalGemmKernel<TilePartitioner_, GemmPipeline_, EpiloguePipeline_> |
Inject the UniversalGemmKernel base class to support execution of all necessary functions.
Member Function Documentation
◆ BlockSize()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
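static constexpr CK_TILE_HOST auto ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BlockSize () -> dim3 |
inline static constexpr |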
◆ GetName()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
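static CK_TILE_HOST const std::string ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GetName () |
inline static |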
◆ GetWorkSpaceSize()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
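static CK_TILE_HOST uint32_t ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GetWorkSpaceSize (const StreamKKernelArgs &kargs) |
inline static |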
Computes the buffer size needed to store accumulation results for Stream K.
- Returns
- The buffer size needed.
◆ GridSize()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
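static CK_TILE_HOST auto ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GridSize (const TilePartitioner &tile_partitioner) -> dim3 |
inline static |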
Compute the grid size for the Stream K kernel using the tile_partitioner.
- Returns
- The grid size.
◆ IsSupportedArgument()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
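static CK_TILE_HOST bool ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::IsSupportedArgument (const typename UniversalGemmKernel::KernelArgs &kargs) |
inline static |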
◆ MakeKernelArgs()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
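static CK_TILE_HOST StreamKKernelArgs ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::MakeKernelArgs (const StreamKHostArgs &host_args) |
inline static |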
◆ MaxOccupancyGridSize()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
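static CK_TILE_HOST auto ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::MaxOccupancyGridSize (const stream_config &s) -> dim3 |
inline static |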
Get the maximum occupancy grid size for the persistent kernel on the current device.
- Returns
- The maximum occupancy grid size.
- Note
- This function queries the maximum occupancy of the kernel using `hipOccupancyMaxActiveBlocksPerMultiprocessor`.
◆ operator()()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
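CK_TILE_DEVICE void ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::operator() (StreamKKernelArgs) const |
inline |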
◆ SetWorkSpacePointer()
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
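static CK_TILE_HOST void ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::SetWorkSpacePointer (StreamKKernelArgs &kargs, void *workspace_ptr) |
inline static |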
Sets the kargs' current workspace_ptr to the given workspace_ptr.
- Note
- Assumes that the given workspace_ptr points to allocated device memory.
Member Data Documentation
◆ kBlockSize
template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
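constexpr index_t ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::kBlockSize = UniversalGemmKernel::kBlockSize |
static constexpr |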
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/kernel/streamk_gemm_kernel.hpp