StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference

StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference

Composable Kernel: ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference
ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference

#include <streamk_gemm_kernel.hpp>

Classes

struct  StreamKKernelArgs
 ALayout and ADataType are expected to be scalars, not a tuple. More...
 

Public Types

using UniversalGemmKernel = UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >
 Inject the UniversalGemmKernel base class to support execution of all necessary functions. More...
 
using TilePartitioner = remove_cvref_t< TilePartitioner_ >
 
using GemmPipeline = remove_cvref_t< GemmPipeline_ >
 
using EpiloguePipeline = remove_cvref_t< EpiloguePipeline_ >
 
using ALayout = remove_cvref_t< typename GemmPipeline::ALayout >
 Specify the layout configurations for A, B, and C. More...
 
using BLayout = remove_cvref_t< typename GemmPipeline::BLayout >
 
using CLayout = remove_cvref_t< typename GemmPipeline::CLayout >
 
using ADataType = remove_cvref_t< typename GemmPipeline::ADataType >
 Specify the data type configurations for A, B, and C. More...
 
using BDataType = remove_cvref_t< typename GemmPipeline::BDataType >
 
using CDataType = remove_cvref_t< typename EpiloguePipeline::ODataType >
 
using KernelArgs = StreamKKernelArgs
 
using Kernel = StreamKKernel< TilePartitioner, GemmPipeline, EpiloguePipeline >
 

Public Member Functions

CK_TILE_DEVICE void operator() (StreamKKernelArgs) const
 

Static Public Member Functions

static CK_TILE_HOST const std::string GetName ()
 
static CK_TILE_HOST auto GridSize (const TilePartitioner &tile_partitioner) -> dim3
 Compute the grid size for the Stream K kernel using the tile_partitioner. More...
 
static CK_TILE_HOST auto MaxOccupancyGridSize (const stream_config &s) -> dim3
 Get the maximum occupancy grid size for the persistent kernel on the current device. More...
 
static constexpr CK_TILE_HOST auto BlockSize () -> dim3
 
static CK_TILE_HOST StreamKKernelArgs MakeKernelArgs (const StreamKHostArgs &host_args)
 
static CK_TILE_HOST bool IsSupportedArgument (const typename UniversalGemmKernel::KernelArgs &kargs)
 
static CK_TILE_HOST uint32_t GetWorkSpaceSize (const StreamKKernelArgs &kargs)
 Computes the buffer size needed to store accumulation results for Stream K. More...
 
static CK_TILE_HOST void SetWorkSpacePointer (StreamKKernelArgs &kargs, void *workspace_ptr)
 Sets the kargs' current workspace_ptr to the given workspace_ptr. More...
 

Static Public Attributes

static constexpr index_t kBlockSize = UniversalGemmKernel::kBlockSize
 

Member Typedef Documentation

◆ ADataType

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::ADataType = remove_cvref_t<typename GemmPipeline::ADataType>

Specify the data type configurations for A, B, and C.

◆ ALayout

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::ALayout = remove_cvref_t<typename GemmPipeline::ALayout>

Specify the layout configurations for A, B, and C.

◆ BDataType

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BDataType = remove_cvref_t<typename GemmPipeline::BDataType>

◆ BLayout

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BLayout = remove_cvref_t<typename GemmPipeline::BLayout>

◆ CDataType

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::CDataType = remove_cvref_t<typename EpiloguePipeline::ODataType>

◆ CLayout

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::CLayout = remove_cvref_t<typename GemmPipeline::CLayout>

◆ EpiloguePipeline

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::EpiloguePipeline = remove_cvref_t<EpiloguePipeline_>

◆ GemmPipeline

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmPipeline = remove_cvref_t<GemmPipeline_>

◆ Kernel

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::Kernel = StreamKKernel<TilePartitioner, GemmPipeline, EpiloguePipeline>

◆ KernelArgs

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::KernelArgs = StreamKKernelArgs

◆ TilePartitioner

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::TilePartitioner = remove_cvref_t<TilePartitioner_>

◆ UniversalGemmKernel

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::UniversalGemmKernel = UniversalGemmKernel<TilePartitioner_, GemmPipeline_, EpiloguePipeline_>

Inject the UniversalGemmKernel base class to support execution of all necessary functions.

Member Function Documentation

◆ BlockSize()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static constexpr CK_TILE_HOST auto ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BlockSize ( ) -> dim3
inline static constexpr

◆ GetName()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_HOST const std::string ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GetName ( )
inline static

◆ GetWorkSpaceSize()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_HOST uint32_t ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GetWorkSpaceSize ( const StreamKKernelArgs &kargs)
inline static

Computes the buffer size needed to store accumulation results for Stream K.

Returns
The buffer size needed.

◆ GridSize()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_HOST auto ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GridSize ( const TilePartitioner &tile_partitioner) -> dim3
inline static

Compute the grid size for the Stream K kernel using the tile_partitioner.

Returns
The grid size.

◆ IsSupportedArgument()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_HOST bool ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::IsSupportedArgument ( const typename UniversalGemmKernel::KernelArgs &kargs)
inline static

◆ MakeKernelArgs()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_HOST StreamKKernelArgs ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::MakeKernelArgs ( const StreamKHostArgs &host_args)
inline static

◆ MaxOccupancyGridSize()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_HOST auto ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::MaxOccupancyGridSize ( const stream_config &s) -> dim3
inline static

Get the maximum occupancy grid size for the persistent kernel on the current device.

Returns
The maximum occupancy grid size.
Note
This function queries the maximum occupancy of the kernel using hipOccupancyMaxActiveBlocksPerMultiprocessor.

◆ operator()()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
CK_TILE_DEVICE void ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::operator() ( StreamKKernelArgs  ) const
inline

◆ SetWorkSpacePointer()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_HOST void ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::SetWorkSpacePointer ( StreamKKernelArgs &kargs,
void *workspace_ptr
)
inline static

Sets the kargs' current workspace_ptr to the given workspace_ptr.

Note
Assumes that the given workspace_ptr points to allocated device memory.

Member Data Documentation

◆ kBlockSize

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr index_t ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::kBlockSize = UniversalGemmKernel::kBlockSize
static constexpr

The documentation for this struct was generated from the following file:
  • /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/kernel/streamk_gemm_kernel.hpp