UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference

UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference

Composable Kernel: ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference
ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference

The Universal GEMM kernel template. More...

#include <universal_gemm_kernel.hpp>

Classes

struct  has_persistent_kernel
 
struct  has_tile_partitioner_output_offset_impl
 
struct  SplitKBatchOffset
 

Public Types

using TilePartitioner = remove_cvref_t< TilePartitioner_ >
 
using GemmPipeline = remove_cvref_t< GemmPipeline_ >
 
using EpiloguePipeline = remove_cvref_t< EpiloguePipeline_ >
 
using AsLayout = std::conditional_t< ALayoutIsTuple, remove_cvref_t< typename GemmPipeline::ALayout >, remove_cvref_t< tuple< typename GemmPipeline::ALayout > >>
 
using BsLayout = std::conditional_t< BLayoutIsTuple, remove_cvref_t< typename GemmPipeline::BLayout >, remove_cvref_t< tuple< typename GemmPipeline::BLayout > >>
 
using DsLayout = std::conditional_t< DLayoutIsTuple, remove_cvref_t< typename EpiloguePipeline::DsLayout >, remove_cvref_t< tuple< typename EpiloguePipeline::DsLayout > >>
 
using AsDataType = std::conditional_t< ADataTypeIsTuple, remove_cvref_t< typename GemmPipeline::ADataType >, remove_cvref_t< tuple< typename GemmPipeline::ADataType > >>
 
using BsDataType = std::conditional_t< BDataTypeIsTuple, remove_cvref_t< typename GemmPipeline::BDataType >, remove_cvref_t< tuple< typename GemmPipeline::BDataType > >>
 
using DsDataType = std::conditional_t< DDataTypeIsTuple, remove_cvref_t< typename EpiloguePipeline::DsDataType >, remove_cvref_t< tuple< typename EpiloguePipeline::DsDataType > >>
 
using ELayout = remove_cvref_t< typename GemmPipeline::CLayout >
 
using EDataType = remove_cvref_t< typename EpiloguePipeline::ODataType >
 
using ADataType = remove_cvref_t< std::tuple_element_t< I0, AsDataType > >
 
using BDataType = remove_cvref_t< std::tuple_element_t< I0, BsDataType > >
 
using KernelArgs = UniversalGemmKernelArgs< AsLayout::size(), BsLayout::size(), DsLayout::size()>
 

Public Member Functions

template<bool U = !PersistentKernel, typename = std::enable_if_t<U>>
CK_TILE_DEVICE void operator() (KernelArgs kargs) const
 
template<bool U = PersistentKernel, typename = std::enable_if_t<U>, typename = void>
CK_TILE_DEVICE void operator() (KernelArgs kargs) const
 

Static Public Member Functions

static CK_TILE_HOST const std::string GetName ()
 
static constexpr CK_TILE_HOST auto GridSize (index_t M, index_t N, index_t KBatch)
 
static CK_TILE_HOST auto MaxOccupancyGridSize (const stream_config &s) -> dim3
 Get the maximum occupancy grid size for the persistent kernel on the current device. More...
 
static CK_TILE_HOST auto BlockSize ()
 
static constexpr CK_TILE_HOST KernelArgs MakeKernelArgs (const UniversalGemmHostArgs< NumATensor, NumBTensor, NumDTensor > &hostArgs)
 
static constexpr CK_TILE_HOST_DEVICE index_t GetSmemSize ()
 
static CK_TILE_HOST bool IsSupportedArgument (const KernelArgs &kargs)
 
template<memory_operation_enum DstInMemOp = memory_operation_enum::set>
static CK_TILE_DEVICE auto MakeGemmTensorViews (const std::array< const ADataType *, NumATensor > &as_ptr, const std::array< const BDataType *, NumBTensor > &bs_ptr, const std::array< const void *, NumDTensor > &ds_ptr, EDataType *e_ptr, const KernelArgs &kargs, const SplitKBatchOffset &splitk_batch_offset)
 
template<typename TensorView >
static CK_TILE_DEVICE auto MakeGemmPadViews (const TensorView &views)
 
template<typename PadView >
static CK_TILE_DEVICE auto MakeGemmTileWindows (const PadView &views, const index_t i_m, const index_t i_n)
 
template<bool UseDefaultScheduler = true>
static CK_TILE_DEVICE void RunGemm (const std::array< const ADataType *, NumATensor > &as_ptr, const std::array< const BDataType *, NumBTensor > &bs_ptr, const std::array< const void *, NumDTensor > &ds_ptr, EDataType *e_ptr, void *smem_ptr_0, const KernelArgs &kargs, const SplitKBatchOffset &splitk_batch_offset, const index_t block_idx_m, const index_t block_idx_n)
 Runs single GEMM problem cooperatively by whole workgroup. More...
 
static CK_TILE_DEVICE void RunGemm2LDS (const std::array< const ADataType *, NumATensor > &as_ptr, const std::array< const BDataType *, NumBTensor > &bs_ptr, const std::array< const void *, NumDTensor > &ds_ptr, EDataType *e_ptr, void *__restrict__ smem_ptr_0, void *__restrict__ smem_ptr_1, const KernelArgs &kargs, const SplitKBatchOffset &splitk_batch_offset, const index_t block_idx_m, const index_t block_idx_n)
 Runs single GEMM problem cooperatively by whole workgroup. More...
 

Static Public Attributes

static constexpr bool ADataTypeIsTuple
 
static constexpr bool BDataTypeIsTuple
 
static constexpr bool DDataTypeIsTuple
 
static constexpr bool ALayoutIsTuple
 
static constexpr bool BLayoutIsTuple
 
static constexpr bool DLayoutIsTuple
 
static constexpr index_t kBlockSize = GemmPipeline::BlockSize
 
static constexpr bool PersistentKernel = has_persistent_kernel::value
 
static constexpr bool has_tile_partitioner_output_offset
 
static constexpr auto I0 = number<0>()
 
static constexpr auto I1 = number<1>()
 
static constexpr auto I2 = number<2>()
 
static constexpr auto I3 = number<3>{}
 
static constexpr index_t NumATensor = AsDataType::size()
 
static constexpr index_t NumBTensor = BsDataType::size()
 
static constexpr index_t NumDTensor = DsDataType::size()
 

Detailed Description

template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
struct ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >

The Universal GEMM kernel template.

Overview

This class provides the generic matrix multiplication kernel template. By semantically dividing the GEMM algorithm into the following parts, we achieve a flexible, versatile and robust kernel implementation.

  • Prolog - The start of the GEMM kernel implementation in operator() (the "function call operator"), which determines the work scope of each workgroup.
  • GemmPipeline - The core part, the "heart", of the matrix multiplication algorithm. This is the place where each workgroup loads data from global memory and carries out dot products.
  • Epilogue - The "final" part of matrix multiplication implementation responsible for storing results to global memory. This is also the place where any additional operator fusion may take place.

Additionally, both GemmPipeline and EpiloguePipeline are parameterized with a so-called Policy, which determines all internal details of those functional parts. You can think of it this way: both the GEMM and epilogue pipelines provide the control-flow logic, which is controlled by policies. Moreover, the policy is responsible for defining all necessary data layouts and the threads' work distribution.

Template Parameters
TilePartitioner_ - The type of the class that maps a workgroup index to the output data tile to be calculated. It determines the workgroup-to-data relationship (in other words, which data will be processed and calculated by which workgroup).
GemmPipeline_ - The type of the class that provides the core part of the matrix multiplication. This class should implement loading data from global memory and performing block-wise matrix multiplication. You can think of it as the work done from a single workgroup's point of view.
EpiloguePipeline_ - The type of the class that provides the final part of the matrix multiplication implementation. It is responsible for storing the results calculated by GemmPipeline to the output E tensor in global memory.

Member Typedef Documentation

◆ ADataType

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::ADataType = remove_cvref_t<std::tuple_element_t<I0, AsDataType> >

◆ AsDataType

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::AsDataType = std::conditional_t<ADataTypeIsTuple, remove_cvref_t<typename GemmPipeline::ADataType>, remove_cvref_t<tuple<typename GemmPipeline::ADataType> >>

◆ AsLayout

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::AsLayout = std::conditional_t<ALayoutIsTuple, remove_cvref_t<typename GemmPipeline::ALayout>, remove_cvref_t<tuple<typename GemmPipeline::ALayout> >>

◆ BDataType

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BDataType = remove_cvref_t<std::tuple_element_t<I0, BsDataType> >

◆ BsDataType

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BsDataType = std::conditional_t<BDataTypeIsTuple, remove_cvref_t<typename GemmPipeline::BDataType>, remove_cvref_t<tuple<typename GemmPipeline::BDataType> >>

◆ BsLayout

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BsLayout = std::conditional_t<BLayoutIsTuple, remove_cvref_t<typename GemmPipeline::BLayout>, remove_cvref_t<tuple<typename GemmPipeline::BLayout> >>

◆ DsDataType

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::DsDataType = std::conditional_t<DDataTypeIsTuple, remove_cvref_t<typename EpiloguePipeline::DsDataType>, remove_cvref_t<tuple<typename EpiloguePipeline::DsDataType> >>

◆ DsLayout

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::DsLayout = std::conditional_t<DLayoutIsTuple, remove_cvref_t<typename EpiloguePipeline::DsLayout>, remove_cvref_t<tuple<typename EpiloguePipeline::DsLayout> >>

◆ EDataType

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::EDataType = remove_cvref_t<typename EpiloguePipeline::ODataType>

◆ ELayout

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::ELayout = remove_cvref_t<typename GemmPipeline::CLayout>

◆ EpiloguePipeline

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::EpiloguePipeline = remove_cvref_t<EpiloguePipeline_>

◆ GemmPipeline

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmPipeline = remove_cvref_t<GemmPipeline_>

◆ KernelArgs

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::KernelArgs = UniversalGemmKernelArgs<AsLayout::size(), BsLayout::size(), DsLayout::size()>

◆ TilePartitioner

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::TilePartitioner = remove_cvref_t<TilePartitioner_>

Member Function Documentation

◆ BlockSize()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_HOST auto ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BlockSize ( )
inlinestatic

◆ GetName()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_HOST const std::string ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GetName ( )
inlinestatic

◆ GetSmemSize()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static constexpr CK_TILE_HOST_DEVICE index_t ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GetSmemSize ( )
inlinestaticconstexpr

◆ GridSize()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static constexpr CK_TILE_HOST auto ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GridSize ( index_t  M,
index_t  N,
index_t  KBatch 
)
inlinestaticconstexpr

◆ IsSupportedArgument()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_HOST bool ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::IsSupportedArgument ( const KernelArgs &  kargs)
inlinestatic

◆ MakeGemmPadViews()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
template<typename TensorView >
static CK_TILE_DEVICE auto ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::MakeGemmPadViews ( const TensorView &  views)
inlinestatic

◆ MakeGemmTensorViews()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
template<memory_operation_enum DstInMemOp = memory_operation_enum::set>
static CK_TILE_DEVICE auto ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::MakeGemmTensorViews ( const std::array< const ADataType *, NumATensor > &  as_ptr,
const std::array< const BDataType *, NumBTensor > &  bs_ptr,
const std::array< const void *, NumDTensor > &  ds_ptr,
EDataType *  e_ptr,
const KernelArgs &  kargs,
const SplitKBatchOffset &  splitk_batch_offset 
)
inlinestatic

◆ MakeGemmTileWindows()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
template<typename PadView >
static CK_TILE_DEVICE auto ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::MakeGemmTileWindows ( const PadView &  views,
const index_t  i_m,
const index_t  i_n 
)
inlinestatic

◆ MakeKernelArgs()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static constexpr CK_TILE_HOST KernelArgs ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::MakeKernelArgs ( const UniversalGemmHostArgs< NumATensor, NumBTensor, NumDTensor > &  hostArgs)
inlinestaticconstexpr

◆ MaxOccupancyGridSize()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_HOST auto ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::MaxOccupancyGridSize ( const stream_config &  s) -> dim3
inlinestatic

Get the maximum occupancy grid size for the persistent kernel on the current device.

Returns
The maximum occupancy grid size.
Note
This function queries the maximum occupancy of the kernel using hipOccupancyMaxActiveBlocksPerMultiprocessor.

◆ operator()() [1/2]

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
template<bool U = !PersistentKernel, typename = std::enable_if_t<U>>
CK_TILE_DEVICE void ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::operator() ( KernelArgs  kargs) const
inline

◆ operator()() [2/2]

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
template<bool U = PersistentKernel, typename = std::enable_if_t<U>, typename = void>
CK_TILE_DEVICE void ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::operator() ( KernelArgs  kargs) const
inline

◆ RunGemm()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
template<bool UseDefaultScheduler = true>
static CK_TILE_DEVICE void ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::RunGemm ( const std::array< const ADataType *, NumATensor > &  as_ptr,
const std::array< const BDataType *, NumBTensor > &  bs_ptr,
const std::array< const void *, NumDTensor > &  ds_ptr,
EDataType *  e_ptr,
void *  smem_ptr_0,
const KernelArgs &  kargs,
const SplitKBatchOffset &  splitk_batch_offset,
const index_t  block_idx_m,
const index_t  block_idx_n 
)
inlinestatic

Runs single GEMM problem cooperatively by whole workgroup.

Parameters
as_ptr - input As pointer
bs_ptr - input Bs pointer
ds_ptr - input Ds pointer
e_ptr - output E pointer
smem_ptr_0 - The start memory pointer of the shared memory block.
kargs - GEMM kernel arguments
splitk_batch_offset - Utility structure used to calculate k batch.
block_idx_m - The GEMM's output M dimension tile index processed by this workgroup.
block_idx_n - The GEMM's output N dimension tile index processed by this workgroup.

◆ RunGemm2LDS()

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_DEVICE void ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::RunGemm2LDS ( const std::array< const ADataType *, NumATensor > &  as_ptr,
const std::array< const BDataType *, NumBTensor > &  bs_ptr,
const std::array< const void *, NumDTensor > &  ds_ptr,
EDataType *  e_ptr,
void *__restrict__  smem_ptr_0,
void *__restrict__  smem_ptr_1,
const KernelArgs &  kargs,
const SplitKBatchOffset &  splitk_batch_offset,
const index_t  block_idx_m,
const index_t  block_idx_n 
)
inlinestatic

Runs single GEMM problem cooperatively by whole workgroup.

Note
RunGemm2LDS runs the GEMM with two shared memory buffers, using the ping-pong buffer mechanism.
Parameters
as_ptr - input As pointer
bs_ptr - input Bs pointer
ds_ptr - input Ds pointer
e_ptr - output E pointer
smem_ptr_0 - The starting pointer of 1st shared memory block.
smem_ptr_1 - The starting pointer of 2nd shared memory block.
kargs - GEMM kernel arguments
splitk_batch_offset - Utility structure used to calculate k batch.
block_idx_m - The GEMM's output M dimension tile index processed by this workgroup.
block_idx_n - The GEMM's output N dimension tile index processed by this workgroup.

Member Data Documentation

◆ ADataTypeIsTuple

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr bool ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::ADataTypeIsTuple
staticconstexpr
Initial value:
(The initializer expression was lost during documentation extraction; see universal_gemm_kernel.hpp for the definition.)

◆ ALayoutIsTuple

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr bool ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::ALayoutIsTuple
staticconstexpr

◆ BDataTypeIsTuple

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr bool ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BDataTypeIsTuple
staticconstexpr

◆ BLayoutIsTuple

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr bool ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BLayoutIsTuple
staticconstexpr

◆ DDataTypeIsTuple

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr bool ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::DDataTypeIsTuple
staticconstexpr

◆ DLayoutIsTuple

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr bool ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::DLayoutIsTuple
staticconstexpr

◆ has_tile_partitioner_output_offset

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr bool ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::has_tile_partitioner_output_offset
staticconstexpr
Initial value:

◆ I0

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr auto ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::I0 = number<0>()
staticconstexpr

◆ I1

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr auto ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::I1 = number<1>()
staticconstexpr

◆ I2

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr auto ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::I2 = number<2>()
staticconstexpr

◆ I3

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr auto ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::I3 = number<3>{}
staticconstexpr

◆ kBlockSize

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr index_t ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::kBlockSize = GemmPipeline::BlockSize
staticconstexpr

◆ NumATensor

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr index_t ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::NumATensor = AsDataType::size()
staticconstexpr

◆ NumBTensor

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr index_t ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::NumBTensor = BsDataType::size()
staticconstexpr

◆ NumDTensor

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr index_t ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::NumDTensor = DsDataType::size()
staticconstexpr

◆ PersistentKernel

template<typename TilePartitioner_ , typename GemmPipeline_ , typename EpiloguePipeline_ >
constexpr bool ck_tile::UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::PersistentKernel = has_persistent_kernel::value
staticconstexpr

The documentation for this struct was generated from the following file:
  • /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp