GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ > Struct Template Reference

GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ > Struct Template Reference

Composable Kernel: ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ > Struct Template Reference
ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ > Struct Template Reference

#include <grouped_flatmm_kernel.hpp>

Inheritance diagram for ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >:
ck_tile::FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >

Public Types

using UnderlyingGemmKernel = FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >
 
using BlockGemmShape = typename UnderlyingGemmKernel::BlockGemmShape
 
using TilePartitioner = remove_cvref_t< TilePartitioner_ >
 
using FlatmmPipeline = remove_cvref_t< FlatmmPipeline_ >
 
using EpiloguePipeline = remove_cvref_t< EpiloguePipeline_ >
 
using ADataType = remove_cvref_t< typename FlatmmPipeline::ADataType >
 
using BDataType = remove_cvref_t< typename FlatmmPipeline::BDataType >
 
using CDataType = remove_cvref_t< typename EpiloguePipeline::ODataType >
 
using DsLayout = remove_cvref_t< typename EpiloguePipeline::DsLayout >
 
using DsDataType = remove_cvref_t< typename EpiloguePipeline::DsDataType >
 
- Public Types inherited from ck_tile::FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >
using TilePartitioner = remove_cvref_t< TilePartitioner_ >
 
using FlatmmPipeline = remove_cvref_t< FlatmmPipeline_ >
 
using BlockGemmShape = remove_cvref_t< typename FlatmmPipeline::BlockGemmShape >
 
using EpiloguePipeline = remove_cvref_t< EpiloguePipeline_ >
 
using ALayout = remove_cvref_t< typename FlatmmPipeline::ALayout >
 
using BLayout = remove_cvref_t< typename FlatmmPipeline::BLayout >
 
using ELayout = remove_cvref_t< typename FlatmmPipeline::CLayout >
 
using DsLayout = remove_cvref_t< typename EpiloguePipeline::DsLayout >
 
using DsDataType = remove_cvref_t< typename EpiloguePipeline::DsDataType >
 
using ADataType = remove_cvref_t< typename FlatmmPipeline::ADataType >
 
using BDataType = remove_cvref_t< typename FlatmmPipeline::BDataType >
 
using EDataType = remove_cvref_t< typename EpiloguePipeline::ODataType >
 

Public Member Functions

template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, index_t NumDTensor = 0>
CK_TILE_DEVICE void operator() (GroupedFlatmmHostArgs< ScaleM, ScaleN, NumDTensor > kargs) const
 
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, index_t NumDTensor = 0>
CK_TILE_DEVICE void operator() (ContiguousGroupedFlatmmHostArgs< ScaleM, ScaleN, NumDTensor > kargs) const
 
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, index_t NumDTensor = 0>
CK_TILE_DEVICE void operator() (MaskedGroupedFlatmmHostArgs< ScaleM, ScaleN, NumDTensor > kargs) const
 
- Public Member Functions inherited from ck_tile::FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >
template<class ScaleM , class ScaleN >
CK_TILE_DEVICE void operator() (FlatmmKernelArgs< ScaleM, ScaleN, DsDataType::size()> kargs, int partition_idx=blockIdx.x) const
 

Static Public Member Functions

static CK_TILE_HOST const std::string GetName ()
 
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, index_t NumDTensor = 0>
static CK_TILE_HOST_DEVICE auto GridSize ([[maybe_unused]] const GroupedFlatmmHostArgs< ScaleM, ScaleN, NumDTensor > &kernelArgs)
 
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, index_t NumDTensor = 0>
static CK_TILE_HOST_DEVICE auto GridSize ([[maybe_unused]] const ContiguousGroupedFlatmmHostArgs< ScaleM, ScaleN, NumDTensor > &kernelArgs)
 
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, index_t NumDTensor = 0>
static CK_TILE_HOST_DEVICE auto GridSize ([[maybe_unused]] const MaskedGroupedFlatmmHostArgs< ScaleM, ScaleN, NumDTensor > &kernelArgs)
 
template<typename HostArgs >
static constexpr CK_TILE_HOST auto MakeKernelArgs (const HostArgs &hostArgs)
 
- Static Public Member Functions inherited from ck_tile::FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >
static CK_TILE_HOST const std::string GetName ()
 
static constexpr CK_TILE_HOST auto GridSize (index_t M, index_t N, index_t KBatch)
 
template<class ScaleM , class ScaleN >
static constexpr CK_TILE_HOST auto GridSize (const FlatmmKernelArgs< ScaleM, ScaleN, DsDataType::size()> &kargs)
 
static constexpr CK_TILE_HOST auto BlockSize ()
 
template<class ScaleM , class ScaleN >
static constexpr CK_TILE_HOST FlatmmKernelArgs< ScaleM, ScaleN, DsDataType::size()> MakeKernelArgs (const ScaleFlatmmHostArgs< ScaleM, ScaleN, DsDataType::size()> &hostArgs)
 
static constexpr CK_TILE_HOST_DEVICE index_t GetSmemPingSize ()
 
static constexpr CK_TILE_HOST_DEVICE index_t GetSmemPongSize ()
 
template<class KernelArgs >
static CK_TILE_HOST bool IsSupportedArgument (const KernelArgs &kargs)
 
template<memory_operation_enum DstInMemOp = memory_operation_enum::set, class KernelArgs >
static CK_TILE_DEVICE auto MakeGemmTensorViews (const ADataType *a_ptr, const BDataType *b_flat_ptr, const std::array< const void *, NumDTensor > &ds_ptr, EDataType *e_ptr, const KernelArgs &kargs, const SplitKBatchOffset &splitk_batch_offset)
 
template<typename TensorView >
static CK_TILE_DEVICE auto MakeGemmPadViews (const TensorView &views)
 
template<typename PadView >
static CK_TILE_DEVICE auto MakeGemmTileWindows (const PadView &views, const index_t i_m, const index_t i_n)
 
template<class ScaleM , class ScaleN , bool UseDefaultScheduler = true>
static CK_TILE_DEVICE void RunFlatmm (const ADataType *a_ptr, const BDataType *b_flat_ptr, const std::array< const void *, NumDTensor > &ds_ptr, EDataType *e_ptr, void *smem_ptr_ping, void *smem_ptr_pong, const FlatmmKernelArgs< ScaleM, ScaleN, DsDataType::size()> &kargs, const SplitKBatchOffset &splitk_batch_offset, const index_t block_idx_m, const index_t block_idx_n)
 

Static Public Attributes

static constexpr index_t NumDTensor = DsDataType::size()
 
static constexpr index_t kBlockSize = FlatmmPipeline_::BlockSize
 
static constexpr auto I0 = number<0>()
 
static constexpr auto I1 = number<1>()
 
static constexpr auto I2 = number<2>()
 
static constexpr auto I3 = number<3>()
 
- Static Public Attributes inherited from ck_tile::FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >
static constexpr index_t kBlockSize = FlatmmPipeline::BlockSize
 
static constexpr bool UsePersistentKernel = FlatmmPipeline::UsePersistentKernel
 
static constexpr index_t NumDTensor = DsDataType::size()
 
static constexpr auto I0 = number<0>()
 
static constexpr auto I1 = number<1>()
 
static constexpr auto I2 = number<2>()
 
static constexpr auto I3 = number<3>()
 

Member Typedef Documentation

◆ ADataType

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::ADataType = remove_cvref_t<typename FlatmmPipeline::ADataType>

◆ BDataType

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::BDataType = remove_cvref_t<typename FlatmmPipeline::BDataType>

◆ BlockGemmShape

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::BlockGemmShape = typename UnderlyingGemmKernel::BlockGemmShape

◆ CDataType

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::CDataType = remove_cvref_t<typename EpiloguePipeline::ODataType>

◆ DsDataType

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::DsDataType = remove_cvref_t<typename EpiloguePipeline::DsDataType>

◆ DsLayout

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::DsLayout = remove_cvref_t<typename EpiloguePipeline::DsLayout>

◆ EpiloguePipeline

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::EpiloguePipeline = remove_cvref_t<EpiloguePipeline_>

◆ FlatmmPipeline

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::FlatmmPipeline = remove_cvref_t<FlatmmPipeline_>

◆ TilePartitioner

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::TilePartitioner = remove_cvref_t<TilePartitioner_>

◆ UnderlyingGemmKernel

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
using ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::UnderlyingGemmKernel = FlatmmKernel<TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_>

Member Function Documentation

◆ GetName()

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
static CK_TILE_HOST const std::string ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::GetName ( )
inline static

◆ GridSize() [1/3]

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, index_t NumDTensor = 0>
static CK_TILE_HOST_DEVICE auto ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::GridSize ( [[maybe_unused]] const ContiguousGroupedFlatmmHostArgs< ScaleM, ScaleN, NumDTensor > &  kernelArgs)
inline static

◆ GridSize() [2/3]

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, index_t NumDTensor = 0>
static CK_TILE_HOST_DEVICE auto ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::GridSize ( [[maybe_unused]] const GroupedFlatmmHostArgs< ScaleM, ScaleN, NumDTensor > &  kernelArgs)
inline static

◆ GridSize() [3/3]

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, index_t NumDTensor = 0>
static CK_TILE_HOST_DEVICE auto ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::GridSize ( [[maybe_unused]] const MaskedGroupedFlatmmHostArgs< ScaleM, ScaleN, NumDTensor > &  kernelArgs)
inline static

◆ MakeKernelArgs()

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
template<typename HostArgs >
static constexpr CK_TILE_HOST auto ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::MakeKernelArgs ( const HostArgs &  hostArgs)
inline static constexpr

◆ operator()() [1/3]

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, index_t NumDTensor = 0>
CK_TILE_DEVICE void ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::operator() ( ContiguousGroupedFlatmmHostArgs< ScaleM, ScaleN, NumDTensor > kargs) const
inline

◆ operator()() [2/3]

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, index_t NumDTensor = 0>
CK_TILE_DEVICE void ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::operator() ( GroupedFlatmmHostArgs< ScaleM, ScaleN, NumDTensor > kargs) const
inline

◆ operator()() [3/3]

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, index_t NumDTensor = 0>
CK_TILE_DEVICE void ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::operator() ( MaskedGroupedFlatmmHostArgs< ScaleM, ScaleN, NumDTensor > kargs) const
inline

Member Data Documentation

◆ I0

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
constexpr auto ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::I0 = number<0>()
static constexpr

◆ I1

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
constexpr auto ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::I1 = number<1>()
static constexpr

◆ I2

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
constexpr auto ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::I2 = number<2>()
static constexpr

◆ I3

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
constexpr auto ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::I3 = number<3>()
static constexpr

◆ kBlockSize

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
constexpr index_t ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::kBlockSize = FlatmmPipeline_::BlockSize
static constexpr

◆ NumDTensor

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ >
constexpr index_t ck_tile::GroupedFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::NumDTensor = DsDataType::size()
static constexpr

The documentation for this struct was generated from the following file:
  • /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/flatmm/kernel/grouped_flatmm_kernel.hpp