MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation > Struct Template Reference

MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation > Struct Template Reference

Composable Kernel: ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation > Struct Template Reference
ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation > Struct Template Reference

#include <moe_flatmm_kernel.hpp>

Classes

struct  MoeFlatmmKernelArgs
 
struct  SplitKBatchOffset
 

Public Types

using TilePartitioner = remove_cvref_t< TilePartitioner_ >
 
using FlatmmPipeline = remove_cvref_t< FlatmmPipeline_ >
 
using BlockGemmShape = remove_cvref_t< typename FlatmmPipeline::BlockGemmShape >
 
using EpiloguePipeline = remove_cvref_t< EpiloguePipeline_ >
 
using ALayout = remove_cvref_t< typename FlatmmPipeline::ALayout >
 
using BLayout = remove_cvref_t< typename FlatmmPipeline::BLayout >
 
using ELayout = remove_cvref_t< typename FlatmmPipeline::CLayout >
 
using DsLayout = remove_cvref_t< typename EpiloguePipeline::DsLayout >
 
using DsDataType = remove_cvref_t< typename EpiloguePipeline::DsDataType >
 
using ADataType = remove_cvref_t< typename FlatmmPipeline::ADataType >
 
using BDataType = remove_cvref_t< typename FlatmmPipeline::BDataType >
 
using EDataType = remove_cvref_t< typename EpiloguePipeline::ODataType >
 
using AccDataType = float
 
using ActivationOp = FusedActivation
 

Public Member Functions

template<class MoeFlatmmKernelArgs >
CK_TILE_DEVICE void operator() (MoeFlatmmKernelArgs kargs) const
 
template<class MoeFlatmmKernelArgs >
CK_TILE_DEVICE void operator() (MoeFlatmmKernelArgs kargs, index_t iM, index_t iN) const
 

Static Public Member Functions

template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, class ExpertBias = FlatmmScalePointer<-1>>
static constexpr CK_TILE_HOST auto MakeKernelArgs (const MoeFlatmmHostArgs< ScaleM, ScaleN, ExpertBias > &hostArgs)
 
static CK_TILE_HOST const std::string GetName ()
 
static constexpr auto BlockSize () -> dim3
 
static constexpr auto GridSize (index_t M, index_t N, index_t KBatch)
 
template<class MoeFlatmmKernelArgs >
static constexpr auto GridSize (const MoeFlatmmKernelArgs &kargs)
 
static constexpr CK_TILE_HOST_DEVICE index_t GetSmemPingSize ()
 
static constexpr CK_TILE_HOST_DEVICE index_t GetSmemPongSize ()
 
template<typename KernelArgs >
static CK_TILE_HOST bool IsSupportedArgument (const KernelArgs &kargs)
 
template<memory_operation_enum DstInMemOp = IsInputGemm ? memory_operation_enum::set : memory_operation_enum::atomic_add, typename KernelArgs >
static CK_TILE_DEVICE auto MakeGemmTensorViews (const ADataType *a_ptr, const BDataType *b_flat_ptr, EDataType *e_ptr, [[maybe_unused]] const AccDataType *exp_weight_ptr, const int expert_id, const KernelArgs &kargs, const SplitKBatchOffset &splitk_batch_offset)
 
template<typename TensorView >
static CK_TILE_DEVICE auto MakeGemmPadViews (const TensorView &views)
 
template<typename PadView >
static CK_TILE_DEVICE auto MakeGemmTileWindows (const PadView &views, [[maybe_unused]] const index_t coord_m, const index_t coord_n)
 

Static Public Attributes

static constexpr index_t kBlockSize = FlatmmPipeline::BlockSize
 
static constexpr bool UsePersistentKernel = FlatmmPipeline::UsePersistentKernel
 
static constexpr index_t NumDTensor = DsDataType::size()
 
static constexpr auto I0 = number<0>()
 
static constexpr auto I1 = number<1>()
 
static constexpr auto I2 = number<2>()
 
static constexpr auto I3 = number<3>()
 
static constexpr bool IsInputGemm = kind != MoeFlatmmKind::kFFN_gemm2
 
static constexpr bool IsGateUp = kind == MoeFlatmmKind::kFFN_gemm1_gate_up
 
static constexpr index_t kMPerBlock = EpiloguePipeline::kMPerBlock
 
static constexpr index_t kNPerBlock = EpiloguePipeline::kNPerBlock
 
static constexpr index_t MWave = EpiloguePipeline::MWave
 
static constexpr index_t NWave = EpiloguePipeline::NWave
 
static constexpr index_t MPerXdl = EpiloguePipeline::MPerXdl
 
static constexpr index_t NPerXdl = EpiloguePipeline::NPerXdl
 
static constexpr index_t KPerXdl = EpiloguePipeline::KPerXdl
 
static constexpr index_t isCTransposed = EpiloguePipeline::isCTransposed
 
static constexpr index_t kMPerIteration = MPerXdl * MWave
 
static constexpr index_t kNPerIteration = NPerXdl * NWave
 
static constexpr index_t kNRepeat = kNPerBlock / kNPerIteration
 
static constexpr int OutputNPerBlock
 
static constexpr bool MXFP4_Pipeline = std::is_same_v<BDataType, pk_fp4_t>
 
static constexpr int MXFP4N_Pack = 2
 
static constexpr int MXFP4K_Pack = 2
 
static constexpr int N_Pack = MXFP4_Pipeline ? MXFP4N_Pack : 1
 
static constexpr int K_Pack = MXFP4_Pipeline ? MXFP4K_Pack : 1
 
static constexpr int WeightPackedSize = numeric_traits<BDataType>::PackedSize
 

Member Typedef Documentation

◆ AccDataType

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::AccDataType = float

◆ ActivationOp

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::ActivationOp = FusedActivation

◆ ADataType

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::ADataType = remove_cvref_t<typename FlatmmPipeline::ADataType>

◆ ALayout

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::ALayout = remove_cvref_t<typename FlatmmPipeline::ALayout>

◆ BDataType

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::BDataType = remove_cvref_t<typename FlatmmPipeline::BDataType>

◆ BLayout

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::BLayout = remove_cvref_t<typename FlatmmPipeline::BLayout>

◆ BlockGemmShape

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::BlockGemmShape = remove_cvref_t<typename FlatmmPipeline::BlockGemmShape>

◆ DsDataType

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::DsDataType = remove_cvref_t<typename EpiloguePipeline::DsDataType>

◆ DsLayout

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::DsLayout = remove_cvref_t<typename EpiloguePipeline::DsLayout>

◆ EDataType

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::EDataType = remove_cvref_t<typename EpiloguePipeline::ODataType>

◆ ELayout

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::ELayout = remove_cvref_t<typename FlatmmPipeline::CLayout>

◆ EpiloguePipeline

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::EpiloguePipeline = remove_cvref_t<EpiloguePipeline_>

◆ FlatmmPipeline

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::FlatmmPipeline = remove_cvref_t<FlatmmPipeline_>

◆ TilePartitioner

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
using ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::TilePartitioner = remove_cvref_t<TilePartitioner_>

Member Function Documentation

◆ BlockSize()

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
static constexpr auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::BlockSize ( ) -> dim3
inline static constexpr

◆ GetName()

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
static CK_TILE_HOST const std::string ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::GetName ( )
inline static

◆ GetSmemPingSize()

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
static constexpr CK_TILE_HOST_DEVICE index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::GetSmemPingSize ( )
inline static constexpr

◆ GetSmemPongSize()

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
static constexpr CK_TILE_HOST_DEVICE index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::GetSmemPongSize ( )
inline static constexpr

◆ GridSize() [1/2]

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<class MoeFlatmmKernelArgs >
static constexpr auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::GridSize ( const MoeFlatmmKernelArgs &  kargs)
inline static constexpr

◆ GridSize() [2/2]

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
static constexpr auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::GridSize ( index_t  M,
index_t  N,
index_t  KBatch 
)
inline static constexpr

◆ IsSupportedArgument()

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<typename KernelArgs >
static CK_TILE_HOST bool ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::IsSupportedArgument ( const KernelArgs &  kargs)
inline static

◆ MakeGemmPadViews()

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<typename TensorView >
static CK_TILE_DEVICE auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::MakeGemmPadViews ( const TensorView &  views)
inline static

◆ MakeGemmTensorViews()

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<memory_operation_enum DstInMemOp = IsInputGemm ? memory_operation_enum::set : memory_operation_enum::atomic_add, typename KernelArgs >
static CK_TILE_DEVICE auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::MakeGemmTensorViews ( const ADataType *  a_ptr,
const BDataType *  b_flat_ptr,
EDataType *  e_ptr,
[[maybe_unused]] const AccDataType *  exp_weight_ptr,
const int  expert_id,
const KernelArgs &  kargs,
const SplitKBatchOffset &  splitk_batch_offset 
)
inline static

◆ MakeGemmTileWindows()

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<typename PadView >
static CK_TILE_DEVICE auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::MakeGemmTileWindows ( const PadView &  views,
[[maybe_unused]] const index_t  coord_m,
const index_t  coord_n 
)
inline static

◆ MakeKernelArgs()

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<class ScaleM = FlatmmScalePointer<-1>, class ScaleN = FlatmmScalePointer<-1>, class ExpertBias = FlatmmScalePointer<-1>>
static constexpr CK_TILE_HOST auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::MakeKernelArgs ( const MoeFlatmmHostArgs< ScaleM, ScaleN, ExpertBias > &  hostArgs)
inline static constexpr

◆ operator()() [1/2]

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<class MoeFlatmmKernelArgs >
CK_TILE_DEVICE void ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::operator() ( MoeFlatmmKernelArgs  kargs) const
inline

◆ operator()() [2/2]

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
template<class MoeFlatmmKernelArgs >
CK_TILE_DEVICE void ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::operator() ( MoeFlatmmKernelArgs  kargs,
index_t  iM,
index_t  iN 
) const
inline

Member Data Documentation

◆ I0

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::I0 = number<0>()
static constexpr

◆ I1

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::I1 = number<1>()
static constexpr

◆ I2

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::I2 = number<2>()
static constexpr

◆ I3

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr auto ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::I3 = number<3>()
static constexpr

◆ isCTransposed

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::isCTransposed = EpiloguePipeline::isCTransposed
static constexpr

◆ IsGateUp

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr bool ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::IsGateUp = kind == MoeFlatmmKind::kFFN_gemm1_gate_up
static constexpr

◆ IsInputGemm

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr bool ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::IsInputGemm = kind != MoeFlatmmKind::kFFN_gemm2
static constexpr

◆ K_Pack

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr int ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::K_Pack = MXFP4_Pipeline ? MXFP4K_Pack : 1
static constexpr

◆ kBlockSize

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::kBlockSize = FlatmmPipeline::BlockSize
static constexpr

◆ kMPerBlock

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::kMPerBlock = EpiloguePipeline::kMPerBlock
static constexpr

◆ kMPerIteration

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::kMPerIteration = MPerXdl * MWave
static constexpr

◆ kNPerBlock

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::kNPerBlock = EpiloguePipeline::kNPerBlock
static constexpr

◆ kNPerIteration

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::kNPerIteration = NPerXdl * NWave
static constexpr

◆ kNRepeat

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::kNRepeat = kNPerBlock / kNPerIteration
static constexpr

◆ KPerXdl

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::KPerXdl = EpiloguePipeline::KPerXdl
static constexpr

◆ MPerXdl

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::MPerXdl = EpiloguePipeline::MPerXdl
static constexpr

◆ MWave

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::MWave = EpiloguePipeline::MWave
static constexpr

◆ MXFP4_Pipeline

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr bool ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::MXFP4_Pipeline = std::is_same_v<BDataType, pk_fp4_t>
static constexpr

◆ MXFP4K_Pack

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr int ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::MXFP4K_Pack = 2
static constexpr

◆ MXFP4N_Pack

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr int ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::MXFP4N_Pack = 2
static constexpr

◆ N_Pack

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr int ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::N_Pack = MXFP4_Pipeline ? MXFP4N_Pack : 1
static constexpr

◆ NPerXdl

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::NPerXdl = EpiloguePipeline::NPerXdl
static constexpr

◆ NumDTensor

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::NumDTensor = DsDataType::size()
static constexpr

◆ NWave

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr index_t ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::NWave = EpiloguePipeline::NWave
static constexpr

◆ OutputNPerBlock

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr int ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::OutputNPerBlock
static constexpr
Initial value:
= IsGateUp ? TilePartitioner::NPerBlock / 2 : TilePartitioner::NPerBlock

Referenced above: static constexpr bool IsGateUp (Definition: moe_flatmm_kernel.hpp:225)

◆ UsePersistentKernel

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr bool ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::UsePersistentKernel = FlatmmPipeline::UsePersistentKernel
static constexpr

◆ WeightPackedSize

template<typename TilePartitioner_ , typename FlatmmPipeline_ , typename EpiloguePipeline_ , MoeFlatmmKind kind, typename FusedActivation = moe::MoeSilu>
constexpr int ck_tile::MoeFlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_, kind, FusedActivation >::WeightPackedSize = numeric_traits<BDataType>::PackedSize
static constexpr

The documentation for this struct was generated from the following file:
  • /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/flatmm/kernel/moe_flatmm_kernel.hpp