/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_traits.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_traits.hpp Source File

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_traits.hpp Source File
fused_moegemm_traits.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include "ck_tile/core.hpp"
7 
8 namespace ck_tile {
9 
10 enum class FusedMoeGemmWeightPermuteEnum
11 {
12  // permute_b_n0_k0_n1_k1_n2_k2 = 0, // 0,1,4,2,5,3,6
13  // permute_b_n0_n1_k0_k1_n2_k2 = 1, // 0,1,2,4,5,3,6
14  no_permute = 0,
15  b_nr_kr_kw_nw_kv = 1, // 0,1,3,4,2,5
17 };
18 
19 template <bool IsGateOnly_,
20  bool UseSmoothQuant_,
21  index_t OAtomic_, // 0-no atomic, 1-atomic-pk-f16/bf16, 2-atomic-f32
22  FusedMoeGemmWeightPermuteEnum PermuteEnum_ =
24  bool PadHiddenSize_ = false,
25  bool PadIntermediateSize_ = false,
26  bool PipeInterleave_ = true>
27 struct FusedMoeGemmTraits
28 {
29  // Gate+Up or Gate only
30  static constexpr bool IsGateOnly = IsGateOnly_;
31  static constexpr bool UseSmoothQuant = UseSmoothQuant_;
32  static constexpr index_t OAtomic = OAtomic_;
33  static constexpr FusedMoeGemmWeightPermuteEnum PermuteEnum = PermuteEnum_;
34  static constexpr bool PadHiddenSize = PadHiddenSize_;
35  static constexpr bool PadIntermediateSize = PadIntermediateSize_;
36  static constexpr bool PipeInterleave = PipeInterleave_;
37 };
38 
39 // Note: this needs to be a bit mask
40 enum class FusedMoeGemmPipelineSequencerEnum
41 {
42  SLD_A = 1 << 0, // shared load a
43  SLD_B = 1 << 1,
44  GLD_A = 1 << 2, // global load a
45  GLD_B = 1 << 3,
46  SST_A = 1 << 4, // shared store a
47  SST_B = 1 << 5,
48  GST_O = 1 << 6, // global store out
49 };
50 } // namespace ck_tile
Definition: cluster_descriptor.hpp:13
FusedMoeGemmPipelineSequencerEnum
Definition: fused_moegemm_traits.hpp:41
int32_t index_t
Definition: integer.hpp:9
FusedMoeGemmWeightPermuteEnum
Definition: fused_moegemm_traits.hpp:11
Definition: fused_moegemm_traits.hpp:28
static constexpr FusedMoeGemmWeightPermuteEnum PermuteEnum
Definition: fused_moegemm_traits.hpp:33
static constexpr bool PadHiddenSize
Definition: fused_moegemm_traits.hpp:34
static constexpr bool PipeInterleave
Definition: fused_moegemm_traits.hpp:36
static constexpr bool PadIntermediateSize
Definition: fused_moegemm_traits.hpp:35
static constexpr bool UseSmoothQuant
Definition: fused_moegemm_traits.hpp:31
static constexpr index_t OAtomic
Definition: fused_moegemm_traits.hpp:32
static constexpr bool IsGateOnly
Definition: fused_moegemm_traits.hpp:30