/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/pipeline/tile_gemm_shape.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/pipeline/tile_gemm_shape.hpp Source File

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/pipeline/tile_gemm_shape.hpp Source File
tile_gemm_shape.hpp
Go to the documentation of this file.
1 // Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2 // SPDX-License-Identifier: MIT
3 
4 #pragma once
5 
6 #include "ck_tile/core.hpp"
8 
9 namespace ck_tile {
10 
11 template <typename BlockTile_,
12  typename BlockWarps_,
13  typename WarpTile_,
14  bool PermuteA_ = false,
15  bool PermuteB_ = false>
17 {
21 
22  static constexpr index_t NumWarps = reduce_on_sequence(BlockWarps{}, multiplies{}, number<1>{});
23 
24  static constexpr index_t kM = BlockTile::at(number<0>{});
25  static constexpr index_t kN = BlockTile::at(number<1>{});
26  static constexpr index_t kK = BlockTile::at(number<2>{});
27 
28  static constexpr bool PermuteA = PermuteA_;
29  static constexpr bool PermuteB = PermuteB_;
30 
31  static constexpr index_t flatNPerWarp = BlockWarps::at(number<1>{});
32  static constexpr index_t flatKPerWarp = WarpTile::at(number<2>{}) * WarpTile::at(number<1>{});
33  static constexpr index_t flatKPerBlock = flatKPerWarp * kK / WarpTile::at(number<2>{});
34 
35  CK_TILE_HOST static std::string GetName()
36  {
37  // clang-format off
38  return concat('_', "tile_gemm_shape",
39  concat('x', kM, kN, kK, NumWarps),
40  concat('x', BlockWarps::at(number<0>{}), BlockWarps::at(number<1>{}), BlockWarps::at(number<2>{})),
41  concat('x', (WarpTile::at(number<0>{})), WarpTile::at(number<1>{}), WarpTile::at(number<2>{})));
42  // clang-format on
43  }
44 };
45 
46 template <typename PrecType, index_t M_Warp_Tile, bool IsFlatMM = false>
48 {
49 #if CK_TILE_USE_WMMA
50  return 16;
51 #else
52 #if defined(CK_GFX950_SUPPORT)
53  constexpr bool is_8bit_float =
54  std::is_same_v<PrecType, fp8_t> || std::is_same_v<PrecType, bf8_t>;
55  if constexpr(M_Warp_Tile == 32)
56  return is_8bit_float ? 64 : 16;
57  else
58  return is_8bit_float ? 128 : 32;
59 #else
60  if constexpr(M_Warp_Tile == 32)
61  return (sizeof(PrecType) == 2 || IsFlatMM == false) ? 16 : 32;
62  else
63  return (sizeof(PrecType) == 2 || IsFlatMM == false) ? 32 : 64;
64 #endif
65 #endif
66 }
67 
68 } // namespace ck_tile
#define CK_TILE_HOST
Definition: config.hpp:44
Definition: cluster_descriptor.hpp:13
constexpr index_t get_k_warp_tile()
Definition: tile_gemm_shape.hpp:47
int32_t index_t
Definition: integer.hpp:9
auto concat(const Ts &... xs) -> std::enable_if_t<!AllConvertibleToStringView< Ts... >, std::string >
Definition: concat.hpp:43
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition: type_traits.hpp:21
constexpr CK_TILE_HOST_DEVICE index_t reduce_on_sequence(Seq, Reduce f, number< Init >)
Definition: sequence.hpp:993
Definition: tile_gemm_shape.hpp:17
static constexpr index_t flatNPerWarp
Definition: tile_gemm_shape.hpp:31
static constexpr index_t kK
Definition: tile_gemm_shape.hpp:26
static constexpr bool PermuteB
Definition: tile_gemm_shape.hpp:29
remove_cvref_t< BlockWarps_ > BlockWarps
Definition: tile_gemm_shape.hpp:19
static constexpr index_t kN
Definition: tile_gemm_shape.hpp:25
static constexpr index_t kM
Definition: tile_gemm_shape.hpp:24
static constexpr index_t flatKPerWarp
Definition: tile_gemm_shape.hpp:32
static CK_TILE_HOST std::string GetName()
Definition: tile_gemm_shape.hpp:35
static constexpr index_t flatKPerBlock
Definition: tile_gemm_shape.hpp:33
static constexpr bool PermuteA
Definition: tile_gemm_shape.hpp:28
remove_cvref_t< BlockTile_ > BlockTile
Definition: tile_gemm_shape.hpp:18
remove_cvref_t< WarpTile_ > WarpTile
Definition: tile_gemm_shape.hpp:20
static constexpr index_t NumWarps
Definition: tile_gemm_shape.hpp:22
Definition: integral_constant.hpp:13
Definition: math.hpp:95