/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/arch/amd_transpose_load_encoding.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/arch/amd_transpose_load_encoding.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/arch/amd_transpose_load_encoding.hpp Source File
amd_transpose_load_encoding.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
9 
10 namespace ck_tile {
11 
12 // This generates the wave-level tile distribution.
13 template <typename T, index_t LaneGroupSize = 16, typename = void>
15 
// Traits specialization selected when sizeof(T) == 2 (2-byte element types).
16 template <typename T, index_t LaneGroupSize>
17 struct LaneGroupTransposeTraits<T, LaneGroupSize, std::enable_if_t<sizeof(T) == 2>>
18 {
  // Only lane group sizes of 16, 32, and 64 are accepted; anything else fails to compile.
19  static_assert(LaneGroupSize == 16 || LaneGroupSize == 32 || LaneGroupSize == 64,
20  "LaneGroupSize must be 16, 32, or 64");
21  // before transpose: ksecondDim x kleadDim = 4 x LaneGroupSize (4x16 when LaneGroupSize is 16)
22  static constexpr index_t ksecondDim = 4;
23  static constexpr index_t kleadDim = LaneGroupSize;
24  // after transpose: ksecondDimT x kleadDimT = LaneGroupSize x 4 (16x4 when LaneGroupSize is 16)
25  static constexpr index_t ksecondDimT = LaneGroupSize;
26  static constexpr index_t kleadDimT = 4;
  // NOTE(review): the member template declared below (used elsewhere as TileDistribution) is only
  // partially visible in this listing; several of its lines are elided. Presumably it is an alias
  // for a tile_distribution_encoding instantiation -- confirm against the full header.
27  template <index_t kOuterDistDim0,
28  index_t kOuterDistDim1,
29  index_t kInnerDistDim0,
30  index_t kInnerDistDim1>
32  sequence<>,
  // Inner lengths: the two inner distribution dims, then LaneGroupSize / 16, 4, 4.
34  sequence<kInnerDistDim0, kInnerDistDim1, LaneGroupSize / 16, 4, 4>>,
39 };
40 
// Traits specialization selected when sizeof(T) == 1 (1-byte element types).
// NOTE(review): unlike the sizeof(T) == 2 specialization, no static_assert restricts LaneGroupSize
// here -- confirm whether that is intentional.
41 template <typename T, index_t LaneGroupSize>
42 struct LaneGroupTransposeTraits<T, LaneGroupSize, std::enable_if_t<sizeof(T) == 1>>
43 {
  // before transpose: ksecondDim x kleadDim = 8 x LaneGroupSize
44  static constexpr index_t ksecondDim = 8;
45  static constexpr index_t kleadDim = LaneGroupSize;
46 
  // after transpose: ksecondDimT x kleadDimT = LaneGroupSize x 8
47  static constexpr index_t ksecondDimT = LaneGroupSize;
48  static constexpr index_t kleadDimT = 8;
49 
  // NOTE(review): the member template declared below (used elsewhere as TileDistribution) is only
  // partially visible in this listing; several of its lines are elided. Presumably it is an alias
  // for a tile_distribution_encoding instantiation -- confirm against the full header.
50  template <index_t kOuterDistDim0,
51  index_t kOuterDistDim1,
52  index_t kInnerDistDim0,
53  index_t kInnerDistDim1>
55  sequence<>,
  // Inner lengths: the two inner distribution dims, then LaneGroupSize / 16, 2, 8.
57  sequence<kInnerDistDim0, kInnerDistDim1, LaneGroupSize / 16, 2, 8>>,
62 };
63 
64 /**
65  * @brief Generates the transposed distribution encoding for the given data type,
66  * lane group size, and distribution dimensions.
67  *
68  * @tparam T The data type of the elements in the tensor.
 * @tparam LaneGroupSize The lane group size (the 2-byte LaneGroupTransposeTraits
 * specialization requires 16, 32, or 64).
69  * @tparam kOuterDistDim0 The outer distribution dimension 0, which is the outer dimension for stride.
70  * @tparam kOuterDistDim1 The outer distribution dimension 1, which is the inner dimension for stride.
71  * @tparam kInnerDistDim0 The inner distribution dimension 0, which is the outer dimension for
72  * consecutive.
73  * @tparam kInnerDistDim1 The inner distribution dimension 1, which is the inner dimension for
74  * consecutive.
 *
 * NOTE(review): the function signature and the first part of the return statement are elided
 * in this listing; the visible tail value-initializes a TileDistribution<...> instantiation,
 * presumably taken from LaneGroupTransposeTraits<T, LaneGroupSize> -- confirm against the full header.
75  */
76 template <typename T,
77  index_t LaneGroupSize,
78  index_t kOuterDistDim0,
79  index_t kOuterDistDim1,
80  index_t kInnerDistDim0,
81  index_t kInnerDistDim1>
83 {
85  template TileDistribution<kOuterDistDim0, kOuterDistDim1, kInnerDistDim0, kInnerDistDim1>{};
86 }
87 
88 } // namespace ck_tile
#define CK_TILE_DEVICE
Definition: config.hpp:41
Definition: cluster_descriptor.hpp:13
int32_t index_t
Definition: integer.hpp:9
constexpr CK_TILE_DEVICE auto make_transposed_distr_encode()
Definition: amd_transpose_load_encoding.hpp:82
typename std::enable_if< B, T >::type enable_if_t
Definition: enable_if.hpp:27
Definition: amd_transpose_load_encoding.hpp:14
Definition: sequence.hpp:49
Definition: tile_distribution_encoding.hpp:26
Definition: tuple.hpp:192