/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/batched_transpose/pipeline/batched_transpose_problem.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/batched_transpose/pipeline/batched_transpose_problem.hpp Source File

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/batched_transpose/pipeline/batched_transpose_problem.hpp Source File
batched_transpose_problem.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include "ck_tile/core.hpp"
7 #include <type_traits>
8 
9 namespace ck_tile {
10 
11 template <typename DataType_,
12  typename BlockTile, // Sequence<...
13  typename WarpLayout,
14  bool kPadM_ = false,
15  bool kPadN_ = false> // Sequence<...
16 struct BatchedTransposeProblem
17 {
18  using DataType = remove_cvref_t<DataType_>;
19 
20  static constexpr index_t kMPerWarp = WarpLayout::at(number<0>{});
21  static constexpr index_t kNPerWarp = WarpLayout::at(number<1>{});
22 
23  static constexpr index_t kMPerBlock = BlockTile::at(number<0>{});
24  static constexpr index_t kNPerBlock = BlockTile::at(number<1>{});
25 
26  static constexpr index_t kBlockSize = kMPerWarp * kNPerWarp * get_warp_size();
27 
28  static constexpr bool kPadM = kPadM_;
29  static constexpr bool kPadN = kPadN_;
30 
31  // 128-bit is the max single-instruction bandwidth for load/store
32  static constexpr index_t MaxLoadStoreSize = 16;
33  static constexpr index_t VectorSizeInput = kPadN ? 1 : MaxLoadStoreSize / sizeof(DataType);
34  static constexpr index_t VectorSizeOutput = kPadM ? 1 : MaxLoadStoreSize / sizeof(DataType);
35 };
36 } // namespace ck_tile
Definition: cluster_descriptor.hpp:13
int32_t index_t
Definition: integer.hpp:9
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition: type_traits.hpp:21
__host__ constexpr __device__ index_t get_warp_size()
Definition: get_id.hpp:42
Definition: batched_transpose_problem.hpp:17
static constexpr bool kPadN
Definition: batched_transpose_problem.hpp:29
static constexpr index_t kMPerWarp
Definition: batched_transpose_problem.hpp:20
static constexpr index_t VectorSizeInput
Definition: batched_transpose_problem.hpp:33
static constexpr index_t kNPerBlock
Definition: batched_transpose_problem.hpp:24
remove_cvref_t< DataType_ > DataType
Definition: batched_transpose_problem.hpp:18
static constexpr bool kPadM
Definition: batched_transpose_problem.hpp:28
static constexpr index_t VectorSizeOutput
Definition: batched_transpose_problem.hpp:34
static constexpr index_t kNPerWarp
Definition: batched_transpose_problem.hpp:21
static constexpr index_t MaxLoadStoreSize
Definition: batched_transpose_problem.hpp:32
static constexpr index_t kMPerBlock
Definition: batched_transpose_problem.hpp:23
static constexpr index_t kBlockSize
Definition: batched_transpose_problem.hpp:26
Definition: integral_constant.hpp:13