include/ck_tile/core/tensor/tile_window_utils.hpp Source File

include/ck_tile/core/tensor/tile_window_utils.hpp Source File#

Composable Kernel: include/ck_tile/core/tensor/tile_window_utils.hpp Source File
tile_window_utils.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
3 
17 
18 #pragma once
19 namespace ck_tile {
20 
21 // Takes an LDS store tile window and extracts some information from it;
22 // the result is used to set the m0 value on the gfx9 series
23 template <typename LdsTileWindow_>
24 CK_TILE_DEVICE auto get_async_store_smem_info(LdsTileWindow_&& lds_tile)
25 {
26  using LdsTileWindow = remove_cvref_t<LdsTileWindow_>;
27  using LdsDataType = typename LdsTileWindow::DataType;
28 // the static_assert below only accepts windows with exactly 3 dimensions
29  // dimensions are described as: issues * warps * lanes
30  static_assert(LdsTileWindow::get_num_of_dimension() == 3); // TODO: hard coded
31 // NOTE(review): listing lines 34, 39 and 45 (the arguments passed to calculate_offset) are missing from this extract, so the three expressions below are incomplete as shown -- restore them from the original header
32  const index_t size_per_buf =
33  lds_tile.get_bottom_tensor_view().get_tensor_descriptor().calculate_offset(
35  sizeof(LdsDataType);
36 // size_per_wave: a descriptor offset combined with sizeof(LdsDataType), then reduced by size_per_buf (presumably a byte distance between consecutive warps -- TODO confirm against the missing line 39)
37  const index_t size_per_wave =
38  lds_tile.get_bottom_tensor_view().get_tensor_descriptor().calculate_offset(
40  sizeof(LdsDataType) -
41  size_per_buf;
42 // size_per_issue: same form as size_per_wave, also reduced by size_per_buf (presumably a byte distance between consecutive issues -- TODO confirm against the missing line 45)
43  const index_t size_per_issue =
44  lds_tile.get_bottom_tensor_view().get_tensor_descriptor().calculate_offset(
46  sizeof(LdsDataType) -
47  size_per_buf;
48 // initial m0 value for the calling warp: size_per_buf plus size_per_wave scaled by get_warp_id()
49  const index_t m0_init_value = size_per_buf + size_per_wave * get_warp_id();
50 // returns a tuple of (m0_init_value, size_per_issue)
51  return make_tuple(m0_init_value, size_per_issue);
52 }
53 
54 } // namespace ck_tile
#define CK_TILE_DEVICE
Definition: config.hpp:40
Definition: cluster_descriptor.hpp:13
int32_t index_t
Definition: integer.hpp:9
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition: type_traits.hpp:20
CK_TILE_DEVICE index_t get_warp_id()
Definition: arch.hpp:71
CK_TILE_DEVICE auto get_async_store_smem_info(LdsTileWindow_ &&lds_tile)
Definition: tile_window_utils.hpp:24
constexpr CK_TILE_HOST_DEVICE auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:337
Definition: integral_constant.hpp:13