/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/tensor/load_tile.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/tensor/load_tile.hpp Source File

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/tensor/load_tile.hpp Source File
load_tile.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
18 
19 namespace ck_tile {
20 
21 template <typename TileWindow_, index_t i_access = -1, bool oob_conditional_check = true>
22 CK_TILE_DEVICE auto load_tile(const TileWindow_& tile_window,
23  number<i_access> = {},
24  bool_constant<oob_conditional_check> = {})
25 {
26  return tile_window.load(number<i_access>{}, bool_constant<oob_conditional_check>{});
27 }
28 
29 template <typename DistributedTensor_,
30  typename TileWindow_,
31  index_t i_access = -1,
32  bool oob_conditional_check = true>
33 CK_TILE_DEVICE auto load_tile(DistributedTensor_& dst_tile,
34  const TileWindow_& tile_window,
35  number<i_access> = {},
36  bool_constant<oob_conditional_check> = {})
37 {
38  return tile_window.load(dst_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
39 }
40 
50 template <typename T,
51  typename BottomTensorView_,
52  typename WindowLengths_,
53  typename TileDistribution_,
54  index_t NumCoord,
55  index_t i_access = -1,
56  bool oob_conditional_check = true,
57  bool pre_nop = false>
59  const tile_window_with_static_distribution<BottomTensorView_,
60  WindowLengths_,
61  TileDistribution_,
62  NumCoord>& tile_window,
63  number<i_access> = {},
64  bool_constant<oob_conditional_check> = {},
65  bool_constant<pre_nop> = {})
66 {
67  tile_window.load_raw(
68  tile, number<i_access>{}, bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{});
69 }
70 
71 template <typename T,
72  typename BottomTensorView_,
73  typename WindowLengths_,
74  typename TileDistribution_,
75  typename LinearBottomDims_,
76  index_t i_access = -1,
77  bool oob_conditional_check = true,
78  bool pre_nop = false>
80  const tile_window_linear<BottomTensorView_,
81  WindowLengths_,
82  TileDistribution_,
83  LinearBottomDims_>& tile_window,
84  number<i_access> = {},
85  bool_constant<oob_conditional_check> = {},
86  bool_constant<pre_nop> = {})
87 {
88  tile_window.load_raw(
89  tile, number<i_access>{}, bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{});
90 }
91 
92 template <typename LdsTileWindow_,
93  typename TileWindow_,
94  index_t i_access = -1,
95  bool oob_conditional_check = true>
96 CK_TILE_DEVICE auto async_load_tile(LdsTileWindow_&& lds_tile,
97  const TileWindow_& tile_window,
98  number<i_access> = {},
99  bool_constant<oob_conditional_check> = {})
100 {
101  return tile_window.async_load(
102  lds_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
103 }
104 
105 template <typename LdsTileWindow_,
106  typename TileWindow_,
107  index_t i_access = -1,
108  bool oob_conditional_check = true,
109  bool pre_nop = false>
110 CK_TILE_DEVICE auto async_load_tile_raw(LdsTileWindow_&& lds_tile,
111  const TileWindow_& tile_window,
112  number<i_access> = {},
113  bool_constant<oob_conditional_check> = {},
114  bool_constant<pre_nop> = {})
115 {
116  return tile_window.async_load_raw(lds_tile,
117  number<i_access>{},
118  bool_constant<oob_conditional_check>{},
119  bool_constant<pre_nop>{});
120 }
121 
123 {
124  asm volatile("s_waitcnt vmcnt(%0)" : : "n"(cnt) : "memory");
125 }
126 
127 template <typename WindowLengths>
129 {
130  return null_tensor{};
131 }
132 
133 template <typename T, typename WindowLengths>
135 {
136 }
137 
138 } // namespace ck_tile
#define CK_TILE_DEVICE
Definition: config.hpp:41
Definition: cluster_descriptor.hpp:13
CK_TILE_DEVICE auto async_load_tile(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:96
CK_TILE_DEVICE auto async_load_fence(index_t cnt=0)
Definition: load_tile.hpp:122
int32_t index_t
Definition: integer.hpp:9
CK_TILE_DEVICE auto load_tile_raw(T &tile, const tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, TileDistribution_, NumCoord > &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Loads a tile of data using inline assembly.
Definition: load_tile.hpp:58
CK_TILE_DEVICE auto async_load_tile_raw(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Definition: load_tile.hpp:110
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:22
Definition: integral_constant.hpp:13
Definition: null_tensor.hpp:9
Definition: null_tile_window.hpp:19
Definition: tile_window_linear.hpp:55
This class provides tile (windowed) view and access to the device memory.
Definition: tile_window.hpp:46