21 template <
typename BottomTensorView_,
22 typename WindowLengths_,
23 typename TileDistribution_,
26 bool oob_conditional_check =
true>
30 NumCoord>& tile_window,
32 bool_constant<oob_conditional_check> = {})
34 return tile_window.load(number<i_access>{}, bool_constant<oob_conditional_check>{});
37 template <
typename BottomTensorView_,
38 typename WindowLengths_,
39 typename TileDistribution_,
40 typename LinearBottomDims_,
42 bool oob_conditional_check =
true>
46 LinearBottomDims_>& tile_window,
48 bool_constant<oob_conditional_check> = {})
50 return tile_window.load(number<i_access>{}, bool_constant<oob_conditional_check>{});
53 template <
typename DistributedTensor_,
54 typename BottomTensorView_,
55 typename WindowLengths_,
56 typename TileDistribution_,
59 bool oob_conditional_check =
true>
64 NumCoord>& tile_window,
66 bool_constant<oob_conditional_check> = {})
68 return tile_window.load(dst_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
71 template <
typename DistributedTensor_,
72 typename BottomTensorView_,
73 typename WindowLengths_,
74 typename TileDistribution_,
75 typename LinearBottomDims_,
77 bool oob_conditional_check =
true>
82 LinearBottomDims_>& tile_window,
84 bool_constant<oob_conditional_check> = {})
86 return tile_window.load(dst_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
99 typename BottomTensorView_,
100 typename WindowLengths_,
101 typename TileDistribution_,
104 bool oob_conditional_check =
true,
105 bool pre_nop =
false>
110 NumCoord>& tile_window,
112 bool_constant<oob_conditional_check> = {},
113 bool_constant<pre_nop> = {})
115 tile_window.load_raw(
116 tile, number<i_access>{}, bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{});
119 template <
typename T,
120 typename BottomTensorView_,
121 typename WindowLengths_,
122 typename TileDistribution_,
123 typename LinearBottomDims_,
125 bool oob_conditional_check =
true,
126 bool pre_nop =
false>
131 LinearBottomDims_>& tile_window,
133 bool_constant<oob_conditional_check> = {},
134 bool_constant<pre_nop> = {})
136 tile_window.load_raw(
137 tile, number<i_access>{}, bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{});
140 template <
typename LdsTileWindow_,
141 typename BottomTensorView_,
142 typename WindowLengths_,
143 typename TileDistribution_,
146 bool oob_conditional_check =
true,
147 bool pre_nop =
false>
153 NumCoord>& tile_window,
155 bool_constant<oob_conditional_check> = {},
156 bool_constant<pre_nop> = {})
158 return tile_window.async_load_raw(lds_tile,
160 bool_constant<oob_conditional_check>{},
161 bool_constant<pre_nop>{});
164 template <
typename LdsTileWindow_,
165 typename BottomTensorView_,
166 typename WindowLengths_,
167 typename TileDistribution_,
168 typename LinearBottomDims_,
170 bool oob_conditional_check =
true,
171 bool pre_nop =
false>
176 LinearBottomDims_>& tile_window,
178 bool_constant<oob_conditional_check> = {},
179 bool_constant<pre_nop> = {})
181 return tile_window.async_load_raw(lds_tile,
183 bool_constant<oob_conditional_check>{},
184 bool_constant<pre_nop>{});
189 asm volatile(
"s_waitcnt vmcnt(%0)" : :
"n"(cnt) :
"memory");
192 template <
typename WindowLengths>
198 template <
typename T,
typename WindowLengths>
#define CK_TILE_DEVICE
Definition: config.hpp:40
Definition: cluster_descriptor.hpp:13
CK_TILE_DEVICE auto async_load_fence(index_t cnt=0)
Definition: load_tile.hpp:187
CK_TILE_DEVICE auto async_load_tile_raw(LdsTileWindow_ &&lds_tile, const tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, TileDistribution_, NumCoord > &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Definition: load_tile.hpp:149
int32_t index_t
Definition: integer.hpp:9
CK_TILE_DEVICE auto load_tile(const tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, TileDistribution_, NumCoord > &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:27
CK_TILE_DEVICE auto load_tile_raw(T &tile, const tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, TileDistribution_, NumCoord > &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Loads a tile of data using inline assembly.
Definition: load_tile.hpp:106
Definition: integral_constant.hpp:13
Definition: null_tensor.hpp:9
Definition: null_tile_window.hpp:19
Definition: tile_window_linear.hpp:46
This class provides tile (windowed) view and access to the device memory.
Definition: tile_window.hpp:37