21 template <
typename TileWindow_,
index_t i_access = -1,
bool oob_conditional_check =
true>
24 bool_constant<oob_conditional_check> = {})
26 return tile_window.load(number<i_access>{}, bool_constant<oob_conditional_check>{});
37 template <
typename TileWindow_,
38 typename ElementWise_,
40 bool oob_conditional_check =
true>
42 ElementWise_ elementwise,
44 bool_constant<oob_conditional_check> = {})
48 return tile_window[number<0>{}].load(
49 tile_window, elementwise, number<i_access>{}, bool_constant<oob_conditional_check>{});
52 template <
typename DistributedTensor_,
55 bool oob_conditional_check =
true>
57 const TileWindow_& tile_window,
59 bool_constant<oob_conditional_check> = {})
61 return tile_window.load(dst_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
74 typename BottomTensorView_,
75 typename WindowLengths_,
76 typename TileDistribution_,
79 bool oob_conditional_check =
true,
85 NumCoord>& tile_window,
87 bool_constant<oob_conditional_check> = {},
88 bool_constant<pre_nop> = {})
91 tile, number<i_access>{}, bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{});
95 typename BottomTensorView_,
96 typename WindowLengths_,
97 typename TileDistribution_,
98 typename LinearBottomDims_,
100 bool oob_conditional_check =
true,
101 bool pre_nop =
false>
106 LinearBottomDims_>& tile_window,
108 bool_constant<oob_conditional_check> = {},
109 bool_constant<pre_nop> = {})
111 tile_window.load_raw(
112 tile, number<i_access>{}, bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{});
115 template <
typename LdsTileWindow_,
116 typename TileWindow_,
118 bool oob_conditional_check =
true>
120 const TileWindow_& tile_window,
122 bool_constant<oob_conditional_check> = {})
124 return tile_window.async_load(
125 lds_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
128 template <
typename LdsTileWindow_,
129 typename TileWindow_,
131 bool oob_conditional_check =
true,
132 bool pre_nop =
false>
134 const TileWindow_& tile_window,
136 bool_constant<oob_conditional_check> = {},
137 bool_constant<pre_nop> = {})
139 return tile_window.async_load_raw(lds_tile,
141 bool_constant<oob_conditional_check>{},
142 bool_constant<pre_nop>{});
147 asm volatile(
"s_waitcnt vmcnt(%0)" : :
"n"(cnt) :
"memory");
150 template <
typename WindowLengths>
156 template <
typename T,
typename WindowLengths>
#define CK_TILE_DEVICE
Definition: config.hpp:41
Definition: cluster_descriptor.hpp:13
CK_TILE_DEVICE auto async_load_tile(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:119
CK_TILE_DEVICE auto async_load_fence(index_t cnt=0)
Definition: load_tile.hpp:145
CK_TILE_DEVICE auto load_tile_with_elementwise(const TileWindow_ &tile_window, ElementWise_ elementwise, number< i_access >={}, bool_constant< oob_conditional_check >={})
Loads a tile with an elementwise function.
Definition: load_tile.hpp:41
int32_t index_t
Definition: integer.hpp:9
CK_TILE_DEVICE auto load_tile_raw(T &tile, const tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, TileDistribution_, NumCoord > &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Loads a tile of data using inline assembly.
Definition: load_tile.hpp:81
CK_TILE_DEVICE auto async_load_tile_raw(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Definition: load_tile.hpp:133
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:22
Definition: integral_constant.hpp:13
Definition: null_tensor.hpp:9
Definition: null_tile_window.hpp:19
Definition: tile_window_linear.hpp:55
This class provides a tile (windowed) view of, and access to, device memory.
Definition: tile_window.hpp:46