21 template <
typename TileWindow_,
index_t i_access = -1,
bool oob_conditional_check =
true>
24 bool_constant<oob_conditional_check> = {})
26 return tile_window.load(number<i_access>{}, bool_constant<oob_conditional_check>{});
29 template <
typename DistributedTensor_,
32 bool oob_conditional_check =
true>
34 const TileWindow_& tile_window,
36 bool_constant<oob_conditional_check> = {})
38 return tile_window.load(dst_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
51 typename BottomTensorView_,
52 typename WindowLengths_,
53 typename TileDistribution_,
56 bool oob_conditional_check =
true,
62 NumCoord>& tile_window,
64 bool_constant<oob_conditional_check> = {},
65 bool_constant<pre_nop> = {})
68 tile, number<i_access>{}, bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{});
72 typename BottomTensorView_,
73 typename WindowLengths_,
74 typename TileDistribution_,
75 typename LinearBottomDims_,
77 bool oob_conditional_check =
true,
83 LinearBottomDims_>& tile_window,
85 bool_constant<oob_conditional_check> = {},
86 bool_constant<pre_nop> = {})
89 tile, number<i_access>{}, bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{});
92 template <
typename LdsTileWindow_,
95 bool oob_conditional_check =
true>
97 const TileWindow_& tile_window,
99 bool_constant<oob_conditional_check> = {})
101 return tile_window.async_load(
102 lds_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
105 template <
typename LdsTileWindow_,
106 typename TileWindow_,
108 bool oob_conditional_check =
true,
109 bool pre_nop =
false>
111 const TileWindow_& tile_window,
113 bool_constant<oob_conditional_check> = {},
114 bool_constant<pre_nop> = {})
116 return tile_window.async_load_raw(lds_tile,
118 bool_constant<oob_conditional_check>{},
119 bool_constant<pre_nop>{});
124 asm volatile(
"s_waitcnt vmcnt(%0)" : :
"n"(cnt) :
"memory");
127 template <
typename WindowLengths>
133 template <
typename T,
typename WindowLengths>
#define CK_TILE_DEVICE
Definition: config.hpp:41
Definition: cluster_descriptor.hpp:13
CK_TILE_DEVICE auto async_load_tile(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:96
CK_TILE_DEVICE auto async_load_fence(index_t cnt=0)
Definition: load_tile.hpp:122
int32_t index_t
Definition: integer.hpp:9
CK_TILE_DEVICE auto load_tile_raw(T &tile, const tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, TileDistribution_, NumCoord > &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Loads a tile of data using inline assembly.
Definition: load_tile.hpp:58
CK_TILE_DEVICE auto async_load_tile_raw(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Definition: load_tile.hpp:110
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:22
Definition: integral_constant.hpp:13
Definition: null_tensor.hpp:9
Definition: null_tile_window.hpp:19
Definition: tile_window_linear.hpp:55
This class provides tile (windowed) view and access to the device memory.
Definition: tile_window.hpp:46