/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/epilogue/chainer/common_epilogue_ops.hpp Source File
common_epilogue_ops.hpp
Go to the documentation of this file.
Definition: cluster_descriptor.hpp:13
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition: tile_elementwise.hpp:23
constexpr CK_TILE_HOST_DEVICE auto generate_tie(F &&f, number< N >)
Definition: tuple.hpp:435
CK_TILE_DEVICE auto tile_elementwise_inout_unpack(const InElementFunc &in_element_func, const Tuple &t, std::index_sequence< I... >)
Template function that "unpacks" a tuple and applies an element-wise operation.
Definition: tile_elementwise.hpp:71
constexpr CK_TILE_DEVICE auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition: null_tile_window.hpp:75
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition: null_tile_window.hpp:95
constexpr CK_TILE_HOST_DEVICE auto generate_tuple(F &&f, number< N >)
Definition: tuple.hpp:429
constexpr CK_TILE_HOST_DEVICE auto concat_tuple_of_reference(const tuple< X &... > &tx, const tuple< Y &... > &ty)
Definition: tuple.hpp:443
CK_TILE_DEVICE void update_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition: update_tile.hpp:22
CK_TILE_DEVICE void store_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition: store_tile.hpp:24
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:36
Cast working tile and store to LDS.
Definition: common_epilogue_ops.hpp:58
CK_TILE_DEVICE void operator()([[maybe_unused]] OutWindow &out_window, [[maybe_unused]] const AccTile &acc_tile, [[maybe_unused]] const AuxWindows &aux_windows, [[maybe_unused]] void *p_smem, [[maybe_unused]] IAccess iAccess, Context &context)
Definition: common_epilogue_ops.hpp:64
Apply elementwise operation with auxiliary tensors.
Definition: common_epilogue_ops.hpp:114
CK_TILE_DEVICE void operator()([[maybe_unused]] OutWindow &out_window, [[maybe_unused]] const AccTile &acc_tile, [[maybe_unused]] const AuxWindows &aux_windows, [[maybe_unused]] void *p_smem, [[maybe_unused]] IAccess iAccess, Context &context)
Definition: common_epilogue_ops.hpp:120
Load output tile from LDS with synchronization.
Definition: common_epilogue_ops.hpp:85
CK_TILE_DEVICE void operator()([[maybe_unused]] OutWindow &out_window, [[maybe_unused]] const AccTile &acc_tile, [[maybe_unused]] const AuxWindows &aux_windows, [[maybe_unused]] void *p_smem, [[maybe_unused]] IAccess iAccess, Context &context)
Definition: common_epilogue_ops.hpp:91
Move output and auxiliary windows by step from space-filling curve.
Definition: common_epilogue_ops.hpp:180
CK_TILE_DEVICE void operator()(OutWindow &out_window, [[maybe_unused]] const AccTile &acc_tile, [[maybe_unused]] const AuxWindows &aux_windows, [[maybe_unused]] void *p_smem, IAccess iAccess, Context &context)
Definition: common_epilogue_ops.hpp:186
CK_TILE_DEVICE void operator()([[maybe_unused]] OutWindow &out_window, [[maybe_unused]] const AccTile &acc_tile, [[maybe_unused]] const AuxWindows &aux_windows, [[maybe_unused]] void *p_smem, [[maybe_unused]] IAccess iAccess, Context &context, const ScaleA &scale_a, const ScaleB &scale_b)
Definition: common_epilogue_ops.hpp:35
CK_TILE_DEVICE void operator()(OutWindow &out_window, [[maybe_unused]] const AccTile &acc_tile, [[maybe_unused]] const AuxWindows &aux_windows, [[maybe_unused]] void *p_smem, [[maybe_unused]] IAccess iAccess, Context &context)
Definition: common_epilogue_ops.hpp:153
Definition: integral_constant.hpp:13
Definition: functional.hpp:43
Definition: tile_distribution.hpp:70