#include <multi_reduce2d_kernel.hpp>
|
| template<typename InputShape , typename InputStrides , typename KeptDim , typename ReduceDims , typename ElementwiseOps , typename AccumulatorOps > |
| CK_TILE_DEVICE void | operator() (const XDataType *p_x, YDataType *p_y_tuple, InputShape input_shape, InputStrides input_strides, KeptDim kept_dim, ReduceDims reduce_dims, index_t output_tensor_offset, ElementwiseOps elementwise_ops, AccumulatorOps accumulator_ops) const |
| |
| template<typename InputShape , typename InputStrides , typename KeptDim , typename ReduceDims , typename ElementwiseOps , typename AccumulatorOps , typename InterblockReduceOps > |
| CK_TILE_DEVICE void | operator() (const XDataType *p_x, YDataType *p_y_tuple, InputShape input_shape, InputStrides input_strides, KeptDim kept_dim, ReduceDims reduce_dims, index_t output_tensor_offset, ElementwiseOps elementwise_ops, AccumulatorOps accumulator_ops, InterblockReduceOps interblock_reduce_ops) const |
| |
◆ ComputeDataType
template<typename Problem_ , typename Policy_ = Reduce2dDefaultPolicy, bool ForceMultiBlock_ = false>
◆ Policy
template<typename Problem_ , typename Policy_ = Reduce2dDefaultPolicy, bool ForceMultiBlock_ = false>
◆ Problem
template<typename Problem_ , typename Policy_ = Reduce2dDefaultPolicy, bool ForceMultiBlock_ = false>
◆ TilePartitioner
template<typename Problem_ , typename Policy_ = Reduce2dDefaultPolicy, bool ForceMultiBlock_ = false>
◆ XDataType
template<typename Problem_ , typename Policy_ = Reduce2dDefaultPolicy, bool ForceMultiBlock_ = false>
◆ YDataType
template<typename Problem_ , typename Policy_ = Reduce2dDefaultPolicy, bool ForceMultiBlock_ = false>
◆ BlockSize()
template<typename Problem_ , typename Policy_ = Reduce2dDefaultPolicy, bool ForceMultiBlock_ = false>
◆ IsSupportedArgument()
template<typename Problem_ , typename Policy_ = Reduce2dDefaultPolicy, bool ForceMultiBlock_ = false>
template<typename InputStrides >
Validates whether the given arguments are supported by the 2D multi-reduction kernel.
- Parameters
-
| y_continous_dim | Size of the continuous dimension of the output tensor. Must be a multiple of ThreadTile_N for proper thread mapping. |
| input_strides | The stride configuration of the input tensor. The last stride must be 1 to ensure contiguous memory access and enable efficient vectorized loads. |
- Returns
- true if the arguments are supported, false otherwise. Error messages are logged when CK_TILE_LOGGING is enabled.
- Note
- Requirements:
- y_continous_dim % ThreadTile_N == 0 (for proper thread distribution)
- input_strides[-1] == 1 (for contiguous memory access)
◆ operator()() [1/2]
template<typename Problem_ , typename Policy_ = Reduce2dDefaultPolicy, bool ForceMultiBlock_ = false>
template<typename InputShape , typename InputStrides , typename KeptDim , typename ReduceDims , typename ElementwiseOps , typename AccumulatorOps >
| CK_TILE_DEVICE void ck_tile::MultiReduce2d< Problem_, Policy_, ForceMultiBlock_ >::operator() |
( |
const XDataType * |
p_x, |
|
|
YDataType * |
p_y_tuple, |
|
|
InputShape |
input_shape, |
|
|
InputStrides |
input_strides, |
|
|
KeptDim |
kept_dim, |
|
|
ReduceDims |
reduce_dims, |
|
|
index_t |
output_tensor_offset, |
|
|
ElementwiseOps |
elementwise_ops, |
|
|
AccumulatorOps |
accumulator_ops |
|
) |
| const |
|
inline |
◆ operator()() [2/2]
template<typename Problem_ , typename Policy_ = Reduce2dDefaultPolicy, bool ForceMultiBlock_ = false>
template<typename InputShape , typename InputStrides , typename KeptDim , typename ReduceDims , typename ElementwiseOps , typename AccumulatorOps , typename InterblockReduceOps >
| CK_TILE_DEVICE void ck_tile::MultiReduce2d< Problem_, Policy_, ForceMultiBlock_ >::operator() |
( |
const XDataType * |
p_x, |
|
|
YDataType * |
p_y_tuple, |
|
|
InputShape |
input_shape, |
|
|
InputStrides |
input_strides, |
|
|
KeptDim |
kept_dim, |
|
|
ReduceDims |
reduce_dims, |
|
|
index_t |
output_tensor_offset, |
|
|
ElementwiseOps |
elementwise_ops, |
|
|
AccumulatorOps |
accumulator_ops, |
|
|
InterblockReduceOps |
interblock_reduce_ops |
|
) |
| const |
|
inline |
◆ ForceMultiBlock
template<typename Problem_ , typename Policy_ = Reduce2dDefaultPolicy, bool ForceMultiBlock_ = false>
| constexpr bool ck_tile::MultiReduce2d< Problem_, Policy_, ForceMultiBlock_ >::ForceMultiBlock = ForceMultiBlock_ |
|
static constexpr |
◆ kBlockSize
template<typename Problem_ , typename Policy_ = Reduce2dDefaultPolicy, bool ForceMultiBlock_ = false>
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/reduce/kernel/multi_reduce2d_kernel.hpp