#include "ck_tile/core.hpp"
#include <tuple>
Go to the source code of this file.
|
template<typename AccDistributedTensor_ , typename ReduceFunc , bool WithBroadcast = true, bool CrossWarp = true> |
CK_TILE_DEVICE void | ck_tile::block_tile_reduce_sync (AccDistributedTensor_ &acc_tensor, const ReduceFunc &reduce_func, bool_constant< WithBroadcast > = {}, bool_constant< CrossWarp > = {}) |
|
template<typename AccDistributedTensor_ , typename ReduceFunc > |
CK_TILE_DEVICE void | ck_tile::block_tile_reduce_xor_sync (AccDistributedTensor_ &acc_tensor, const ReduceFunc &reduce_func) |
|
template<typename AccDistributedTensor_ , typename InDistributedTensor_ , index_t... InReduceDims, typename ReduceFunc > |
CK_TILE_DEVICE void | ck_tile::block_tile_reduce (AccDistributedTensor_ &acc_tensor, const InDistributedTensor_ &in_tensor, sequence< InReduceDims... >, const ReduceFunc &reduce_func) |
|
template<typename AccDataType_ , typename InDistributedTensor_ , index_t... InReduceDims, typename ReduceFunc , typename InDataType_ > |
CK_TILE_DEVICE auto | ck_tile::block_tile_reduce (const InDistributedTensor_ &in_tensor, sequence< InReduceDims... > in_reduce_dims, const ReduceFunc &reduce_func, const InDataType_ &reduce_init) |
|
template<typename T > |
CK_TILE_HOST_DEVICE_EXTERN | ck_tile::BlockReduce2D (const T &, const typename T::DataType &) -> BlockReduce2D< T > |
|