Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ > Struct Template Reference

Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ > Struct Template Reference

Composable Kernel: ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ > Struct Template Reference
ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ > Struct Template Reference

TilePartitioner for 2D reduction operations. More...

#include <multi_reduce2d_tile_partitioner.hpp>

Public Types

using BlockShape = remove_cvref_t< BlockShape_ >
 

Public Member Functions

CK_TILE_HOST_DEVICE Reduce2dTilePartitioner () noexcept=delete
 
CK_TILE_HOST_DEVICE Reduce2dTilePartitioner (index_t total_reduce_len) noexcept
 Construct partitioner with problem dimensions. More...
 
CK_TILE_DEVICE auto GetOutputTileIndex (index_t block_idx) const noexcept -> index_t
 Get output tile index for threadwise reduction. More...
 
CK_TILE_DEVICE auto GetOutputTileIndexMultiBlock (index_t block_global_idx, index_t block_group_size) const noexcept -> tuple< index_t, index_t >
 Get output tile index and block local ID for multi-block reduction. More...
 
CK_TILE_HOST_DEVICE auto GetBlockGroupParams () const noexcept -> tuple< index_t, index_t >
 Calculate the number of iterations and the number of blocks required to perform the reduction. More...
 
CK_TILE_DEVICE auto GetInputTileOffsets (const index_t block_global_idx, const index_t block_group_size, const index_t num_iterations) const -> tuple< index_t, index_t >
 Compute the input tile offsets for the given thread and block index. More...
 
CK_TILE_DEVICE index_t GetOutputTileOffset (const index_t block_group_id) const
 Compute the output tile offset for the given operation and block group. More...
 

Static Public Attributes

static constexpr bool ForceMultiBlock = ForceMultiBlock_
 
static constexpr index_t MPerBlock = BlockShape::Block_M
 
static constexpr index_t NPerBlock = BlockShape::Block_N
 

Detailed Description

template<typename BlockShape_, bool ForceMultiBlock_ = false>
struct ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ >

TilePartitioner for 2D reduction operations.

Member Typedef Documentation

◆ BlockShape

template<typename BlockShape_ , bool ForceMultiBlock_ = false>
using ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ >::BlockShape = remove_cvref_t<BlockShape_>

Constructor & Destructor Documentation

◆ Reduce2dTilePartitioner() [1/2]

template<typename BlockShape_ , bool ForceMultiBlock_ = false>
CK_TILE_HOST_DEVICE ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ >::Reduce2dTilePartitioner ( )
delete noexcept

◆ Reduce2dTilePartitioner() [2/2]

template<typename BlockShape_ , bool ForceMultiBlock_ = false>
CK_TILE_HOST_DEVICE ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ >::Reduce2dTilePartitioner ( index_t  total_reduce_len)
inline noexcept

Construct partitioner with problem dimensions.

Parameters
total_reduce_len: Total number of elements in the reduction dimension

Member Function Documentation

◆ GetBlockGroupParams()

template<typename BlockShape_ , bool ForceMultiBlock_ = false>
CK_TILE_HOST_DEVICE auto ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ >::GetBlockGroupParams ( ) const -> tuple<index_t, index_t>
inline noexcept

Calculate the number of iterations and the number of blocks required to perform the reduction.

Returns
Tuple of (number of iterations per thread, number of blocks used in the reduction)

◆ GetInputTileOffsets()

template<typename BlockShape_ , bool ForceMultiBlock_ = false>
CK_TILE_DEVICE auto ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ >::GetInputTileOffsets ( const index_t  block_global_idx,
const index_t  block_group_size,
const index_t  num_iterations 
) const -> tuple<index_t, index_t>
inline

Compute the input tile offsets for the given thread and block index.

Parameters
block_global_idx: Global index of the block processing (part of) the reduction
block_group_size: Number of blocks taking part in the reduction
num_iterations: Total number of iterations per thread
Returns
Tuple of (M offset, N offset) for the input tile

◆ GetOutputTileIndex()

template<typename BlockShape_ , bool ForceMultiBlock_ = false>
CK_TILE_DEVICE auto ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ >::GetOutputTileIndex ( index_t  block_idx) const -> index_t
inline noexcept

Get output tile index for threadwise reduction.

Parameters
block_idx: Block index

◆ GetOutputTileIndexMultiBlock()

template<typename BlockShape_ , bool ForceMultiBlock_ = false>
CK_TILE_DEVICE auto ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ >::GetOutputTileIndexMultiBlock ( index_t  block_global_idx,
index_t  block_group_size 
) const -> tuple<index_t, index_t>
inline noexcept

Get output tile index and block local ID for multi-block reduction.

Parameters
block_global_idx: Global block index
block_group_size: Number of blocks per output tile
Returns
Tuple of (tile_index, local_block_id)

◆ GetOutputTileOffset()

template<typename BlockShape_ , bool ForceMultiBlock_ = false>
CK_TILE_DEVICE index_t ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ >::GetOutputTileOffset ( const index_t  block_group_id) const
inline

Compute the output tile offset for the given operation and block group.

Parameters
block_group_id: Index of the block group processing a batch of rows
Returns
Output tile offset

Member Data Documentation

◆ ForceMultiBlock

template<typename BlockShape_ , bool ForceMultiBlock_ = false>
constexpr bool ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ >::ForceMultiBlock = ForceMultiBlock_
static constexpr

◆ MPerBlock

template<typename BlockShape_ , bool ForceMultiBlock_ = false>
constexpr index_t ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ >::MPerBlock = BlockShape::Block_M
static constexpr

◆ NPerBlock

template<typename BlockShape_ , bool ForceMultiBlock_ = false>
constexpr index_t ck_tile::Reduce2dTilePartitioner< BlockShape_, ForceMultiBlock_ >::NPerBlock = BlockShape::Block_N
static constexpr

The documentation for this struct was generated from the following file:
  • /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/reduce/kernel/multi_reduce2d_tile_partitioner.hpp