/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/wrapper/utils/tensor_partition.hpp File Reference

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/wrapper/utils/tensor_partition.hpp File Reference

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/wrapper/utils/tensor_partition.hpp File Reference
tensor_partition.hpp File Reference

Go to the source code of this file.

Functions

template<typename TensorType , typename ThreadShape , typename ThreadUnrolledDesc , typename ProjectionTuple >
__host__ constexpr __device__ auto make_local_partition (TensorType &tensor, [[maybe_unused]] const Layout< ThreadShape, ThreadUnrolledDesc > &thread_layout, const index_t thread_id, const ProjectionTuple &projection)
 Create a local partition for a thread (currently only packed partitions are supported). More...
 
template<typename TensorType , typename ThreadShape , typename ThreadUnrolledDesc >
__host__ constexpr __device__ auto make_local_partition (TensorType &tensor, const Layout< ThreadShape, ThreadUnrolledDesc > &thread_lengths, const index_t thread_id)
 Create a local partition for a thread (currently only packed partitions are supported). More...
 
template<typename TensorType , typename BlockShapeTuple , typename BlockIdxs , typename ProjectionTuple >
__host__ constexpr __device__ auto make_local_tile (const TensorType &tensor, const BlockShapeTuple &tile_shape, const BlockIdxs &block_idxs, const ProjectionTuple &projection)
 Create a local tile for a thread block (currently only packed tiles are supported). More...
 
template<typename TensorType , typename BlockShapeTuple , typename BlockIdxs >
__host__ constexpr __device__ auto make_local_tile (const TensorType &tensor, const BlockShapeTuple &tile_shape, const BlockIdxs &block_idxs)
 Create a local tile for a thread block (currently only packed tiles are supported). More...
 

Function Documentation

◆ make_local_partition() [1/2]

template<typename TensorType , typename ThreadShape , typename ThreadUnrolledDesc , typename ProjectionTuple >
__host__ constexpr __device__ auto make_local_partition ( TensorType &  tensor,
[[maybe_unused]] const Layout< ThreadShape, ThreadUnrolledDesc > &  thread_layout,
const index_t  thread_id,
const ProjectionTuple &  projection 
)
constexpr

Create a local partition for a thread (currently only packed partitions are supported).

Parameters
tensor: Tensor to partition.
thread_layout: Layout of threads (cannot be transformed).
thread_id: Thread index represented as an integer.
projection: Projection used to remove selected dimensions from the partitioning. Use slice(X) to remove a dimension, where X is the dimension size; use Number<1>{} to keep it.
Returns
Partition tensor.

◆ make_local_partition() [2/2]

template<typename TensorType , typename ThreadShape , typename ThreadUnrolledDesc >
__host__ constexpr __device__ auto make_local_partition ( TensorType &  tensor,
const Layout< ThreadShape, ThreadUnrolledDesc > &  thread_lengths,
const index_t  thread_id 
)
constexpr

Create a local partition for a thread (currently only packed partitions are supported).

Parameters
tensor: Tensor to partition.
thread_lengths: Layout of threads (cannot be nested).
thread_id: Thread index represented as an integer.
Returns
Partition tensor.

◆ make_local_tile() [1/2]

template<typename TensorType , typename BlockShapeTuple , typename BlockIdxs >
__host__ constexpr __device__ auto make_local_tile ( const TensorType &  tensor,
const BlockShapeTuple &  tile_shape,
const BlockIdxs &  block_idxs 
)
constexpr

Create a local tile for a thread block (currently only packed tiles are supported).

Note
Currently, to get the best performance, please use a 2D tile_shape.
Parameters
tensor: Tensor to partition.
tile_shape: Shape of the requested tile.
block_idxs: Tuple of block indexes, each represented as an integer. If an entry is a slice, the whole dimension is taken.
Returns
Tile tensor.

◆ make_local_tile() [2/2]

template<typename TensorType , typename BlockShapeTuple , typename BlockIdxs , typename ProjectionTuple >
__host__ constexpr __device__ auto make_local_tile ( const TensorType &  tensor,
const BlockShapeTuple &  tile_shape,
const BlockIdxs &  block_idxs,
const ProjectionTuple &  projection 
)
constexpr

Create a local tile for a thread block (currently only packed tiles are supported).

Note
Currently, to get the best performance, please use a 2D tile_shape.
Parameters
tensor: Tensor to partition.
tile_shape: Shape of the requested tile.
block_idxs: Tuple of block indexes, each represented as an integer. If an entry is a slice, the whole dimension is taken.
projection: Projection used to remove selected dimensions from the partitioning. Use slice(X) to remove a dimension, where X is the dimension size; use Number<1>{} to keep it.
Returns
Tile tensor.