/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/wrapper/utils/tensor_partition.hpp File Reference

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/wrapper/utils/tensor_partition.hpp File Reference#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/wrapper/utils/tensor_partition.hpp File Reference

#include "tensor_utils.hpp"
#include "layout_utils.hpp"
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "ck/tensor_description/cluster_descriptor.hpp"

Go to the source code of this file.

Functions
template<typename TensorType , typename ThreadShape , typename ThreadUnrolledDesc , typename ProjectionTuple >
__host__ constexpr __device__ auto	make_local_partition (TensorType &tensor, [[maybe_unused]] const Layout< ThreadShape, ThreadUnrolledDesc > &thread_layout, const index_t thread_id, const ProjectionTuple &projection)
	Create a local partition for a thread (currently only packed partitions are supported). More...

template<typename TensorType , typename ThreadShape , typename ThreadUnrolledDesc >
__host__ constexpr __device__ auto	make_local_partition (TensorType &tensor, const Layout< ThreadShape, ThreadUnrolledDesc > &thread_lengths, const index_t thread_id)
	Create a local partition for a thread (currently only packed partitions are supported). More...

template<typename TensorType , typename BlockShapeTuple , typename BlockIdxs , typename ProjectionTuple >
__host__ constexpr __device__ auto	make_local_tile (const TensorType &tensor, const BlockShapeTuple &tile_shape, const BlockIdxs &block_idxs, const ProjectionTuple &projection)
	Create a local tile for a thread block (currently only packed tiles are supported). More...

template<typename TensorType , typename BlockShapeTuple , typename BlockIdxs >
__host__ constexpr __device__ auto	make_local_tile (const TensorType &tensor, const BlockShapeTuple &tile_shape, const BlockIdxs &block_idxs)
	Create a local tile for a thread block (currently only packed tiles are supported). More...

Function Documentation

◆ make_local_partition() [1/2]

template<typename TensorType , typename ThreadShape , typename ThreadUnrolledDesc , typename ProjectionTuple >

__host__ constexpr __device__ auto make_local_partition	(	TensorType &	tensor,
		[[maybe_unused]] const Layout< ThreadShape, ThreadUnrolledDesc > &	thread_layout,
		const index_t	thread_id,
		const ProjectionTuple &	projection
	)

constexpr

Create a local partition for a thread (currently only packed partitions are supported).

Parameters

tensor	Tensor for partition.
thread_layout	Layout of threads (cannot be transformed).
thread_id	Thread index represented as integer.
projection	Projection used to remove selected dimensions from partitioning. Use `slice(X)` to remove a dimension, where X is the dimension size; use `Number<1>{}` to keep it.

Returns: Partition tensor.

◆ make_local_partition() [2/2]

template<typename TensorType , typename ThreadShape , typename ThreadUnrolledDesc >

__host__ constexpr __device__ auto make_local_partition	(	TensorType &	tensor,
		const Layout< ThreadShape, ThreadUnrolledDesc > &	thread_lengths,
		const index_t	thread_id
	)

constexpr

Create a local partition for a thread (currently only packed partitions are supported).

Parameters

tensor	Tensor for partition.
thread_lengths	Layout of threads (cannot be nested).
thread_id	Thread index represented as integer.

Returns: Partition tensor.

◆ make_local_tile() [1/2]

template<typename TensorType , typename BlockShapeTuple , typename BlockIdxs >

__host__ constexpr __device__ auto make_local_tile	(	const TensorType &	tensor,
		const BlockShapeTuple &	tile_shape,
		const BlockIdxs &	block_idxs
	)

constexpr

Create a local tile for a thread block (currently only packed tiles are supported).

Note: For now, use a 2D tile_shape to get the best performance.

Parameters

tensor	Tensor for partition.
tile_shape	Shapes of requested tile.
block_idxs	Tuple of block indices represented as integers. If an entry is a slice, the whole dimension is taken.

Returns: Tile tensor.

◆ make_local_tile() [2/2]

template<typename TensorType , typename BlockShapeTuple , typename BlockIdxs , typename ProjectionTuple >

__host__ constexpr __device__ auto make_local_tile	(	const TensorType &	tensor,
		const BlockShapeTuple &	tile_shape,
		const BlockIdxs &	block_idxs,
		const ProjectionTuple &	projection
	)

constexpr

Create a local tile for a thread block (currently only packed tiles are supported).

Note: For now, use a 2D tile_shape to get the best performance.

Parameters

tensor	Tensor for partition.
tile_shape	Shapes of requested tile.
block_idxs	Tuple of block indices represented as integers. If an entry is a slice, the whole dimension is taken.
projection	Projection used to remove selected dimensions from partitioning. Use `slice(X)` to remove a dimension, where X is the dimension size; use `Number<1>{}` to keep it.

Returns: Tile tensor.

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/wrapper/utils/tensor_partition.hpp File Reference

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/wrapper/utils/tensor_partition.hpp File Reference#

Functions

Function Documentation

◆ make_local_partition() [1/2]

◆ make_local_partition() [2/2]

◆ make_local_tile() [1/2]

◆ make_local_tile() [2/2]