/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/wrapper/utils/tensor_partition.hpp File Reference#
tensor_partition.hpp File Reference
#include "tensor_utils.hpp"
#include "layout_utils.hpp"
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "ck/tensor_description/cluster_descriptor.hpp"
Go to the source code of this file.
Functions | |
template<typename TensorType , typename ThreadShape , typename ThreadUnrolledDesc , typename ProjectionTuple > | |
__host__ constexpr __device__ auto | make_local_partition (TensorType &tensor, [[maybe_unused]] const Layout< ThreadShape, ThreadUnrolledDesc > &thread_layout, const index_t thread_id, const ProjectionTuple &projection) |
Create local partition for thread (currently only packed partition is supported). More... | |
template<typename TensorType , typename ThreadShape , typename ThreadUnrolledDesc > | |
__host__ constexpr __device__ auto | make_local_partition (TensorType &tensor, const Layout< ThreadShape, ThreadUnrolledDesc > &thread_lengths, const index_t thread_id) |
Create local partition for thread (currently only packed partition is supported). More... | |
template<typename TensorType , typename BlockShapeTuple , typename BlockIdxs , typename ProjectionTuple > | |
__host__ constexpr __device__ auto | make_local_tile (const TensorType &tensor, const BlockShapeTuple &tile_shape, const BlockIdxs &block_idxs, const ProjectionTuple &projection) |
Create local tile for thread block (currently only packed tile is supported). More... | |
template<typename TensorType , typename BlockShapeTuple , typename BlockIdxs > | |
__host__ constexpr __device__ auto | make_local_tile (const TensorType &tensor, const BlockShapeTuple &tile_shape, const BlockIdxs &block_idxs) |
Create local tile for thread block (currently only packed tile is supported). More... | |
Function Documentation
◆ make_local_partition() [1/2]
template<typename TensorType , typename ThreadShape , typename ThreadUnrolledDesc , typename ProjectionTuple >
|
constexpr |
Create local partition for thread (currently only packed partition is supported).
- Parameters
-
tensor Tensor for partition. thread_layout Layout of threads (cannot be transformed). thread_id Thread index represented as an integer. projection Projection used to remove selected dimensions from partitioning. Use slice(X)
to remove a dimension, where X is the dimension size. Use Number<1>{}
to keep it.
- Returns
- Partition tensor.
◆ make_local_partition() [2/2]
template<typename TensorType , typename ThreadShape , typename ThreadUnrolledDesc >
|
constexpr |
◆ make_local_tile() [1/2]
template<typename TensorType , typename BlockShapeTuple , typename BlockIdxs >
|
constexpr |
Create local tile for thread block (currently only packed tile is supported).
- Note
- Currently, to get the best performance, please use a 2D shape.
- Parameters
-
tensor Tensor for partition. tile_shape Shapes of requested tile. block_idxs Tuple of block indices represented as integers. If an index is a slice, the whole dimension is taken.
- Returns
- Tile tensor.
◆ make_local_tile() [2/2]
template<typename TensorType , typename BlockShapeTuple , typename BlockIdxs , typename ProjectionTuple >
|
constexpr |
Create local tile for thread block (currently only packed tile is supported).
- Note
- Currently, to get the best performance, use a 2D tile_shape.
- Parameters
-
tensor Tensor for partition. tile_shape Shapes of requested tile. block_idxs Tuple of block indices represented as integers. If an index is a slice, the whole dimension is taken. projection Projection used to remove selected dimensions from partitioning. Use slice(X)
to remove a dimension, where X is the dimension size. Use Number<1>{}
to keep it.
- Returns
- Tile tensor.