/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp Source File
blockwise_gemm_dlops_v3.hpp
Go to the documentation of this file.
Line 155: __device__ void MoveABlockSliceWindow(const ABlockSliceMoveStepIdx& a_block_slice_move_step_idx)
Definition: ck.hpp:267
__host__ constexpr __device__ auto make_multi_index(Xs &&... xs)
Definition: array_multi_index.hpp:15
__host__ constexpr __device__ auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition: tensor_descriptor_helper.hpp:101
__host__ constexpr __device__ auto make_merge_transform(const LowLengths &low_lengths)
Definition: multi_index_transform_helper.hpp:55
__host__ constexpr __device__ auto make_single_stage_tensor_adaptor(const Transforms &transforms, LowerDimensionOldTopIdss, UpperDimensionNewTopIdss)
Definition: tensor_adaptor.hpp:425
Definition: array.hpp:14
Definition: blockwise_gemm_dlops_v3.hpp:22
static constexpr auto E1
Definition: blockwise_gemm_dlops_v3.hpp:33
static constexpr auto b_thread_mtx_
Definition: blockwise_gemm_dlops_v3.hpp:47
__device__ void MoveABlockSliceWindow(const ABlockSliceMoveStepIdx &a_block_slice_move_step_idx)
Definition: blockwise_gemm_dlops_v3.hpp:155
static constexpr auto I4
Definition: blockwise_gemm_dlops_v3.hpp:27
static constexpr auto E2
Definition: blockwise_gemm_dlops_v3.hpp:35
static constexpr auto WoPerBlock
Definition: blockwise_gemm_dlops_v3.hpp:38
static constexpr auto KPerBlock
Definition: blockwise_gemm_dlops_v3.hpp:34
__device__ void Run(const ABlockBuffer &a_block_buf, const BThreadBuffer &b_thread_buf, CThreadBuffer &c_thread_buf) const
Definition: blockwise_gemm_dlops_v3.hpp:112
static constexpr __device__ auto GetCThreadDesc_K_N_Ho_WoLengths()
Definition: blockwise_gemm_dlops_v3.hpp:86
static constexpr auto I1
Definition: blockwise_gemm_dlops_v3.hpp:24
static constexpr auto HoPerBlock
Definition: blockwise_gemm_dlops_v3.hpp:37
static constexpr auto c_thread_mtx_
Definition: blockwise_gemm_dlops_v3.hpp:54
static constexpr auto a_thread_mtx_
Definition: blockwise_gemm_dlops_v3.hpp:44
static constexpr auto HoPerThread
Definition: blockwise_gemm_dlops_v3.hpp:41
static constexpr auto I0
Definition: blockwise_gemm_dlops_v3.hpp:23
static __device__ CIndex GetBeginOfCThreadDesc_K_N_Ho_Wo(index_t thread_id)
Definition: blockwise_gemm_dlops_v3.hpp:91
__device__ BlockwiseGemmDlops_km_kn_m0m1n0n1_v3()
Definition: blockwise_gemm_dlops_v3.hpp:57
static constexpr auto KPerThread
Definition: blockwise_gemm_dlops_v3.hpp:40
static constexpr auto I3
Definition: blockwise_gemm_dlops_v3.hpp:26
MultiIndex< 4 > CIndex
Definition: blockwise_gemm_dlops_v3.hpp:31
static constexpr auto I2
Definition: blockwise_gemm_dlops_v3.hpp:25
static constexpr auto WoPerThread
Definition: blockwise_gemm_dlops_v3.hpp:42
Definition: sequence.hpp:43
Definition: static_buffer.hpp:16
Definition: threadwise_gemm_dlops_v3.hpp:29
Definition: threadwise_tensor_slice_transfer.hpp:1260
__device__ void Run(const SrcDesc &, const SrcRefToOriginDisplacement &, const SrcBuffer &src_buf, const DstDesc &, const DstOriginIdx &, DstBuffer &dst_buf) const
Definition: threadwise_tensor_slice_transfer.hpp:1293
__device__ void MoveSrcSliceWindow(const SrcDesc &, const SrcSliceMoveStepIdx &src_slice_move_step_idx)
Definition: threadwise_tensor_slice_transfer.hpp:1683
Definition: integral_constant.hpp:20
Definition: type.hpp:177
Definition: functional2.hpp:33