#include <warp_gemm_impl.hpp>
|
| template<typename CTensor , typename ATensor , typename BTensor , bool post_nop_ = false> |
| CK_TILE_DEVICE void | operator() (CTensor &c, const ATensor &a, const BTensor &b, bool_constant< post_nop_ >={}) const |
| |
| template<typename CTensor , typename ATensor , typename BTensor , index_t i_subk, bool post_nop_ = false> |
| CK_TILE_DEVICE void | operator() (CTensor &c, const ATensor &a, const BTensor &b, number< i_subk >, bool_constant< post_nop_ >={}) const |
| |
| template<index_t opselA, index_t opselB, typename CTensor , typename ATensor , typename BTensor , bool post_nop_ = false> |
| CK_TILE_DEVICE void | operator() (CTensor &c, const ATensor &a, const BTensor &b, const int32_t &a_scale, const int32_t &b_scale, bool_constant< post_nop_ >={}) const |
| |
| template<typename ATensor , typename BTensor > |
| CK_TILE_DEVICE auto | operator() (const ATensor &a, const BTensor &b) const |
| |
| template<index_t opselA, index_t opselB, typename ATensor , typename BTensor > |
| CK_TILE_DEVICE auto | operator() (const ATensor &a, const BTensor &b, const int32_t &a_scale, const int32_t &b_scale) const |
| |
|
| static constexpr index_t | kM = WarpGemmAttribute::kM |
| |
| static constexpr index_t | kN = WarpGemmAttribute::kN |
| |
| static constexpr index_t | kK = WarpGemmAttribute::kK |
| |
| static constexpr index_t | kCMLane = WarpGemmAttribute::kCMLane |
| |
| static constexpr index_t | kKPerThread = WarpGemmAttribute::kKPerThread |
| | The number of elements in the K dimension processed by a single thread in a wavefront. More...
|
| |
◆ ADataType
template<typename WarpGemmAttribute_ >
◆ AWarpDstr
template<typename WarpGemmAttribute_ >
◆ AWarpDstrEncoding
template<typename WarpGemmAttribute_ >
◆ AWarpTensor
template<typename WarpGemmAttribute_ >
◆ BDataType
template<typename WarpGemmAttribute_ >
◆ BWarpDstr
template<typename WarpGemmAttribute_ >
◆ BWarpDstrEncoding
template<typename WarpGemmAttribute_ >
◆ BWarpTensor
template<typename WarpGemmAttribute_ >
◆ CDataType
template<typename WarpGemmAttribute_ >
◆ CWarpDstr
template<typename WarpGemmAttribute_ >
◆ CWarpDstrEncoding
template<typename WarpGemmAttribute_ >
◆ CWarpTensor
template<typename WarpGemmAttribute_ >
◆ WarpGemmAttribute
template<typename WarpGemmAttribute_ >
◆ get_num_of_access()
template<typename WarpGemmAttribute_ >
◆ operator()() [1/5]
template<typename WarpGemmAttribute_ >
template<typename ATensor , typename BTensor >
◆ operator()() [2/5]
template<typename WarpGemmAttribute_ >
template<index_t opselA, index_t opselB, typename ATensor , typename BTensor >
◆ operator()() [3/5]
template<typename WarpGemmAttribute_ >
template<typename CTensor , typename ATensor , typename BTensor , bool post_nop_ = false>
◆ operator()() [4/5]
template<typename WarpGemmAttribute_ >
template<index_t opselA, index_t opselB, typename CTensor , typename ATensor , typename BTensor , bool post_nop_ = false>
◆ operator()() [5/5]
template<typename WarpGemmAttribute_ >
template<typename CTensor , typename ATensor , typename BTensor , index_t i_subk, bool post_nop_ = false>
◆ kCMLane
template<typename WarpGemmAttribute_ >
◆ kK
template<typename WarpGemmAttribute_ >
◆ kKPerThread
template<typename WarpGemmAttribute_ >
The number of elements in the K dimension processed by a single thread in a wavefront.
- Note
- Note that WarpGemm may run the MFMA instruction multiple times (on different K). In such a situation, this value reflects that fact.
◆ kM
template<typename WarpGemmAttribute_ >
◆ kN
template<typename WarpGemmAttribute_ >
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/warp/warp_gemm_impl.hpp