/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/host/reference/reference_gemm.hpp File Reference
reference_gemm.hpp File Reference
#include <cstdlib>
#include <thread>
#include "ck_tile/core.hpp"
#include "ck_tile/host/host_tensor.hpp"
Go to the source code of this file.
Namespaces
ck_tile
Functions
template<typename ADataType , typename QDataType , typename BDataType , typename AccDataType , typename CDataType , uint32_t QuantGroupSize, bool aquant, typename AElementOp = ck_tile::identity, typename BElementOp = ck_tile::identity, typename ACCElementOp = ck_tile::identity>
CK_TILE_HOST void ck_tile::reference_gemm_quant (const HostTensor< ADataType > &a_m_k, const HostTensor< QDataType > &q, const HostTensor< BDataType > &b_k_n, HostTensor< CDataType > &c_m_n, const AElementOp &a_element_op={}, const BElementOp &b_element_op={}, const ACCElementOp &acc_element_op={})
template<typename ADataType , typename AQDataType , typename BDataType , typename BQDataType , typename AccDataType , typename CDataType , typename AElementOp = ck_tile::identity, typename BElementOp = ck_tile::identity, typename ACCElementOp = ck_tile::identity>
CK_TILE_HOST void ck_tile::reference_gemm_rowcol_quant (const HostTensor< ADataType > &a_m_k, const HostTensor< AQDataType > &aq_m_1, const HostTensor< BDataType > &b_k_n, const HostTensor< BQDataType > &bq_1_n, HostTensor< CDataType > &c_m_n, const AElementOp &a_element_op={}, const BElementOp &b_element_op={}, const ACCElementOp &acc_element_op={})
template<typename ADataType , typename BDataType , typename AccDataType , typename CDataType , typename AElementOp = ck_tile::identity, typename BElementOp = ck_tile::identity, typename ACCElementOp = ck_tile::identity>
CK_TILE_HOST void ck_tile::reference_gemm (const HostTensor< ADataType > &a_m_k, const HostTensor< BDataType > &b_k_n, HostTensor< CDataType > &c_m_n, const AElementOp &a_element_op={}, const BElementOp &b_element_op={}, const ACCElementOp &acc_element_op={})
template<typename ADataType , typename BDataType , typename DsDataType , typename AccDataType , typename CDataType , typename ACCElementOp , typename DDataType = remove_cvref_t<std::tuple_element_t<0, DsDataType>>>
CK_TILE_HOST void ck_tile::reference_gemm_multiple_d (const HostTensor< ADataType > &a_m_k, const HostTensor< BDataType > &b_k_n, const std::array< HostTensor< DDataType >, DsDataType::size()> &ds_m_n, HostTensor< CDataType > &c_m_n, const ACCElementOp &acc_element_op={})
template<typename ADataType , typename BDataType , typename AccDataType , typename CDataType , typename LayoutA , typename LayoutB , typename LayoutC >
__global__ void ck_tile::naive_gemm_kernel (ADataType *A, BDataType *B, CDataType *C, ck_tile::index_t M, ck_tile::index_t N, ck_tile::index_t K, ck_tile::index_t strideA, ck_tile::index_t strideB, ck_tile::index_t strideC)
template<typename ADataType , typename BDataType , typename AccDataType , typename CDataType , typename LayoutA , typename LayoutB , typename LayoutC >
void ck_tile::reference_gemm_gpu (ADataType *a_ptr, BDataType *b_ptr, CDataType *c_ptr, index_t M, index_t N, index_t K, index_t stride_a, index_t stride_b, index_t stride_c)
template<typename ADataType , typename BDataType , typename AccDataType , typename CDataType , typename LayoutA , typename LayoutB , typename LayoutC >
void ck_tile::reference_batched_gemm_gpu (ADataType *a_ptr, BDataType *b_ptr, CDataType *c_ptr, index_t M, index_t N, index_t K, index_t stride_a, index_t stride_b, index_t stride_c, index_t batch_stride_A, index_t batch_stride_B, index_t batch_stride_C, index_t batch_count)