/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/inner_product_dpp8.hpp File Reference

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/inner_product_dpp8.hpp File Reference

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/inner_product_dpp8.hpp File Reference
inner_product_dpp8.hpp File Reference
#include "amd_gemm_dpp.hpp"
#include "data_type.hpp"
#include "type_convert.hpp"

Go to the source code of this file.

Namespaces

 ck
 
 ck::dpp8
 

Functions

template<int SrcLaneIdx>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr (const half2_t &a, const half2_t &b, float &c)
 
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 0 > (const half2_t &a, const half2_t &b, float &c)
 
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 1 > (const half2_t &a, const half2_t &b, float &c)
 
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 2 > (const half2_t &a, const half2_t &b, float &c)
 
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 3 > (const half2_t &a, const half2_t &b, float &c)
 
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 4 > (const half2_t &a, const half2_t &b, float &c)
 
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 5 > (const half2_t &a, const half2_t &b, float &c)
 
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 6 > (const half2_t &a, const half2_t &b, float &c)
 
template<>
__device__ void ck::dpp8::inline_v_dot2c_dpp8_instr< 7 > (const half2_t &a, const half2_t &b, float &c)
 
template<int SrcLaneIdx, bool ShareA>
__device__ void ck::dpp8::inline_v_dot2c_dpp8 (const half2_t &a, const half2_t &b, float &c)
 
template<int SrcLaneIdx>
constexpr int ck::dpp8::get_dpp_sel_mask_broadcast ()
 
template<int SrcLaneIdx>
__device__ void ck::dpp8::intrinsic_fdot2_impl (const half2_t &a, const half2_t &b, float &c)
 
template<int SrcLaneIdx, bool ShareA>
__device__ void ck::dpp8::intrinsic_fdot2 (const half2_t &a, const half2_t &b, float &c)
 
template<typename TA, typename TB, typename TC, int SrcLaneIdx, bool ShareA>
__device__ void ck::dpp8::inner_product_dpp (const TA &a, const TB &b, TC &c)
 

Variables

constexpr index_t ck::dpp8::lane_group_size = 8
 Number of lanes that can share data using DPP8 modifiers.
 
constexpr std::array< int, dpp8::lane_group_size > ck::dpp8::IntrinsicMaskDpp8