/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/inner_product_dpp8.hpp Source File
inner_product_dpp8.hpp
Go to the documentation of this file.
23 asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[0, 0, 0, 0, 0, 0, 0, 0]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
27 asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[1, 1, 1, 1, 1, 1, 1, 1]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
31 asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[2, 2, 2, 2, 2, 2, 2, 2]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
35 asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[3, 3, 3, 3, 3, 3, 3, 3]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
39 asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[4, 4, 4, 4, 4, 4, 4, 4]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
43 asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[5, 5, 5, 5, 5, 5, 5, 5]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
47 asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[6, 6, 6, 6, 6, 6, 6, 6]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
51 asm volatile("\n v_dot2c_f32_f16_dpp %0, %1, %2 dpp8:[7, 7, 7, 7, 7, 7, 7, 7]" : "=v"(c) : "v"(a), "v"(b), "0"(c));
__device__ void inline_v_dot2c_dpp8_instr< 5 >(const half2_t &a, const half2_t &b, float &c)
Definition: inner_product_dpp8.hpp:42
__device__ void inline_v_dot2c_dpp8_instr< 4 >(const half2_t &a, const half2_t &b, float &c)
Definition: inner_product_dpp8.hpp:38
__device__ void inner_product_dpp(const TA &a, const TB &b, TC &c)
Definition: inner_product_dpp8.hpp:135
__device__ void inline_v_dot2c_dpp8(const half2_t &a, const half2_t &b, float &c)
Definition: inner_product_dpp8.hpp:59
__device__ void inline_v_dot2c_dpp8_instr(const half2_t &a, const half2_t &b, float &c)
__device__ void inline_v_dot2c_dpp8_instr< 3 >(const half2_t &a, const half2_t &b, float &c)
Definition: inner_product_dpp8.hpp:34
__device__ void inline_v_dot2c_dpp8_instr< 2 >(const half2_t &a, const half2_t &b, float &c)
Definition: inner_product_dpp8.hpp:30
constexpr std::array< int, dpp8::lane_group_size > IntrinsicMaskDpp8
Definition: inner_product_dpp8.hpp:77
__device__ void intrinsic_fdot2(const half2_t &a, const half2_t &b, float &c)
Definition: inner_product_dpp8.hpp:112
__device__ void intrinsic_fdot2_impl(const half2_t &a, const half2_t &b, float &c)
Definition: inner_product_dpp8.hpp:100
__device__ void inline_v_dot2c_dpp8_instr< 7 >(const half2_t &a, const half2_t &b, float &c)
Definition: inner_product_dpp8.hpp:50
__device__ void inline_v_dot2c_dpp8_instr< 6 >(const half2_t &a, const half2_t &b, float &c)
Definition: inner_product_dpp8.hpp:46
constexpr int get_dpp_sel_mask_broadcast()
Definition: inner_product_dpp8.hpp:92
__device__ void inline_v_dot2c_dpp8_instr< 0 >(const half2_t &a, const half2_t &b, float &c)
Definition: inner_product_dpp8.hpp:22
constexpr index_t lane_group_size
Number of lanes that can share data using DPP8 modifiers.
Definition: inner_product_dpp8.hpp:15
__device__ void inline_v_dot2c_dpp8_instr< 1 >(const half2_t &a, const half2_t &b, float &c)
Definition: inner_product_dpp8.hpp:26
Definition: ck.hpp:267