9 template <index_t MPerWave, index_t NPerWave>
17 template <
class FloatC, index_t abid = 0>
18 __device__
static void
21 #if defined(__gfx94__)
22 reg_c.template AsType<float4_t>()(
Number<0>{}) = __builtin_amdgcn_smfmac_f32_16x16x32_f16(
23 reg_a, reg_b, reg_c.template AsType<float4_t>()[
Number<0>{}], reg_idx, 0, abid);
33 template <index_t MPerWave, index_t NPerWave>
39 template <
class FloatC, index_t abid = 0>
40 __device__
static void
43 #if defined(__gfx94__)
44 reg_c.template AsType<float4_t>()(
Number<0>{}) = __builtin_amdgcn_smfmac_f32_16x16x32_bf16(
45 reg_a, reg_b, reg_c.template AsType<float4_t>()[
Number<0>{}], reg_idx, 0, abid);
55 template <index_t MPerWave, index_t NPerWave>
61 template <
class FloatC, index_t abid = 0>
62 __device__
static void
65 #if defined(__gfx94__)
66 reg_c.template AsType<float16_t>()(
Number<0>{}) = __builtin_amdgcn_smfmac_f32_32x32x16_f16(
67 reg_a, reg_b, reg_c.template AsType<float16_t>()[
Number<0>{}], reg_idx, 0, abid);
77 template <index_t MPerWave, index_t NPerWave>
83 template <
class FloatC, index_t abid = 0>
84 __device__
static void
87 #if defined(__gfx94__)
88 reg_c.template AsType<float16_t>()(
Number<0>{}) = __builtin_amdgcn_smfmac_f32_32x32x16_bf16(
89 reg_a, reg_b, reg_c.template AsType<float16_t>()[
Number<0>{}], reg_idx, 0, abid);
typename vector_type< bhalf_t, 4 >::type bhalf4_t
Definition: dtype_vector.hpp:2147
typename vector_type< bhalf_t, 8 >::type bhalf8_t
Definition: dtype_vector.hpp:2148
typename vector_type< half_t, 4 >::type half4_t
Definition: dtype_vector.hpp:2140
constexpr detail::ignore_t ignore
Definition: ignore.hpp:20
int32_t index_t
Definition: ck.hpp:298
typename vector_type< half_t, 8 >::type half8_t
Definition: dtype_vector.hpp:2141
Definition: integral_constant.hpp:20
static __device__ void Run(const bhalf4_t &reg_a, const bhalf8_t &reg_b, const index_t &reg_idx, FloatC &reg_c)
Definition: amd_smfmac.hpp:41
Definition: amd_smfmac.hpp:34
static __device__ void Run(const half4_t &reg_a, const half8_t &reg_b, const index_t &reg_idx, FloatC &reg_c)
Definition: amd_smfmac.hpp:19
Definition: amd_smfmac.hpp:10
static __device__ void Run(const bhalf4_t &reg_a, const bhalf8_t &reg_b, const index_t &reg_idx, FloatC &reg_c)
Definition: amd_smfmac.hpp:85
Definition: amd_smfmac.hpp:78
static __device__ void Run(const half4_t &reg_a, const half8_t &reg_b, const index_t &reg_idx, FloatC &reg_c)
Definition: amd_smfmac.hpp:63
Definition: amd_smfmac.hpp:56