/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/numeric/pk_fp4.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/numeric/pk_fp4.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/numeric/pk_fp4.hpp Source File
pk_fp4.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include <cmath>
10 
11 #if defined(__gfx950__)
12 #define CK_TILE_FP4_CVT_DEVICE 1
13 #else
14 #define CK_TILE_FP4_CVT_DEVICE 0
15 #endif
16 
17 #define TEST_convert_with_table 0
18 
19 namespace ck_tile {
20 
21 using fp32_t = float;
22 using fp32x2_t = float __attribute__((ext_vector_type(2)));
23 using fp16x2_t = _Float16 __attribute__((ext_vector_type(2)));
24 using bf16x2_t = bfloat16_t __attribute__((ext_vector_type(2)));
25 
26 CK_TILE_HOST_DEVICE constexpr uint8_t float_to_e2m1(float x, float scale = 1.f);
27 
28 // TODO: Add stochastic method
30 {
31  // TODO: Can we merge raw_type and type?
32  using raw_type = uint8_t;
33  using type = raw_type;
35 
37  template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>>
38  CK_TILE_HOST_DEVICE constexpr pk_float4_e2m1_t(T init) : data{static_cast<type>(init)}
39  {
40  }
41  CK_TILE_HOST_DEVICE explicit constexpr pk_float4_e2m1_t(float init, float scale = 1.f)
42  : data{float_to_e2m1(init, scale)}
43  {
44  }
45  CK_TILE_HOST_DEVICE constexpr operator type() const { return data; }
46  CK_TILE_HOST_DEVICE constexpr raw_type& get() { return data; }
47  CK_TILE_HOST_DEVICE constexpr raw_type get() const { return data; }
48 
49  CK_TILE_HOST_DEVICE constexpr float to_float(float scale = 1.f) const;
50  CK_TILE_HOST_DEVICE constexpr fp32x2_t to_fp32x2(float scale = 1.f) const;
51  CK_TILE_HOST_DEVICE constexpr fp16_t to_fp16(float scale = 1.f) const;
52  CK_TILE_HOST_DEVICE constexpr fp16x2_t to_fp16x2(float scale = 1.f) const;
53  CK_TILE_HOST_DEVICE constexpr bf16_t to_bf16(float scale = 1.f) const;
54  CK_TILE_HOST_DEVICE constexpr bf16x2_t to_bf16x2(float scale = 1.f) const;
55 
56  CK_TILE_HOST_DEVICE constexpr operator float() const { return to_float(); }
57  CK_TILE_HOST_DEVICE constexpr operator fp32x2_t() const { return to_fp32x2(); }
58  CK_TILE_HOST_DEVICE constexpr operator fp16_t() const { return to_fp16(); }
59  CK_TILE_HOST_DEVICE constexpr operator fp16x2_t() const { return to_fp16x2(); }
60  CK_TILE_HOST_DEVICE constexpr operator bf16_t() const { return to_bf16(); }
61  CK_TILE_HOST_DEVICE constexpr operator bf16x2_t() const { return to_bf16x2(); }
62 
63  template <index_t I>
65  CK_TILE_HOST_DEVICE constexpr static pk_float4_e2m1_t pack(const type x0, const type x1)
66  {
67  return (x1 << 4) | (x0 & 0b00001111);
68  }
69 
70 #if TEST_convert_with_table
// Lookup table mapping a 4-bit e2m1 code (the array index) to its float value.
// Codes 0..7 are the non-negative values, codes 8..15 the same magnitudes with
// the sign bit set. Entry 8 is negative zero and must be written as a floating
// literal: the integer expression -0 evaluates to +0 and would drop the sign,
// disagreeing with the 0x8000 entry of the fp16 table.
static constexpr float e2m1_to_fp32_table[16] = {
    0.0f,  0.5f,  1.0f,  1.5f,  2.0f,  3.0f,  4.0f,  6.0f,
    -0.0f, -0.5f, -1.0f, -1.5f, -2.0f, -3.0f, -4.0f, -6.0f};
73  static constexpr fp16_t e2m1_to_fp16_table[16] = {
74  bit_cast<fp16_t>(static_cast<uint16_t>(0x0000)), // 0
75  bit_cast<fp16_t>(static_cast<uint16_t>(0x3800)), // 0.5
76  bit_cast<fp16_t>(static_cast<uint16_t>(0x3C00)), // 1
77  bit_cast<fp16_t>(static_cast<uint16_t>(0x3E00)), // 1.5
78  bit_cast<fp16_t>(static_cast<uint16_t>(0x4000)), // 2
79  bit_cast<fp16_t>(static_cast<uint16_t>(0x4200)), // 3
80  bit_cast<fp16_t>(static_cast<uint16_t>(0x4400)), // 4
81  bit_cast<fp16_t>(static_cast<uint16_t>(0x4600)), // 6
82  bit_cast<fp16_t>(static_cast<uint16_t>(0x8000)), // -0
83  bit_cast<fp16_t>(static_cast<uint16_t>(0xB800)), // -0.5
84  bit_cast<fp16_t>(static_cast<uint16_t>(0xBC00)), // -1
85  bit_cast<fp16_t>(static_cast<uint16_t>(0xBE00)), // -1.5
86  bit_cast<fp16_t>(static_cast<uint16_t>(0xC000)), // -2
87  bit_cast<fp16_t>(static_cast<uint16_t>(0xC200)), // -3
88  bit_cast<fp16_t>(static_cast<uint16_t>(0xC400)), // -4
89  bit_cast<fp16_t>(static_cast<uint16_t>(0xC600)) // -6
90  };
91 #endif
92 };
93 
96 
97 template <>
99 {
101 
102  static constexpr int exp = 2;
103  static constexpr int mant = 1;
104  static constexpr int bias = 1;
105  static constexpr int PackedSize = 2;
106 };
107 
108 // limits
109 template <class T>
110 struct numeric;
111 
112 template <>
114 {
115  static constexpr pk_fp4_raw_t binary_min_normal = 0b00100010; // 1
116  static constexpr pk_fp4_raw_t binary_max_normal = 0b01110111; // 6
117  static constexpr pk_fp4_raw_t binary_lowest_normal = 0b11111111; // -6
118  static constexpr pk_fp4_raw_t binary_min_subnorm = 0b00010001; // 0.5
119  static constexpr pk_fp4_raw_t binary_max_subnorm = 0b00010001; // 0.5
120  static constexpr pk_fp4_raw_t binary_zero = 0b00000000; // 0
121  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t min() { return binary_min_normal; }
122  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t max() { return binary_max_normal; }
123  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t lowest() { return binary_lowest_normal; }
124  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t epsilon() { return binary_min_subnorm; }
125  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t round_error() { return binary_min_subnorm; }
126  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t zero() { return binary_zero; }
127  CK_TILE_HOST_DEVICE static constexpr fp8_t denorm_min() { return binary_min_subnorm; }
128 
129  CK_TILE_HOST_DEVICE static constexpr bool has_inf() { return false; }
130  // N/A
131  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t infinity() { return max(); }
132  // N/A
133  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t quiet_NaN() { return max(); }
134  // N/A
135  CK_TILE_HOST_DEVICE static constexpr pk_fp4_t signaling_NaN() { return max(); }
136 };
137 
138 template <index_t I>
139 CK_TILE_HOST_DEVICE constexpr pk_fp4_raw_t pk_fp4_t::unpack(number<I>) const
140 {
141  static_assert(I < 2, "Index is out of range.");
142  if constexpr(I == 1)
143  return (data >> 4);
144  else
145  return data & 0b00001111;
146 }
148 // TODO: consider replacing this macro to improve performance
149 
150 #if CK_TILE_FP4_CVT_DEVICE
151 namespace impl {
152 
153 template <typename T>
154 CK_TILE_DEVICE T _from_f4(pk_fp4_raw_t src, float scale = 1.0f)
155 {
156  if constexpr(std::is_same_v<T, fp32_t>)
157  return fp32x2_t(__builtin_amdgcn_cvt_scalef32_pk_f32_fp4(src, scale, 0))[0];
158  else if constexpr(std::is_same_v<T, fp32x2_t>)
159  return __builtin_amdgcn_cvt_scalef32_pk_f32_fp4(src, scale, 0);
160  else if constexpr(std::is_same_v<T, fp16_t>)
161  return fp16x2_t(__builtin_amdgcn_cvt_scalef32_pk_f16_fp4(src, scale, 0))[0];
162  else if constexpr(std::is_same_v<T, fp16x2_t>)
163  return __builtin_amdgcn_cvt_scalef32_pk_f16_fp4(src, scale, 0);
164  else if constexpr(std::is_same_v<T, bf16_t>)
165  return bf16x2_t(__builtin_amdgcn_cvt_scalef32_pk_bf16_fp4(src, scale, 0))[0];
166  else if constexpr(std::is_same_v<T, bf16x2_t>)
167  return __builtin_amdgcn_cvt_scalef32_pk_bf16_fp4(src, scale, 0);
168  else
169  static_assert(std::false_type::value, "Unsupported type.");
170  return T{};
171 }
172 template <typename T>
173 CK_TILE_DEVICE pk_fp4_raw_t _to_f4(T src, float scale = 1.0f)
174 {
175  union
176  {
177  uint32_t u32;
178  pk_fp4_raw_t pf4[4];
179  } cvt{0};
180  if constexpr(std::is_same_v<T, fp32_t>)
181  cvt.u32 = __builtin_amdgcn_cvt_scalef32_pk_fp4_f32(cvt.u32, src, src, scale, 0);
182  else if constexpr(std::is_same_v<T, fp32x2_t>)
183  cvt.u32 = __builtin_amdgcn_cvt_scalef32_pk_fp4_f32(cvt.u32, src[0], src[1], scale, 0);
184  else if constexpr(std::is_same_v<T, fp16_t>)
185  cvt.u32 = __builtin_amdgcn_cvt_scalef32_pk_fp4_f16(cvt.u32, fp16x2_t{src, src}, scale, 0);
186  else if constexpr(std::is_same_v<T, fp16x2_t>)
187  cvt.u32 = __builtin_amdgcn_cvt_scalef32_pk_fp4_f16(cvt.u32, src, scale, 0);
188  else if constexpr(std::is_same_v<T, bf16_t>)
189  cvt.u32 = __builtin_amdgcn_cvt_scalef32_pk_fp4_bf16(cvt.u32, bf16x2_t{src, src}, scale, 0);
190  else if constexpr(std::is_same_v<T, bf16x2_t>)
191  cvt.u32 = __builtin_amdgcn_cvt_scalef32_pk_fp4_bf16(cvt.u32, src, scale, 0);
192  else
193  static_assert(std::false_type::value, "Unsupported type.");
194  return cvt.pf4[0];
195 }
196 
197 } // namespace impl
198 #endif
199 
200 CK_TILE_HOST_DEVICE constexpr bf16_t pk_fp4_t::to_bf16(float scale) const
201 {
202 #if CK_TILE_FP4_CVT_DEVICE
203  return impl::_from_f4<bf16_t>(data, scale);
204 #else
205  return bf16_t{type_convert<bf16_t>(convert_to_float<pk_fp4_t>(unpack(number<0>{}), scale))};
206 #endif
207 }
208 
210 {
211 #if CK_TILE_FP4_CVT_DEVICE
212  return impl::_from_f4<bf16x2_t>(data, scale);
213 #else
214  return bf16x2_t{type_convert<bf16_t>(convert_to_float<pk_fp4_t>(unpack(number<0>{}), scale)),
215  type_convert<bf16_t>(convert_to_float<pk_fp4_t>(unpack(number<1>{}), scale))};
216 #endif
217 }
218 
219 // TODO: make float_to_e2m1 generic so that other source types can be converted directly.
220 CK_TILE_HOST_DEVICE constexpr pk_fp4_raw_t float_to_e2m1(float x, float scale)
221 {
222 #if CK_TILE_FP4_CVT_DEVICE
223  return impl::_to_f4(x, scale);
224 #else
225  return convert_to_type<pk_fp4_t>(x, scale);
226 #endif
227 }
228 CK_TILE_HOST_DEVICE constexpr pk_fp4_t float_to_pk_fp4(const float& x, float scale)
229 {
230  return float_to_e2m1(x, scale);
231 }
232 CK_TILE_HOST_DEVICE constexpr pk_fp4_t fp16_to_pk_fp4(const fp16_t& x, float scale)
233 {
234 #if CK_TILE_FP4_CVT_DEVICE
235  return impl::_to_f4(x, scale);
236 #else
237  return float_to_e2m1(type_convert<float>(x), scale);
238 #endif
239 }
240 CK_TILE_HOST_DEVICE constexpr pk_fp4_t bf16_to_pk_fp4(const bf16_t& x, float scale)
241 {
242 #if CK_TILE_FP4_CVT_DEVICE
243  return impl::_to_f4(x, scale);
244 #else
245  return float_to_e2m1(type_convert<float>(x), scale);
246 #endif
247 }
248 CK_TILE_HOST_DEVICE constexpr pk_fp4_t fp16x2_to_pk_fp4(const fp16x2_t& x, float scale)
249 {
250 #if CK_TILE_FP4_CVT_DEVICE
251  return impl::_to_f4(x, scale);
252 #else
253  return pk_fp4_t::pack(float_to_e2m1(x[0], scale), float_to_e2m1(x[1], scale));
254 #endif
255 }
256 CK_TILE_HOST_DEVICE constexpr pk_fp4_t bf16x2_to_pk_fp4(const bf16x2_t& x, float scale)
257 {
258 #if CK_TILE_FP4_CVT_DEVICE
259  return impl::_to_f4(x, scale);
260 #else
261  return pk_fp4_t::pack(float_to_e2m1(x[0], scale), float_to_e2m1(x[1], scale));
262 #endif
263 }
264 CK_TILE_HOST_DEVICE constexpr pk_fp4_t fp32x2_to_pk_fp4(const fp32x2_t& x, float scale)
265 {
266 #if CK_TILE_FP4_CVT_DEVICE
267  return impl::_to_f4(x, scale);
268 #else
269  return pk_fp4_t::pack(float_to_e2m1(x[0], scale), float_to_e2m1(x[1], scale));
270 #endif
271 }
272 
273 CK_TILE_HOST_DEVICE constexpr fp32x2_t pk_fp4_to_fp32x2(const pk_fp4_t& x, float scale)
274 {
275  return x.to_fp32x2(scale);
276 }
277 CK_TILE_HOST_DEVICE constexpr fp16x2_t pk_fp4_to_fp16x2(const pk_fp4_t& x, float scale)
278 {
279  return x.to_fp16x2(scale);
280 }
281 CK_TILE_HOST_DEVICE constexpr bf16x2_t pk_fp4_to_bf16x2(const pk_fp4_t& x, float scale)
282 {
283  return x.to_bf16x2(scale);
284 }
285 CK_TILE_HOST_DEVICE constexpr float pk_fp4_to_float(const pk_fp4_t& x, float scale)
286 {
287  return x.to_float(scale);
288 }
289 CK_TILE_HOST_DEVICE constexpr fp16_t pk_fp4_to_fp16(const pk_fp4_t& x, float scale)
290 {
291  return x.to_fp16(scale);
292 }
293 CK_TILE_HOST_DEVICE constexpr bf16_t pk_fp4_to_bf16(const pk_fp4_t& x, float scale)
294 {
295  return x.to_bf16(scale);
296 }
297 
298 #if TEST_convert_with_table == 0
299 CK_TILE_HOST_DEVICE constexpr float pk_fp4_t::to_float(float scale) const
300 {
301 #if CK_TILE_FP4_CVT_DEVICE
302  return impl::_from_f4<fp32_t>(data, scale);
303 #else
304  return convert_to_float<pk_fp4_t>(unpack(number<0>{}), scale);
305 #endif
306 }
308 {
309 #if CK_TILE_FP4_CVT_DEVICE
310  return impl::_from_f4<fp32x2_t>(data, scale);
311 #else
312  return fp32x2_t{convert_to_float<pk_fp4_t>(unpack(number<0>{}), scale),
313  convert_to_float<pk_fp4_t>(unpack(number<1>{}), scale)};
314 #endif
315 }
316 
317 CK_TILE_HOST_DEVICE constexpr fp16_t pk_fp4_t::to_fp16(float scale) const
318 {
319 #if CK_TILE_FP4_CVT_DEVICE
320  return impl::_from_f4<fp16_t>(data, scale);
321 #else
322  return fp16_t{type_convert<fp16_t>(convert_to_float<pk_fp4_t>(unpack(number<0>{}), scale))};
323 #endif
324 }
326 {
327 #if CK_TILE_FP4_CVT_DEVICE
328  return impl::_from_f4<fp16x2_t>(data, scale);
329 #else
330  return fp16x2_t{type_convert<fp16_t>(convert_to_float<pk_fp4_t>(unpack(number<0>{}), scale)),
331  type_convert<fp16_t>(convert_to_float<pk_fp4_t>(unpack(number<1>{}), scale))};
332 #endif
333 }
334 #else
335 CK_TILE_HOST_DEVICE constexpr float pk_fp4_t::to_float(float scale) const
336 {
337  return e2m1_to_fp32_table[unpack(number<0>{})] * scale;
338 }
339 CK_TILE_HOST_DEVICE constexpr fp32x2_t pk_fp4_t::to_fp32x2(float scale) const
340 {
341  return fp32x2_t{e2m1_to_fp32_table[unpack(number<0>{})] * scale, e2m1_to_fp32_table[unpack(number<1>{}] * scale};
342 }
343 CK_TILE_HOST_DEVICE constexpr fp16_t pk_fp4_t::to_fp16(float scale) const
344 {
345  return type_convert<float>(e2m1_to_fp16_table[unpack(number<0>{})]) * scale;
346 }
347 CK_TILE_HOST_DEVICE constexpr fp16x2_t pk_fp4_t::to_fp16x2(float scale) const
348 {
349  return fp16x2_t{
350  type_convert<fp16_t>(type_convert<float>(e2m1_to_fp16_table[unpack(number<0>{})]) * scale),
351  type_convert<fp16_t>(type_convert<float>(e2m1_to_fp16_table[unpack(number<1>{})]) * scale)};
352 }
353 #endif
354 
355 } // namespace ck_tile
#define CK_TILE_DEVICE
Definition: config.hpp:41
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:42
Definition: cluster_descriptor.hpp:13
_BitInt(8) fp8_t
Definition: float8.hpp:204
ushort bfloat16_t
Definition: bfloat16.hpp:111
constexpr CK_TILE_HOST_DEVICE pk_fp4_t fp16_to_pk_fp4(const fp16_t &x, float scale)
Definition: pk_fp4.hpp:232
bfloat16_t bf16x2_t
Definition: pk_fp4.hpp:24
constexpr CK_TILE_HOST_DEVICE pk_fp4_t float_to_pk_fp4(const float &x, float scale)
Definition: pk_fp4.hpp:228
_Float16 fp16_t
Definition: half.hpp:110
constexpr CK_TILE_HOST_DEVICE uint8_t float_to_e2m1(float x, float scale=1.f)
Definition: pk_fp4.hpp:220
float fp32x2_t
Definition: pk_fp4.hpp:22
bfloat16_t bf16_t
Definition: bfloat16.hpp:113
constexpr CK_TILE_HOST_DEVICE pk_fp4_t fp32x2_to_pk_fp4(const fp32x2_t &x, float scale)
Definition: pk_fp4.hpp:264
pk_float4_e2m1_t pk_fp4_t
Definition: pk_fp4.hpp:94
float fp32_t
Definition: pk_fp4.hpp:21
_Float16 fp16x2_t
Definition: half.hpp:385
constexpr CK_TILE_HOST_DEVICE float pk_fp4_to_float(const pk_fp4_t &x, float scale)
Definition: pk_fp4.hpp:285
constexpr CK_TILE_HOST_DEVICE fp16_t pk_fp4_to_fp16(const pk_fp4_t &x, float scale)
Definition: pk_fp4.hpp:289
CK_TILE_DEVICE bfloat16_t exp(bfloat16_t x)
Definition: bfloat16.hpp:423
constexpr CK_TILE_HOST_DEVICE pk_fp4_t bf16x2_to_pk_fp4(const bf16x2_t &x, float scale)
Definition: pk_fp4.hpp:256
constexpr CK_TILE_HOST_DEVICE pk_fp4_t fp16x2_to_pk_fp4(const fp16x2_t &x, float scale)
Definition: pk_fp4.hpp:248
constexpr CK_TILE_HOST_DEVICE fp32x2_t pk_fp4_to_fp32x2(const pk_fp4_t &x, float scale)
Definition: pk_fp4.hpp:273
constexpr CK_TILE_HOST_DEVICE pk_fp4_t bf16_to_pk_fp4(const bf16_t &x, float scale)
Definition: pk_fp4.hpp:240
constexpr CK_TILE_HOST_DEVICE fp16x2_t pk_fp4_to_fp16x2(const pk_fp4_t &x, float scale)
Definition: pk_fp4.hpp:277
typename pk_fp4_t::raw_type pk_fp4_raw_t
Definition: pk_fp4.hpp:95
constexpr CK_TILE_HOST_DEVICE bf16_t pk_fp4_to_bf16(const pk_fp4_t &x, float scale)
Definition: pk_fp4.hpp:293
constexpr CK_TILE_HOST_DEVICE bf16x2_t pk_fp4_to_bf16x2(const pk_fp4_t &x, float scale)
Definition: pk_fp4.hpp:281
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1350
unsigned short uint16_t
Definition: stdint.h:125
unsigned int uint32_t
Definition: stdint.h:126
unsigned char uint8_t
Definition: stdint.h:124
Definition: integral_constant.hpp:13
static constexpr CK_TILE_HOST_DEVICE bool has_inf()
Definition: pk_fp4.hpp:129
static constexpr CK_TILE_HOST_DEVICE fp8_t denorm_min()
Definition: pk_fp4.hpp:127
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t min()
Definition: pk_fp4.hpp:121
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t infinity()
Definition: pk_fp4.hpp:131
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t round_error()
Definition: pk_fp4.hpp:125
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t epsilon()
Definition: pk_fp4.hpp:124
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t zero()
Definition: pk_fp4.hpp:126
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t quiet_NaN()
Definition: pk_fp4.hpp:133
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t signaling_NaN()
Definition: pk_fp4.hpp:135
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t lowest()
Definition: pk_fp4.hpp:123
static constexpr CK_TILE_HOST_DEVICE pk_fp4_t max()
Definition: pk_fp4.hpp:122
pk_fp4_raw_t bitwise_type
Definition: pk_fp4.hpp:100
Definition: numeric.hpp:81
static constexpr int PackedSize
Definition: numeric.hpp:82
Definition: numeric.hpp:18
static constexpr CK_TILE_HOST_DEVICE T max()
Definition: numeric.hpp:26
Definition: pk_fp4.hpp:30
constexpr CK_TILE_HOST_DEVICE bf16x2_t to_bf16x2(float scale=1.f) const
Definition: pk_fp4.hpp:209
constexpr CK_TILE_HOST_DEVICE raw_type & get()
Definition: pk_fp4.hpp:46
raw_type data
Definition: pk_fp4.hpp:34
constexpr CK_TILE_HOST_DEVICE fp16x2_t to_fp16x2(float scale=1.f) const
Definition: pk_fp4.hpp:325
constexpr CK_TILE_HOST_DEVICE fp16_t to_fp16(float scale=1.f) const
Definition: pk_fp4.hpp:317
constexpr CK_TILE_HOST_DEVICE float to_float(float scale=1.f) const
Definition: pk_fp4.hpp:299
constexpr CK_TILE_HOST_DEVICE pk_float4_e2m1_t()
Definition: pk_fp4.hpp:36
uint8_t raw_type
Definition: pk_fp4.hpp:32
constexpr CK_TILE_HOST_DEVICE pk_float4_e2m1_t(float init, float scale=1.f)
Definition: pk_fp4.hpp:41
constexpr CK_TILE_HOST_DEVICE raw_type unpack(number< I >) const
constexpr CK_TILE_HOST_DEVICE raw_type get() const
Definition: pk_fp4.hpp:47
constexpr CK_TILE_HOST_DEVICE fp32x2_t to_fp32x2(float scale=1.f) const
Definition: pk_fp4.hpp:307
constexpr CK_TILE_HOST_DEVICE bf16_t to_bf16(float scale=1.f) const
Definition: pk_fp4.hpp:200
constexpr CK_TILE_HOST_DEVICE pk_float4_e2m1_t(T init)
Definition: pk_fp4.hpp:38
constexpr static CK_TILE_HOST_DEVICE pk_float4_e2m1_t pack(const type x0, const type x1)
Definition: pk_fp4.hpp:65
raw_type type
Definition: pk_fp4.hpp:33
#define CK_TILE_ARITHMETIC_USING_FLOAT(attr_, type_)
Definition: numeric.hpp:106