9 #define UINT_MAX 4294967295
19 template <
typename DTYPE>
22 return DTYPE::dataInfo.hasInf;
47 return get_exponent_value<T>(x) == 0;
53 double mantissa = is_subnormal<T>(x) ? 0.0f : 1.0f;
55 for(uint i = 0; i < NumericUtils<T>::mant; i++)
79 if(is_subnormal<T>(data))
83 float d_mant = get_mantissa_value<T>(data);
85 float data_value = d_sign * d_exp * d_mant;
89 return data_value * scale_value;
101 template <
typename T>
135 float diff = max_value - prev_val;
137 float actual_max = max_value + (diff / 2);
139 if(std::abs(
value) < actual_max)
142 (exp << NumericUtils<T>::mant) | mantissa;
146 if(!get_data_has_inf<T>())
155 (exp << NumericUtils<T>::mant);
161 x = bit_cast<uint32_t>(
value);
179 const int mini_denormal_act_exponent = 1 - mini_bias;
181 int act_exponent, out_exponent, exponent_diff;
183 bool is_subnorm =
false;
187 act_exponent = exponent - bias + 1;
188 exponent_diff = mini_denormal_act_exponent - act_exponent;
193 act_exponent = exponent - bias;
194 if(act_exponent <= mini_denormal_act_exponent)
196 exponent_diff = mini_denormal_act_exponent - act_exponent;
203 mantissa += (1UL << mfmt);
207 shift_amount = (shift_amount >= 64) ? 63 : shift_amount;
208 bool midpoint = (mantissa & ((1UL << shift_amount) - 1)) == (1UL << (shift_amount - 1));
212 if(is_subnorm && std::abs(
value) < std::abs(min_subnorm))
215 if(std::abs(
value) <= std::abs(min_subnorm -
value))
221 if(exponent_diff > 0)
222 mantissa >>= exponent_diff;
223 else if(exponent_diff == -1)
224 mantissa <<= -exponent_diff;
225 bool implicit_one = mantissa & (1 << mfmt);
226 out_exponent = (act_exponent + exponent_diff) + mini_bias - (implicit_one ? 0 : 1);
230 mantissa += (midpoint ? (odd ? mantissa : mantissa - 1) : mantissa) & drop_mask;
232 if(out_exponent == 0)
234 if((1UL << mfmt) & mantissa)
241 if((1UL << (mfmt + 1)) & mantissa)
250 if(out_exponent == 0 && mantissa == 0)
260 template <
typename T>
289 float diff = max_value - prev_val;
291 float actual_max = max_value + (diff / 2);
293 if(std::abs(
value) < actual_max)
295 double d_max_value =
static_cast<double>(max_value);
296 double d_actual_max =
static_cast<double>(actual_max);
297 double d_value =
static_cast<double>(
value);
298 double d_is = std::abs(d_max_value - d_actual_max);
299 double d_seed =
static_cast<double>(seed);
300 double d_prob = 1.0f - (std::abs(d_value - d_max_value) / d_is);
302 double thresh = UINT_MAX * d_prob;
304 if(!get_data_has_inf<T>() || d_seed <= thresh)
312 | (exp << NumericUtils<T>::mant);
317 if(!get_data_has_inf<T>())
323 | (exp << NumericUtils<T>::mant);
339 auto mant = f32_mant;
340 bool subnorm =
false;
373 mant += seed >> sr_shift;
385 auto val = sign | biased_exp << NumericUtils<T>::mant | mant;
__host__ T exp(T x)
Definition: math_v2.hpp:391
__host__ T pow(T x, T gamma)
Definition: math_v2.hpp:427
Definition: check_err.hpp:24
__host__ __device__ T sat_convert_to_type(float value)
__host__ __device__ bool is_subnormal(T x)
Definition: mxfp_utils.hpp:45
__host__ __device__ bool get_data_has_inf()
Definition: mxfp_utils.hpp:67
__host__ __device__ T sat_convert_to_type_sr(float value, uint32_t seed)
__host__ __device__ float convert_to_float(T data, int scale_exp)
Definition: mxfp_utils.hpp:73
__host__ __device__ T convert_to_type_sr(float value, uint32_t seed)
Definition: mxfp_utils.hpp:261
__host__ __device__ bool is_zero(e8m0_bexp_t const scale, T const data)
__host__ __device__ T convert_to_type(float value)
Definition: mxfp_utils.hpp:102
__host__ __device__ bool is_inf(e8m0_bexp_t const scale, T const data)
__host__ constexpr __device__ int32_t get_exponent_value(T x)
Definition: mxfp_utils.hpp:35
__host__ __device__ double get_mantissa_value(T x)
Definition: mxfp_utils.hpp:51
__host__ __device__ bool is_nan(e8m0_bexp_t const scale, T const data)
bool getDataHasInf()
Definition: mxfp_utils.hpp:20
__host__ __device__ float to_float(e8m0_bexp_t const scale, T const data)
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1350
unsigned int uint32_t
Definition: stdint.h:126
signed int int32_t
Definition: stdint.h:123
Definition: numeric_limits.hpp:309
__host__ static constexpr __device__ T Max()
Definition: numeric_limits.hpp:311
Definition: numeric_utils.hpp:10
Unsigned representation of a conventional biased Float32 exponent.
Definition: e8m0.hpp:25
Definition: mxfp_utils.hpp:14
float value_float
Definition: mxfp_utils.hpp:15
uint32_t value_bitwise
Definition: mxfp_utils.hpp:16