36 while((1U << shift_u32) < divisor)
41 uint64_t tmp_u64 = static_cast<uint64_t>((1UL << shift_u32) - divisor) << 32;
42 uint32_t multiplier_u32 = tmp_u64 / divisor + 1;
47 template <auto Divisor, typename = std::enable_if_t<(0 < Divisor)>>
48 CK_TILE_HOST_DEVICE static constexpr auto calculate_magic_numbers(constant<Divisor>)
50 constexpr auto tmp = calculate_magic_numbers(uint32_t{Divisor});
52 constexpr uint32_t multiplier = tmp[number<0>{}];
53 constexpr uint32_t shift = tmp[number<1>{}];
55 return make_tuple(constant<multiplier>{}, constant<shift>{});
59 CK_TILE_DEVICE static constexpr uint32_t
60 do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
62 if(__builtin_is_constant_evaluated())
64 uint32_t tmp = (static_cast<uint64_t>(dividend) * multiplier) >> 32;
65 return (tmp + dividend) >> shift;
69 uint32_t tmp = __umulhi(dividend, multiplier);
70 return (tmp + dividend) >> shift;
78 return (tmp + dividend) >> shift;
88 if(__builtin_is_constant_evaluated())
90 uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
92 return (tmp + dividend_u32) >> shift;
96 uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
97 uint32_t tmp = __umulhi(dividend_u32, multiplier);
98 return (tmp + dividend_u32) >> shift;
105 uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
107 return (tmp + dividend_u32) >> shift;
123 while((1U << shift_u32) < divisor)
129 uint32_t multiplier_u32 = ((one << 16) * ((one << shift_u32) - divisor)) / divisor + 1;
135 template <auto Divisor>
138 constexpr auto tmp = calculate_magic_numbers(uint32_t{Divisor});
150 uint32_t tmp = (dividend * multiplier) >> 16;
151 return (tmp + dividend) >> shift;
157 uint32_t tmp = (dividend * multiplier) >> 16;
158 return (tmp + dividend) >> shift;
168 uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
169 uint32_t tmp = (dividend_u32 * multiplier) >> 16;
170 return (tmp + dividend_u32) >> shift;
176 uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
177 uint32_t tmp = (dividend_u32 * multiplier) >> 16;
178 return (tmp + dividend_u32) >> shift;
195 auto tmp = magic_division::calculate_magic_numbers(divisor_);
206 auto tmp = magic_division::calculate_magic_numbers(divisor_);
214 return magic_division::do_magic_division(dividend_, multiplier, shift);
220 quotient_ = div(dividend_);
221 remainder_ = dividend_ - (quotient_ * divisor);
236 auto tmp = magic_division::calculate_magic_numbers(divisor_);
246 return magic_division::do_magic_division(dividend_, multiplier, shift);
252 quotient_ = div(dividend_);
253 remainder_ = dividend_ - (quotient_ * divisor_);
#define CK_TILE_DEVICE
Definition: config.hpp:41
#define CK_TILE_HOST
Definition: config.hpp:40
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:42
Definition: cluster_descriptor.hpp:13
int32_t int32_t
Definition: integer.hpp:10
constexpr CK_TILE_HOST_DEVICE auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:360
__host__ constexpr __device__ auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:211
unsigned int uint32_t
Definition: stdint.h:126
unsigned __int64 uint64_t
Definition: stdint.h:136
Definition: integral_constant.hpp:13
Definition: magic_div.hpp:114
static constexpr CK_TILE_DEVICE uint32_t do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:148
static constexpr CK_TILE_DEVICE int32_t do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:166
static constexpr CK_TILE_HOST int32_t do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:174
static constexpr CK_TILE_HOST uint32_t do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:155
static constexpr CK_TILE_HOST_DEVICE auto calculate_magic_numbers(uint32_t divisor)
Definition: magic_div.hpp:116
static constexpr CK_TILE_HOST_DEVICE auto calculate_magic_numbers(constant< Divisor >)
Definition: magic_div.hpp:136
Definition: magic_div.hpp:27
static constexpr CK_TILE_HOST int32_t do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:103
static constexpr CK_TILE_HOST uint32_t do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:75
static constexpr CK_TILE_HOST_DEVICE auto calculate_magic_numbers(uint32_t divisor)
Definition: magic_div.hpp:29
static constexpr CK_TILE_DEVICE int32_t do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:86
Definition: magic_div.hpp:228
CK_TILE_HOST_DEVICE void divmod(uint32_t dividend_, uint32_t divisor_, uint32_t &quotient_, uint32_t &remainder_) const
Definition: magic_div.hpp:250
CK_TILE_HOST_DEVICE mdiv2(uint32_t divisor_)
Definition: magic_div.hpp:234
CK_TILE_HOST_DEVICE uint32_t div(uint32_t dividend_) const
Definition: magic_div.hpp:244
uint32_t multiplier
Definition: magic_div.hpp:230
CK_TILE_HOST_DEVICE mdiv2()
Definition: magic_div.hpp:242
uint32_t shift
Definition: magic_div.hpp:231
Definition: magic_div.hpp:186
CK_TILE_HOST_DEVICE mdiv(uint32_t divisor_)
Definition: magic_div.hpp:193
CK_TILE_HOST_DEVICE uint32_t get() const
Definition: magic_div.hpp:224
CK_TILE_HOST_DEVICE mdiv()
Definition: magic_div.hpp:201
CK_TILE_HOST_DEVICE void divmod(uint32_t dividend_, uint32_t &quotient_, uint32_t &remainder_) const
Definition: magic_div.hpp:218
uint32_t divisor
Definition: magic_div.hpp:188
CK_TILE_HOST_DEVICE uint32_t div(uint32_t dividend_) const
Definition: magic_div.hpp:212
uint32_t shift
Definition: magic_div.hpp:190
uint32_t multiplier
Definition: magic_div.hpp:189
CK_TILE_HOST_DEVICE void update(uint32_t divisor_)
Definition: magic_div.hpp:203