/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/magic_division.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/magic_division.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/magic_division.hpp Source File
magic_division.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include "ck/ck.hpp"
7 #include "numeric_limits.hpp"
8 #include "integral_constant.hpp"
9 #include "number.hpp"
10 #include "type.hpp"
11 #include "tuple.hpp"
12 
13 #ifdef CK_CODE_GEN_RTC
14 #define INT32_MAX 2147483647
15 #endif
16 
17 namespace ck {
18 
19 // magic number division
20 // Caution:
21 // 1. For uint32_t as dividend: magic number division implementation being used would produce
22 // correct result if the dividend is uint32_t and its value is within 31-bit value range.
23 // 2. For int32_t as dividendd: magic number division for int32_t dividened has not been
24 // implemented, the int32_t dividend would be bit-wise interpreted as uint32_t and magic number
25 // division implementation for uint32_t is then used. Therefore, dividend value need to be
26 // non-negative.
27 // TODO:
28 // 1. Implement magic number divison for int32_t
29 // 2. Implement magic number divison for unit32_t with 32-bit value range
31 {
32  // uint32_t
33  __host__ __device__ static constexpr auto CalculateMagicNumbers(uint32_t divisor)
34  {
35  // WARNING: magic division is only applicable for division inside this range.
36  // You should use the return value of CalculateMagicNumbers, if division is not inside this
37  // range. The "else" logic below is to quiet down run-time error.
38  if(divisor >= 1 && divisor <= ck::NumericLimits<int32_t>::Max())
39  {
40  uint32_t shift = 0;
41  for(shift = 0; shift < 32; ++shift)
42  {
43  if((1U << shift) >= divisor)
44  {
45  break;
46  }
47  }
48 
49  uint64_t one = 1;
50  uint64_t multiplier = ((one << 32) * ((one << shift) - divisor)) / divisor + 1;
51  // assert(multiplier <= 0xffffffffUL);
52 
53  return make_tuple(uint32_t(multiplier), shift);
54  }
55  else
56  {
57  return make_tuple(uint32_t(0), uint32_t(0));
58  }
59  }
60 
61  __host__ __device__ static constexpr uint32_t CalculateMagicMultiplier(uint32_t divisor)
62  {
63  auto tmp = CalculateMagicNumbers(divisor);
64 
65  return tmp[Number<0>{}];
66  }
67 
68  __host__ __device__ static constexpr uint32_t CalculateMagicShift(uint32_t divisor)
69  {
70  auto tmp = CalculateMagicNumbers(divisor);
71 
72  return tmp[Number<1>{}];
73  }
74 
75  // integral_constant<uint32_t, .>
76  template <uint32_t Divisor>
77  __host__ __device__ static constexpr auto
79  {
80  constexpr auto tmp = CalculateMagicNumbers(uint32_t{Divisor});
81 
82  constexpr uint32_t multiplier = tmp[Number<0>{}];
83  constexpr uint32_t shift = tmp[Number<1>{}];
84 
87  }
88 
89  template <uint32_t Divisor>
90  __host__ __device__ static constexpr auto
92  {
93  constexpr uint32_t multiplier = CalculateMagicMultiplier(uint32_t{Divisor});
94 
96  }
97 
98  template <uint32_t Divisor>
99  __host__ __device__ static constexpr auto
101  {
102  constexpr uint32_t shift = CalculateMagicShift(uint32_t{Divisor});
103 
105  }
106 
107  // integral_constant<int32_t, .>
108  template <int32_t Divisor>
109  __host__ __device__ static constexpr auto
111  {
113  }
114 
115  template <int32_t Divisor>
116  __host__ __device__ static constexpr auto
118  {
120  }
121 
122  template <int32_t Divisor>
123  __host__ __device__ static constexpr auto
125  {
127  }
128 
129  // magic division for uint32_t
130  __device__ static constexpr uint32_t
131  DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift)
132  {
133  uint32_t tmp = __umulhi(dividend, multiplier);
134  return (tmp + dividend) >> shift;
135  }
136 
137  __host__ static constexpr uint32_t
138  DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift)
139  {
140  uint32_t tmp = static_cast<uint64_t>(dividend) * multiplier >> 32;
141  return (tmp + dividend) >> shift;
142  }
143 
144  // magic division for int32_t
145  // HACK: use dividend_i32 as if it's uint32_t, dividend_i32 need to be
146  // non-negative for result to be correct
147  // TODO: figure out how to do magic number divison for int32_t as dividended
148  __device__ static constexpr int32_t
149  DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
150  {
151  uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
152  uint32_t tmp = __umulhi(dividend_u32, multiplier);
153  return (tmp + dividend_u32) >> shift;
154  }
155 
156  __host__ static constexpr int32_t
157  DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
158  {
159  uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
160  uint32_t tmp = static_cast<uint64_t>(dividend_u32) * multiplier >> 32;
161  return (tmp + dividend_u32) >> shift;
162  }
163 };
164 
165 struct MDiv
166 {
167  // 1 dword -> 3 dword storage
170  uint32_t shift; // TODO: 8 bit is enough
171 
172  // prefer construct on host
173  __host__ __device__ MDiv(uint32_t divisor_) : divisor(divisor_)
174  {
175  auto tmp = MagicDivision::CalculateMagicNumbers(divisor_);
176 
177  multiplier = tmp[Number<0>{}];
178  shift = tmp[Number<1>{}];
179  }
180 
181  __host__ __device__ MDiv() : divisor(0), multiplier(0), shift(0) {}
182 
183  __host__ __device__ void update(uint32_t divisor_)
184  {
185  divisor = divisor_;
186  auto tmp = MagicDivision::CalculateMagicNumbers(divisor_);
187 
188  multiplier = tmp[Number<0>{}];
189  shift = tmp[Number<1>{}];
190  }
191 
192  __host__ __device__ uint32_t div(uint32_t dividend_) const
193  {
194  return MagicDivision::DoMagicDivision(dividend_, multiplier, shift);
195  }
196 
197  __host__ __device__ void
198  divmod(uint32_t dividend_, uint32_t& quotient_, uint32_t& remainder_) const
199  {
200  quotient_ = div(dividend_);
201  remainder_ = dividend_ - (quotient_ * divisor);
202  }
203 
204  __host__ __device__ uint32_t get() const { return divisor; }
205 };
206 
207 struct MDiv2
208 {
209  // 1 dword -> 2 dword storage, divisor need compute from runtime
211  uint32_t shift; // TODO: 8 bit is enough
212 
213  // prefer construct on host
214  __host__ __device__ MDiv2(uint32_t divisor_)
215  {
216  auto tmp = MagicDivision::CalculateMagicNumbers(divisor_);
217 
218  multiplier = tmp[Number<0>{}];
219  shift = tmp[Number<1>{}];
220  }
221 
222  __host__ __device__ MDiv2() : multiplier(0), shift(0) {}
223 
224  __host__ __device__ uint32_t div(uint32_t dividend_) const
225  {
226  return MagicDivision::DoMagicDivision(dividend_, multiplier, shift);
227  }
228 
229  __host__ __device__ void
230  divmod(uint32_t dividend_, uint32_t divisor_, uint32_t& quotient_, uint32_t& remainder_) const
231  {
232  quotient_ = div(dividend_);
233  remainder_ = dividend_ - (quotient_ * divisor_);
234  }
235 };
236 
237 } // namespace ck
Definition: ck.hpp:267
__host__ constexpr __device__ auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:211
unsigned int uint32_t
Definition: stdint.h:126
signed int int32_t
Definition: stdint.h:123
unsigned __int64 uint64_t
Definition: stdint.h:136
Definition: magic_division.hpp:208
__host__ __device__ MDiv2(uint32_t divisor_)
Definition: magic_division.hpp:214
uint32_t shift
Definition: magic_division.hpp:211
uint32_t multiplier
Definition: magic_division.hpp:210
__host__ __device__ void divmod(uint32_t dividend_, uint32_t divisor_, uint32_t &quotient_, uint32_t &remainder_) const
Definition: magic_division.hpp:230
__host__ __device__ uint32_t div(uint32_t dividend_) const
Definition: magic_division.hpp:224
__host__ __device__ MDiv2()
Definition: magic_division.hpp:222
Definition: magic_division.hpp:166
__host__ __device__ MDiv()
Definition: magic_division.hpp:181
uint32_t divisor
Definition: magic_division.hpp:168
__host__ __device__ uint32_t get() const
Definition: magic_division.hpp:204
__host__ __device__ void divmod(uint32_t dividend_, uint32_t &quotient_, uint32_t &remainder_) const
Definition: magic_division.hpp:198
__host__ __device__ void update(uint32_t divisor_)
Definition: magic_division.hpp:183
__host__ __device__ MDiv(uint32_t divisor_)
Definition: magic_division.hpp:173
uint32_t multiplier
Definition: magic_division.hpp:169
uint32_t shift
Definition: magic_division.hpp:170
__host__ __device__ uint32_t div(uint32_t dividend_) const
Definition: magic_division.hpp:192
Definition: magic_division.hpp:31
static constexpr __host__ uint32_t DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition: magic_division.hpp:138
__host__ static constexpr __device__ auto CalculateMagicShift(integral_constant< int32_t, Divisor >)
Definition: magic_division.hpp:124
__host__ static constexpr __device__ auto CalculateMagicMultiplier(integral_constant< uint32_t, Divisor >)
Definition: magic_division.hpp:91
__host__ static constexpr __device__ auto CalculateMagicNumbers(uint32_t divisor)
Definition: magic_division.hpp:33
static constexpr __device__ uint32_t DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition: magic_division.hpp:131
__host__ static constexpr __device__ auto CalculateMagicShift(integral_constant< uint32_t, Divisor >)
Definition: magic_division.hpp:100
__host__ static constexpr __device__ uint32_t CalculateMagicShift(uint32_t divisor)
Definition: magic_division.hpp:68
static constexpr __device__ int32_t DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition: magic_division.hpp:149
static constexpr __host__ int32_t DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition: magic_division.hpp:157
__host__ static constexpr __device__ auto CalculateMagicNumbers(integral_constant< int32_t, Divisor >)
Definition: magic_division.hpp:110
__host__ static constexpr __device__ auto CalculateMagicNumbers(integral_constant< uint32_t, Divisor >)
Definition: magic_division.hpp:78
__host__ static constexpr __device__ uint32_t CalculateMagicMultiplier(uint32_t divisor)
Definition: magic_division.hpp:61
__host__ static constexpr __device__ auto CalculateMagicMultiplier(integral_constant< int32_t, Divisor >)
Definition: magic_division.hpp:117
Definition: numeric_limits.hpp:309
Definition: integral_constant.hpp:20