/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/magic_division.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/magic_division.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/magic_division.hpp Source File
magic_division.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include "ck/ck.hpp"
7 #include "numeric_limits.hpp"
8 #include "integral_constant.hpp"
9 #include "number.hpp"
10 #include "type.hpp"
11 #include "tuple.hpp"
12 
13 namespace ck {
14 
15 // magic number division
16 // Caution:
17 // 1. For uint32_t as dividend: magic number division implementation being used would produce
18 // correct result if the dividend is uint32_t and its value is within 31-bit value range.
19 // 2. For int32_t as dividendd: magic number division for int32_t dividened has not been
20 // implemented, the int32_t dividend would be bit-wise interpreted as uint32_t and magic number
21 // division implementation for uint32_t is then used. Therefore, dividend value need to be
22 // non-negative.
23 // TODO:
24 // 1. Implement magic number divison for int32_t
25 // 2. Implement magic number divison for unit32_t with 32-bit value range
27 {
28  // uint32_t
29  __host__ __device__ static constexpr auto CalculateMagicNumbers(uint32_t divisor)
30  {
31  // WARNING: magic division is only applicable for division inside this range.
32  // You should use the return value of CalculateMagicNumbers, if division is not inside this
33  // range. The "else" logic below is to quiet down run-time error.
34  if(divisor >= 1 && divisor <= ck::NumericLimits<int32_t>::Max())
35  {
36  uint32_t shift = 0;
37  for(shift = 0; shift < 32; ++shift)
38  {
39  if((1U << shift) >= divisor)
40  {
41  break;
42  }
43  }
44 
45  uint64_t one = 1;
46  uint64_t multiplier = ((one << 32) * ((one << shift) - divisor)) / divisor + 1;
47  // assert(multiplier <= 0xffffffffUL);
48 
49  return make_tuple(uint32_t(multiplier), shift);
50  }
51  else
52  {
53  return make_tuple(uint32_t(0), uint32_t(0));
54  }
55  }
56 
57  __host__ __device__ static constexpr uint32_t CalculateMagicMultiplier(uint32_t divisor)
58  {
59  auto tmp = CalculateMagicNumbers(divisor);
60 
61  return tmp[Number<0>{}];
62  }
63 
64  __host__ __device__ static constexpr uint32_t CalculateMagicShift(uint32_t divisor)
65  {
66  auto tmp = CalculateMagicNumbers(divisor);
67 
68  return tmp[Number<1>{}];
69  }
70 
71  // integral_constant<uint32_t, .>
72  template <uint32_t Divisor>
73  __host__ __device__ static constexpr auto
75  {
76  constexpr auto tmp = CalculateMagicNumbers(uint32_t{Divisor});
77 
78  constexpr uint32_t multiplier = tmp[Number<0>{}];
79  constexpr uint32_t shift = tmp[Number<1>{}];
80 
83  }
84 
85  template <uint32_t Divisor>
86  __host__ __device__ static constexpr auto
88  {
89  constexpr uint32_t multiplier = CalculateMagicMultiplier(uint32_t{Divisor});
90 
92  }
93 
94  template <uint32_t Divisor>
95  __host__ __device__ static constexpr auto
97  {
98  constexpr uint32_t shift = CalculateMagicShift(uint32_t{Divisor});
99 
101  }
102 
103  // integral_constant<int32_t, .>
104  template <int32_t Divisor>
105  __host__ __device__ static constexpr auto
107  {
109  }
110 
111  template <int32_t Divisor>
112  __host__ __device__ static constexpr auto
114  {
116  }
117 
118  template <int32_t Divisor>
119  __host__ __device__ static constexpr auto
121  {
123  }
124 
125  // magic division for uint32_t
126  __device__ static constexpr uint32_t
127  DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift)
128  {
129  uint32_t tmp = __umulhi(dividend, multiplier);
130  return (tmp + dividend) >> shift;
131  }
132 
133  __host__ static constexpr uint32_t
134  DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift)
135  {
136  uint32_t tmp = static_cast<uint64_t>(dividend) * multiplier >> 32;
137  return (tmp + dividend) >> shift;
138  }
139 
140  // magic division for int32_t
141  // HACK: use dividend_i32 as if it's uint32_t, dividend_i32 need to be
142  // non-negative for result to be correct
143  // TODO: figure out how to do magic number divison for int32_t as dividended
144  __device__ static constexpr int32_t
145  DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
146  {
147  uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
148  uint32_t tmp = __umulhi(dividend_u32, multiplier);
149  return (tmp + dividend_u32) >> shift;
150  }
151 
152  __host__ static constexpr int32_t
153  DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
154  {
155  uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
156  uint32_t tmp = static_cast<uint64_t>(dividend_u32) * multiplier >> 32;
157  return (tmp + dividend_u32) >> shift;
158  }
159 };
160 
161 struct MDiv
162 {
163  // 1 dword -> 3 dword storage
166  uint32_t shift; // TODO: 8 bit is enough
167 
168  // prefer construct on host
169  __host__ __device__ MDiv(uint32_t divisor_) : divisor(divisor_)
170  {
171  auto tmp = MagicDivision::CalculateMagicNumbers(divisor_);
172 
173  multiplier = tmp[Number<0>{}];
174  shift = tmp[Number<1>{}];
175  }
176 
177  __host__ __device__ MDiv() : divisor(0), multiplier(0), shift(0) {}
178 
179  __host__ __device__ void update(uint32_t divisor_)
180  {
181  divisor = divisor_;
182  auto tmp = MagicDivision::CalculateMagicNumbers(divisor_);
183 
184  multiplier = tmp[Number<0>{}];
185  shift = tmp[Number<1>{}];
186  }
187 
188  __host__ __device__ uint32_t div(uint32_t dividend_) const
189  {
190  return MagicDivision::DoMagicDivision(dividend_, multiplier, shift);
191  }
192 
193  __host__ __device__ void
194  divmod(uint32_t dividend_, uint32_t& quotient_, uint32_t& remainder_) const
195  {
196  quotient_ = div(dividend_);
197  remainder_ = dividend_ - (quotient_ * divisor);
198  }
199 
200  __host__ __device__ uint32_t get() const { return divisor; }
201 };
202 
203 struct MDiv2
204 {
205  // 1 dword -> 2 dword storage, divisor need compute from runtime
207  uint32_t shift; // TODO: 8 bit is enough
208 
209  // prefer construct on host
210  __host__ __device__ MDiv2(uint32_t divisor_)
211  {
212  auto tmp = MagicDivision::CalculateMagicNumbers(divisor_);
213 
214  multiplier = tmp[Number<0>{}];
215  shift = tmp[Number<1>{}];
216  }
217 
218  __host__ __device__ MDiv2() : multiplier(0), shift(0) {}
219 
220  __host__ __device__ uint32_t div(uint32_t dividend_) const
221  {
222  return MagicDivision::DoMagicDivision(dividend_, multiplier, shift);
223  }
224 
225  __host__ __device__ void
226  divmod(uint32_t dividend_, uint32_t divisor_, uint32_t& quotient_, uint32_t& remainder_) const
227  {
228  quotient_ = div(dividend_);
229  remainder_ = dividend_ - (quotient_ * divisor_);
230  }
231 };
232 
233 } // namespace ck
Definition: ck.hpp:268
__host__ constexpr __device__ auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:211
unsigned int uint32_t
Definition: stdint.h:126
signed int int32_t
Definition: stdint.h:123
unsigned __int64 uint64_t
Definition: stdint.h:136
Definition: magic_division.hpp:204
__host__ __device__ MDiv2(uint32_t divisor_)
Definition: magic_division.hpp:210
uint32_t shift
Definition: magic_division.hpp:207
uint32_t multiplier
Definition: magic_division.hpp:206
__host__ __device__ void divmod(uint32_t dividend_, uint32_t divisor_, uint32_t &quotient_, uint32_t &remainder_) const
Definition: magic_division.hpp:226
__host__ __device__ uint32_t div(uint32_t dividend_) const
Definition: magic_division.hpp:220
__host__ __device__ MDiv2()
Definition: magic_division.hpp:218
Definition: magic_division.hpp:162
__host__ __device__ MDiv()
Definition: magic_division.hpp:177
uint32_t divisor
Definition: magic_division.hpp:164
__host__ __device__ uint32_t get() const
Definition: magic_division.hpp:200
__host__ __device__ void divmod(uint32_t dividend_, uint32_t &quotient_, uint32_t &remainder_) const
Definition: magic_division.hpp:194
__host__ __device__ void update(uint32_t divisor_)
Definition: magic_division.hpp:179
__host__ __device__ MDiv(uint32_t divisor_)
Definition: magic_division.hpp:169
uint32_t multiplier
Definition: magic_division.hpp:165
uint32_t shift
Definition: magic_division.hpp:166
__host__ __device__ uint32_t div(uint32_t dividend_) const
Definition: magic_division.hpp:188
Definition: magic_division.hpp:27
static constexpr __host__ uint32_t DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition: magic_division.hpp:134
__host__ static constexpr __device__ auto CalculateMagicShift(integral_constant< int32_t, Divisor >)
Definition: magic_division.hpp:120
__host__ static constexpr __device__ auto CalculateMagicMultiplier(integral_constant< uint32_t, Divisor >)
Definition: magic_division.hpp:87
__host__ static constexpr __device__ auto CalculateMagicNumbers(uint32_t divisor)
Definition: magic_division.hpp:29
static constexpr __device__ uint32_t DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition: magic_division.hpp:127
__host__ static constexpr __device__ auto CalculateMagicShift(integral_constant< uint32_t, Divisor >)
Definition: magic_division.hpp:96
__host__ static constexpr __device__ uint32_t CalculateMagicShift(uint32_t divisor)
Definition: magic_division.hpp:64
static constexpr __device__ int32_t DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition: magic_division.hpp:145
static constexpr __host__ int32_t DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition: magic_division.hpp:153
__host__ static constexpr __device__ auto CalculateMagicNumbers(integral_constant< int32_t, Divisor >)
Definition: magic_division.hpp:106
__host__ static constexpr __device__ auto CalculateMagicNumbers(integral_constant< uint32_t, Divisor >)
Definition: magic_division.hpp:74
__host__ static constexpr __device__ uint32_t CalculateMagicMultiplier(uint32_t divisor)
Definition: magic_division.hpp:57
__host__ static constexpr __device__ auto CalculateMagicMultiplier(integral_constant< int32_t, Divisor >)
Definition: magic_division.hpp:113
Definition: numeric_limits.hpp:309
Definition: integral_constant.hpp:20