/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/utility/magic_div.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/utility/magic_div.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/utility/magic_div.hpp Source File
magic_div.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
11 #include <stdint.h>
12 
13 namespace ck_tile {
14 
15 // magic number division
16 // Caution:
17 // 1. For uint32_t as dividend: magic number division implementation being used would produce
18 // correct result if the dividend is uint32_t and its value is within 31-bit value range.
19 // 2. For int32_t as dividendd: magic number division for int32_t dividened has not been
20 // implemented, the int32_t dividend would be bit-wise interpreted as uint32_t and magic number
21 // division implementation for uint32_t is then used. Therefore, dividend value need to be
22 // non-negative.
23 // TODO:
24 // 1. Implement magic number divison for int32_t
25 // 2. Implement magic number divison for unit32_t with 32-bit value range
27 {
28  // uint32_t
29  CK_TILE_HOST_DEVICE static constexpr auto calculate_magic_numbers(uint32_t divisor)
30  {
31  // WARNING: magic division is only valid for division inside this range.
32  // assert(divisor >= 1 && divisor <= INT32_MAX)
33 
34  uint32_t shift_u32 = 0;
35 
36  while((1U << shift_u32) < divisor)
37  {
38  shift_u32++;
39  };
40 
41  uint64_t tmp_u64 = static_cast<uint64_t>((1UL << shift_u32) - divisor) << 32;
42  uint32_t multiplier_u32 = tmp_u64 / divisor + 1;
43 
44  return make_tuple(multiplier_u32, shift_u32);
45  }
46 
47  template <auto Divisor, typename = std::enable_if_t<(0 < Divisor)>>
48  CK_TILE_HOST_DEVICE static constexpr auto calculate_magic_numbers(constant<Divisor>)
49  {
50  constexpr auto tmp = calculate_magic_numbers(uint32_t{Divisor});
51 
52  constexpr uint32_t multiplier = tmp[number<0>{}];
53  constexpr uint32_t shift = tmp[number<1>{}];
54 
55  return make_tuple(constant<multiplier>{}, constant<shift>{});
56  }
57 
58  // magic division for uint32_t
59  CK_TILE_DEVICE static constexpr uint32_t
60  do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
61  {
62  if(__builtin_is_constant_evaluated())
63  {
64  uint32_t tmp = (static_cast<uint64_t>(dividend) * multiplier) >> 32;
65  return (tmp + dividend) >> shift;
66  }
67  else
68  {
69  uint32_t tmp = __umulhi(dividend, multiplier);
70  return (tmp + dividend) >> shift;
71  }
72  }
73 
74  CK_TILE_HOST static constexpr uint32_t
75  do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
76  {
77  uint32_t tmp = (static_cast<uint64_t>(dividend) * multiplier) >> 32;
78  return (tmp + dividend) >> shift;
79  }
80 
81  // magic division for int32_t
82  // HACK: use dividend_i32 as if it's uint32_t, dividend_i32 need to be
83  // non-negative for result to be correct
84  // TODO: figure out how to do magic number divison for int32_t as dividended
85  CK_TILE_DEVICE static constexpr int32_t
86  do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
87  {
88  if(__builtin_is_constant_evaluated())
89  {
90  uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
91  uint32_t tmp = (static_cast<uint64_t>(dividend_u32) * multiplier) >> 32;
92  return (tmp + dividend_u32) >> shift;
93  }
94  else
95  {
96  uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
97  uint32_t tmp = __umulhi(dividend_u32, multiplier);
98  return (tmp + dividend_u32) >> shift;
99  }
100  }
101 
102  CK_TILE_HOST static constexpr int32_t
103  do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
104  {
105  uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
106  uint32_t tmp = (static_cast<uint64_t>(dividend_u32) * multiplier) >> 32;
107  return (tmp + dividend_u32) >> shift;
108  }
109 };
110 
111 // magic number division
112 // This version on works for divisor and dividended between [0, 1 << 16]
114 {
115  // uint32_t
116  CK_TILE_HOST_DEVICE static constexpr auto calculate_magic_numbers(uint32_t divisor)
117  {
118  // WARNING: magic division is only valid for division inside this range.
119  // assert(divisor >= 1 && divisor <= (1U << 16));
120 
121  uint32_t shift_u32 = 0;
122 
123  while((1U << shift_u32) < divisor)
124  {
125  shift_u32++;
126  };
127 
128  uint32_t one = 1;
129  uint32_t multiplier_u32 = ((one << 16) * ((one << shift_u32) - divisor)) / divisor + 1;
130 
131  return make_tuple(multiplier_u32, shift_u32);
132  }
133 
134  // integral_constant<uint32_t, .>
135  template <auto Divisor>
137  {
138  constexpr auto tmp = calculate_magic_numbers(uint32_t{Divisor});
139 
140  constexpr uint32_t multiplier = tmp[number<0>{}];
141  constexpr uint32_t shift = tmp[number<1>{}];
142 
144  }
145 
146  // magic division for uint32_t
147  CK_TILE_DEVICE static constexpr uint32_t
148  do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
149  {
150  uint32_t tmp = (dividend * multiplier) >> 16;
151  return (tmp + dividend) >> shift;
152  }
153 
154  CK_TILE_HOST static constexpr uint32_t
155  do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
156  {
157  uint32_t tmp = (dividend * multiplier) >> 16;
158  return (tmp + dividend) >> shift;
159  }
160 
161  // magic division for int32_t
162  // HACK: use dividend_i32 as if it's uint32_t, dividend_i32 need to be
163  // non-negative for result to be correct
164  // TODO: figure out how to do magic number divison for int32_t as dividended
165  CK_TILE_DEVICE static constexpr int32_t
166  do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
167  {
168  uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
169  uint32_t tmp = (dividend_u32 * multiplier) >> 16;
170  return (tmp + dividend_u32) >> shift;
171  }
172 
173  CK_TILE_HOST static constexpr int32_t
174  do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
175  {
176  uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32);
177  uint32_t tmp = (dividend_u32 * multiplier) >> 16;
178  return (tmp + dividend_u32) >> shift;
179  }
180 };
181 
182 // use 32bit version
184 
185 struct mdiv
186 {
187  // 1 dword -> 3 dword storage
190  uint32_t shift; // TODO: 8 bit is enough
191 
192  // prefer construct on host
193  CK_TILE_HOST_DEVICE mdiv(uint32_t divisor_) : divisor(divisor_)
194  {
195  auto tmp = magic_division::calculate_magic_numbers(divisor_);
196 
197  multiplier = tmp[number<0>{}];
198  shift = tmp[number<1>{}];
199  }
200 
201  CK_TILE_HOST_DEVICE mdiv() : divisor(0), multiplier(0), shift(0) {}
202 
204  {
205  divisor = divisor_;
206  auto tmp = magic_division::calculate_magic_numbers(divisor_);
207 
208  multiplier = tmp[number<0>{}];
209  shift = tmp[number<1>{}];
210  }
211 
213  {
214  return magic_division::do_magic_division(dividend_, multiplier, shift);
215  }
216 
218  divmod(uint32_t dividend_, uint32_t& quotient_, uint32_t& remainder_) const
219  {
220  quotient_ = div(dividend_);
221  remainder_ = dividend_ - (quotient_ * divisor);
222  }
223 
224  CK_TILE_HOST_DEVICE uint32_t get() const { return divisor; }
225 };
226 
227 struct mdiv2
228 {
229  // 1 dword -> 2 dword storage, divisor need compute from runtime
231  uint32_t shift; // TODO: 8 bit is enough
232 
233  // prefer construct on host
235  {
236  auto tmp = magic_division::calculate_magic_numbers(divisor_);
237 
238  multiplier = tmp[number<0>{}];
239  shift = tmp[number<1>{}];
240  }
241 
242  CK_TILE_HOST_DEVICE mdiv2() : multiplier(0), shift(0) {}
243 
245  {
246  return magic_division::do_magic_division(dividend_, multiplier, shift);
247  }
248 
250  divmod(uint32_t dividend_, uint32_t divisor_, uint32_t& quotient_, uint32_t& remainder_) const
251  {
252  quotient_ = div(dividend_);
253  remainder_ = dividend_ - (quotient_ * divisor_);
254  }
255 };
256 
257 } // namespace ck_tile
#define CK_TILE_DEVICE
Definition: config.hpp:41
#define CK_TILE_HOST
Definition: config.hpp:40
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:42
Definition: cluster_descriptor.hpp:13
int32_t int32_t
Definition: integer.hpp:10
constexpr CK_TILE_HOST_DEVICE auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:360
__host__ constexpr __device__ auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:211
unsigned int uint32_t
Definition: stdint.h:126
unsigned __int64 uint64_t
Definition: stdint.h:136
Definition: integral_constant.hpp:13
Definition: magic_div.hpp:114
static constexpr CK_TILE_DEVICE uint32_t do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:148
static constexpr CK_TILE_DEVICE int32_t do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:166
static constexpr CK_TILE_HOST int32_t do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:174
static constexpr CK_TILE_HOST uint32_t do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:155
static constexpr CK_TILE_HOST_DEVICE auto calculate_magic_numbers(uint32_t divisor)
Definition: magic_div.hpp:116
static constexpr CK_TILE_HOST_DEVICE auto calculate_magic_numbers(constant< Divisor >)
Definition: magic_div.hpp:136
Definition: magic_div.hpp:27
static constexpr CK_TILE_HOST int32_t do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:103
static constexpr CK_TILE_HOST uint32_t do_magic_division(uint32_t dividend, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:75
static constexpr CK_TILE_HOST_DEVICE auto calculate_magic_numbers(uint32_t divisor)
Definition: magic_div.hpp:29
static constexpr CK_TILE_DEVICE int32_t do_magic_division(int32_t dividend_i32, uint32_t multiplier, uint32_t shift)
Definition: magic_div.hpp:86
Definition: magic_div.hpp:228
CK_TILE_HOST_DEVICE void divmod(uint32_t dividend_, uint32_t divisor_, uint32_t &quotient_, uint32_t &remainder_) const
Definition: magic_div.hpp:250
CK_TILE_HOST_DEVICE mdiv2(uint32_t divisor_)
Definition: magic_div.hpp:234
CK_TILE_HOST_DEVICE uint32_t div(uint32_t dividend_) const
Definition: magic_div.hpp:244
uint32_t multiplier
Definition: magic_div.hpp:230
CK_TILE_HOST_DEVICE mdiv2()
Definition: magic_div.hpp:242
uint32_t shift
Definition: magic_div.hpp:231
Definition: magic_div.hpp:186
CK_TILE_HOST_DEVICE mdiv(uint32_t divisor_)
Definition: magic_div.hpp:193
CK_TILE_HOST_DEVICE uint32_t get() const
Definition: magic_div.hpp:224
CK_TILE_HOST_DEVICE mdiv()
Definition: magic_div.hpp:201
CK_TILE_HOST_DEVICE void divmod(uint32_t dividend_, uint32_t &quotient_, uint32_t &remainder_) const
Definition: magic_div.hpp:218
uint32_t divisor
Definition: magic_div.hpp:188
CK_TILE_HOST_DEVICE uint32_t div(uint32_t dividend_) const
Definition: magic_div.hpp:212
uint32_t shift
Definition: magic_div.hpp:190
uint32_t multiplier
Definition: magic_div.hpp:189
CK_TILE_HOST_DEVICE void update(uint32_t divisor_)
Definition: magic_div.hpp:203