include/ck_tile/host/fill.hpp Source File

include/ck_tile/host/fill.hpp Source File#

Composable Kernel: include/ck_tile/host/fill.hpp Source File
fill.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include <algorithm>
7 #include <cmath>
8 #include <iterator>
9 #include <optional>
10 #include <random>
11 #include <type_traits>
12 #include <utility>
13 #include <unordered_set>
14 
15 #include "ck_tile/core.hpp"
17 
18 namespace ck_tile {
19 
20 template <typename T>
22 {
23  float a_{-5.f};
24  float b_{5.f};
25  std::optional<uint32_t> seed_{11939};
26  // ATTENTION: threaded does not guarantee the distribution between thread
27  bool threaded = false;
28 
29  template <typename ForwardIter>
30  void operator()(ForwardIter first, ForwardIter last) const
31  {
32  if(threaded)
33  {
34  uint32_t num_thread = std::thread::hardware_concurrency();
35  auto total = static_cast<std::size_t>(std::distance(first, last));
36  auto work_per_thread = static_cast<std::size_t>((total + num_thread - 1) / num_thread);
37 
38  std::vector<joinable_thread> threads(num_thread);
39  for(std::size_t it = 0; it < num_thread; ++it)
40  {
41  std::size_t iw_begin = it * work_per_thread;
42  std::size_t iw_end = std::min((it + 1) * work_per_thread, total);
43  auto thread_f = [this, total, iw_begin, iw_end, &first] {
44  if(iw_begin > total || iw_end > total)
45  return;
46  // need to make each thread unique, add an offset to current seed
47  std::mt19937 gen(seed_.has_value() ? (*seed_ + iw_begin)
48  : std::random_device{}());
49  std::uniform_real_distribution<float> dis(a_, b_);
50  std::generate(first + iw_begin, first + iw_end, [&dis, &gen]() {
51  return ck_tile::type_convert<T>(dis(gen));
52  });
53  };
54  threads[it] = joinable_thread(thread_f);
55  }
56  }
57  else
58  {
59  std::mt19937 gen(seed_.has_value() ? *seed_ : std::random_device{}());
60  std::uniform_real_distribution<float> dis(a_, b_);
61  std::generate(
62  first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(dis(gen)); });
63  }
64  }
65 
66  template <typename ForwardRange>
67  auto operator()(ForwardRange&& range) const
68  -> std::void_t<decltype(std::declval<const FillUniformDistribution&>()(
69  std::begin(std::forward<ForwardRange>(range)),
70  std::end(std::forward<ForwardRange>(range))))>
71  {
72  (*this)(std::begin(std::forward<ForwardRange>(range)),
73  std::end(std::forward<ForwardRange>(range)));
74  }
75 };
76 
77 namespace impl {
78 
79 // clang-format off
80 template<index_t bytes> struct RawIntegerType_ {};
81 template<> struct RawIntegerType_<1> { using type = uint8_t;};
82 template<> struct RawIntegerType_<2> { using type = uint16_t;};
83 template<> struct RawIntegerType_<4> { using type = uint32_t;};
84 template<> struct RawIntegerType_<8> { using type = uint64_t;};
85 // clang-format on
86 
87 template <typename T>
88 using RawIntegerType = typename RawIntegerType_<sizeof(T)>::type;
89 } // namespace impl
90 
91 // Note: this struct will have no const-ness will generate random
92 template <typename T>
94 {
95  float a_{-5.f};
96  float b_{5.f};
97  std::optional<uint32_t> seed_{11939};
98 
99  std::mt19937 gen_{};
100  std::unordered_set<impl::RawIntegerType<T>> set_{};
101 
103  float b = 5.f,
104  std::optional<uint32_t> seed = {11939})
105  : a_(a),
106  b_(b),
107  seed_(seed),
108  gen_{seed_.has_value() ? *seed_ : std::random_device{}()},
109  set_{}
110  {
111  }
112 
113  template <typename ForwardIter>
114  void operator()(ForwardIter first, ForwardIter last)
115  {
116  std::mt19937& gen = gen_;
117  std::uniform_real_distribution<float> dis(a_, b_);
118  auto& set = set_;
119  std::generate(first, last, [&dis, &gen, &set]() {
120  T v = static_cast<T>(0);
121  do
122  {
123  v = ck_tile::type_convert<T>(dis(gen));
124  } while(set.count(bit_cast<impl::RawIntegerType<T>>(v)) == 1);
126 
127  return v;
128  });
129  }
130 
131  template <typename ForwardRange>
132  auto operator()(ForwardRange&& range)
133  -> std::void_t<decltype(std::declval<FillUniformDistribution_Unique&>()(
134  std::begin(std::forward<ForwardRange>(range)),
135  std::end(std::forward<ForwardRange>(range))))>
136  {
137  (*this)(std::begin(std::forward<ForwardRange>(range)),
138  std::end(std::forward<ForwardRange>(range)));
139  }
140 
141  void clear() { set_.clear(); }
142 };
143 
144 template <typename T>
146 {
147  float mean_{0.f};
148  float variance_{1.f};
149  std::optional<uint32_t> seed_{11939};
150  // ATTENTION: threaded does not guarantee the distribution between thread
151  bool threaded = false;
152 
153  template <typename ForwardIter>
154  void operator()(ForwardIter first, ForwardIter last) const
155  {
156  if(threaded)
157  {
158  uint32_t num_thread = std::thread::hardware_concurrency();
159  auto total = static_cast<std::size_t>(std::distance(first, last));
160  auto work_per_thread = static_cast<std::size_t>((total + num_thread - 1) / num_thread);
161 
162  std::vector<joinable_thread> threads(num_thread);
163  for(std::size_t it = 0; it < num_thread; ++it)
164  {
165  std::size_t iw_begin = it * work_per_thread;
166  std::size_t iw_end = std::min((it + 1) * work_per_thread, total);
167  auto thread_f = [this, total, iw_begin, iw_end, &first] {
168  if(iw_begin > total || iw_end > total)
169  return;
170  // need to make each thread unique, add an offset to current seed
171  std::mt19937 gen(seed_.has_value() ? (*seed_ + iw_begin)
172  : std::random_device{}());
173  std::normal_distribution<float> dis(mean_, std::sqrt(variance_));
174  std::generate(first + iw_begin, first + iw_end, [&dis, &gen]() {
175  return ck_tile::type_convert<T>(dis(gen));
176  });
177  };
178  threads[it] = joinable_thread(thread_f);
179  }
180  }
181  else
182  {
183  std::mt19937 gen(seed_.has_value() ? *seed_ : std::random_device{}());
184  std::normal_distribution<float> dis(mean_, std::sqrt(variance_));
185  std::generate(
186  first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(dis(gen)); });
187  }
188  }
189 
190  template <typename ForwardRange>
191  auto operator()(ForwardRange&& range) const
192  -> std::void_t<decltype(std::declval<const FillNormalDistribution&>()(
193  std::begin(std::forward<ForwardRange>(range)),
194  std::end(std::forward<ForwardRange>(range))))>
195  {
196  (*this)(std::begin(std::forward<ForwardRange>(range)),
197  std::end(std::forward<ForwardRange>(range)));
198  }
199 };
200 
201 // Normally FillUniformDistributionIntegerValue should use std::uniform_int_distribution as below.
202 // However, this produces segfaults in std::mt19937 which look like an infinite loop.
203 // template <typename T>
204 // struct FillUniformDistributionIntegerValue
205 // {
206 // int a_{-5};
207 // int b_{5};
208 //
209 // template <typename ForwardIter>
210 // void operator()(ForwardIter first, ForwardIter last) const
211 // {
212 // std::mt19937 gen(11939);
213 // std::uniform_int_distribution<int> dis(a_, b_);
214 // std::generate(
215 // first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(dis(gen)); });
216 // }
217 // };
218 
219 // Workaround for uniform_int_distribution not working as expected. See note above.<
220 template <typename T>
222 {
223  float a_{-5.f};
224  float b_{5.f};
225  std::optional<uint32_t> seed_{11939};
226 
227  template <typename ForwardIter>
228  void operator()(ForwardIter first, ForwardIter last) const
229  {
230  std::mt19937 gen(seed_.has_value() ? *seed_ : std::random_device{}());
231  std::uniform_real_distribution<float> dis(a_, b_);
232  std::generate(
233  first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(std::round(dis(gen))); });
234  }
235 
236  template <typename ForwardRange>
237  auto operator()(ForwardRange&& range) const
238  -> std::void_t<decltype(std::declval<const FillUniformDistributionIntegerValue&>()(
239  std::begin(std::forward<ForwardRange>(range)),
240  std::end(std::forward<ForwardRange>(range))))>
241  {
242  (*this)(std::begin(std::forward<ForwardRange>(range)),
243  std::end(std::forward<ForwardRange>(range)));
244  }
245 };
246 
247 template <typename T>
249 {
250  float mean_{0.f};
251  float variance_{1.f};
252  std::optional<uint32_t> seed_{11939};
253 
254  template <typename ForwardIter>
255  void operator()(ForwardIter first, ForwardIter last) const
256  {
257  std::mt19937 gen(seed_.has_value() ? *seed_ : std::random_device{}());
258  std::normal_distribution<float> dis(mean_, std::sqrt(variance_));
259  std::generate(
260  first, last, [&dis, &gen]() { return ck_tile::type_convert<T>(std::round(dis(gen))); });
261  }
262 
263  template <typename ForwardRange>
264  auto operator()(ForwardRange&& range) const
265  -> std::void_t<decltype(std::declval<const FillNormalDistributionIntegerValue&>()(
266  std::begin(std::forward<ForwardRange>(range)),
267  std::end(std::forward<ForwardRange>(range))))>
268  {
269  (*this)(std::begin(std::forward<ForwardRange>(range)),
270  std::end(std::forward<ForwardRange>(range)));
271  }
272 };
273 
// Fills a range with the arithmetic sequence
// init_value_, init_value_ + step_, init_value_ + 2 * step_, ...
template <typename T>
struct FillMonotonicSeq
{
    T init_value_{0};
    T step_{1};

    // Fills [first, last); the first element receives init_value_.
    template <typename ForwardIter>
    void operator()(ForwardIter first, ForwardIter last) const
    {
        // Both members are copied into the closure explicitly; a bare [=] would reach
        // step_ through an implicitly captured `this`, which C++20 deprecates.
        std::generate(first, last, [next = init_value_, step = step_]() mutable {
            const auto current = next;
            next += step;
            return current;
        });
    }

    // Convenience overload: fills everything between std::begin(range) and std::end(range).
    template <typename ForwardRange>
    auto operator()(ForwardRange&& range) const
        -> std::void_t<decltype(std::declval<const FillMonotonicSeq&>()(
            std::begin(std::forward<ForwardRange>(range)),
            std::end(std::forward<ForwardRange>(range))))>
    {
        (*this)(std::begin(std::forward<ForwardRange>(range)),
                std::end(std::forward<ForwardRange>(range)));
    }
};
300 
301 template <typename T, bool IsAscending = true>
303 {
304  float start_value_{0};
305  float end_value_{3};
306  float step_{1};
307 
308  template <typename ForwardIter>
309  void operator()(ForwardIter first, ForwardIter last) const
310  {
311  std::generate(first, last, [=, n = start_value_]() mutable {
312  auto tmp = n;
313  n += step_;
314  if constexpr(IsAscending)
315  {
316  if(n > end_value_)
317  n = start_value_;
318  }
319  else
320  {
321  if(n < end_value_)
322  n = start_value_;
323  }
324 
325  return type_convert<T>(tmp);
326  });
327  }
328 
329  template <typename ForwardRange>
330  auto operator()(ForwardRange&& range) const -> std::void_t<
331  decltype(std::declval<const FillStepRange&>()(std::begin(std::forward<ForwardRange>(range)),
332  std::end(std::forward<ForwardRange>(range))))>
333  {
334  (*this)(std::begin(std::forward<ForwardRange>(range)),
335  std::end(std::forward<ForwardRange>(range)));
336  }
337 };
338 
// Fills a range with copies of a single value.
template <typename T>
struct FillConstant
{
    T value_{0};

    // Assigns value_ to every element of [first, last).
    template <typename ForwardIter>
    void operator()(ForwardIter first, ForwardIter last) const
    {
        std::fill(first, last, value_);
    }

    // Convenience overload: fills everything between std::begin(range) and std::end(range).
    template <typename ForwardRange>
    auto operator()(ForwardRange&& range) const -> std::void_t<
        decltype(std::declval<const FillConstant&>()(std::begin(std::forward<ForwardRange>(range)),
                                                     std::end(std::forward<ForwardRange>(range))))>
    {
        (*this)(std::begin(std::forward<ForwardRange>(range)),
                std::end(std::forward<ForwardRange>(range)));
    }
};
359 
360 template <typename T, bool UseCos = true, bool UseAbs = false>
362 {
363  template <typename T_, bool UseCos_ = true, bool UseAbs_ = false>
365  {
366  int i{0};
367  auto operator()()
368  {
369  float v = 0;
370  if constexpr(UseCos_)
371  {
372  v = cos(i);
373  }
374  else
375  {
376  v = sin(i);
377  }
378  if constexpr(UseAbs_)
379  v = abs(v);
380  i++;
381  return ck_tile::type_convert<T_>(v);
382  }
383  };
384  template <typename ForwardIter>
385  void operator()(ForwardIter first, ForwardIter last) const
386  {
388  std::generate(first, last, gen);
389  }
390 
391  template <typename ForwardRange>
392  auto operator()(ForwardRange&& range) const -> std::void_t<
393  decltype(std::declval<const FillTrigValue&>()(std::begin(std::forward<ForwardRange>(range)),
394  std::end(std::forward<ForwardRange>(range))))>
395  {
396  (*this)(std::begin(std::forward<ForwardRange>(range)),
397  std::end(std::forward<ForwardRange>(range)));
398  }
399 };
400 
401 } // namespace ck_tile
__host__ constexpr __device__ T min(T x)
Definition: math.hpp:116
auto fill(OutputRange &&range, const T &init) -> std::void_t< decltype(std::fill(std::begin(std::forward< OutputRange >(range)), std::end(std::forward< OutputRange >(range)), init))>
Definition: algorithm.hpp:25
typename RawIntegerType_< sizeof(T)>::type RawIntegerType
Definition: fill.hpp:88
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE Y bit_cast(const X &x)
Definition: bit_cast.hpp:11
CK_TILE_HOST T cos(T x)
Definition: math.hpp:749
CK_TILE_HOST T sin(T x)
Definition: math.hpp:695
CK_TILE_HOST_DEVICE bfloat16_t abs(const bfloat16_t &x)
Definition: bfloat16.hpp:395
Definition: fill.hpp:341
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillConstant & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:351
T value_
Definition: fill.hpp:342
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:345
Definition: fill.hpp:276
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillMonotonicSeq & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:291
T init_value_
Definition: fill.hpp:277
T step_
Definition: fill.hpp:278
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:281
Definition: fill.hpp:146
std::optional< uint32_t > seed_
Definition: fill.hpp:149
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:154
float variance_
Definition: fill.hpp:148
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillNormalDistribution & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:191
bool threaded
Definition: fill.hpp:151
float mean_
Definition: fill.hpp:147
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:255
float mean_
Definition: fill.hpp:250
float variance_
Definition: fill.hpp:251
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillNormalDistributionIntegerValue & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:264
std::optional< uint32_t > seed_
Definition: fill.hpp:252
Definition: fill.hpp:303
float end_value_
Definition: fill.hpp:305
float start_value_
Definition: fill.hpp:304
float step_
Definition: fill.hpp:306
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:309
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillStepRange & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:330
int i
Definition: fill.hpp:366
auto operator()()
Definition: fill.hpp:367
Definition: fill.hpp:362
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:385
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillTrigValue & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:392
void operator()(ForwardIter first, ForwardIter last)
Definition: fill.hpp:114
std::optional< uint32_t > seed_
Definition: fill.hpp:97
float a_
Definition: fill.hpp:95
FillUniformDistribution_Unique(float a=-5.f, float b=5.f, std::optional< uint32_t > seed={11939})
Definition: fill.hpp:102
auto operator()(ForwardRange &&range) -> std::void_t< decltype(std::declval< FillUniformDistribution_Unique & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:132
std::mt19937 gen_
Definition: fill.hpp:99
void clear()
Definition: fill.hpp:141
float b_
Definition: fill.hpp:96
std::unordered_set< impl::RawIntegerType< T > > set_
Definition: fill.hpp:100
Definition: fill.hpp:22
float b_
Definition: fill.hpp:24
bool threaded
Definition: fill.hpp:27
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:30
float a_
Definition: fill.hpp:23
std::optional< uint32_t > seed_
Definition: fill.hpp:25
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillUniformDistribution & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:67
std::optional< uint32_t > seed_
Definition: fill.hpp:225
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillUniformDistributionIntegerValue & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:237
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:228
float b_
Definition: fill.hpp:224
float a_
Definition: fill.hpp:223
uint8_t type
Definition: fill.hpp:81
uint16_t type
Definition: fill.hpp:82
uint32_t type
Definition: fill.hpp:83
uint64_t type
Definition: fill.hpp:84
Definition: fill.hpp:80
Definition: joinable_thread.hpp:12