12 #include <type_traits>
14 #include <unordered_set>
43 std::optional<uint32_t>
seed_{11939};
48 template <
typename ForwardIter>
49 void operator()(ForwardIter first, ForwardIter last)
const
53 uint32_t num_thread = std::thread::hardware_concurrency();
54 auto total =
static_cast<std::size_t
>(std::distance(first, last));
55 auto work_per_thread =
static_cast<std::size_t
>((total + num_thread - 1) / num_thread);
57 std::vector<joinable_thread> threads(num_thread);
58 for(std::size_t it = 0; it < num_thread; ++it)
60 std::size_t iw_begin = it * work_per_thread;
61 std::size_t iw_end =
std::min((it + 1) * work_per_thread, total);
62 auto thread_f = [
this, total, iw_begin, iw_end, &first] {
63 if(iw_begin > total || iw_end > total)
66 std::mt19937 gen(
seed_.has_value() ? (*
seed_ + iw_begin)
67 : std::random_device{}());
68 std::uniform_real_distribution<float> dis(
a_,
b_);
69 std::generate(first + iw_begin, first + iw_end, [&dis, &gen]() {
70 return ck_tile::type_convert<T>(dis(gen));
78 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
79 std::uniform_real_distribution<float> dis(
a_,
b_);
81 first, last, [&dis, &gen]() {
return ck_tile::type_convert<T>(dis(gen)); });
85 template <
typename ForwardRange>
87 -> std::void_t<decltype(std::declval<const FillUniformDistribution&>()(
88 std::begin(std::forward<ForwardRange>(range)),
89 std::end(std::forward<ForwardRange>(range))))>
91 (*this)(std::begin(std::forward<ForwardRange>(range)),
92 std::end(std::forward<ForwardRange>(range)));
102 std::optional<uint32_t>
seed_{11939};
103 template <
typename ForwardIter>
106 if(a_ < -8.0f || b_ > 7.0f)
108 throw std::runtime_error(
109 "a_ or b_ of FillUniformDistribution<ck_tile::pk_int4_t> is out of range.");
112 int min_value =
static_cast<int>(
a_);
113 int max_value =
static_cast<int>(
b_);
114 constexpr
auto int4_array = std::array<uint8_t, 16>{0x88,
130 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
131 std::uniform_int_distribution<std::int32_t> dis(0, max_value - min_value + 1);
134 int randomInt = dis(gen);
135 *first = int4_array[randomInt + (min_value + 8)];
139 template <
typename ForwardRange>
141 -> std::void_t<decltype(std::declval<const FillUniformDistribution&>()(
142 std::begin(std::forward<ForwardRange>(range)),
143 std::end(std::forward<ForwardRange>(range))))>
145 (*this)(std::begin(std::forward<ForwardRange>(range)),
146 std::end(std::forward<ForwardRange>(range)));
160 template <
typename T>
165 template <
typename T>
170 std::optional<uint32_t>
seed_{11939};
173 std::unordered_set<impl::RawIntegerType<T>>
set_{};
177 std::optional<uint32_t> seed = {11939})
186 template <
typename ForwardIter>
189 std::mt19937& gen =
gen_;
190 std::uniform_real_distribution<float> dis(
a_,
b_);
192 std::generate(first, last, [&dis, &gen, &set]() {
193 T v =
static_cast<T
>(0);
196 v = ck_tile::type_convert<T>(dis(gen));
204 template <
typename ForwardRange>
206 -> std::void_t<decltype(std::declval<FillUniformDistribution_Unique&>()(
207 std::begin(std::forward<ForwardRange>(range)),
208 std::end(std::forward<ForwardRange>(range))))>
210 (*this)(std::begin(std::forward<ForwardRange>(range)),
211 std::end(std::forward<ForwardRange>(range)));
217 template <
typename T>
222 std::optional<uint32_t>
seed_{11939};
226 template <
typename ForwardIter>
231 uint32_t num_thread = std::thread::hardware_concurrency();
232 auto total =
static_cast<std::size_t
>(std::distance(first, last));
233 auto work_per_thread =
static_cast<std::size_t
>((total + num_thread - 1) / num_thread);
235 std::vector<joinable_thread> threads(num_thread);
236 for(std::size_t it = 0; it < num_thread; ++it)
238 std::size_t iw_begin = it * work_per_thread;
239 std::size_t iw_end =
std::min((it + 1) * work_per_thread, total);
240 auto thread_f = [
this, total, iw_begin, iw_end, &first] {
241 if(iw_begin > total || iw_end > total)
244 std::mt19937 gen(
seed_.has_value() ? (*
seed_ + iw_begin)
245 : std::random_device{}());
247 std::generate(first + iw_begin, first + iw_end, [&dis, &gen]() {
248 return ck_tile::type_convert<T>(dis(gen));
256 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
259 first, last, [&dis, &gen]() {
return ck_tile::type_convert<T>(dis(gen)); });
263 template <
typename ForwardRange>
265 -> std::void_t<decltype(std::declval<const FillNormalDistribution&>()(
266 std::begin(std::forward<ForwardRange>(range)),
267 std::end(std::forward<ForwardRange>(range))))>
269 (*this)(std::begin(std::forward<ForwardRange>(range)),
270 std::end(std::forward<ForwardRange>(range)));
293 template <
typename T>
298 std::optional<uint32_t>
seed_{11939};
300 template <
typename ForwardIter>
303 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
304 std::uniform_real_distribution<float> dis(
a_,
b_);
306 first, last, [&dis, &gen]() {
return ck_tile::type_convert<T>(std::round(dis(gen))); });
309 template <
typename ForwardRange>
311 -> std::void_t<decltype(std::declval<const FillUniformDistributionIntegerValue&>()(
312 std::begin(std::forward<ForwardRange>(range)),
313 std::end(std::forward<ForwardRange>(range))))>
315 (*this)(std::begin(std::forward<ForwardRange>(range)),
316 std::end(std::forward<ForwardRange>(range)));
320 template <
typename T>
325 std::optional<uint32_t>
seed_{11939};
327 template <
typename ForwardIter>
330 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
333 first, last, [&dis, &gen]() {
return ck_tile::type_convert<T>(std::round(dis(gen))); });
336 template <
typename ForwardRange>
338 -> std::void_t<decltype(std::declval<const FillNormalDistributionIntegerValue&>()(
339 std::begin(std::forward<ForwardRange>(range)),
340 std::end(std::forward<ForwardRange>(range))))>
342 (*this)(std::begin(std::forward<ForwardRange>(range)),
343 std::end(std::forward<ForwardRange>(range)));
347 template <
typename T>
353 template <
typename ForwardIter>
356 std::generate(first, last, [=, *
this, n =
init_value_]()
mutable {
360 n.data +=
step_.data;
370 template <
typename ForwardRange>
372 -> std::void_t<decltype(std::declval<const FillMonotonicSeq&>()(
373 std::begin(std::forward<ForwardRange>(range)),
374 std::end(std::forward<ForwardRange>(range))))>
376 (*this)(std::begin(std::forward<ForwardRange>(range)),
377 std::end(std::forward<ForwardRange>(range)));
381 template <
typename T,
bool IsAscending = true>
388 template <
typename ForwardIter>
391 std::generate(first, last, [=, *
this, n =
start_value_]()
mutable {
394 if constexpr(IsAscending)
405 return type_convert<T>(tmp);
409 template <
typename ForwardRange>
411 -> std::void_t<decltype(std::declval<const FillStepRange&>()(
412 std::begin(std::forward<ForwardRange>(range)),
413 std::end(std::forward<ForwardRange>(range))))>
415 (*this)(std::begin(std::forward<ForwardRange>(range)),
416 std::end(std::forward<ForwardRange>(range)));
420 template <
typename T>
425 template <
typename ForwardIter>
431 template <
typename ForwardRange>
433 -> std::void_t<decltype(std::declval<const FillConstant&>()(
434 std::begin(std::forward<ForwardRange>(range)),
435 std::end(std::forward<ForwardRange>(range))))>
437 (*this)(std::begin(std::forward<ForwardRange>(range)),
438 std::end(std::forward<ForwardRange>(range)));
445 template <
typename T>
463 template <
typename ForwardIter>
470 return type_convert<T>(tmp);
474 template <
typename ForwardRange>
476 -> std::void_t<decltype(std::declval<const AdjustToStructuredSparsity&>()(
477 std::begin(std::forward<ForwardRange>(range)),
478 std::end(std::forward<ForwardRange>(range))))>
480 (*this)(std::begin(std::forward<ForwardRange>(range)),
481 std::end(std::forward<ForwardRange>(range)));
485 template <
typename T,
bool UseCos = true,
bool UseAbs = false>
488 template <
typename T_,
bool UseCos_ = true,
bool UseAbs_ = false>
495 if constexpr(UseCos_)
503 if constexpr(UseAbs_)
506 return ck_tile::type_convert<T_>(v);
509 template <
typename ForwardIter>
513 std::generate(first, last, gen);
516 template <
typename ForwardRange>
518 -> std::void_t<decltype(std::declval<const FillTrigValue&>()(
519 std::begin(std::forward<ForwardRange>(range)),
520 std::end(std::forward<ForwardRange>(range))))>
522 (*this)(std::begin(std::forward<ForwardRange>(range)),
523 std::end(std::forward<ForwardRange>(range)));
__host__ constexpr __device__ T min(T x)
Definition: math.hpp:116
auto fill(OutputRange &&range, const T &init) -> std::void_t< decltype(std::fill(std::begin(std::forward< OutputRange >(range)), std::end(std::forward< OutputRange >(range)), init))>
Definition: algorithm.hpp:25
auto transform(InputRange &&range, OutputIterator iter, UnaryOperation unary_op) -> decltype(std::transform(std::begin(range), std::end(range), iter, unary_op))
Definition: algorithm.hpp:36
typename RawIntegerType_< sizeof(T)>::type RawIntegerType
Definition: fill.hpp:161
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE Y bit_cast(const X &x)
Definition: bit_cast.hpp:11
CK_TILE_HOST T cos(T x)
Definition: math.hpp:752
CK_TILE_HOST T sin(T x)
Definition: math.hpp:698
int32_t int32_t
Definition: integer.hpp:10
CK_TILE_HOST_DEVICE bfloat16_t abs(const bfloat16_t &x)
Definition: bfloat16.hpp:404
constexpr bool is_same_v
Definition: type.hpp:283
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1249
unsigned short uint16_t
Definition: stdint.h:125
unsigned int uint32_t
Definition: stdint.h:126
unsigned char uint8_t
Definition: stdint.h:124
unsigned __int64 uint64_t
Definition: stdint.h:136
Transforms the given input to fit the 2:4 structured sparsity pattern, so that every subgroup of 4 elements contains at most 2 non-zero elements.
Definition: fill.hpp:447
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const AdjustToStructuredSparsity & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:475
size_t start
Definition: fill.hpp:448
static constexpr int32_t masks[]
Definition: fill.hpp:451
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:464
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillConstant & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:432
T value_
Definition: fill.hpp:423
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:426
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillMonotonicSeq & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:371
T init_value_
Definition: fill.hpp:350
T step_
Definition: fill.hpp:351
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:354
std::optional< uint32_t > seed_
Definition: fill.hpp:222
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:227
float variance_
Definition: fill.hpp:221
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillNormalDistribution & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:264
bool threaded
Definition: fill.hpp:224
float mean_
Definition: fill.hpp:220
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:328
float mean_
Definition: fill.hpp:323
float variance_
Definition: fill.hpp:324
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillNormalDistributionIntegerValue & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:337
std::optional< uint32_t > seed_
Definition: fill.hpp:325
float end_value_
Definition: fill.hpp:385
float start_value_
Definition: fill.hpp:384
float step_
Definition: fill.hpp:386
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:389
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillStepRange & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:410
int i
Definition: fill.hpp:491
auto operator()()
Definition: fill.hpp:492
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:510
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillTrigValue & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:517
uint8_t type
Definition: fill.hpp:154
uint16_t type
Definition: fill.hpp:155
uint32_t type
Definition: fill.hpp:156
uint64_t type
Definition: fill.hpp:157
Definition: joinable_thread.hpp:12
Definition: pk_int4.hpp:21