12 #include <type_traits>
14 #include <unordered_set>
43 std::optional<uint32_t>
seed_{11939};
48 template <
typename ForwardIter>
49 void operator()(ForwardIter first, ForwardIter last)
const
53 uint32_t num_thread = std::max(1u, std::thread::hardware_concurrency()); // hardware_concurrency() may return 0; clamp so the per-thread work division below never divides by zero
54 auto total =
static_cast<std::size_t
>(std::distance(first, last));
55 auto work_per_thread =
static_cast<std::size_t
>((total + num_thread - 1) / num_thread);
57 std::vector<joinable_thread> threads(num_thread);
58 for(std::size_t it = 0; it < num_thread; ++it)
60 std::size_t iw_begin = it * work_per_thread;
61 std::size_t iw_end =
std::min((it + 1) * work_per_thread, total);
62 auto thread_f = [
this, total, iw_begin, iw_end, &first] {
63 if(iw_begin > total || iw_end > total)
66 std::mt19937 gen(
seed_.has_value() ? (*
seed_ + iw_begin)
67 : std::random_device{}());
68 std::uniform_real_distribution<float> dis(
a_,
b_);
69 std::generate(first + iw_begin, first + iw_end, [&dis, &gen]() {
71 return ck_tile::type_convert<T>(
fp32x2_t{dis(gen), dis(gen)});
73 return ck_tile::type_convert<T>(dis(gen));
81 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
82 std::uniform_real_distribution<float> dis(
a_,
b_);
83 std::generate(first, last, [&dis, &gen]() {
85 return ck_tile::type_convert<T>(
fp32x2_t{dis(gen), dis(gen)});
87 return ck_tile::type_convert<T>(dis(gen));
92 template <
typename ForwardRange>
94 -> std::void_t<decltype(std::declval<const FillUniformDistribution&>()(
95 std::begin(std::forward<ForwardRange>(range)),
96 std::end(std::forward<ForwardRange>(range))))>
98 (*this)(std::begin(std::forward<ForwardRange>(range)),
99 std::end(std::forward<ForwardRange>(range)));
109 std::optional<uint32_t>
seed_{11939};
110 template <
typename ForwardIter>
113 if(a_ < -8.0f || b_ > 7.0f)
115 throw std::runtime_error(
116 "a_ or b_ of FillUniformDistribution<ck_tile::pk_int4_t> is out of range.");
119 int min_value =
static_cast<int>(
a_);
120 int max_value =
static_cast<int>(
b_);
121 constexpr
auto int4_array = std::array<uint8_t, 16>{0x88,
137 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
138 std::uniform_int_distribution<std::int32_t> dis(0, max_value - min_value); // bounds are inclusive: offsets 0..(max-min) map onto int4_array[min+8 .. max+8], never past index 15
141 int randomInt = dis(gen);
142 *first = int4_array[randomInt + (min_value + 8)];
146 template <
typename ForwardRange>
148 -> std::void_t<decltype(std::declval<const FillUniformDistribution&>()(
149 std::begin(std::forward<ForwardRange>(range)),
150 std::end(std::forward<ForwardRange>(range))))>
152 (*this)(std::begin(std::forward<ForwardRange>(range)),
153 std::end(std::forward<ForwardRange>(range)));
167 template <
typename T>
172 template <
typename T>
177 std::optional<uint32_t>
seed_{11939};
180 std::unordered_set<impl::RawIntegerType<T>>
set_{};
184 std::optional<uint32_t> seed = {11939})
193 template <
typename ForwardIter>
196 std::mt19937& gen =
gen_;
197 std::uniform_real_distribution<float> dis(
a_,
b_);
199 std::generate(first, last, [&dis, &gen, &set]() {
200 T v =
static_cast<T
>(0);
203 v = ck_tile::type_convert<T>(dis(gen));
211 template <
typename ForwardRange>
213 -> std::void_t<decltype(std::declval<FillUniformDistribution_Unique&>()(
214 std::begin(std::forward<ForwardRange>(range)),
215 std::end(std::forward<ForwardRange>(range))))>
217 (*this)(std::begin(std::forward<ForwardRange>(range)),
218 std::end(std::forward<ForwardRange>(range)));
224 template <
typename T>
229 std::optional<uint32_t>
seed_{11939};
233 template <
typename ForwardIter>
238 uint32_t num_thread = std::max(1u, std::thread::hardware_concurrency()); // hardware_concurrency() may return 0; clamp so the per-thread work division below never divides by zero
239 auto total =
static_cast<std::size_t
>(std::distance(first, last));
240 auto work_per_thread =
static_cast<std::size_t
>((total + num_thread - 1) / num_thread);
242 std::vector<joinable_thread> threads(num_thread);
243 for(std::size_t it = 0; it < num_thread; ++it)
245 std::size_t iw_begin = it * work_per_thread;
246 std::size_t iw_end =
std::min((it + 1) * work_per_thread, total);
247 auto thread_f = [
this, total, iw_begin, iw_end, &first] {
248 if(iw_begin > total || iw_end > total)
251 std::mt19937 gen(
seed_.has_value() ? (*
seed_ + iw_begin)
252 : std::random_device{}());
254 std::generate(first + iw_begin, first + iw_end, [&dis, &gen]() {
255 return ck_tile::type_convert<T>(dis(gen));
263 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
266 first, last, [&dis, &gen]() {
return ck_tile::type_convert<T>(dis(gen)); });
270 template <
typename ForwardRange>
272 -> std::void_t<decltype(std::declval<const FillNormalDistribution&>()(
273 std::begin(std::forward<ForwardRange>(range)),
274 std::end(std::forward<ForwardRange>(range))))>
276 (*this)(std::begin(std::forward<ForwardRange>(range)),
277 std::end(std::forward<ForwardRange>(range)));
300 template <
typename T>
305 std::optional<uint32_t>
seed_{11939};
307 template <
typename ForwardIter>
310 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
311 std::uniform_real_distribution<float> dis(
a_,
b_);
313 first, last, [&dis, &gen]() {
return ck_tile::type_convert<T>(std::round(dis(gen))); });
316 template <
typename ForwardRange>
318 -> std::void_t<decltype(std::declval<const FillUniformDistributionIntegerValue&>()(
319 std::begin(std::forward<ForwardRange>(range)),
320 std::end(std::forward<ForwardRange>(range))))>
322 (*this)(std::begin(std::forward<ForwardRange>(range)),
323 std::end(std::forward<ForwardRange>(range)));
327 template <
typename T>
332 std::optional<uint32_t>
seed_{11939};
334 template <
typename ForwardIter>
337 std::mt19937 gen(
seed_.has_value() ? *
seed_ : std::random_device{}());
340 first, last, [&dis, &gen]() {
return ck_tile::type_convert<T>(std::round(dis(gen))); });
343 template <
typename ForwardRange>
345 -> std::void_t<decltype(std::declval<const FillNormalDistributionIntegerValue&>()(
346 std::begin(std::forward<ForwardRange>(range)),
347 std::end(std::forward<ForwardRange>(range))))>
349 (*this)(std::begin(std::forward<ForwardRange>(range)),
350 std::end(std::forward<ForwardRange>(range)));
354 template <
typename T>
360 template <
typename ForwardIter>
363 std::generate(first, last, [=, *
this, n =
init_value_]()
mutable {
367 n.data +=
step_.data;
377 template <
typename ForwardRange>
379 -> std::void_t<decltype(std::declval<const FillMonotonicSeq&>()(
380 std::begin(std::forward<ForwardRange>(range)),
381 std::end(std::forward<ForwardRange>(range))))>
383 (*this)(std::begin(std::forward<ForwardRange>(range)),
384 std::end(std::forward<ForwardRange>(range)));
388 template <
typename T,
bool IsAscending = true>
395 template <
typename ForwardIter>
398 std::generate(first, last, [=, *
this, n =
start_value_]()
mutable {
401 if constexpr(IsAscending)
412 return type_convert<T>(tmp);
416 template <
typename ForwardRange>
418 -> std::void_t<decltype(std::declval<const FillStepRange&>()(
419 std::begin(std::forward<ForwardRange>(range)),
420 std::end(std::forward<ForwardRange>(range))))>
422 (*this)(std::begin(std::forward<ForwardRange>(range)),
423 std::end(std::forward<ForwardRange>(range)));
427 template <
typename T>
432 template <
typename ForwardIter>
438 template <
typename ForwardRange>
440 -> std::void_t<decltype(std::declval<const FillConstant&>()(
441 std::begin(std::forward<ForwardRange>(range)),
442 std::end(std::forward<ForwardRange>(range))))>
444 (*this)(std::begin(std::forward<ForwardRange>(range)),
445 std::end(std::forward<ForwardRange>(range)));
452 template <
typename T>
470 template <
typename ForwardIter>
477 return type_convert<T>(tmp);
481 template <
typename ForwardRange>
483 -> std::void_t<decltype(std::declval<const AdjustToStructuredSparsity&>()(
484 std::begin(std::forward<ForwardRange>(range)),
485 std::end(std::forward<ForwardRange>(range))))>
487 (*this)(std::begin(std::forward<ForwardRange>(range)),
488 std::end(std::forward<ForwardRange>(range)));
492 template <
typename T,
bool UseCos = true,
bool UseAbs = false>
495 template <
typename T_,
bool UseCos_ = true,
bool UseAbs_ = false>
502 if constexpr(UseCos_)
510 if constexpr(UseAbs_)
513 return ck_tile::type_convert<T_>(v);
516 template <
typename ForwardIter>
520 std::generate(first, last, gen);
523 template <
typename ForwardRange>
525 -> std::void_t<decltype(std::declval<const FillTrigValue&>()(
526 std::begin(std::forward<ForwardRange>(range)),
527 std::end(std::forward<ForwardRange>(range))))>
529 (*this)(std::begin(std::forward<ForwardRange>(range)),
530 std::end(std::forward<ForwardRange>(range)));
__host__ constexpr __device__ T min(T x)
Definition: math.hpp:116
auto fill(OutputRange &&range, const T &init) -> std::void_t< decltype(std::fill(std::begin(std::forward< OutputRange >(range)), std::end(std::forward< OutputRange >(range)), init))>
Definition: algorithm.hpp:25
auto transform(InputRange &&range, OutputIterator iter, UnaryOperation unary_op) -> decltype(std::transform(std::begin(range), std::end(range), iter, unary_op))
Definition: algorithm.hpp:36
typename RawIntegerType_< sizeof(T)>::type RawIntegerType
Definition: fill.hpp:168
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE Y bit_cast(const X &x)
Definition: bit_cast.hpp:11
CK_TILE_HOST T cos(T x)
Definition: math.hpp:752
float fp32x2_t
Definition: pk_fp4.hpp:22
CK_TILE_HOST T sin(T x)
Definition: math.hpp:698
int32_t int32_t
Definition: integer.hpp:10
CK_TILE_HOST_DEVICE bfloat16_t abs(const bfloat16_t &x)
Definition: bfloat16.hpp:392
constexpr bool is_same_v
Definition: type.hpp:283
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1249
unsigned short uint16_t
Definition: stdint.h:125
unsigned int uint32_t
Definition: stdint.h:126
unsigned char uint8_t
Definition: stdint.h:124
unsigned __int64 uint64_t
Definition: stdint.h:136
Transforms given input to fit 2:4 structured sparsity pattern so every subgroup of 4 elements contain...
Definition: fill.hpp:454
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const AdjustToStructuredSparsity & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:482
size_t start
Definition: fill.hpp:455
static constexpr int32_t masks[]
Definition: fill.hpp:458
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:471
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillConstant & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:439
T value_
Definition: fill.hpp:430
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:433
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillMonotonicSeq & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:378
T init_value_
Definition: fill.hpp:357
T step_
Definition: fill.hpp:358
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:361
std::optional< uint32_t > seed_
Definition: fill.hpp:229
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:234
float variance_
Definition: fill.hpp:228
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillNormalDistribution & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:271
bool threaded
Definition: fill.hpp:231
float mean_
Definition: fill.hpp:227
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:335
float mean_
Definition: fill.hpp:330
float variance_
Definition: fill.hpp:331
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillNormalDistributionIntegerValue & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:344
std::optional< uint32_t > seed_
Definition: fill.hpp:332
float end_value_
Definition: fill.hpp:392
float start_value_
Definition: fill.hpp:391
float step_
Definition: fill.hpp:393
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:396
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillStepRange & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:417
int i
Definition: fill.hpp:498
auto operator()()
Definition: fill.hpp:499
void operator()(ForwardIter first, ForwardIter last) const
Definition: fill.hpp:517
auto operator()(ForwardRange &&range) const -> std::void_t< decltype(std::declval< const FillTrigValue & >()(std::begin(std::forward< ForwardRange >(range)), std::end(std::forward< ForwardRange >(range))))>
Definition: fill.hpp:524
uint8_t type
Definition: fill.hpp:161
uint16_t type
Definition: fill.hpp:162
uint32_t type
Definition: fill.hpp:163
uint64_t type
Definition: fill.hpp:164
Definition: joinable_thread.hpp:12
Definition: numeric.hpp:81
Definition: pk_int4.hpp:21