26 template <
typename Range>
27 std::ostream&
LogRange(std::ostream& os, Range&& range, std::string delim)
41 template <
typename T,
typename Range>
42 std::ostream&
LogRangeAsType(std::ostream& os, Range&& range, std::string delim)
53 if constexpr(std::is_same_v<RangeType, ck::f8_t> || std::is_same_v<RangeType, ck::bf8_t> ||
54 std::is_same_v<RangeType, ck::bhalf_t>)
56 os << ck::type_convert<float>(v);
58 else if constexpr(std::is_same_v<RangeType, ck::pk_i4_t> ||
59 std::is_same_v<RangeType, ck::f4x2_pk_t>)
61 const auto packed_floats = ck::type_convert<ck::float2_t>(v);
63 os << vector_of_floats.template AsType<float>()[
ck::Number<0>{}] << delim
64 << vector_of_floats.template AsType<float>()[
ck::Number<1>{}];
68 os << static_cast<T>(v);
74 template <
typename F,
typename T, std::size_t... Is>
77 return f(std::get<Is>(args)...);
80 template <
typename F,
typename T>
83 constexpr std::size_t N = std::tuple_size<T>{};
88 template <
typename F,
typename T, std::size_t... Is>
91 return F(std::get<Is>(args)...);
94 template <
typename F,
typename T>
97 constexpr std::size_t N = std::tuple_size<T>{};
99 return construct_f_unpack_args_impl<F>(args, std::make_index_sequence<N>{});
185 template <
typename Layout>
187 std::vector<std::size_t> strides,
189 : mLens(std::move(lens)), mStrides(std::move(strides))
195 std::cout <<
"Original Lens: [";
196 LogRange(std::cout, mLens,
", ") <<
"] and Strides: [";
197 LogRange(std::cout, mStrides,
", ") <<
"]" << std::endl;
198 std::cout <<
"Layout: " <<
layout <<
" --> " << new_layout << std::endl;
213 template <
typename F,
typename OrigLayout>
221 default: f(orig);
break;
225 template <
typename Layout>
228 if constexpr(!std::is_same_v<Layout, DefaultLayout>)
240 const auto rank = mLens.size();
261 if(mStrides.size() == 2)
281 template <
typename Layout>
284 if constexpr(std::is_same_v<Layout, ck::tensor_layout::BypassLayoutVerification>)
288 auto strides_int = AsInt(mStrides);
291 if(mStrides.empty() || std::all_of(strides_int.begin(), strides_int.end(), [](
int stride) {
296 if constexpr(!(std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ||
297 std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>))
299 std::cerr <<
"Only RowMajor and ColumnMajor layouts are supported for empty "
301 <<
layout <<
". Will calculate strides as RowMajor." << std::endl;
305 mStrides.resize(mLens.size(), 0);
310 std::partial_sum(mLens.rbegin(),
312 mStrides.rbegin() + 1,
313 std::multiplies<std::size_t>());
315 if constexpr(std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>)
318 if(mStrides.size() >= 2)
319 std::swap(mStrides[mStrides.size() - 1], mStrides[mStrides.size() - 2]);
326 else if constexpr(std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ||
327 std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>)
329 auto rank = mStrides.size();
330 if(mLens.size() >= 2 &&
rank >= 2)
332 const auto inner_idx =
333 std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ?
rank - 1 :
rank - 2;
334 const auto outer_idx = inner_idx ==
rank - 1 ?
rank - 2 :
rank - 1;
335 if(mStrides[inner_idx] <= 0)
337 mStrides[inner_idx] = 1;
339 if(mStrides[outer_idx] <= 0)
341 mStrides[outer_idx] = mLens[inner_idx] * mStrides[inner_idx];
347 template <
typename Layout>
350 if constexpr(std::is_same_v<ck::tensor_layout::BypassLayoutVerification, Layout>)
357 throw std::runtime_error(
358 "HostTensorDescriptor::ValidateStrides: empty tensor dimensions is not allowed.");
361 const int rank = mLens.size();
367 if constexpr(std::is_same_v<ck::tensor_layout::BaseTensorLayout, Layout>)
371 throw std::runtime_error(
"HostTensorDescriptor::ValidateStrides: Abstract tensor "
372 "layout BaseTensorLayout can't be verified. Pls "
373 "pass specific tensor layout to HostTensorDescriptor (or "
374 "ck::tensor_layout::BypassLayoutVerification)");
378 if constexpr(std::is_base_of_v<ck::tensor_layout::gemm::BaseGemmLayout, Layout>)
380 if(mLens.size() != mStrides.size())
382 std::ostringstream oss;
383 oss <<
"HostTensorDescriptor::ValidateStrides: mismatch between tensor rank and "
386 throw std::runtime_error(oss.str());
391 auto strides_int = AsInt(mStrides);
393 strides_int.begin(), strides_int.end(), [](
int stride) { return stride <= 0; }))
395 std::ostringstream oss;
396 oss <<
"Stride values must be positive or all-zeros (auto-derived from tensor "
397 "dimensions). Instead got ";
399 strides_int.begin(), strides_int.end(), std::ostream_iterator<int>(oss,
" "));
400 throw std::runtime_error(oss.str());
403 if constexpr(std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ||
404 std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>)
408 const auto inner_idx =
409 std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ?
rank - 1 :
rank - 2;
410 const auto outer_idx = inner_idx ==
rank - 1 ?
rank - 2 :
rank - 1;
412 if(mStrides[outer_idx] < mLens[inner_idx] * mStrides[inner_idx])
414 std::ostringstream oss;
415 oss <<
"Invalid strides for " <<
layout <<
": " << *
this;
416 throw std::runtime_error(oss.str());
420 for(
int i = 1; i <
rank - 2; ++i)
422 if(mStrides[i - 1] < mStrides[i] * mLens[i])
424 std::ostringstream oss;
425 oss <<
"Invalid strides for higher dimensions in " <<
layout <<
": "
427 throw std::runtime_error(oss.str());
433 std::ostringstream oss;
434 oss <<
"Error: Unsupported GEMM layout: " <<
layout;
435 throw std::runtime_error(oss.str());
444 std::cerr <<
"Warning: Tensor layout verification for ck::tensor_layout::convolution "
445 "layouts is not supported yet. Skipping..."
451 std::ostringstream oss;
452 oss <<
"Error: Tensor layout verification for " <<
layout <<
" is not supported yet.";
453 throw std::runtime_error(oss.str());
457 template <
typename X,
459 typename = std::enable_if_t<std::is_convertible_v<X, std::size_t> &&
460 std::is_convertible_v<Layout, BaseTensorLayout>>>
465 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
469 typename = std::enable_if_t<std::is_convertible_v<Layout, BaseTensorLayout>>>
475 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
478 template <
typename Lengths,
481 (std::is_convertible_v<ck::ranges::range_value_t<Lengths>, std::size_t> ||
482 std::is_convertible_v<ck::ranges::range_value_t<Lengths>,
ck::long_index_t>) &&
483 std::is_convertible_v<Layout, BaseTensorLayout>>>
488 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
491 template <
typename X,
493 typename = std::enable_if_t<std::is_convertible_v<X, std::size_t> &&
494 std::is_convertible_v<Y, std::size_t>>,
497 const std::initializer_list<Y>& strides,
500 std::vector<std::size_t>(strides.begin(), strides.end()),
504 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
508 template <
typename Layout = DefaultLayout>
510 const std::initializer_list<ck::long_index_t>& strides,
513 std::vector<std::size_t>(strides.begin(), strides.end()),
517 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
521 template <
typename Strides,
typename Layout = DefaultLayout>
523 const Strides& strides,
526 std::vector<std::size_t>(strides.begin(), strides.end()),
530 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
533 template <
typename Lengths,
537 ((std::is_convertible_v<ck::ranges::range_value_t<Lengths>, std::size_t> &&
538 std::is_convertible_v<ck::ranges::range_value_t<Strides>, std::size_t>) ||
541 std::is_convertible_v<Layout, BaseTensorLayout>>>
543 const Strides& strides,
546 std::vector<std::size_t>(strides.begin(), strides.end()),
550 std::cout <<
"HostTensorDescriptor ctor (" << __LINE__ <<
")" << std::endl;
560 template <
typename... Is>
564 std::initializer_list<std::size_t> iss{
static_cast<std::size_t
>(is)...};
577 std::vector<std::size_t> mLens;
578 std::vector<std::size_t> mStrides;
579 static constexpr
bool dbg =
false;
587 std::vector<int> AsInt(
const std::vector<size_t>& vec)
const
589 std::vector<int> strides_int(vec.size());
590 std::transform(vec.begin(), vec.end(), strides_int.begin(), [](std::size_t stride) {
591 return static_cast<int>(stride);
597 template <
typename New2Old,
typename NewLayout = HostTensorDescriptor::BaseTensorLayout>
600 const New2Old& new2old,
601 const NewLayout& new_layout = NewLayout())
603 std::vector<std::size_t> new_lengths(
a.GetNumOfDimension());
604 std::vector<std::size_t> new_strides(
a.GetNumOfDimension());
606 for(std::size_t i = 0; i <
a.GetNumOfDimension(); i++)
608 new_lengths[i] =
a.GetLengths()[new2old[i]];
609 new_strides[i] =
a.GetStrides()[new2old[i]];
617 template <
typename... Xs>
632 template <
typename F,
typename... Xs>
636 static constexpr std::size_t
NDIM =
sizeof...(Xs);
637 std::array<std::size_t, NDIM>
mLens;
644 std::partial_sum(
mLens.rbegin(),
647 std::multiplies<std::size_t>());
653 std::array<std::size_t, NDIM> indices;
655 for(std::size_t idim = 0; idim <
NDIM; ++idim)
658 i -= indices[idim] *
mStrides[idim];
666 std::size_t work_per_thread = (
mN1d + num_thread - 1) / num_thread;
668 std::vector<joinable_thread> threads(num_thread);
670 for(std::size_t it = 0; it < num_thread; ++it)
672 std::size_t iw_begin = it * work_per_thread;
673 std::size_t iw_end =
std::min((it + 1) * work_per_thread,
mN1d);
675 auto f = [=, *
this] {
676 for(std::size_t iw = iw_begin; iw < iw_end; ++iw)
686 template <
typename F,
typename... Xs>
692 template <
typename T>
698 template <
typename X>
703 template <
typename X,
typename Y>
704 Tensor(std::initializer_list<X> lens, std::initializer_list<Y> strides)
709 template <
typename Lengths>
714 template <
typename Lengths,
typename Strides>
715 Tensor(
const Lengths& lens,
const Strides& strides)
720 template <
typename X,
typename... Rest,
std::enable_if_t<(
sizeof...(Rest) > 0),
int> = 0>
721 Tensor(std::initializer_list<X> lens, Rest&&... rest)
726 template <
typename X,
730 Tensor(std::initializer_list<X> lens, std::initializer_list<Y> strides, Rest&&... rest)
735 template <
typename Lengths,
typename... Rest,
std::enable_if_t<(
sizeof...(Rest) > 0),
int> = 0>
736 Tensor(
const Lengths& lens, Rest&&... rest)
741 template <
typename Lengths,
745 Tensor(
const Lengths& lens,
const Strides& strides, Rest&&... rest)
752 template <
typename OutT>
758 mData, ret.
mData.begin(), [](
auto value) { return ck::type_convert<OutT>(value); });
772 template <
typename FromT>
776 void savetxt(std::string file_name, std::string dtype =
"float")
778 std::ofstream file(file_name);
782 for(
auto& itm :
mData)
785 file << ck::type_convert<float>(itm) << std::endl;
786 else if(dtype ==
"int")
787 file << ck::type_convert<int>(itm) << std::endl;
791 file << ck::type_convert<float>(itm) << std::endl;
799 throw std::runtime_error(std::string(
"unable to open file:") + file_name);
826 template <
typename F>
842 template <
typename F>
849 template <
typename F>
865 template <
typename F>
872 template <
typename G>
878 auto f = [&](
auto i) { (*this)(i) = g(i); };
883 auto f = [&](
auto i0,
auto i1) { (*this)(i0, i1) = g(i0, i1); };
888 auto f = [&](
auto i0,
auto i1,
auto i2) { (*this)(i0, i1, i2) = g(i0, i1, i2); };
894 auto f = [&](
auto i0,
auto i1,
auto i2,
auto i3) {
895 (*this)(i0, i1, i2, i3) = g(i0, i1, i2, i3);
905 auto f = [&](
auto i0,
auto i1,
auto i2,
auto i3,
auto i4) {
906 (*this)(i0, i1, i2, i3, i4) = g(i0, i1, i2, i3, i4);
917 auto f = [&](
auto i0,
auto i1,
auto i2,
auto i3,
auto i4,
auto i5) {
918 (*this)(i0, i1, i2, i3, i4, i5) = g(i0, i1, i2, i3, i4, i5);
930 auto f = [&](
auto i0,
942 (*this)(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11) =
943 g(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11);
960 default:
throw std::runtime_error(
"unspported dimension");
966 template <
typename Distribution = std::uniform_real_distribution<
float>,
967 typename Mapping = ck::
identity,
968 typename Generator = std::minstd_rand>
971 const Generator g = Generator(0),
972 std::size_t num_thread = -1)
976 if(num_thread == -1ULL)
980 constexpr std::size_t BLOCK_BYTES = 64;
981 constexpr std::size_t BLOCK_SIZE = BLOCK_BYTES /
sizeof(T);
986 std::vector<std::thread> threads;
987 threads.reserve(num_thread - 1);
988 const auto dst =
const_cast<T*
>(this->
mData.data());
990 for(
int it = num_thread - 1; it >= 0; --it)
992 std::size_t ib_begin = it * blocks_per_thread;
993 std::size_t ib_end =
min(ib_begin + blocks_per_thread, num_blocks);
998 g_.discard(ib_begin * BLOCK_SIZE * ck::packed_size_v<T>);
1005 if constexpr(ck::is_same_v<T, ck::f8_t> || ck::is_same_v<T, ck::bf8_t>)
1006 return ck::type_convert<T>(
static_cast<float>(fn(dis_(g_))));
1007 else if constexpr(ck::packed_size_v<T> == 1)
1010 return
ck::f4x2_pk_t{ck::type_convert<ck::f4x2_t>(
1012 ck::type_convert<float>(fn(dis_(g_)))})};
1013 else if constexpr(ck::is_same_v<T, ck::f6x32_pk_t> ||
1014 ck::is_same_v<T, ck::bf6x32_pk_t>)
1016 return ck::type_convert<T>(
1018 ck::type_convert<float>(fn(dis_(g_))),
1019 ck::type_convert<float>(fn(dis_(g_))),
1020 ck::type_convert<float>(fn(dis_(g_))),
1021 ck::type_convert<float>(fn(dis_(g_))),
1022 ck::type_convert<float>(fn(dis_(g_))),
1023 ck::type_convert<float>(fn(dis_(g_))),
1024 ck::type_convert<float>(fn(dis_(g_))),
1025 ck::type_convert<float>(fn(dis_(g_))),
1026 ck::type_convert<float>(fn(dis_(g_))),
1027 ck::type_convert<float>(fn(dis_(g_))),
1028 ck::type_convert<float>(fn(dis_(g_))),
1029 ck::type_convert<float>(fn(dis_(g_))),
1030 ck::type_convert<float>(fn(dis_(g_))),
1031 ck::type_convert<float>(fn(dis_(g_))),
1032 ck::type_convert<float>(fn(dis_(g_))),
1033 ck::type_convert<float>(fn(dis_(g_))),
1034 ck::type_convert<float>(fn(dis_(g_))),
1035 ck::type_convert<float>(fn(dis_(g_))),
1036 ck::type_convert<float>(fn(dis_(g_))),
1037 ck::type_convert<float>(fn(dis_(g_))),
1038 ck::type_convert<float>(fn(dis_(g_))),
1039 ck::type_convert<float>(fn(dis_(g_))),
1040 ck::type_convert<float>(fn(dis_(g_))),
1041 ck::type_convert<float>(fn(dis_(g_))),
1042 ck::type_convert<float>(fn(dis_(g_))),
1043 ck::type_convert<float>(fn(dis_(g_))),
1044 ck::type_convert<float>(fn(dis_(g_))),
1045 ck::type_convert<float>(fn(dis_(g_))),
1046 ck::type_convert<float>(fn(dis_(g_))),
1047 ck::type_convert<float>(fn(dis_(g_))),
1048 ck::type_convert<float>(fn(dis_(g_)))});
1050 else if constexpr(ck::is_same_v<T, ck::f6x16_pk_t> ||
1051 ck::is_same_v<T, ck::bf6x16_pk_t>)
1053 return ck::type_convert<T>(
1055 ck::type_convert<float>(fn(dis_(g_))),
1056 ck::type_convert<float>(fn(dis_(g_))),
1057 ck::type_convert<float>(fn(dis_(g_))),
1058 ck::type_convert<float>(fn(dis_(g_))),
1059 ck::type_convert<float>(fn(dis_(g_))),
1060 ck::type_convert<float>(fn(dis_(g_))),
1061 ck::type_convert<float>(fn(dis_(g_))),
1062 ck::type_convert<float>(fn(dis_(g_))),
1063 ck::type_convert<float>(fn(dis_(g_))),
1064 ck::type_convert<float>(fn(dis_(g_))),
1065 ck::type_convert<float>(fn(dis_(g_))),
1066 ck::type_convert<float>(fn(dis_(g_))),
1067 ck::type_convert<float>(fn(dis_(g_))),
1068 ck::type_convert<float>(fn(dis_(g_))),
1069 ck::type_convert<float>(fn(dis_(g_)))});
1072 static_assert(
false,
"Unsupported packed size for T");
1075 std::size_t ib = ib_begin;
1076 for(; ib < ib_end - 1; ++ib)
1078 constexpr
size_t iw = iw_.value;
1079 dst[ib * BLOCK_SIZE + iw] = t_fn();
1081 for(std::size_t iw = 0; iw < BLOCK_SIZE; ++iw)
1082 if(ib * BLOCK_SIZE + iw < element_space_size)
1083 dst[ib * BLOCK_SIZE + iw] = t_fn();
1087 threads.emplace_back(std::move(job));
1091 for(
auto& t : threads)
1095 template <
typename... Is>
1101 template <
typename... Is>
1105 ck::packed_size_v<ck::remove_cvref_t<T>>];
1108 template <
typename... Is>
1112 ck::packed_size_v<ck::remove_cvref_t<T>>];
1131 typename Data::const_iterator
begin()
const {
return mData.begin(); }
1133 typename Data::const_iterator
end()
const {
return mData.end(); }
1135 typename Data::const_pointer
data()
const {
return mData.data(); }
1137 typename Data::size_type
size()
const {
return mData.size(); }
1139 template <
typename U = T>
1142 constexpr std::size_t FromSize =
sizeof(T);
1143 constexpr std::size_t ToSize =
sizeof(U);
1145 using Element = std::add_const_t<std::remove_reference_t<U>>;
1149 template <
typename U = T>
1152 constexpr std::size_t FromSize =
sizeof(T);
1153 constexpr std::size_t ToSize =
sizeof(U);
1155 using Element = std::remove_reference_t<U>;
__host__ constexpr __device__ auto rank([[maybe_unused]] const Layout< Shape, UnrolledDescriptorType > &layout)
Get layout rank (num elements in shape).
Definition: layout_utils.hpp:310
auto call_f_unpack_args_impl(F f, T args, std::index_sequence< Is... >)
Definition: host_tensor.hpp:75
std::ostream & LogRangeAsType(std::ostream &os, Range &&range, std::string delim)
Definition: host_tensor.hpp:42
auto construct_f_unpack_args_impl(T args, std::index_sequence< Is... >)
Definition: host_tensor.hpp:89
HostTensorDescriptor transpose_host_tensor_descriptor_given_new2old(const HostTensorDescriptor &a, const New2Old &new2old, const NewLayout &new_layout=NewLayout())
Definition: host_tensor.hpp:599
auto call_f_unpack_args(F f, T args)
Definition: host_tensor.hpp:81
auto construct_f_unpack_args(F, T args)
Definition: host_tensor.hpp:95
auto make_ParallelTensorFunctor(F f, Xs... xs)
Definition: host_tensor.hpp:687
std::ostream & LogRange(std::ostream &os, Range &&range, std::string delim)
Definition: host_tensor.hpp:27
__host__ constexpr __device__ auto integer_divide_ceil(X x, Y y)
Definition: math.hpp:72
__host__ constexpr __device__ T min(T x)
Definition: math.hpp:116
auto transform(InputRange &&range, OutputIterator iter, UnaryOperation unary_op) -> decltype(std::transform(std::begin(range), std::end(range), iter, unary_op))
Definition: algorithm.hpp:36
auto copy(InputRange &&range, OutputIterator iter) -> decltype(std::copy(std::begin(std::forward< InputRange >(range)), std::end(std::forward< InputRange >(range)), iter))
Definition: algorithm.hpp:14
iter_value_t< ranges::iterator_t< R > > range_value_t
Definition: ranges.hpp:28
typename vector_type< float, 16 >::type float16_t
Definition: dtype_vector.hpp:2148
unsigned int get_available_cpu_cores()
Definition: thread.hpp:11
int64_t long_index_t
Definition: ck.hpp:300
typename vector_type< float, 2 >::type float2_t
Definition: dtype_vector.hpp:2145
__host__ constexpr __device__ Y type_convert(X x)
Definition: type_convert.hpp:98
constexpr bool is_base_of_v
Definition: type.hpp:286
constexpr bool is_same_v
Definition: type.hpp:283
constexpr bool is_packed_type_v
Definition: data_type.hpp:414
remove_cv_t< remove_reference_t< T > > remove_cvref_t
Definition: type.hpp:297
typename std::enable_if< B, T >::type enable_if_t
Definition: enable_if.hpp:27
__device__ void inner_product(const TA &a, const TB &b, TC &c)
typename vector_type< float, 32 >::type float32_t
Definition: dtype_vector.hpp:2149
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1350
const GenericPointer< typename T::ValueType > & pointer
Definition: pointer.h:1249
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1249
A descriptor class for host tensors that manages tensor dimensions, strides, and layout.
Definition: host_tensor.hpp:171
HostTensorDescriptor()
Definition: host_tensor.hpp:208
void DispatchChosenLayout(ChosenLayout tag, const OrigLayout &orig, F &&f) const
Definition: host_tensor.hpp:214
HostTensorDescriptor(const std::initializer_list< ck::long_index_t > &lens, const std::initializer_list< ck::long_index_t > &strides, const Layout &layout=Layout{})
Definition: host_tensor.hpp:509
const std::vector< std::size_t > & GetStrides() const
HostTensorDescriptor(const std::initializer_list< X > &lens, const Layout &layout=Layout{})
Definition: host_tensor.hpp:461
std::size_t GetElementSize() const
const std::vector< std::size_t > & GetLengths() const
HostTensorDescriptor(const std::initializer_list< ck::long_index_t > &lens, const Layout &layout=Layout{})
Definition: host_tensor.hpp:470
std::size_t GetOffsetFromMultiIndex(Is... is) const
Definition: host_tensor.hpp:561
void ValidateStrides(const Layout &layout) const
Definition: host_tensor.hpp:348
HostTensorDescriptor(std::vector< std::size_t > lens, std::vector< std::size_t > strides, const Layout &layout=DefaultLayout())
Definition: host_tensor.hpp:186
void CalculateStrides(const Layout &layout)
Definition: host_tensor.hpp:282
HostTensorDescriptor(const std::initializer_list< X > &lens, const std::initializer_list< Y > &strides, const Layout &layout=Layout{})
Definition: host_tensor.hpp:496
HostTensorDescriptor(const std::initializer_list< std::size_t > &lens, const Strides &strides, const Layout &layout=Layout{})
Definition: host_tensor.hpp:522
ChosenLayout HandleDefaultLayout(const Layout &)
Definition: host_tensor.hpp:226
HostTensorDescriptor(const Lengths &lens, const Strides &strides, const Layout &layout=Layout{})
Definition: host_tensor.hpp:542
std::size_t GetNumOfDimension() const
HostTensorDescriptor(const Lengths &lens, const Layout &layout=Layout{})
Definition: host_tensor.hpp:484
friend std::ostream & operator<<(std::ostream &os, ChosenLayout tag)
std::size_t GetElementSpaceSize() const
BaseTensorLayout DefaultLayout
Definition: host_tensor.hpp:173
ck::tensor_layout::BaseTensorLayout BaseTensorLayout
Definition: host_tensor.hpp:172
ChosenLayout
Definition: host_tensor.hpp:178
std::size_t GetOffsetFromMultiIndex(const std::vector< std::size_t > &iss) const
Definition: host_tensor.hpp:568
friend std::ostream & operator<<(std::ostream &os, const HostTensorDescriptor &desc)
Layout wrapper that performs the tensor descriptor logic.
Definition: layout.hpp:24
Definition: host_tensor.hpp:634
std::array< std::size_t, NDIM > GetNdIndices(std::size_t i) const
Definition: host_tensor.hpp:651
F mF
Definition: host_tensor.hpp:635
std::size_t mN1d
Definition: host_tensor.hpp:639
ParallelTensorFunctor(F f, Xs... xs)
Definition: host_tensor.hpp:641
std::array< std::size_t, NDIM > mLens
Definition: host_tensor.hpp:637
std::array< std::size_t, NDIM > mStrides
Definition: host_tensor.hpp:638
void operator()(std::size_t num_thread=1) const
Definition: host_tensor.hpp:664
static constexpr std::size_t NDIM
Definition: host_tensor.hpp:636
Tensor wrapper that performs static and dynamic buffer logic. The tensor is based on a descriptor sto...
Definition: host_tensor.hpp:694
auto AsSpan() const
Definition: host_tensor.hpp:1140
Tensor(std::initializer_list< X > lens, std::initializer_list< Y > strides, Rest &&... rest)
Definition: host_tensor.hpp:730
Tensor(const Lengths &lens, Rest &&... rest)
Definition: host_tensor.hpp:736
Tensor(const Lengths &lens, const Strides &strides)
Definition: host_tensor.hpp:715
std::size_t GetNumOfDimension() const
Definition: host_tensor.hpp:806
T & operator()(const std::vector< std::size_t > &idx)
Definition: host_tensor.hpp:1115
void ForEach(const F &&f) const
Definition: host_tensor.hpp:866
decltype(auto) GetLengths() const
Definition: host_tensor.hpp:802
Data::const_iterator end() const
Definition: host_tensor.hpp:1133
std::size_t GetOffsetFromMultiIndex(Is... is) const
Definition: host_tensor.hpp:1096
Tensor< OutT > CopyAsType() const
Definition: host_tensor.hpp:753
const T & operator()(const std::vector< std::size_t > &idx) const
Definition: host_tensor.hpp:1120
void ForEach(F &&f)
Definition: host_tensor.hpp:843
Data::pointer data()
Definition: host_tensor.hpp:1129
void ForEach_impl(F &&f, std::vector< size_t > &idx, size_t rank)
Definition: host_tensor.hpp:827
std::size_t GetElementSpaceSizeInBytes() const
Definition: host_tensor.hpp:822
void ForEach_impl(const F &&f, std::vector< size_t > &idx, size_t rank) const
Definition: host_tensor.hpp:850
Tensor & operator=(const Tensor &)=default
std::vector< T > Data
Definition: host_tensor.hpp:696
Data mData
Definition: host_tensor.hpp:1160
Data::iterator end()
Definition: host_tensor.hpp:1127
void GenerateTensorDistr(Distribution dis={0.f, 1.f}, Mapping fn={}, const Generator g=Generator(0), std::size_t num_thread=-1)
Definition: host_tensor.hpp:969
std::size_t GetElementSize() const
Definition: host_tensor.hpp:808
Tensor(const Lengths &lens, const Strides &strides, Rest &&... rest)
Definition: host_tensor.hpp:745
void SetZero()
Definition: host_tensor.hpp:824
Tensor(const Lengths &lens)
Definition: host_tensor.hpp:710
void savetxt(std::string file_name, std::string dtype="float")
Definition: host_tensor.hpp:776
Tensor(Tensor &&)=default
const T & operator()(Is... is) const
Definition: host_tensor.hpp:1109
Data::const_pointer data() const
Definition: host_tensor.hpp:1135
auto AsSpan()
Definition: host_tensor.hpp:1150
Data::iterator begin()
Definition: host_tensor.hpp:1125
Tensor(std::initializer_list< X > lens, std::initializer_list< Y > strides)
Definition: host_tensor.hpp:704
Tensor(const Tensor &)=default
Tensor(const Descriptor &desc)
Definition: host_tensor.hpp:750
Descriptor mDesc
Definition: host_tensor.hpp:1159
Tensor & operator=(Tensor &&)=default
Data::const_iterator begin() const
Definition: host_tensor.hpp:1131
std::size_t GetElementSpaceSize() const
Definition: host_tensor.hpp:810
Tensor(const Tensor< FromT > &other)
Definition: host_tensor.hpp:773
Data::size_type size() const
Definition: host_tensor.hpp:1137
void GenerateTensorValue(G g, std::size_t num_thread=1)
Definition: host_tensor.hpp:873
decltype(auto) GetStrides() const
Definition: host_tensor.hpp:804
T & operator()(Is... is)
Definition: host_tensor.hpp:1102
Tensor(std::initializer_list< X > lens)
Definition: host_tensor.hpp:699
Tensor(std::initializer_list< X > lens, Rest &&... rest)
Definition: host_tensor.hpp:721
Definition: integral_constant.hpp:20
Definition: functional2.hpp:33
Definition: tensor_layout.hpp:10
Definition: tensor_layout.hpp:45
Definition: tensor_layout.hpp:31
Definition: tensor_layout.hpp:26
Definition: dtype_vector.hpp:10
Definition: host_tensor.hpp:616
joinable_thread(joinable_thread &&)=default
joinable_thread(Xs &&... xs)
Definition: host_tensor.hpp:618
~joinable_thread()
Definition: host_tensor.hpp:625
joinable_thread & operator=(joinable_thread &&)=default
__host__ constexpr __device__ const auto & layout(const Tensor< BufferAddressSpace, ElementType, Shape, UnrolledDescriptorType > &tensor)
Get Tensor Layout.
Definition: tensor_utils.hpp:162