include/ck_tile/host/reference/reference_reduce.hpp Source File

include/ck_tile/host/reference/reference_reduce.hpp Source File

Composable Kernel: include/ck_tile/host/reference/reference_reduce.hpp Source File
reference_reduce.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include "ck_tile/core.hpp"
8 #include <thread>
9 
10 namespace ck_tile {
11 
12 template <typename XDataType, typename ComputeDataType, typename YDataType, typename ReduceOp>
13 CK_TILE_HOST void
14 reference_reduce(const HostTensor<XDataType>& x_m_n, HostTensor<YDataType>& y_m, ReduceOp reduce_op)
15 {
16  auto f = [&](auto m) {
17  const int N = x_m_n.mDesc.get_lengths()[1];
18 
19  ComputeDataType v_acc = reduce_op.template GetIdentityValue<ComputeDataType>();
20 
21  for(int n = 0; n < N; ++n)
22  {
23  const ComputeDataType v_a = type_convert<ComputeDataType>(x_m_n(m, n));
24 
25  v_acc = reduce_op(v_acc, v_a);
26  }
27 
28  y_m(m) = ck_tile::type_convert<YDataType>(v_acc);
29  };
30 
31  make_ParallelTensorFunctor(f, y_m.mDesc.get_lengths()[0])(std::thread::hardware_concurrency());
32 }
33 } // namespace ck_tile
#define CK_TILE_HOST
Definition: config.hpp:39
Definition: cluster_descriptor.hpp:13
CK_TILE_HOST auto make_ParallelTensorFunctor(F f, Xs... xs)
Definition: host_tensor.hpp:272
CK_TILE_HOST void reference_reduce(const HostTensor< XDataType > &x_m_n, HostTensor< YDataType > &y_m, ReduceOp reduce_op)
Definition: reference_reduce.hpp:14
const std::vector< std::size_t > & get_lengths() const
Definition: host_tensor.hpp:162
Definition: host_tensor.hpp:279
Descriptor mDesc
Definition: host_tensor.hpp:678