Composable Kernel: include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp Source File

gridwise_set_buffer_value.hpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

// (The include lines were dropped in the extracted listing; these are a plausible reconstruction.)
#include "ck/utility/common_header.hpp"
#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"

namespace ck {

// Grid-wise kernel that fills a 1-D global buffer with a single value.
// Each thread writes exactly one element through ThreadwiseTensorSliceTransfer_v1r3.
template <index_t BlockSize, typename DataType, typename Grid1dBufferDescType>
__global__ void kernel_buffer_set_value(const Grid1dBufferDescType grid_1d_buffer_desc,
                                        DataType* const __restrict__ p_global,
                                        DataType value)
{
    using PassThroughOp = tensor_operation::element_wise::PassThrough;

    constexpr auto I0 = Number<0>{};

    const index_t thread_local_id = get_thread_local_1d_id();
    const index_t block_global_id = get_block_1d_id();

    const index_t thread_global_id = block_global_id * BlockSize + thread_local_id;

    // Per-thread register (VGPR) buffer holding the single value to be written.
    // (This declaration was missing from the extracted listing; reconstructed here.)
    StaticBuffer<AddressSpaceEnum::Vgpr, DataType, 1, true> value_buf;

    value_buf(I0) = value;

    // Descriptor for the one-element source slice held in registers.
    constexpr auto val_buff_desc = make_naive_tensor_descriptor_packed(make_tuple(Number<1>{}));

    // Wrap the raw global pointer in a dynamic buffer covering the whole element space.
    auto global_buf = make_dynamic_buffer<AddressSpaceEnum::Global>(
        p_global, grid_1d_buffer_desc.GetElementSpaceSize());

    if(thread_global_id < grid_1d_buffer_desc.GetElementSize())
    {
        // One-element slice copy from the register buffer to global memory.
        // (Several template arguments were missing from the extracted listing; the
        // Sequence<> slice lengths / access order and the Set memory operation below
        // are reconstructed from the ThreadwiseTensorSliceTransfer_v1r3 signature.)
        auto threadwise_store = ThreadwiseTensorSliceTransfer_v1r3<DataType,
                                                                   DataType,
                                                                   decltype(val_buff_desc),
                                                                   Grid1dBufferDescType,
                                                                   PassThroughOp,
                                                                   Sequence<1>, // slice lengths
                                                                   Sequence<0>, // dim access order
                                                                   0,           // dst vector dim
                                                                   1,           // dst scalars per vector
                                                                   InMemoryDataOperationEnum::Set,
                                                                   1,           // dst scalar stride in vector
                                                                   true>(
            grid_1d_buffer_desc, make_multi_index(thread_global_id), PassThroughOp{});

        threadwise_store.Run(
            val_buff_desc, make_tuple(I0), value_buf, grid_1d_buffer_desc, global_buf);
    }
}

} // namespace ck
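For reference, a minimal host-side sketch of how this kernel might be launched. The destination descriptor is built with make_naive_tensor_descriptor_packed over the element count, mirroring what the kernel expects. The block size, buffer length, float data type, include paths, and the raw triple-chevron launch used here are illustrative assumptions, not part of this header.

// Minimal launch sketch (assumptions: hipcc/HIP runtime, float data,
// BlockSize and buffer_size chosen arbitrarily for illustration).
#include <hip/hip_runtime.h>
#include "ck/ck.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp"

int main()
{
    constexpr ck::index_t BlockSize   = 256;  // threads per block (assumed)
    const ck::index_t     buffer_size = 1024; // number of elements (assumed)

    float* p_dev = nullptr;
    hipMalloc(&p_dev, buffer_size * sizeof(float));

    // 1-D packed descriptor describing the destination buffer.
    const auto desc =
        ck::make_naive_tensor_descriptor_packed(ck::make_tuple(buffer_size));

    const ck::index_t grid_size = (buffer_size + BlockSize - 1) / BlockSize;

    // Every element of p_dev is set to 0.0f; each thread handles one element.
    ck::kernel_buffer_set_value<BlockSize, float, decltype(desc)>
        <<<dim3(grid_size), dim3(BlockSize), 0, nullptr>>>(desc, p_dev, 0.0f);

    hipDeviceSynchronize();
    hipFree(p_dev);
    return 0;
}

In the Composable Kernel library itself this kernel is typically dispatched through the device-operation layer rather than launched directly; the sketch above only illustrates the argument shapes the kernel expects.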