/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/amd_lds.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/amd_lds.hpp Source File

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/amd_lds.hpp Source File
amd_lds.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
8 #include "ck/utility/math.hpp"
9 
10 namespace ck {
11 
12 namespace lds_utils {
13 
24 template <typename DataType, index_t NumBuffers>
25 __device__ static auto AllocateLdsBuffers(void* lds_ptr,
26  int32_t num_elems_per_buffer,
27  int32_t start_offset_elems,
28  int32_t lds_alignment)
29 {
30  const DataType* lds_start = static_cast<DataType*>(lds_ptr) + start_offset_elems;
31  const int32_t single_buffer_offset =
32  math::integer_least_multiple(num_elems_per_buffer, lds_alignment);
33  return generate_tuple(
34  [&](auto i) {
35  const int32_t local_offset = i * single_buffer_offset;
36  return make_dynamic_buffer<AddressSpaceEnum::Lds>(lds_start + local_offset,
37  num_elems_per_buffer);
38  },
40 }
41 
42 } // namespace lds_utils
43 } // namespace ck
__host__ constexpr __device__ auto integer_least_multiple(X x, Y y)
Definition: math.hpp:78
Definition: ck.hpp:267
__host__ constexpr __device__ auto generate_tuple(F &&f, Number< N >)
Definition: tuple_helper.hpp:21
signed int int32_t
Definition: stdint.h:123
Definition: integral_constant.hpp:20