/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/amd_transpose_load.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/amd_transpose_load.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/amd_transpose_load.hpp Source File
amd_transpose_load.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 #include "data_type.hpp"
6 
7 namespace ck {
8 
9 #if defined(__gfx12__)
10 template <typename T>
11 __device__ auto amd_global_load_transpose_to_vgpr(const T* in_ptr)
12 {
13  using vector_t = typename vector_type<T, 8>::type;
14  if constexpr(sizeof(T) == 2)
15  {
16  typedef __attribute__((__vector_size__(8 * sizeof(__fp16)))) __fp16 llvm_fp16x8_t;
17  __attribute__((address_space(1))) llvm_fp16x8_t* glb_ptr =
18  reinterpret_cast<__attribute__((address_space(1))) llvm_fp16x8_t*>(
19  reinterpret_cast<uintptr_t>(in_ptr));
20  return bit_cast<vector_t>(__builtin_amdgcn_global_load_tr_b128_v8f16(glb_ptr));
21  }
22  else if constexpr(sizeof(T) == 1)
23  {
24  typedef __attribute__((__vector_size__(2 * sizeof(int)))) int llvm_intx2_t;
25  __attribute__((address_space(1))) llvm_intx2_t* glb_ptr =
26  reinterpret_cast<__attribute__((address_space(1))) llvm_intx2_t*>(
27  reinterpret_cast<uintptr_t>(in_ptr));
28  return bit_cast<vector_t>(__builtin_amdgcn_global_load_tr_b64_v2i32(glb_ptr));
29  }
30  else
31  {
32  static_assert(false, "not implemented");
33  }
34 }
35 #endif
36 
37 } // namespace ck
Definition: ck.hpp:268
__host__ constexpr __device__ Y bit_cast(const X &x)
Definition: type.hpp:306
_W64 unsigned int uintptr_t
Definition: stdint.h:164