/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/host/permute_pk_int4.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/host/permute_pk_int4.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/host/permute_pk_int4.hpp Source File
permute_pk_int4.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c), Advanced Micro Devices, Inc. All rights reserved.
3 #pragma once
4 
6 namespace ck_tile {
7 
28 template <typename Tensor>
30 {
31  auto tensor_row_buf = tensor.data();
32  for(size_t idx = 0; idx < tensor.size(); idx += 4)
33  {
34  int8_t input[8];
35 
36  for(int k = 0; k < 4; k++)
37  {
38  int8_t i4x2 = bit_cast<int8_t>(tensor_row_buf[idx + k]);
39  input[k * 2 + 0] = (i4x2 >> 4) & 0xf;
40  input[k * 2 + 1] = (i4x2 >> 0) & 0xf;
41  }
42 
43  // permute 0x76543210 => 0x75316420
44  {
45  int8_t hi = input[2];
46  int8_t lo = input[0];
47  int8_t i4x2 = (hi << 4) | lo;
48 
49  tensor_row_buf[idx + 0] = bit_cast<pk_int4_t>(i4x2);
50  }
51 
52  {
53  int8_t hi = input[6];
54  int8_t lo = input[4];
55  int8_t i4x2 = (hi << 4) | lo;
56 
57  tensor_row_buf[idx + 1] = bit_cast<pk_int4_t>(i4x2);
58  }
59 
60  {
61  int8_t hi = input[3];
62  int8_t lo = input[1];
63  int8_t i4x2 = (hi << 4) | lo;
64 
65  tensor_row_buf[idx + 2] = bit_cast<pk_int4_t>(i4x2);
66  }
67 
68  {
69  int8_t hi = input[7];
70  int8_t lo = input[5];
71  int8_t i4x2 = (hi << 4) | lo;
72 
73  tensor_row_buf[idx + 3] = bit_cast<pk_int4_t>(i4x2);
74  }
75  }
76 }
77 
78 } // namespace ck_tile
Definition: cluster_descriptor.hpp:13
int8_t int8_t
Definition: int8.hpp:20
void permute_vectors_i4x4_b(Tensor &tensor)
Permute packed int4 vectors for device implementation compatibility.
Definition: permute_pk_int4.hpp:29
Tensor wrapper that performs static and dynamic buffer logic. The tensor is based on a descriptor sto...
Definition: host_tensor.hpp:694
Data::pointer data()
Definition: host_tensor.hpp:1129
Data::size_type size() const
Definition: host_tensor.hpp:1137