include/ck_tile/host/device_memory.hpp Source File

include/ck_tile/host/device_memory.hpp Source File#

Composable Kernel: include/ck_tile/host/device_memory.hpp Source File
device_memory.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include <hip/hip_runtime.h>
7 #include <stdint.h>
8 #include <stdexcept>
11 
12 namespace ck_tile {
13 template <typename T>
14 __global__ void set_buffer_value(T* p, T x, uint64_t buffer_element_size)
15 {
16  for(uint64_t i = threadIdx.x; i < buffer_element_size; i += blockDim.x)
17  {
18  p[i] = x;
19  }
20 }
21 
26 struct DeviceMem
27 {
28  DeviceMem() : mpDeviceBuf(nullptr), mMemSize(0) {}
29  DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
30  {
31  if(mMemSize != 0)
32  {
33  HIP_CHECK_ERROR(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
34  }
35  else
36  {
37  mpDeviceBuf = nullptr;
38  }
39  }
40  template <typename T>
41  DeviceMem(const HostTensor<T>& t) : mMemSize(t.get_element_space_size_in_bytes())
42  {
43  if(mMemSize != 0)
44  {
45  HIP_CHECK_ERROR(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
46  }
47  else
48  {
49  mpDeviceBuf = nullptr;
50  }
51  ToDevice(t.data());
52  }
53  void Realloc(std::size_t mem_size)
54  {
55  if(mpDeviceBuf)
56  {
57  HIP_CHECK_ERROR(hipFree(mpDeviceBuf));
58  }
59  mMemSize = mem_size;
60  if(mMemSize != 0)
61  {
62  HIP_CHECK_ERROR(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
63  }
64  else
65  {
66  mpDeviceBuf = nullptr;
67  }
68  }
69  void* GetDeviceBuffer() const { return mpDeviceBuf; }
70  std::size_t GetBufferSize() const { return mMemSize; }
71  void ToDevice(const void* p) const
72  {
73  if(mpDeviceBuf)
74  {
76  hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice));
77  }
78  // else
79  // {
80  // throw std::runtime_error("ToDevice with an empty pointer");
81  // }
82  }
83  void ToDevice(const void* p, const std::size_t cpySize) const
84  {
85  if(mpDeviceBuf)
86  {
88  hipMemcpy(mpDeviceBuf, const_cast<void*>(p), cpySize, hipMemcpyHostToDevice));
89  }
90  }
91  void FromDevice(void* p) const
92  {
93  if(mpDeviceBuf)
94  {
95  HIP_CHECK_ERROR(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
96  }
97  // else
98  // {
99  // throw std::runtime_error("FromDevice with an empty pointer");
100  // }
101  }
102  void FromDevice(void* p, const std::size_t cpySize) const
103  {
104  if(mpDeviceBuf)
105  {
106  HIP_CHECK_ERROR(hipMemcpy(p, mpDeviceBuf, cpySize, hipMemcpyDeviceToHost));
107  }
108  }
109 
110  // construct a host tensor with type T
111  template <typename T>
112  HostTensor<T> ToHost(std::size_t cpySize)
113  {
114  // TODO: host tensor could be slightly larger than the device tensor
115  // we just copy all data from GPU buffer
116  std::size_t host_elements = (cpySize + sizeof(T) - 1) / sizeof(T);
117  HostTensor<T> h_({host_elements});
118  if(mpDeviceBuf)
119  {
120  HIP_CHECK_ERROR(hipMemcpy(h_.data(), mpDeviceBuf, cpySize, hipMemcpyDeviceToHost));
121  }
122  return h_;
123  }
124  template <typename T>
126  {
127  return ToHost<T>(mMemSize);
128  }
129 
130  void SetZero() const
131  {
132  if(mpDeviceBuf)
133  {
134  HIP_CHECK_ERROR(hipMemset(mpDeviceBuf, 0, mMemSize));
135  }
136  }
137  template <typename T>
138  void SetValue(T x) const
139  {
140  if(mpDeviceBuf)
141  {
142  if(mMemSize % sizeof(T) != 0)
143  {
144  throw std::runtime_error("wrong! not entire DeviceMem will be set");
145  }
146 
147  // TODO: call a gpu kernel to set the value (?)
148  set_buffer_value<T><<<1, 1024>>>(static_cast<T*>(mpDeviceBuf), x, mMemSize / sizeof(T));
149  }
150  }
152  {
153  if(mpDeviceBuf)
154  {
155  try
156  {
157  HIP_CHECK_ERROR(hipFree(mpDeviceBuf));
158  }
159  catch(std::runtime_error& re)
160  {
161  std::cerr << re.what() << std::endl;
162  }
163  }
164  }
165 
166  void* mpDeviceBuf;
167  std::size_t mMemSize;
168 };
169 
170 } // namespace ck_tile
#define HIP_CHECK_ERROR(retval_or_funcall)
Definition: hip_check_error.hpp:22
Definition: cluster_descriptor.hpp:13
__global__ void set_buffer_value(T *p, T x, uint64_t buffer_element_size)
Definition: device_memory.hpp:14
Container for storing data in GPU device memory.
Definition: device_memory.hpp:27
DeviceMem()
Definition: device_memory.hpp:28
DeviceMem(std::size_t mem_size)
Definition: device_memory.hpp:29
void SetValue(T x) const
Definition: device_memory.hpp:138
void ToDevice(const void *p, const std::size_t cpySize) const
Definition: device_memory.hpp:83
void Realloc(std::size_t mem_size)
Definition: device_memory.hpp:53
std::size_t mMemSize
Definition: device_memory.hpp:167
DeviceMem(const HostTensor< T > &t)
Definition: device_memory.hpp:41
void * GetDeviceBuffer() const
Definition: device_memory.hpp:69
void FromDevice(void *p) const
Definition: device_memory.hpp:91
void SetZero() const
Definition: device_memory.hpp:130
void FromDevice(void *p, const std::size_t cpySize) const
Definition: device_memory.hpp:102
HostTensor< T > ToHost()
Definition: device_memory.hpp:125
std::size_t GetBufferSize() const
Definition: device_memory.hpp:70
~DeviceMem()
Definition: device_memory.hpp:151
HostTensor< T > ToHost(std::size_t cpySize)
Definition: device_memory.hpp:112
void * mpDeviceBuf
Definition: device_memory.hpp:166
void ToDevice(const void *p) const
Definition: device_memory.hpp:71
Definition: host_tensor.hpp:279
Data::pointer data()
Definition: host_tensor.hpp:523