/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/host_utility/kernel_launch.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/host_utility/kernel_launch.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/host_utility/kernel_launch.hpp Source File
kernel_launch.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 #ifndef __HIPCC_RTC__
6 #include <hip/hip_runtime.h>
7 
8 #include "ck/ck.hpp"
9 #include "ck/utility/env.hpp"
10 #include "ck/stream_config.hpp"
12 
13 template <typename... Args, typename F>
14 float launch_and_time_kernel(const StreamConfig& stream_config,
15  F kernel,
16  dim3 grid_dim,
17  dim3 block_dim,
18  std::size_t lds_byte,
19  Args... args)
20 {
21 #if CK_TIME_KERNEL
22  if(stream_config.time_kernel_)
23  {
24  if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
25  {
26  printf("%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u} \n",
27  __func__,
28  grid_dim.x,
29  grid_dim.y,
30  grid_dim.z,
31  block_dim.x,
32  block_dim.y,
33  block_dim.z);
34 
35  printf("Warm up %d times\n", stream_config.cold_niters_);
36  }
37  // warm up
38  for(int i = 0; i < stream_config.cold_niters_; ++i)
39  {
40  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
41  hip_check_error(hipGetLastError());
42  }
43 
44  const int nrepeat = stream_config.nrepeat_;
45  if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
46  {
47  printf("Start running %d times...\n", nrepeat);
48  }
49  hipEvent_t start, stop;
50 
51  hip_check_error(hipEventCreate(&start));
52  hip_check_error(hipEventCreate(&stop));
53 
54  hip_check_error(hipDeviceSynchronize());
55  hip_check_error(hipEventRecord(start, stream_config.stream_id_));
56 
57  for(int i = 0; i < nrepeat; ++i)
58  {
59  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
60  hip_check_error(hipGetLastError());
61  }
62 
63  hip_check_error(hipEventRecord(stop, stream_config.stream_id_));
64  hip_check_error(hipEventSynchronize(stop));
65 
66  float total_time = 0;
67 
68  hip_check_error(hipEventElapsedTime(&total_time, start, stop));
69 
70  hip_check_error(hipEventDestroy(start));
71  hip_check_error(hipEventDestroy(stop));
72 
73  return total_time / nrepeat;
74  }
75  else
76  {
77  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
78  hip_check_error(hipGetLastError());
79 
80  return 0;
81  }
82 #else
83  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
84  hip_check_error(hipGetLastError());
85 
86  return 0;
87 #endif
88 }
89 
90 template <typename... Args, typename F, typename PreProcessFunc>
92  PreProcessFunc preprocess,
93  F kernel,
94  dim3 grid_dim,
95  dim3 block_dim,
96  std::size_t lds_byte,
97  Args... args)
98 {
99 #if CK_TIME_KERNEL
100  if(stream_config.time_kernel_)
101  {
102  if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
103  {
104  printf("%s: grid_dim {%u, %u, %u}, block_dim {%u, %u, %u} \n",
105  __func__,
106  grid_dim.x,
107  grid_dim.y,
108  grid_dim.z,
109  block_dim.x,
110  block_dim.y,
111  block_dim.z);
112 
113  printf("Warm up %d times\n", stream_config.cold_niters_);
114  }
115  // warm up
116  preprocess();
117  for(int i = 0; i < stream_config.cold_niters_; ++i)
118  {
119  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
120  hip_check_error(hipGetLastError());
121  }
122 
123  const int nrepeat = stream_config.nrepeat_;
124  if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
125  {
126  printf("Start running %d times...\n", nrepeat);
127  }
128  hipEvent_t start, stop;
129 
130  hip_check_error(hipEventCreate(&start));
131  hip_check_error(hipEventCreate(&stop));
132 
133  hip_check_error(hipDeviceSynchronize());
134  hip_check_error(hipEventRecord(start, stream_config.stream_id_));
135 
136  for(int i = 0; i < nrepeat; ++i)
137  {
138  preprocess();
139  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
140  hip_check_error(hipGetLastError());
141  }
142 
143  hip_check_error(hipEventRecord(stop, stream_config.stream_id_));
144  hip_check_error(hipEventSynchronize(stop));
145 
146  float total_time = 0;
147 
148  hip_check_error(hipEventElapsedTime(&total_time, start, stop));
149 
150  hip_check_error(hipEventDestroy(start));
151  hip_check_error(hipEventDestroy(stop));
152 
153  return total_time / nrepeat;
154  }
155  else
156  {
157  preprocess();
158  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
159  hip_check_error(hipGetLastError());
160 
161  return 0;
162  }
163 #else
164  kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
165  hip_check_error(hipGetLastError());
166 
167  return 0;
168 #endif
169 }
170 #endif
void hip_check_error(hipError_t x)
Definition: hip_check_error.hpp:10
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition: kernel_launch.hpp:14
float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config, PreProcessFunc preprocess, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition: kernel_launch.hpp:91
bool EnvIsEnabled(EnvVar)
Definition: env.hpp:140
Definition: stream_config.hpp:10
int cold_niters_
Definition: stream_config.hpp:14
bool time_kernel_
Definition: stream_config.hpp:12
int nrepeat_
Definition: stream_config.hpp:15
hipStream_t stream_id_
Definition: stream_config.hpp:11
#define CK_ENV(name)
Definition: env.hpp:129