/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/config.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/config.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/config.hpp Source File
config.hpp
Go to the documentation of this file.
1 // Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2 // SPDX-License-Identifier: MIT
3 
4 #pragma once
5 
6 #if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx942__) || defined(__gfx950__) || \
7  defined(__gfx9_4_generic__)
8 #define __gfx9__
9 #endif
10 #if defined(__gfx942__) || defined(__gfx950__) || defined(__gfx9_4_generic__)
11 #define __gfx94__
12 #endif
13 #if defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__) || \
14  defined(__gfx1013__) || defined(__gfx10_1_generic__)
15 #define __gfx101__
16 #endif
17 #if defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || \
18  defined(__gfx1034__) || defined(__gfx1035__) || defined(__gfx1036__) || \
19  defined(__gfx10_3_generic__)
20 #define __gfx103__
21 #endif
22 #if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || \
23  defined(__gfx1103__) || defined(__gfx1150__) || defined(__gfx1151__) || \
24  defined(__gfx1152__) || defined(__gfx1153__) || defined(__gfx11_generic__)
25 #define __gfx11__
26 #endif
27 #if defined(__gfx1200__) || defined(__gfx1201__) || defined(__gfx12_generic__)
28 #define __gfx12__
29 #endif
30 
31 #include "hip/hip_version.h"
32 #ifndef CK_TILE_DONT_USE_HIP_RUNTIME_HEADERS
33 #include "hip/hip_runtime.h"
34 #include "hip/hip_fp16.h"
35 #endif
36 
37 #ifdef __HIPCC__
38 #define CK_TILE_HOST inline __host__
39 #define CK_TILE_DEVICE inline __device__
40 #define CK_TILE_HOST_DEVICE inline __host__ __device__
41 #define CK_TILE_DEVICE_EXTERN __device__
42 #define CK_TILE_HOST_DEVICE_EXTERN __host__ __device__
43 #else
44 #define CK_TILE_HOST inline
45 #define CK_TILE_DEVICE inline
46 #define CK_TILE_HOST_DEVICE inline
47 #define CK_TILE_DEVICE_EXTERN
48 #define CK_TILE_HOST_DEVICE_EXTERN
49 #endif
50 
51 // implementing the "memory address space" attribute
52 // https://llvm.org/docs/AMDGPUUsage.html#amdgpu-address-spaces-table
53 // WA for https://github.com/ROCm/composable_kernel/issues/1946
54 #if 0
55 #define CK_TILE_GENERIC_ADDR __attribute__((address_space(0)))
56 #define CK_TILE_GLOBAL_ADDR __attribute__((address_space(1)))
57 #define CK_TILE_LDS_ADDR __attribute__((address_space(3)))
58 #define CK_TILE_BUF_RES_ADDR __attribute__((address_space(8)))
59 #else
60 #define CK_TILE_GENERIC_ADDR
61 #define CK_TILE_GLOBAL_ADDR
62 #define CK_TILE_LDS_ADDR
63 #define CK_TILE_BUF_RES_ADDR
64 #endif
65 #ifndef CK_TILE_USE_CUSTOM_DATA_TYPE
66 #define CK_TILE_USE_CUSTOM_DATA_TYPE 0 // custom data type will generate extra move/bfi code
67 #endif
68 
69 #define CK_TILE_FLOAT_TO_BFLOAT16_STANDARD 0
70 #define CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE_WITH_NAN 1
71 #define CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE 2
72 #define CK_TILE_FLOAT_TO_BFLOAT16_STANDARD_ASM 3
73 #define CK_TILE_FLOAT_TO_BFLOAT16_RTA_ASM 4
74 
75 #ifndef CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT
76 #define CK_TILE_FLOAT_TO_BFLOAT16_DEFAULT CK_TILE_FLOAT_TO_BFLOAT16_TRUNCATE
77 #endif
78 
79 #define CK_TILE_FLOAT_TO_FP8_STANDARD 0
80 #define CK_TILE_FLOAT_TO_FP8_STOCHASTIC 1
81 
82 #ifndef CK_TILE_FLOAT_TO_FP8_DEFAULT
83 #define CK_TILE_FLOAT_TO_FP8_DEFAULT CK_TILE_FLOAT_TO_FP8_STANDARD
84 #endif
85 
86 // in the old rocm period, we have to use tuple array implementation to implement this
87 // so turn on the _USE_TUPLE if meet compiler error, otherwise _USE_ARRAY by default.
88 #define CK_TILE_STATICALLY_INDEXED_ARRAY_USE_ARRAY 0
89 #define CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE 1
90 #ifndef CK_TILE_STATICALLY_INDEXED_ARRAY_DEFAULT
91 #define CK_TILE_STATICALLY_INDEXED_ARRAY_DEFAULT CK_TILE_STATICALLY_INDEXED_ARRAY_USE_TUPLE
92 #endif
93 
94 #define CK_TILE_THREAD_BUFFER_USE_ARRAY 0
95 #define CK_TILE_THREAD_BUFFER_USE_TUPLE 1
96 #ifndef CK_TILE_THREAD_BUFFER_DEFAULT
97 #define CK_TILE_THREAD_BUFFER_DEFAULT CK_TILE_THREAD_BUFFER_USE_ARRAY
98 #endif
99 
100 #ifndef CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST
101 #if CK_TILE_THREAD_BUFFER_DEFAULT == CK_TILE_THREAD_BUFFER_USE_TUPLE
102 // if using tuple-array as thread_buffer implementation, need to support {} brace init
103 // ... with similiar behavior as array
104 #define CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST 1
105 #else
106 #define CK_TILE_TUPLE_CTOR_WITH_INITIALIZER_LIST 0
107 #endif
108 #endif
109 
110 #ifndef CK_TILE_USE_LAUNCH_BOUNDS
111 #define CK_TILE_USE_LAUNCH_BOUNDS 1
112 #endif
113 
114 #ifndef CK_TILE_TIME_KERNEL
115 #define CK_TILE_TIME_KERNEL 1
116 #endif
117 
118 #define CK_TILE_MAX_THREAD_PER_BLOCK 256
119 #define CK_TILE_MIN_BLOCK_PER_CU 2
120 
121 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK
122 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK 0
123 #endif
124 
125 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK
126 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK 1
127 #endif
128 
129 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK
130 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK 1
131 #endif
132 
133 #ifndef CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK
134 #define CK_TILE_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK 1
135 #endif
136 
137 #ifndef CK_TILE_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM
138 #define CK_TILE_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM 1
139 #endif
140 
141 #ifndef CK_TILE_USE_AMD_BUFFER_LOAD
142 #define CK_TILE_USE_AMD_BUFFER_LOAD 1
143 #endif
144 
145 #ifndef CK_TILE_USE_AMD_BUFFER_STORE
146 #define CK_TILE_USE_AMD_BUFFER_STORE 1
147 #endif
148 
149 #ifndef CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER
150 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER 1
151 #endif
152 
153 #ifndef CK_TILE_USE_PK4_LAYOUT_SHUFFLE
154 #define CK_TILE_USE_PK4_LAYOUT_SHUFFLE 1
155 #endif
156 
157 // buffer atomic add: floating point
158 #ifndef __HIP_DEVICE_COMPILE__ // for host code
159 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 1
160 #elif defined(__gfx9__) || defined(__gfx12__) // for GPU code
161 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 1
162 #else // for GPU code
163 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 0
164 #endif
165 
166 #if(defined(__gfx90a__) || defined(__gfx94__)) // for GPU code
167 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64 1
168 #else
169 #define CK_TILE_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64 0
170 #endif
171 
172 #ifndef CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS
173 #define CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS 0
174 #endif
175 
176 #ifndef CK_TILE_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE
177 #define CK_TILE_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE 1
178 #endif
179 
180 #ifndef CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE
181 #if HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 1 && HIP_VERSION_PATCH >= 40091
182 #define CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE 1
183 #else
184 #define CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE 0
185 #endif
186 #endif
187 
188 // workaround for ROCm 6.2 and later
189 #ifndef CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE
190 #if(HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 2 && HIP_VERSION_PATCH >= 41133) || \
191  (HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 3 && HIP_VERSION_PATCH >= 42131) || \
192  (HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR > 3)
193 #define CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE 1
194 #else
195 #define CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE 0
196 #endif
197 #endif
198 
199 // use llvm builtin bf16 data type after ROCm 6.5
200 #ifndef CK_TILE_USE_LLVM_BUILTIN_BF16
201 #if(HIP_VERSION_MAJOR == 6 && HIP_VERSION_MINOR == 5 && HIP_VERSION_PATCH >= 50421) || \
202  (HIP_VERSION_MAJOR >= 7)
203 #define CK_TILE_USE_LLVM_BUILTIN_BF16 1
204 #else
205 #define CK_TILE_USE_LLVM_BUILTIN_BF16 0
206 #endif
207 #endif
208 
209 #ifndef CK_TILE_DEBUG_LOG
210 #define CK_TILE_DEBUG_LOG 0
211 #endif
212 
213 #ifndef __HIP_DEVICE_COMPILE__ // for host code
214 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0xffffffff
215 #elif defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) || \
216  defined(__gfx9__) // for GPU code
217 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x00020000
218 #elif defined(__gfx101__) || defined(__gfx103__) // for GPU code
219 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x31014000
220 #elif defined(__gfx11__) || defined(__gfx12__) // for GPU code
221 #define CK_TILE_BUFFER_RESOURCE_3RD_DWORD 0x31004000
222 #endif
223 
224 #ifndef CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
225 #define CK_TILE_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM 1
226 #endif
227 
228 #ifndef CK_TILE_USE_SUBDWORD_TILE_CAST
229 #define CK_TILE_USE_SUBDWORD_TILE_CAST 0
230 #endif
231 
232 #ifndef CK_TILE_USE_PK_FP16_TILE_CAST
233 #define CK_TILE_USE_PK_FP16_TILE_CAST 0
234 #endif
235 
236 // TODO: better solve this inside compiler
237 #ifndef CK_TILE_FMHA_FWD_FAST_EXP2
238 #define CK_TILE_FMHA_FWD_FAST_EXP2 0
239 #endif
240 
241 #ifndef CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN
242 #define CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN 0
243 #endif
244 
245 #ifndef CK_TILE_BUFFER_LOAD_RAW_BF16_WA
246 #define CK_TILE_BUFFER_LOAD_RAW_BF16_WA 1
247 #endif
248 
249 // workaround: compiler not emiting reciprocal instruction frm __frcp_rn()
250 #ifndef CK_TILE_WORKAROUND_SWDEV_383542
251 #define CK_TILE_WORKAROUND_SWDEV_383542 1
252 #endif
253 
254 #ifndef CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID
255 #define CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID 1
256 #endif
257 
258 #ifndef CK_TILE_USE_OCP_FP8
259 #if defined(__HIP_DEVICE_COMPILE__)
260 #if defined(__gfx950__) || defined(__gfx12__)
261 #define CK_TILE_USE_OCP_FP8 1
262 #else
263 #define CK_TILE_USE_OCP_FP8 0
264 #endif
265 #else
266 #define CK_TILE_USE_OCP_FP8 0
267 #endif
268 #endif
269 
270 #ifndef CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN
271 #if __clang_major__ >= 20 && !(defined(__gfx103__) || defined(__gfx11__) || defined(__gfx12__))
272 #define CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN 1
273 #else
274 #define CK_TILE_USE_BUFFER_ADDRESSING_BUILTIN 0
275 #endif
276 #endif
277 
278 #ifndef CK_TILE_WA_ISSUE_2028
279 #define CK_TILE_WA_ISSUE_2028 0
280 #endif
281 
282 // Y pointed to R, we don't see a valuable use case.
283 // Will enforce encoding to check Y not pointed to R if set to zero
284 #ifndef CK_TILE_ENC_SUPPORT_Y_TO_R
285 #define CK_TILE_ENC_SUPPORT_Y_TO_R 0
286 #endif
287 
288 // Mark unsupported features with a deprecation warning in debug builds
289 #if defined(NDEBUG)
290 #define CK_TILE_UNSUPPORTED_IMPL(MSG)
291 #else
292 #define CK_TILE_UNSUPPORTED_IMPL(MSG) __attribute__((deprecated(MSG)))
293 #endif
294 
295 namespace ck_tile::core {
325 {
326  // Determine if we are compiling for device or host
327 #if defined(__HIP_DEVICE_COMPILE__) && __HIP_DEVICE_COMPILE__
328  static constexpr bool CK_TILE_DEVICE_COMPILE = true;
329  static constexpr bool CK_TILE_HOST_COMPILE = false;
330 #else
331  static constexpr bool CK_TILE_DEVICE_COMPILE = false;
332  static constexpr bool CK_TILE_HOST_COMPILE = true;
333 #endif // __HIP_DEVICE_COMPILE__ && __HIP_DEVICE_COMPILE__
334 
335  // GFX9
336 #if defined(__gfx908__)
337  static constexpr bool CK_TILE_ARCH_GFX908 = true;
338 #else
339  static constexpr bool CK_TILE_ARCH_GFX908 = false;
340 #endif // __gfx908__
341 
342 #if defined(__gfx90a__)
343  static constexpr bool CK_TILE_ARCH_GFX90A = true;
344 #else
345  static constexpr bool CK_TILE_ARCH_GFX90A = false;
346 #endif // __gfx90a__
347 
348 #if defined(__gfx942__)
349  static constexpr bool CK_TILE_ARCH_GFX942 = true;
350 #else
351  static constexpr bool CK_TILE_ARCH_GFX942 = false;
352 #endif // __gfx942__
353 
354 #if defined(__gfx950__)
355  static constexpr bool CK_TILE_ARCH_GFX950 = true;
356 #else
357  static constexpr bool CK_TILE_ARCH_GFX950 = false;
358 #endif // __gfx950__
359 
360  // GFX10
361 #if defined(__gfx1010__)
362  static constexpr bool CK_TILE_ARCH_GFX1010 = true;
363 #else
364  static constexpr bool CK_TILE_ARCH_GFX1010 = false;
365 #endif
366 #if defined(__gfx1011__)
367  static constexpr bool CK_TILE_ARCH_GFX1011 = true;
368 #else
369  static constexpr bool CK_TILE_ARCH_GFX1011 = false;
370 #endif
371 #if defined(__gfx1012__)
372  static constexpr bool CK_TILE_ARCH_GFX1012 = true;
373 #else
374  static constexpr bool CK_TILE_ARCH_GFX1012 = false;
375 #endif
376 #if defined(__gfx1013__)
377  static constexpr bool CK_TILE_ARCH_GFX1013 = true;
378 #else
379  static constexpr bool CK_TILE_ARCH_GFX1013 = false;
380 #endif
381 #if defined(__gfx10_1_generic__)
382  static constexpr bool CK_TILE_ARCH_GFX10_1_GENERIC = true;
383 #else
384  static constexpr bool CK_TILE_ARCH_GFX10_1_GENERIC = false;
385 #endif // __gfx10_1_generic__
386 
387 #if defined(__gfx1030__)
388  static constexpr bool CK_TILE_ARCH_GFX1030 = true;
389 #else
390  static constexpr bool CK_TILE_ARCH_GFX1030 = false;
391 #endif // __gfx1030__
392 
393 #if defined(__gfx1031__)
394  static constexpr bool CK_TILE_ARCH_GFX1031 = true;
395 #else
396  static constexpr bool CK_TILE_ARCH_GFX1031 = false;
397 #endif // __gfx1031__
398 
399 #if defined(__gfx1032__)
400  static constexpr bool CK_TILE_ARCH_GFX1032 = true;
401 #else
402  static constexpr bool CK_TILE_ARCH_GFX1032 = false;
403 #endif // __gfx1032__
404 
405 #if defined(__gfx1034__)
406  static constexpr bool CK_TILE_ARCH_GFX1034 = true;
407 #else
408  static constexpr bool CK_TILE_ARCH_GFX1034 = false;
409 #endif // __gfx1034__
410 
411 #if defined(__gfx1035__)
412  static constexpr bool CK_TILE_ARCH_GFX1035 = true;
413 #else
414  static constexpr bool CK_TILE_ARCH_GFX1035 = false;
415 #endif // __gfx1035__
416 
417 #if defined(__gfx1036__)
418  static constexpr bool CK_TILE_ARCH_GFX1036 = true;
419 #else
420  static constexpr bool CK_TILE_ARCH_GFX1036 = false;
421 #endif // __gfx1036__
422 
423 #if defined(__gfx10_3_generic__)
424  static constexpr bool CK_TILE_ARCH_GFX10_3_GENERIC = true;
425 #else
426  static constexpr bool CK_TILE_ARCH_GFX10_3_GENERIC = false;
427 #endif // __gfx10_3_generic__
428 
429  // GFX11
430 #if defined(__gfx1100__)
431  static constexpr bool CK_TILE_ARCH_GFX1100 = true;
432 #else
433  static constexpr bool CK_TILE_ARCH_GFX1100 = false;
434 #endif // __gfx1100__
435 
436 #if defined(__gfx1101__)
437  static constexpr bool CK_TILE_ARCH_GFX1101 = true;
438 #else
439  static constexpr bool CK_TILE_ARCH_GFX1101 = false;
440 #endif // __gfx1101__
441 
442 #if defined(__gfx1102__)
443  static constexpr bool CK_TILE_ARCH_GFX1102 = true;
444 #else
445  static constexpr bool CK_TILE_ARCH_GFX1102 = false;
446 #endif // __gfx1102__
447 
448 #if defined(__gfx1103__)
449  static constexpr bool CK_TILE_ARCH_GFX1103 = true;
450 #else
451  static constexpr bool CK_TILE_ARCH_GFX1103 = false;
452 #endif // __gfx1103__
453 
454 #if defined(__gfx1150__)
455  static constexpr bool CK_TILE_ARCH_GFX1150 = true;
456 #else
457  static constexpr bool CK_TILE_ARCH_GFX1150 = false;
458 #endif // __gfx1150__
459 
460 #if defined(__gfx1151__)
461  static constexpr bool CK_TILE_ARCH_GFX1151 = true;
462 #else
463  static constexpr bool CK_TILE_ARCH_GFX1151 = false;
464 #endif // __gfx1151__
465 
466 #if defined(__gfx1152__)
467  static constexpr bool CK_TILE_ARCH_GFX1152 = true;
468 #else
469  static constexpr bool CK_TILE_ARCH_GFX1152 = false;
470 #endif // __gfx1152__
471 
472 #if defined(__gfx1153__)
473  static constexpr bool CK_TILE_ARCH_GFX1153 = true;
474 #else
475  static constexpr bool CK_TILE_ARCH_GFX1153 = false;
476 #endif // __gfx1153__
477 
478 #if defined(__gfx11_generic__)
479  static constexpr bool CK_TILE_ARCH_GFX11_GENERIC = true;
480 #else
481  static constexpr bool CK_TILE_ARCH_GFX11_GENERIC = false;
482 #endif // __gfx11_generic__
483 
484  // GFX12
485 #if defined(__gfx1200__)
486  static constexpr bool CK_TILE_ARCH_GFX1200 = true;
487 #else
488  static constexpr bool CK_TILE_ARCH_GFX1200 = false;
489 #endif // __gfx1200__
490 
491 #if defined(__gfx1201__)
492  static constexpr bool CK_TILE_ARCH_GFX1201 = true;
493 #else
494  static constexpr bool CK_TILE_ARCH_GFX1201 = false;
495 #endif // __gfx1201__
496 
497 #if defined(__gfx12_generic__)
498  static constexpr bool CK_TILE_ARCH_GFX12_GENERIC = true;
499 #else
500  static constexpr bool CK_TILE_ARCH_GFX12_GENERIC = false;
501 #endif // __gfx12_generic__
502 };
503 
512 template <typename T, typename... Ts>
513 // TODO: c++20 concept requires((std::is_convertible<Ts, T>::value && ...) && (sizeof...(Ts) >=
514 // 1))
515 CK_TILE_HOST_DEVICE static constexpr uint32_t count_values_of(T search, Ts... searchList)
516 {
517  static_assert((std::is_convertible<Ts, T>::value && ...),
518  "All search list values must be convertible to the search value type");
519  static_assert(sizeof...(Ts) >= 1, "At least one value must be provided to search in");
520 
521  return (static_cast<uint32_t>(search == static_cast<T>(searchList)) + ...);
522 }
523 
524 #define CK_TILE_COMPILER_TARGETS_LIST \
525  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX908, \
526  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX90A, \
527  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX942, \
528  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX950, \
529  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1010, \
530  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1011, \
531  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1012, \
532  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1013, \
533  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX10_1_GENERIC, \
534  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1030, \
535  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1031, \
536  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1032, \
537  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1034, \
538  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1035, \
539  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1036, \
540  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX10_3_GENERIC, \
541  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1100, \
542  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1101, \
543  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1102, \
544  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1103, \
545  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1150, \
546  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1151, \
547  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1152, \
548  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1153, \
549  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX11_GENERIC, \
550  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1200, \
551  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX1201, \
552  amdgcn_compiler_target_state::CK_TILE_ARCH_GFX12_GENERIC
553 
554 // Sanity check: make sure only one target architecture is defined during device compile
556  count_values_of(true, CK_TILE_COMPILER_TARGETS_LIST) == 1u,
557  "Only one target architecture can be defined during device compile");
558 
559 // Sanity check: make sure no device target architecture is defined during host compile
561  count_values_of(true, CK_TILE_COMPILER_TARGETS_LIST) == 0u,
562  "No device target architecture can be defined during host compile");
563 
564 } // namespace ck_tile::core
#define CK_TILE_COMPILER_TARGETS_LIST
Definition: config.hpp:524
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:46
Definition: amdgcn_mma.hpp:10
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1697
unsigned int uint32_t
Definition: stdint.h:126
Defines compiler states for supported AMDGCN devices.
Definition: config.hpp:325
static constexpr bool CK_TILE_ARCH_GFX90A
Definition: config.hpp:345
static constexpr bool CK_TILE_ARCH_GFX1032
Definition: config.hpp:402
static constexpr bool CK_TILE_ARCH_GFX1011
Definition: config.hpp:369
static constexpr bool CK_TILE_ARCH_GFX908
Definition: config.hpp:339
static constexpr bool CK_TILE_ARCH_GFX11_GENERIC
Definition: config.hpp:481
static constexpr bool CK_TILE_ARCH_GFX1030
Definition: config.hpp:390
static constexpr bool CK_TILE_ARCH_GFX1036
Definition: config.hpp:420
static constexpr bool CK_TILE_ARCH_GFX1200
Definition: config.hpp:488
static constexpr bool CK_TILE_ARCH_GFX1013
Definition: config.hpp:379
static constexpr bool CK_TILE_ARCH_GFX1035
Definition: config.hpp:414
static constexpr bool CK_TILE_ARCH_GFX1034
Definition: config.hpp:408
static constexpr bool CK_TILE_ARCH_GFX10_1_GENERIC
Definition: config.hpp:384
static constexpr bool CK_TILE_ARCH_GFX1152
Definition: config.hpp:469
static constexpr bool CK_TILE_ARCH_GFX1010
Definition: config.hpp:364
static constexpr bool CK_TILE_ARCH_GFX1031
Definition: config.hpp:396
static constexpr bool CK_TILE_ARCH_GFX1103
Definition: config.hpp:451
static constexpr bool CK_TILE_HOST_COMPILE
Definition: config.hpp:332
static constexpr bool CK_TILE_ARCH_GFX1100
Definition: config.hpp:433
static constexpr bool CK_TILE_ARCH_GFX1201
Definition: config.hpp:494
static constexpr bool CK_TILE_ARCH_GFX10_3_GENERIC
Definition: config.hpp:426
static constexpr bool CK_TILE_ARCH_GFX1101
Definition: config.hpp:439
static constexpr bool CK_TILE_ARCH_GFX942
Definition: config.hpp:351
static constexpr bool CK_TILE_DEVICE_COMPILE
Definition: config.hpp:331
static constexpr bool CK_TILE_ARCH_GFX1102
Definition: config.hpp:445
static constexpr bool CK_TILE_ARCH_GFX12_GENERIC
Definition: config.hpp:500
static constexpr bool CK_TILE_ARCH_GFX1153
Definition: config.hpp:475
static constexpr bool CK_TILE_ARCH_GFX950
Definition: config.hpp:357
static constexpr bool CK_TILE_ARCH_GFX1012
Definition: config.hpp:374
static constexpr bool CK_TILE_ARCH_GFX1150
Definition: config.hpp:457
static constexpr bool CK_TILE_ARCH_GFX1151
Definition: config.hpp:463