/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/warp/warp_gemm.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/warp/warp_gemm.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/warp/warp_gemm.hpp Source File
warp_gemm.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include "ck_tile/core.hpp"
9 
12 
13 namespace ck_tile {
14 
15 // fp32
16 
19 
20 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
23  4,
24  AttrNumAccess>>;
25 
26 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
30  4,
31  AttrNumAccess>>;
32 
33 // fp16
34 
37 
40 
41 #if defined(__gfx950__)
42 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
45  AttrNumAccess>>;
46 #else
47 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
50  2,
51  AttrNumAccess>>;
52 #endif
53 
54 #if defined(__gfx950__)
55 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
58  AttrNumAccess>>;
59 #else
60 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
63  2,
64  AttrNumAccess>>;
65 #endif
66 
69  1>>;
70 
73  2>>;
74 
78 
82 
83 #if defined(__gfx950__)
84 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
88  AttrNumAccess>>;
89 #else
90 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
94  2,
95  AttrNumAccess>>;
96 #endif
97 
98 #if defined(__gfx950__)
99 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
103  AttrNumAccess>>;
104 #else
105 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
109  2,
110  AttrNumAccess>>;
111 #endif
112 
113 #if defined(__gfx950__)
114 using WarpGemmMfmaF16F16F32M16N16K32SwizzleBTransposedCDistribution =
117  1>>;
118 
119 using WarpGemmMfmaBf16Bf16F32M16N16K32SwizzleBTransposedCDistribution =
122  1>>;
123 #endif
124 
128 
129 #if defined(__gfx950__)
133 #else
137  2>>;
138 #endif
139 
142  4>>;
143 
146  4>>;
147 
148 // fp16 2:4 structured sparsity
151 
154 
155 // bf16
158 
161 
162 #if defined(__gfx950__)
163 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
166  AttrNumAccess>>;
167 #else
168 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
171  2,
172  AttrNumAccess>>;
173 #endif
174 
175 #if defined(__gfx950__)
176 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
179  AttrNumAccess>>;
180 #else
181 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
184  2,
185  AttrNumAccess>>;
186 #endif
187 
190  1>>;
191 
195  2>>;
196 
200 
204 
205 #if defined(__gfx950__)
206 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
210  AttrNumAccess>>;
211 #else
212 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
216  2,
217  AttrNumAccess>>;
218 #endif
219 
220 #if defined(__gfx950__)
221 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
225  AttrNumAccess>>;
226 #else
227 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
231  2,
232  AttrNumAccess>>;
233 #endif
234 
238 
239 #if defined(__gfx950__)
243 #else
247  2>>;
248 #endif
249 
252  4>>;
253 
256  4>>;
257 
258 // fp8
259 
262 
265 
268 
271 
274 
277  2>>;
278 
281  2>>;
282 
285  2>>;
286 
289 
293 
296 
300 
303  2>>;
304 
307  2>>;
308 
309 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
312  AttrNumAccess>>;
313 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
316  AttrNumAccess>>;
317 
318 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
321  AttrNumAccess>>;
322 
323 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
326  AttrNumAccess>>;
327 
328 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
331  AttrNumAccess>>;
332 
333 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
337  AttrNumAccess>>;
338 
339 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
343  AttrNumAccess>>;
344 
345 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
349  AttrNumAccess>>;
350 
351 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
355  AttrNumAccess>>;
356 
357 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
360  AttrNumAccess>>;
361 
362 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
365  AttrNumAccess>>;
366 
367 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
370  AttrNumAccess>>;
371 
372 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
375  AttrNumAccess>>;
376 
380 
384 
388 
392 
393 template <index_t swizzle_factor = 2>
397  2,
398  swizzle_factor>>;
399 
400 // int8
403 
407 
410 
414 
415 } // namespace ck_tile
Definition: cluster_descriptor.hpp:13
Definition: warp_gemm_attribute_mfma.hpp:23
Definition: warp_gemm_attribute_mfma_impl.hpp:1531
Definition: warp_gemm_attribute_mfma_impl.hpp:1164
Definition: warp_gemm_attribute_mfma_impl.hpp:1323
Definition: warp_gemm_attribute_mfma_impl.hpp:1890
Definition: warp_gemm_attribute_mfma_impl.hpp:1820
Definition: warp_gemm_attribute_mfma_impl.hpp:666
Definition: warp_gemm_attribute_mfma_impl.hpp:196
Definition: warp_gemm_attribute_mfma_impl.hpp:1049
Definition: warp_gemm_attribute_mfma_impl.hpp:577
Definition: warp_gemm_attribute_mfma_impl.hpp:754
Definition: warp_gemm_attribute_mfma_impl.hpp:844
Definition: warp_gemm_attribute_mfma_impl.hpp:322
Definition: warp_gemm_attribute_mfma_impl.hpp:385
Definition: warp_gemm_attribute_mfma_impl.hpp:935
Definition: warp_gemm_attribute_mfma_impl.hpp:259
Definition: warp_gemm_attribute_mfma_impl.hpp:448
Definition: warp_gemm_attribute_mfma_impl.hpp:512
Definition: warp_gemm_attribute_mfma_impl.hpp:67
Definition: warp_gemm_attribute_mfma.hpp:869
Definition: warp_gemm_attribute_mfma.hpp:575
Definition: warp_gemm_attribute_mfma.hpp:130
Definition: warp_gemm_attribute_mfma.hpp:479
Definition: warp_gemm_attribute_mfma.hpp:395
Class describing structured sparsity mfma instructions.
Definition: warp_gemm_attribute_smfmac.hpp:26
Definition: warp_gemm_attribute_smfmac_impl.hpp:65
Definition: warp_gemm_attribute_smfmac_impl.hpp:14
Definition: warp_gemm_impl.hpp:11
Definition: warp_gemm_smfmac_impl.hpp:11