/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/warp/warp_gemm.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/warp/warp_gemm.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/gemm/warp/warp_gemm.hpp Source File
warp_gemm.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include "ck_tile/core.hpp"
9 
12 
13 namespace ck_tile {
14 
15 // fp32
16 
19 
20 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
23  4,
24  AttrNumAccess>>;
25 
26 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
30  4,
31  AttrNumAccess>>;
32 
33 // fp16
34 
37 
40 
41 #if defined(__gfx950__)
42 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
45  AttrNumAccess>>;
46 #else
47 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
50  2,
51  AttrNumAccess>>;
52 #endif
53 
54 #if defined(__gfx950__)
55 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
58  AttrNumAccess>>;
59 #else
60 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
63  2,
64  AttrNumAccess>>;
65 #endif
66 
69  1>>;
70 
73  2>>;
74 
78 
82 
83 #if defined(__gfx950__)
84 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
88  AttrNumAccess>>;
89 #else
90 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
94  2,
95  AttrNumAccess>>;
96 #endif
97 
98 #if defined(__gfx950__)
99 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
103  AttrNumAccess>>;
104 #else
105 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
109  2,
110  AttrNumAccess>>;
111 #endif
112 
113 #if defined(__gfx950__)
114 using WarpGemmMfmaF16F16F32M16N16K32SwizzleBTransposedCDistribution =
117  1>>;
118 
119 using WarpGemmMfmaBf16Bf16F32M16N16K32SwizzleBTransposedCDistribution =
122  1>>;
123 #endif
124 
128 
129 #if defined(__gfx950__)
133 #else
137  2>>;
138 #endif
139 
142  4>>;
143 
146  4>>;
147 
148 // fp16 2:4 structured sparsity
151 
154 
155 // bf16
158 
161 
162 #if defined(__gfx950__)
163 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
166  AttrNumAccess>>;
167 #else
168 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
171  2,
172  AttrNumAccess>>;
173 #endif
174 
175 #if defined(__gfx950__)
176 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
179  AttrNumAccess>>;
180 #else
181 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
184  2,
185  AttrNumAccess>>;
186 #endif
187 
190  1>>;
191 
195  2>>;
196 
200 
204 
205 #if defined(__gfx950__)
206 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
210  AttrNumAccess>>;
211 #else
212 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
216  2,
217  AttrNumAccess>>;
218 #endif
219 
220 #if defined(__gfx950__)
221 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
225  AttrNumAccess>>;
226 #else
227 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
231  2,
232  AttrNumAccess>>;
233 #endif
234 
238 
239 #if defined(__gfx950__)
243 #else
247  2>>;
248 #endif
249 
252  4>>;
253 
256  4>>;
257 
258 // fp8
259 
262 
265 
268 
271 
274  2>>;
275 
278  2>>;
279 
282 
286 
289 
293 
296  2>>;
297 
300  2>>;
301 
302 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
305  AttrNumAccess>>;
306 
307 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
310  AttrNumAccess>>;
311 
312 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
315  AttrNumAccess>>;
316 
317 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
320  AttrNumAccess>>;
321 
322 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
326  AttrNumAccess>>;
327 
328 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
332  AttrNumAccess>>;
333 
334 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
338  AttrNumAccess>>;
339 
340 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
344  AttrNumAccess>>;
345 
346 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
349  AttrNumAccess>>;
350 
351 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
354  AttrNumAccess>>;
355 
356 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
359  AttrNumAccess>>;
360 
361 template <WGAttrNumAccessEnum AttrNumAccess = WGAttrNumAccessEnum::Single>
364  AttrNumAccess>>;
365 
369 
373 
377 
381 
382 template <index_t swizzle_factor = 2>
386  2,
387  swizzle_factor>>;
388 
389 // int8
392 
396 
399 
403 
404 } // namespace ck_tile
Definition: cluster_descriptor.hpp:13
Definition: warp_gemm_attribute_mfma.hpp:23
Definition: warp_gemm_attribute_mfma_impl.hpp:1528
Definition: warp_gemm_attribute_mfma_impl.hpp:1164
Definition: warp_gemm_attribute_mfma_impl.hpp:1323
Definition: warp_gemm_attribute_mfma_impl.hpp:1795
Definition: warp_gemm_attribute_mfma_impl.hpp:1725
Definition: warp_gemm_attribute_mfma_impl.hpp:666
Definition: warp_gemm_attribute_mfma_impl.hpp:196
Definition: warp_gemm_attribute_mfma_impl.hpp:1049
Definition: warp_gemm_attribute_mfma_impl.hpp:577
Definition: warp_gemm_attribute_mfma_impl.hpp:754
Definition: warp_gemm_attribute_mfma_impl.hpp:844
Definition: warp_gemm_attribute_mfma_impl.hpp:322
Definition: warp_gemm_attribute_mfma_impl.hpp:385
Definition: warp_gemm_attribute_mfma_impl.hpp:935
Definition: warp_gemm_attribute_mfma_impl.hpp:259
Definition: warp_gemm_attribute_mfma_impl.hpp:448
Definition: warp_gemm_attribute_mfma_impl.hpp:512
Definition: warp_gemm_attribute_mfma_impl.hpp:67
Definition: warp_gemm_attribute_mfma.hpp:846
Definition: warp_gemm_attribute_mfma.hpp:552
Definition: warp_gemm_attribute_mfma.hpp:107
Definition: warp_gemm_attribute_mfma.hpp:456
Definition: warp_gemm_attribute_mfma.hpp:372
Class describing structured sparsity mfma instructions.
Definition: warp_gemm_attribute_smfmac.hpp:26
Definition: warp_gemm_attribute_smfmac_impl.hpp:65
Definition: warp_gemm_attribute_smfmac_impl.hpp:14
Definition: warp_gemm_impl.hpp:11
Definition: warp_gemm_smfmac_impl.hpp:11