/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/amd_buffer_coherence.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/amd_buffer_coherence.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/utility/amd_buffer_coherence.hpp Source File
amd_buffer_coherence.hpp
Go to the documentation of this file.
1 // Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2 // SPDX-License-Identifier: MIT
3 
4 #pragma once
5 
6 namespace ck {
7 
// Enumerator list of AmdBufferCoherenceEnum (name taken from the Doxygen index entry for
// this file). Which enumerators exist, and their numeric values, is selected at
// preprocessing time: __gfx12__ first, then __gfx942__ / __gfx950__, then a fallback for
// every other target. DefaultCoherence (0) is available in all three configurations.
// NOTE(review): listing line 8, which should carry the enum declaration itself, is absent
// from this extract (numbering jumps 7 -> 9) — confirm against the original header.
9 {
10  DefaultCoherence = 0, // default value
11 #if defined(__gfx12__)
12  // Temporal hint
 // Hints occupy the range 0..7, i.e. the low three bits of the combined value.
13  RT = 0, // regular temporal
14  NT = 1, // non temporal
15  HT = 2, // high priority temporal
 // LU and WB share the value 3; the comments tag LU as a load-op hint and WB as a
 // store-op hint.
16  LU = 3, // last use (load op)
 // NOTE(review): "same as HT" cannot refer to the encoding (HT is 2, WB is 3) —
 // presumably it describes behavior; confirm against the ISA documentation.
17  WB = 3, // same as HT, overrides WR in far cache (store op)
18  NT_RT = 4, // non temporal for near cache, regular for far cache
19  RT_NT = 5, // regular for near cache, non-temporal for far cache
20  NT_HT = 6, // non temporal for near cache, high priority for far cache
21  NT_WB = 7, // non temporal for near cache, WB for far cache
22  // (store op, reserved for load op)
23  // Scope
 // Scope values are multiples of 8, so they sit above the 0..7 hint range and a hint
 // can be OR-ed with a scope without the two overlapping.
24  CU = 0,
25  SE = 8,
26  DEVICE = 16,
27  SYSTEM = 24,
 // The four groups below enumerate every hint OR-ed with one scope. Because CU is 0,
 // each CU_* name has the same value as the bare hint.
28  // Temporal Hint for CU
29  CU_RT = RT | CU,
30  CU_NT = NT | CU,
31  CU_HT = HT | CU,
32  CU_LU = LU | CU,
33  CU_WB = WB | CU,
34  CU_NT_RT = NT_RT | CU,
35  CU_RT_NT = RT_NT | CU,
36  CU_NT_HT = NT_HT | CU,
37  CU_NT_WB = NT_WB | CU,
38  // Temporal Hint for SE
39  SE_RT = RT | SE,
40  SE_NT = NT | SE,
41  SE_HT = HT | SE,
42  SE_LU = LU | SE,
43  SE_WB = WB | SE,
44  SE_NT_RT = NT_RT | SE,
45  SE_RT_NT = RT_NT | SE,
46  SE_NT_HT = NT_HT | SE,
47  SE_NT_WB = NT_WB | SE,
48  // Temporal Hint for DEVICE
49  DEVICE_RT = RT | DEVICE,
50  DEVICE_NT = NT | DEVICE,
51  DEVICE_HT = HT | DEVICE,
52  DEVICE_LU = LU | DEVICE,
53  DEVICE_WB = WB | DEVICE,
54  DEVICE_NT_RT = NT_RT | DEVICE,
55  DEVICE_RT_NT = RT_NT | DEVICE,
56  DEVICE_NT_HT = NT_HT | DEVICE,
57  DEVICE_NT_WB = NT_WB | DEVICE,
58  // Temporal Hint for SYSTEM
59  SYSTEM_RT = RT | SYSTEM,
60  SYSTEM_NT = NT | SYSTEM,
61  SYSTEM_HT = HT | SYSTEM,
62  SYSTEM_LU = LU | SYSTEM,
63  SYSTEM_WB = WB | SYSTEM,
64  SYSTEM_NT_RT = NT_RT | SYSTEM,
65  SYSTEM_RT_NT = RT_NT | SYSTEM,
66  SYSTEM_NT_HT = NT_HT | SYSTEM,
67  SYSTEM_NT_WB = NT_WB | SYSTEM,
68 
69  // GFX942 and GFX950 compatibility
 // Aliases that give the names defined in the gfx942/gfx950 branch a gfx12 value.
 // Only GROUP_*, DEVICE_* and SYSTEM_* are aliased here; WAVE_* has no gfx12 alias.
70  GROUP_NT0 = CU_RT,
71  GROUP_NT1 = CU_NT,
72  DEVICE_NT0 = DEVICE_RT,
73  DEVICE_NT1 = DEVICE_NT,
74  SYSTEM_NT0 = SYSTEM_RT,
75  SYSTEM_NT1 = SYSTEM_NT,
76  // Other archs compatibility
 // Aliases for the GLC / SLC / GLC_SLC names that the fallback branch defines.
77  GLC = DEVICE_NT,
78  SLC = SYSTEM_NT,
 // NOTE(review): DEVICE_NT is 17 and SYSTEM_NT is 25, so this OR evaluates to 25,
 // the same value as SLC — confirm that is intended.
79  GLC_SLC = DEVICE_NT | SYSTEM_NT,
80 
81 // gfx94: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
82 // SC[1:0] System Cache level: 0=wave, 1=group, 2=device, 3=system
83 // NT Non-Temporal: 0=expect temporal reuse; 1=do not expect temporal reuse
84 #elif defined(__gfx942__) || defined(__gfx950__)
85 
 // Under the bit layout described above: GROUP (1) sets sc0 only, DEVICE (16) sets
 // sc1 only, SYSTEM (17) sets both sc1 and sc0, and NT1 (2) sets the nt bit.
86  WAVE = 0,
87  GROUP = 1,
88  DEVICE = 16,
89  SYSTEM = 17,
90  NT0 = 0,
91  NT1 = 2,
92 
 // Every scope OR-ed with every NT setting.
93  WAVE_NT0 = NT0 | WAVE,
94  WAVE_NT1 = NT1 | WAVE,
95  GROUP_NT0 = NT0 | GROUP,
96  GROUP_NT1 = NT1 | GROUP,
97  DEVICE_NT0 = NT0 | DEVICE,
98  DEVICE_NT1 = NT1 | DEVICE,
99  SYSTEM_NT0 = NT0 | SYSTEM,
100  SYSTEM_NT1 = NT1 | SYSTEM,
101 
102  // Other archs compatibility
103  GLC = DEVICE_NT1,
104  SLC = SYSTEM_NT1,
 // NOTE(review): listing line 105 is absent from this extract (numbering jumps
 // 104 -> 106). The other two branches define GLC_SLC at this position, so a GLC_SLC
 // entry may belong here — confirm against the original header.
106 #else
 // Fallback for targets matching neither branch above. GLC_SLC (3) equals GLC | SLC.
107  GLC = 1,
108  SLC = 2,
109  GLC_SLC = 3,
110 
111  // Other archs compatibility
 // Values for the *_NT0 / *_NT1 names used by the other branches: both NT0 names are 0
 // (the same value as DefaultCoherence), and the NT1 names reuse GLC and SLC.
112  DEVICE_NT0 = 0,
113  SYSTEM_NT0 = 0,
114  DEVICE_NT1 = GLC,
115  SYSTEM_NT1 = SLC,
116 #endif
117 };
118 
119 } // namespace ck
Definition: ck.hpp:270
AmdBufferCoherenceEnum
Definition: amd_buffer_coherence.hpp:9