/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/host/flush_icache.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/host/flush_icache.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/host/flush_icache.hpp Source File
flush_icache.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include <hip/hip_runtime.h>
7 
8 namespace ck_tile {
9 // GPU kernel to invalidate instruction cache for accurate benchmarking.
10 // s_icache_inv: Asynchronously invalidates the L1 instruction cache on this compute unit,
11 // forcing subsequent kernel runs to fetch instructions from HBM instead of cache.
12 // 16x s_nop: Wait cycles (~16 cycles) to ensure cache invalidation completes before kernel
13 // exits. Without these NOPs, the flush may not finish, leading to inconsistent
14 // timing measurements where some instructions remain cached.
15 static __global__ void flush_cache()
16 {
17  asm __volatile__("s_icache_inv \n\t"
18  "s_nop 0 \n\t"
19  "s_nop 0 \n\t"
20  "s_nop 0 \n\t"
21  "s_nop 0 \n\t"
22  "s_nop 0 \n\t"
23  "s_nop 0 \n\t"
24  "s_nop 0 \n\t"
25  "s_nop 0 \n\t"
26  "s_nop 0 \n\t"
27  "s_nop 0 \n\t"
28  "s_nop 0 \n\t"
29  "s_nop 0 \n\t"
30  "s_nop 0 \n\t"
31  "s_nop 0 \n\t"
32  "s_nop 0 \n\t"
33  "s_nop 0 \n\t" ::
34  :);
35 }
36 } // namespace ck_tile
Definition: cluster_descriptor.hpp:13