/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocprofiler-docs/checkouts/latest/include/rocprofiler/rocprofiler.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocprofiler-docs/checkouts/latest/include/rocprofiler/rocprofiler.h Source File#

ROCProfiler API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocprofiler-docs/checkouts/latest/include/rocprofiler/rocprofiler.h Source File
rocprofiler.h
Go to the documentation of this file.
1 /******************************************************************************
2 Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 *******************************************************************************/
22 
24 //
25 // ROC Profiler API
26 //
27 // The goal of the implementation is to provide a HW specific low-level
28 // performance analysis interface for profiling of GPU compute applications.
29 // The profiling includes HW performance counters (PMC) with complex
30 // performance metrics and traces.
31 //
32 // The library can be used by a tool library loaded by HSA runtime or by
33 // higher level HW independent performance analysis API like PAPI.
34 //
35 // The library is written in C and will be based on the AMD-specific AQLprofile
36 // HSA extension. The library implementation requires HSA API intercepting and
37 // a profiling queue supporting a submit callback interface.
38 //
39 //
40 
41 #ifndef INC_ROCPROFILER_H_
42 #define INC_ROCPROFILER_H_
43 
44 /* Placeholder for calling convention and import/export macros */
45 #if !defined(ROCPROFILER_CALL)
46 #define ROCPROFILER_CALL
47 #endif /* !defined (ROCPROFILER_CALL) */
48 
49 #if !defined(ROCPROFILER_EXPORT_DECORATOR)
50 #if defined(__GNUC__)
51 #define ROCPROFILER_EXPORT_DECORATOR __attribute__((visibility("default")))
52 #elif defined(_MSC_VER)
53 #define ROCPROFILER_EXPORT_DECORATOR __declspec(dllexport)
54 #endif /* defined (_MSC_VER) */
55 #endif /* !defined (ROCPROFILER_EXPORT_DECORATOR) */
56 
57 #if !defined(ROCPROFILER_IMPORT_DECORATOR)
58 #if defined(__GNUC__)
59 #define ROCPROFILER_IMPORT_DECORATOR
60 #elif defined(_MSC_VER)
61 #define ROCPROFILER_IMPORT_DECORATOR __declspec(dllimport)
62 #endif /* defined (_MSC_VER) */
63 #endif /* !defined (ROCPROFILER_IMPORT_DECORATOR) */
64 
65 #define ROCPROFILER_EXPORT ROCPROFILER_EXPORT_DECORATOR ROCPROFILER_CALL
66 #define ROCPROFILER_IMPORT ROCPROFILER_IMPORT_DECORATOR ROCPROFILER_CALL
67 
68 #if !defined(ROCPROFILER)
69 #if defined(ROCPROFILER_EXPORTS)
70 #define ROCPROFILER_API ROCPROFILER_EXPORT
71 #else /* !defined (ROCPROFILER_EXPORTS) */
72 #define ROCPROFILER_API ROCPROFILER_IMPORT
73 #endif /* !defined (ROCPROFILER_EXPORTS) */
74 #endif /* !defined (ROCPROFILER) */
75 
76 
77 #include <stddef.h>
78 #include <stdint.h>
79 
80 #ifdef __cplusplus
81 extern "C" {
82 #endif /* __cplusplus */
83 
84 #include <hsa/amd_hsa_kernel_code.h>
85 #include <hsa/hsa.h>
86 #include <hsa/hsa_ext_amd.h>
87 #include <hsa/hsa_ven_amd_aqlprofile.h>
88 #include <stdint.h>
89 
90 
91 #define ROCPROFILER_VERSION_MAJOR 8
92 #define ROCPROFILER_VERSION_MINOR 0
93 
95 // Returning library version
98 
100 // Global properties structure
101 
102 typedef struct {
103  uint32_t intercept_mode;
106  uint32_t trace_size;
107  uint32_t trace_local;
108  uint64_t timeout;
109  uint32_t timestamp_on;
111  uint32_t k_concurrent;
112  uint32_t opt_mode;
113  uint32_t obj_dumping;
115 
117 // Returning the error string method
118 
120  const char** str); // [out] the API error string pointer returning
121 
123 // Profiling features and data
124 //
125 // Profiling features objects have profiling feature info, type, parameters and data
126 // Also profiling data samples can be iterated using a callback
127 
128 // Profiling feature kind
129 typedef enum {
135 
136 // Profiling feature parameter
137 typedef hsa_ven_amd_aqlprofile_parameter_t rocprofiler_parameter_t;
138 
139 // Profiling data kind
140 typedef enum {
148 
149 // Profiling data type
150 typedef struct {
152  union {
153  uint32_t result_int32; // 32bit integer result
154  uint64_t result_int64; // 64bit integer result
155  float result_float; // float single-precision result
156  double result_double; // float double-precision result
157  struct {
158  void* ptr;
159  uint32_t size;
160  uint32_t instance_count;
161  bool copy;
162  } result_bytes; // data by ptr and byte size
163  };
165 
166 // Profiling feature type
167 typedef struct {
169  union {
170  const char* name; // feature name
171  struct {
172  const char* block; // counter block name
173  uint32_t event; // counter event id
174  } counter;
175  };
176  const rocprofiler_parameter_t* parameters; // feature parameters array
177  uint32_t parameter_count; // feature parameters count
178  rocprofiler_data_t data; // profiling data
180 
181 // Profiling features set type
183 
185 // Profiling context
186 //
187 // Profiling context object accumulates all profiling information
188 
189 // Profiling context object
190 typedef void rocprofiler_t;
191 
192 // Profiling group object
193 typedef struct {
194  unsigned index; // group index
195  rocprofiler_feature_t** features; // profiling info array
196  uint32_t feature_count; // profiling info count
197  rocprofiler_t* context; // context object
199 
200 // Profiling mode mask
201 typedef enum {
202  ROCPROFILER_MODE_STANDALONE = 1, // standalone mode when ROC profiler supports a queue
203  ROCPROFILER_MODE_CREATEQUEUE = 2, // ROC profiler creates queue in standalone mode
204  ROCPROFILER_MODE_SINGLEGROUP = 4 // only one group is allowed, failed otherwise
206 
207 // Profiling handler, called on profiling completion
208 typedef bool (*rocprofiler_handler_t)(rocprofiler_group_t group, void* arg);
209 
210 // Profiling properties
211 typedef struct {
212  hsa_queue_t* queue; // queue for STANDALONE mode
213  // the queue is created and returned in CREATEQUEUE mode
214  uint32_t queue_depth; // created queue depth
215  rocprofiler_handler_t handler; // handler on completion
216  void* handler_arg; // the handler arg
218 
219 // Create new profiling context
220 hsa_status_t rocprofiler_open(hsa_agent_t agent, // GPU handle
221  rocprofiler_feature_t* features, // [in] profiling features array
222  uint32_t feature_count, // profiling info count
223  rocprofiler_t** context, // [out] context object
224  uint32_t mode, // profiling mode mask
225  rocprofiler_properties_t* properties); // profiling properties
226 
227 // Add feature to a features set
229  const rocprofiler_feature_t* feature, // [in]
230  rocprofiler_feature_set_t* features_set); // [in/out] profiling features set
231 
232 // Create new profiling context
234  hsa_agent_t agent, // GPU handle
235  rocprofiler_feature_set_t* features_set, // [in] profiling features set
236  rocprofiler_t** context, // [out] context object
237  uint32_t mode, // profiling mode mask
238  rocprofiler_properties_t* properties); // profiling properties
239 
240 // Delete profiling info
241 hsa_status_t rocprofiler_close(rocprofiler_t* context); // [in] profiling context
242 
243 // Context reset before reusing
244 hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling context
245  uint32_t group_index); // group index
246 
247 // Return context agent
248 hsa_status_t rocprofiler_get_agent(rocprofiler_t* context, // [in] profiling context
249  hsa_agent_t* agent); // [out] GPU handle
250 
251 // Supported time value ID
252 typedef enum {
253  ROCPROFILER_TIME_ID_CLOCK_REALTIME = 0, // Linux realtime clock time
254  ROCPROFILER_TIME_ID_CLOCK_REALTIME_COARSE = 1, // Linux realtime-coarse clock time
255  ROCPROFILER_TIME_ID_CLOCK_MONOTONIC = 2, // Linux monotonic clock time
256  ROCPROFILER_TIME_ID_CLOCK_MONOTONIC_COARSE = 3, // Linux monotonic-coarse clock time
257  ROCPROFILER_TIME_ID_CLOCK_MONOTONIC_RAW = 4, // Linux monotonic-raw clock time
259 
260 // Return time value for a given time ID and profiling timestamp
261 hsa_status_t rocprofiler_get_time(
262  rocprofiler_time_id_t time_id, // identifier of the clock to convert the timestamp to
263  uint64_t timestamp, // profiling timestamp
264  uint64_t* value_ns, // [out] returned time 'ns' value, ignored if NULL
265  uint64_t* error_ns); // [out] returned time error 'ns' value, ignored if NULL
266 
268 // Queue callbacks
269 //
270 // Queue callbacks for initiating profiling per kernel dispatch and for waiting
271 // for the profiling data when the queue is destroyed.
272 
273 // Dispatch record
274 typedef struct {
275  uint64_t dispatch; // dispatch timestamp, ns
276  uint64_t begin; // kernel begin timestamp, ns
277  uint64_t end; // kernel end timestamp, ns
278  uint64_t complete; // completion signal timestamp, ns
280 
281 // Profiling callback data
282 typedef struct {
283  hsa_agent_t agent; // GPU agent handle
284  uint32_t agent_index; // GPU index (GPU Driver Node ID as reported in the sysfs topology)
285  const hsa_queue_t* queue; // HSA queue
286  uint64_t queue_index; // Index in the queue
287  uint32_t queue_id; // Queue id
288  hsa_signal_t completion_signal; // Completion signal
289  const hsa_kernel_dispatch_packet_t* packet; // HSA dispatch packet
290  const char* kernel_name; // Kernel name
291  uint64_t kernel_object; // Kernel object address
292  const amd_kernel_code_t* kernel_code; // Kernel code pointer
293  uint32_t thread_id; // Thread id
294  const rocprofiler_dispatch_record_t* record; // Dispatch record
296 
297 // Profiling callback type
298 typedef hsa_status_t (*rocprofiler_callback_t)(
299  const rocprofiler_callback_data_t* callback_data, // [in] callback data
300  void* user_data, // [in/out] user data passed to the callback
301  rocprofiler_group_t* group); // [out] returned profiling group
302 
303 // Queue callbacks
304 typedef struct {
305  rocprofiler_callback_t dispatch; // dispatch callback
306  hsa_status_t (*create)(hsa_queue_t* queue, void* data); // create callback
307  hsa_status_t (*destroy)(hsa_queue_t* queue, void* data); // destroy callback
309 
310 // Set queue callbacks
312  void* data); // [in/out] passed callbacks data
313 
314 // Remove queue callbacks
316 
317 // Start/stop queue callbacks
320 
322 // Start/stop profiling
323 //
324 // Start/stop the context profiling invocation; this has to be done as many times
325 // as 'context.invocations' to collect all profiling data
326 
327 // Start profiling
328 hsa_status_t rocprofiler_start(rocprofiler_t* context, // [in/out] profiling context
329  uint32_t group_index); // group index
330 
331 // Stop profiling
332 hsa_status_t rocprofiler_stop(rocprofiler_t* context, // [in/out] profiling context
333  uint32_t group_index); // group index
334 
335 // Read profiling
336 hsa_status_t rocprofiler_read(rocprofiler_t* context, // [in/out] profiling context
337  uint32_t group_index); // group index
338 
339 // Read profiling data
340 hsa_status_t rocprofiler_get_data(rocprofiler_t* context, // [in/out] profiling context
341  uint32_t group_index); // group index
342 
343 // Get profiling groups count
344 hsa_status_t rocprofiler_group_count(const rocprofiler_t* context, // [in] profiling context
345  uint32_t* group_count); // [out] profiling groups count
346 
347 // Get profiling group for a given index
348 hsa_status_t rocprofiler_get_group(rocprofiler_t* context, // [in] profiling context
349  uint32_t group_index, // profiling group index
350  rocprofiler_group_t* group); // [out] profiling group
351 
352 // Start profiling
353 hsa_status_t rocprofiler_group_start(rocprofiler_group_t* group); // [in/out] profiling group
354 
355 // Stop profiling
356 hsa_status_t rocprofiler_group_stop(rocprofiler_group_t* group); // [in/out] profiling group
357 
358 // Read profiling
359 hsa_status_t rocprofiler_group_read(rocprofiler_group_t* group); // [in/out] profiling group
360 
361 // Get profiling data
362 hsa_status_t rocprofiler_group_get_data(rocprofiler_group_t* group); // [in/out] profiling group
363 
364 // Get metrics data
365 hsa_status_t rocprofiler_get_metrics(const rocprofiler_t* context); // [in] profiling context
366 
367 // Definition of output data iterator callback
368 typedef hsa_ven_amd_aqlprofile_data_callback_t rocprofiler_trace_data_callback_t;
369 
370 // Method for iterating the events output data
372  rocprofiler_t* context, // [in] profiling context
373  rocprofiler_trace_data_callback_t callback, // callback to iterate the output data
374  void* data); // [in/out] callback data
375 
377 // Profiling features and data
378 //
379 // Profiling features objects have profiling feature info, type, parameters and data
380 // Also profiling data samples can be iterated using a callback
381 
382 // Profiling info kind
383 typedef enum {
384  ROCPROFILER_INFO_KIND_METRIC = 0, // metric info
385  ROCPROFILER_INFO_KIND_METRIC_COUNT = 1, // metric features count, int32
386  ROCPROFILER_INFO_KIND_TRACE = 2, // trace info
387  ROCPROFILER_INFO_KIND_TRACE_COUNT = 3, // trace features count, int32
388  ROCPROFILER_INFO_KIND_TRACE_PARAMETER = 4, // trace parameter info
389  ROCPROFILER_INFO_KIND_TRACE_PARAMETER_COUNT = 5 // trace parameter count, int32
391 
392 // Profiling info query
393 typedef union {
394  rocprofiler_info_kind_t info_kind; // queried profiling info kind
395  struct {
396  const char* trace_name; // queried info trace name
397  } trace_parameter;
399 
400 // Profiling info data
401 typedef struct {
402  uint32_t
403  agent_index; // GPU HSA agent index (GPU Driver Node ID as reported in the sysfs topology)
404  rocprofiler_info_kind_t kind; // info data kind
405  union {
406  struct {
407  const char* name; // metric name
408  uint32_t instances; // instances number
409  const char* expr; // metric expression, NULL for basic counters
410  const char* description; // metric description
411  const char* block_name; // block name
412  uint32_t block_counters; // number of block counters
413  } metric;
414  struct {
415  const char* name; // trace name
416  const char* description; // trace description
417  uint32_t parameter_count; // supported by the trace number parameters
418  } trace;
419  struct {
420  uint32_t code; // parameter code
421  const char* trace_name; // trace name
422  const char* parameter_name; // parameter name
423  const char* description; // trace parameter description
424  } trace_parameter;
425  };
427 
428 // Return the info for a given info kind
429 hsa_status_t rocprofiler_get_info(const hsa_agent_t* agent, // [in] GFXIP handle
430  rocprofiler_info_kind_t kind, // kind of iterated info
431  void* data); // [in/out] returned data
432 
433 // Iterate over the info for a given info kind, and invoke an application-defined callback on every
434 // iteration
435 hsa_status_t rocprofiler_iterate_info(const hsa_agent_t* agent, // [in] GFXIP handle
436  rocprofiler_info_kind_t kind, // kind of iterated info
437  hsa_status_t (*callback)(const rocprofiler_info_data_t info,
438  void* data), // callback
439  void* data); // [in/out] data passed to callback
440 
441 // Iterate over the info for a given info query, and invoke an application-defined callback on every
442 // iteration
443 hsa_status_t rocprofiler_query_info(const hsa_agent_t* agent, // [in] GFXIP handle
444  rocprofiler_info_query_t query, // iterated info query
445  hsa_status_t (*callback)(const rocprofiler_info_data_t info,
446  void* data), // callback
447  void* data); // [in/out] data passed to callback
448 
449 // Create a profiled queue. All dispatches on this queue will be profiled
451  hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type,
452  void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
453  uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);
454 
456 // Profiling pool
457 //
458 // Support for profiling contexts pool
459 // The API provides the capability to create a context pool for a given agent and a set of features,
460 // to fetch/release a context entry, and to register a callback for context completion.
461 
462 // Profiling pool handle
463 typedef void rocprofiler_pool_t;
464 
465 // Profiling pool entry
466 typedef struct {
467  rocprofiler_t* context; // context object
468  void* payload; // payload data object
470 
471 // Profiling handler, called on profiling completion
472 typedef bool (*rocprofiler_pool_handler_t)(const rocprofiler_pool_entry_t* entry, void* arg);
473 
474 // Profiling pool properties
475 typedef struct {
476  uint32_t num_entries; // pool size entries
477  uint32_t payload_bytes; // payload size bytes
478  rocprofiler_pool_handler_t handler; // handler on context completion
479  void* handler_arg; // the handler arg
481 
482 // Open profiling pool
484  hsa_agent_t agent, // GPU handle
485  rocprofiler_feature_t* features, // [in] profiling features array
486  uint32_t feature_count, // profiling info count
487  rocprofiler_pool_t** pool, // [out] context object
488  uint32_t mode, // profiling mode mask
489  rocprofiler_pool_properties_t*); // pool properties
490 
491 // Close profiling pool
492 hsa_status_t rocprofiler_pool_close(rocprofiler_pool_t* pool); // profiling pool handle
493 
494 // Fetch profiling pool entry
496  rocprofiler_pool_t* pool, // profiling pool handle
497  rocprofiler_pool_entry_t* entry); // [out] empty profiling pool entry
498 
499 // Release profiling pool entry
501  rocprofiler_pool_entry_t* entry); // released profiling pool entry
502 
503 // Iterate fetched profiling pool entries
504 hsa_status_t rocprofiler_pool_iterate(rocprofiler_pool_t* pool, // profiling pool handle
505  hsa_status_t (*callback)(rocprofiler_pool_entry_t* entry,
506  void* data), // callback
507  void* data); // [in/out] data passed to callback
508 
509 // Flush completed entries in profiling pool
510 hsa_status_t rocprofiler_pool_flush(rocprofiler_pool_t* pool); // profiling pool handle
511 
513 // HSA intercepting API
514 
515 // HSA callbacks ID enumeration
516 typedef enum {
517  ROCPROFILER_HSA_CB_ID_ALLOCATE = 0, // Memory allocate callback
518  ROCPROFILER_HSA_CB_ID_DEVICE = 1, // Device assign callback
519  ROCPROFILER_HSA_CB_ID_MEMCOPY = 2, // Memcopy callback
520  ROCPROFILER_HSA_CB_ID_SUBMIT = 3, // Packet submit callback
521  ROCPROFILER_HSA_CB_ID_KSYMBOL = 4, // Loading/unloading of kernel symbol
522  ROCPROFILER_HSA_CB_ID_CODEOBJ = 5 // Loading/unloading of code object
524 
525 // HSA callback data type
526 typedef struct {
527  union {
528  struct {
529  const void* ptr; // allocated area ptr
530  size_t size; // allocated area size, zero size means 'free' callback
531  hsa_amd_segment_t segment; // allocated area's memory segment type
532  hsa_amd_memory_pool_global_flag_t global_flag; // allocated area's memory global flag
533  int is_code; // equal to 1 if code is allocated
534  } allocate;
535  struct {
536  hsa_device_type_t type; // type of assigned device
537  uint32_t id; // id of assigned device
538  hsa_agent_t agent; // device HSA agent handle
539  const void* ptr; // ptr the device is assigned to
540  } device;
541  struct {
542  const void* dst; // memcopy dst ptr
543  const void* src; // memcopy src ptr
544  size_t size; // memcopy size bytes
545  } memcopy;
546  struct {
547  const void* packet; // packet submitted to the GPU
548  const char* kernel_name; // kernel name, not NULL if dispatch
549  hsa_queue_t* queue; // HSA queue the kernel was submitted to
550  uint32_t device_type; // type of device the packet is submitted to
551  uint32_t device_id; // id of device the packet is submitted to
552  } submit;
553  struct {
554  uint64_t object; // kernel symbol object
555  const char* name; // kernel symbol name
556  uint32_t name_length; // kernel symbol name length
557  int unload; // symbol executable destroy
558  } ksymbol;
559  struct {
560  uint32_t storage_type; // code object storage type
561  int storage_file; // origin file descriptor
562  uint64_t memory_base; // origin memory base
563  uint64_t memory_size; // origin memory size
564  uint64_t load_base; // codeobj load base
565  uint64_t load_size; // codeobj load size
566  uint64_t load_delta; // codeobj load delta
567  uint32_t uri_length; // URI string length
568  char* uri; // URI string
569  int unload; // unload flag
570  } codeobj;
571  };
573 
574 // HSA callback function type
575 typedef hsa_status_t (*rocprofiler_hsa_callback_fun_t)(
576  rocprofiler_hsa_cb_id_t id, // callback id
577  const rocprofiler_hsa_callback_data_t* data, // [in] callback data
578  void* arg); // [in/out] user passed data
579 
580 // HSA callbacks structure
581 typedef struct {
582  rocprofiler_hsa_callback_fun_t allocate; // memory allocate callback
583  rocprofiler_hsa_callback_fun_t device; // agent assign callback
584  rocprofiler_hsa_callback_fun_t memcopy; // memory copy callback
585  rocprofiler_hsa_callback_fun_t submit; // packet submit callback
586  rocprofiler_hsa_callback_fun_t ksymbol; // kernel symbol callback
587  rocprofiler_hsa_callback_fun_t codeobj; // codeobject load/unload callback
589 
590 // Set callbacks. If the callback is NULL then it is disabled.
591 // If callback returns a value that is not HSA_STATUS_SUCCESS the callback
592 // will be unregistered.
594  const rocprofiler_hsa_callbacks_t callbacks, // HSA callback function
595  void* arg); // callback user data
596 
597 #ifdef __cplusplus
598 } // extern "C" block
599 #endif // __cplusplus
600 
601 #endif // INC_ROCPROFILER_H_
rocprofiler_data_kind_t
Definition: rocprofiler.h:140
@ ROCPROFILER_DATA_KIND_DOUBLE
Definition: rocprofiler.h:145
@ ROCPROFILER_DATA_KIND_BYTES
Definition: rocprofiler.h:146
@ ROCPROFILER_DATA_KIND_INT32
Definition: rocprofiler.h:142
@ ROCPROFILER_DATA_KIND_INT64
Definition: rocprofiler.h:143
@ ROCPROFILER_DATA_KIND_FLOAT
Definition: rocprofiler.h:144
@ ROCPROFILER_DATA_KIND_UNINIT
Definition: rocprofiler.h:141
hsa_status_t rocprofiler_get_metrics(const rocprofiler_t *context)
hsa_status_t rocprofiler_query_info(const hsa_agent_t *agent, rocprofiler_info_query_t query, hsa_status_t(*callback)(const rocprofiler_info_data_t info, void *data), void *data)
hsa_status_t rocprofiler_iterate_trace_data(rocprofiler_t *context, rocprofiler_trace_data_callback_t callback, void *data)
hsa_status_t rocprofiler_get_info(const hsa_agent_t *agent, rocprofiler_info_kind_t kind, void *data)
hsa_status_t rocprofiler_group_get_data(rocprofiler_group_t *group)
hsa_status_t(* rocprofiler_callback_t)(const rocprofiler_callback_data_t *callback_data, void *user_data, rocprofiler_group_t *group)
Definition: rocprofiler.h:298
hsa_status_t rocprofiler_pool_iterate(rocprofiler_pool_t *pool, hsa_status_t(*callback)(rocprofiler_pool_entry_t *entry, void *data), void *data)
rocprofiler_hsa_cb_id_t
Definition: rocprofiler.h:516
@ ROCPROFILER_HSA_CB_ID_ALLOCATE
Definition: rocprofiler.h:517
@ ROCPROFILER_HSA_CB_ID_DEVICE
Definition: rocprofiler.h:518
@ ROCPROFILER_HSA_CB_ID_KSYMBOL
Definition: rocprofiler.h:521
@ ROCPROFILER_HSA_CB_ID_SUBMIT
Definition: rocprofiler.h:520
@ ROCPROFILER_HSA_CB_ID_CODEOBJ
Definition: rocprofiler.h:522
@ ROCPROFILER_HSA_CB_ID_MEMCOPY
Definition: rocprofiler.h:519
hsa_status_t rocprofiler_open(hsa_agent_t agent, rocprofiler_feature_t *features, uint32_t feature_count, rocprofiler_t **context, uint32_t mode, rocprofiler_properties_t *properties)
hsa_status_t rocprofiler_start_queue_callbacks()
hsa_status_t rocprofiler_pool_flush(rocprofiler_pool_t *pool)
uint32_t rocprofiler_version_minor()
hsa_status_t rocprofiler_add_feature(const rocprofiler_feature_t *feature, rocprofiler_feature_set_t *features_set)
rocprofiler_info_kind_t
Definition: rocprofiler.h:383
@ ROCPROFILER_INFO_KIND_METRIC_COUNT
Definition: rocprofiler.h:385
@ ROCPROFILER_INFO_KIND_METRIC
Definition: rocprofiler.h:384
@ ROCPROFILER_INFO_KIND_TRACE_PARAMETER
Definition: rocprofiler.h:388
@ ROCPROFILER_INFO_KIND_TRACE_COUNT
Definition: rocprofiler.h:387
@ ROCPROFILER_INFO_KIND_TRACE
Definition: rocprofiler.h:386
@ ROCPROFILER_INFO_KIND_TRACE_PARAMETER_COUNT
Definition: rocprofiler.h:389
hsa_status_t rocprofiler_group_stop(rocprofiler_group_t *group)
hsa_status_t rocprofiler_group_start(rocprofiler_group_t *group)
hsa_status_t rocprofiler_pool_open(hsa_agent_t agent, rocprofiler_feature_t *features, uint32_t feature_count, rocprofiler_pool_t **pool, uint32_t mode, rocprofiler_pool_properties_t *)
hsa_status_t rocprofiler_stop_queue_callbacks()
hsa_status_t rocprofiler_features_set_open(hsa_agent_t agent, rocprofiler_feature_set_t *features_set, rocprofiler_t **context, uint32_t mode, rocprofiler_properties_t *properties)
void rocprofiler_t
Definition: rocprofiler.h:190
rocprofiler_mode_t
Definition: rocprofiler.h:201
@ ROCPROFILER_MODE_CREATEQUEUE
Definition: rocprofiler.h:203
@ ROCPROFILER_MODE_SINGLEGROUP
Definition: rocprofiler.h:204
@ ROCPROFILER_MODE_STANDALONE
Definition: rocprofiler.h:202
hsa_status_t rocprofiler_get_group(rocprofiler_t *context, uint32_t group_index, rocprofiler_group_t *group)
void rocprofiler_feature_set_t
Definition: rocprofiler.h:182
hsa_status_t rocprofiler_group_count(const rocprofiler_t *context, uint32_t *group_count)
hsa_status_t rocprofiler_reset(rocprofiler_t *context, uint32_t group_index)
hsa_ven_amd_aqlprofile_parameter_t rocprofiler_parameter_t
Definition: rocprofiler.h:137
bool(* rocprofiler_handler_t)(rocprofiler_group_t group, void *arg)
Definition: rocprofiler.h:208
hsa_ven_amd_aqlprofile_data_callback_t rocprofiler_trace_data_callback_t
Definition: rocprofiler.h:368
bool(* rocprofiler_pool_handler_t)(const rocprofiler_pool_entry_t *entry, void *arg)
Definition: rocprofiler.h:472
hsa_status_t rocprofiler_pool_fetch(rocprofiler_pool_t *pool, rocprofiler_pool_entry_t *entry)
hsa_status_t rocprofiler_get_data(rocprofiler_t *context, uint32_t group_index)
hsa_status_t(* rocprofiler_hsa_callback_fun_t)(rocprofiler_hsa_cb_id_t id, const rocprofiler_hsa_callback_data_t *data, void *arg)
Definition: rocprofiler.h:575
hsa_status_t rocprofiler_iterate_info(const hsa_agent_t *agent, rocprofiler_info_kind_t kind, hsa_status_t(*callback)(const rocprofiler_info_data_t info, void *data), void *data)
hsa_status_t rocprofiler_get_agent(rocprofiler_t *context, hsa_agent_t *agent)
hsa_status_t rocprofiler_queue_create_profiled(hsa_agent_t agent_handle, uint32_t size, hsa_queue_type32_t type, void(*callback)(hsa_status_t status, hsa_queue_t *source, void *data), void *data, uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t **queue)
hsa_status_t rocprofiler_close(rocprofiler_t *context)
hsa_status_t rocprofiler_pool_release(rocprofiler_pool_entry_t *entry)
rocprofiler_feature_kind_t
Definition: rocprofiler.h:129
@ ROCPROFILER_FEATURE_KIND_METRIC
Definition: rocprofiler.h:130
@ ROCPROFILER_FEATURE_KIND_PCSMP_MOD
Definition: rocprofiler.h:133
@ ROCPROFILER_FEATURE_KIND_TRACE
Definition: rocprofiler.h:131
@ ROCPROFILER_FEATURE_KIND_SPM_MOD
Definition: rocprofiler.h:132
hsa_status_t rocprofiler_start(rocprofiler_t *context, uint32_t group_index)
hsa_status_t rocprofiler_remove_queue_callbacks()
hsa_status_t rocprofiler_error_string(const char **str)
uint32_t rocprofiler_version_major()
rocprofiler_time_id_t
Definition: rocprofiler.h:252
@ ROCPROFILER_TIME_ID_CLOCK_MONOTONIC_RAW
Definition: rocprofiler.h:257
@ ROCPROFILER_TIME_ID_CLOCK_REALTIME_COARSE
Definition: rocprofiler.h:254
@ ROCPROFILER_TIME_ID_CLOCK_REALTIME
Definition: rocprofiler.h:253
@ ROCPROFILER_TIME_ID_CLOCK_MONOTONIC_COARSE
Definition: rocprofiler.h:256
@ ROCPROFILER_TIME_ID_CLOCK_MONOTONIC
Definition: rocprofiler.h:255
hsa_status_t rocprofiler_get_time(rocprofiler_time_id_t time_id, uint64_t timestamp, uint64_t *value_ns, uint64_t *error_ns)
hsa_status_t rocprofiler_read(rocprofiler_t *context, uint32_t group_index)
void rocprofiler_pool_t
Definition: rocprofiler.h:463
hsa_status_t rocprofiler_stop(rocprofiler_t *context, uint32_t group_index)
hsa_status_t rocprofiler_pool_close(rocprofiler_pool_t *pool)
hsa_status_t rocprofiler_set_hsa_callbacks(const rocprofiler_hsa_callbacks_t callbacks, void *arg)
hsa_status_t rocprofiler_group_read(rocprofiler_group_t *group)
hsa_status_t rocprofiler_set_queue_callbacks(rocprofiler_queue_callbacks_t callbacks, void *data)
Definition: rocprofiler.h:282
hsa_agent_t agent
Definition: rocprofiler.h:283
uint32_t queue_id
Definition: rocprofiler.h:287
uint64_t kernel_object
Definition: rocprofiler.h:291
hsa_signal_t completion_signal
Definition: rocprofiler.h:288
const rocprofiler_dispatch_record_t * record
Definition: rocprofiler.h:294
uint64_t queue_index
Definition: rocprofiler.h:286
const amd_kernel_code_t * kernel_code
Definition: rocprofiler.h:292
const hsa_queue_t * queue
Definition: rocprofiler.h:285
uint32_t agent_index
Definition: rocprofiler.h:284
const hsa_kernel_dispatch_packet_t * packet
Definition: rocprofiler.h:289
const char * kernel_name
Definition: rocprofiler.h:290
uint32_t thread_id
Definition: rocprofiler.h:293
Definition: rocprofiler.h:150
rocprofiler_data_kind_t kind
Definition: rocprofiler.h:151
uint32_t instance_count
Definition: rocprofiler.h:160
uint32_t result_int32
Definition: rocprofiler.h:153
void * ptr
Definition: rocprofiler.h:158
bool copy
Definition: rocprofiler.h:161
double result_double
Definition: rocprofiler.h:156
float result_float
Definition: rocprofiler.h:155
uint32_t size
Definition: rocprofiler.h:159
uint64_t result_int64
Definition: rocprofiler.h:154
Definition: rocprofiler.h:274
uint64_t end
Definition: rocprofiler.h:277
uint64_t complete
Definition: rocprofiler.h:278
uint64_t begin
Definition: rocprofiler.h:276
uint64_t dispatch
Definition: rocprofiler.h:275
Definition: rocprofiler.h:167
const rocprofiler_parameter_t * parameters
Definition: rocprofiler.h:176
rocprofiler_feature_kind_t kind
Definition: rocprofiler.h:168
uint32_t parameter_count
Definition: rocprofiler.h:177
const char * block
Definition: rocprofiler.h:172
const char * name
Definition: rocprofiler.h:170
uint32_t event
Definition: rocprofiler.h:173
rocprofiler_data_t data
Definition: rocprofiler.h:178
Definition: rocprofiler.h:193
uint32_t feature_count
Definition: rocprofiler.h:196
rocprofiler_feature_t ** features
Definition: rocprofiler.h:195
rocprofiler_t * context
Definition: rocprofiler.h:197
unsigned index
Definition: rocprofiler.h:194
Definition: rocprofiler.h:526
hsa_agent_t agent
Definition: rocprofiler.h:538
uint32_t uri_length
Definition: rocprofiler.h:567
char * uri
Definition: rocprofiler.h:568
hsa_amd_segment_t segment
Definition: rocprofiler.h:531
hsa_queue_t * queue
Definition: rocprofiler.h:549
uint32_t device_id
Definition: rocprofiler.h:551
const void * packet
Definition: rocprofiler.h:547
const void * src
Definition: rocprofiler.h:543
uint64_t load_base
Definition: rocprofiler.h:564
uint32_t name_length
Definition: rocprofiler.h:556
int is_code
Definition: rocprofiler.h:533
uint64_t load_size
Definition: rocprofiler.h:565
int storage_file
Definition: rocprofiler.h:561
uint32_t id
Definition: rocprofiler.h:537
const char * name
Definition: rocprofiler.h:555
uint64_t load_delta
Definition: rocprofiler.h:566
uint64_t memory_size
Definition: rocprofiler.h:563
size_t size
Definition: rocprofiler.h:530
hsa_device_type_t type
Definition: rocprofiler.h:536
uint32_t storage_type
Definition: rocprofiler.h:560
const char * kernel_name
Definition: rocprofiler.h:548
uint32_t device_type
Definition: rocprofiler.h:550
uint64_t object
Definition: rocprofiler.h:554
int unload
Definition: rocprofiler.h:557
const void * dst
Definition: rocprofiler.h:542
const void * ptr
Definition: rocprofiler.h:529
uint64_t memory_base
Definition: rocprofiler.h:562
hsa_amd_memory_pool_global_flag_t global_flag
Definition: rocprofiler.h:532
Definition: rocprofiler.h:581
rocprofiler_hsa_callback_fun_t allocate
Definition: rocprofiler.h:582
rocprofiler_hsa_callback_fun_t device
Definition: rocprofiler.h:583
rocprofiler_hsa_callback_fun_t codeobj
Definition: rocprofiler.h:587
rocprofiler_hsa_callback_fun_t submit
Definition: rocprofiler.h:585
rocprofiler_hsa_callback_fun_t ksymbol
Definition: rocprofiler.h:586
rocprofiler_hsa_callback_fun_t memcopy
Definition: rocprofiler.h:584
Definition: rocprofiler.h:401
const char * block_name
Definition: rocprofiler.h:411
uint32_t agent_index
Definition: rocprofiler.h:403
const char * description
Definition: rocprofiler.h:410
uint32_t code
Definition: rocprofiler.h:420
rocprofiler_info_kind_t kind
Definition: rocprofiler.h:404
const char * name
Definition: rocprofiler.h:407
const char * expr
Definition: rocprofiler.h:409
uint32_t parameter_count
Definition: rocprofiler.h:417
const char * trace_name
Definition: rocprofiler.h:421
const char * parameter_name
Definition: rocprofiler.h:422
uint32_t block_counters
Definition: rocprofiler.h:412
uint32_t instances
Definition: rocprofiler.h:408
Definition: rocprofiler.h:466
rocprofiler_t * context
Definition: rocprofiler.h:467
void * payload
Definition: rocprofiler.h:468
Definition: rocprofiler.h:475
uint32_t payload_bytes
Definition: rocprofiler.h:477
void * handler_arg
Definition: rocprofiler.h:479
uint32_t num_entries
Definition: rocprofiler.h:476
rocprofiler_pool_handler_t handler
Definition: rocprofiler.h:478
Definition: rocprofiler.h:211
hsa_queue_t * queue
Definition: rocprofiler.h:212
void * handler_arg
Definition: rocprofiler.h:216
uint32_t queue_depth
Definition: rocprofiler.h:214
rocprofiler_handler_t handler
Definition: rocprofiler.h:215
Definition: rocprofiler.h:304
rocprofiler_callback_t dispatch
Definition: rocprofiler.h:305
Definition: rocprofiler.h:102
uint32_t trace_size
Definition: rocprofiler.h:106
uint32_t obj_dumping
Definition: rocprofiler.h:113
uint32_t k_concurrent
Definition: rocprofiler.h:111
uint32_t hsa_intercepting
Definition: rocprofiler.h:110
uint32_t code_obj_tracking
Definition: rocprofiler.h:104
uint32_t timestamp_on
Definition: rocprofiler.h:109
uint32_t opt_mode
Definition: rocprofiler.h:112
uint32_t intercept_mode
Definition: rocprofiler.h:103
uint32_t memcopy_tracking
Definition: rocprofiler.h:105
uint64_t timeout
Definition: rocprofiler.h:108
uint32_t trace_local
Definition: rocprofiler.h:107
Definition: rocprofiler.h:393
const char * trace_name
Definition: rocprofiler.h:396
rocprofiler_info_kind_t info_kind
Definition: rocprofiler.h:394