/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp Source File

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp Source File
json_dump.hpp
Go to the documentation of this file.
1 // Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2 // SPDX-License-Identifier: MIT
3 
4 #ifdef CK_ENABLE_JSON_DUMP
5 #pragma GCC diagnostic push
6 #pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
7 #include "rapidjson/writer.h"
9 #include "rapidjson/document.h"
10 #include "rapidjson/rapidjson.h"
11 #pragma GCC diagnostic pop
12 
// Opens `file_name` for writing and starts a JSON object.
//
// The macro declares the locals `file_str`, `file`, `s` and `writer` in the enclosing scope; the
// other macros below refer to them by name, so it can be used at most once per scope.
// Throws std::runtime_error when the file cannot be opened.
#define START_JSON_DUMP_FILE(file_name)                                             \
    std::string file_str(file_name);                                                \
    std::ofstream file(file_str);                                                   \
    if(!file.is_open())                                                             \
    {                                                                               \
        throw std::runtime_error("Could not open file: " + std::string(file_name)); \
    }                                                                               \
    rapidjson::StringBuffer s;                                                      \
    rapidjson::Writer<rapidjson::StringBuffer> writer(s);                           \
    writer.StartObject();

// Ends the JSON object started by START_JSON_DUMP_FILE, writes the buffered text to the file,
// closes the file and prints a confirmation line to std::cout.
#define END_JSON_DUMP_FILE()  \
    writer.EndObject();       \
    file << s.GetString();    \
    file.close();             \
    std::cout << "Results written to " << file_str << " successfully" << std::endl;

// Convenience wrappers that forward to the helper functions using the `writer` local declared by
// START_JSON_DUMP_FILE. Both expansions already end in ';'.
#define ADD_KEY_VALUE(key, value) add_key_value_pair(writer, key, value);
#define ADD_PERF_TO_JSON(_time, tflops, gbytes) add_perf_to_json(writer, _time, tflops, gbytes);
32 
33 template <typename T>
34 void add_key_value_pair(rapidjson::Writer<rapidjson::StringBuffer>& writer,
35  const char* key,
36  T value)
37 {
38  writer.Key(key);
40  {
41  writer.String(value, static_cast<rapidjson::SizeType>(std::strlen(value)));
42  }
43  else if constexpr(std::is_same<T, std::string>::value)
44  {
45  writer.String(value.c_str(), static_cast<rapidjson::SizeType>(value.length()));
46  }
47  else if constexpr(std::is_floating_point<T>::value)
48  {
49  writer.Double(static_cast<double>(value));
50  }
51  else if constexpr(std::is_integral<T>::value)
52  {
53  writer.Int64(static_cast<int64_t>(value));
54  }
55  else
56  {
59  "Unsupported type for JSON serialization");
60  }
61 }
62 
63 static void add_perf_to_json(rapidjson::Writer<rapidjson::StringBuffer>& writer,
64  float time,
65  float tflops,
66  float gbytes)
67 {
68  std::string roster("perf");
69  writer.String(roster.c_str(), static_cast<rapidjson::SizeType>(roster.length()));
70 
71  writer.StartArray();
72  writer.StartObject();
73 
74  add_key_value_pair(writer, "time", time);
75  add_key_value_pair(writer, "tflops", tflops);
76  add_key_value_pair(writer, "gbytes", gbytes);
77 
78  writer.EndObject();
79  writer.EndArray();
80 }
81 
82 #else
83 #pragma GCC diagnostic push
84 #pragma GCC diagnostic ignored "-Wunused-parameter"
85 #pragma GCC diagnostic ignored "-Wunused-local-typedef"
// JSON dumping is compiled out: every macro except END_JSON_DUMP_FILE expands to nothing, so the
// dump_* functions below reduce to printing a single notice.
#define START_JSON_DUMP_FILE(file_name)
#define ADD_KEY_VALUE(key, value)
#define ADD_PERF_TO_JSON(_time, tflops, gbytes)

// Tells the user how to turn the feature on.
#define END_JSON_DUMP_FILE()                                                                    \
    std::cout << "JSON dump disabled, To enable, set CK_ENABLE_JSON_DUMP cmake option" \
              << std::endl;
92 #endif
93 
// Helper traits to check for static member existence.
//
// Primary template: used whenever T does not provide all of M_Warp_Tile, N_Warp_Tile and
// K_Warp_Tile, in which case `value` is false.
template <typename T, typename = void>
struct has_warp_tile_members : std::false_type
{
};

// Partial specialization: only viable when the three `T::*_Warp_Tile` expressions are
// well-formed (std::void_t turns a malformed one into a substitution failure), in which case
// `value` is true.
template <typename T>
struct has_warp_tile_members<
    T,
    std::void_t<decltype(T::M_Warp_Tile), decltype(T::N_Warp_Tile), decltype(T::K_Warp_Tile)>>
    : std::true_type
{
};
107 
108 template <typename ALayout,
109  typename BLayout,
110  typename CLayout,
111  typename ADataType,
112  typename BDataType,
113  typename CDataType,
114  typename GemmConfig,
115  template <typename>
116  typename DTypeTraits>
117 void dump_gemm_json_results(const std::string& json_filename,
118  int M,
119  int N,
120  int K,
121  int stride_A,
122  int stride_B,
123  int stride_C,
124  bool persistent,
125  bool pass,
126  float ave_time,
127  float tflops,
128  float gb_per_sec,
129  const std::string& kernel_name = "gemm_basic")
130 {
131  START_JSON_DUMP_FILE(json_filename);
132  ADD_KEY_VALUE("name", kernel_name);
133  ADD_KEY_VALUE("M", M);
134  ADD_KEY_VALUE("N", N);
135  ADD_KEY_VALUE("K", K);
136  ADD_KEY_VALUE("stride_A", stride_A);
137  ADD_KEY_VALUE("stride_B", stride_B);
138  ADD_KEY_VALUE("stride_C", stride_C);
139  ADD_KEY_VALUE("A_layout", ALayout::name);
140  ADD_KEY_VALUE("B_layout", BLayout::name);
141  ADD_KEY_VALUE("C_layout", CLayout::name);
142  using TraitsADataType = DTypeTraits<ADataType>;
143  using TraitsBDataType = DTypeTraits<BDataType>;
144  using TraitsCDataType = DTypeTraits<CDataType>;
145  ADD_KEY_VALUE("A_type", TraitsADataType::name);
146  ADD_KEY_VALUE("B_type", TraitsBDataType::name);
147  ADD_KEY_VALUE("C_type", TraitsCDataType::name);
148  ADD_KEY_VALUE("structured_sparsity", GemmConfig::UseStructuredSparsity ? "on" : "off");
149 
151  {
152  ADD_KEY_VALUE("warp_tile",
153  std::to_string(GemmConfig::M_Warp_Tile) + "x" +
154  std::to_string(GemmConfig::N_Warp_Tile) + "x" +
155  std::to_string(GemmConfig::K_Warp_Tile));
156  }
157  ADD_KEY_VALUE("persistent", persistent ? "on" : "off");
158  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
159  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
161 }
162 
163 inline void dump_batched_gemm_json_results(const std::string& json_filename,
164  const std::string& op_name,
165  int M,
166  int N,
167  int K,
168  int stride_A,
169  int stride_B,
170  int stride_C,
171  int batch_stride_A,
172  int batch_stride_B,
173  int batch_stride_C,
174  int batch_count,
175  bool pass,
176  float ave_time,
177  float tflops,
178  float gb_per_sec,
179  const std::string& kernel_name = "batched_gemm_basic")
180 {
181  START_JSON_DUMP_FILE(json_filename);
182  ADD_KEY_VALUE("name", kernel_name);
183  ADD_KEY_VALUE("op_name", op_name);
184  ADD_KEY_VALUE("M", M);
185  ADD_KEY_VALUE("N", N);
186  ADD_KEY_VALUE("K", K);
187  ADD_KEY_VALUE("stride_A", stride_A);
188  ADD_KEY_VALUE("stride_B", stride_B);
189  ADD_KEY_VALUE("stride_C", stride_C);
190  ADD_KEY_VALUE("batch_stride_A", batch_stride_A);
191  ADD_KEY_VALUE("batch_stride_B", batch_stride_B);
192  ADD_KEY_VALUE("batch_stride_C", batch_stride_C);
193  ADD_KEY_VALUE("batch_count", batch_count);
194  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
195  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
197 }
198 
199 template <typename ALayout, typename BLayout, typename CLayout>
200 void dump_grouped_gemm_json_results(const std::string& json_filename,
201  const std::string& op_name,
202  int group_count,
203  bool pass,
204  float ave_time,
205  float tflops,
206  float gb_per_sec,
207  const std::string& kernel_name = "grouped_gemm")
208 {
209  START_JSON_DUMP_FILE(json_filename);
210  ADD_KEY_VALUE("name", kernel_name);
211  ADD_KEY_VALUE("op_name", op_name);
212  ADD_KEY_VALUE("group_count", group_count);
213  ADD_KEY_VALUE("A_layout", ALayout::name);
214  ADD_KEY_VALUE("B_layout", BLayout::name);
215  ADD_KEY_VALUE("C_layout", CLayout::name);
216  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
217  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
219 }
220 
221 inline void dump_flatmm_json_results(const std::string& json_filename,
222  const std::string& datatype,
223  int M,
224  int N,
225  int K,
226  int stride_A,
227  int stride_B,
228  int stride_C,
229  int kbatch,
230  bool pass,
231  float ave_time,
232  float tflops,
233  float gb_per_sec,
234  const std::string& kernel_name = "flatmm_basic")
235 {
236  START_JSON_DUMP_FILE(json_filename);
237  ADD_KEY_VALUE("name", kernel_name);
238  ADD_KEY_VALUE("DataType", datatype);
239  ADD_KEY_VALUE("M", M);
240  ADD_KEY_VALUE("N", N);
241  ADD_KEY_VALUE("K", K);
242  ADD_KEY_VALUE("StrideA", stride_A);
243  ADD_KEY_VALUE("StrideB", stride_B);
244  ADD_KEY_VALUE("StrideC", stride_C);
245  ADD_KEY_VALUE("kbatch", kbatch);
246  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
247  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
249 }
250 
251 inline void
252 dump_gemm_multi_d_fp16_json_results(const std::string& json_filename,
253  const std::string& op_name,
254  int M,
255  int N,
256  int K,
257  int StrideA,
258  int StrideB,
259  int StrideD0,
260  int StrideD1,
261  int StrideE,
262  bool pass,
263  float ave_time,
264  float tflops,
265  float gb_per_sec,
266  const std::string& kernel_name = "gemm_multi_d_fp16")
267 {
268  START_JSON_DUMP_FILE(json_filename);
269  ADD_KEY_VALUE("name", kernel_name);
270  ADD_KEY_VALUE("op_name", op_name);
271  ADD_KEY_VALUE("M", M);
272  ADD_KEY_VALUE("N", N);
273  ADD_KEY_VALUE("K", K);
274  ADD_KEY_VALUE("StrideA", StrideA);
275  ADD_KEY_VALUE("StrideB", StrideB);
276  ADD_KEY_VALUE("StrideD0", StrideD0);
277  ADD_KEY_VALUE("StrideD1", StrideD1);
278  ADD_KEY_VALUE("StrideE", StrideE);
279  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
280  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
282 }
283 
284 inline void dump_elementwise_json_results(const std::string& json_filename,
285  const std::string& prec,
286  int grid_size,
287  int block_size,
288  float ave_time,
289  float tflops,
290  float gb_per_sec,
291  const std::string& kernel_name = "elementwise")
292 {
293  START_JSON_DUMP_FILE(json_filename);
294  ADD_KEY_VALUE("name", kernel_name);
295  ADD_KEY_VALUE("prec", prec);
296  ADD_KEY_VALUE("grid_size", grid_size);
297  ADD_KEY_VALUE("block_size", block_size);
298  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
300 }
301 
302 inline void dump_layernorm2d_fwd_json_results(const std::string& json_filename,
303  const std::string& prec_i,
304  const std::string& prec_o,
305  const std::string& prec_sm,
306  const std::string& prec_sy,
307  int m,
308  int n,
309  int x_stride,
310  int xr_stride,
311  int y_stride,
312  int yr_stride,
313  bool pass,
314  float ave_time,
315  float tflops,
316  float gb_per_sec,
317  const std::string& kernel_name = "layernorm2d_fwd")
318 {
319  START_JSON_DUMP_FILE(json_filename);
320  ADD_KEY_VALUE("name", kernel_name);
321  ADD_KEY_VALUE("prec_i", prec_i);
322  ADD_KEY_VALUE("prec_o", prec_o);
323  ADD_KEY_VALUE("prec_sm", prec_sm);
324  ADD_KEY_VALUE("prec_sy", prec_sy);
325  ADD_KEY_VALUE("m", m);
326  ADD_KEY_VALUE("n", n);
327  ADD_KEY_VALUE("x_stride", x_stride);
328  ADD_KEY_VALUE("xr_stride", xr_stride);
329  ADD_KEY_VALUE("y_stride", y_stride);
330  ADD_KEY_VALUE("yr_stride", yr_stride);
331  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
332  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
334 }
335 
336 template <typename DataType, template <typename> typename DTypeTraits>
337 void dump_reduce_json_results(const std::string& json_filename,
338  int N,
339  int C,
340  int H,
341  int W,
342  bool pass,
343  float ave_time,
344  float tflops,
345  float gb_per_sec,
346  const std::string& kernel_name = "reduce")
347 {
348  START_JSON_DUMP_FILE(json_filename);
349  ADD_KEY_VALUE("name", kernel_name);
350  using Traits = DTypeTraits<DataType>;
351  ADD_KEY_VALUE("data_type", Traits::name);
352  ADD_KEY_VALUE("N", N);
353  ADD_KEY_VALUE("C", C);
354  ADD_KEY_VALUE("H", H);
355  ADD_KEY_VALUE("W", W);
356  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
357  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
359 }
360 
361 inline void dump_permute_json_results(const std::string& json_filename,
362  const std::string& data_type,
363  bool pass,
364  float ave_time,
365  float tflop,
366  float gb_per_sec,
367  const std::string& kernel_name = "permute")
368 {
369  START_JSON_DUMP_FILE(json_filename);
370  ADD_KEY_VALUE("name", kernel_name);
371  ADD_KEY_VALUE("data_type", data_type);
372  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
373  ADD_PERF_TO_JSON(ave_time, tflop, gb_per_sec)
375 }
376 
377 inline void dump_topk_softmax_json(const std::string& json_filename,
378  const std::string& input_prec,
379  const std::string& weight_prec,
380  int tokens,
381  int experts,
382  int topk,
383  int stride_input,
384  int stride_output,
385  float ave_time,
386  float tflop,
387  float gb_per_sec,
388  bool pass,
389  const std::string& kernel_name = "topk_softmax")
390 {
391  START_JSON_DUMP_FILE(json_filename);
392  ADD_KEY_VALUE("name", kernel_name);
393  ADD_KEY_VALUE("input_prec", input_prec);
394  ADD_KEY_VALUE("weight_prec", weight_prec);
395  ADD_KEY_VALUE("tokens", tokens);
396  ADD_KEY_VALUE("experts", experts);
397  ADD_KEY_VALUE("topk", topk);
398  ADD_KEY_VALUE("stride_input", stride_input);
399  ADD_KEY_VALUE("stride_output", stride_output);
400  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
401  ADD_PERF_TO_JSON(ave_time, tflop, gb_per_sec);
403 }
404 
405 inline void dump_rmsnorm2d_fwd_json(const std::string& json_filename,
406  const std::string& prec_str,
407  int m,
408  int n,
409  int x_stride,
410  int xr_stride,
411  int y_stride,
412  int yr_stride,
413  int use_model_sensitive_rmsnorm,
414  float ave_time,
415  float tflops,
416  float gb_per_sec,
417  bool pass,
418  const std::string& kernel_name = "rmsnorm2d_fwd")
419 {
420  START_JSON_DUMP_FILE(json_filename);
421  ADD_KEY_VALUE("name", kernel_name);
422  ADD_KEY_VALUE("prec", prec_str);
423  ADD_KEY_VALUE("m", m);
424  ADD_KEY_VALUE("n", n);
425  ADD_KEY_VALUE("x_stride", x_stride);
426  ADD_KEY_VALUE("xr_stride", xr_stride);
427  ADD_KEY_VALUE("y_stride", y_stride);
428  ADD_KEY_VALUE("yr_stride", yr_stride);
429  ADD_KEY_VALUE("use_model_sensitive_rmsnorm", use_model_sensitive_rmsnorm);
430  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
431  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
433 }
434 
435 inline void
436 dump_add_rmsnorm2d_rdquant_fwd_json(const std::string& json_filename,
437  const std::string& input_data_type,
438  const std::string& quantized_data_type,
439  int m,
440  int n,
441  int stride,
442  float epsilon,
443  float ave_time,
444  float tflops,
445  float gb_per_sec,
446  bool pass,
447  const std::string& kernel_name = "add_rmsnorm2d_rdquant_fwd")
448 {
449  START_JSON_DUMP_FILE(json_filename);
450  ADD_KEY_VALUE("name", kernel_name);
451  ADD_KEY_VALUE("input_data_type", input_data_type);
452  ADD_KEY_VALUE("quantized_data_type", quantized_data_type);
453  ADD_KEY_VALUE("m", m);
454  ADD_KEY_VALUE("n", n);
455  ADD_KEY_VALUE("stride", stride);
456  ADD_KEY_VALUE("epsilon", epsilon);
457  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
458  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
460 }
461 
462 inline void dump_smoothquant_json(const std::string& json_filename,
463  const std::string& prec_str,
464  int m,
465  int n,
466  int x_stride,
467  int y_stride,
468  float ave_time,
469  float tflops,
470  float gb_per_sec,
471  bool pass,
472  const std::string& kernel_name = "smoothquant")
473 {
474  START_JSON_DUMP_FILE(json_filename);
475  ADD_KEY_VALUE("name", kernel_name);
476  ADD_KEY_VALUE("prec", prec_str);
477  ADD_KEY_VALUE("m", m);
478  ADD_KEY_VALUE("n", n);
479  ADD_KEY_VALUE("x_stride", x_stride);
480  ADD_KEY_VALUE("y_stride", y_stride);
481  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
482  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
484 }
485 
486 inline void dump_moe_sorting_json(const std::string& json_filename,
487  const std::string& index_prec,
488  const std::string& weight_prec,
489  const std::string& workspace_size,
490  int dispatch_policy,
491  int tokens,
492  int num_experts,
493  int topk,
494  float ave_time,
495  float tflops,
496  float gb_per_sec,
497  bool pass,
498  const std::string& kernel_name = "moe_sorting")
499 {
500  START_JSON_DUMP_FILE(json_filename);
501  ADD_KEY_VALUE("name", kernel_name);
502  ADD_KEY_VALUE("index_prec", index_prec);
503  ADD_KEY_VALUE("weight_prec", weight_prec);
504  ADD_KEY_VALUE("workspace_size", workspace_size);
505  ADD_KEY_VALUE("dispatch_policy", dispatch_policy);
506  ADD_KEY_VALUE("tokens", tokens);
507  ADD_KEY_VALUE("num_experts", num_experts);
508  ADD_KEY_VALUE("topk", topk);
509  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
510  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
512 }
513 
514 inline void dump_batched_transpose_json(const std::string& json_filename,
515  int N,
516  int C,
517  int H,
518  int W,
519  const std::string& layout_in,
520  const std::string& layout_out,
521  const std::string& prec,
522  float ave_time,
523  float tflops,
524  float gb_per_sec,
525  bool pass,
526  const std::string& kernel_name = "batched_transpose")
527 {
528  START_JSON_DUMP_FILE(json_filename);
529  ADD_KEY_VALUE("name", kernel_name);
530  ADD_KEY_VALUE("N", N);
531  ADD_KEY_VALUE("C", C);
532  ADD_KEY_VALUE("H", H);
533  ADD_KEY_VALUE("W", W);
534  ADD_KEY_VALUE("LayoutIn", layout_in);
535  ADD_KEY_VALUE("LayoutOut", layout_out);
536  ADD_KEY_VALUE("Precision", prec);
537  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
538  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
540 }
541 
542 inline void dump_moe_smoothquant_json(const std::string& json_filename,
543  const std::string& prec_i,
544  const std::string& prec_o,
545  int tokens,
546  int hidden_size,
547  int stride,
548  int experts,
549  int topk,
550  bool pass,
551  float ave_time,
552  float tflops,
553  float gb_per_sec,
554  const std::string& kernel_name = "moe_smoothquant")
555 {
556  START_JSON_DUMP_FILE(json_filename);
557  ADD_KEY_VALUE("name", kernel_name);
558  ADD_KEY_VALUE("prec_i", prec_i);
559  ADD_KEY_VALUE("prec_o", prec_o);
560  ADD_KEY_VALUE("tokens", tokens);
561  ADD_KEY_VALUE("hidden_size", hidden_size);
562  ADD_KEY_VALUE("stride", stride);
563  ADD_KEY_VALUE("experts", experts);
564  ADD_KEY_VALUE("topk", topk);
565  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
566  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
568 }
569 
570 inline void dump_fused_moe_json(const std::string& json_filename,
571  const std::string& api_str,
572  const std::string& prec_str,
573  int tokens,
574  bool is_local_token,
575  int local_tokens,
576  int experts,
577  int topk,
578  int hidden_size,
579  int intermediate_size,
580  int stride,
581  int block_m,
582  int activation,
583  bool gate_only,
584  bool fused_quant,
585  bool pass,
586  float ave_time,
587  float tflops,
588  float tb_per_sec,
589  const std::string& kernel_name = "fused_moe")
590 {
591  START_JSON_DUMP_FILE(json_filename);
592  ADD_KEY_VALUE("name", kernel_name);
593  ADD_KEY_VALUE("api", api_str);
594  ADD_KEY_VALUE("prec", prec_str);
595  ADD_KEY_VALUE("tokens", tokens);
596  if(is_local_token)
597  {
598  ADD_KEY_VALUE("local_tokens", local_tokens);
599  }
600  ADD_KEY_VALUE("experts", experts);
601  ADD_KEY_VALUE("topk", topk);
602  ADD_KEY_VALUE("hidden_size", hidden_size);
603  ADD_KEY_VALUE("intermediate_size", intermediate_size);
604  ADD_KEY_VALUE("stride", stride);
605  ADD_KEY_VALUE("block_m", block_m);
606  ADD_KEY_VALUE("activation", activation);
607  ADD_KEY_VALUE("gate_only", gate_only);
608  ADD_KEY_VALUE("fused_quant", fused_quant);
609  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
610  ADD_PERF_TO_JSON(ave_time, tflops, (tb_per_sec * 1024.0f))
612 }
613 
614 inline void dump_fmha_fwd_json_results(const std::string& json_filename,
615  const std::string& prec,
616  const std::string& mode,
617  const std::string& io_layout,
618  int batch,
619  int nhead,
620  int nhead_k,
621  int seqlen_qs,
622  int seqlen_ks,
623  int seqlen_kpads,
624  int hdim_q,
625  int hdim_v,
626  float scale_s,
627  float p_drop,
628  bool lse,
629  const std::string& qscale,
630  const std::string& bias,
631  const std::string& vlayout,
632  bool pass,
633  float ave_time,
634  float tflops,
635  float gb_per_sec,
636  const std::string& kernel_name = "fmha_fwd")
637 {
638  START_JSON_DUMP_FILE(json_filename);
639  ADD_KEY_VALUE("name", kernel_name);
640  ADD_KEY_VALUE("prec", prec);
641  ADD_KEY_VALUE("mode", mode);
642  ADD_KEY_VALUE("io_layout", io_layout);
643  ADD_KEY_VALUE("batch", batch);
644  ADD_KEY_VALUE("nhead", nhead);
645  ADD_KEY_VALUE("nhead_k", nhead_k);
646  ADD_KEY_VALUE("seqlen_q", seqlen_qs);
647  ADD_KEY_VALUE("seqlen_k", seqlen_ks);
648  ADD_KEY_VALUE("seqlen_kpads", seqlen_kpads);
649  ADD_KEY_VALUE("hdim_q", hdim_q);
650  ADD_KEY_VALUE("hdim_v", hdim_v);
651  ADD_KEY_VALUE("scale_s", scale_s);
652  ADD_KEY_VALUE("p_drop", p_drop);
653  ADD_KEY_VALUE("lse", lse);
654  ADD_KEY_VALUE("qscale", qscale);
655  ADD_KEY_VALUE("bias", bias);
656  ADD_KEY_VALUE("vlayout", vlayout);
657  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
658  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
660 }
661 
662 inline void dump_fmha_bwd_json_results(const std::string& json_filename,
663  const std::string& data_type,
664  const std::string& mode,
665  const std::string& i_perm,
666  const std::string& o_perm,
667  int batch,
668  int nhead,
669  int nhead_k,
670  int seqlen_q,
671  int seqlen_k,
672  int hdim_q,
673  int hdim_v,
674  float scale,
675  const std::string& bias,
676  bool use_dbias,
677  float p_drop,
678  bool s_randval,
679  bool deterministic,
680  const std::string& mask,
681  int mask_left,
682  int mask_right,
683  int workspace_size,
684  bool pass,
685  float ave_time,
686  float tflops,
687  float gb_per_sec,
688  const std::string& kernel_name = "fmha_bwd")
689 {
690  START_JSON_DUMP_FILE(json_filename);
691  ADD_KEY_VALUE("name", kernel_name);
692  ADD_KEY_VALUE("prec", data_type);
693  ADD_KEY_VALUE("mode", mode);
694  ADD_KEY_VALUE("i_perm", i_perm);
695  ADD_KEY_VALUE("o_perm", o_perm);
696  ADD_KEY_VALUE("batch", batch);
697  ADD_KEY_VALUE("nhead", nhead);
698  ADD_KEY_VALUE("nhead_k", nhead_k);
699  ADD_KEY_VALUE("seqlen_q", seqlen_q);
700  ADD_KEY_VALUE("seqlen_k", seqlen_k);
701  ADD_KEY_VALUE("hdim_q", hdim_q);
702  ADD_KEY_VALUE("hdim_v", hdim_v);
703  ADD_KEY_VALUE("scale", scale);
704  ADD_KEY_VALUE("bias", bias);
705  ADD_KEY_VALUE("use_dbias", use_dbias);
706  ADD_KEY_VALUE("p_drop", p_drop);
707  ADD_KEY_VALUE("s_randval", s_randval);
708  ADD_KEY_VALUE("deterministic", deterministic ? "true" : "false");
709  ADD_KEY_VALUE("mask", mask);
710  ADD_KEY_VALUE("mask_left", mask_left);
711  ADD_KEY_VALUE("mask_right", mask_right);
712  ADD_KEY_VALUE("workspace_size", workspace_size);
713  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
714  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
716 }
717 
718 #ifndef CK_ENABLE_JSON_DUMP
719 #pragma GCC diagnostic pop
720 #endif
#define ADD_PERF_TO_JSON(_time, tflops, gbytes)
Definition: json_dump.hpp:91
void dump_gemm_json_results(const std::string &json_filename, int M, int N, int K, int stride_A, int stride_B, int stride_C, bool persistent, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_basic")
Definition: json_dump.hpp:117
#define END_JSON_DUMP_FILE()
Definition: json_dump.hpp:87
void dump_rmsnorm2d_fwd_json(const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, int use_model_sensitive_rmsnorm, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="rmsnorm2d_fwd")
Definition: json_dump.hpp:405
void dump_batched_gemm_json_results(const std::string &json_filename, const std::string &op_name, int M, int N, int K, int stride_A, int stride_B, int stride_C, int batch_stride_A, int batch_stride_B, int batch_stride_C, int batch_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="batched_gemm_basic")
Definition: json_dump.hpp:163
void dump_fmha_bwd_json_results(const std::string &json_filename, const std::string &data_type, const std::string &mode, const std::string &i_perm, const std::string &o_perm, int batch, int nhead, int nhead_k, int seqlen_q, int seqlen_k, int hdim_q, int hdim_v, float scale, const std::string &bias, bool use_dbias, float p_drop, bool s_randval, bool deterministic, const std::string &mask, int mask_left, int mask_right, int workspace_size, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_bwd")
Definition: json_dump.hpp:662
void dump_layernorm2d_fwd_json_results(const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, const std::string &prec_sm, const std::string &prec_sy, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="layernorm2d_fwd")
Definition: json_dump.hpp:302
void dump_moe_smoothquant_json(const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, int tokens, int hidden_size, int stride, int experts, int topk, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="moe_smoothquant")
Definition: json_dump.hpp:542
void dump_reduce_json_results(const std::string &json_filename, int N, int C, int H, int W, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="reduce")
Definition: json_dump.hpp:337
void dump_flatmm_json_results(const std::string &json_filename, const std::string &datatype, int M, int N, int K, int stride_A, int stride_B, int stride_C, int kbatch, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="flatmm_basic")
Definition: json_dump.hpp:221
void dump_batched_transpose_json(const std::string &json_filename, int N, int C, int H, int W, const std::string &layout_in, const std::string &layout_out, const std::string &prec, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="batched_transpose")
Definition: json_dump.hpp:514
void dump_fmha_fwd_json_results(const std::string &json_filename, const std::string &prec, const std::string &mode, const std::string &io_layout, int batch, int nhead, int nhead_k, int seqlen_qs, int seqlen_ks, int seqlen_kpads, int hdim_q, int hdim_v, float scale_s, float p_drop, bool lse, const std::string &qscale, const std::string &bias, const std::string &vlayout, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_fwd")
Definition: json_dump.hpp:614
#define START_JSON_DUMP_FILE(file_name)
Definition: json_dump.hpp:86
void dump_permute_json_results(const std::string &json_filename, const std::string &data_type, bool pass, float ave_time, float tflop, float gb_per_sec, const std::string &kernel_name="permute")
Definition: json_dump.hpp:361
void dump_gemm_multi_d_fp16_json_results(const std::string &json_filename, const std::string &op_name, int M, int N, int K, int StrideA, int StrideB, int StrideD0, int StrideD1, int StrideE, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_multi_d_fp16")
Definition: json_dump.hpp:252
void dump_moe_sorting_json(const std::string &json_filename, const std::string &index_prec, const std::string &weight_prec, const std::string &workspace_size, int dispatch_policy, int tokens, int num_experts, int topk, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="moe_sorting")
Definition: json_dump.hpp:486
void dump_grouped_gemm_json_results(const std::string &json_filename, const std::string &op_name, int group_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="grouped_gemm")
Definition: json_dump.hpp:200
void dump_fused_moe_json(const std::string &json_filename, const std::string &api_str, const std::string &prec_str, int tokens, bool is_local_token, int local_tokens, int experts, int topk, int hidden_size, int intermediate_size, int stride, int block_m, int activation, bool gate_only, bool fused_quant, bool pass, float ave_time, float tflops, float tb_per_sec, const std::string &kernel_name="fused_moe")
Definition: json_dump.hpp:570
void dump_smoothquant_json(const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int y_stride, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="smoothquant")
Definition: json_dump.hpp:462
#define ADD_KEY_VALUE(key, value)
Definition: json_dump.hpp:90
void dump_elementwise_json_results(const std::string &json_filename, const std::string &prec, int grid_size, int block_size, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="elementwise")
Definition: json_dump.hpp:284
void dump_topk_softmax_json(const std::string &json_filename, const std::string &input_prec, const std::string &weight_prec, int tokens, int experts, int topk, int stride_input, int stride_output, float ave_time, float tflop, float gb_per_sec, bool pass, const std::string &kernel_name="topk_softmax")
Definition: json_dump.hpp:377
void dump_add_rmsnorm2d_rdquant_fwd_json(const std::string &json_filename, const std::string &input_data_type, const std::string &quantized_data_type, int m, int n, int stride, float epsilon, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="add_rmsnorm2d_rdquant_fwd")
Definition: json_dump.hpp:436
bool_constant< false > false_type
Definition: integral_constant.hpp:63
bool_constant< true > true_type
Definition: integral_constant.hpp:62
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1697
common definitions and configuration
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:429
signed __int64 int64_t
Definition: stdint.h:135
Definition: json_dump.hpp:97