/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp Source File
json_dump.hpp
Go to the documentation of this file.
1 // Copyright © Advanced Micro Devices, Inc. or its affiliates.
2 // SPDX-License-Identifier: MIT
3 
4 #ifdef CK_ENABLE_JSON_DUMP
5 #pragma GCC diagnostic push
6 #pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
7 #include "rapidjson/writer.h"
9 #include "rapidjson/document.h"
10 #include "rapidjson/rapidjson.h"
11 #pragma GCC diagnostic pop
12 
// Opens `file_name` for writing and starts the top-level JSON object.
// Declares the locals `file_str`, `file`, `s` and `writer` in the enclosing scope; they are
// consumed by ADD_KEY_VALUE, ADD_PERF_TO_JSON and END_JSON_DUMP_FILE, so this macro can be
// used at most once per scope. Throws std::runtime_error when the file cannot be opened.
// The `file_name` argument is expanded exactly once.
#define START_JSON_DUMP_FILE(file_name) \
    std::string file_str(file_name); \
    std::ofstream file(file_str); \
    if(!file.is_open()) \
    { \
        throw std::runtime_error("Could not open file: " + file_str); \
    } \
    rapidjson::StringBuffer s; \
    rapidjson::Writer<rapidjson::StringBuffer> writer(s); \
    writer.StartObject();

// Closes the top-level JSON object, writes the buffered text to the file and closes it.
// Success is reported only when the stream is still in a good state after close();
// otherwise std::runtime_error is thrown, mirroring the open failure in START_JSON_DUMP_FILE.
#define END_JSON_DUMP_FILE() \
    writer.EndObject(); \
    file << s.GetString(); \
    file.close(); \
    if(file.fail()) \
    { \
        throw std::runtime_error("Could not write file: " + file_str); \
    } \
    std::cout << "Results written to " << file_str << " successfully" << std::endl;

// Thin wrappers that forward to the helpers below using the `writer` local declared by
// START_JSON_DUMP_FILE. Both expansions already end in ';'.
#define ADD_KEY_VALUE(key, value) add_key_value_pair(writer, key, value);
#define ADD_PERF_TO_JSON(_time, tflops, gbytes) add_perf_to_json(writer, _time, tflops, gbytes);
32 
33 template <typename T>
34 void add_key_value_pair(rapidjson::Writer<rapidjson::StringBuffer>& writer,
35  const char* key,
36  T value)
37 {
38  writer.Key(key);
40  {
41  writer.String(value, static_cast<rapidjson::SizeType>(std::strlen(value)));
42  }
43  else if constexpr(std::is_same<T, std::string>::value)
44  {
45  writer.String(value.c_str(), static_cast<rapidjson::SizeType>(value.length()));
46  }
47  else if constexpr(std::is_floating_point<T>::value)
48  {
49  writer.Double(static_cast<double>(value));
50  }
51  else if constexpr(std::is_integral<T>::value)
52  {
53  writer.Int64(static_cast<int64_t>(value));
54  }
55  else
56  {
59  "Unsupported type for JSON serialization");
60  }
61 }
62 
63 static void add_perf_to_json(rapidjson::Writer<rapidjson::StringBuffer>& writer,
64  float time,
65  float tflops,
66  float gbytes)
67 {
68  std::string roster("perf");
69  writer.String(roster.c_str(), static_cast<rapidjson::SizeType>(roster.length()));
70 
71  writer.StartArray();
72  writer.StartObject();
73 
74  add_key_value_pair(writer, "time", time);
75  add_key_value_pair(writer, "tflops", tflops);
76  add_key_value_pair(writer, "gbytes", gbytes);
77 
78  writer.EndObject();
79  writer.EndArray();
80 }
81 
82 #else
83 #pragma GCC diagnostic push
84 #pragma GCC diagnostic ignored "-Wunused-parameter"
85 #pragma GCC diagnostic ignored "-Wunused-local-typedef"
// Variants used when CK_ENABLE_JSON_DUMP is not defined: every macro except
// END_JSON_DUMP_FILE expands to nothing, so the dump functions below still compile
// but do not touch the file system.
86 #define START_JSON_DUMP_FILE(file_name)
// Prints a notice that JSON dumping is disabled instead of writing a file.
87 #define END_JSON_DUMP_FILE() \
88  std::cout << "JSON dump disabled, To enable, set CK_ENABLE_JSON_DUMP cmake option" << std::endl;
89 
// No-ops: the key/value and perf arguments are discarded unevaluated.
90 #define ADD_KEY_VALUE(key, value)
91 #define ADD_PERF_TO_JSON(_time, tflops, gbytes)
92 #endif
93 
// Helper traits to check for static member existence

/// @brief Primary template: selected when `T` lacks at least one of `M_Warp_Tile`,
///        `N_Warp_Tile` or `K_Warp_Tile`; `value` is `false`.
template <typename T, typename = void>
struct has_warp_tile_members : std::false_type
{
};

/// @brief Partial specialization: selected only when `T::M_Warp_Tile`, `T::N_Warp_Tile`
///        and `T::K_Warp_Tile` are all well-formed expressions; `value` is `true`.
template <typename T>
struct has_warp_tile_members<
    T,
    std::void_t<decltype(T::M_Warp_Tile), decltype(T::N_Warp_Tile), decltype(T::K_Warp_Tile)>>
    : std::true_type
{
};
107 
108 template <typename ALayout,
109  typename BLayout,
110  typename CLayout,
111  typename ADataType,
112  typename BDataType,
113  typename CDataType,
114  typename GemmConfig,
115  template <typename>
116  typename DTypeTraits>
117 void dump_gemm_json_results(const std::string& json_filename,
118  int M,
119  int N,
120  int K,
121  int stride_A,
122  int stride_B,
123  int stride_C,
124  bool persistent,
125  bool pass,
126  float ave_time,
127  float tflops,
128  float gb_per_sec,
129  const std::string& kernel_name = "gemm_basic")
130 {
131  START_JSON_DUMP_FILE(json_filename);
132  ADD_KEY_VALUE("name", kernel_name);
133  ADD_KEY_VALUE("M", M);
134  ADD_KEY_VALUE("N", N);
135  ADD_KEY_VALUE("K", K);
136  ADD_KEY_VALUE("stride_A", stride_A);
137  ADD_KEY_VALUE("stride_B", stride_B);
138  ADD_KEY_VALUE("stride_C", stride_C);
139  ADD_KEY_VALUE("A_layout", ALayout::name);
140  ADD_KEY_VALUE("B_layout", BLayout::name);
141  ADD_KEY_VALUE("C_layout", CLayout::name);
142  using TraitsADataType = DTypeTraits<ADataType>;
143  using TraitsBDataType = DTypeTraits<BDataType>;
144  using TraitsCDataType = DTypeTraits<CDataType>;
145  ADD_KEY_VALUE("A_type", TraitsADataType::name);
146  ADD_KEY_VALUE("B_type", TraitsBDataType::name);
147  ADD_KEY_VALUE("C_type", TraitsCDataType::name);
148  ADD_KEY_VALUE("structured_sparsity", GemmConfig::UseStructuredSparsity ? "on" : "off");
149 
151  {
152  ADD_KEY_VALUE("warp_tile",
153  std::to_string(GemmConfig::M_Warp_Tile) + "x" +
154  std::to_string(GemmConfig::N_Warp_Tile) + "x" +
155  std::to_string(GemmConfig::K_Warp_Tile));
156  }
157  ADD_KEY_VALUE("persistent", persistent ? "on" : "off");
158  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
159  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
161 }
162 
163 void dump_batched_gemm_json_results(const std::string& json_filename,
164  const std::string& op_name,
165  int M,
166  int N,
167  int K,
168  int stride_A,
169  int stride_B,
170  int stride_C,
171  int batch_stride_A,
172  int batch_stride_B,
173  int batch_stride_C,
174  int batch_count,
175  bool pass,
176  float ave_time,
177  float tflops,
178  float gb_per_sec,
179  const std::string& kernel_name = "batched_gemm_basic")
180 {
181  START_JSON_DUMP_FILE(json_filename);
182  ADD_KEY_VALUE("name", kernel_name);
183  ADD_KEY_VALUE("op_name", op_name);
184  ADD_KEY_VALUE("M", M);
185  ADD_KEY_VALUE("N", N);
186  ADD_KEY_VALUE("K", K);
187  ADD_KEY_VALUE("stride_A", stride_A);
188  ADD_KEY_VALUE("stride_B", stride_B);
189  ADD_KEY_VALUE("stride_C", stride_C);
190  ADD_KEY_VALUE("batch_stride_A", batch_stride_A);
191  ADD_KEY_VALUE("batch_stride_B", batch_stride_B);
192  ADD_KEY_VALUE("batch_stride_C", batch_stride_C);
193  ADD_KEY_VALUE("batch_count", batch_count);
194  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
195  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
197 }
198 
199 template <typename ALayout, typename BLayout, typename CLayout>
200 void dump_grouped_gemm_json_results(const std::string& json_filename,
201  const std::string& op_name,
202  int group_count,
203  bool pass,
204  float ave_time,
205  float tflops,
206  float gb_per_sec,
207  const std::string& kernel_name = "grouped_gemm")
208 {
209  START_JSON_DUMP_FILE(json_filename);
210  ADD_KEY_VALUE("name", kernel_name);
211  ADD_KEY_VALUE("op_name", op_name);
212  ADD_KEY_VALUE("group_count", group_count);
213  ADD_KEY_VALUE("A_layout", ALayout::name);
214  ADD_KEY_VALUE("B_layout", BLayout::name);
215  ADD_KEY_VALUE("C_layout", CLayout::name);
216  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
217  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
219 }
220 
221 void dump_flatmm_json_results(const std::string& json_filename,
222  const std::string& datatype,
223  int M,
224  int N,
225  int K,
226  int stride_A,
227  int stride_B,
228  int stride_C,
229  int kbatch,
230  bool pass,
231  float ave_time,
232  float tflops,
233  float gb_per_sec,
234  const std::string& kernel_name = "flatmm_basic")
235 {
236  START_JSON_DUMP_FILE(json_filename);
237  ADD_KEY_VALUE("name", kernel_name);
238  ADD_KEY_VALUE("DataType", datatype);
239  ADD_KEY_VALUE("M", M);
240  ADD_KEY_VALUE("N", N);
241  ADD_KEY_VALUE("K", K);
242  ADD_KEY_VALUE("StrideA", stride_A);
243  ADD_KEY_VALUE("StrideB", stride_B);
244  ADD_KEY_VALUE("StrideC", stride_C);
245  ADD_KEY_VALUE("kbatch", kbatch);
246  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
247  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
249 }
250 
251 void dump_gemm_multi_d_fp16_json_results(const std::string& json_filename,
252  const std::string& op_name,
253  int M,
254  int N,
255  int K,
256  int StrideA,
257  int StrideB,
258  int StrideD0,
259  int StrideD1,
260  int StrideE,
261  bool pass,
262  float ave_time,
263  float tflops,
264  float gb_per_sec,
265  const std::string& kernel_name = "gemm_multi_d_fp16")
266 {
267  START_JSON_DUMP_FILE(json_filename);
268  ADD_KEY_VALUE("name", kernel_name);
269  ADD_KEY_VALUE("op_name", op_name);
270  ADD_KEY_VALUE("M", M);
271  ADD_KEY_VALUE("N", N);
272  ADD_KEY_VALUE("K", K);
273  ADD_KEY_VALUE("StrideA", StrideA);
274  ADD_KEY_VALUE("StrideB", StrideB);
275  ADD_KEY_VALUE("StrideD0", StrideD0);
276  ADD_KEY_VALUE("StrideD1", StrideD1);
277  ADD_KEY_VALUE("StrideE", StrideE);
278  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
279  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
281 }
282 
283 void dump_elementwise_json_results(const std::string& json_filename,
284  const std::string& prec,
285  int grid_size,
286  int block_size,
287  float ave_time,
288  float tflops,
289  float gb_per_sec,
290  const std::string& kernel_name = "elementwise")
291 {
292  START_JSON_DUMP_FILE(json_filename);
293  ADD_KEY_VALUE("name", kernel_name);
294  ADD_KEY_VALUE("prec", prec);
295  ADD_KEY_VALUE("grid_size", grid_size);
296  ADD_KEY_VALUE("block_size", block_size);
297  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
299 }
300 
301 void dump_layernorm2d_fwd_json_results(const std::string& json_filename,
302  const std::string& prec_i,
303  const std::string& prec_o,
304  const std::string& prec_sm,
305  const std::string& prec_sy,
306  int m,
307  int n,
308  int x_stride,
309  int xr_stride,
310  int y_stride,
311  int yr_stride,
312  bool pass,
313  float ave_time,
314  float tflops,
315  float gb_per_sec,
316  const std::string& kernel_name = "layernorm2d_fwd")
317 {
318  START_JSON_DUMP_FILE(json_filename);
319  ADD_KEY_VALUE("name", kernel_name);
320  ADD_KEY_VALUE("prec_i", prec_i);
321  ADD_KEY_VALUE("prec_o", prec_o);
322  ADD_KEY_VALUE("prec_sm", prec_sm);
323  ADD_KEY_VALUE("prec_sy", prec_sy);
324  ADD_KEY_VALUE("m", m);
325  ADD_KEY_VALUE("n", n);
326  ADD_KEY_VALUE("x_stride", x_stride);
327  ADD_KEY_VALUE("xr_stride", xr_stride);
328  ADD_KEY_VALUE("y_stride", y_stride);
329  ADD_KEY_VALUE("yr_stride", yr_stride);
330  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
331  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
333 }
334 
335 template <typename DataType, template <typename> typename DTypeTraits>
336 void dump_reduce_json_results(const std::string& json_filename,
337  int N,
338  int C,
339  int H,
340  int W,
341  bool pass,
342  float ave_time,
343  float tflops,
344  float gb_per_sec,
345  const std::string& kernel_name = "reduce")
346 {
347  START_JSON_DUMP_FILE(json_filename);
348  ADD_KEY_VALUE("name", kernel_name);
349  using Traits = DTypeTraits<DataType>;
350  ADD_KEY_VALUE("data_type", Traits::name);
351  ADD_KEY_VALUE("N", N);
352  ADD_KEY_VALUE("C", C);
353  ADD_KEY_VALUE("H", H);
354  ADD_KEY_VALUE("W", W);
355  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
356  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
358 }
359 
360 void dump_permute_json_results(const std::string& json_filename,
361  const std::string& data_type,
362  bool pass,
363  float ave_time,
364  float tflop,
365  float gb_per_sec,
366  const std::string& kernel_name = "permute")
367 {
368  START_JSON_DUMP_FILE(json_filename);
369  ADD_KEY_VALUE("name", kernel_name);
370  ADD_KEY_VALUE("data_type", data_type);
371  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
372  ADD_PERF_TO_JSON(ave_time, tflop, gb_per_sec)
374 }
375 
376 void dump_topk_softmax_json(const std::string& json_filename,
377  const std::string& input_prec,
378  const std::string& weight_prec,
379  int tokens,
380  int experts,
381  int topk,
382  int stride_input,
383  int stride_output,
384  float ave_time,
385  float tflop,
386  float gb_per_sec,
387  bool pass,
388  const std::string& kernel_name = "topk_softmax")
389 {
390  START_JSON_DUMP_FILE(json_filename);
391  ADD_KEY_VALUE("name", kernel_name);
392  ADD_KEY_VALUE("input_prec", input_prec);
393  ADD_KEY_VALUE("weight_prec", weight_prec);
394  ADD_KEY_VALUE("tokens", tokens);
395  ADD_KEY_VALUE("experts", experts);
396  ADD_KEY_VALUE("topk", topk);
397  ADD_KEY_VALUE("stride_input", stride_input);
398  ADD_KEY_VALUE("stride_output", stride_output);
399  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
400  ADD_PERF_TO_JSON(ave_time, tflop, gb_per_sec);
402 }
403 
404 void dump_rmsnorm2d_fwd_json(const std::string& json_filename,
405  const std::string& prec_str,
406  int m,
407  int n,
408  int x_stride,
409  int xr_stride,
410  int y_stride,
411  int yr_stride,
412  int use_model_sensitive_rmsnorm,
413  float ave_time,
414  float tflops,
415  float gb_per_sec,
416  bool pass,
417  const std::string& kernel_name = "rmsnorm2d_fwd")
418 {
419  START_JSON_DUMP_FILE(json_filename);
420  ADD_KEY_VALUE("name", kernel_name);
421  ADD_KEY_VALUE("prec", prec_str);
422  ADD_KEY_VALUE("m", m);
423  ADD_KEY_VALUE("n", n);
424  ADD_KEY_VALUE("x_stride", x_stride);
425  ADD_KEY_VALUE("xr_stride", xr_stride);
426  ADD_KEY_VALUE("y_stride", y_stride);
427  ADD_KEY_VALUE("yr_stride", yr_stride);
428  ADD_KEY_VALUE("use_model_sensitive_rmsnorm", use_model_sensitive_rmsnorm);
429  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
430  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
432 }
433 
435  const std::string& json_filename,
436  const std::string& input_data_type,
437  const std::string& quantized_data_type,
438  int m,
439  int n,
440  int stride,
441  float epsilon,
442  float ave_time,
443  float tflops,
444  float gb_per_sec,
445  bool pass,
446  const std::string& kernel_name = "add_rmsnorm2d_rdquant_fwd")
447 {
448  START_JSON_DUMP_FILE(json_filename);
449  ADD_KEY_VALUE("name", kernel_name);
450  ADD_KEY_VALUE("input_data_type", input_data_type);
451  ADD_KEY_VALUE("quantized_data_type", quantized_data_type);
452  ADD_KEY_VALUE("m", m);
453  ADD_KEY_VALUE("n", n);
454  ADD_KEY_VALUE("stride", stride);
455  ADD_KEY_VALUE("epsilon", epsilon);
456  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
457  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
459 }
460 
461 void dump_smoothquant_json(const std::string& json_filename,
462  const std::string& prec_str,
463  int m,
464  int n,
465  int x_stride,
466  int y_stride,
467  float ave_time,
468  float tflops,
469  float gb_per_sec,
470  bool pass,
471  const std::string& kernel_name = "smoothquant")
472 {
473  START_JSON_DUMP_FILE(json_filename);
474  ADD_KEY_VALUE("name", kernel_name);
475  ADD_KEY_VALUE("prec", prec_str);
476  ADD_KEY_VALUE("m", m);
477  ADD_KEY_VALUE("n", n);
478  ADD_KEY_VALUE("x_stride", x_stride);
479  ADD_KEY_VALUE("y_stride", y_stride);
480  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
481  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
483 }
484 
485 void dump_moe_sorting_json(const std::string& json_filename,
486  const std::string& index_prec,
487  const std::string& weight_prec,
488  const std::string& workspace_size,
489  int dispatch_policy,
490  int tokens,
491  int num_experts,
492  int topk,
493  float ave_time,
494  float tflops,
495  float gb_per_sec,
496  bool pass,
497  const std::string& kernel_name = "moe_sorting")
498 {
499  START_JSON_DUMP_FILE(json_filename);
500  ADD_KEY_VALUE("name", kernel_name);
501  ADD_KEY_VALUE("index_prec", index_prec);
502  ADD_KEY_VALUE("weight_prec", weight_prec);
503  ADD_KEY_VALUE("workspace_size", workspace_size);
504  ADD_KEY_VALUE("dispatch_policy", dispatch_policy);
505  ADD_KEY_VALUE("tokens", tokens);
506  ADD_KEY_VALUE("num_experts", num_experts);
507  ADD_KEY_VALUE("topk", topk);
508  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
509  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
511 }
512 
513 void dump_batched_transpose_json(const std::string& json_filename,
514  int N,
515  int C,
516  int H,
517  int W,
518  const std::string& layout_in,
519  const std::string& layout_out,
520  const std::string& prec,
521  float ave_time,
522  float tflops,
523  float gb_per_sec,
524  bool pass,
525  const std::string& kernel_name = "batched_transpose")
526 {
527  START_JSON_DUMP_FILE(json_filename);
528  ADD_KEY_VALUE("name", kernel_name);
529  ADD_KEY_VALUE("N", N);
530  ADD_KEY_VALUE("C", C);
531  ADD_KEY_VALUE("H", H);
532  ADD_KEY_VALUE("W", W);
533  ADD_KEY_VALUE("LayoutIn", layout_in);
534  ADD_KEY_VALUE("LayoutOut", layout_out);
535  ADD_KEY_VALUE("Precision", prec);
536  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
537  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
539 }
540 
541 void dump_moe_smoothquant_json(const std::string& json_filename,
542  const std::string& prec_i,
543  const std::string& prec_o,
544  int tokens,
545  int hidden_size,
546  int stride,
547  int experts,
548  int topk,
549  bool pass,
550  float ave_time,
551  float tflops,
552  float gb_per_sec,
553  const std::string& kernel_name = "moe_smoothquant")
554 {
555  START_JSON_DUMP_FILE(json_filename);
556  ADD_KEY_VALUE("name", kernel_name);
557  ADD_KEY_VALUE("prec_i", prec_i);
558  ADD_KEY_VALUE("prec_o", prec_o);
559  ADD_KEY_VALUE("tokens", tokens);
560  ADD_KEY_VALUE("hidden_size", hidden_size);
561  ADD_KEY_VALUE("stride", stride);
562  ADD_KEY_VALUE("experts", experts);
563  ADD_KEY_VALUE("topk", topk);
564  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
565  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
567 }
568 
569 void dump_fused_moe_json(const std::string& json_filename,
570  const std::string& api_str,
571  const std::string& prec_str,
572  int tokens,
573  bool is_local_token,
574  int local_tokens,
575  int experts,
576  int topk,
577  int hidden_size,
578  int intermediate_size,
579  int stride,
580  int block_m,
581  int activation,
582  bool gate_only,
583  bool fused_quant,
584  bool pass,
585  float ave_time,
586  float tflops,
587  float tb_per_sec,
588  const std::string& kernel_name = "fused_moe")
589 {
590  START_JSON_DUMP_FILE(json_filename);
591  ADD_KEY_VALUE("name", kernel_name);
592  ADD_KEY_VALUE("api", api_str);
593  ADD_KEY_VALUE("prec", prec_str);
594  ADD_KEY_VALUE("tokens", tokens);
595  if(is_local_token)
596  {
597  ADD_KEY_VALUE("local_tokens", local_tokens);
598  }
599  ADD_KEY_VALUE("experts", experts);
600  ADD_KEY_VALUE("topk", topk);
601  ADD_KEY_VALUE("hidden_size", hidden_size);
602  ADD_KEY_VALUE("intermediate_size", intermediate_size);
603  ADD_KEY_VALUE("stride", stride);
604  ADD_KEY_VALUE("block_m", block_m);
605  ADD_KEY_VALUE("activation", activation);
606  ADD_KEY_VALUE("gate_only", gate_only);
607  ADD_KEY_VALUE("fused_quant", fused_quant);
608  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
609  ADD_PERF_TO_JSON(ave_time, tflops, (tb_per_sec * 1024.0f))
611 }
612 
613 void dump_fmha_fwd_json_results(const std::string& json_filename,
614  const std::string& prec,
615  const std::string& mode,
616  const std::string& io_layout,
617  int batch,
618  int nhead,
619  int nhead_k,
620  int seqlen_qs,
621  int seqlen_ks,
622  int seqlen_kpads,
623  int hdim_q,
624  int hdim_v,
625  float scale_s,
626  float p_drop,
627  bool lse,
628  bool squant,
629  const std::string& bias,
630  const std::string& vlayout,
631  bool pass,
632  float ave_time,
633  float tflops,
634  float gb_per_sec,
635  const std::string& kernel_name = "fmha_fwd")
636 {
637  START_JSON_DUMP_FILE(json_filename);
638  ADD_KEY_VALUE("name", kernel_name);
639  ADD_KEY_VALUE("prec", prec);
640  ADD_KEY_VALUE("mode", mode);
641  ADD_KEY_VALUE("io_layout", io_layout);
642  ADD_KEY_VALUE("batch", batch);
643  ADD_KEY_VALUE("nhead", nhead);
644  ADD_KEY_VALUE("nhead_k", nhead_k);
645  ADD_KEY_VALUE("seqlen_q", seqlen_qs);
646  ADD_KEY_VALUE("seqlen_k", seqlen_ks);
647  ADD_KEY_VALUE("seqlen_kpads", seqlen_kpads);
648  ADD_KEY_VALUE("hdim_q", hdim_q);
649  ADD_KEY_VALUE("hdim_v", hdim_v);
650  ADD_KEY_VALUE("scale_s", scale_s);
651  ADD_KEY_VALUE("p_drop", p_drop);
652  ADD_KEY_VALUE("lse", lse);
653  ADD_KEY_VALUE("squant", squant);
654  ADD_KEY_VALUE("bias", bias);
655  ADD_KEY_VALUE("vlayout", vlayout);
656  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
657  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
659 }
660 
661 void dump_fmha_bwd_json_results(const std::string& json_filename,
662  const std::string& data_type,
663  const std::string& mode,
664  const std::string& i_perm,
665  const std::string& o_perm,
666  int batch,
667  int nhead,
668  int nhead_k,
669  int seqlen_q,
670  int seqlen_k,
671  int hdim_q,
672  int hdim_v,
673  float scale,
674  const std::string& bias,
675  bool use_dbias,
676  float p_drop,
677  bool s_randval,
678  bool deterministic,
679  const std::string& mask,
680  int mask_left,
681  int mask_right,
682  int workspace_size,
683  bool pass,
684  float ave_time,
685  float tflops,
686  float gb_per_sec,
687  const std::string& kernel_name = "fmha_bwd")
688 {
689  START_JSON_DUMP_FILE(json_filename);
690  ADD_KEY_VALUE("name", kernel_name);
691  ADD_KEY_VALUE("prec", data_type);
692  ADD_KEY_VALUE("mode", mode);
693  ADD_KEY_VALUE("i_perm", i_perm);
694  ADD_KEY_VALUE("o_perm", o_perm);
695  ADD_KEY_VALUE("batch", batch);
696  ADD_KEY_VALUE("nhead", nhead);
697  ADD_KEY_VALUE("nhead_k", nhead_k);
698  ADD_KEY_VALUE("seqlen_q", seqlen_q);
699  ADD_KEY_VALUE("seqlen_k", seqlen_k);
700  ADD_KEY_VALUE("hdim_q", hdim_q);
701  ADD_KEY_VALUE("hdim_v", hdim_v);
702  ADD_KEY_VALUE("scale", scale);
703  ADD_KEY_VALUE("bias", bias);
704  ADD_KEY_VALUE("use_dbias", use_dbias);
705  ADD_KEY_VALUE("p_drop", p_drop);
706  ADD_KEY_VALUE("s_randval", s_randval);
707  ADD_KEY_VALUE("deterministic", deterministic ? "true" : "false");
708  ADD_KEY_VALUE("mask", mask);
709  ADD_KEY_VALUE("mask_left", mask_left);
710  ADD_KEY_VALUE("mask_right", mask_right);
711  ADD_KEY_VALUE("workspace_size", workspace_size);
712  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
713  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
715 }
716 
717 #ifndef CK_ENABLE_JSON_DUMP
718 #pragma GCC diagnostic pop
719 #endif
#define ADD_PERF_TO_JSON(_time, tflops, gbytes)
Definition: json_dump.hpp:91
void dump_gemm_json_results(const std::string &json_filename, int M, int N, int K, int stride_A, int stride_B, int stride_C, bool persistent, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_basic")
Definition: json_dump.hpp:117
#define END_JSON_DUMP_FILE()
Definition: json_dump.hpp:87
void dump_rmsnorm2d_fwd_json(const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, int use_model_sensitive_rmsnorm, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="rmsnorm2d_fwd")
Definition: json_dump.hpp:404
void dump_batched_gemm_json_results(const std::string &json_filename, const std::string &op_name, int M, int N, int K, int stride_A, int stride_B, int stride_C, int batch_stride_A, int batch_stride_B, int batch_stride_C, int batch_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="batched_gemm_basic")
Definition: json_dump.hpp:163
void dump_fmha_bwd_json_results(const std::string &json_filename, const std::string &data_type, const std::string &mode, const std::string &i_perm, const std::string &o_perm, int batch, int nhead, int nhead_k, int seqlen_q, int seqlen_k, int hdim_q, int hdim_v, float scale, const std::string &bias, bool use_dbias, float p_drop, bool s_randval, bool deterministic, const std::string &mask, int mask_left, int mask_right, int workspace_size, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_bwd")
Definition: json_dump.hpp:661
void dump_layernorm2d_fwd_json_results(const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, const std::string &prec_sm, const std::string &prec_sy, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="layernorm2d_fwd")
Definition: json_dump.hpp:301
void dump_moe_smoothquant_json(const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, int tokens, int hidden_size, int stride, int experts, int topk, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="moe_smoothquant")
Definition: json_dump.hpp:541
void dump_reduce_json_results(const std::string &json_filename, int N, int C, int H, int W, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="reduce")
Definition: json_dump.hpp:336
void dump_flatmm_json_results(const std::string &json_filename, const std::string &datatype, int M, int N, int K, int stride_A, int stride_B, int stride_C, int kbatch, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="flatmm_basic")
Definition: json_dump.hpp:221
void dump_batched_transpose_json(const std::string &json_filename, int N, int C, int H, int W, const std::string &layout_in, const std::string &layout_out, const std::string &prec, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="batched_transpose")
Definition: json_dump.hpp:513
#define START_JSON_DUMP_FILE(file_name)
Definition: json_dump.hpp:86
void dump_permute_json_results(const std::string &json_filename, const std::string &data_type, bool pass, float ave_time, float tflop, float gb_per_sec, const std::string &kernel_name="permute")
Definition: json_dump.hpp:360
void dump_gemm_multi_d_fp16_json_results(const std::string &json_filename, const std::string &op_name, int M, int N, int K, int StrideA, int StrideB, int StrideD0, int StrideD1, int StrideE, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_multi_d_fp16")
Definition: json_dump.hpp:251
void dump_moe_sorting_json(const std::string &json_filename, const std::string &index_prec, const std::string &weight_prec, const std::string &workspace_size, int dispatch_policy, int tokens, int num_experts, int topk, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="moe_sorting")
Definition: json_dump.hpp:485
void dump_grouped_gemm_json_results(const std::string &json_filename, const std::string &op_name, int group_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="grouped_gemm")
Definition: json_dump.hpp:200
void dump_fmha_fwd_json_results(const std::string &json_filename, const std::string &prec, const std::string &mode, const std::string &io_layout, int batch, int nhead, int nhead_k, int seqlen_qs, int seqlen_ks, int seqlen_kpads, int hdim_q, int hdim_v, float scale_s, float p_drop, bool lse, bool squant, const std::string &bias, const std::string &vlayout, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_fwd")
Definition: json_dump.hpp:613
void dump_fused_moe_json(const std::string &json_filename, const std::string &api_str, const std::string &prec_str, int tokens, bool is_local_token, int local_tokens, int experts, int topk, int hidden_size, int intermediate_size, int stride, int block_m, int activation, bool gate_only, bool fused_quant, bool pass, float ave_time, float tflops, float tb_per_sec, const std::string &kernel_name="fused_moe")
Definition: json_dump.hpp:569
void dump_smoothquant_json(const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int y_stride, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="smoothquant")
Definition: json_dump.hpp:461
#define ADD_KEY_VALUE(key, value)
Definition: json_dump.hpp:90
void dump_elementwise_json_results(const std::string &json_filename, const std::string &prec, int grid_size, int block_size, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="elementwise")
Definition: json_dump.hpp:283
void dump_topk_softmax_json(const std::string &json_filename, const std::string &input_prec, const std::string &weight_prec, int tokens, int experts, int topk, int stride_input, int stride_output, float ave_time, float tflop, float gb_per_sec, bool pass, const std::string &kernel_name="topk_softmax")
Definition: json_dump.hpp:376
void dump_add_rmsnorm2d_rdquant_fwd_json(const std::string &json_filename, const std::string &input_data_type, const std::string &quantized_data_type, int m, int n, int stride, float epsilon, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="add_rmsnorm2d_rdquant_fwd")
Definition: json_dump.hpp:434
bool_constant< false > false_type
Definition: integral_constant.hpp:63
bool_constant< true > true_type
Definition: integral_constant.hpp:62
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1697
common definitions and configuration
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:429
signed __int64 int64_t
Definition: stdint.h:135
Definition: json_dump.hpp:97