/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp Source File
json_dump.hpp
Go to the documentation of this file.
#pragma once

#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <type_traits>

#ifdef CK_ENABLE_JSON_DUMP
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
#include "rapidjson/writer.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/document.h"
#include "rapidjson/rapidjson.h"
#pragma GCC diagnostic pop
9 
// Opens `file_name` for writing and begins a JSON object.
// The macro declares the locals `file_str`, `file`, `s` and `writer` in the
// enclosing scope, so it can appear at most once per scope and has to be
// paired with END_JSON_DUMP_FILE() in that same scope.
// Throws std::runtime_error when the file cannot be opened.
#define START_JSON_DUMP_FILE(file_name)                               \
    std::string file_str(file_name);                                  \
    std::ofstream file(file_str);                                     \
    if(!file.is_open())                                               \
    {                                                                 \
        throw std::runtime_error("Could not open file: " + file_str); \
    }                                                                 \
    rapidjson::StringBuffer s;                                        \
    rapidjson::Writer<rapidjson::StringBuffer> writer(s);             \
    writer.StartObject();

// Closes the JSON object started by START_JSON_DUMP_FILE, writes the buffered
// text to the file and closes it. The success message is printed only after
// the stream reports that both the write and the close succeeded; otherwise
// std::runtime_error is thrown, matching how START_JSON_DUMP_FILE reports errors.
#define END_JSON_DUMP_FILE()                                           \
    writer.EndObject();                                                \
    file << s.GetString();                                             \
    file.close();                                                      \
    if(file.fail())                                                    \
    {                                                                  \
        throw std::runtime_error("Could not write file: " + file_str); \
    }                                                                  \
    std::cout << "Results written to " << file_str << " successfully" << std::endl;

// Both helpers forward to the function of the same (lower-case) name using the
// `writer` local introduced by START_JSON_DUMP_FILE. The trailing ';' is part of
// the expansion, so call sites compile with or without their own semicolon.
#define ADD_KEY_VALUE(key, value) add_key_value_pair(writer, key, value);
#define ADD_PERF_TO_JSON(_time, tflops, gbytes) add_perf_to_json(writer, _time, tflops, gbytes);
29 
30 template <typename T>
31 void add_key_value_pair(rapidjson::Writer<rapidjson::StringBuffer>& writer,
32  const char* key,
33  T value)
34 {
35  writer.Key(key);
37  {
38  writer.String(value, static_cast<rapidjson::SizeType>(std::strlen(value)));
39  }
40  else if constexpr(std::is_same<T, std::string>::value)
41  {
42  writer.String(value.c_str(), static_cast<rapidjson::SizeType>(value.length()));
43  }
44  else if constexpr(std::is_floating_point<T>::value)
45  {
46  writer.Double(static_cast<double>(value));
47  }
48  else if constexpr(std::is_integral<T>::value)
49  {
50  writer.Int64(static_cast<int64_t>(value));
51  }
52  else
53  {
56  "Unsupported type for JSON serialization");
57  }
58 }
59 
60 static void add_perf_to_json(rapidjson::Writer<rapidjson::StringBuffer>& writer,
61  float time,
62  float tflops,
63  float gbytes)
64 {
65  std::string roster("perf");
66  writer.String(roster.c_str(), static_cast<rapidjson::SizeType>(roster.length()));
67 
68  writer.StartArray();
69  writer.StartObject();
70 
71  add_key_value_pair(writer, "time", time);
72  add_key_value_pair(writer, "tflops", tflops);
73  add_key_value_pair(writer, "gbytes", gbytes);
74 
75  writer.EndObject();
76  writer.EndArray();
77 }
78 
79 #else
80 #pragma GCC diagnostic push
81 #pragma GCC diagnostic ignored "-Wunused-parameter"
82 #pragma GCC diagnostic ignored "-Wunused-local-typedef"
83 #define START_JSON_DUMP_FILE(file_name)
84 #define END_JSON_DUMP_FILE() \
85  std::cout << "JSON dump disabled, To enable, set CK_ENABLE_JSON_DUMP cmake option" << std::endl;
86 
87 #define ADD_KEY_VALUE(key, value)
88 #define ADD_PERF_TO_JSON(_time, tflops, gbytes)
89 #endif
90 
// Helper traits to check for static member existence

/// Primary template: chosen whenever `T` lacks at least one of the members
/// `M_Warp_Tile`, `N_Warp_Tile`, `K_Warp_Tile`. Evaluates to `std::false_type`.
template <typename T, typename = void>
struct has_warp_tile_members : std::false_type
{
};

/// Partial specialization: viable only when `T::M_Warp_Tile`, `T::N_Warp_Tile`
/// and `T::K_Warp_Tile` all name something `decltype` can inspect; otherwise
/// substitution fails and the primary template is used. Evaluates to `std::true_type`.
template <typename T>
struct has_warp_tile_members<
    T,
    std::void_t<decltype(T::M_Warp_Tile), decltype(T::N_Warp_Tile), decltype(T::K_Warp_Tile)>>
    : std::true_type
{
};
104 
105 template <typename ALayout,
106  typename BLayout,
107  typename CLayout,
108  typename ADataType,
109  typename BDataType,
110  typename CDataType,
111  typename GemmConfig,
112  template <typename>
113  typename DTypeTraits>
114 void dump_gemm_json_results(const std::string& json_filename,
115  int M,
116  int N,
117  int K,
118  int stride_A,
119  int stride_B,
120  int stride_C,
121  bool persistent,
122  bool pass,
123  float ave_time,
124  float tflops,
125  float gb_per_sec,
126  const std::string& kernel_name = "gemm_basic")
127 {
128  START_JSON_DUMP_FILE(json_filename);
129  ADD_KEY_VALUE("name", kernel_name);
130  ADD_KEY_VALUE("M", M);
131  ADD_KEY_VALUE("N", N);
132  ADD_KEY_VALUE("K", K);
133  ADD_KEY_VALUE("stride_A", stride_A);
134  ADD_KEY_VALUE("stride_B", stride_B);
135  ADD_KEY_VALUE("stride_C", stride_C);
136  ADD_KEY_VALUE("A_layout", ALayout::name);
137  ADD_KEY_VALUE("B_layout", BLayout::name);
138  ADD_KEY_VALUE("C_layout", CLayout::name);
139  using TraitsADataType = DTypeTraits<ADataType>;
140  using TraitsBDataType = DTypeTraits<BDataType>;
141  using TraitsCDataType = DTypeTraits<CDataType>;
142  ADD_KEY_VALUE("A_type", TraitsADataType::name);
143  ADD_KEY_VALUE("B_type", TraitsBDataType::name);
144  ADD_KEY_VALUE("C_type", TraitsCDataType::name);
145  ADD_KEY_VALUE("structured_sparsity", GemmConfig::UseStructuredSparsity ? "on" : "off");
146 
148  {
149  ADD_KEY_VALUE("warp_tile",
150  std::to_string(GemmConfig::M_Warp_Tile) + "x" +
151  std::to_string(GemmConfig::N_Warp_Tile) + "x" +
152  std::to_string(GemmConfig::K_Warp_Tile));
153  }
154  ADD_KEY_VALUE("persistent", persistent ? "on" : "off");
155  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
156  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
158 }
159 
160 void dump_batched_gemm_json_results(const std::string& json_filename,
161  const std::string& op_name,
162  int M,
163  int N,
164  int K,
165  int stride_A,
166  int stride_B,
167  int stride_C,
168  int batch_stride_A,
169  int batch_stride_B,
170  int batch_stride_C,
171  int batch_count,
172  bool pass,
173  float ave_time,
174  float tflops,
175  float gb_per_sec,
176  const std::string& kernel_name = "batched_gemm_basic")
177 {
178  START_JSON_DUMP_FILE(json_filename);
179  ADD_KEY_VALUE("name", kernel_name);
180  ADD_KEY_VALUE("op_name", op_name);
181  ADD_KEY_VALUE("M", M);
182  ADD_KEY_VALUE("N", N);
183  ADD_KEY_VALUE("K", K);
184  ADD_KEY_VALUE("stride_A", stride_A);
185  ADD_KEY_VALUE("stride_B", stride_B);
186  ADD_KEY_VALUE("stride_C", stride_C);
187  ADD_KEY_VALUE("batch_stride_A", batch_stride_A);
188  ADD_KEY_VALUE("batch_stride_B", batch_stride_B);
189  ADD_KEY_VALUE("batch_stride_C", batch_stride_C);
190  ADD_KEY_VALUE("batch_count", batch_count);
191  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
192  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
194 }
195 
196 template <typename ALayout, typename BLayout, typename CLayout>
197 void dump_grouped_gemm_json_results(const std::string& json_filename,
198  const std::string& op_name,
199  int group_count,
200  bool pass,
201  float ave_time,
202  float tflops,
203  float gb_per_sec,
204  const std::string& kernel_name = "grouped_gemm")
205 {
206  START_JSON_DUMP_FILE(json_filename);
207  ADD_KEY_VALUE("name", kernel_name);
208  ADD_KEY_VALUE("op_name", op_name);
209  ADD_KEY_VALUE("group_count", group_count);
210  ADD_KEY_VALUE("A_layout", ALayout::name);
211  ADD_KEY_VALUE("B_layout", BLayout::name);
212  ADD_KEY_VALUE("C_layout", CLayout::name);
213  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
214  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
216 }
217 
218 void dump_flatmm_json_results(const std::string& json_filename,
219  const std::string& datatype,
220  int M,
221  int N,
222  int K,
223  int stride_A,
224  int stride_B,
225  int stride_C,
226  int kbatch,
227  bool pass,
228  float ave_time,
229  float tflops,
230  float gb_per_sec,
231  const std::string& kernel_name = "flatmm_basic")
232 {
233  START_JSON_DUMP_FILE(json_filename);
234  ADD_KEY_VALUE("name", kernel_name);
235  ADD_KEY_VALUE("DataType", datatype);
236  ADD_KEY_VALUE("M", M);
237  ADD_KEY_VALUE("N", N);
238  ADD_KEY_VALUE("K", K);
239  ADD_KEY_VALUE("StrideA", stride_A);
240  ADD_KEY_VALUE("StrideB", stride_B);
241  ADD_KEY_VALUE("StrideC", stride_C);
242  ADD_KEY_VALUE("kbatch", kbatch);
243  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
244  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
246 }
247 
248 void dump_gemm_multi_d_fp16_json_results(const std::string& json_filename,
249  const std::string& op_name,
250  int M,
251  int N,
252  int K,
253  int StrideA,
254  int StrideB,
255  int StrideD0,
256  int StrideD1,
257  int StrideE,
258  bool pass,
259  float ave_time,
260  float tflops,
261  float gb_per_sec,
262  const std::string& kernel_name = "gemm_multi_d_fp16")
263 {
264  START_JSON_DUMP_FILE(json_filename);
265  ADD_KEY_VALUE("name", kernel_name);
266  ADD_KEY_VALUE("op_name", op_name);
267  ADD_KEY_VALUE("M", M);
268  ADD_KEY_VALUE("N", N);
269  ADD_KEY_VALUE("K", K);
270  ADD_KEY_VALUE("StrideA", StrideA);
271  ADD_KEY_VALUE("StrideB", StrideB);
272  ADD_KEY_VALUE("StrideD0", StrideD0);
273  ADD_KEY_VALUE("StrideD1", StrideD1);
274  ADD_KEY_VALUE("StrideE", StrideE);
275  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
276  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
278 }
279 
280 void dump_elementwise_json_results(const std::string& json_filename,
281  const std::string& prec,
282  int grid_size,
283  int block_size,
284  float ave_time,
285  float tflops,
286  float gb_per_sec,
287  const std::string& kernel_name = "elementwise")
288 {
289  START_JSON_DUMP_FILE(json_filename);
290  ADD_KEY_VALUE("name", kernel_name);
291  ADD_KEY_VALUE("prec", prec);
292  ADD_KEY_VALUE("grid_size", grid_size);
293  ADD_KEY_VALUE("block_size", block_size);
294  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
296 }
297 
298 void dump_layernorm2d_fwd_json_results(const std::string& json_filename,
299  const std::string& prec_i,
300  const std::string& prec_o,
301  const std::string& prec_sm,
302  const std::string& prec_sy,
303  int m,
304  int n,
305  int x_stride,
306  int xr_stride,
307  int y_stride,
308  int yr_stride,
309  bool pass,
310  float ave_time,
311  float tflops,
312  float gb_per_sec,
313  const std::string& kernel_name = "layernorm2d_fwd")
314 {
315  START_JSON_DUMP_FILE(json_filename);
316  ADD_KEY_VALUE("name", kernel_name);
317  ADD_KEY_VALUE("prec_i", prec_i);
318  ADD_KEY_VALUE("prec_o", prec_o);
319  ADD_KEY_VALUE("prec_sm", prec_sm);
320  ADD_KEY_VALUE("prec_sy", prec_sy);
321  ADD_KEY_VALUE("m", m);
322  ADD_KEY_VALUE("n", n);
323  ADD_KEY_VALUE("x_stride", x_stride);
324  ADD_KEY_VALUE("xr_stride", xr_stride);
325  ADD_KEY_VALUE("y_stride", y_stride);
326  ADD_KEY_VALUE("yr_stride", yr_stride);
327  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
328  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
330 }
331 
332 template <typename DataType, template <typename> typename DTypeTraits>
333 void dump_reduce_json_results(const std::string& json_filename,
334  int N,
335  int C,
336  int H,
337  int W,
338  bool pass,
339  float ave_time,
340  float tflops,
341  float gb_per_sec,
342  const std::string& kernel_name = "reduce")
343 {
344  START_JSON_DUMP_FILE(json_filename);
345  ADD_KEY_VALUE("name", kernel_name);
346  using Traits = DTypeTraits<DataType>;
347  ADD_KEY_VALUE("data_type", Traits::name);
348  ADD_KEY_VALUE("N", N);
349  ADD_KEY_VALUE("C", C);
350  ADD_KEY_VALUE("H", H);
351  ADD_KEY_VALUE("W", W);
352  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
353  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
355 }
356 
357 void dump_permute_json_results(const std::string& json_filename,
358  const std::string& data_type,
359  bool pass,
360  float ave_time,
361  float tflop,
362  float gb_per_sec,
363  const std::string& kernel_name = "permute")
364 {
365  START_JSON_DUMP_FILE(json_filename);
366  ADD_KEY_VALUE("name", kernel_name);
367  ADD_KEY_VALUE("data_type", data_type);
368  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
369  ADD_PERF_TO_JSON(ave_time, tflop, gb_per_sec)
371 }
372 
373 void dump_topk_softmax_json(const std::string& json_filename,
374  const std::string& input_prec,
375  const std::string& weight_prec,
376  int tokens,
377  int experts,
378  int topk,
379  int stride_input,
380  int stride_output,
381  float ave_time,
382  float tflop,
383  float gb_per_sec,
384  bool pass,
385  const std::string& kernel_name = "topk_softmax")
386 {
387  START_JSON_DUMP_FILE(json_filename);
388  ADD_KEY_VALUE("name", kernel_name);
389  ADD_KEY_VALUE("input_prec", input_prec);
390  ADD_KEY_VALUE("weight_prec", weight_prec);
391  ADD_KEY_VALUE("tokens", tokens);
392  ADD_KEY_VALUE("experts", experts);
393  ADD_KEY_VALUE("topk", topk);
394  ADD_KEY_VALUE("stride_input", stride_input);
395  ADD_KEY_VALUE("stride_output", stride_output);
396  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
397  ADD_PERF_TO_JSON(ave_time, tflop, gb_per_sec);
399 }
400 
401 void dump_rmsnorm2d_fwd_json(const std::string& json_filename,
402  const std::string& prec_str,
403  int m,
404  int n,
405  int x_stride,
406  int xr_stride,
407  int y_stride,
408  int yr_stride,
409  int use_model_sensitive_rmsnorm,
410  float ave_time,
411  float tflops,
412  float gb_per_sec,
413  bool pass,
414  const std::string& kernel_name = "rmsnorm2d_fwd")
415 {
416  START_JSON_DUMP_FILE(json_filename);
417  ADD_KEY_VALUE("name", kernel_name);
418  ADD_KEY_VALUE("prec", prec_str);
419  ADD_KEY_VALUE("m", m);
420  ADD_KEY_VALUE("n", n);
421  ADD_KEY_VALUE("x_stride", x_stride);
422  ADD_KEY_VALUE("xr_stride", xr_stride);
423  ADD_KEY_VALUE("y_stride", y_stride);
424  ADD_KEY_VALUE("yr_stride", yr_stride);
425  ADD_KEY_VALUE("use_model_sensitive_rmsnorm", use_model_sensitive_rmsnorm);
426  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
427  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
429 }
430 
432  const std::string& json_filename,
433  const std::string& input_data_type,
434  const std::string& quantized_data_type,
435  int m,
436  int n,
437  int stride,
438  float epsilon,
439  float ave_time,
440  float tflops,
441  float gb_per_sec,
442  bool pass,
443  const std::string& kernel_name = "add_rmsnorm2d_rdquant_fwd")
444 {
445  START_JSON_DUMP_FILE(json_filename);
446  ADD_KEY_VALUE("name", kernel_name);
447  ADD_KEY_VALUE("input_data_type", input_data_type);
448  ADD_KEY_VALUE("quantized_data_type", quantized_data_type);
449  ADD_KEY_VALUE("m", m);
450  ADD_KEY_VALUE("n", n);
451  ADD_KEY_VALUE("stride", stride);
452  ADD_KEY_VALUE("epsilon", epsilon);
453  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
454  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
456 }
457 
458 void dump_smoothquant_json(const std::string& json_filename,
459  const std::string& prec_str,
460  int m,
461  int n,
462  int x_stride,
463  int y_stride,
464  float ave_time,
465  float tflops,
466  float gb_per_sec,
467  bool pass,
468  const std::string& kernel_name = "smoothquant")
469 {
470  START_JSON_DUMP_FILE(json_filename);
471  ADD_KEY_VALUE("name", kernel_name);
472  ADD_KEY_VALUE("prec", prec_str);
473  ADD_KEY_VALUE("m", m);
474  ADD_KEY_VALUE("n", n);
475  ADD_KEY_VALUE("x_stride", x_stride);
476  ADD_KEY_VALUE("y_stride", y_stride);
477  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
478  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec);
480 }
481 
482 void dump_moe_sorting_json(const std::string& json_filename,
483  const std::string& index_prec,
484  const std::string& weight_prec,
485  const std::string& workspace_size,
486  int dispatch_policy,
487  int tokens,
488  int num_experts,
489  int topk,
490  float ave_time,
491  float tflops,
492  float gb_per_sec,
493  bool pass,
494  const std::string& kernel_name = "moe_sorting")
495 {
496  START_JSON_DUMP_FILE(json_filename);
497  ADD_KEY_VALUE("name", kernel_name);
498  ADD_KEY_VALUE("index_prec", index_prec);
499  ADD_KEY_VALUE("weight_prec", weight_prec);
500  ADD_KEY_VALUE("workspace_size", workspace_size);
501  ADD_KEY_VALUE("dispatch_policy", dispatch_policy);
502  ADD_KEY_VALUE("tokens", tokens);
503  ADD_KEY_VALUE("num_experts", num_experts);
504  ADD_KEY_VALUE("topk", topk);
505  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
506  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
508 }
509 
510 void dump_batched_transpose_json(const std::string& json_filename,
511  int N,
512  int C,
513  int H,
514  int W,
515  const std::string& layout_in,
516  const std::string& layout_out,
517  const std::string& prec,
518  float ave_time,
519  float tflops,
520  float gb_per_sec,
521  bool pass,
522  const std::string& kernel_name = "batched_transpose")
523 {
524  START_JSON_DUMP_FILE(json_filename);
525  ADD_KEY_VALUE("name", kernel_name);
526  ADD_KEY_VALUE("N", N);
527  ADD_KEY_VALUE("C", C);
528  ADD_KEY_VALUE("H", H);
529  ADD_KEY_VALUE("W", W);
530  ADD_KEY_VALUE("LayoutIn", layout_in);
531  ADD_KEY_VALUE("LayoutOut", layout_out);
532  ADD_KEY_VALUE("Precision", prec);
533  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
534  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
536 }
537 
538 void dump_moe_smoothquant_json(const std::string& json_filename,
539  const std::string& prec_i,
540  const std::string& prec_o,
541  int tokens,
542  int hidden_size,
543  int stride,
544  int experts,
545  int topk,
546  bool pass,
547  float ave_time,
548  float tflops,
549  float gb_per_sec,
550  const std::string& kernel_name = "moe_smoothquant")
551 {
552  START_JSON_DUMP_FILE(json_filename);
553  ADD_KEY_VALUE("name", kernel_name);
554  ADD_KEY_VALUE("prec_i", prec_i);
555  ADD_KEY_VALUE("prec_o", prec_o);
556  ADD_KEY_VALUE("tokens", tokens);
557  ADD_KEY_VALUE("hidden_size", hidden_size);
558  ADD_KEY_VALUE("stride", stride);
559  ADD_KEY_VALUE("experts", experts);
560  ADD_KEY_VALUE("topk", topk);
561  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
562  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
564 }
565 
566 void dump_fused_moe_json(const std::string& json_filename,
567  const std::string& api_str,
568  const std::string& prec_str,
569  int tokens,
570  bool is_local_token,
571  int local_tokens,
572  int experts,
573  int topk,
574  int hidden_size,
575  int intermediate_size,
576  int stride,
577  int block_m,
578  int activation,
579  bool gate_only,
580  bool fused_quant,
581  bool pass,
582  float ave_time,
583  float tflops,
584  float tb_per_sec,
585  const std::string& kernel_name = "fused_moe")
586 {
587  START_JSON_DUMP_FILE(json_filename);
588  ADD_KEY_VALUE("name", kernel_name);
589  ADD_KEY_VALUE("api", api_str);
590  ADD_KEY_VALUE("prec", prec_str);
591  ADD_KEY_VALUE("tokens", tokens);
592  if(is_local_token)
593  {
594  ADD_KEY_VALUE("local_tokens", local_tokens);
595  }
596  ADD_KEY_VALUE("experts", experts);
597  ADD_KEY_VALUE("topk", topk);
598  ADD_KEY_VALUE("hidden_size", hidden_size);
599  ADD_KEY_VALUE("intermediate_size", intermediate_size);
600  ADD_KEY_VALUE("stride", stride);
601  ADD_KEY_VALUE("block_m", block_m);
602  ADD_KEY_VALUE("activation", activation);
603  ADD_KEY_VALUE("gate_only", gate_only);
604  ADD_KEY_VALUE("fused_quant", fused_quant);
605  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
606  ADD_PERF_TO_JSON(ave_time, tflops, (tb_per_sec * 1024.0f))
608 }
609 
610 void dump_fmha_fwd_json_results(const std::string& json_filename,
611  const std::string& prec,
612  const std::string& mode,
613  const std::string& io_layout,
614  int batch,
615  int nhead,
616  int nhead_k,
617  int seqlen_qs,
618  int seqlen_ks,
619  int seqlen_kpads,
620  int hdim_q,
621  int hdim_v,
622  float scale_s,
623  float p_drop,
624  bool lse,
625  bool squant,
626  const std::string& bias,
627  const std::string& vlayout,
628  bool pass,
629  float ave_time,
630  float tflops,
631  float gb_per_sec,
632  const std::string& kernel_name = "fmha_fwd")
633 {
634  START_JSON_DUMP_FILE(json_filename);
635  ADD_KEY_VALUE("name", kernel_name);
636  ADD_KEY_VALUE("prec", prec);
637  ADD_KEY_VALUE("mode", mode);
638  ADD_KEY_VALUE("io_layout", io_layout);
639  ADD_KEY_VALUE("batch", batch);
640  ADD_KEY_VALUE("nhead", nhead);
641  ADD_KEY_VALUE("nhead_k", nhead_k);
642  ADD_KEY_VALUE("seqlen_q", seqlen_qs);
643  ADD_KEY_VALUE("seqlen_k", seqlen_ks);
644  ADD_KEY_VALUE("seqlen_kpads", seqlen_kpads);
645  ADD_KEY_VALUE("hdim_q", hdim_q);
646  ADD_KEY_VALUE("hdim_v", hdim_v);
647  ADD_KEY_VALUE("scale_s", scale_s);
648  ADD_KEY_VALUE("p_drop", p_drop);
649  ADD_KEY_VALUE("lse", lse);
650  ADD_KEY_VALUE("squant", squant);
651  ADD_KEY_VALUE("bias", bias);
652  ADD_KEY_VALUE("vlayout", vlayout);
653  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
654  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
656 }
657 
658 void dump_fmha_bwd_json_results(const std::string& json_filename,
659  const std::string& data_type,
660  const std::string& mode,
661  const std::string& i_perm,
662  const std::string& o_perm,
663  int batch,
664  int nhead,
665  int nhead_k,
666  int seqlen_q,
667  int seqlen_k,
668  int hdim_q,
669  int hdim_v,
670  float scale,
671  const std::string& bias,
672  bool use_dbias,
673  float p_drop,
674  bool s_randval,
675  bool deterministic,
676  const std::string& mask,
677  int mask_left,
678  int mask_right,
679  int workspace_size,
680  bool pass,
681  float ave_time,
682  float tflops,
683  float gb_per_sec,
684  const std::string& kernel_name = "fmha_bwd")
685 {
686  START_JSON_DUMP_FILE(json_filename);
687  ADD_KEY_VALUE("name", kernel_name);
688  ADD_KEY_VALUE("prec", data_type);
689  ADD_KEY_VALUE("mode", mode);
690  ADD_KEY_VALUE("i_perm", i_perm);
691  ADD_KEY_VALUE("o_perm", o_perm);
692  ADD_KEY_VALUE("batch", batch);
693  ADD_KEY_VALUE("nhead", nhead);
694  ADD_KEY_VALUE("nhead_k", nhead_k);
695  ADD_KEY_VALUE("seqlen_q", seqlen_q);
696  ADD_KEY_VALUE("seqlen_k", seqlen_k);
697  ADD_KEY_VALUE("hdim_q", hdim_q);
698  ADD_KEY_VALUE("hdim_v", hdim_v);
699  ADD_KEY_VALUE("scale", scale);
700  ADD_KEY_VALUE("bias", bias);
701  ADD_KEY_VALUE("use_dbias", use_dbias);
702  ADD_KEY_VALUE("p_drop", p_drop);
703  ADD_KEY_VALUE("s_randval", s_randval);
704  ADD_KEY_VALUE("deterministic", deterministic ? "true" : "false");
705  ADD_KEY_VALUE("mask", mask);
706  ADD_KEY_VALUE("mask_left", mask_left);
707  ADD_KEY_VALUE("mask_right", mask_right);
708  ADD_KEY_VALUE("workspace_size", workspace_size);
709  ADD_KEY_VALUE("verification", pass ? "pass" : "fail");
710  ADD_PERF_TO_JSON(ave_time, tflops, gb_per_sec)
712 }
713 
714 #ifndef CK_ENABLE_JSON_DUMP
715 #pragma GCC diagnostic pop
716 #endif
#define ADD_PERF_TO_JSON(_time, tflops, gbytes)
Definition: json_dump.hpp:88
void dump_gemm_json_results(const std::string &json_filename, int M, int N, int K, int stride_A, int stride_B, int stride_C, bool persistent, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_basic")
Definition: json_dump.hpp:114
#define END_JSON_DUMP_FILE()
Definition: json_dump.hpp:84
void dump_rmsnorm2d_fwd_json(const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, int use_model_sensitive_rmsnorm, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="rmsnorm2d_fwd")
Definition: json_dump.hpp:401
void dump_batched_gemm_json_results(const std::string &json_filename, const std::string &op_name, int M, int N, int K, int stride_A, int stride_B, int stride_C, int batch_stride_A, int batch_stride_B, int batch_stride_C, int batch_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="batched_gemm_basic")
Definition: json_dump.hpp:160
void dump_fmha_bwd_json_results(const std::string &json_filename, const std::string &data_type, const std::string &mode, const std::string &i_perm, const std::string &o_perm, int batch, int nhead, int nhead_k, int seqlen_q, int seqlen_k, int hdim_q, int hdim_v, float scale, const std::string &bias, bool use_dbias, float p_drop, bool s_randval, bool deterministic, const std::string &mask, int mask_left, int mask_right, int workspace_size, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_bwd")
Definition: json_dump.hpp:658
void dump_layernorm2d_fwd_json_results(const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, const std::string &prec_sm, const std::string &prec_sy, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="layernorm2d_fwd")
Definition: json_dump.hpp:298
void dump_moe_smoothquant_json(const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, int tokens, int hidden_size, int stride, int experts, int topk, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="moe_smoothquant")
Definition: json_dump.hpp:538
void dump_reduce_json_results(const std::string &json_filename, int N, int C, int H, int W, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="reduce")
Definition: json_dump.hpp:333
void dump_flatmm_json_results(const std::string &json_filename, const std::string &datatype, int M, int N, int K, int stride_A, int stride_B, int stride_C, int kbatch, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="flatmm_basic")
Definition: json_dump.hpp:218
void dump_batched_transpose_json(const std::string &json_filename, int N, int C, int H, int W, const std::string &layout_in, const std::string &layout_out, const std::string &prec, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="batched_transpose")
Definition: json_dump.hpp:510
#define START_JSON_DUMP_FILE(file_name)
Definition: json_dump.hpp:83
void dump_permute_json_results(const std::string &json_filename, const std::string &data_type, bool pass, float ave_time, float tflop, float gb_per_sec, const std::string &kernel_name="permute")
Definition: json_dump.hpp:357
void dump_gemm_multi_d_fp16_json_results(const std::string &json_filename, const std::string &op_name, int M, int N, int K, int StrideA, int StrideB, int StrideD0, int StrideD1, int StrideE, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_multi_d_fp16")
Definition: json_dump.hpp:248
void dump_moe_sorting_json(const std::string &json_filename, const std::string &index_prec, const std::string &weight_prec, const std::string &workspace_size, int dispatch_policy, int tokens, int num_experts, int topk, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="moe_sorting")
Definition: json_dump.hpp:482
void dump_grouped_gemm_json_results(const std::string &json_filename, const std::string &op_name, int group_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="grouped_gemm")
Definition: json_dump.hpp:197
void dump_fmha_fwd_json_results(const std::string &json_filename, const std::string &prec, const std::string &mode, const std::string &io_layout, int batch, int nhead, int nhead_k, int seqlen_qs, int seqlen_ks, int seqlen_kpads, int hdim_q, int hdim_v, float scale_s, float p_drop, bool lse, bool squant, const std::string &bias, const std::string &vlayout, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_fwd")
Definition: json_dump.hpp:610
void dump_fused_moe_json(const std::string &json_filename, const std::string &api_str, const std::string &prec_str, int tokens, bool is_local_token, int local_tokens, int experts, int topk, int hidden_size, int intermediate_size, int stride, int block_m, int activation, bool gate_only, bool fused_quant, bool pass, float ave_time, float tflops, float tb_per_sec, const std::string &kernel_name="fused_moe")
Definition: json_dump.hpp:566
void dump_smoothquant_json(const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int y_stride, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="smoothquant")
Definition: json_dump.hpp:458
#define ADD_KEY_VALUE(key, value)
Definition: json_dump.hpp:87
void dump_elementwise_json_results(const std::string &json_filename, const std::string &prec, int grid_size, int block_size, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="elementwise")
Definition: json_dump.hpp:280
void dump_topk_softmax_json(const std::string &json_filename, const std::string &input_prec, const std::string &weight_prec, int tokens, int experts, int topk, int stride_input, int stride_output, float ave_time, float tflop, float gb_per_sec, bool pass, const std::string &kernel_name="topk_softmax")
Definition: json_dump.hpp:373
void dump_add_rmsnorm2d_rdquant_fwd_json(const std::string &json_filename, const std::string &input_data_type, const std::string &quantized_data_type, int m, int n, int stride, float epsilon, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="add_rmsnorm2d_rdquant_fwd")
Definition: json_dump.hpp:431
bool_constant< false > false_type
Definition: integral_constant.hpp:63
bool_constant< true > true_type
Definition: integral_constant.hpp:62
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1350
common definitions and configuration
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:415
signed __int64 int64_t
Definition: stdint.h:135
Definition: json_dump.hpp:94