/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/utility/json_dump.hpp File Reference
Go to the source code of this file.
Classes
| struct | has_warp_tile_members< T, typename > | 
| struct | has_warp_tile_members< T, std::void_t< decltype(T::M_Warp_Tile), decltype(T::N_Warp_Tile), decltype(T::K_Warp_Tile)> > | 
Macros
| #define | START_JSON_DUMP_FILE(file_name) | 
| #define | END_JSON_DUMP_FILE() std::cout << "JSON dump disabled, To enable, set CK_ENABLE_JSON_DUMP cmake option" << std::endl; | 
| #define | ADD_KEY_VALUE(key, value) | 
| #define | ADD_PERF_TO_JSON(_time, tflops, gbytes) | 
Functions
| template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename GemmConfig , template< typename > typename DTypeTraits> | |
| void | dump_gemm_json_results (const std::string &json_filename, int M, int N, int K, int stride_A, int stride_B, int stride_C, bool persistent, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_basic") | 
| void | dump_batched_gemm_json_results (const std::string &json_filename, const std::string &op_name, int M, int N, int K, int stride_A, int stride_B, int stride_C, int batch_stride_A, int batch_stride_B, int batch_stride_C, int batch_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="batched_gemm_basic") | 
| template<typename ALayout , typename BLayout , typename CLayout > | |
| void | dump_grouped_gemm_json_results (const std::string &json_filename, const std::string &op_name, int group_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="grouped_gemm") | 
| void | dump_flatmm_json_results (const std::string &json_filename, const std::string &datatype, int M, int N, int K, int stride_A, int stride_B, int stride_C, int kbatch, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="flatmm_basic") | 
| void | dump_gemm_multi_d_fp16_json_results (const std::string &json_filename, const std::string &op_name, int M, int N, int K, int StrideA, int StrideB, int StrideD0, int StrideD1, int StrideE, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_multi_d_fp16") | 
| void | dump_elementwise_json_results (const std::string &json_filename, const std::string &prec, int grid_size, int block_size, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="elementwise") | 
| void | dump_layernorm2d_fwd_json_results (const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, const std::string &prec_sm, const std::string &prec_sy, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="layernorm2d_fwd") | 
| template<typename DataType , template< typename > typename DTypeTraits> | |
| void | dump_reduce_json_results (const std::string &json_filename, int N, int C, int H, int W, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="reduce") | 
| void | dump_permute_json_results (const std::string &json_filename, const std::string &data_type, bool pass, float ave_time, float tflop, float gb_per_sec, const std::string &kernel_name="permute") | 
| void | dump_topk_softmax_json (const std::string &json_filename, const std::string &input_prec, const std::string &weight_prec, int tokens, int experts, int topk, int stride_input, int stride_output, float ave_time, float tflop, float gb_per_sec, bool pass, const std::string &kernel_name="topk_softmax") | 
| void | dump_rmsnorm2d_fwd_json (const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, int use_model_sensitive_rmsnorm, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="rmsnorm2d_fwd") | 
| void | dump_add_rmsnorm2d_rdquant_fwd_json (const std::string &json_filename, const std::string &input_data_type, const std::string &quantized_data_type, int m, int n, int stride, float epsilon, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="add_rmsnorm2d_rdquant_fwd") | 
| void | dump_smoothquant_json (const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int y_stride, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="smoothquant") | 
| void | dump_moe_sorting_json (const std::string &json_filename, const std::string &index_prec, const std::string &weight_prec, const std::string &workspace_size, int dispatch_policy, int tokens, int num_experts, int topk, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="moe_sorting") | 
| void | dump_batched_transpose_json (const std::string &json_filename, int N, int C, int H, int W, const std::string &layout_in, const std::string &layout_out, const std::string &prec, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="batched_transpose") | 
| void | dump_moe_smoothquant_json (const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, int tokens, int hidden_size, int stride, int experts, int topk, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="moe_smoothquant") | 
| void | dump_fused_moe_json (const std::string &json_filename, const std::string &api_str, const std::string &prec_str, int tokens, bool is_local_token, int local_tokens, int experts, int topk, int hidden_size, int intermediate_size, int stride, int block_m, int activation, bool gate_only, bool fused_quant, bool pass, float ave_time, float tflops, float tb_per_sec, const std::string &kernel_name="fused_moe") | 
| void | dump_fmha_fwd_json_results (const std::string &json_filename, const std::string &prec, const std::string &mode, const std::string &io_layout, int batch, int nhead, int nhead_k, int seqlen_qs, int seqlen_ks, int seqlen_kpads, int hdim_q, int hdim_v, float scale_s, float p_drop, bool lse, bool squant, const std::string &bias, const std::string &vlayout, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_fwd") | 
| void | dump_fmha_bwd_json_results (const std::string &json_filename, const std::string &data_type, const std::string &mode, const std::string &i_perm, const std::string &o_perm, int batch, int nhead, int nhead_k, int seqlen_q, int seqlen_k, int hdim_q, int hdim_v, float scale, const std::string &bias, bool use_dbias, float p_drop, bool s_randval, bool deterministic, const std::string &mask, int mask_left, int mask_right, int workspace_size, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_bwd") | 
Macro Definition Documentation
◆ ADD_KEY_VALUE
| #define | ADD_KEY_VALUE(key, value) |
◆ ADD_PERF_TO_JSON
| #define | ADD_PERF_TO_JSON(_time, tflops, gbytes) |
◆ END_JSON_DUMP_FILE
| #define | END_JSON_DUMP_FILE() std::cout << "JSON dump disabled, To enable, set CK_ENABLE_JSON_DUMP cmake option" << std::endl; |
◆ START_JSON_DUMP_FILE
| #define | START_JSON_DUMP_FILE(file_name) |
Function Documentation
◆ dump_add_rmsnorm2d_rdquant_fwd_json()
| void dump_add_rmsnorm2d_rdquant_fwd_json (const std::string &json_filename, const std::string &input_data_type, const std::string &quantized_data_type, int m, int n, int stride, float epsilon, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="add_rmsnorm2d_rdquant_fwd") |
◆ dump_batched_gemm_json_results()
| void dump_batched_gemm_json_results (const std::string &json_filename, const std::string &op_name, int M, int N, int K, int stride_A, int stride_B, int stride_C, int batch_stride_A, int batch_stride_B, int batch_stride_C, int batch_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="batched_gemm_basic") |
◆ dump_batched_transpose_json()
| void dump_batched_transpose_json (const std::string &json_filename, int N, int C, int H, int W, const std::string &layout_in, const std::string &layout_out, const std::string &prec, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="batched_transpose") |
◆ dump_elementwise_json_results()
| void dump_elementwise_json_results (const std::string &json_filename, const std::string &prec, int grid_size, int block_size, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="elementwise") |
◆ dump_flatmm_json_results()
| void dump_flatmm_json_results (const std::string &json_filename, const std::string &datatype, int M, int N, int K, int stride_A, int stride_B, int stride_C, int kbatch, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="flatmm_basic") |
◆ dump_fmha_bwd_json_results()
| void dump_fmha_bwd_json_results (const std::string &json_filename, const std::string &data_type, const std::string &mode, const std::string &i_perm, const std::string &o_perm, int batch, int nhead, int nhead_k, int seqlen_q, int seqlen_k, int hdim_q, int hdim_v, float scale, const std::string &bias, bool use_dbias, float p_drop, bool s_randval, bool deterministic, const std::string &mask, int mask_left, int mask_right, int workspace_size, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_bwd") |
◆ dump_fmha_fwd_json_results()
| void dump_fmha_fwd_json_results (const std::string &json_filename, const std::string &prec, const std::string &mode, const std::string &io_layout, int batch, int nhead, int nhead_k, int seqlen_qs, int seqlen_ks, int seqlen_kpads, int hdim_q, int hdim_v, float scale_s, float p_drop, bool lse, bool squant, const std::string &bias, const std::string &vlayout, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="fmha_fwd") |
◆ dump_fused_moe_json()
| void dump_fused_moe_json (const std::string &json_filename, const std::string &api_str, const std::string &prec_str, int tokens, bool is_local_token, int local_tokens, int experts, int topk, int hidden_size, int intermediate_size, int stride, int block_m, int activation, bool gate_only, bool fused_quant, bool pass, float ave_time, float tflops, float tb_per_sec, const std::string &kernel_name="fused_moe") |
◆ dump_gemm_json_results()
template<typename ALayout , typename BLayout , typename CLayout , typename ADataType , typename BDataType , typename CDataType , typename GemmConfig , template< typename > typename DTypeTraits> 
| void dump_gemm_json_results (const std::string &json_filename, int M, int N, int K, int stride_A, int stride_B, int stride_C, bool persistent, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_basic") |
◆ dump_gemm_multi_d_fp16_json_results()
| void dump_gemm_multi_d_fp16_json_results (const std::string &json_filename, const std::string &op_name, int M, int N, int K, int StrideA, int StrideB, int StrideD0, int StrideD1, int StrideE, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="gemm_multi_d_fp16") |
◆ dump_grouped_gemm_json_results()
template<typename ALayout , typename BLayout , typename CLayout > 
| void dump_grouped_gemm_json_results (const std::string &json_filename, const std::string &op_name, int group_count, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="grouped_gemm") |
◆ dump_layernorm2d_fwd_json_results()
| void dump_layernorm2d_fwd_json_results (const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, const std::string &prec_sm, const std::string &prec_sy, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="layernorm2d_fwd") |
◆ dump_moe_smoothquant_json()
| void dump_moe_smoothquant_json (const std::string &json_filename, const std::string &prec_i, const std::string &prec_o, int tokens, int hidden_size, int stride, int experts, int topk, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="moe_smoothquant") |
◆ dump_moe_sorting_json()
| void dump_moe_sorting_json (const std::string &json_filename, const std::string &index_prec, const std::string &weight_prec, const std::string &workspace_size, int dispatch_policy, int tokens, int num_experts, int topk, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="moe_sorting") |
◆ dump_permute_json_results()
| void dump_permute_json_results (const std::string &json_filename, const std::string &data_type, bool pass, float ave_time, float tflop, float gb_per_sec, const std::string &kernel_name="permute") |
◆ dump_reduce_json_results()
template<typename DataType , template< typename > typename DTypeTraits> 
| void dump_reduce_json_results (const std::string &json_filename, int N, int C, int H, int W, bool pass, float ave_time, float tflops, float gb_per_sec, const std::string &kernel_name="reduce") |
◆ dump_rmsnorm2d_fwd_json()
| void dump_rmsnorm2d_fwd_json (const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int xr_stride, int y_stride, int yr_stride, int use_model_sensitive_rmsnorm, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="rmsnorm2d_fwd") |
◆ dump_smoothquant_json()
| void dump_smoothquant_json (const std::string &json_filename, const std::string &prec_str, int m, int n, int x_stride, int y_stride, float ave_time, float tflops, float gb_per_sec, bool pass, const std::string &kernel_name="smoothquant") |
◆ dump_topk_softmax_json()
| void dump_topk_softmax_json (const std::string &json_filename, const std::string &input_prec, const std::string &weight_prec, int tokens, int experts, int topk, int stride_input, int stride_output, float ave_time, float tflop, float gb_per_sec, bool pass, const std::string &kernel_name="topk_softmax") |