#include <threadwise_welford.hpp>
|
static __device__ void | Merge (T &mean_a, T &var_a, int32_t &count_a, T mean_b, T var_b, int32_t count_b) |
|
template<typename SrcMeanBufferType , typename SrcVarBufferType , typename SrcCountBufferType , typename DstMeanBufferType , typename DstVarBufferType , typename DstCountBufferType > |
static __device__ void | Run (const SrcMeanBufferType &src_mean_buf, const SrcVarBufferType &src_var_buf, const SrcCountBufferType &src_count_buf, DstMeanBufferType &dst_mean_buf, DstVarBufferType &dst_var_buf, DstCountBufferType &dst_count_buf) |
|
◆ Merge()
template<typename T , typename SrcMeanVarCountThreadDesc_M_K , typename DstMeanVarThreadDesc_M , bool GetActualVariance = false>
static __device__ void ck::ThreadwiseWelfordMerge< T, SrcMeanVarCountThreadDesc_M_K, DstMeanVarThreadDesc_M, GetActualVariance >::Merge (T & mean_a, T & var_a, int32_t & count_a, T mean_b, T var_b, int32_t count_b)
|
inline static |
◆ Run()
template<typename T , typename SrcMeanVarCountThreadDesc_M_K , typename DstMeanVarThreadDesc_M , bool GetActualVariance = false>
template<typename SrcMeanBufferType , typename SrcVarBufferType , typename SrcCountBufferType , typename DstMeanBufferType , typename DstVarBufferType , typename DstCountBufferType >
static __device__ void ck::ThreadwiseWelfordMerge< T, SrcMeanVarCountThreadDesc_M_K, DstMeanVarThreadDesc_M, GetActualVariance >::Run (const SrcMeanBufferType & src_mean_buf, const SrcVarBufferType & src_var_buf, const SrcCountBufferType & src_count_buf, DstMeanBufferType & dst_mean_buf, DstVarBufferType & dst_var_buf, DstCountBufferType & dst_count_buf)
|
inline static |
◆ dst_length_m
template<typename T , typename SrcMeanVarCountThreadDesc_M_K , typename DstMeanVarThreadDesc_M , bool GetActualVariance = false>
constexpr auto ck::ThreadwiseWelfordMerge< T, SrcMeanVarCountThreadDesc_M_K, DstMeanVarThreadDesc_M, GetActualVariance >::dst_length_m = dst_thread_desc_m.GetLength(Number<0>{}) |
|
static constexpr |
◆ dst_thread_desc_m
template<typename T , typename SrcMeanVarCountThreadDesc_M_K , typename DstMeanVarThreadDesc_M , bool GetActualVariance = false>
constexpr auto ck::ThreadwiseWelfordMerge< T, SrcMeanVarCountThreadDesc_M_K, DstMeanVarThreadDesc_M, GetActualVariance >::dst_thread_desc_m = DstMeanVarThreadDesc_M{} |
|
static constexpr |
◆ src_length_k
template<typename T , typename SrcMeanVarCountThreadDesc_M_K , typename DstMeanVarThreadDesc_M , bool GetActualVariance = false>
constexpr auto ck::ThreadwiseWelfordMerge< T, SrcMeanVarCountThreadDesc_M_K, DstMeanVarThreadDesc_M, GetActualVariance >::src_length_k = src_thread_desc_m_k.GetLength(Number<1>{}) |
|
static constexpr |
◆ src_length_m
template<typename T , typename SrcMeanVarCountThreadDesc_M_K , typename DstMeanVarThreadDesc_M , bool GetActualVariance = false>
constexpr auto ck::ThreadwiseWelfordMerge< T, SrcMeanVarCountThreadDesc_M_K, DstMeanVarThreadDesc_M, GetActualVariance >::src_length_m = src_thread_desc_m_k.GetLength(Number<0>{}) |
|
static constexpr |
◆ src_thread_desc_m_k
template<typename T , typename SrcMeanVarCountThreadDesc_M_K , typename DstMeanVarThreadDesc_M , bool GetActualVariance = false>
constexpr auto ck::ThreadwiseWelfordMerge< T, SrcMeanVarCountThreadDesc_M_K, DstMeanVarThreadDesc_M, GetActualVariance >::src_thread_desc_m_k = SrcMeanVarCountThreadDesc_M_K{} |
|
static constexpr
The documentation for this struct was generated from the following file:
- /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp