clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h Source File
amd_hip_cooperative_groups.h
Go to the documentation of this file.
302 __CG_QUALIFIER__ void barrier_wait(arrival_token&&) const { internal::workgroup::barrier_wait(); }
389 friend __CG_QUALIFIER__ coalesced_group binary_partition(const coalesced_group& cgrp, bool pred);
1006 __CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent, unsigned int tile_size) {
1020 __CG_QUALIFIER__ thread_group tiled_partition(const thread_block& parent, unsigned int tile_size) {
1024 __CG_QUALIFIER__ tiled_group tiled_partition(const tiled_group& parent, unsigned int tile_size) {
1220 static_assert(size < ParentSize, "Sub tile size must be < parent tile size in tiled_partition");
1222 __CG_QUALIFIER__ tiled_partition_internal(const thread_block_tile<ParentSize, GrandParentCGTy>& g)
1365 __CG_STATIC_QUALIFIER__ void barrier_wait(arrival_token&&) { internal::cluster::barrier_wait(); }
1383 __CG_STATIC_QUALIFIER__ unsigned int thread_rank() { return internal::cluster::thread_rank(); }
1395 __CG_STATIC_QUALIFIER__ unsigned int num_threads() { return internal::cluster::num_threads(); }
static unsigned int query_shared_rank(const void *in)
Definition amd_hip_cooperative_groups.h:1403
static unsigned int thread_rank()
Definition amd_hip_cooperative_groups.h:1383
static dim3 thread_index()
Definition amd_hip_cooperative_groups.h:1380
static T * map_shared_rank(T *in, int rank)
Definition amd_hip_cooperative_groups.h:1398
static void sync()
Definition amd_hip_cooperative_groups.h:1355
{} arrival_token
Definition amd_hip_cooperative_groups.h:1352
static dim3 block_index()
Definition amd_hip_cooperative_groups.h:1374
static void barrier_wait(arrival_token &&)
Definition amd_hip_cooperative_groups.h:1365
static unsigned int block_rank()
Definition amd_hip_cooperative_groups.h:1377
static unsigned int num_threads()
Definition amd_hip_cooperative_groups.h:1395
static unsigned int size()
Definition amd_hip_cooperative_groups.h:1408
static arrival_token barrier_arrive()
Definition amd_hip_cooperative_groups.h:1358
static unsigned int num_blocks()
Definition amd_hip_cooperative_groups.h:1389
friend cluster_group this_cluster()
Get the cluster group.
Definition amd_hip_cooperative_groups.h:1416
static dim3 dim_threads()
Definition amd_hip_cooperative_groups.h:1392
static dim3 dim_blocks()
Definition amd_hip_cooperative_groups.h:1386
The coalesced_group cooperative group type.
Definition amd_hip_cooperative_groups.h:382
The grid cooperative group type.
Definition amd_hip_cooperative_groups.h:179
Definition amd_hip_cooperative_groups.h:1038
thread_block_tile_internal(const thread_block &g)
Definition amd_hip_cooperative_groups.h:1044
thread_block_tile_internal(const thread_block_tile_internal< tbtSize, tbtParentT > &g)
Definition amd_hip_cooperative_groups.h:1040
The multi-grid cooperative group type.
Definition amd_hip_cooperative_groups.h:124
User exposed API that captures the state of the parent group pre-partition.
Definition amd_hip_cooperative_groups.h:906
thread_block_tile(const thread_block_tile< size, ParentCGTy > &g)
Definition amd_hip_cooperative_groups.h:1201
Definition amd_hip_cooperative_groups.h:847
Group type - thread_block_tile.
Definition amd_hip_cooperative_groups.h:929
Group type - thread_block_tile.
Definition amd_hip_cooperative_groups.h:1058
T shfl_down(T var, unsigned int lane_delta) const
Shuffle down operation on group level.
unsigned long long ballot(int pred) const
Ballot function on group level.
thread_block_tile(const ParentCGTy &g)
Definition amd_hip_cooperative_groups.h:1060
unsigned int meta_group_rank() const
unsigned int thread_rank() const
Rank of the calling thread within [0, num_threads() ).
T shfl_xor(T var, unsigned int laneMask) const
Shuffle xor operation on group level.
unsigned long long match_any(T value) const
Match any function on group level.
unsigned long long match_all(T value, int &pred) const
Match all function on group level.
unsigned int meta_group_size() const
Returns the number of groups created when the parent group was partitioned.
T shfl_up(T var, unsigned int lane_delta) const
Shuffle up operation on group level.
T shfl(T var, int srcRank) const
Shuffle operation on group level.
The workgroup (thread-block in CUDA terminology) cooperative group type.
Definition amd_hip_cooperative_groups.h:235
The base type of all cooperative group types.
Definition amd_hip_cooperative_groups.h:34
Definition amd_hip_cooperative_groups.h:824
The tiled_group cooperative group type.
Definition amd_hip_cooperative_groups.h:326
const struct texture< T, dim, readMode > const void size_t size
Definition hip_runtime_api.h:10284
bool is_valid(CGTy const &g)
Returns true if the group has not violated any API constraints.
Definition amd_hip_cooperative_groups.h:803
void sync(CGTy const &g)
Synchronizes the threads in the group.
Definition amd_hip_cooperative_groups.h:814
__hip_uint32_t group_size(CGTy const &g)
Returns the size of the group.
Definition amd_hip_cooperative_groups.h:775
__hip_uint32_t thread_rank(CGTy const &g)
Returns the rank of thread of the group.
Definition amd_hip_cooperative_groups.h:790
thread_block this_thread_block()
User-exposed API interface to construct workgroup cooperative group type object - thread_block.
Definition amd_hip_cooperative_groups.h:315
coalesced_group binary_partition(const coalesced_group &cgrp, bool pred)
Binary partition.
Definition amd_hip_cooperative_groups.h:1257
thread_group tiled_partition(const thread_group &parent, unsigned int tile_size)
User-exposed API to partition groups.
Definition amd_hip_cooperative_groups.h:1006
multi_grid_group this_multi_grid()
User-exposed API interface to construct grid cooperative group type object - multi_grid_group.
Definition amd_hip_cooperative_groups.h:165
coalesced_group coalesced_threads()
User-exposed API to create coalesced groups.
Definition amd_hip_cooperative_groups.h:656
grid_group this_grid()
User-exposed API interface to construct grid cooperative group type object - grid_group.
Definition amd_hip_cooperative_groups.h:224
void sync() const
Synchronizes the threads in the group.
Definition amd_hip_cooperative_groups.h:370
T shfl_xor(T var, unsigned int laneMask) const
Definition amd_hip_cooperative_groups.h:878
static constexpr unsigned int numThreads
Definition amd_hip_cooperative_groups.h:826
friend multi_grid_group this_multi_grid()
User-exposed API interface to construct grid cooperative group type object - multi_grid_group.
Definition amd_hip_cooperative_groups.h:165
void sync() const
Synchronizes the threads in the group.
Definition amd_hip_cooperative_groups.h:470
static void sync()
Definition amd_hip_cooperative_groups.h:864
void barrier_wait(arrival_token &&t) const
Wait at a barrier.
Definition amd_hip_cooperative_groups.h:209
unsigned int num_threads
Definition amd_hip_cooperative_groups.h:56
__hip_uint32_t thread_rank() const
Rank of the calling thread within [0, num_threads() ).
Definition amd_hip_cooperative_groups.h:191
arrival_token barrier_arrive() const
Arrive at a barrier.
Definition amd_hip_cooperative_groups.h:203
void sync() const
Synchronizes the threads in the group.
Definition amd_hip_cooperative_groups.h:197
__hip_uint32_t size() const
Total number of threads in the group (alias of num_threads())
Definition amd_hip_cooperative_groups.h:78
unsigned int size() const
Total number of threads in the group (alias of num_threads())
Definition amd_hip_cooperative_groups.h:462
__hip_uint32_t num_grids()
Definition amd_hip_cooperative_groups.h:137
unsigned long long match_all(T value, int &pred) const
Definition amd_hip_cooperative_groups.h:897
unsigned long long match_any(T value) const
Match any function on group level.
Definition amd_hip_cooperative_groups.h:622
friend thread_block this_thread_block()
User-exposed API interface to construct workgroup cooperative group type object - thread_block.
Definition amd_hip_cooperative_groups.h:315
static unsigned int meta_group_size()
Returns the number of groups created when the parent group was partitioned.
Definition amd_hip_cooperative_groups.h:915
unsigned int signal
Definition amd_hip_cooperative_groups.h:200
unsigned int meta_group_rank() const
Definition amd_hip_cooperative_groups.h:474
dim3 group_dim() const
Definition amd_hip_cooperative_groups.h:198
unsigned int meta_group_rank() const
Definition amd_hip_cooperative_groups.h:980
__hip_uint32_t _num_threads
Total number of threads in the thread_group.
Definition amd_hip_cooperative_groups.h:37
__hip_uint32_t block_rank() const
Rank of the block in calling thread within [0, num_threads() ).
static unsigned int num_threads()
Number of threads within this tile.
Definition amd_hip_cooperative_groups.h:835
thread_group(internal::group_type type, __hip_uint32_t num_threads=static_cast< __hip_uint64_t >(0), __hip_uint64_t mask=static_cast< __hip_uint64_t >(0))
Definition amd_hip_cooperative_groups.h:46
unsigned int meta_group_size() const
Returns the number of groups created when the parent group was partitioned.
Definition amd_hip_cooperative_groups.h:479
unsigned int thread_rank() const
Rank of the calling thread within [0, num_threads() ).
Definition amd_hip_cooperative_groups.h:366
unsigned int thread_rank() const
Rank of the calling thread within [0, num_threads() ).
Definition amd_hip_cooperative_groups.h:465
unsigned int meta_group_size() const
Returns the number of groups created when the parent group was partitioned.
Definition amd_hip_cooperative_groups.h:985
int all(int pred) const
All function on group level.
Definition amd_hip_cooperative_groups.h:610
unsigned int cg_type() const
Returns the type of the group.
Definition amd_hip_cooperative_groups.h:80
T shfl(T var, int srcRank) const
Definition amd_hip_cooperative_groups.h:866
thread_group new_tiled_group(unsigned int tile_size) const
Definition amd_hip_cooperative_groups.h:249
static __hip_uint32_t size()
Total number of threads in the group (alias of num_threads())
Definition amd_hip_cooperative_groups.h:288
unsigned int num_threads() const
Definition amd_hip_cooperative_groups.h:358
unsigned int num_threads() const
Definition amd_hip_cooperative_groups.h:459
void barrier_wait(arrival_token &&) const
Wait at a barrier.
Definition amd_hip_cooperative_groups.h:302
tiled_group(unsigned int tileSize)
Definition amd_hip_cooperative_groups.h:350
unsigned int meta_group_rank
Definition amd_hip_cooperative_groups.h:57
unsigned long long match_all(T value, int &pred) const
Match all function on group level.
Definition amd_hip_cooperative_groups.h:639
thread_block_tile_type()
Definition amd_hip_cooperative_groups.h:934
__hip_uint32_t block_rank() const
Rank of the block in calling thread within [0, num_threads() ).
Definition amd_hip_cooperative_groups.h:193
grid_group(__hip_uint32_t size)
Construct grid thread group (through the API this_grid())
Definition amd_hip_cooperative_groups.h:186
__hip_uint32_t grid_rank()
Definition amd_hip_cooperative_groups.h:141
static constexpr unsigned int thread_rank()
Rank of the thread within this tile.
Definition amd_hip_cooperative_groups.h:830
bool is_tiled
Definition amd_hip_cooperative_groups.h:55
unsigned long long ballot(int pred) const
Ballot function on group level.
Definition amd_hip_cooperative_groups.h:588
void sync() const
Synchronizes the threads in the group.
Definition amd_hip_cooperative_groups.h:149
T shfl_up(T var, unsigned int lane_delta) const
Definition amd_hip_cooperative_groups.h:874
static __hip_uint32_t block_rank()
Rank of the block in calling thread within [0, num_threads() ).
Definition amd_hip_cooperative_groups.h:280
static dim3 group_index()
Returns 3-dimensional block index within the grid.
Definition amd_hip_cooperative_groups.h:272
unsigned int num_threads
Definition amd_hip_cooperative_groups.h:63
static void sync()
Synchronizes the threads in the group.
Definition amd_hip_cooperative_groups.h:292
friend coalesced_group binary_partition(const thread_block_tile< fsize, fparent > &tgrp, bool pred)
__hip_uint32_t thread_rank() const
Rank of the calling thread within [0, num_threads() ).
Definition amd_hip_cooperative_groups.h:143
bool is_valid() const
Returns true if the group has not violated any API constraints.
static unsigned int size()
Definition amd_hip_cooperative_groups.h:839
friend thread_group tiled_partition(const thread_group &parent, unsigned int tile_size)
User-exposed API to partition groups.
Definition amd_hip_cooperative_groups.h:1006
int any(int pred) const
Definition amd_hip_cooperative_groups.h:888
int all(int pred) const
Definition amd_hip_cooperative_groups.h:890
__hip_uint64_t _mask
Lane mask of the thread_group.
Definition amd_hip_cooperative_groups.h:38
friend thread_group this_thread()
Definition amd_hip_cooperative_groups.h:994
bool is_valid() const
Returns true if the group has not violated any API constraints.
Definition amd_hip_cooperative_groups.h:195
__hip_uint32_t _type
Type of the thread_group.
Definition amd_hip_cooperative_groups.h:36
unsigned long long match_any(T value) const
Definition amd_hip_cooperative_groups.h:892
unsigned int size() const
Total number of threads in the group (alias of num_threads())
Definition amd_hip_cooperative_groups.h:363
struct cooperative_groups::thread_group::_coalesced_info coalesced_info
lane_mask member_mask
Definition amd_hip_cooperative_groups.h:62
thread_block(__hip_uint32_t size)
Definition amd_hip_cooperative_groups.h:246
unsigned int meta_group_size
Definition amd_hip_cooperative_groups.h:58
multi_grid_group(__hip_uint32_t size)
Construct multi-grid thread group (through the API this_multi_grid())
Definition amd_hip_cooperative_groups.h:131
int any(int pred) const
Any function on group level.
Definition amd_hip_cooperative_groups.h:600
thread_group this_thread()
Definition amd_hip_cooperative_groups.h:994
cluster_group this_cluster()
Get the cluster group.
Definition amd_hip_cooperative_groups.h:1416
static unsigned int meta_group_rank()
Definition amd_hip_cooperative_groups.h:910
__hip_uint32_t num_threads() const
Definition amd_hip_cooperative_groups.h:76
struct _tiled_info tiled_info
Definition amd_hip_cooperative_groups.h:64
friend coalesced_group coalesced_threads()
User-exposed API to create coalesced groups.
Definition amd_hip_cooperative_groups.h:656
friend coalesced_group binary_partition(const coalesced_group &cgrp, bool pred)
Binary partition.
Definition amd_hip_cooperative_groups.h:1257
static dim3 thread_index()
Returns 3-dimensional thread index within the block.
Definition amd_hip_cooperative_groups.h:274
friend class thread_block
Definition amd_hip_cooperative_groups.h:70
coalesced_group(lane_mask member_mask)
Definition amd_hip_cooperative_groups.h:447
T shfl_down(T var, unsigned int lane_delta) const
Shuffle down operation on group level.
Definition amd_hip_cooperative_groups.h:520
static bool is_valid()
Returns true if the group has not violated any API constraints.
Definition amd_hip_cooperative_groups.h:290
dim3 group_dim()
Returns the group dimensions.
Definition amd_hip_cooperative_groups.h:294
thread_block_tile_type(unsigned int meta_group_rank, unsigned int meta_group_size)
Definition amd_hip_cooperative_groups.h:963
static __hip_uint32_t num_threads()
Definition amd_hip_cooperative_groups.h:284
unsigned long long ballot(int pred) const
Definition amd_hip_cooperative_groups.h:883
T shfl(T var, int srcRank) const
Shuffle operation on group level.
Definition amd_hip_cooperative_groups.h:495
arrival_token barrier_arrive() const
Arrive at a barrier.
Definition amd_hip_cooperative_groups.h:297
bool is_valid() const
Returns true if the group has not violated any API constraints.
Definition amd_hip_cooperative_groups.h:147
static __hip_uint32_t thread_rank()
Rank of the calling thread within [0, num_threads() ).
Definition amd_hip_cooperative_groups.h:276
friend grid_group this_grid()
User-exposed API interface to construct grid cooperative group type object - grid_group.
Definition amd_hip_cooperative_groups.h:224
T shfl_down(T var, unsigned int lane_delta) const
Definition amd_hip_cooperative_groups.h:870
T shfl_up(T var, unsigned int lane_delta) const
Shuffle up operation on group level.
Definition amd_hip_cooperative_groups.h:557
thread_block_tile_type(unsigned int meta_group_rank, unsigned int meta_group_size)
Definition amd_hip_cooperative_groups.h:939
__hip_uint32_t thread_rank() const
Rank of the calling thread within [0, num_threads() ).
Definition amd_hip_cooperative_groups.h:24
Definition amd_hip_cooperative_groups.h:1315
T operator()(T lhs, T rhs) const
Definition amd_hip_cooperative_groups.h:1316
Definition amd_hip_cooperative_groups.h:1331
T operator()(T lhs, T rhs) const
Definition amd_hip_cooperative_groups.h:1332
Definition amd_hip_cooperative_groups.h:1323
T operator()(T lhs, T rhs) const
Definition amd_hip_cooperative_groups.h:1324
Definition amd_hip_cooperative_groups.h:1307
T operator()(T lhs, T rhs) const
Definition amd_hip_cooperative_groups.h:1308
Definition amd_hip_cooperative_groups.h:199
tiled_partition_internal(const thread_block &g)
Definition amd_hip_cooperative_groups.h:1212
tiled_partition_internal(const thread_block_tile< ParentSize, GrandParentCGTy > &g)
Definition amd_hip_cooperative_groups.h:1222
Definition amd_hip_cooperative_groups.h:1208
Definition amd_hip_cooperative_groups.h:1299
T operator()(T lhs, T rhs) const
Definition amd_hip_cooperative_groups.h:1300
Definition amd_hip_cooperative_groups.h:1291
T operator()(T lhs, T rhs) const
Definition amd_hip_cooperative_groups.h:1292
Definition amd_hip_cooperative_groups.h:295
Definition amd_hip_cooperative_groups.h:61
Definition amd_hip_cooperative_groups.h:54
Definition hip_runtime_api.h:1298