Cooperative Groups

Cooperative Groups#

HIP Runtime API Reference: Cooperative Groups
Collaboration diagram for Cooperative Groups:

Namespaces

namespace  cooperative_groups::impl
 

Data Structures

struct  cooperative_groups::thread_group::_tiled_info
 
struct  cooperative_groups::thread_group::_coalesced_info
 
class  cooperative_groups::multi_grid_group
 The multi-grid cooperative group type. More...
 
class  cooperative_groups::grid_group
 The grid cooperative group type. More...
 
class  cooperative_groups::thread_block
 The workgroup (thread-block in CUDA terminology) cooperative group type. More...
 
class  cooperative_groups::tiled_group
 The tiled_group cooperative group type. More...
 
class  cooperative_groups::thread_block_tile< size, ParentCGTy >
 Group type - thread_block_tile. More...
 
class  cooperative_groups::coalesced_group
 The coalesced_group cooperative group type. More...
 
class  cooperative_groups::tile_base< tileSize >
 
class  cooperative_groups::thread_block_tile_base< size >
 
class  cooperative_groups::parent_group_info< tileSize, ParentCGTy >
 User exposed API that captures the state of the parent group pre-partition. More...
 
class  cooperative_groups::thread_block_tile_type< tileSize, ParentCGTy >
 Group type - thread_block_tile. More...
 
class  cooperative_groups::thread_block_tile_type< tileSize, void >
 
class  cooperative_groups::thread_block_tile< size, void >
 

Functions

 cooperative_groups::thread_group::thread_group (internal::group_type type, uint32_t size=static_cast< uint64_t >(0), uint64_t mask=static_cast< uint64_t >(0))
 
uint32_t cooperative_groups::thread_group::size () const
 
unsigned int cooperative_groups::thread_group::cg_type () const
 Returns the type of the group.
 
uint32_t cooperative_groups::thread_group::thread_rank () const
 Rank of the calling thread within [0, size() ).
 
bool cooperative_groups::thread_group::is_valid () const
 Returns true if the group has not violated any API constraints.
 
void cooperative_groups::thread_group::sync () const
 Synchronizes the threads in the group.
 
 cooperative_groups::multi_grid_group::multi_grid_group (uint32_t size)
 Construct mutli-grid thread group (through the API this_multi_grid())
 
uint32_t cooperative_groups::multi_grid_group::num_grids ()
 
uint32_t cooperative_groups::multi_grid_group::grid_rank ()
 
uint32_t cooperative_groups::multi_grid_group::thread_rank () const
 Rank of the calling thread within [0, size() ).
 
bool cooperative_groups::multi_grid_group::is_valid () const
 Returns true if the group has not violated any API constraints.
 
void cooperative_groups::multi_grid_group::sync () const
 Synchronizes the threads in the group.
 
multi_grid_group cooperative_groups::this_multi_grid ()
 User-exposed API interface to construct grid cooperative group type object - multi_grid_group.
 
 cooperative_groups::grid_group::grid_group (uint32_t size)
 Construct grid thread group (through the API this_grid())
 
uint32_t cooperative_groups::grid_group::thread_rank () const
 Rank of the calling thread within [0, size() ).
 
bool cooperative_groups::grid_group::is_valid () const
 Returns true if the group has not violated any API constraints.
 
void cooperative_groups::grid_group::sync () const
 Synchronizes the threads in the group.
 
dim3 cooperative_groups::grid_group::group_dim () const
 
grid_group cooperative_groups::this_grid ()
 User-exposed API interface to construct grid cooperative group type object - grid_group.
 
 cooperative_groups::thread_block::thread_block (uint32_t size)
 
thread_group cooperative_groups::thread_block::new_tiled_group (unsigned int tile_size) const
 
static dim3 cooperative_groups::thread_block::group_index ()
 Returns 3-dimensional block index within the grid.
 
static dim3 cooperative_groups::thread_block::thread_index ()
 Returns 3-dimensional thread index within the block.
 
static uint32_t cooperative_groups::thread_block::thread_rank ()
 Rank of the calling thread within [0, size() ).
 
static uint32_t cooperative_groups::thread_block::size ()
 
static bool cooperative_groups::thread_block::is_valid ()
 Returns true if the group has not violated any API constraints.
 
static void cooperative_groups::thread_block::sync ()
 Synchronizes the threads in the group.
 
dim3 cooperative_groups::thread_block::group_dim ()
 Returns the group dimensions.
 
thread_block cooperative_groups::this_thread_block ()
 User-exposed API interface to construct workgroup cooperative group type object - thread_block.
 
 cooperative_groups::tiled_group::tiled_group (unsigned int tileSize)
 
unsigned int cooperative_groups::tiled_group::size () const
 
unsigned int cooperative_groups::tiled_group::thread_rank () const
 Rank of the calling thread within [0, size() ).
 
void cooperative_groups::tiled_group::sync () const
 Synchronizes the threads in the group.
 
 cooperative_groups::coalesced_group::coalesced_group (lane_mask member_mask)
 
unsigned int cooperative_groups::coalesced_group::size () const
 
unsigned int cooperative_groups::coalesced_group::thread_rank () const
 Rank of the calling thread within [0, size() ).
 
void cooperative_groups::coalesced_group::sync () const
 Synchronizes the threads in the group.
 
unsigned int cooperative_groups::coalesced_group::meta_group_rank () const
 
unsigned int cooperative_groups::coalesced_group::meta_group_size () const
 Returns the number of groups created when the parent group was partitioned.
 
template<class T >
cooperative_groups::coalesced_group::shfl (T var, int srcRank) const
 Shuffle operation on group level.
 
template<class T >
cooperative_groups::coalesced_group::shfl_down (T var, unsigned int lane_delta) const
 Shuffle down operation on group level.
 
template<class T >
cooperative_groups::coalesced_group::shfl_up (T var, unsigned int lane_delta) const
 Shuffle up operation on group level.
 
unsigned long long cooperative_groups::coalesced_group::ballot (int pred) const
 Ballot function on group level.
 
int cooperative_groups::coalesced_group::any (int pred) const
 Any function on group level.
 
int cooperative_groups::coalesced_group::all (int pred) const
 All function on group level.
 
template<typename T >
unsigned long long cooperative_groups::coalesced_group::match_any (T value) const
 Match any function on group level.
 
template<typename T >
unsigned long long cooperative_groups::coalesced_group::match_all (T value, int &pred) const
 Match all function on group level.
 
coalesced_group cooperative_groups::coalesced_threads ()
 User-exposed API to create coalesced groups.
 
template<class CGTy >
uint32_t cooperative_groups::group_size (CGTy const &g)
 Returns the size of the group.
 
template<class CGTy >
uint32_t cooperative_groups::thread_rank (CGTy const &g)
 Returns the rank of thread of the group.
 
template<class CGTy >
bool cooperative_groups::is_valid (CGTy const &g)
 Returns true if the group has not violated any API constraints.
 
template<class CGTy >
void cooperative_groups::sync (CGTy const &g)
 Synchronizes the threads in the group.
 
static constexpr unsigned int cooperative_groups::tile_base< tileSize >::thread_rank ()
 Rank of the thread within this tile.
 
static unsigned int cooperative_groups::tile_base< tileSize >::size ()
 Number of threads within this tile.
 
static void cooperative_groups::thread_block_tile_base< size >::sync ()
 
template<class T >
cooperative_groups::thread_block_tile_base< size >::shfl (T var, int srcRank) const
 
template<class T >
cooperative_groups::thread_block_tile_base< size >::shfl_down (T var, unsigned int lane_delta) const
 
template<class T >
cooperative_groups::thread_block_tile_base< size >::shfl_up (T var, unsigned int lane_delta) const
 
template<class T >
cooperative_groups::thread_block_tile_base< size >::shfl_xor (T var, unsigned int laneMask) const
 
unsigned long long cooperative_groups::thread_block_tile_base< size >::ballot (int pred) const
 
int cooperative_groups::thread_block_tile_base< size >::any (int pred) const
 
int cooperative_groups::thread_block_tile_base< size >::all (int pred) const
 
template<typename T >
unsigned long long cooperative_groups::thread_block_tile_base< size >::match_any (T value) const
 
template<typename T >
unsigned long long cooperative_groups::thread_block_tile_base< size >::match_all (T value, int &pred) const
 
static unsigned int cooperative_groups::parent_group_info< tileSize, ParentCGTy >::meta_group_rank ()
 
static unsigned int cooperative_groups::parent_group_info< tileSize, ParentCGTy >::meta_group_size ()
 Returns the number of groups created when the parent group was partitioned.
 
 cooperative_groups::thread_block_tile_type< tileSize, ParentCGTy >::thread_block_tile_type ()
 
 cooperative_groups::thread_block_tile_type< tileSize, void >::thread_block_tile_type (unsigned int meta_group_rank, unsigned int meta_group_size)
 
unsigned int cooperative_groups::thread_block_tile_type< tileSize, void >::meta_group_rank () const
 
unsigned int cooperative_groups::thread_block_tile_type< tileSize, void >::meta_group_size () const
 Returns the number of groups created when the parent group was partitioned.
 
thread_group cooperative_groups::this_thread ()
 
thread_group cooperative_groups::tiled_partition (const thread_group &parent, unsigned int tile_size)
 User-exposed API to partition groups.
 
thread_group cooperative_groups::tiled_partition (const thread_block &parent, unsigned int tile_size)
 
tiled_group cooperative_groups::tiled_partition (const tiled_group &parent, unsigned int tile_size)
 
coalesced_group cooperative_groups::tiled_partition (const coalesced_group &parent, unsigned int tile_size)
 
template<unsigned int size, class ParentCGTy >
thread_block_tile< size, ParentCGTy > cooperative_groups::tiled_partition (const ParentCGTy &g)
 Create a partition.
 
coalesced_group cooperative_groups::binary_partition (const coalesced_group &cgrp, bool pred)
 Binary partition.
 
template<unsigned int size, class parent >
coalesced_group cooperative_groups::binary_partition (const thread_block_tile< size, parent > &tgrp, bool pred)
 Binary partition.
 

Variables

uint32_t cooperative_groups::thread_group::_type
 
uint32_t cooperative_groups::thread_group::_size
 Type of the thread_group.
 
uint64_t cooperative_groups::thread_group::_mask
 Total number of threads in the tread_group.
 
bool cooperative_groups::thread_group::_tiled_info::is_tiled
 
unsigned int cooperative_groups::thread_group::_tiled_info::size
 
unsigned int cooperative_groups::thread_group::_tiled_info::meta_group_rank
 
unsigned int cooperative_groups::thread_group::_tiled_info::meta_group_size
 
lane_mask cooperative_groups::thread_group::_coalesced_info::member_mask
 
unsigned int cooperative_groups::thread_group::_coalesced_info::size
 
struct _tiled_info cooperative_groups::thread_group::_coalesced_info::tiled_info
 
struct cooperative_groups::thread_group::_coalesced_info cooperative_groups::thread_group::coalesced_info
 
static constexpr unsigned int cooperative_groups::tile_base< tileSize >::numThreads = tileSize
 

Friends

class cooperative_groups::thread_group::thread_block
 
thread_group cooperative_groups::thread_group::this_thread ()
 
thread_group cooperative_groups::thread_group::tiled_partition (const thread_group &parent, unsigned int tile_size)
 User-exposed API to partition groups.
 
multi_grid_group cooperative_groups::multi_grid_group::this_multi_grid ()
 User-exposed API interface to construct grid cooperative group type object - multi_grid_group.
 
grid_group cooperative_groups::grid_group::this_grid ()
 User-exposed API interface to construct grid cooperative group type object - grid_group.
 
thread_block cooperative_groups::thread_block::this_thread_block ()
 User-exposed API interface to construct workgroup cooperative group type object - thread_block.
 
thread_group cooperative_groups::thread_block::tiled_partition (const thread_group &parent, unsigned int tile_size)
 User-exposed API to partition groups.
 
thread_group cooperative_groups::thread_block::tiled_partition (const thread_block &parent, unsigned int tile_size)
 
thread_group cooperative_groups::tiled_group::tiled_partition (const thread_group &parent, unsigned int tile_size)
 User-exposed API to partition groups.
 
tiled_group cooperative_groups::tiled_group::tiled_partition (const tiled_group &parent, unsigned int tile_size)
 
coalesced_group cooperative_groups::coalesced_group::coalesced_threads ()
 User-exposed API to create coalesced groups.
 
thread_group cooperative_groups::coalesced_group::tiled_partition (const thread_group &parent, unsigned int tile_size)
 User-exposed API to partition groups.
 
coalesced_group cooperative_groups::coalesced_group::tiled_partition (const coalesced_group &parent, unsigned int tile_size)
 
coalesced_group cooperative_groups::coalesced_group::binary_partition (const coalesced_group &cgrp, bool pred)
 Binary partition.
 
template<unsigned int fsize, class fparent >
coalesced_group cooperative_groups::coalesced_group::binary_partition (const thread_block_tile< fsize, fparent > &tgrp, bool pred)
 
template<unsigned int fsize, class fparent >
coalesced_group cooperative_groups::thread_block_tile_base< size >::binary_partition (const thread_block_tile< fsize, fparent > &tgrp, bool pred)
 

Detailed Description

This section describes the cooperative groups functions of HIP runtime API.

The cooperative groups provides flexible thread parallel programming algorithms, threads cooperate and share data to perform collective computations.

Note
Cooperative groups feature is implemented on Linux, under development on Microsoft Windows.

Function Documentation

◆ all() [1/2]

int cooperative_groups::coalesced_group::all ( int  pred) const
inline

All function on group level.

Returns non-zero if a predicate evaluates true for all threads.

Parameters
pred[in] The predicate to evaluate on group threads.

◆ all() [2/2]

template<unsigned int size>
int cooperative_groups::thread_block_tile_base< size >::all ( int  pred) const
inline

◆ any() [1/2]

int cooperative_groups::coalesced_group::any ( int  pred) const
inline

Any function on group level.

Returns non-zero if a predicate evaluates true for any threads.

Parameters
pred[in] The predicate to evaluate on group threads.

◆ any() [2/2]

template<unsigned int size>
int cooperative_groups::thread_block_tile_base< size >::any ( int  pred) const
inline

◆ ballot() [1/2]

unsigned long long cooperative_groups::coalesced_group::ballot ( int  pred) const
inline

Ballot function on group level.

Returns a bit mask with the Nth bit set to one if the specified predicate evaluates as true on the Nth thread.

Parameters
pred[in] The predicate to evaluate on group threads.

◆ ballot() [2/2]

template<unsigned int size>
unsigned long long cooperative_groups::thread_block_tile_base< size >::ballot ( int  pred) const
inline

◆ binary_partition() [1/2]

coalesced_group cooperative_groups::binary_partition ( const coalesced_group cgrp,
bool  pred 
)

Binary partition.

This splits the input thread group into two partitions determined by predicate.

Parameters
cgrp[in] The coalesced group for split.
pred[in] The predicate used during the group split up.

◆ binary_partition() [2/2]

template<unsigned int size, class parent >
coalesced_group cooperative_groups::binary_partition ( const thread_block_tile< size, parent > &  tgrp,
bool  pred 
)

Binary partition.

This splits the input thread group into two partitions determined by predicate.

Template Parameters
sizeThe size of the input thread block tile group.
parentThe cooperative group class template parameter of the input group.
Parameters
tgrp[in] The thread block tile group for split.
pred[in] The predicate used during the group split up.

◆ cg_type()

unsigned int cooperative_groups::thread_group::cg_type ( ) const
inline

Returns the type of the group.

◆ coalesced_group()

cooperative_groups::coalesced_group::coalesced_group ( lane_mask  member_mask)
inlineexplicitprotected

◆ coalesced_threads()

coalesced_group cooperative_groups::coalesced_threads ( )

User-exposed API to create coalesced groups.

A collective operation that groups all active lanes into a new thread group.

Note
This function is implemented on Linux and is under development on Microsoft Windows.

◆ grid_group()

cooperative_groups::grid_group::grid_group ( uint32_t  size)
inlineexplicitprotected

Construct grid thread group (through the API this_grid())

◆ grid_rank()

uint32_t cooperative_groups::multi_grid_group::grid_rank ( )
inline

Rank of this invocation. In other words, an ID number within the range [0, num_grids()) of the GPU that kernel is running on.

◆ group_dim() [1/2]

dim3 cooperative_groups::thread_block::group_dim ( )
inline

Returns the group dimensions.

◆ group_dim() [2/2]

dim3 cooperative_groups::grid_group::group_dim ( ) const
inline

◆ group_index()

static dim3 cooperative_groups::thread_block::group_index ( )
inlinestatic

Returns 3-dimensional block index within the grid.

◆ group_size()

template<class CGTy >
uint32_t cooperative_groups::group_size ( CGTy const &  g)

Returns the size of the group.

Total number of threads in the thread group, and this serves the purpose for all derived cooperative group types because their size is directly saved during the construction.

Template Parameters
CGTyThe cooperative group class template parameter.
Parameters
g[in] The cooperative group for size returns.
Note
Implementation of publicly exposed wrapper API on top of basic cooperative group type APIs. This function is implemented on Linux and is under development on Microsoft Windows.

◆ is_valid() [1/5]

static bool cooperative_groups::thread_block::is_valid ( )
inlinestatic

Returns true if the group has not violated any API constraints.

◆ is_valid() [2/5]

bool cooperative_groups::thread_group::is_valid ( ) const

Returns true if the group has not violated any API constraints.

◆ is_valid() [3/5]

bool cooperative_groups::multi_grid_group::is_valid ( ) const
inline

Returns true if the group has not violated any API constraints.

◆ is_valid() [4/5]

bool cooperative_groups::grid_group::is_valid ( ) const
inline

Returns true if the group has not violated any API constraints.

◆ is_valid() [5/5]

template<class CGTy >
bool cooperative_groups::is_valid ( CGTy const &  g)

Returns true if the group has not violated any API constraints.

Template Parameters
CGTyThe cooperative group class template parameter.
Parameters
g[in] The cooperative group for validity check.
Note
Implementation of publicly exposed wrapper API on top of basic cooperative group type APIs. This function is implemented on Linux and is under development on Microsoft Windows.

◆ match_all() [1/2]

template<typename T >
unsigned long long cooperative_groups::coalesced_group::match_all ( value,
int &  pred 
) const
inline

Match all function on group level.

Returns a bit mask containing a 1-bit for every participating thread if they all have the same value in value as the caller thread. The predicate pred is set to true if all participating threads have the same value in value.

Parameters
value[in] The value to examine on the current thread in group.
pred[out] The predicate is set to true if all participating threads in the thread group have the same value.

◆ match_all() [2/2]

template<unsigned int size>
template<typename T >
unsigned long long cooperative_groups::thread_block_tile_base< size >::match_all ( value,
int &  pred 
) const
inline

◆ match_any() [1/2]

template<typename T >
unsigned long long cooperative_groups::coalesced_group::match_any ( value) const
inline

Match any function on group level.

Returns a bit mask containing a 1-bit for every participating thread if that thread has the same value in value as the caller thread.

Parameters
value[in] The value to examine on the current thread in group.

◆ match_any() [2/2]

template<unsigned int size>
template<typename T >
unsigned long long cooperative_groups::thread_block_tile_base< size >::match_any ( value) const
inline

◆ meta_group_rank() [1/3]

template<unsigned int tileSize, typename ParentCGTy >
static unsigned int cooperative_groups::parent_group_info< tileSize, ParentCGTy >::meta_group_rank ( )
inlinestatic

Returns the linear rank of the group within the set of tiles partitioned from a parent group (bounded by meta_group_size)

◆ meta_group_rank() [2/3]

unsigned int cooperative_groups::coalesced_group::meta_group_rank ( ) const
inline

Returns the linear rank of the group within the set of tiles partitioned from a parent group (bounded by meta_group_size).

◆ meta_group_rank() [3/3]

template<unsigned int tileSize>
unsigned int cooperative_groups::thread_block_tile_type< tileSize, void >::meta_group_rank ( ) const
inline

Returns the linear rank of the group within the set of tiles partitioned from a parent group (bounded by meta_group_size)

◆ meta_group_size() [1/3]

template<unsigned int tileSize, typename ParentCGTy >
static unsigned int cooperative_groups::parent_group_info< tileSize, ParentCGTy >::meta_group_size ( )
inlinestatic

Returns the number of groups created when the parent group was partitioned.

◆ meta_group_size() [2/3]

unsigned int cooperative_groups::coalesced_group::meta_group_size ( ) const
inline

Returns the number of groups created when the parent group was partitioned.

◆ meta_group_size() [3/3]

template<unsigned int tileSize>
unsigned int cooperative_groups::thread_block_tile_type< tileSize, void >::meta_group_size ( ) const
inline

Returns the number of groups created when the parent group was partitioned.

◆ multi_grid_group()

cooperative_groups::multi_grid_group::multi_grid_group ( uint32_t  size)
inlineexplicitprotected

Construct mutli-grid thread group (through the API this_multi_grid())

◆ new_tiled_group()

thread_group cooperative_groups::thread_block::new_tiled_group ( unsigned int  tile_size) const
inlineprotected

◆ num_grids()

uint32_t cooperative_groups::multi_grid_group::num_grids ( )
inline

Number of invocations participating in this multi-grid group. In other words, the number of GPUs.

◆ shfl() [1/2]

template<class T >
T cooperative_groups::coalesced_group::shfl ( var,
int  srcRank 
) const
inline

Shuffle operation on group level.

Exchanging variables between threads without use of shared memory. Shuffle operation is a direct copy of var from srcRank thread ID of group.

Template Parameters
TThe type can be a 32-bit integer or single-precision floating point.
Parameters
var[in] The source variable to copy. Only the srcRank thread ID of group is copied to other threads.
srcRank[in] The source thread ID of the group for copy.

◆ shfl() [2/2]

template<unsigned int size>
template<class T >
T cooperative_groups::thread_block_tile_base< size >::shfl ( var,
int  srcRank 
) const
inline

◆ shfl_down() [1/2]

template<class T >
T cooperative_groups::coalesced_group::shfl_down ( var,
unsigned int  lane_delta 
) const
inline

Shuffle down operation on group level.

Exchanging variables between threads without use of shared memory. Shuffle down operation is copy of var from thread with thread ID of group relative higher with lane_delta to caller thread ID.

Template Parameters
TThe type can be a 32-bit integer or single-precision floating point.
Parameters
var[in] The source variable to copy.
lane_delta[in] The lane_delta is the relative thread ID difference between caller thread ID and source of copy thread ID. sourceID = (threadID + lane_delta) % size()

◆ shfl_down() [2/2]

template<unsigned int size>
template<class T >
T cooperative_groups::thread_block_tile_base< size >::shfl_down ( var,
unsigned int  lane_delta 
) const
inline

◆ shfl_up() [1/2]

template<class T >
T cooperative_groups::coalesced_group::shfl_up ( var,
unsigned int  lane_delta 
) const
inline

Shuffle up operation on group level.

Exchanging variables between threads without use of shared memory. Shuffle up operation is copy of var from thread with thread ID of group relative lower with lane_delta to caller thread ID.

Template Parameters
TThe type can be a 32-bit integer or single-precision floating point.
Parameters
var[in] The source variable to copy.
lane_delta[in] The lane_delta is the relative thread ID difference between caller thread ID and source of copy thread ID. sourceID = (threadID - lane_delta) % size()

◆ shfl_up() [2/2]

template<unsigned int size>
template<class T >
T cooperative_groups::thread_block_tile_base< size >::shfl_up ( var,
unsigned int  lane_delta 
) const
inline

◆ shfl_xor()

template<unsigned int size>
template<class T >
T cooperative_groups::thread_block_tile_base< size >::shfl_xor ( var,
unsigned int  laneMask 
) const
inline

◆ size() [1/5]

static uint32_t cooperative_groups::thread_block::size ( )
inlinestatic

Total number of threads in the thread_group, and this serves the purpose for all derived cooperative group types because their size is directly saved during the construction.

◆ size() [2/5]

template<unsigned int tileSize>
static unsigned int cooperative_groups::tile_base< tileSize >::size ( )
inlinestatic

Number of threads within this tile.

◆ size() [3/5]

uint32_t cooperative_groups::thread_group::size ( ) const
inline

Total number of threads in the thread_group, and this serves the purpose for all derived cooperative group types because their size is directly saved during the construction.

◆ size() [4/5]

unsigned int cooperative_groups::tiled_group::size ( ) const
inline

Total number of threads in the thread_group, and this serves the purpose for all derived cooperative group types because their size is directly saved during the construction.

◆ size() [5/5]

unsigned int cooperative_groups::coalesced_group::size ( ) const
inline

Total number of threads in the thread_group, and this serves the purpose for all derived cooperative group types because their size is directly saved during the construction.

◆ sync() [1/8]

static void cooperative_groups::thread_block::sync ( )
inlinestatic

Synchronizes the threads in the group.

Causes all threads in the group to wait at this synchronization point, and for all shared and global memory accesses by the threads to complete, before running synchronization. This guarantees the visibility of accessed data for all threads in the group.

Note
There are potential read-after-write (RAW), write-after-read (WAR), or write-after-write (WAW) hazards, when threads in the group access the same addresses in shared or global memory. The data hazards can be avoided with synchronization of the group.

◆ sync() [2/8]

template<unsigned int size>
static void cooperative_groups::thread_block_tile_base< size >::sync ( )
inlinestatic

◆ sync() [3/8]

void cooperative_groups::thread_group::sync ( ) const

Synchronizes the threads in the group.

Causes all threads in the group to wait at this synchronization point, and for all shared and global memory accesses by the threads to complete, before running synchronization. This guarantees the visibility of accessed data for all threads in the group.

Note
There are potential read-after-write (RAW), write-after-read (WAR), or write-after-write (WAW) hazards, when threads in the group access the same addresses in shared or global memory. The data hazards can be avoided with synchronization of the group.

◆ sync() [4/8]

void cooperative_groups::multi_grid_group::sync ( ) const
inline

Synchronizes the threads in the group.

Causes all threads in the group to wait at this synchronization point, and for all shared and global memory accesses by the threads to complete, before running synchronization. This guarantees the visibility of accessed data for all threads in the group.

Note
There are potential read-after-write (RAW), write-after-read (WAR), or write-after-write (WAW) hazards, when threads in the group access the same addresses in shared or global memory. The data hazards can be avoided with synchronization of the group.

◆ sync() [5/8]

void cooperative_groups::grid_group::sync ( ) const
inline

Synchronizes the threads in the group.

Causes all threads in the group to wait at this synchronization point, and for all shared and global memory accesses by the threads to complete, before running synchronization. This guarantees the visibility of accessed data for all threads in the group.

Note
There are potential read-after-write (RAW), write-after-read (WAR), or write-after-write (WAW) hazards, when threads in the group access the same addresses in shared or global memory. The data hazards can be avoided with synchronization of the group.

◆ sync() [6/8]

void cooperative_groups::tiled_group::sync ( ) const
inline

Synchronizes the threads in the group.

Causes all threads in the group to wait at this synchronization point, and for all shared and global memory accesses by the threads to complete, before running synchronization. This guarantees the visibility of accessed data for all threads in the group.

Note
There are potential read-after-write (RAW), write-after-read (WAR), or write-after-write (WAW) hazards, when threads in the group access the same addresses in shared or global memory. The data hazards can be avoided with synchronization of the group.

◆ sync() [7/8]

void cooperative_groups::coalesced_group::sync ( ) const
inline

Synchronizes the threads in the group.

Causes all threads in the group to wait at this synchronization point, and for all shared and global memory accesses by the threads to complete, before running synchronization. This guarantees the visibility of accessed data for all threads in the group.

Note
There are potential read-after-write (RAW), write-after-read (WAR), or write-after-write (WAW) hazards, when threads in the group access the same addresses in shared or global memory. The data hazards can be avoided with synchronization of the group.

◆ sync() [8/8]

template<class CGTy >
void cooperative_groups::sync ( CGTy const &  g)

Synchronizes the threads in the group.

Template Parameters
CGTyThe cooperative group class template parameter.
Parameters
g[in] The cooperative group for synchronization.
Note
Implementation of publicly exposed wrapper API on top of basic cooperative group type APIs. This function is implemented on Linux and is under development on Microsoft Windows.

◆ this_grid()

grid_group cooperative_groups::this_grid ( )

User-exposed API interface to construct grid cooperative group type object - grid_group.

User is not allowed to construct an object of type grid_group directly. Instead, they should construct it through this API function.

Note
This function is implemented on Linux and is under development on Microsoft Windows.

◆ this_multi_grid()

multi_grid_group cooperative_groups::this_multi_grid ( )

User-exposed API interface to construct grid cooperative group type object - multi_grid_group.

User is not allowed to construct an object of type multi_grid_group directly. Instead, they should construct it through this API function.

Note
This multi-grid cooperative API type is implemented on Linux, under development on Microsoft Windows.

◆ this_thread()

thread_group cooperative_groups::this_thread ( )

◆ this_thread_block()

thread_block cooperative_groups::this_thread_block ( )

User-exposed API interface to construct workgroup cooperative group type object - thread_block.

User is not allowed to construct an object of type thread_block directly. Instead, they should construct it through this API function.

Note
This function is implemented on Linux and is under development on Microsoft Windows.

◆ thread_block()

cooperative_groups::thread_block::thread_block ( uint32_t  size)
inlineexplicitprotected

◆ thread_block_tile_type() [1/2]

template<unsigned int tileSize, class ParentCGTy >
cooperative_groups::thread_block_tile_type< tileSize, ParentCGTy >::thread_block_tile_type ( )
inlineprotected

◆ thread_block_tile_type() [2/2]

template<unsigned int tileSize>
cooperative_groups::thread_block_tile_type< tileSize, void >::thread_block_tile_type ( unsigned int  meta_group_rank,
unsigned int  meta_group_size 
)
inlineprotected

◆ thread_group()

cooperative_groups::thread_group::thread_group ( internal::group_type  type,
uint32_t  size = static_cast<uint64_t>(0),
uint64_t  mask = static_cast<uint64_t>(0) 
)
inlineprotected

Lanemask for coalesced and tiled partitioned group types, LSB represents lane 0, and MSB represents lane 63 Construct a thread group, and set thread group type and other essential thread group properties. This generic thread group is directly constructed only when the group is supposed to contain only the calling the thread (through the API - this_thread()), and in all other cases, this thread group object is a sub-object of some other derived thread group object.

◆ thread_index()

static dim3 cooperative_groups::thread_block::thread_index ( )
inlinestatic

Returns 3-dimensional thread index within the block.

◆ thread_rank() [1/8]

static uint32_t cooperative_groups::thread_block::thread_rank ( )
inlinestatic

Rank of the calling thread within [0, size() ).

◆ thread_rank() [2/8]

template<unsigned int tileSize>
static constexpr unsigned int cooperative_groups::tile_base< tileSize >::thread_rank ( )
inlinestaticconstexpr

Rank of the thread within this tile.

◆ thread_rank() [3/8]

uint32_t cooperative_groups::thread_group::thread_rank ( ) const

Rank of the calling thread within [0, size() ).

◆ thread_rank() [4/8]

uint32_t cooperative_groups::multi_grid_group::thread_rank ( ) const
inline

Rank of the calling thread within [0, size() ).

◆ thread_rank() [5/8]

uint32_t cooperative_groups::grid_group::thread_rank ( ) const
inline

Rank of the calling thread within [0, size() ).

◆ thread_rank() [6/8]

unsigned int cooperative_groups::tiled_group::thread_rank ( ) const
inline

Rank of the calling thread within [0, size() ).

◆ thread_rank() [7/8]

unsigned int cooperative_groups::coalesced_group::thread_rank ( ) const
inline

Rank of the calling thread within [0, size() ).

◆ thread_rank() [8/8]

template<class CGTy >
uint32_t cooperative_groups::thread_rank ( CGTy const &  g)

Returns the rank of thread of the group.

Rank of the calling thread within [0, size() ).

Template Parameters
CGTyThe cooperative group class template parameter.
Parameters
g[in] The cooperative group for rank returns.
Note
Implementation of publicly exposed wrapper API on top of basic cooperative group type APIs. This function is implemented on Linux and is under development on Microsoft Windows.

◆ tiled_group()

cooperative_groups::tiled_group::tiled_group ( unsigned int  tileSize)
inlineexplicitprotected

◆ tiled_partition() [1/5]

coalesced_group cooperative_groups::tiled_partition ( const coalesced_group parent,
unsigned int  tile_size 
)

◆ tiled_partition() [2/5]

template<unsigned int size, class ParentCGTy >
thread_block_tile< size, ParentCGTy > cooperative_groups::tiled_partition ( const ParentCGTy &  g)

Create a partition.

This constructs a templated class derived from thread_group. The template defines the tile size of the new thread group at compile time.

Template Parameters
sizeThe new size of the partition.
ParentCGTyThe cooperative group class template parameter of the input group.
Parameters
g[in] The coalesced group for split.

◆ tiled_partition() [3/5]

thread_group cooperative_groups::tiled_partition ( const thread_block parent,
unsigned int  tile_size 
)

◆ tiled_partition() [4/5]

thread_group cooperative_groups::tiled_partition ( const thread_group parent,
unsigned int  tile_size 
)

User-exposed API to partition groups.

A collective operation that partitions the parent group into a one-dimensional, row-major, tiling of subgroups.

◆ tiled_partition() [5/5]

tiled_group cooperative_groups::tiled_partition ( const tiled_group parent,
unsigned int  tile_size 
)

Variable Documentation

◆ _mask

uint64_t cooperative_groups::thread_group::_mask
protected

Total number of threads in the tread_group.

◆ _size

uint32_t cooperative_groups::thread_group::_size
protected

Type of the thread_group.

◆ _type

uint32_t cooperative_groups::thread_group::_type
protected

◆ coalesced_info

struct cooperative_groups::thread_group::_coalesced_info cooperative_groups::thread_group::coalesced_info
protected

◆ is_tiled

bool cooperative_groups::thread_group::_tiled_info::is_tiled

◆ member_mask

lane_mask cooperative_groups::thread_group::_coalesced_info::member_mask

◆ meta_group_rank

unsigned int cooperative_groups::thread_group::_tiled_info::meta_group_rank

◆ meta_group_size

unsigned int cooperative_groups::thread_group::_tiled_info::meta_group_size

◆ numThreads

template<unsigned int tileSize>
constexpr unsigned int cooperative_groups::tile_base< tileSize >::numThreads = tileSize
staticconstexprprotected

◆ size [1/2]

unsigned int cooperative_groups::thread_group::_tiled_info::size

◆ size [2/2]

unsigned int cooperative_groups::thread_group::_coalesced_info::size

◆ tiled_info

struct _tiled_info cooperative_groups::thread_group::_coalesced_info::tiled_info

Friends

◆ binary_partition [1/3]

coalesced_group binary_partition ( const coalesced_group cgrp,
bool  pred 
)
friend

Binary partition.

This splits the input thread group into two partitions determined by predicate.

Parameters
cgrp[in] The coalesced group for split.
pred[in] The predicate used during the group split up.

◆ binary_partition [2/3]

template<unsigned int fsize, class fparent >
coalesced_group binary_partition ( const thread_block_tile< fsize, fparent > &  tgrp,
bool  pred 
)
friend

◆ binary_partition [3/3]

template<unsigned int size>
template<unsigned int fsize, class fparent >
coalesced_group binary_partition ( const thread_block_tile< fsize, fparent > &  tgrp,
bool  pred 
)
friend

◆ coalesced_threads

coalesced_group coalesced_threads ( )
friend

User-exposed API to create coalesced groups.

A collective operation that groups all active lanes into a new thread group.

Note
This function is implemented on Linux and is under development on Microsoft Windows.

◆ this_grid

grid_group this_grid ( )
friend

User-exposed API interface to construct grid cooperative group type object - grid_group.

Only these friend functions are allowed to construct an object of this class and access its resources.

User is not allowed to construct an object of type grid_group directly. Instead, they should construct it through this API function.

Note
This function is implemented on Linux and is under development on Microsoft Windows.

◆ this_multi_grid

multi_grid_group this_multi_grid ( )
friend

User-exposed API interface to construct grid cooperative group type object - multi_grid_group.

Only these friend functions are allowed to construct an object of this class and access its resources.

User is not allowed to construct an object of type multi_grid_group directly. Instead, they should construct it through this API function.

Note
This multi-grid cooperative API type is implemented on Linux, under development on Microsoft Windows.

◆ this_thread

thread_group this_thread ( )
friend

◆ this_thread_block

thread_block this_thread_block ( )
friend

User-exposed API interface to construct workgroup cooperative group type object - thread_block.

Only these friend functions are allowed to construct an object of thi class and access its resources

User is not allowed to construct an object of type thread_block directly. Instead, they should construct it through this API function.

Note
This function is implemented on Linux and is under development on Microsoft Windows.

◆ thread_block

friend class thread_block
friend

◆ tiled_partition [1/7]

coalesced_group tiled_partition ( const coalesced_group parent,
unsigned int  tile_size 
)
friend

◆ tiled_partition [2/7]

thread_group tiled_partition ( const thread_block parent,
unsigned int  tile_size 
)
friend

◆ tiled_partition [3/7]

thread_group tiled_partition ( const thread_group parent,
unsigned int  tile_size 
)
friend

User-exposed API to partition groups.

A collective operation that partitions the parent group into a one-dimensional, row-major, tiling of subgroups.

◆ tiled_partition [4/7]

thread_group tiled_partition ( const thread_group parent,
unsigned int  tile_size 
)
friend

User-exposed API to partition groups.

A collective operation that partitions the parent group into a one-dimensional, row-major, tiling of subgroups.

◆ tiled_partition [5/7]

thread_group tiled_partition ( const thread_group parent,
unsigned int  tile_size 
)
friend

User-exposed API to partition groups.

A collective operation that partitions the parent group into a one-dimensional, row-major, tiling of subgroups.

◆ tiled_partition [6/7]

thread_group tiled_partition ( const thread_group parent,
unsigned int  tile_size 
)
friend

User-exposed API to partition groups.

A collective operation that partitions the parent group into a one-dimensional, row-major, tiling of subgroups.

◆ tiled_partition [7/7]

tiled_group tiled_partition ( const tiled_group parent,
unsigned int  tile_size 
)
friend