Sparse generic functions#
This module contains all sparse generic routines.
The sparse generic routines describe operations that manipulate sparse matrices.
hipsparseAxpby()#
-
hipsparseStatus_t hipsparseAxpby(hipsparseHandle_t handle, const void *alpha, hipsparseConstSpVecDescr_t vecX, const void *beta, hipsparseDnVecDescr_t vecY)#
Scale a sparse vector and add it to a scaled dense vector.
hipsparseAxpby
multiplies the sparse vector \(x\) with scalar \(\alpha\) and adds the result to the dense vector \(y\) that is multiplied with scalar \(\beta\), such that\[ y := \alpha \cdot x + \beta \cdot y \]for(i = 0; i < size; ++i) { y[i] = beta * y[i] } for(i = 0; i < nnz; ++i) { y[xInd[i]] += alpha * xVal[i] }
hipsparseAxpby
supports the following precision data types for the sparse and dense vectors \(x\) and \(y\) and compute types for the scalars \(\alpha\) and \(\beta\).- Uniform Precisions:
X / Y / compute_type
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Mixed precisions:
X / Y
compute_type
HIP_R_16F
HIP_R_32F
HIP_R_16BF
HIP_R_32F
- Example
// Number of non-zeros of the sparse vector int nnz = 3; // Size of sparse and dense vector int size = 9; // Sparse index vector std::vector<int> hxInd = {0, 3, 5}; // Sparse value vector std::vector<float> hxVal = {1.0f, 2.0f, 3.0f}; // Dense vector std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; // Scalar alpha float alpha = 3.7f; // Scalar beta float beta = 1.2f; // Offload data to device int* dxInd; float* dxVal; float* dy; hipMalloc((void**)&dxInd, sizeof(int) * nnz); hipMalloc((void**)&dxVal, sizeof(float) * nnz); hipMalloc((void**)&dy, sizeof(float) * size); hipMemcpy(dxInd, hxInd.data(), sizeof(int) * nnz, hipMemcpyHostToDevice); hipMemcpy(dxVal, hxVal.data(), sizeof(float) * nnz, hipMemcpyHostToDevice); hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseCreate(&handle); // Create sparse vector X hipsparseSpVecDescr_t vecX; hipsparseCreateSpVec(&vecX, size, nnz, dxInd, dxVal, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); // Create dense vector Y hipsparseDnVecDescr_t vecY; hipsparseCreateDnVec(&vecY, size, dy, HIP_R_32F); // Call axpby to perform y = beta * y + alpha * x hipsparseAxpby(handle, &alpha, vecX, &beta, vecY); hipsparseDnVecGetValues(vecY, (void**)&dy); // Copy result back to host hipMemcpy(hy.data(), dy, sizeof(float) * size, hipMemcpyDeviceToHost); // Clear hipSPARSE hipsparseDestroySpVec(vecX); hipsparseDestroyDnVec(vecY); hipsparseDestroy(handle); // Clear device memory hipFree(dxInd); hipFree(dxVal); hipFree(dy);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
alpha – [in] scalar \(\alpha\).
vecX – [in] sparse matrix descriptor.
beta – [in] scalar \(\beta\).
vecY – [inout] dense matrix descriptor.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,vecX
,beta
orvecY
pointer is invalid.
hipsparseGather()#
-
hipsparseStatus_t hipsparseGather(hipsparseHandle_t handle, hipsparseConstDnVecDescr_t vecY, hipsparseSpVecDescr_t vecX)#
Gather elements from a dense vector and store them into a sparse vector.
hipsparseGather
gathers the elements from the dense vector \(y\) and stores them in the sparse vector \(x\).for(i = 0; i < nnz; ++i) { x_val[i] = y[x_ind[i]]; }
hipsparseGather
supports the following uniform precision data types for the sparse and dense vectors \(x\) and \(y\).- Uniform Precisions:
X / Y
HIP_R_8I
HIP_R_16F
HIP_R_16BF
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Example
// Number of non-zeros of the sparse vector int nnz = 3; // Size of sparse and dense vector int size = 9; // Sparse index vector std::vector<int> hxInd = {0, 3, 5}; // Dense vector std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; // Offload data to device int* dxInd; float* dxVal; float* dy; hipMalloc((void**)&dxInd, sizeof(int) * nnz); hipMalloc((void**)&dxVal, sizeof(float) * nnz); hipMalloc((void**)&dy, sizeof(float) * size); hipMemcpy(dxInd, hxInd.data(), sizeof(int) * nnz, hipMemcpyHostToDevice); hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseCreate(&handle); // Create sparse vector X hipsparseSpVecDescr_t vecX; hipsparseCreateSpVec(&vecX, size, nnz, dxInd, dxVal, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); // Create dense vector Y hipsparseDnVecDescr_t vecY; hipsparseCreateDnVec(&vecY, size, dy, HIP_R_32F); // Perform gather hipsparseGather(handle, vecY, vecX); hipsparseSpVecGetValues(vecX, (void**)&dxVal); // Copy result back to host std::vector<float> hxVal(nnz, 0.0f); hipMemcpy(hxVal.data(), dxVal, sizeof(float) * nnz, hipMemcpyDeviceToHost); // Clear hipSPARSE hipsparseDestroySpVec(vecX); hipsparseDestroyDnVec(vecY); hipsparseDestroy(handle); // Clear device memory hipFree(dxInd); hipFree(dxVal); hipFree(dy);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
vecY – [in] dense vector descriptor \(y\).
vecX – [out] sparse vector descriptor \(x\).
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,vecX
orvecY
pointer is invalid.
hipsparseScatter()#
-
hipsparseStatus_t hipsparseScatter(hipsparseHandle_t handle, hipsparseConstSpVecDescr_t vecX, hipsparseDnVecDescr_t vecY)#
Scatter elements from a sparse vector into a dense vector.
hipsparseScatter
scatters the elements from the sparse vector \(x\) in the dense vector \(y\).for(i = 0; i < nnz; ++i) { y[x_ind[i]] = x_val[i]; }
hipsparseScatter
supports the following uniform precision data types for the sparse and dense vectors \(x\) and \(y\).- Uniform Precisions:
X / Y
HIP_R_8I
HIP_R_16F
HIP_R_16BF
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Example
// Number of non-zeros of the sparse vector int nnz = 3; // Size of sparse and dense vector int size = 9; // Sparse index vector std::vector<int> hxInd = {0, 3, 5}; // Sparse value vector std::vector<float> hxVal = {1.0f, 2.0f, 3.0f}; // Dense vector std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; // Offload data to device int* dxInd; float* dxVal; float* dy; hipMalloc((void**)&dxInd, sizeof(int) * nnz); hipMalloc((void**)&dxVal, sizeof(float) * nnz); hipMalloc((void**)&dy, sizeof(float) * size); hipMemcpy(dxInd, hxInd.data(), sizeof(int) * nnz, hipMemcpyHostToDevice); hipMemcpy(dxVal, hxVal.data(), sizeof(float) * nnz, hipMemcpyHostToDevice); hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseCreate(&handle); // Create sparse vector X hipsparseSpVecDescr_t vecX; hipsparseCreateSpVec(&vecX, size, nnz, dxInd, dxVal, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); // Create dense vector Y hipsparseDnVecDescr_t vecY; hipsparseCreateDnVec(&vecY, size, dy, HIP_R_32F); // Perform scatter hipsparseScatter(handle, vecX, vecY); hipsparseDnVecGetValues(vecY, (void**)&dy); // Copy result back to host hipMemcpy(hy.data(), dy, sizeof(float) * size, hipMemcpyDeviceToHost); // Clear hipSPARSE hipsparseDestroySpVec(vecX); hipsparseDestroyDnVec(vecY); hipsparseDestroy(handle); // Clear device memory hipFree(dxInd); hipFree(dxVal); hipFree(dy);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
vecX – [in] sparse vector descriptor \(x\).
vecY – [out] dense vector descriptor \(y\).
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,vecX
orvecY
pointer is invalid.
hipsparseRot()#
-
hipsparseStatus_t hipsparseRot(hipsparseHandle_t handle, const void *c_coeff, const void *s_coeff, hipsparseSpVecDescr_t vecX, hipsparseDnVecDescr_t vecY)#
Apply Givens rotation to a dense and a sparse vector.
hipsparseRot
applies the Givens rotation matrix \(G\) to the sparse vector \(x\) and the dense vector \(y\), where\[\begin{split} G = \begin{pmatrix} c & s \\ -s & c \end{pmatrix} \end{split}\]for(i = 0; i < nnz; ++i) { x_tmp = x_val[i]; y_tmp = y[x_ind[i]]; x_val[i] = c * x_tmp + s * y_tmp; y[x_ind[i]] = c * y_tmp - s * x_tmp; }
hipsparseRot
supports the following uniform precision data types for the sparse and dense vectors \(x\) and \(y\) and compute types for the scalars \(c\) and \(s\).- Uniform Precisions:
X / Y / compute_type
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Example
// Number of non-zeros of the sparse vector int nnz = 3; // Size of sparse and dense vector int size = 9; // Sparse index vector std::vector<int> hxInd = {0, 3, 5}; // Sparse value vector std::vector<float> hxVal = {1.0f, 2.0f, 3.0f}; // Dense vector std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; // Scalar c float c = 3.7f; // Scalar s float s = 1.2f; // Offload data to device int* dxInd; float* dxVal; float* dy; hipMalloc((void**)&dxInd, sizeof(int) * nnz); hipMalloc((void**)&dxVal, sizeof(float) * nnz); hipMalloc((void**)&dy, sizeof(float) * size); hipMemcpy(dxInd, hxInd.data(), sizeof(int) * nnz, hipMemcpyHostToDevice); hipMemcpy(dxVal, hxVal.data(), sizeof(float) * nnz, hipMemcpyHostToDevice); hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseCreate(&handle); // Create sparse vector X hipsparseSpVecDescr_t vecX; hipsparseCreateSpVec(&vecX, size, nnz, dxInd, dxVal, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); // Create dense vector Y hipsparseDnVecDescr_t vecY; hipsparseCreateDnVec(&vecY, size, dy, HIP_R_32F); // Call rot hipsparseRot(handle, (void*)&c, (void*)&s, vecX, vecY); hipsparseSpVecGetValues(vecX, (void**)&dxVal); hipsparseDnVecGetValues(vecY, (void**)&dy); // Copy result back to host hipMemcpy(hxVal.data(), dxVal, sizeof(float) * nnz, hipMemcpyDeviceToHost); hipMemcpy(hy.data(), dy, sizeof(float) * size, hipMemcpyDeviceToHost); // Clear hipSPARSE hipsparseDestroySpVec(vecX); hipsparseDestroyDnVec(vecY); hipsparseDestroy(handle); // Clear device memory hipFree(dxInd); hipFree(dxVal); hipFree(dy);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
c_coeff – [in] pointer to the cosine element of \(G\), can be on host or device.
s_coeff – [in] pointer to the sine element of \(G\), can be on host or device.
vecX – [inout] sparse vector descriptor \(x\).
vecY – [inout] dense vector descriptor \(y\).
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,c_coeff
,s_coeff
,vecX
orvecY
pointer is invalid.
hipsparseSparseToDense_bufferSize()#
-
hipsparseStatus_t hipsparseSparseToDense_bufferSize(hipsparseHandle_t handle, hipsparseConstSpMatDescr_t matA, hipsparseDnMatDescr_t matB, hipsparseSparseToDenseAlg_t alg, size_t *pBufferSizeInBytes)#
hipsparseSparseToDense_bufferSize
computes the required user allocated buffer size needed when converting a sparse matrix to a dense matrix. This routine currently accepts the sparse matrix descriptormatA
in CSR, CSC, or COO format. This routine is used to determine the size of the buffer needed in hipsparseSparseToDense.hipsparseSparseToDense_bufferSize
supports different data types for the sparse and dense matrices. See hipsparseSparseToDense for a complete listing of all the data types available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
matA – [in] sparse matrix descriptor.
matB – [in] dense matrix descriptor.
alg – [in] algorithm for the sparse to dense computation.
pBufferSizeInBytes – [out] number of bytes of the temporary storage buffer.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,matA
,matB
, orpBufferSizeInBytes
pointer is invalid.
hipsparseSparseToDense()#
-
hipsparseStatus_t hipsparseSparseToDense(hipsparseHandle_t handle, hipsparseConstSpMatDescr_t matA, hipsparseDnMatDescr_t matB, hipsparseSparseToDenseAlg_t alg, void *externalBuffer)#
Sparse matrix to dense matrix conversion.
hipsparseSparseToDense
converts a sparse matrix to a dense matrix. This routine currently accepts the sparse matrix descriptormatA
in CSR, CSC, or COO format. This routine takes a user allocated buffer whose size must first be computed by calling hipsparseSparseToDense_bufferSizeThe conversion of a sparse matrix into a dense one involves two steps. First, the user creates the sparse and dense matrix descriptors and calls hipsparseSparseToDense_bufferSize to determine the size of the required temporary storage buffer. The user then allocates this buffer and passes it to hipsparseSparseToDense in order to complete the conversion. Once the conversion is complete, the user is free to deallocate the storage buffer. See full example below for details.
hipsparseSparseToDense
supports the following uniform precision data types for the sparse and dense matrices \(A\) and \(B\):- Uniform Precisions:
A / B
HIP_R_16F
HIP_R_16BF
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Example
// 1 0 0 0 // A = 4 2 0 4 // 0 3 7 0 // 9 0 0 1 int m = 4; int n = 4; int nnz = 8; std::vector<int> hcsrRowPtrA = {0, 1, 4, 6, 8}; std::vector<int> hcsrColIndA = {0, 0, 1, 3, 1, 2, 0, 3}; std::vector<float> hcsrValA = {1.0f, 4.0f, 2.0f, 4.0f, 3.0f, 7.0f, 9.0f, 1.0f}; int* dcsrRowPtrA; int* dcsrColIndA; float* dcsrValA; hipMalloc((void**)&dcsrRowPtrA, sizeof(int) * (m + 1)); hipMalloc((void**)&dcsrColIndA, sizeof(int) * nnz); hipMalloc((void**)&dcsrValA, sizeof(float) * nnz); hipMemcpy(dcsrRowPtrA, hcsrRowPtrA.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice); hipMemcpy(dcsrColIndA, hcsrColIndA.data(), sizeof(int) * nnz, hipMemcpyHostToDevice); hipMemcpy(dcsrValA, hcsrValA.data(), sizeof(float) * nnz, hipMemcpyHostToDevice); float* ddenseB; hipMalloc((void**)&ddenseB, sizeof(float) * m * n); hipsparseHandle_t handle; hipsparseSpMatDescr_t matA; hipsparseDnMatDescr_t matB; hipsparseCreate(&handle); hipsparseIndexType_t rowIdxTypeA = HIPSPARSE_INDEX_32I; hipsparseIndexType_t colIdxTypeA = HIPSPARSE_INDEX_32I; hipDataType dataTypeA = HIP_R_32F; hipsparseIndexBase_t idxBaseA = HIPSPARSE_INDEX_BASE_ZERO; // Create sparse matrix A hipsparseCreateCsr(&matA, m, n, nnz, dcsrRowPtrA, dcsrColIndA, dcsrValA, rowIdxTypeA, colIdxTypeA, idxBaseA, dataTypeA); // Create dense matrix B hipsparseCreateDnMat(&matB, m, n, m, ddenseB, HIP_R_32F, HIPSPARSE_ORDER_COL); hipsparseSparseToDenseAlg_t alg = HIPSPARSE_SPARSETODENSE_ALG_DEFAULT; size_t bufferSize; hipsparseSparseToDense_bufferSize(handle, matA, matB, alg, &bufferSize); void* tempBuffer; hipMalloc((void**)&tempBuffer, bufferSize); // Complete the conversion hipsparseSparseToDense(handle, matA, matB, alg, tempBuffer); // Copy result back to host std::vector<float> hdenseB(m * n); hipMemcpy(hdenseB.data(), ddenseB, sizeof(float) * m * n, hipMemcpyDeviceToHost); // Clear hipSPARSE hipsparseDestroyMatDescr(matA); hipsparseDestroyMatDescr(matB); hipsparseDestroy(handle); // Clear device memory hipFree(dcsrRowPtrA); hipFree(dcsrColIndA); hipFree(dcsrValA); hipFree(ddenseB); hipFree(tempBuffer);
Note
Currently only the sparse matrix formats CSR, CSC, and COO are supported when converting a sparse matrix to a dense matrix.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
matA – [in] sparse matrix descriptor.
matB – [in] dense matrix descriptor.
alg – [in] algorithm for the sparse to dense computation.
externalBuffer – [in] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,matA
,matB
, orexternalBuffer
pointer is invalid.
hipsparseDenseToSparse_bufferSize()#
-
hipsparseStatus_t hipsparseDenseToSparse_bufferSize(hipsparseHandle_t handle, hipsparseConstDnMatDescr_t matA, hipsparseSpMatDescr_t matB, hipsparseDenseToSparseAlg_t alg, size_t *pBufferSizeInBytes)#
hipsparseDenseToSparse_bufferSize
computes the required user allocated buffer size needed when converting a dense matrix to a sparse matrix. This routine currently accepts the sparse matrix descriptormatB
in CSR, CSC, or COO format. This routine is used to determine the size of the buffer needed in hipsparseDenseToSparse_analysis and hipsparseDenseToSparse_convert.hipsparseDenseToSparse_bufferSize
supports different data types for the dense and sparse matrices. See hipsparseDenseToSparse_convert for a complete listing of all the data types available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
matA – [in] dense matrix descriptor.
matB – [in] sparse matrix descriptor.
alg – [in] algorithm for the dense to sparse computation.
pBufferSizeInBytes – [out] number of bytes of the temporary storage buffer.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,matA
,matB
, orpBufferSizeInBytes
pointer is invalid.
hipsparseDenseToSparse_analysis()#
-
hipsparseStatus_t hipsparseDenseToSparse_analysis(hipsparseHandle_t handle, hipsparseConstDnMatDescr_t matA, hipsparseSpMatDescr_t matB, hipsparseDenseToSparseAlg_t alg, void *externalBuffer)#
hipsparseDenseToSparse_analysis
performs analysis that is later used in hipsparseDenseToSparse_convert when converting a dense matrix to sparse matrix. This routine currently accepts the sparse matrix descriptormatB
in CSR, CSC, or COO format. This routine takes a user allocated buffer whose size must first be computed using hipsparseDenseToSparse_bufferSize.hipsparseDenseToSparse_analysis
supports different data types for the dense and sparse matrices. See hipsparseDenseToSparse_convert for a complete listing of all the data types available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
matA – [in] dense matrix descriptor.
matB – [in] sparse matrix descriptor.
alg – [in] algorithm for the dense to sparse computation.
externalBuffer – [out] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,matA
,matB
, orexternalBuffer
pointer is invalid.
hipsparseDenseToSparse_convert()#
-
hipsparseStatus_t hipsparseDenseToSparse_convert(hipsparseHandle_t handle, hipsparseConstDnMatDescr_t matA, hipsparseSpMatDescr_t matB, hipsparseDenseToSparseAlg_t alg, void *externalBuffer)#
Dense matrix to sparse matrix conversion.
hipsparseDenseToSparse_convert
converts a dense matrix to a sparse matrix. This routine currently accepts the sparse matrix descriptormatB
in CSR, CSC, or COO format. This routine requires a user allocated buffer whose size must be determined by first calling hipsparseDenseToSparse_bufferSize.The conversion of a dense matrix into a sparse one involves three steps. First, the user creates the dense and sparse matrix descriptors. Because the number of non-zeros that will exist in the sparse matrix is not known apriori, when creating the sparse matrix descriptor, the user simply sets the arrays to
NULL
and the non-zero count to zero. For example, in the case of a CSR sparse matrix, this would look like:In the case of a COO sparse matrix, this would look like:hipsparseCreateCsr(&matB, m, n, 0, dcsrRowPtrB, // This array can be allocated as its size (i.e. m + 1) is known NULL, // Column indices array size is not yet known, pass NULL for now NULL, // Values array size is not yet known, pass NULL for now rowIdxTypeB, colIdxTypeB, idxBaseB, dataTypeB);
Once the descriptors have been created, the user calls hipsparseDenseToSparse_bufferSize. This routine will determine the size of the required temporary storage buffer. The user then allocates this buffer and passes it to hipsparseDenseToSparse_analysis which will perform analysis on the dense matrix in order to determine the number of non-zeros that will exist in the sparse matrix. Once this hipsparseDenseToSparse_analysis routine has been called, the non-zero count is stored in the sparse matrix descriptorhipsparseCreateCoo(&matB, m, n, 0, NULL, // Row indices array size is not yet known, pass NULL for now NULL, // Column indices array size is not yet known, pass NULL for now NULL, // Values array size is not yet known, pass NULL for now rowIdxTypeB, colIdxTypeB, idxBaseB, dataTypeB);
matB
. In order to allocate our remaining sparse matrix arrays, we query the sparse matrix descriptormatB
for this non-zero count:The remaining arrays are then allocated and set on the sparse matrix descriptor// Grab the non-zero count from the B matrix decriptor int64_t rows; int64_t cols; int64_t nnz; hipsparseSpMatGetSize(matB, &rows, &cols, &nnz);
matB
. Finally, we complete the conversion by calling hipsparseDenseToSparse_convert. Once the conversion is complete, the user is free to deallocate the storage buffer. See full example below for details.hipsparseDenseToSparse_convert
supports the following uniform precision data types for the dense and sparse matrices \(A\) and \(B\):- Uniform Precisions:
A / B
HIP_R_16F
HIP_R_16BF
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Example
// 1 0 0 0 // A = 4 2 0 4 // 0 3 7 0 // 9 0 0 1 int m = 4; int n = 4; std::vector<float> hdenseA = {1.0f, 4.0f, 0.0f, 9.0f, 0.0f, 2.0f, 3.0f, 0.0f, 0.0f, 0.0f, 7.0f, 0.0f, 0.0f, 4.0f, 0.0f, 1.0f}; float* ddenseA; hipMalloc((void**)&ddenseA, sizeof(float) * m * n); hipMemcpy(ddenseA, hdenseA.data(), sizeof(float) * m * n, hipMemcpyHostToDevice); int* dcsrRowPtrB; hipMalloc((void**)&dcsrRowPtrB, sizeof(int) * (m + 1)); hipsparseHandle_t handle; hipsparseDnMatDescr_t matA; hipsparseSpMatDescr_t matB; hipsparseCreate(&handle); // Create dense matrix A hipsparseCreateDnMat(&matA, m, n, m, ddenseA, HIP_R_32F, HIPSPARSE_ORDER_COL); hipsparseIndexType_t rowIdxTypeB = HIPSPARSE_INDEX_32I; hipsparseIndexType_t colIdxTypeB = HIPSPARSE_INDEX_32I; hipDataType dataTypeB = HIP_R_32F; hipsparseIndexBase_t idxBaseB = HIPSPARSE_INDEX_BASE_ZERO; // Create sparse matrix B hipsparseCreateCsr(&matB, m, n, 0, dcsrRowPtrB, NULL, NULL, rowIdxTypeB, colIdxTypeB, idxBaseB, dataTypeB); hipsparseDenseToSparseAlg_t alg = HIPSPARSE_DENSETOSPARSE_ALG_DEFAULT; size_t bufferSize; hipsparseDenseToSparse_bufferSize(handle, matA, matB, alg, &bufferSize); void* tempBuffer; hipMalloc((void**)&tempBuffer, bufferSize); // Perform analysis which will determine the number of non-zeros in the CSR matrix hipsparseDenseToSparse_analysis(handle, matA, matB, alg, tempBuffer); // Grab the non-zero count from the B matrix decriptor int64_t rows; int64_t cols; int64_t nnz; hipsparseSpMatGetSize(matB, &rows, &cols, &nnz); // Allocate the column indices and values arrays int* dcsrColIndB; float* dcsrValB; hipMalloc((void**)&dcsrColIndB, sizeof(int) * nnz); hipMalloc((void**)&dcsrValB, sizeof(float) * nnz); // Set the newly allocated arrays on the sparse matrix descriptor hipsparseCsrSetPointers(matB, dcsrRowPtrB, dcsrColIndB, dcsrValB); // Complete the conversion hipsparseDenseToSparse_convert(handle, matA, matB, alg, tempBuffer); // Copy result back to host std::vector<int> hcsrRowPtrB(m + 1); std::vector<int> hcsrColIndB(nnz); std::vector<float> hcsrValB(nnz); hipMemcpy(hcsrRowPtrB.data(), dcsrRowPtrB, sizeof(int) * (m + 1), hipMemcpyDeviceToHost); hipMemcpy(hcsrColIndB.data(), dcsrColIndB, sizeof(int) * nnz, hipMemcpyDeviceToHost); hipMemcpy(hcsrValB.data(), dcsrValB, sizeof(float) * nnz, hipMemcpyDeviceToHost); // Clear hipSPARSE hipsparseDestroyMatDescr(matA); hipsparseDestroyMatDescr(matB); hipsparseDestroy(handle); // Clear device memory hipFree(ddenseA); hipFree(dcsrRowPtrB); hipFree(dcsrColIndB); hipFree(dcsrValB); hipFree(tempBuffer);
Note
Currently only the sparse matrix formats CSR, CSC, and COO are supported when converting a dense matrix to a sparse matrix.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
matA – [in] dense matrix descriptor.
matB – [in] sparse matrix descriptor.
alg – [in] algorithm for the dense to sparse computation.
externalBuffer – [out] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,matA
,matB
, orexternalBuffer
pointer is invalid.
hipsparseSpVV_bufferSize()#
-
hipsparseStatus_t hipsparseSpVV_bufferSize(hipsparseHandle_t handle, hipsparseOperation_t opX, hipsparseConstSpVecDescr_t vecX, hipsparseConstDnVecDescr_t vecY, void *result, hipDataType computeType, size_t *pBufferSizeInBytes)#
hipsparseSpVV_bufferSize
computes the required user allocated buffer size needed when computing the inner dot product of a sparse vector with a dense vector:\[ \text{result} := op(x) \cdot y, \]hipsparseSpVV_bufferSize
supports multiple combinations of data types and compute types. See hipsparseSpVV for a complete listing of all the data type and compute type combinations available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
opX – [in] sparse vector operation type.
vecX – [in] sparse vector descriptor.
vecY – [in] dense vector descriptor.
result – [out] pointer to the result, can be host or device memory
computeType – [in] floating point precision for the SpVV computation.
pBufferSizeInBytes – [out] number of bytes of the temporary storage buffer.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,vecX
,vecY
,result
orpBufferSizeInBytes
pointer is invalid.HIPSPARSE_STATUS_NOT_SUPPORTED –
computeType
is currently not supported.
hipsparseSpVV()#
-
hipsparseStatus_t hipsparseSpVV(hipsparseHandle_t handle, hipsparseOperation_t opX, hipsparseConstSpVecDescr_t vecX, hipsparseConstDnVecDescr_t vecY, void *result, hipDataType computeType, void *externalBuffer)#
Compute the inner dot product of a sparse vector with a dense vector.
hipsparseSpVV
computes the inner dot product of the sparse vector \(x\) with the dense vector \(y\), such that\[ \text{result} := op(x) \cdot y, \]with\[\begin{split} op(x) = \left\{ \begin{array}{ll} x, & \text{if trans == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ \bar{x}, & \text{if trans == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \\ \end{array} \right. \end{split}\]result = 0; for(i = 0; i < nnz; ++i) { result += x_val[i] * y[x_ind[i]]; }
Performing the above operation involves two steps. First, the user calls
hipsparseSpVV_bufferSize
which will return the required temporary buffer size. The user then allocates this buffer. Finally, the user then completes the computation by callinghipsparseSpVV
with the newly allocated buffer. Once the computation is complete, the user is free to deallocate the buffer.hipsparseSpVV
supports the following uniform and mixed precision data types for the sparse and dense vectors \(x\) and \(y\) and compute types for the scalar \(result\).- Uniform Precisions:
X / Y / compute_type
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Mixed precisions:
X / Y
compute_type
HIP_R_8I
HIP_R_32I
HIP_R_8I
HIP_R_32F
HIP_R_16F
HIP_R_32F
HIP_R_16BF
HIP_R_32F
- Example
// Number of non-zeros of the sparse vector int nnz = 3; // Size of sparse and dense vector int size = 9; // Sparse index vector std::vector<int> hxInd = {0, 3, 5}; // Sparse value vector std::vector<float> hxVal = {1.0f, 2.0f, 3.0f}; // Dense vector std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; // Offload data to device int* dxInd; float* dxVal; float* dy; hipMalloc((void**)&dxInd, sizeof(int) * nnz); hipMalloc((void**)&dxVal, sizeof(float) * nnz); hipMalloc((void**)&dy, sizeof(float) * size); hipMemcpy(dxInd, hxInd.data(), sizeof(int) * nnz, hipMemcpyHostToDevice); hipMemcpy(dxVal, hxVal.data(), sizeof(float) * nnz, hipMemcpyHostToDevice); hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseCreate(&handle); // Create sparse vector X hipsparseSpVecDescr_t vecX; hipsparseCreateSpVec(&vecX, size, nnz, dxInd, dxVal, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); // Create dense vector Y hipsparseDnVecDescr_t vecY; hipsparseCreateDnVec(&vecY, size, dy, HIP_R_32F); // Obtain buffer size float hresult = 0.0f; size_t buffer_size; hipsparseSpVV_bufferSize(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, vecX, vecY, &hresult, HIP_R_32F, &buffer_size); void* temp_buffer; hipMalloc(&temp_buffer, buffer_size); // SpVV hipsparseSpVV(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, vecX, vecY, &hresult, HIP_R_32F, temp_buffer); hipDeviceSynchronize(); std::cout << "hresult: " << hresult << std::endl; // Clear hipSPARSE hipsparseDestroySpVec(vecX); hipsparseDestroyDnVec(vecY); hipsparseDestroy(handle); // Clear device memory hipFree(dxInd); hipFree(dxVal); hipFree(dy); hipFree(temp_buffer);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opX – [in] sparse vector operation type.
vecX – [in] sparse vector descriptor.
vecY – [in] dense vector descriptor.
result – [out] pointer to the result, can be host or device memory
computeType – [in] floating point precision for the SpVV computation.
externalBuffer – [out] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,vecX
,vecY
,result
orexternalBuffer
pointer is invalid.HIPSPARSE_STATUS_NOT_SUPPORTED –
computeType
is currently not supported.
hipsparseSpMV_bufferSize()#
-
hipsparseStatus_t hipsparseSpMV_bufferSize(hipsparseHandle_t handle, hipsparseOperation_t opA, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnVecDescr_t vecX, const void *beta, const hipsparseDnVecDescr_t vecY, hipDataType computeType, hipsparseSpMVAlg_t alg, size_t *pBufferSizeInBytes)#
hipsparseSpMV_bufferSize
computes the required user allocated buffer size needed when computing the sparse matrix multiplication with a dense vector:\[ y := \alpha \cdot op(A) \cdot x + \beta \cdot y, \]where \(op(A)\) is a sparse \(m \times n\) matrix in CSR, CSC, COO, or COO (AoS) format, \(x\) is a dense vector of length \(n\) and \(y\) is a dense vector of length \(m\).hipsparseSpMV_bufferSize
supports multiple combinations of data types and compute types. See hipsparseSpMV for a complete listing of all the data type and compute type combinations available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] matrix operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] matrix descriptor.
vecX – [in] vector descriptor.
beta – [in] scalar \(\beta\).
vecY – [inout] vector descriptor.
computeType – [in] floating point precision for the SpMV computation.
alg – [in] SpMV algorithm for the SpMV computation.
pBufferSizeInBytes – [out] number of bytes of the temporary storage buffer.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,matA
,x
,beta
,y
orpBufferSizeInBytes
pointer is invalid or ifopA
,computeType
,alg
is incorrect.HIPSPARSE_STATUS_NOT_SUPPORTED –
computeType
oralg
is currently not supported.
hipsparseSpMV_preprocess()#
-
hipsparseStatus_t hipsparseSpMV_preprocess(hipsparseHandle_t handle, hipsparseOperation_t opA, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnVecDescr_t vecX, const void *beta, const hipsparseDnVecDescr_t vecY, hipDataType computeType, hipsparseSpMVAlg_t alg, void *externalBuffer)#
hipsparseSpMV_preprocess
performs analysis on the sparse matrix \(op(A)\) when computing the sparse matrix multiplication with a dense vector:\[ y := \alpha \cdot op(A) \cdot x + \beta \cdot y, \]where \(op(A)\) is a sparse \(m \times n\) matrix in CSR, CSC, COO, or COO (AoS) format, \(x\) is a dense vector of length \(n\) and \(y\) is a dense vector of length \(m\). This step is optional but if used may results in better performance.hipsparseSpMV_preprocess
supports multiple combinations of data types and compute types. See hipsparseSpMV for a complete listing of all the data type and compute type combinations available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] matrix operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] matrix descriptor.
vecX – [in] vector descriptor.
beta – [in] scalar \(\beta\).
vecY – [inout] vector descriptor.
computeType – [in] floating point precision for the SpMV computation.
alg – [in] SpMV algorithm for the SpMV computation.
externalBuffer – [out] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,matA
,x
,beta
,y
orexternalBuffer
pointer is invalid or ifopA
,computeType
,alg
is incorrect.HIPSPARSE_STATUS_NOT_SUPPORTED –
computeType
oralg
is currently not supported.
hipsparseSpMV()#
-
hipsparseStatus_t hipsparseSpMV(hipsparseHandle_t handle, hipsparseOperation_t opA, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnVecDescr_t vecX, const void *beta, const hipsparseDnVecDescr_t vecY, hipDataType computeType, hipsparseSpMVAlg_t alg, void *externalBuffer)#
Compute the sparse matrix multiplication with a dense vector.
hipsparseSpMV
multiplies the scalar \(\alpha\) with a sparse \(m \times n\) matrix \(op(A)\), defined in CSR, CSC, COO, or COO (AoS) format, with the dense vector \(x\) and adds the result to the dense vector \(y\) that is multiplied by the scalar \(\beta\), such that\[ y := \alpha \cdot op(A) \cdot x + \beta \cdot y, \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if trans == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if trans == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if trans == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]Performing the above operation involves multiple steps. First the user calls hipsparseSpMV_bufferSize to determine the size of the required temporary storage buffer. The user then allocates this buffer and calls hipsparseSpMV_preprocess. Depending on the algorithm and sparse matrix format, this will perform analysis on the sparsity pattern of \(op(A)\). Finally the user completes the operation by calling
hipsparseSpMV
. The buffer size and preprecess routines only need to be called once for a given sparse matrix \(op(A)\) while the computation can be repeatedly used with different \(x\) and \(y\) vectors. Once all calls tohipsparseSpMV
are complete, the temporary buffer can be deallocated.hipsparseSpMV
supports multiple different algorithms. These algorithms have different trade offs depending on the sparsity pattern of the matrix, whether or not the results need to be deterministic, and how many times the sparse-vector product will be performed.CSR Algorithms
HIPSPARSE_SPMV_CSR_ALG1
HIPSPARSE_SPMV_CSR_ALG2
COO Algorithms
HIPSPARSE_SPMV_COO_ALG1
HIPSPARSE_SPMV_COO_ALG2
hipsparseSpMV
supports multiple combinations of data types and compute types. The tables below indicate the currently supported data types that can be used for the sparse matrix \(op(A)\) and the dense vectors \(x\) and \(y\) and the compute type for \(\alpha\) and \(\beta\). The advantage of using different data types is to save on memory bandwidth and storage when a user application allows while performing the actual computation in a higher precision.hipsparseSpMV
supports HIPSPARSE_INDEX_32I and HIPSPARSE_INDEX_64I index precisions for storing the row pointer and row/column indices arrays of the sparse matrices.- Uniform Precisions:
A / X / Y / compute_type
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Mixed precisions:
A / X
Y
compute_type
HIP_R_8I
HIP_R_32I
HIP_R_32I
HIP_R_8I
HIP_R_32F
HIP_R_32F
HIP_R_16F
HIP_R_32F
HIP_R_32F
HIP_R_16BF
HIP_R_32F
HIP_R_32F
- Mixed-regular real precisions
A
X / Y / compute_type
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Mixed-regular Complex precisions
A
X / Y / compute_type
HIP_R_32F
HIP_C_32F
HIP_R_64F
HIP_C_64F
- Example
// A, x, and y are m×k, k×1, and m×1 int m = 3, k = 4; int nnz_A = 8; hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE; // alpha and beta float alpha = 0.5f; float beta = 0.25f; std::vector<int> hcsrRowPtr = {0, 3, 5, 8}; std::vector<int> hcsrColInd = {0, 1, 3, 1, 2, 0, 2, 3}; std::vector<float> hcsrVal = {1, 2, 3, 4, 5, 6, 7, 8}; std::vector<float> hx(k, 1.0f); std::vector<float> hy(m, 1.0f); int *dcsrRowPtr; int *dcsrColInd; float *dcsrVal; hipMalloc((void**)&dcsrRowPtr, sizeof(int) * (m + 1)); hipMalloc((void**)&dcsrColInd, sizeof(int) * nnz_A); hipMalloc((void**)&dcsrVal, sizeof(float) * nnz_A); hipMemcpy(dcsrRowPtr, hcsrRowPtr.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice); hipMemcpy(dcsrColInd, hcsrColInd.data(), sizeof(int) * nnz_A, hipMemcpyHostToDevice); hipMemcpy(dcsrVal, hcsrVal.data(), sizeof(float) * nnz_A, hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseCreate(&handle); hipsparseSpMatDescr_t matA; hipsparseCreateCsr(&matA, m, k, nnz_A, dcsrRowPtr, dcsrColInd, dcsrVal, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); // Allocate memory for the vector x float* dx; hipMalloc((void**)&dx, sizeof(float) * k); hipMemcpy(dx, hx.data(), sizeof(float) * k, hipMemcpyHostToDevice); hipsparseDnVecDescr_t vecX; hipsparseCreateDnVec(&vecX, k, dx, HIP_R_32F); // Allocate memory for the resulting vector y float* dy; hipMalloc((void**)&dy, sizeof(float) * m); hipMemcpy(dy, hy.data(), sizeof(float) * m, hipMemcpyHostToDevice); hipsparseDnMatDescr_t vecY; hipsparseCreateDnVec(&vecY, m, dy, HIP_R_32F); // Compute buffersize size_t bufferSize; hipsparseSpMV_bufferSize(handle, transA, &alpha, matA, vecX, &beta, vecY, HIP_R_32F, HIPSPARSE_MV_ALG_DEFAULT, &bufferSize); void* buffer; hipMalloc(&buffer, bufferSize); // Preprocess operation (Optional) hipsparseSpMV_preprocess(handle, transA, &alpha, matA, vecX, &beta, vecY, HIP_R_32F, HIPSPARSE_MV_ALG_DEFAULT, buffer); // Perform operation hipsparseSpMV(handle, transA, &alpha, matA, vecX, &beta, vecY, HIP_R_32F, HIPSPARSE_MV_ALG_DEFAULT, buffer); // Copy device to host hipMemcpy(hy.data(), dy, sizeof(float) * m, hipMemcpyDeviceToHost); // Destroy matrix descriptors and handles hipsparseDestroySpMat(matA); hipsparseDestroyDnVec(vecX); hipsparseDestroyDnVec(vecY); hipsparseDestroy(handle); hipFree(buffer); hipFree(dcsrRowPtr); hipFree(dcsrColInd); hipFree(dcsrVal); hipFree(dx); hipFree(dy);
Note
None of the algorithms above are deterministic when \(A\) is transposed.
Note
The sparse matrix formats currently supported are: HIPSPARSE_FORMAT_COO, HIPSPARSE_FORMAT_COO_AOS, HIPSPARSE_FORMAT_CSR, and HIPSPARSE_FORMAT_CSC.
Note
Only the hipsparseSpMV_bufferSize and hipsparseSpMV routines are non blocking and executed asynchronously with respect to the host. They may return before the actual computation has finished. The hipsparseSpMV_preprocess routine is blocking with respect to the host.
Note
Only the hipsparseSpMV_bufferSize and the hipsparseSpMV routines support execution in a hipGraph context. The hipsparseSpMV_preprocess stage does not support hipGraph.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] matrix operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] matrix descriptor.
vecX – [in] vector descriptor.
beta – [in] scalar \(\beta\).
vecY – [inout] vector descriptor.
computeType – [in] floating point precision for the SpMV computation.
alg – [in] SpMV algorithm for the SpMV computation.
externalBuffer – [out] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,matA
,x
,beta
,y
orexternalBuffer
pointer is invalid or ifopA
,computeType
,alg
is incorrect.HIPSPARSE_STATUS_NOT_SUPPORTED –
computeType
oralg
is currently not supported.
hipsparseSpMM_bufferSize()#
-
hipsparseStatus_t hipsparseSpMM_bufferSize(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnMatDescr_t matB, const void *beta, const hipsparseDnMatDescr_t matC, hipDataType computeType, hipsparseSpMMAlg_t alg, size_t *pBufferSizeInBytes)#
hipsparseSpMM_bufferSize
computes the required user allocated buffer size needed when computing the sparse matrix multiplication with a dense matrix:\[ C := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]where \(op(A)\) is a sparse \(m \times k\) matrix in CSR, COO, BSR or Blocked ELL storage format, \(B\) is a dense matrix of size \(k \times n\) and \(C\) is a dense matrix of size \(m \times n\).hipsparseSpMM_bufferSize
supports multiple combinations of data types and compute types. See hipsparseSpMM for a complete listing of all the data type and compute type combinations available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] matrix operation type.
opB – [in] matrix operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] matrix descriptor.
matB – [in] matrix descriptor.
beta – [in] scalar \(\beta\).
matC – [in] matrix descriptor.
computeType – [in] floating point precision for the SpMM computation.
alg – [in] SpMM algorithm for the SpMM computation.
pBufferSizeInBytes – [out] number of bytes of the temporary storage buffer.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,matA
,matB
,matC
,beta
, orpBufferSizeInBytes
pointer is invalid.HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
,opB
,computeType
oralg
is currently not supported.
hipsparseSpMM_preprocess()#
-
hipsparseStatus_t hipsparseSpMM_preprocess(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnMatDescr_t matB, const void *beta, const hipsparseDnMatDescr_t matC, hipDataType computeType, hipsparseSpMMAlg_t alg, void *externalBuffer)#
hipsparseSpMM_preprocess
performs the required preprocessing used when computing the sparse matrix multiplication with a dense matrix:\[ C := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]where \(op(A)\) is a sparse \(m \times k\) matrix in CSR, COO, BSR or Blocked ELL storage format, \(B\) is a dense matrix of size \(k \times n\) and \(C\) is a dense matrix of size \(m \times n\).hipsparseSpMM_preprocess
supports multiple combinations of data types and compute types. See hipsparseSpMM for a complete listing of all the data type and compute type combinations available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] matrix operation type.
opB – [in] matrix operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] matrix descriptor.
matB – [in] matrix descriptor.
beta – [in] scalar \(\beta\).
matC – [in] matrix descriptor.
computeType – [in] floating point precision for the SpMM computation.
alg – [in] SpMM algorithm for the SpMM computation.
externalBuffer – [out] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,matA
,matB
,matC
,beta
, orexternalBuffer
pointer is invalid.HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
,opB
,computeType
oralg
is currently not supported.
hipsparseSpMM()#
-
hipsparseStatus_t hipsparseSpMM(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnMatDescr_t matB, const void *beta, const hipsparseDnMatDescr_t matC, hipDataType computeType, hipsparseSpMMAlg_t alg, void *externalBuffer)#
Compute the sparse matrix multiplication with a dense matrix.
hipsparseSpMM
multiplies the scalar \(\alpha\) with a sparse \(m \times k\) matrix \(op(A)\), defined in CSR, COO, BSR or Blocked ELL storage format, and the dense \(k \times n\) matrix \(op(B)\) and adds the result to the dense \(m \times n\) matrix \(C\) that is multiplied by the scalar \(\beta\), such that\[ C := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if trans_A == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if trans_A == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if trans_A == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]and\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if trans_B == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if trans_B == HIPSPARSE_OPERATION_TRANSPOSE} \\ B^H, & \text{if trans_B == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]Both \(B\) and \(C\) can be in row or column order.hipsparseSpMM
requires three stages to complete. First, the user calls hipsparseSpMM_bufferSize to determine the size of the required temporary storage buffer. Next, the user allocates this buffer and calls hipsparseSpMM_preprocess which will perform analysis on the sparse matrix \(op(A)\). Finally, the user callshipsparseSpMM
to perform the actual computation. The buffer size and preprecess routines only need to be called once for a given sparse matrix \(op(A)\) while the computation routine can be repeatedly used with different \(B\) and \(C\) matrices. Once all calls tohipsparseSpMM
are complete, the temporary buffer can be deallocated.As noted above, both \(B\) and \(C\) can be in row or column order (this includes mixing the order so that \(B\) is row order and \(C\) is column order and vice versa). For best performance, use row order for both \(B\) and \(C\) as this provides the best memory access.
hipsparseSpMM
supports multiple different algorithms. These algorithms have different trade offs depending on the sparsity pattern of the matrix, whether or not the results need to be deterministic, and how many times the sparse-matrix product will be performed.CSR Algorithms
HIPSPARSE_SPMM_CSR_ALG1
HIPSPARSE_SPMM_CSR_ALG2
HIPSPARSE_SPMM_CSR_ALG3
COO Algorithms
HIPSPARSE_SPMM_COO_ALG1
HIPSPARSE_SPMM_COO_ALG2
HIPSPARSE_SPMM_COO_ALG3
HIPSPARSE_SPMM_COO_ALG4
ELL Algorithms
HIPSPARSE_SPMM_BLOCKED_ELL_ALG1
BSR Algorithms
CUSPARSE_SPMM_BSR_ALG1
One can also pass HIPSPARSE_SPMM_ALG_DEFAULT which will automatically select from the algorithms listed above based on the sparse matrix format.
When A is transposed,
hipsparseSpMM
will revert to using HIPSPARSE_SPMM_CSR_ALG2 for CSR format and HIPSPARSE_SPMM_COO_ALG1 for COO format regardless of algorithm selected.hipsparseSpMM
supports multiple combinations of data types and compute types. The tables below indicate the currently supported different data types that can be used for for the sparse matrix \(op(A)\) and the dense matrices \(op(B)\) and \(C\) and the compute type for \(\alpha\) and \(\beta\). The advantage of using different data types is to save on memory bandwidth and storage when a user application allows while performing the actual computation in a higher precision.hipsparseSpMM
supports HIPSPARSE_INDEX_32I and HIPSPARSE_INDEX_64I index precisions for storing the row pointer and column indices arrays of the sparse matrices.- Uniform Precisions:
A / B / C / compute_type
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Mixed precisions:
A / B
C
compute_type
HIP_R_8I
HIP_R_32I
HIP_R_32I
HIP_R_8I
HIP_R_32F
HIP_R_32F
HIP_R_16F
HIP_R_32F
HIP_R_32F
HIP_R_16BF
HIP_R_32F
HIP_R_32F
hipsparseSpMM
also supports batched computation for CSR and COO matrices. There are three supported batch modes:\[\begin{split} C_i = A \times B_i \\ C_i = A_i \times B \\ C_i = A_i \times B_i \end{split}\]The batch mode is determined by the batch count and stride passed for each matrix. For example to use the first batch mode ( \(C_i = A \times B_i\)) with 100 batches for non-transposed \(A\), \(B\), and \(C\), one passes:
\[\begin{split} batchCountA=1 \\ batchCountB=100 \\ batchCountC=100 \\ offsetsBatchStrideA=0 \\ columnsValuesBatchStrideA=0 \\ batchStrideB=k*n \\ batchStrideC=m*n \end{split}\]To use the second batch mode ( \(C_i = A_i \times B\)) one could use:\[\begin{split} batchCountA=100 \\ batchCountB=1 \\ batchCountC=100 \\ offsetsBatchStrideA=m+1 \\ columnsValuesBatchStrideA=nnz \\ batchStrideB=0 \\ batchStrideC=m*n \end{split}\]And to use the third batch mode ( \(C_i = A_i \times B_i\)) one could use:\[\begin{split} batchCountA=100 \\ batchCountB=100 \\ batchCountC=100 \\ offsetsBatchStrideA=m+1 \\ columnsValuesBatchStrideA=nnz \\ batchStrideB=k*n \\ batchStrideC=m*n \end{split}\]See examples below.- Example
// A, B, and C are m×k, k×n, and m×n int m = 3, n = 5, k = 4; int ldb = n, ldc = n; int nnz_A = 8, nnz_B = 20, nnz_C = 15; hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE; hipsparseOperation_t transB = HIPSPARSE_OPERATION_NON_TRANSPOSE; hipsparseOperation_t transC = HIPSPARSE_OPERATION_NON_TRANSPOSE; hipsparseOrder_t order = HIPSPARSE_ORDER_ROW; // alpha and beta float alpha = 0.5f; float beta = 0.25f; std::vector<int> hcsrRowPtr = {0, 3, 5, 8}; std::vector<int> hcsrColInd = {0, 1, 3, 1, 2, 0, 2, 3}; std::vector<float> hcsrVal = {1, 2, 3, 4, 5, 6, 7, 8}; std::vector<float> hB(nnz_B, 1.0f); std::vector<float> hC(nnz_C, 1.0f); int *dcsrRowPtr; int *dcsrColInd; float *dcsrVal; hipMalloc((void**)&dcsrRowPtr, sizeof(int) * (m + 1)); hipMalloc((void**)&dcsrColInd, sizeof(int) * nnz_A); hipMalloc((void**)&dcsrVal, sizeof(float) * nnz_A); hipMemcpy(dcsrRowPtr, hcsrRowPtr.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice); hipMemcpy(dcsrColInd, hcsrColInd.data(), sizeof(int) * nnz_A, hipMemcpyHostToDevice); hipMemcpy(dcsrVal, hcsrVal.data(), sizeof(float) * nnz_A, hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseCreate(&handle); hipsparseSpMatDescr_t matA; hipsparseCreateCsr(&matA, m, k, nnz_A, dcsrRowPtr, dcsrColInd, dcsrVal, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); // Allocate memory for the matrix B float* dB; hipMalloc((void**)&dB, sizeof(float) * nnz_B); hipMemcpy(dB, hB.data(), sizeof(float) * nnz_B, hipMemcpyHostToDevice); hipsparseDnMatDescr_t matB; hipsparseCreateDnMat(&matB, k, n, ldb, dB, HIP_R_32F, order); // Allocate memory for the resulting matrix C float* dC; hipMalloc((void**)&dC, sizeof(float) * nnz_C); hipMemcpy(dC, hC.data(), sizeof(float) * nnz_C, hipMemcpyHostToDevice); hipsparseDnMatDescr_t matC; hipsparseCreateDnMat(&matC, m, n, ldc, dC, HIP_R_32F, HIPSPARSE_ORDER_ROW); // Compute buffersize size_t bufferSize; hipsparseSpMM_bufferSize(handle, transA, transB, &alpha, matA, matB, &beta, matC, HIP_R_32F, HIPSPARSE_MM_ALG_DEFAULT, &bufferSize); void* buffer; hipMalloc(&buffer, bufferSize); // Preprocess operation (Optional) hipsparseSpMM_preprocess(handle, transA, transB, &alpha, matA, matB, &beta, matC, HIP_R_32F, HIPSPARSE_MM_ALG_DEFAULT, buffer); // Perform operation hipsparseSpMM(handle, transA, transB, &alpha, matA, matB, &beta, matC, HIP_R_32F, HIPSPARSE_MM_ALG_DEFAULT, buffer); // Copy device to host hipMemcpy(hC.data(), dC, sizeof(float) * nnz_C, hipMemcpyDeviceToHost); // Destroy matrix descriptors and handles hipsparseDestroySpMat(matA); hipsparseDestroyDnMat(matB); hipsparseDestroyDnMat(matC); hipsparseDestroy(handle); hipFree(buffer); hipFree(dcsrRowPtr); hipFree(dcsrColInd); hipFree(dcsrVal); hipFree(dB); hipFree(dC);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] matrix operation type.
opB – [in] matrix operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] matrix descriptor.
matB – [in] matrix descriptor.
beta – [in] scalar \(\beta\).
matC – [in] matrix descriptor.
computeType – [in] floating point precision for the SpMM computation.
alg – [in] SpMM algorithm for the SpMM computation.
externalBuffer – [out] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,matA
,matB
,matC
,beta
, orexternalBuffer
pointer is invalid.HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
,opB
,computeType
oralg
is currently not supported.
hipsparseSpGEMM_createDescr()#
-
hipsparseStatus_t hipsparseSpGEMM_createDescr(hipsparseSpGEMMDescr_t *descr)#
hipsparseSpGEMM_createDescr
creates a sparse matrix sparse matrix product descriptor. It should be destroyed at the end using hipsparseSpGEMM_destroyDescr().
hipsparseSpGEMM_destroyDescr()#
-
hipsparseStatus_t hipsparseSpGEMM_destroyDescr(hipsparseSpGEMMDescr_t descr)#
hipsparseSpGEMM_destroyDescr
destroys a sparse matrix sparse matrix product descriptor and releases all resources used by the descriptor.
hipsparseSpGEMM_workEstimation()#
-
hipsparseStatus_t hipsparseSpGEMM_workEstimation(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, const void *beta, hipsparseSpMatDescr_t matC, hipDataType computeType, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize1, void *externalBuffer1)#
Work estimation step of the sparse matrix sparse matrix product:
\[ C' := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]where \(C'\), \(A\), \(B\), \(C\) are sparse matrices and \(C'\) and \(C\) have the same sparsity pattern.hipsparseSpGEMM_workEstimation
is called twice. We call it to compute the size of the first required user allocated buffer. After this buffer size is determined, the user allocates it and callshipsparseSpGEMM_workEstimation
a second time with the newly allocated buffer passed in. This second call inspects the matrices \(A\) and \(B\) to determine the number of intermediate products that will result from multipltying \(A\) and \(B\) together.hipsparseSpGEMM_workEstimation
supports multiple combinations of data types and compute types. See hipsparseSpGEMM_copy for a complete listing of all the data type and compute type combinations available.- Example (See full example below)
void* dBuffer1 = NULL; size_t bufferSize1 = 0; hipsparseSpGEMMDescr_t spgemmDesc; hipsparseSpGEMM_createDescr(&spgemmDesc); size_t bufferSize1 = 0; hipsparseSpGEMM_workEstimation(handle, opA, opB, &alpha, matA, matB, &beta, matC, computeType, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize1, NULL); hipMalloc((void**) &dBuffer1, bufferSize1); // Determine number of intermediate product when computing A * B hipsparseSpGEMM_workEstimation(handle, opA, opB, &alpha, matA, matB, &beta, matC, computeType, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize1, dBuffer1);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] sparse matrix \(A\) operation type.
opB – [in] sparse matrix \(B\) operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] sparse matrix \(A\) descriptor.
matB – [in] sparse matrix \(B\) descriptor.
beta – [in] scalar \(\beta\).
matC – [out] sparse matrix \(C\) descriptor.
computeType – [in] floating point precision for the SpGEMM computation.
alg – [in] SpGEMM algorithm for the SpGEMM computation.
spgemmDescr – [in] SpGEMM descriptor.
bufferSize1 – [out] number of bytes of the temporary storage buffer.
externalBuffer1 – [in] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,beta
,matA
,matB
,matC
orbufferSize1
pointer is invalid.HIPSPARSE_STATUS_ALLOC_FAILED – additional buffer for long rows could not be allocated.
HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
!= HIPSPARSE_OPERATION_NON_TRANSPOSE oropB
!= HIPSPARSE_OPERATION_NON_TRANSPOSE.
hipsparseSpGEMM_compute()#
-
hipsparseStatus_t hipsparseSpGEMM_compute(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, const void *beta, hipsparseSpMatDescr_t matC, hipDataType computeType, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize2, void *externalBuffer2)#
Compute step of the sparse matrix sparse matrix product:
\[ C' := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]where \(C'\), \(A\), \(B\), \(C\) are sparse matrices and \(C'\) and \(C\) have the same sparsity pattern.hipsparseSpGEMM_compute
is called twice. First to compute the size of the second required user allocated buffer. After this buffer size is determined, the user allocates it and callshipsparseSpGEMM_compute
a second time with the newly allocated buffer passed in. This second call performs the actual computation of \(C' = \alpha \cdot A \cdot B\) (the result is stored in the temporary buffers).hipsparseSpGEMM_compute
supports multiple combinations of data types and compute types. See hipsparseSpGEMM_copy for a complete listing of all the data type and compute type combinations available.- Example (See full example below)
void* dBuffer2 = NULL; size_t bufferSize2 = 0; size_t bufferSize2 = 0; hipsparseSpGEMM_compute(handle, opA, opB, &alpha, matA, matB, &beta, matC, computeType, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize2, NULL); hipMalloc((void**) &dBuffer2, bufferSize2); // compute the intermediate product of A * B hipsparseSpGEMM_compute(handle, opA, opB, &alpha, matA, matB, &beta, matC, computeType, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize2, dBuffer2);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] sparse matrix \(A\) operation type.
opB – [in] sparse matrix \(B\) operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] sparse matrix \(A\) descriptor.
matB – [in] sparse matrix \(B\) descriptor.
beta – [in] scalar \(\beta\).
matC – [out] sparse matrix \(C\) descriptor.
computeType – [in] floating point precision for the SpGEMM computation.
alg – [in] SpGEMM algorithm for the SpGEMM computation.
spgemmDescr – [in] SpGEMM descriptor.
bufferSize2 – [out] number of bytes of the temporary storage buffer.
externalBuffer2 – [in] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,beta
,matA
,matB
,matC
orbufferSize2
pointer is invalid.HIPSPARSE_STATUS_ALLOC_FAILED – additional buffer for long rows could not be allocated.
HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
!= HIPSPARSE_OPERATION_NON_TRANSPOSE oropB
!= HIPSPARSE_OPERATION_NON_TRANSPOSE.
hipsparseSpGEMM_copy()#
-
hipsparseStatus_t hipsparseSpGEMM_copy(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, const void *beta, hipsparseSpMatDescr_t matC, hipDataType computeType, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr)#
Copy step of the sparse matrix sparse matrix product:
\[ C' := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]where \(C'\), \(A\), \(B\), \(C\) are sparse matrices and \(C'\) and \(C\) have the same sparsity pattern.hipsparseSpGEMM_copy
is called once to copy the results (that are currently stored in the temporary arrays) to the output sparse matrix. If \(\beta != 0\), then the \(beta \cdot C\) portion of the computation: \(C' = \alpha \cdot A \cdot B + \beta * C\) is handled. This is possible because \(C'\) and \(C\) must have the same sparsity pattern.hipsparseSpGEMM_copy
supports multiple combinations of data types and compute types. The tables below indicate the currently supported different data types that can be used for for the sparse matrices \(op(A)\), \(op(B)\), \(C\), and \(C'\) and the compute type for \(\alpha\) and \(\beta\). The advantage of using different data types is to save on memory bandwidth and storage when a user application allows while performing the actual computation in a higher precision.hipsparseSpGEMM_copy
supports HIPSPARSE_INDEX_32I and HIPSPARSE_INDEX_64I index precisions for storing the row pointer and row/column indices arrays of the sparse matrices.- Uniform Precisions:
A / B / C / C’ / compute_type
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Example (Full example)
hipsparseHandle_t handle = NULL; hipsparseSpMatDescr_t matA, matB, matC; void* dBuffer1 = NULL; void* dBuffer2 = NULL; size_t bufferSize1 = 0; size_t bufferSize2 = 0; hipsparseCreate(&handle); // Create sparse matrix A in CSR format hipsparseCreateCsr(&matA, m, k, nnzA, dcsrRowPtrA, dcsrColIndA, dcsrValA, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); hipsparseCreateCsr(&matB, k, n, nnzB, dcsrRowPtrB, dcsrColIndB, dcsrValB, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); hipsparseCreateCsr(&matC, m, n, 0, dcsrRowPtrC, NULL, NULL, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); hipsparseSpGEMMDescr_t spgemmDesc; hipsparseSpGEMM_createDescr(&spgemmDesc); // Determine size of first user allocated buffer hipsparseSpGEMM_workEstimation(handle, opA, opB, &alpha, matA, matB, &beta, matC, computeType, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize1, NULL); hipMalloc((void**) &dBuffer1, bufferSize1); // Inspect the matrices A and B to determine the number of intermediate product in // C = alpha * A * B hipsparseSpGEMM_workEstimation(handle, opA, opB, &alpha, matA, matB, &beta, matC, computeType, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize1, dBuffer1); // Determine size of second user allocated buffer hipsparseSpGEMM_compute(handle, opA, opB, &alpha, matA, matB, &beta, matC, computeType, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize2, NULL); hipMalloc((void**) &dBuffer2, bufferSize2); // Compute C = alpha * A * B and store result in temporary buffers hipsparseSpGEMM_compute(handle, opA, opB, &alpha, matA, matB, &beta, matC, computeType, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize2, dBuffer2); // Get matrix C non-zero entries C_nnz1 int64_t C_num_rows1, C_num_cols1, C_nnz1; hipsparseSpMatGetSize(matC, &C_num_rows1, &C_num_cols1, &C_nnz1); // Allocate the CSR structures for the matrix C hipMalloc((void**) &dcsrColIndC, C_nnz1 * sizeof(int)); hipMalloc((void**) &dcsrValC, C_nnz1 * sizeof(float)); // Update matC with the new pointers hipsparseCsrSetPointers(matC, dcsrRowPtrC, dcsrColIndC, dcsrValC); // Copy the final products to the matrix C hipsparseSpGEMM_copy(handle, opA, opB, &alpha, matA, matB, &beta, matC, computeType, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc); // Destroy matrix descriptors and handles hipsparseSpGEMM_destroyDescr(spgemmDesc); hipsparseDestroySpMat(matA); hipsparseDestroySpMat(matB); hipsparseDestroySpMat(matC); hipsparseDestroy(handle); // Free device memory hipFree(dBuffer1); hipFree(dBuffer2);
Note
The two user allocated temporary buffers can only be freed after the call to
hipsparseSpGEMM_copy
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] sparse matrix \(A\) operation type.
opB – [in] sparse matrix \(B\) operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] sparse matrix \(A\) descriptor.
matB – [in] sparse matrix \(B\) descriptor.
beta – [in] scalar \(\beta\).
matC – [out] sparse matrix \(C\) descriptor.
computeType – [in] floating point precision for the SpGEMM computation.
alg – [in] SpGEMM algorithm for the SpGEMM computation.
spgemmDescr – [in] SpGEMM descriptor.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,beta
,matA
,matB
,matC
pointer is invalid.HIPSPARSE_STATUS_ALLOC_FAILED – additional buffer for long rows could not be allocated.
HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
!= HIPSPARSE_OPERATION_NON_TRANSPOSE oropB
!= HIPSPARSE_OPERATION_NON_TRANSPOSE.
hipsparseSpGEMMreuse_workEstimation()#
-
hipsparseStatus_t hipsparseSpGEMMreuse_workEstimation(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, hipsparseSpMatDescr_t matC, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize1, void *externalBuffer1)#
Work estimation step of the sparse matrix sparse matrix product:
\[ C' := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]where \(C'\), \(A\), \(B\), \(C\) are sparse matrices and \(C'\) and \(C\) have the same sparsity pattern.hipsparseSpGEMMreuse_workEstimation
is called twice. We call it to compute the size of the first required user allocated buffer. After this buffer size is determined, the user allocates it and callshipsparseSpGEMMreuse_workEstimation
a second time with the newly allocated buffer passed in. This second call inspects the matrices \(A\) and \(B\) to determine the number of intermediate products that will result from multipltying \(A\) and \(B\) together.- Example (See full example below)
void* dBuffer1 = NULL; size_t bufferSize1 = 0; hipsparseSpGEMMDescr_t spgemmDesc; hipsparseSpGEMM_createDescr(&spgemmDesc); size_t bufferSize1 = 0; hipsparseSpGEMMreuse_workEstimation(handle, opA, opB, matA, matB, matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize1, NULL); hipMalloc((void**) &dBuffer1, bufferSize1); // Determine number of intermediate product when computing A * B hipsparseSpGEMMreuse_workEstimation(handle, opA, opB, matA, matB, matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize1, dBuffer1);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] sparse matrix \(A\) operation type.
opB – [in] sparse matrix \(B\) operation type.
matA – [in] sparse matrix \(A\) descriptor.
matB – [in] sparse matrix \(B\) descriptor.
matC – [out] sparse matrix \(C\) descriptor.
alg – [in] SpGEMM algorithm for the SpGEMM computation.
spgemmDescr – [in] SpGEMM descriptor.
bufferSize1 – [out] number of bytes of the temporary storage buffer.
externalBuffer1 – [in] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,matA
,matB
,matC
orbufferSize1
pointer is invalid.HIPSPARSE_STATUS_ALLOC_FAILED – additional buffer for long rows could not be allocated.
HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
!= HIPSPARSE_OPERATION_NON_TRANSPOSE oropB
!= HIPSPARSE_OPERATION_NON_TRANSPOSE.
hipsparseSpGEMMreuse_nnz()#
-
hipsparseStatus_t hipsparseSpGEMMreuse_nnz(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, hipsparseSpMatDescr_t matC, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize2, void *externalBuffer2, size_t *bufferSize3, void *externalBuffer3, size_t *bufferSize4, void *externalBuffer4)#
Nnz calculation step of the sparse matrix sparse matrix product:
\[ C' := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]where \(C'\), \(A\), \(B\), \(C\) are sparse matrices and \(C'\) and \(C\) have the same sparsity pattern.- Example (See full example below)
// Determine size of second, third, and fourth user allocated buffer hipsparseSpGEMMreuse_nnz(handle, opA, opB, matA, matB, matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize2, NULL, &bufferSize3, NULL, &bufferSize4, NULL); hipMalloc((void**) &dBuffer2, bufferSize2); hipMalloc((void**) &dBuffer3, bufferSize3); hipMalloc((void**) &dBuffer4, bufferSize4); // COmpute sparsity pattern of C matrix and store in temporary buffers hipsparseSpGEMMreuse_nnz(handle, opA, opB, matA, matB, matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize2, dBuffer2, &bufferSize3, dBuffer3, &bufferSize4, dBuffer4); // We can now free buffer 1 and 2 hipFree(dBuffer1); hipFree(dBuffer2);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] sparse matrix \(A\) operation type.
opB – [in] sparse matrix \(B\) operation type.
matA – [in] sparse matrix \(A\) descriptor.
matB – [in] sparse matrix \(B\) descriptor.
matC – [out] sparse matrix \(C\) descriptor.
alg – [in] SpGEMM algorithm for the SpGEMM computation.
spgemmDescr – [in] SpGEMM descriptor.
bufferSize2 – [out] number of bytes of the temporary storage
externalBuffer2
.externalBuffer2 – [in] temporary storage buffer allocated by the user.
bufferSize3 – [out] number of bytes of the temporary storage
externalBuffer3
.externalBuffer3 – [in] temporary storage buffer allocated by the user.
bufferSize4 – [out] number of bytes of the temporary storage
externalBuffer4
.externalBuffer4 – [in] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,matA
,matB
,matC
,bufferSize2
,bufferSize3
orbufferSize4
pointer is invalid.HIPSPARSE_STATUS_ALLOC_FAILED – additional buffer for long rows could not be allocated.
HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
!= HIPSPARSE_OPERATION_NON_TRANSPOSE oropB
!= HIPSPARSE_OPERATION_NON_TRANSPOSE.
hipsparseSpGEMMreuse_copy()#
-
hipsparseStatus_t hipsparseSpGEMMreuse_copy(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, hipsparseSpMatDescr_t matC, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize5, void *externalBuffer5)#
Copy step of the sparse matrix sparse matrix product:
\[ C' := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]where \(C'\), \(A\), \(B\), \(C\) are sparse matrices and \(C'\) and \(C\) have the same sparsity pattern.- Example (See full example below)
// Get matrix C non-zero entries nnzC int64_t rowsC, colsC, nnzC; hipsparseSpMatGetSize(matC, &rowsC, &colsC, &nnzC); // Allocate matrix C hipMalloc((void**) &dcsrColIndC, sizeof(int) * nnzC); hipMalloc((void**) &dcsrValC, sizeof(float) * nnzC); // Update matC with the new pointers. The C values array can be filled with data here // which is used if beta != 0. hipsparseCsrSetPointers(matC, dcsrRowPtrC, dcsrColIndC, dcsrValC); // Determine size of fifth user allocated buffer hipsparseSpGEMMreuse_copy(handle, opA, opB, matA, matB, matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize5, NULL); hipMalloc((void**) &dBuffer5, bufferSize5); // Copy data from temporary buffers to the newly allocated C matrix hipsparseSpGEMMreuse_copy(handle, opA, opB, matA, matB, matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize5, dBuffer5); // We can now free buffer 3 hipFree(dBuffer3);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] sparse matrix \(A\) operation type.
opB – [in] sparse matrix \(B\) operation type.
matA – [in] sparse matrix \(A\) descriptor.
matB – [in] sparse matrix \(B\) descriptor.
matC – [out] sparse matrix \(C\) descriptor.
alg – [in] SpGEMM algorithm for the SpGEMM computation.
spgemmDescr – [in] SpGEMM descriptor.
bufferSize5 – [out] number of bytes of the temporary storage
externalBuffer5
.externalBuffer5 – [in] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,matA
,matB
,matC
, orbufferSize5
pointer is invalid.HIPSPARSE_STATUS_ALLOC_FAILED – additional buffer for long rows could not be allocated.
HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
!= HIPSPARSE_OPERATION_NON_TRANSPOSE oropB
!= HIPSPARSE_OPERATION_NON_TRANSPOSE.
hipsparseSpGEMMreuse_compute()#
-
hipsparseStatus_t hipsparseSpGEMMreuse_compute(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, const void *beta, hipsparseSpMatDescr_t matC, hipDataType computeType, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr)#
Copy step of the sparse matrix sparse matrix product:
\[ C' := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]where \(C'\), \(A\), \(B\), \(C\) are sparse matrices and \(C'\) and \(C\) have the same sparsity pattern.- Full example
int m = 2; int k = 2; int n = 3; int nnzA = 4; int nnzB = 4; float alpha{1.0f}; float beta{0.0f}; hipsparseOperation_t opA = HIPSPARSE_OPERATION_NON_TRANSPOSE; hipsparseOperation_t opB = HIPSPARSE_OPERATION_NON_TRANSPOSE; hipDataType computeType = HIP_R_32F; // A, B, and C are m×k, k×n, and m×n // A std::vector<int> hcsrRowPtrA = {0, 2, 4}; std::vector<int> hcsrColIndA = {0, 1, 0, 1}; std::vector<float> hcsrValA = {1.0f, 2.0f, 3.0f, 4.0f}; // B std::vector<int> hcsrRowPtrB = {0, 2, 4}; std::vector<int> hcsrColIndB = {1, 2, 0, 2}; std::vector<float> hcsrValB = {5.0f , 6.0f, 7.0f, 8.0f}; // Device memory management: Allocate and copy A, B int* dcsrRowPtrA; int* dcsrColIndA; float* dcsrValA; int* dcsrRowPtrB; int* dcsrColIndB; float* dcsrValB; int* dcsrRowPtrC; int* dcsrColIndC; float* dcsrValC; hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)); hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)); hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)); hipMalloc((void**)&dcsrRowPtrB, (k + 1) * sizeof(int)); hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)); hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)); hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)); hipMemcpy(dcsrRowPtrA, hcsrRowPtrA.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrColIndA, hcsrColIndA.data(), nnzA * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrValA, hcsrValA.data(), nnzA * sizeof(float), hipMemcpyHostToDevice); hipMemcpy(dcsrRowPtrB, hcsrRowPtrB.data(), (k + 1) * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrColIndB, hcsrColIndB.data(), nnzB * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrValB, hcsrValB.data(), nnzB * sizeof(float), hipMemcpyHostToDevice); hipsparseHandle_t handle = NULL; hipsparseSpMatDescr_t matA, matB, matC; void* dBuffer1 = NULL; void* dBuffer2 = NULL; void* dBuffer3 = NULL; void* dBuffer4 = NULL; void* dBuffer5 = NULL; size_t bufferSize1 = 0; size_t bufferSize2 = 0; size_t bufferSize3 = 0; size_t bufferSize4 = 0; size_t bufferSize5 = 0; hipsparseCreate(&handle); // Create sparse matrix A in CSR format hipsparseCreateCsr(&matA, m, k, nnzA, dcsrRowPtrA, dcsrColIndA, dcsrValA, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); hipsparseCreateCsr(&matB, k, n, nnzB, dcsrRowPtrB, dcsrColIndB, dcsrValB, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); hipsparseCreateCsr(&matC, m, n, 0, dcsrRowPtrC, NULL, NULL, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F); hipsparseSpGEMMDescr_t spgemmDesc; hipsparseSpGEMM_createDescr(&spgemmDesc); // Determine size of first user allocated buffer hipsparseSpGEMMreuse_workEstimation(handle, opA, opB, matA, matB, matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize1, NULL); hipMalloc((void**) &dBuffer1, bufferSize1); // Inspect the matrices A and B to determine the number of intermediate product in // C = alpha * A * B hipsparseSpGEMMreuse_workEstimation(handle, opA, opB, matA, matB, matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize1, dBuffer1); // Determine size of second, third, and fourth user allocated buffer hipsparseSpGEMMreuse_nnz(handle, opA, opB, matA, matB, matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize2, NULL, &bufferSize3, NULL, &bufferSize4, NULL); hipMalloc((void**) &dBuffer2, bufferSize2); hipMalloc((void**) &dBuffer3, bufferSize3); hipMalloc((void**) &dBuffer4, bufferSize4); // COmpute sparsity pattern of C matrix and store in temporary buffers hipsparseSpGEMMreuse_nnz(handle, opA, opB, matA, matB, matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize2, dBuffer2, &bufferSize3, dBuffer3, &bufferSize4, dBuffer4); // We can now free buffer 1 and 2 hipFree(dBuffer1); hipFree(dBuffer2); // Get matrix C non-zero entries nnzC int64_t rowsC, colsC, nnzC; hipsparseSpMatGetSize(matC, &rowsC, &colsC, &nnzC); // Allocate matrix C hipMalloc((void**) &dcsrColIndC, sizeof(int) * nnzC); hipMalloc((void**) &dcsrValC, sizeof(float) * nnzC); // Update matC with the new pointers. The C values array can be filled with data here // which is used if beta != 0. hipsparseCsrSetPointers(matC, dcsrRowPtrC, dcsrColIndC, dcsrValC); // Determine size of fifth user allocated buffer hipsparseSpGEMMreuse_copy(handle, opA, opB, matA, matB, matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize5, NULL); hipMalloc((void**) &dBuffer5, bufferSize5); // Copy data from temporary buffers to the newly allocated C matrix hipsparseSpGEMMreuse_copy(handle, opA, opB, matA, matB, matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc, &bufferSize5, dBuffer5); // We can now free buffer 3 hipFree(dBuffer3); // Compute C' = alpha * A * B + beta * C hipsparseSpGEMMreuse_compute(handle, opA, opB, &alpha, matA, matB, &beta, matC, computeType, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc); // Copy results back to host if required using hipsparseCsrGet... // Update dcsrValA, dcsrValB with new values for(size_t i = 0; i < hcsrValA.size(); i++){ hcsrValA[i] = 1.0f; } for(size_t i = 0; i < hcsrValB.size(); i++){ hcsrValB[i] = 2.0f; } hipMemcpy(dcsrValA, hcsrValA.data(), sizeof(float) * nnzA, hipMemcpyHostToDevice); hipMemcpy(dcsrValB, hcsrValB.data(), sizeof(float) * nnzB, hipMemcpyHostToDevice); // Compute C' = alpha * A * B + beta * C again with the new A and B values hipsparseSpGEMMreuse_compute(handle, opA, opB, &alpha, matA, matB, &beta, matC, computeType, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc); // Copy results back to host if required using hipsparseCsrGet... // Destroy matrix descriptors and handles hipsparseSpGEMM_destroyDescr(spgemmDesc); hipsparseDestroySpMat(matA); hipsparseDestroySpMat(matB); hipsparseDestroySpMat(matC); hipsparseDestroy(handle); // Free device memory hipFree(dBuffer4); hipFree(dBuffer5); hipFree(dcsrRowPtrA); hipFree(dcsrColIndA); hipFree(dcsrValA); hipFree(dcsrRowPtrB); hipFree(dcsrColIndB); hipFree(dcsrValB); hipFree(dcsrRowPtrC); hipFree(dcsrColIndC); hipFree(dcsrValC);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] sparse matrix \(A\) operation type.
opB – [in] sparse matrix \(B\) operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] sparse matrix \(A\) descriptor.
matB – [in] sparse matrix \(B\) descriptor.
beta – [in] scalar \(\beta\).
matC – [out] sparse matrix \(C\) descriptor.
computeType – [in] floating point precision for the SpGEMM computation.
alg – [in] SpGEMM algorithm for the SpGEMM computation.
spgemmDescr – [in] SpGEMM descriptor.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,beta
,matA
,matB
, ormatC
pointer is invalid.HIPSPARSE_STATUS_ALLOC_FAILED – additional buffer for long rows could not be allocated.
HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
!= HIPSPARSE_OPERATION_NON_TRANSPOSE oropB
!= HIPSPARSE_OPERATION_NON_TRANSPOSE.
hipsparseSDDMM_bufferSize()#
-
hipsparseStatus_t hipsparseSDDMM_bufferSize(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstDnMatDescr_t A, hipsparseConstDnMatDescr_t B, const void *beta, hipsparseSpMatDescr_t C, hipDataType computeType, hipsparseSDDMMAlg_t alg, size_t *pBufferSizeInBytes)#
hipsparseSDDMM_bufferSize
returns the size of the required buffer needed when computing the sampled dense-dense matrix multiplication:\[ C := \alpha (op(A) \cdot op(B)) \circ spy(C) + \beta \cdot C, \]where \(C\) is a sparse matrix and \(A\) and \(B\) are dense matrices. This routine is used in conjunction with hipsparseSDDMM_preprocess() and hipsparseSDDMM().hipsparseSDDMM_bufferSize
supports multiple combinations of data types and compute types. See hipsparseSDDMM for a complete listing of all the data type and compute type combinations available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] dense matrix \(A\) operation type.
opB – [in] dense matrix \(B\) operation type.
alpha – [in] scalar \(\alpha\).
A – [in] dense matrix \(A\) descriptor.
B – [in] dense matrix \(B\) descriptor.
beta – [in] scalar \(\beta\).
C – [inout] sparse matrix \(C\) descriptor.
computeType – [in] floating point precision for the SDDMM computation.
alg – [in] specification of the algorithm to use.
pBufferSizeInBytes – [out] number of bytes of the temporary storage buffer.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,beta
,A
,B
,D
,C
orpBufferSizeInBytes
pointer is invalid or the value ofopA
oropB
is incorrectHIPSPARSE_STATUS_NOT_SUPPORTED –
opA
== HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE oropB
== HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE.
hipsparseSDDMM_preprocess()#
-
hipsparseStatus_t hipsparseSDDMM_preprocess(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstDnMatDescr_t A, hipsparseConstDnMatDescr_t B, const void *beta, hipsparseSpMatDescr_t C, hipDataType computeType, hipsparseSDDMMAlg_t alg, void *tempBuffer)#
hipsparseSDDMM_preprocess
performs the required preprocessing used when computing the sampled dense dense matrix multiplication:\[ C := \alpha (op(A) \cdot op(B)) \circ spy(C) + \beta \cdot C, \]where \(C\) is a sparse matrix and \(A\) and \(B\) are dense matrices. This routine is used in conjunction with hipsparseSDDMM().hipsparseSDDMM_preprocess
supports multiple combinations of data types and compute types. See hipsparseSDDMM for a complete listing of all the data type and compute type combinations available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] dense matrix \(A\) operation type.
opB – [in] dense matrix \(B\) operation type.
alpha – [in] scalar \(\alpha\).
A – [in] dense matrix \(A\) descriptor.
B – [in] dense matrix \(B\) descriptor.
beta – [in] scalar \(\beta\).
C – [inout] sparse matrix \(C\) descriptor.
computeType – [in] floating point precision for the SDDMM computation.
alg – [in] specification of the algorithm to use.
tempBuffer – [in] temporary storage buffer allocated by the user. The size must be greater or equal to the size obtained with hipsparseSDDMM_bufferSize.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,beta
,A
,B
,C
ortempBuffer
pointer is invalid or the value ofopA
oropB
is incorrect.HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
== HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE oropB
== HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE.
hipsparseSDDMM()#
-
hipsparseStatus_t hipsparseSDDMM(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstDnMatDescr_t A, hipsparseConstDnMatDescr_t B, const void *beta, hipsparseSpMatDescr_t C, hipDataType computeType, hipsparseSDDMMAlg_t alg, void *tempBuffer)#
Sampled Dense-Dense Matrix Multiplication.
hipsparseSDDMM
multiplies the scalar \(\alpha\) with the dense \(m \times k\) matrix \(op(A)\), the dense \(k \times n\) matrix \(op(B)\), filtered by the sparsity pattern of the \(m \times n\) sparse matrix \(C\) and adds the result to \(C\) scaled by \(\beta\). The final result is stored in the sparse \(m \times n\) matrix \(C\), such that\[ C := \alpha ( op(A) \cdot op(B) ) \circ spy(C) + \beta C, \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if op(A) == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if op(A) == HIPSPARSE_OPERATION_TRANSPOSE} \\ \end{array} \right. \end{split}\],\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if op(B) == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if op(B) == HIPSPARSE_OPERATION_TRANSPOSE} \\ \end{array} \right. \end{split}\]and\[\begin{split} spy(C)_{ij} = \left\{ \begin{array}{ll} 1, & \text{ if C_{ij} != 0} \\ 0, & \text{ otherwise} \\ \end{array} \right. \end{split}\]Computing the above sampled dense-dense multiplication requires three steps to complete. First, the user calls hipsparseSDDMM_bufferSize to determine the size of the required temporary storage buffer. Next, the user allocates this buffer and calls hipsparseSDDMM_preprocess which performs any analysis of the input matrices that may be required. Finally, the user calls
hipsparseSDDMM
to complete the computation. Once all calls tohipsparseSDDMM
are complete, the temporary buffer can be deallocated.hipsparseSDDMM
supports different algorithms which can provide better performance for different matrices.CSR/CSC Algorithms
HIPSPARSE_SDDMM_ALG_DEFAULT
Currently,
hipsparseSDDMM
only supports the uniform precisions indicated in the table below. For the sparse matrix \(C\),hipsparseSDDMM
supports the index types HIPSPARSE_INDEX_32I and HIPSPARSE_INDEX_64I.- Uniform Precisions:
A / B / C / compute_type
HIP_R_16F
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Mixed precisions:
A / B
C
compute_type
HIP_R_16F
HIP_R_32F
HIP_R_32F
HIP_R_16F
HIP_R_16F
HIP_R_32F
- Example
This example performs sampled dense-dense matrix product, \(C := \alpha ( A \cdot B ) \circ spy(C) + \beta C\) where \(\circ\) is the hadamard product
// hipSPARSE handle hipsparseHandle_t handle; hipsparseCreate(&handle); _Float16 halpha = 1.0; _Float16 hbeta = 0.0; // A, B, and C are mxk, kxn, and mxn int m = 4; int k = 3; int n = 2; int nnzC = 5; // 2 3 -1 // A = 0 2 1 // 0 0 5 // 0 -2 0.5 // 0 4 // B = 1 0 // -2 0.5 // 1 0 1 0 // C = 2 3 spy(C) = 1 1 // 0 0 0 0 // 4 5 1 1 std::vector<_Float16> hA = {2.0, 3.0, -1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 5.0, 0.0, -2.0, 0.5}; std::vector<_Float16> hB = {0.0, 4.0, 1.0, 0.0, -2.0, 0.5}; std::vector<int> hcsr_row_ptrC = {0, 1, 3, 3, 5}; std::vector<int> hcsr_col_indC = {0, 0, 1, 0, 1}; std::vector<_Float16> hcsr_valC = {1.0, 2.0, 3.0, 4.0, 5.0}; _Float16* dA = nullptr; _Float16* dB = nullptr; hipMalloc((void**)&dA, sizeof(_Float16) * m * k); hipMalloc((void**)&dB, sizeof(_Float16) * k * n); int* dcsr_row_ptrC = nullptr; int* dcsr_col_indC = nullptr; _Float16* dcsr_valC = nullptr; hipMalloc((void**)&dcsr_row_ptrC, sizeof(int) * (m + 1)); hipMalloc((void**)&dcsr_col_indC, sizeof(int) * nnzC); hipMalloc((void**)&dcsr_valC, sizeof(_Float16) * nnzC); hipMemcpy(dA, hA.data(), sizeof(_Float16) * m * k, hipMemcpyHostToDevice); hipMemcpy(dB, hB.data(), sizeof(_Float16) * k * n, hipMemcpyHostToDevice); hipMemcpy(dcsr_row_ptrC, hcsr_row_ptrC.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice); hipMemcpy(dcsr_col_indC, hcsr_col_indC.data(), sizeof(int) * nnzC, hipMemcpyHostToDevice); hipMemcpy(dcsr_valC, hcsr_valC.data(), sizeof(_Float16) * nnzC, hipMemcpyHostToDevice); hipsparseDnMatDescr_t matA; hipsparseCreateDnMat(&matA, m, k, k, dA, HIP_R_16F, HIPSPARSE_ORDER_ROW); hipsparseDnMatDescr_t matB; hipsparseCreateDnMat(&matB, k, n, n, dB, HIP_R_16F, HIPSPARSE_ORDER_ROW); hipsparseSpMatDescr_t matC; hipsparseCreateCsr(&matC, m, n, nnzC, dcsr_row_ptrC, dcsr_col_indC, dcsr_valC, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_BASE_ZERO, HIP_R_16F); size_t buffer_size = 0; hipsparseSDDMM_bufferSize(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, HIPSPARSE_OPERATION_NON_TRANSPOSE, &halpha, matA, matB, &hbeta, matC, HIP_R_16F, HIPSPARSE_SDDMM_ALG_DEFAULT, &buffer_size); void* dbuffer = nullptr; hipMalloc((void**) &dbuffer, buffer_size); hipsparseSDDMM_preprocess(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, HIPSPARSE_OPERATION_NON_TRANSPOSE, &halpha, matA, matB, &hbeta, matC, HIP_R_16F, HIPSPARSE_SDDMM_ALG_DEFAULT, dbuffer); hipsparseSDDMM(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, HIPSPARSE_OPERATION_NON_TRANSPOSE, &halpha, matA, matB, &hbeta, matC, HIP_R_16F, HIPSPARSE_SDDMM_ALG_DEFAULT, dbuffer); hipMemcpy(hcsr_row_ptrC.data(), dcsr_row_ptrC, sizeof(int) * (m + 1), hipMemcpyDeviceToHost); hipMemcpy(hcsr_col_indC.data(), dcsr_col_indC, sizeof(int) * nnzC, hipMemcpyDeviceToHost); hipMemcpy(hcsr_valC.data(), dcsr_valC, sizeof(_Float16) * nnzC, hipMemcpyDeviceToHost); hipsparseDestroyMatDescr(matA); hipsparseDestroyMatDescr(matB); hipsparseDestroyMatDescr(matC); hipsparseDestroy(handle); hipFree(dA); hipFree(dB); hipFree(dcsr_row_ptrC); hipFree(dcsr_col_indC); hipFree(dcsr_valC); hipFree(dbuffer);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] dense matrix \(A\) operation type.
opB – [in] dense matrix \(B\) operation type.
alpha – [in] scalar \(\alpha\).
A – [in] dense matrix \(A\) descriptor.
B – [in] dense matrix \(B\) descriptor.
beta – [in] scalar \(\beta\).
C – [inout] sparse matrix \(C\) descriptor.
computeType – [in] floating point precision for the SDDMM computation.
alg – [in] specification of the algorithm to use.
tempBuffer – [in] temporary storage buffer allocated by the user. The size must be greater or equal to the size obtained with hipsparseSDDMM_bufferSize.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,beta
,A
,B
,C
ortempBuffer
pointer is invalid or the value ofopA
oropB
is incorrect.HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
== HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE oropB
== HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE.
hipsparseSpSV_createDescr()#
-
hipsparseStatus_t hipsparseSpSV_createDescr(hipsparseSpSVDescr_t *descr)#
hipsparseSpSV_createDescr
creates a sparse matrix triangular solve descriptor. It should be destroyed at the end using hipsparseSpSV_destroyDescr().
hipsparseSpSV_destroyDescr()#
-
hipsparseStatus_t hipsparseSpSV_destroyDescr(hipsparseSpSVDescr_t descr)#
hipsparseSpSV_destroyDescr
destroys a sparse matrix triangular solve descriptor and releases all resources used by the descriptor.
hipsparseSpSV_bufferSize()#
-
hipsparseStatus_t hipsparseSpSV_bufferSize(hipsparseHandle_t handle, hipsparseOperation_t opA, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnVecDescr_t x, const hipsparseDnVecDescr_t y, hipDataType computeType, hipsparseSpSVAlg_t alg, hipsparseSpSVDescr_t spsvDescr, size_t *pBufferSizeInBytes)#
hipsparseSpSV_bufferSize
computes the size of the required user allocated buffer needed when solving the triangular linear system:\[ op(A) \cdot y := \alpha \cdot x, \]where \(op(A)\) is a sparse matrix in CSR or COO storage format, \(x\) and \(y\) are dense vectors.hipsparseSpSV_bufferSize
supports multiple combinations of data types and compute types. See hipsparseSpSV_solve for a complete listing of all the data type and compute type combinations available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] matrix operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] matrix descriptor.
x – [in] vector descriptor.
y – [inout] vector descriptor.
computeType – [in] floating point precision for the SpSV computation.
alg – [in] SpSV algorithm for the SpSV computation.
spsvDescr – [in] SpSV descriptor.
pBufferSizeInBytes – [out] number of bytes of the temporary storage buffer.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,matA
,x
,y
,spsvDescr
orpBufferSizeInBytes
pointer is invalid.HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
,computeType
oralg
is currently not supported.
hipsparseSpSV_analysis()#
-
hipsparseStatus_t hipsparseSpSV_analysis(hipsparseHandle_t handle, hipsparseOperation_t opA, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnVecDescr_t x, const hipsparseDnVecDescr_t y, hipDataType computeType, hipsparseSpSVAlg_t alg, hipsparseSpSVDescr_t spsvDescr, void *externalBuffer)#
hipsparseSpSV_analysis
performs the required analysis needed when solving the triangular linear system:\[ op(A) \cdot y := \alpha \cdot x, \]where \(op(A)\) is a sparse matrix in CSR or COO storage format, \(x\) and \(y\) are dense vectors.hipsparseSpSV_analysis
supports multiple combinations of data types and compute types. See hipsparseSpSV_solve for a complete listing of all the data type and compute type combinations available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] matrix operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] matrix descriptor.
x – [in] vector descriptor.
y – [inout] vector descriptor.
computeType – [in] floating point precision for the SpSV computation.
alg – [in] SpSV algorithm for the SpSV computation.
spsvDescr – [in] SpSV descriptor.
externalBuffer – [out] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,matA
,x
,y
,spsvDescr
orexternalBuffer
pointer is invalid.HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
,computeType
oralg
is currently not supported.
hipsparseSpSV_solve()#
-
hipsparseStatus_t hipsparseSpSV_solve(hipsparseHandle_t handle, hipsparseOperation_t opA, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnVecDescr_t x, const hipsparseDnVecDescr_t y, hipDataType computeType, hipsparseSpSVAlg_t alg, hipsparseSpSVDescr_t spsvDescr)#
Sparse triangular solve.
hipsparseSpSV_solve
solves a triangular linear system of equations defined by a sparse \(m \times m\) square matrix \(op(A)\), given in CSR or COO storage format, such that\[ op(A) \cdot y = \alpha \cdot x, \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if trans == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if trans == HIPSPARSE_OPERATION_TRANSPOSE} \end{array} \right. \end{split}\]and where \(y\) is the dense solution vector and \(x\) is the dense right-hand side vector.Performing the above operation requires three steps. First, hipsparseSpSV_bufferSize must be called which will determine the size of the required temporary storage buffer. The user then allocates this buffer and calls hipsparseSpSV_analysis which will perform analysis on the sparse matrix \(op(A)\). Finally, the user completes the computation by calling
hipsparseSpSV_solve
. The buffer size and preprecess routines only need to be called once for a given sparse matrix \(op(A)\) while the computation can be repeatedly used with different \(x\) and \(y\) vectors. Once all calls tohipsparseSpSV_solve
are complete, the temporary buffer can be deallocated.hipsparseSpSV_solve
supports HIPSPARSE_INDEX_32I and HIPSPARSE_INDEX_64I index types for storing the row pointer and column indices arrays of the sparse matrices.hipsparseSpSV_solve
supports the following data types for \(op(A)\), \(x\), \(y\) and compute types for \(\alpha\):- Uniform Precisions:
A / X / Y / compute_type
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Example
// 1 0 0 0 // A = 4 2 0 0 // 0 3 7 0 // 0 0 0 1 int m = 4; std::vector<int> hcsr_row_ptr = {0, 1, 3, 5, 6}; std::vector<int> hcsr_col_ind = {0, 0, 1, 1, 2, 3}; std::vector<float> hcsr_val = {1, 4, 2, 3, 7, 1}; std::vector<float> hx(m, 1.0f); std::vector<float> hy(m, 0.0f); // Scalar alpha float alpha = 1.0f; int nnz = hcsr_row_ptr[m] - hcsr_row_ptr[0]; // Offload data to device int* dcsr_row_ptr; int* dcsr_col_ind; float* dcsr_val; float* dx; float* dy; hipMalloc((void**)&dcsr_row_ptr, sizeof(int) * (m + 1)); hipMalloc((void**)&dcsr_col_ind, sizeof(int) * nnz); hipMalloc((void**)&dcsr_val, sizeof(float) * nnz); hipMalloc((void**)&dx, sizeof(float) * m); hipMalloc((void**)&dy, sizeof(float) * m); hipMemcpy(dcsr_row_ptr, hcsr_row_ptr.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice); hipMemcpy(dcsr_col_ind, hcsr_col_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice); hipMemcpy(dcsr_val, hcsr_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice); hipMemcpy(dx, hx.data(), sizeof(float) * m, hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseSpMatDescr_t matA; hipsparseDnVecDescr_t vecX; hipsparseDnVecDescr_t vecY; hipsparseIndexType_t row_idx_type = HIPSPARSE_INDEX_32I; hipsparseIndexType_t col_idx_type = HIPSPARSE_INDEX_32I; hipDataType data_type = HIP_R_32F; hipDataType computeType = HIP_R_32F; hipsparseIndexBase_t idx_base = HIPSPARSE_INDEX_BASE_ZERO; hipsparseOperation_t trans = HIPSPARSE_OPERATION_NON_TRANSPOSE; hipsparseCreate(&handle); // Create sparse matrix A hipsparseCreateCsr(&matA, m, m, nnz, dcsr_row_ptr, dcsr_col_ind, dcsr_val, row_idx_type, col_idx_type, idx_base, data_type); // Create dense vector X hipsparseCreateDnVec(&vecX, m, dx, data_type); // Create dense vector Y hipsparseCreateDnVec(&vecY, m, dy, data_type); hipsparseSpSVDescr_t descr; hipsparseSpSV_createDescr(&descr); // Call spsv to get buffer size size_t buffer_size; hipsparseSpSV_bufferSize(handle, trans, &alpha, matA, vecX, vecY, computeType, HIPSPARSE_SPSV_ALG_DEFAULT, descr, &buffer_size); void* temp_buffer; hipMalloc((void**)&temp_buffer, buffer_size); // Call spsv to perform analysis hipsparseSpSV_analysis(handle, trans, &alpha, matA, vecX, vecY, computeType, HIPSPARSE_SPSV_ALG_DEFAULT, descr, temp_buffer); // Call spsv to perform computation hipsparseSpSV_solve(handle, trans, &alpha, matA, vecX, vecY, computeType, HIPSPARSE_SPSV_ALG_DEFAULT, descr); // Copy result back to host hipMemcpy(hy.data(), dy, sizeof(float) * m, hipMemcpyDeviceToHost); // Clear hipSPARSE hipsparseSpSV_destroyDescr(descr); hipsparseDestroyMatDescr(matA); hipsparseDestroyDnVec(vecX); hipsparseDestroyDnVec(vecY); hipsparseDestroy(handle); // Clear device memory hipFree(dcsr_row_ptr); hipFree(dcsr_col_ind); hipFree(dcsr_val); hipFree(dx); hipFree(dy); hipFree(temp_buffer);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] matrix operation type.
alpha – [in] scalar \(\alpha\).
matA – [in] matrix descriptor.
x – [in] vector descriptor.
y – [inout] vector descriptor.
computeType – [in] floating point precision for the SpSV computation.
alg – [in] SpSV algorithm for the SpSV computation.
spsvDescr – [in] SpSV descriptor.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,matA
,x
,y
, orspsvDescr
pointer is invalid.HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
,computeType
oralg
is currently not supported.
hipsparseSpSM_createDescr()#
-
hipsparseStatus_t hipsparseSpSM_createDescr(hipsparseSpSMDescr_t *descr)#
hipsparseSpSM_createDescr
creates a sparse matrix triangular solve with multiple rhs descriptor. It should be destroyed at the end using hipsparseSpSM_destroyDescr().
hipsparseSpSM_destroyDescr()#
-
hipsparseStatus_t hipsparseSpSM_destroyDescr(hipsparseSpSMDescr_t descr)#
hipsparseSpSM_destroyDescr
destroys a sparse matrix triangular solve with multiple rhs descriptor and releases all resources used by the descriptor.
hipsparseSpSM_bufferSize()#
-
hipsparseStatus_t hipsparseSpSM_bufferSize(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnMatDescr_t matB, const hipsparseDnMatDescr_t matC, hipDataType computeType, hipsparseSpSMAlg_t alg, hipsparseSpSMDescr_t spsmDescr, size_t *pBufferSizeInBytes)#
hipsparseSpSM_bufferSize
computes the size of the required user allocated buffer needed when solving the triangular linear system:\[ op(A) \cdot C := \alpha \cdot op(B), \]where \(op(A)\) is a square sparse matrix in CSR or COO storage format, \(B\) and \(C\) are dense matrices.hipsparseSpSM_bufferSize
supports multiple combinations of data types and compute types. See hipsparseSpSM_solve for a complete listing of all the data type and compute type combinations available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] matrix operation type for the sparse matrix \(A\).
opB – [in] matrix operation type for the dense matrix \(B\).
alpha – [in] scalar \(\alpha\).
matA – [in] sparse matrix descriptor.
matB – [in] dense matrix descriptor.
matC – [inout] dense matrix descriptor.
computeType – [in] floating point precision for the SpSM computation.
alg – [in] SpSM algorithm for the SpSM computation.
spsmDescr – [in] SpSM descriptor.
pBufferSizeInBytes – [out] number of bytes of the temporary storage buffer.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,matA
,matB
,matC
,spsmDescr
orpBufferSizeInBytes
pointer is invalid.HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
,opB
,computeType
oralg
is currently not supported.
hipsparseSpSM_analysis()#
-
hipsparseStatus_t hipsparseSpSM_analysis(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnMatDescr_t matB, const hipsparseDnMatDescr_t matC, hipDataType computeType, hipsparseSpSMAlg_t alg, hipsparseSpSMDescr_t spsmDescr, void *externalBuffer)#
hipsparseSpSM_analysis
performs the required analysis needed when solving the triangular linear system:\[ op(A) \cdot C := \alpha \cdot op(B), \]where \(A\) is a sparse matrix in CSR or COO storage format, \(B\) and \(C\) are dense vectors.hipsparseSpSM_bufferSize
supports multiple combinations of data types and compute types. See hipsparseSpSM_solve for a complete listing of all the data type and compute type combinations available.- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] matrix operation type for the sparse matrix \(A\).
opB – [in] matrix operation type for the dense matrix \(B\).
alpha – [in] scalar \(\alpha\).
matA – [in] sparse matrix descriptor.
matB – [in] dense matrix descriptor.
matC – [inout] dense matrix descriptor.
computeType – [in] floating point precision for the SpSM computation.
alg – [in] SpSM algorithm for the SpSM computation.
spsmDescr – [in] SpSM descriptor.
externalBuffer – [out] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,matA
,matB
,matC
,spsmDescr
orexternalBuffer
pointer is invalid.HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
,opB
,computeType
oralg
is currently not supported.
hipsparseSpSM_solve()#
-
hipsparseStatus_t hipsparseSpSM_solve(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnMatDescr_t matB, const hipsparseDnMatDescr_t matC, hipDataType computeType, hipsparseSpSMAlg_t alg, hipsparseSpSMDescr_t spsmDescr, void *externalBuffer)#
Sparse triangular system solve.
hipsparseSpSM_solve
solves a triangular linear system of equations defined by a sparse \(m \times m\) square matrix \(op(A)\), given in CSR or COO storage format, such that\[ op(A) \cdot C = \alpha \cdot op(B), \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if opA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if opB == HIPSPARSE_OPERATION_TRANSPOSE} \end{array} \right. \end{split}\]and\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if opA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if opB == HIPSPARSE_OPERATION_TRANSPOSE} \end{array} \right. \end{split}\]and where \(C\) is the dense solution matrix and \(B\) is the dense right-hand side matrix. Both \(B\) and \(C\) can be in row or column order.Performing the above operation requires three steps. First, the user calls hipsparseSpSM_bufferSize in order to determine the size of the required temporary storage buffer. The user then allocates this buffer and calls hipsparseSpSM_analysis which will perform analysis on the sparse matrix \(op(A)\). Finally, the user completes the computation by calling
hipsparseSpSM_solve
. The buffer size and analysis routines only need to be called once for a given sparse matrix \(op(A)\) while the computation can be called repeatedly with different \(B\) and \(C\) matrices. Once all calls tohipsparseSpSM_solve
are complete, the temporary buffer can be deallocated.As noted above, both \(B\) and \(C\) can be in row or column order (this includes mixing the order so that \(B\) is row order and \(C\) is column order and vice versa). When running on an AMD system with the rocSPARSE backend, the kernels solve the system assuming the matrices \(B\) and \(C\) are in row order as this provides the best memory access. This means that if the matrix \(C\) is not in row order and/or the matrix \(B\) is not row order (or \(B^{T}\) is not column order as this is equivalent to being in row order), then internally memory copies and/or transposing of data may be performed to get them into the correct order (possbily using extra buffer size). Once computation is completed, additional memory copies and/or transposing of data may be performed to get them back into the user arrays. For best performance and smallest required temporary storage buffers on an AMD system, use row order for the matrix \(C\) and row order for the matrix \(B\) (or column order if \(B\) is being transposed).
hipsparseSpSM_solve
supports HIPSPARSE_INDEX_32I and HIPSPARSE_INDEX_64I index precisions for storing the row pointer and column indices arrays of the sparse matrices.hipsparseSpSM_solve
supports the following data types for \(op(A)\), \(op(B)\), \(C\) and compute types for \(\alpha\):- Uniform Precisions:
A / B / C / compute_type
HIP_R_32F
HIP_R_64F
HIP_C_32F
HIP_C_64F
- Example
// 1 0 0 0 // A = 4 2 0 0 // 0 3 7 0 // 0 0 0 1 int m = 4; int n = 2; std::vector<int> hcsr_row_ptr = {0, 1, 3, 5, 6}; std::vector<int> hcsr_col_ind = {0, 0, 1, 1, 2, 3}; std::vector<float> hcsr_val = {1, 4, 2, 3, 7, 1}; std::vector<float> hB(m * n); std::vector<float> hC(m * n); for(int i = 0; i < n; i++) { for(int j = 0; j < m; j++) { hB[m * i + j] = static_cast<float>(i + 1); } } // Scalar alpha float alpha = 1.0f; int nnz = hcsr_row_ptr[m] - hcsr_row_ptr[0]; // Offload data to device int* dcsr_row_ptr; int* dcsr_col_ind; float* dcsr_val; float* dB; float* dC; hipMalloc((void**)&dcsr_row_ptr, sizeof(int) * (m + 1)); hipMalloc((void**)&dcsr_col_ind, sizeof(int) * nnz); hipMalloc((void**)&dcsr_val, sizeof(float) * nnz); hipMalloc((void**)&dB, sizeof(float) * m * n); hipMalloc((void**)&dC, sizeof(float) * m * n); hipMemcpy(dcsr_row_ptr, hcsr_row_ptr.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice); hipMemcpy(dcsr_col_ind, hcsr_col_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice); hipMemcpy(dcsr_val, hcsr_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice); hipMemcpy(dB, hB.data(), sizeof(float) * m * n, hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseSpMatDescr_t matA; hipsparseDnMatDescr_t matB; hipsparseDnMatDescr_t matC; hipsparseIndexType_t row_idx_type = HIPSPARSE_INDEX_32I; hipsparseIndexType_t col_idx_type = HIPSPARSE_INDEX_32I; hipDataType dataType = HIP_R_32F; hipDataType computeType = HIP_R_32F; hipsparseIndexBase_t idxBase = HIPSPARSE_INDEX_BASE_ZERO; hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE; hipsparseOperation_t transB = HIPSPARSE_OPERATION_NON_TRANSPOSE; hipsparseCreate(&handle); // Create sparse matrix A hipsparseCreateCsr(&matA, m, m, nnz, dcsr_row_ptr, dcsr_col_ind, dcsr_val, row_idx_type, col_idx_type, idxBase, dataType); // Create dense matrix B hipsparseCreateDnMat(&matB, m, n, m, dB, dataType, HIPSPARSE_ORDER_COLUMN); // Create dense matrix C hipsparseCreateDnMat(&matC, m, n, m, dC, dataType, HIPSPARSE_ORDER_COLUMN); hipsparseSpSMDescr_t descr; hipsparseSpSM_createDescr(&descr); // Call SpSM to get buffer size size_t buffer_size; hipsparseSpSM_bufferSize(handle, transA, transB, &alpha, matA, matB, matC, computeType, HIPSPARSE_SPSM_ALG_DEFAULT, descr, &buffer_size); void* temp_buffer; hipMalloc((void**)&temp_buffer, buffer_size); // Call SpSM to perform analysis hipsparseSpSM_analysis(handle, transA, transB, &alpha, matA, matB, matC, computeType, HIPSPARSE_SPSM_ALG_DEFAULT, descr, temp_buffer); // Call SpSM to perform computation hipsparseSpSM_solve(handle, transA, transB, &alpha, matA, matB, matC, computeType, HIPSPARSE_SPSM_ALG_DEFAULT, descr, temp_buffer); // Copy result back to host hipMemcpy(hC.data(), dC, sizeof(float) * m * n, hipMemcpyDeviceToHost); // Clear hipSPARSE hipsparseSpSM_destroyDescr(descr); hipsparseDestroyMatDescr(matA); hipsparseDestroyDnMat(matB); hipsparseDestroyDnMat(matC); hipsparseDestroy(handle); // Clear device memory hipFree(dcsr_row_ptr); hipFree(dcsr_col_ind); hipFree(dcsr_val); hipFree(dB); hipFree(dC); hipFree(temp_buffer);
- Parameters:
handle – [in] handle to the hipsparse library context queue.
opA – [in] matrix operation type for the sparse matrix \(A\).
opB – [in] matrix operation type for the dense matrix \(B\).
alpha – [in] scalar \(\alpha\).
matA – [in] sparse matrix descriptor.
matB – [in] dense matrix descriptor.
matC – [inout] dense matrix descriptor.
computeType – [in] floating point precision for the SpSM computation.
alg – [in] SpSM algorithm for the SpSM computation.
spsmDescr – [in] SpSM descriptor.
externalBuffer – [out] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle
,alpha
,matA
,matB
,matC
,spsmDescr
orexternalBuffer
pointer is invalid.HIPSPARSE_STATUS_NOT_SUPPORTED –
opA
,opB
,computeType
oralg
is currently not supported.