📄 cublas.cu
字号:
{ struct cublasContext *ctx = CUBLAS_GET_CTX(); cudaError_t cudaStat; if (!cublasInitialized (ctx)) { return CUBLAS_STATUS_NOT_INITIALIZED; } if (devicePtr) { cudaStat = cudaFree ((void *)devicePtr); if (cudaStat != cudaSuccess) { /* should never fail, except when there is internal corruption*/ return CUBLAS_STATUS_INTERNAL_ERROR; } } return CUBLAS_STATUS_SUCCESS;}/* * cublasStatus * cublasSetVector (int n, int elemSize, const void *x, int incx, * void *y, int incy) * * copies n elements from a vector x in CPU memory space to a vector y * in GPU memory space. Elements in both vectors are assumed to have a * size of elemSize bytes. Storage spacing between consecutive elements * is incx for the source vector x and incy for the destination vector * y. In general, y points to an object, or part of an object, allocated * via cublasAlloc(). Column major format for two-dimensional matrices * is assumed throughout CUBLAS. Therefore, if the increment for a vector * is equal to 1, this access a column vector while using an increment * equal to the leading dimension of the respective matrix accesses a * row vector. * * Return Values * ------------- * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx, incy, or elemSize <= 0 * CUBLAS_STATUS_MAPPING_ERROR if an error occurred accessing GPU memory * CUBLAS_STATUS_SUCCESS if the operation completed successfully */cublasStatus CUBLASAPI cublasSetVector (int n, int elemSize, const void *hostPtr, int incx, void *devicePtr, int incy){ struct cublasContext *ctx = CUBLAS_GET_CTX(); cudaError_t cudaStat = cudaSuccess; const char *sp = (const char *)hostPtr; char *dp = (char *)devicePtr; if (!cublasInitialized (ctx)) { return CUBLAS_STATUS_NOT_INITIALIZED; } if ((incx <= 0) || (incy <= 0) || (elemSize <= 0)) { return CUBLAS_STATUS_INVALID_VALUE; } /* early out if nothing to do */ if (n == 0) { return CUBLAS_STATUS_SUCCESS; } if ((incx == 1) && (incy == 1)) { cudaStat = cudaMemcpy (dp, sp, n * elemSize, cudaMemcpyHostToDevice); } else { cudaStat = cudaMemcpy2D (dp, incy * elemSize, sp, incx * elemSize, elemSize, n, cudaMemcpyHostToDevice); } if (cudaStat != cudaSuccess) { return CUBLAS_STATUS_MAPPING_ERROR; } return CUBLAS_STATUS_SUCCESS;}/* * cublasStatus * cublasGetVector (int n, int elemSize, const void *x, int incx, * void *y, int incy) * * copies n elements from a vector x in GPU memory space to a vector y * in CPU memory space. Elements in both vectors are assumed to have a * size of elemSize bytes. Storage spacing between consecutive elements * is incx for the source vector x and incy for the destination vector * y. In general, x points to an object, or part of an object, allocated * via cublasAlloc(). Column major format for two-dimensional matrices * is assumed throughout CUBLAS. Therefore, if the increment for a vector * is equal to 1, this access a column vector while using an increment * equal to the leading dimension of the respective matrix accesses a * row vector. * * Return Values * ------------- * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx, incy, or elemSize <= 0 * CUBLAS_STATUS_MAPPING_ERROR if an error occurred accessing GPU memory * CUBLAS_STATUS_SUCCESS if the operation completed successfully */cublasStatus CUBLASAPI cublasGetVector (int n, int elemSize, const void *devicePtr, int incx, void *hostPtr, int incy){ struct cublasContext *ctx = CUBLAS_GET_CTX(); cudaError_t cudaStat = cudaSuccess; const char *sp = (const char *)devicePtr; char *dp = (char *)hostPtr; if (!cublasInitialized (ctx)) { return CUBLAS_STATUS_NOT_INITIALIZED; } if ((incx <= 0) || (incy <= 0) || (elemSize <= 0)) { return CUBLAS_STATUS_INVALID_VALUE; } /* early out if nothing to do */ if (n == 0) { return CUBLAS_STATUS_SUCCESS; } if ((incx == 1) && (incy == 1)) { cudaStat = cudaMemcpy (dp, sp, n * elemSize, cudaMemcpyDeviceToHost); } else { cudaStat = cudaMemcpy2D (dp, incy * elemSize, sp, incx * elemSize, elemSize, n, cudaMemcpyDeviceToHost); } if (cudaStat != cudaSuccess) { return CUBLAS_STATUS_MAPPING_ERROR; } return CUBLAS_STATUS_SUCCESS;}/* * cublasStatus * cublasSetMatrix (int rows, int cols, int elemSize, const void *A, * int lda, void *B, int ldb) * * copies a tile of rows x cols elements from a matrix A in CPU memory * space to a matrix B in GPU memory space. Each element requires storage * of elemSize bytes. Both matrices are assumed to be stored in column * major format, with the leading dimension (i.e. number of rows) of * source matrix A provided in lda, and the leading dimension of matrix B * provided in ldb. In general, B points to an object, or part of an * object, that was allocated via cublasAlloc(). * * Return Values * ------------- * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if rows or cols < 0, or elemSize, lda, or * ldb <= 0 * CUBLAS_STATUS_MAPPING_ERROR if error occurred accessing GPU memory * CUBLAS_STATUS_SUCCESS if the operation completed successfully */cublasStatus CUBLASAPI cublasSetMatrix (int rows, int cols, int elemSize, const void *A, int lda, void *B, int ldb){ struct cublasContext *ctx = CUBLAS_GET_CTX(); cudaError_t cudaStat = cudaSuccess; const char *sp = (const char *)A; char *dp = (char *)B; if (!cublasInitialized (ctx)) { return CUBLAS_STATUS_NOT_INITIALIZED; } if ((lda <= 0) || (ldb <= 0) || (elemSize <= 0) || (rows < 0) || (cols<0)){ return CUBLAS_STATUS_INVALID_VALUE; } /* early out if nothing to do */ if ((rows == 0) || (cols == 0)) { return CUBLAS_STATUS_SUCCESS; } if ((rows == lda) && (rows == ldb)) { cudaStat = cudaMemcpy (dp, sp, rows * cols * elemSize, cudaMemcpyHostToDevice); } else { cudaStat = cudaMemcpy2D (dp, ldb * elemSize, sp, lda * elemSize, rows * elemSize, cols, cudaMemcpyHostToDevice); } if (cudaStat != cudaSuccess) { return CUBLAS_STATUS_MAPPING_ERROR; } return CUBLAS_STATUS_SUCCESS;}/* * cublasStatus * cublasGetMatrix (int rows, int cols, int elemSize, const void *A, * int lda, void *B, int ldb) * * copies a tile of rows x cols elements from a matrix A in GPU memory * space to a matrix B in CPU memory space. Each element requires storage * of elemSize bytes. Both matrices are assumed to be stored in column * major format, with the leading dimension (i.e. number of rows) of * source matrix A provided in lda, and the leading dimension of matrix B * provided in ldb. In general, A points to an object, or part of an * object, that was allocated via cublasAlloc(). * * Return Values * ------------- * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if rows, cols, eleSize, lda, or ldb <= 0 * CUBLAS_STATUS_MAPPING_ERROR if error occurred accessing GPU memory * CUBLAS_STATUS_SUCCESS if the operation completed successfully */cublasStatus CUBLASAPI cublasGetMatrix (int rows, int cols, int elemSize, const void *A, int lda, void *B, int ldb){ struct cublasContext *ctx = CUBLAS_GET_CTX(); cudaError_t cudaStat = cudaSuccess; const char *sp = (const char *)A; char *dp = (char *)B; if (!cublasInitialized (ctx)) { return CUBLAS_STATUS_NOT_INITIALIZED; } if ((lda <= 0) || (ldb <= 0) || (elemSize <= 0) || (rows < 0) || (cols<0)){ return CUBLAS_STATUS_INVALID_VALUE; } /* early out if nothing to do */ if ((rows == 0) || (cols == 0)) { return CUBLAS_STATUS_SUCCESS; } if ((rows == lda) && (rows == ldb)) { cudaStat = cudaMemcpy (dp, sp, rows * cols * elemSize, cudaMemcpyDeviceToHost); } else { cudaStat = cudaMemcpy2D (dp, ldb * elemSize, sp, lda * elemSize, rows * elemSize, cols, cudaMemcpyDeviceToHost); } if (cudaStat != cudaSuccess) { return CUBLAS_STATUS_MAPPING_ERROR; } return CUBLAS_STATUS_SUCCESS;}/* -------------------------- stub functions ------------------------------- *//* Add a GUID to the compiled library for tracking purposes */#include "../../cuda/common/version.h"CUDA_STAMP_GUID;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -