📄 cublas.cu

📁 NVIDIA提供的CUDA的BLAS库源码
💻 CU
📖 第 1 页 / 共 2 页
字号:
上一页 12
{    struct cublasContext *ctx = CUBLAS_GET_CTX();    cudaError_t cudaStat;    if (!cublasInitialized (ctx)) {        return CUBLAS_STATUS_NOT_INITIALIZED;    }    if (devicePtr) {        cudaStat = cudaFree ((void *)devicePtr);        if (cudaStat != cudaSuccess) {            /* should never fail, except when there is internal corruption*/            return CUBLAS_STATUS_INTERNAL_ERROR;        }    }    return CUBLAS_STATUS_SUCCESS;}/*  * cublasStatus  * cublasSetVector (int n, int elemSize, const void *x, int incx,  *                  void *y, int incy)  * * copies n elements from a vector x in CPU memory space to a vector y  * in GPU memory space. Elements in both vectors are assumed to have a  * size of elemSize bytes. Storage spacing between consecutive elements * is incx for the source vector x and incy for the destination vector * y. In general, y points to an object, or part of an object, allocated * via cublasAlloc(). Column major format for two-dimensional matrices * is assumed throughout CUBLAS. Therefore, if the increment for a vector  * is equal to 1, this access a column vector while using an increment  * equal to the leading dimension of the respective matrix accesses a  * row vector. 
*
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if incx, incy, or elemSize <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    if an error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          if the operation completed successfully
 */
cublasStatus CUBLASAPI cublasSetVector (int n, int elemSize,
                                        const void *hostPtr, int incx,
                                        void *devicePtr, int incy)
{
    struct cublasContext *ctx = CUBLAS_GET_CTX();
    cudaError_t cudaStat = cudaSuccess;
    const char *sp = (const char *)hostPtr;
    char *dp = (char *)devicePtr;

    if (!cublasInitialized (ctx)) {
        return CUBLAS_STATUS_NOT_INITIALIZED;
    }
    if ((incx <= 0) || (incy <= 0) || (elemSize <= 0)) {
        return CUBLAS_STATUS_INVALID_VALUE;
    }
    /* early out if nothing to do */
    if (n == 0) {
        return CUBLAS_STATUS_SUCCESS;
    }

    /*
     * Byte counts and pitches are computed in size_t: the plain int
     * products (n * elemSize, incx * elemSize, incy * elemSize) overflow
     * once a transfer or stride reaches 2 GiB.
     */
    if ((incx == 1) && (incy == 1)) {
        /* both vectors are densely packed: a single linear copy suffices */
        cudaStat = cudaMemcpy (dp, sp, (size_t)n * (size_t)elemSize,
                               cudaMemcpyHostToDevice);
    } else {
        /*
         * strided copy expressed as a 2D copy: n rows, each elemSize bytes
         * wide, with row pitches of incy (destination) and incx (source)
         * elements
         */
        cudaStat = cudaMemcpy2D (dp, (size_t)incy * (size_t)elemSize,
                                 sp, (size_t)incx * (size_t)elemSize,
                                 (size_t)elemSize, (size_t)n,
                                 cudaMemcpyHostToDevice);
    }
    if (cudaStat != cudaSuccess) {
        return CUBLAS_STATUS_MAPPING_ERROR;
    }
    return CUBLAS_STATUS_SUCCESS;
}

/*
 * cublasStatus
 * cublasGetVector (int n, int elemSize, const void *x, int incx,
 *                  void *y, int incy)
 *
 * copies n elements from a vector x in GPU memory space to a vector y
 * in CPU memory space. Elements in both vectors are assumed to have a
 * size of elemSize bytes. Storage spacing between consecutive elements
 * is incx for the source vector x and incy for the destination vector
 * y. In general, x points to an object, or part of an object, allocated
 * via cublasAlloc(). Column major format for two-dimensional matrices
 * is assumed throughout CUBLAS.
Therefore, if the increment for a vector
 * is equal to 1, this accesses a column vector while using an increment
 * equal to the leading dimension of the respective matrix accesses a
 * row vector.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if incx, incy, or elemSize <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    if an error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          if the operation completed successfully
 */
cublasStatus CUBLASAPI cublasGetVector (int n, int elemSize,
                                        const void *devicePtr, int incx,
                                        void *hostPtr, int incy)
{
    struct cublasContext *ctx = CUBLAS_GET_CTX();
    cudaError_t cudaStat = cudaSuccess;
    const char *sp = (const char *)devicePtr;
    char *dp = (char *)hostPtr;

    if (!cublasInitialized (ctx)) {
        return CUBLAS_STATUS_NOT_INITIALIZED;
    }
    if ((incx <= 0) || (incy <= 0) || (elemSize <= 0)) {
        return CUBLAS_STATUS_INVALID_VALUE;
    }
    /* early out if nothing to do */
    if (n == 0) {
        return CUBLAS_STATUS_SUCCESS;
    }

    /*
     * Byte counts and pitches are computed in size_t: the plain int
     * products (n * elemSize, incx * elemSize, incy * elemSize) overflow
     * once a transfer or stride reaches 2 GiB.
     */
    if ((incx == 1) && (incy == 1)) {
        /* both vectors are densely packed: a single linear copy suffices */
        cudaStat = cudaMemcpy (dp, sp, (size_t)n * (size_t)elemSize,
                               cudaMemcpyDeviceToHost);
    } else {
        /*
         * strided copy expressed as a 2D copy: n rows, each elemSize bytes
         * wide, with row pitches of incy (destination) and incx (source)
         * elements
         */
        cudaStat = cudaMemcpy2D (dp, (size_t)incy * (size_t)elemSize,
                                 sp, (size_t)incx * (size_t)elemSize,
                                 (size_t)elemSize, (size_t)n,
                                 cudaMemcpyDeviceToHost);
    }
    if (cudaStat != cudaSuccess) {
        return CUBLAS_STATUS_MAPPING_ERROR;
    }
    return CUBLAS_STATUS_SUCCESS;
}

/*
 * cublasStatus
 * cublasSetMatrix (int rows, int cols, int elemSize, const void *A,
 *                  int lda, void *B, int ldb)
 *
 * copies a tile of rows x cols elements from a matrix A in CPU memory
 * space to a matrix B in GPU memory space. Each element requires storage
 * of elemSize bytes. Both matrices are assumed to be stored in column
 * major format, with the leading dimension (i.e.
number of rows) of
 * source matrix A provided in lda, and the leading dimension of matrix B
 * provided in ldb. In general, B points to an object, or part of an
 * object, that was allocated via cublasAlloc().
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if rows or cols < 0, or elemSize, lda, or
 *                                ldb <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    if error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          if the operation completed successfully
 */
cublasStatus CUBLASAPI cublasSetMatrix (int rows, int cols, int elemSize,
                                        const void *A, int lda, void *B,
                                        int ldb)
{
    struct cublasContext *ctx = CUBLAS_GET_CTX();
    cudaError_t cudaStat = cudaSuccess;
    const char *sp = (const char *)A;
    char *dp = (char *)B;

    if (!cublasInitialized (ctx)) {
        return CUBLAS_STATUS_NOT_INITIALIZED;
    }
    if ((lda <= 0) || (ldb <= 0) || (elemSize <= 0) ||
        (rows < 0) || (cols < 0)) {
        return CUBLAS_STATUS_INVALID_VALUE;
    }
    /* early out if nothing to do */
    if ((rows == 0) || (cols == 0)) {
        return CUBLAS_STATUS_SUCCESS;
    }

    /*
     * Byte counts and pitches are computed in size_t: the plain int
     * products (rows * cols * elemSize, lda * elemSize, ...) overflow
     * once a tile or a column pitch reaches 2 GiB.
     */
    if ((rows == lda) && (rows == ldb)) {
        /* tile spans whole columns on both sides: one linear copy */
        cudaStat = cudaMemcpy (dp, sp,
                               (size_t)rows * (size_t)cols * (size_t)elemSize,
                               cudaMemcpyHostToDevice);
    } else {
        /*
         * column-major tile expressed as a 2D copy: each of the cols
         * columns is one "row" of rows * elemSize bytes, with pitches
         * given by the leading dimensions
         */
        cudaStat = cudaMemcpy2D (dp, (size_t)ldb * (size_t)elemSize,
                                 sp, (size_t)lda * (size_t)elemSize,
                                 (size_t)rows * (size_t)elemSize,
                                 (size_t)cols,
                                 cudaMemcpyHostToDevice);
    }
    if (cudaStat != cudaSuccess) {
        return CUBLAS_STATUS_MAPPING_ERROR;
    }
    return CUBLAS_STATUS_SUCCESS;
}

/*
 * cublasStatus
 * cublasGetMatrix (int rows, int cols, int elemSize, const void *A,
 *                  int lda, void *B, int ldb)
 *
 * copies a tile of rows x cols elements from a matrix A in GPU memory
 * space to a matrix B in CPU memory space.
Each element requires storage
 * of elemSize bytes. Both matrices are assumed to be stored in column
 * major format, with the leading dimension (i.e. number of rows) of
 * source matrix A provided in lda, and the leading dimension of matrix B
 * provided in ldb. In general, A points to an object, or part of an
 * object, that was allocated via cublasAlloc().
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if rows or cols < 0, or elemSize, lda, or
 *                                ldb <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    if error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          if the operation completed successfully
 */
cublasStatus CUBLASAPI cublasGetMatrix (int rows, int cols, int elemSize,
                                        const void *A, int lda, void *B,
                                        int ldb)
{
    struct cublasContext *ctx = CUBLAS_GET_CTX();
    cudaError_t cudaStat = cudaSuccess;
    const char *sp = (const char *)A;
    char *dp = (char *)B;

    if (!cublasInitialized (ctx)) {
        return CUBLAS_STATUS_NOT_INITIALIZED;
    }
    if ((lda <= 0) || (ldb <= 0) || (elemSize <= 0) ||
        (rows < 0) || (cols < 0)) {
        return CUBLAS_STATUS_INVALID_VALUE;
    }
    /* early out if nothing to do */
    if ((rows == 0) || (cols == 0)) {
        return CUBLAS_STATUS_SUCCESS;
    }

    /*
     * Byte counts and pitches are computed in size_t: the plain int
     * products (rows * cols * elemSize, lda * elemSize, ...) overflow
     * once a tile or a column pitch reaches 2 GiB.
     */
    if ((rows == lda) && (rows == ldb)) {
        /* tile spans whole columns on both sides: one linear copy */
        cudaStat = cudaMemcpy (dp, sp,
                               (size_t)rows * (size_t)cols * (size_t)elemSize,
                               cudaMemcpyDeviceToHost);
    } else {
        /*
         * column-major tile expressed as a 2D copy: each of the cols
         * columns is one "row" of rows * elemSize bytes, with pitches
         * given by the leading dimensions
         */
        cudaStat = cudaMemcpy2D (dp, (size_t)ldb * (size_t)elemSize,
                                 sp, (size_t)lda * (size_t)elemSize,
                                 (size_t)rows * (size_t)elemSize,
                                 (size_t)cols,
                                 cudaMemcpyDeviceToHost);
    }
    if (cudaStat != cudaSuccess) {
        return CUBLAS_STATUS_MAPPING_ERROR;
    }
    return CUBLAS_STATUS_SUCCESS;
}

/* -------------------------- stub functions ------------------------------- */

/* Add a GUID to the compiled library for tracking purposes */
#include "../../cuda/common/version.h"
CUDA_STAMP_GUID;
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -