📄 cublas.cu

📁 NVIDIA提供的CUDA的BLAS库源码
💻 CU
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 * Copyright 1993-2008 NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO USER:
 *
 * This source code is subject to NVIDIA ownership rights under U.S. and
 * international Copyright laws.
 *
 * This software and the information contained herein is being provided
 * under the terms and conditions of a Source Code License Agreement.
 *
 * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
 * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
 * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
 * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
 * OR PERFORMANCE OF THIS SOURCE CODE.
 *
 * U.S. Government End Users.   This source code is a "commercial item" as
 * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
 * "commercial computer  software"  and "commercial computer software
 * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
 * and is provided to the U.S. Government only as a commercial end item.
 * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
 * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
 * source code with only those rights set forth herein.
 */

/* This file contains the implementation of the CUBLAS helper functions */

#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include "cublas.h"   /* CUBLAS public header file  */
#include "cublasP.h"  /* CUBLAS private header file */

/* the next two macro definitions trigger
 * code generation when tlsHook.h is included
 */
#define __tlsHookIdentifier cublasThreadContext
#define __tlsHookType       struct cublasContext
#include <tlshook.h>

/*
 * Records 'error' as the last error of the given context.
 * A NULL context is silently ignored.
 */
void cublasSetError (struct cublasContext *ctx, cublasStatus error)
{
    if (ctx) {
        ctx->cublasLastError = error;
    }
}

/*
 * Returns the context's cublasIsInitialized flag, or 0 when ctx is NULL.
 */
int cublasInitialized (const struct cublasContext *ctx)
{
    if (!ctx) {
        return 0;
    } else {
        return ctx->cublasIsInitialized;
    }
}

/*
 * For a given vector size, cublasVectorSplay() determines what CTA grid
 * size to use, and how many threads per CTA.
 *
 * Inputs
 * ------
 * n       vector size
 * tMin    smallest thread count assigned to a CTA; also the granularity
 *         to which thread and element counts are rounded up
 * tMax    largest thread count assigned to a CTA
 * gridW   largest number of CTAs that is ever assigned
 *
 * Outputs
 * -------
 * nbrCtas        number of CTAs to launch
 * elemsPerCta    number of vector elements handled by each CTA
 * threadsPerCta  number of threads per CTA
 *
 * Four cases are distinguished:
 *   n < tMin          one CTA of tMin threads handling all n elements
 *   n < gridW * tMin  ceil(n / tMin) CTAs of tMin threads, one element
 *                     per thread
 *   n < gridW * tMax  gridW CTAs; threads per CTA grow in multiples of
 *                     tMin, one element per thread
 *   otherwise         gridW CTAs of tMax threads; elemsPerCta (a multiple
 *                     of tMin) is sized so that gridW CTAs cover n
 */
void cublasVectorSplay (int n, int tMin, int tMax, int gridW, int *nbrCtas,
                        int *elemsPerCta, int *threadsPerCta)
{
    if (n < tMin) {
        *nbrCtas = 1;
        *elemsPerCta = n;
        *threadsPerCta = tMin;
    } else if (n < (gridW * tMin)) {
        *nbrCtas = ((n + tMin - 1) / tMin);
        *threadsPerCta = tMin;
        *elemsPerCta = *threadsPerCta;
    } else if (n < (gridW * tMax)) {
        int grp;
        *nbrCtas = gridW;
        grp = ((n + tMin - 1) / tMin);   /* number of tMin-sized groups */
        *threadsPerCta = (((grp + gridW - 1) / gridW) * tMin);
        *elemsPerCta = *threadsPerCta;
    } else {
        int grp;
        *nbrCtas = gridW;
        *threadsPerCta = tMax;
        grp = ((n + tMin - 1) / tMin);   /* number of tMin-sized groups */
        grp = ((grp + gridW - 1) / gridW); /* groups per CTA, rounded up */
        *elemsPerCta = grp * tMin;
    }
}

/*
 * Context tear-down hook handed to the TLS hook machinery by cublasInit().
 * There is currently nothing to release, so this is intentionally empty.
 */
void cublasShutDownCtx (struct cublasContext *ctx)
{
    (void)ctx;  /* unused */
}

/*
 * Context set-up hook handed to the TLS hook machinery by cublasInit().
 *
 * ctx      context to initialize; NULL is reported as an allocation failure
 * _status  optional (may be NULL) pointer to a cublasStatus that receives
 *          the outcome: CUBLAS_STATUS_ALLOC_FAILED if ctx is NULL,
 *          CUBLAS_STATUS_NOT_INITIALIZED if CUDA could not be initialized,
 *          CUBLAS_STATUS_SUCCESS otherwise
 *
 * Returns __tlsHookStatusOK on success, __tlsHookStatusFAIL otherwise.
 */
__tlsHookStatus cublasInitCtx (struct cublasContext *ctx, void *_status)
{
    cublasStatus* status = (cublasStatus*)_status;

    if (!ctx) {
        if (status) *status = CUBLAS_STATUS_ALLOC_FAILED;
        return __tlsHookStatusFAIL;
    }

    ctx->cublasIsInitialized = false;
    ctx->cublasLastError = CUBLAS_STATUS_SUCCESS;

    /* This will do nothing really but will initialize CUDA as a side effect */
    if (cudaFree ((void *)0) != cudaSuccess) {
        if (status) *status = CUBLAS_STATUS_NOT_INITIALIZED;
        return __tlsHookStatusFAIL;
    }

    ctx->cublasIsInitialized = true;
    if (status) {
        *status = CUBLAS_STATUS_SUCCESS;
    }

    return __tlsHookStatusOK;
}

/* --------------------------- CUBLAS API functions ------------------------ */

/*
 * cublasStatus
 * cublasInit (void)
 *
 * initializes the CUBLAS library and must be called before any other
 * CUBLAS API function is invoked. It allocates hardware resources
 * necessary for accessing the GPU.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_ALLOC_FAILED     if resources could not be allocated
 * CUBLAS_STATUS_SUCCESS          if CUBLAS library initialized successfully
 */
cublasStatus CUBLASAPI cublasInit (void)
{
    cublasStatus status = CUBLAS_STATUS_SUCCESS;

    /* the outcome is reported through 'status' by cublasInitCtx() */
    (void)__tlsHookInitTlsValueForcublasThreadContext(cublasInitCtx,
                                                      cublasShutDownCtx,
                                                      &status);
    return status;
}

/*
 * cublasStatus
 * cublasShutdown (void)
 *
 * releases CPU-side resources used by the CUBLAS library. The release of
 * GPU-side resources may be deferred until the application shuts down.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_SUCCESS          if CUBLAS library shut down successfully
 *
 * NOTE(review): the implementation below unconditionally returns
 * CUBLAS_STATUS_SUCCESS; it never reports CUBLAS_STATUS_NOT_INITIALIZED.
 * Confirm which of the two (code or documentation) is intended.
 */
cublasStatus CUBLASAPI cublasShutdown (void)
{
    __tlsHookClearTlsValue(&cublasThreadContext);
    return CUBLAS_STATUS_SUCCESS;
}

/*
 * cublasStatus
 * cublasGetError (void)
 *
 * returns the last error that occurred on invocation of any of the
 * CUBLAS BLAS functions. While the CUBLAS helper functions return status
 * directly, the BLAS functions do not do so for improved
 * compatibility with existing environments that do not expect BLAS
 * functions to return status. Reading the error status via
 * cublasGetError() resets the internal error state to
 * CUBLAS_STATUS_SUCCESS.
 *
 * Returns CUBLAS_STATUS_NOT_INITIALIZED when no context is available.
 */
cublasStatus CUBLASAPI cublasGetError (void)
{
    struct cublasContext *ctx = CUBLAS_GET_CTX();

    if (!ctx) {
        return CUBLAS_STATUS_NOT_INITIALIZED;
    } else {
        cublasStatus retVal = ctx->cublasLastError;
        ctx->cublasLastError = CUBLAS_STATUS_SUCCESS;  /* read-and-clear */
        return retVal;
    }
}

/*
 * cublasStatus
 * cublasAlloc (int n, int elemSize, void **devicePtr)
 *
 * creates an object in GPU memory space capable of holding an array of
 * n elements, where each element requires elemSize bytes of storage. If
 * the function call is successful, a pointer to the object in GPU memory
 * space is placed in devicePtr. Note that this is a device pointer that
 * cannot be dereferenced in host code.
 *
 * When devicePtr is non-NULL, *devicePtr is set to 0 on every failure path.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if n <= 0, or elemSize <= 0, or devicePtr
 *                                is NULL
 * CUBLAS_STATUS_ALLOC_FAILED     if the object could not be allocated due to
 *                                lack of resources, or if n * elemSize is
 *                                not representable as a size_t.
 * CUBLAS_STATUS_SUCCESS          if storage was successfully allocated
 */
cublasStatus CUBLASAPI cublasAlloc (int n, int elemSize, void **devicePtr)
{
    cudaError_t cudaStat;
    size_t count;
    size_t width;
    struct cublasContext *ctx = CUBLAS_GET_CTX();

    /* never dereference a NULL output pointer */
    if (devicePtr) {
        *devicePtr = 0;
    }
    if (!cublasInitialized (ctx)) {
        return CUBLAS_STATUS_NOT_INITIALIZED;
    }
    if (!devicePtr || (n <= 0) || (elemSize <= 0)) {
        return CUBLAS_STATUS_INVALID_VALUE;
    }

    /* Compute the byte count in size_t: the product of two ints overflows
     * int for requests of 2 GiB or more, which would hand a bogus size to
     * cudaMalloc(). Both operands are known to be positive here.
     */
    count = (size_t)n;
    width = (size_t)elemSize;
    if (count > ((size_t)-1) / width) {
        return CUBLAS_STATUS_ALLOC_FAILED;
    }

    cudaStat = cudaMalloc (devicePtr, width * count);
    if (cudaStat != cudaSuccess) {
        return CUBLAS_STATUS_ALLOC_FAILED;
    }
    return CUBLAS_STATUS_SUCCESS;
}

/*
 * cublasStatus
 * cublasFree (const void *devicePtr)
 *
 * destroys the object in GPU memory space pointed to by devicePtr.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INTERNAL_ERROR   if the object could not be deallocated
 * CUBLAS_STATUS_SUCCESS          if object was destroyed successfully
 */
cublasStatus CUBLASAPI cublasFree (const void *devicePtr)
12 下一页
💿 文件大小 259 K
👤 上传用户 mujinhua2010
📂 所属分类并行计算
🏷️ 相关标签

#NVIDIA #CUDA #BLAS #源码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -