📄 cublas.cu
字号:
/* * Copyright 1993-2008 NVIDIA Corporation. All rights reserved. * * NOTICE TO USER: * * This source code is subject to NVIDIA ownership rights under U.S. and * international Copyright laws. * * This software and the information contained herein is being provided * under the terms and conditions of a Source Code License Agreement. * * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE * OR PERFORMANCE OF THIS SOURCE CODE. * * U.S. Government End Users. This source code is a "commercial item" as * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of * "commercial computer software" and "commercial computer software * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) * and is provided to the U.S. Government only as a commercial end item. * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the * source code with only those rights set forth herein. 
*/

/* This file contains the implementation of the CUBLAS helper functions */

#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include "cublas.h"   /* CUBLAS public header file  */
#include "cublasP.h"  /* CUBLAS private header file */

/* the next two macro definitions trigger
 * code generation when tlsHook.h is included
 */
#define __tlsHookIdentifier cublasThreadContext
#define __tlsHookType       struct cublasContext
#include <tlshook.h>

/*
 * Stores 'error' as the last error recorded in the given context.
 * A NULL context is silently ignored.
 */
void cublasSetError (struct cublasContext *ctx, cublasStatus error)
{
    if (ctx) {
        ctx->cublasLastError = error;
    }
}

/*
 * Returns the context's cublasIsInitialized flag, or 0 when ctx is NULL.
 */
int cublasInitialized (const struct cublasContext *ctx)
{
    if (!ctx) {
        return 0;
    } else {
        return ctx->cublasIsInitialized;
    }
}

/*
 * For a given vector size, cublasVectorSplay() determines what CTA grid
 * size to use, and how many threads per CTA.
 *
 * Inputs
 * ------
 * n      vector size
 * tMin   thread count used per CTA for small vectors; also the granularity
 *        (rounding unit) for all computed per-CTA sizes
 * tMax   thread count used per CTA once n >= gridW * tMax
 * gridW  number of CTAs used once n >= gridW * tMin
 *
 * Outputs
 * -------
 * nbrCtas        number of CTAs to launch
 * elemsPerCta    number of vector elements assigned to each CTA
 * threadsPerCta  number of threads per CTA
 *
 * Four regimes are distinguished:
 *   n < tMin          one CTA of tMin threads handling all n elements
 *   n < gridW * tMin  ceil(n / tMin) CTAs, tMin threads and elements each
 *   n < gridW * tMax  gridW CTAs; threads == elements per CTA, rounded up
 *                     to a multiple of tMin
 *   otherwise         gridW CTAs of tMax threads; elements per CTA rounded
 *                     up to a multiple of tMin
 */
void cublasVectorSplay (int n, int tMin, int tMax, int gridW, int *nbrCtas,
                        int *elemsPerCta, int *threadsPerCta)
{
    if (n < tMin) {
        *nbrCtas = 1;
        *elemsPerCta = n;
        *threadsPerCta = tMin;
    } else if (n < (gridW * tMin)) {
        *nbrCtas = ((n + tMin - 1) / tMin);
        *threadsPerCta = tMin;
        *elemsPerCta = *threadsPerCta;
    } else if (n < (gridW * tMax)) {
        int grp;
        *nbrCtas = gridW;
        grp = ((n + tMin - 1) / tMin);   /* number of tMin-sized groups */
        *threadsPerCta = (((grp + gridW - 1) / gridW) * tMin);
        *elemsPerCta = *threadsPerCta;
    } else {
        int grp;
        *nbrCtas = gridW;
        *threadsPerCta = tMax;
        grp = ((n + tMin - 1) / tMin);   /* number of tMin-sized groups */
        grp = ((grp + gridW - 1) / gridW); /* groups per CTA, rounded up */
        *elemsPerCta = grp * tMin;
    }
}

/*
 * Context shutdown hook passed to the TLS hook machinery by cublasInit().
 * Intentionally empty: no per-context resources are released here.
 */
void cublasShutDownCtx (struct cublasContext *ctx)
{
}

/*
 * Context initialization hook passed to the TLS hook machinery by
 * cublasInit().
 *
 * ctx      context to initialize
 * _status  optional pointer to a cublasStatus that receives the outcome
 *          (may be NULL, in which case no status is reported)
 *
 * Reports CUBLAS_STATUS_ALLOC_FAILED when ctx is NULL,
 * CUBLAS_STATUS_NOT_INITIALIZED when the CUDA runtime call fails, and
 * CUBLAS_STATUS_SUCCESS otherwise. Returns __tlsHookStatusOK on success
 * and __tlsHookStatusFAIL on either failure.
 */
__tlsHookStatus cublasInitCtx (struct cublasContext *ctx, void *_status)
{
    cublasStatus* status = (cublasStatus*)_status;

    if (!ctx) {
        if (status) *status = CUBLAS_STATUS_ALLOC_FAILED;
        return __tlsHookStatusFAIL;
    }

    ctx->cublasIsInitialized = false;
    ctx->cublasLastError = CUBLAS_STATUS_SUCCESS;

    /* This will do nothing really but will initialize CUDA as a side effect */
    if (cudaFree ((void *)0) != cudaSuccess) {
        if (status) *status = CUBLAS_STATUS_NOT_INITIALIZED;
        return __tlsHookStatusFAIL;
    }

    ctx->cublasIsInitialized = true;
    if (status) {
        *status = CUBLAS_STATUS_SUCCESS;
    }
    return __tlsHookStatusOK;
}

/* --------------------------- CUBLAS API functions ------------------------ */

/*
 * cublasStatus
 * cublasInit (void)
 *
 * initializes the CUBLAS library and must be called before any other
 * CUBLAS API function is invoked. It allocates hardware resources
 * necessary for accessing the GPU.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_ALLOC_FAILED     if resources could not be allocated
 * CUBLAS_STATUS_SUCCESS          if CUBLAS library initialized successfully
 */
cublasStatus CUBLASAPI cublasInit (void)
{
    cublasStatus status = CUBLAS_STATUS_SUCCESS;

    /* The hook's own return value is deliberately discarded; the outcome
     * is the value cublasInitCtx() writes to 'status'.
     */
    (void)__tlsHookInitTlsValueForcublasThreadContext(cublasInitCtx,
                                                      cublasShutDownCtx,
                                                      &status);
    return status;
}

/*
 * cublasStatus
 * cublasShutdown (void)
 *
 * releases CPU-side resources used by the CUBLAS library. The release of
 * GPU-side resources may be deferred until the application shuts down.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_SUCCESS          if CUBLAS library shut down successfully
 *
 * NOTE(review): this implementation always returns CUBLAS_STATUS_SUCCESS;
 * the CUBLAS_STATUS_NOT_INITIALIZED case listed above is never produced
 * here. Confirm which of the two is the intended contract.
 */
cublasStatus CUBLASAPI cublasShutdown (void)
{
    __tlsHookClearTlsValue(&cublasThreadContext);
    return CUBLAS_STATUS_SUCCESS;
}

/*
 * cublasStatus
 * cublasGetError (void)
 *
 * returns the last error that occurred on invocation of any of the
 * CUBLAS BLAS functions. While the CUBLAS helper functions return status
 * directly, the BLAS functions do not do so for improved
 * compatibility with existing environments that do not expect BLAS
 * functions to return status. Reading the error status via
 * cublasGetError() resets the internal error state to
 * CUBLAS_STATUS_SUCCESS.
 *
 * Returns CUBLAS_STATUS_NOT_INITIALIZED when no context is available.
 */
cublasStatus CUBLASAPI cublasGetError (void)
{
    struct cublasContext *ctx = CUBLAS_GET_CTX();

    if (!ctx) {
        return CUBLAS_STATUS_NOT_INITIALIZED;
    } else {
        cublasStatus retVal = ctx->cublasLastError;
        ctx->cublasLastError = CUBLAS_STATUS_SUCCESS;  /* read-and-clear */
        return retVal;
    }
}

/*
 * cublasStatus
 * cublasAlloc (int n, int elemSize, void **devicePtr)
 *
 * creates an object in GPU memory space capable of holding an array of
 * n elements, where each element requires elemSize bytes of storage. If
 * the function call is successful, a pointer to the object in GPU memory
 * space is placed in devicePtr. Note that this is a device pointer that
 * cannot be dereferenced in host code.
 *
 * On every failure path *devicePtr (when devicePtr is non-NULL) is left
 * set to 0.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if n <= 0, or elemSize <= 0, or devicePtr
 *                                is NULL
 * CUBLAS_STATUS_ALLOC_FAILED     if the object could not be allocated due to
 *                                lack of resources, or if n * elemSize is
 *                                not representable as a size_t
 * CUBLAS_STATUS_SUCCESS          if storage was successfully allocated
 */
cublasStatus CUBLASAPI cublasAlloc (int n, int elemSize, void **devicePtr)
{
    cudaError_t cudaStat;
    struct cublasContext *ctx = CUBLAS_GET_CTX();

    /* Clear the output first so callers never see a stale pointer. */
    if (devicePtr) {
        *devicePtr = 0;
    }
    if (!cublasInitialized (ctx)) {
        return CUBLAS_STATUS_NOT_INITIALIZED;
    }
    if (!devicePtr || (n <= 0) || (elemSize <= 0)) {
        return CUBLAS_STATUS_INVALID_VALUE;
    }
    /* Both operands are known to be positive here. Reject byte counts that
     * do not fit in size_t (only possible where size_t is narrow).
     */
    if ((size_t)n > ((size_t)-1) / (size_t)elemSize) {
        return CUBLAS_STATUS_ALLOC_FAILED;
    }
    /* Multiply in size_t: an int product overflows for large requests
     * (e.g. n = 1 << 20, elemSize = 4096) before reaching cudaMalloc.
     */
    cudaStat = cudaMalloc (devicePtr, (size_t)n * (size_t)elemSize);
    if (cudaStat != cudaSuccess) {
        return CUBLAS_STATUS_ALLOC_FAILED;
    }
    return CUBLAS_STATUS_SUCCESS;
}

/*
 * cublasStatus
 * cublasFree (const void *devicePtr)
 *
 * destroys the object in GPU memory space pointed to by devicePtr.
 *
 * Return Values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INTERNAL_ERROR   if the object could not be deallocated
 * CUBLAS_STATUS_SUCCESS          if object was destroyed successfully
 */
cublasStatus CUBLASAPI cublasFree (const void *devicePtr)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -