⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cublasp.h

📁 NVIDIA提供的CUDA的BLAS库源码
💻 H
📖 第 1 页 / 共 2 页
字号:
/* * Copyright 1993-2008 NVIDIA Corporation.  All rights reserved. * * NOTICE TO USER:    * * This source code is subject to NVIDIA ownership rights under U.S. and * international Copyright laws.   * * This software and the information contained herein is being provided  * under the terms and conditions of a Source Code License Agreement.      * * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR  * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE * OR PERFORMANCE OF THIS SOURCE CODE.   * * U.S. Government End Users.   This source code is a "commercial item" as  * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of * "commercial computer  software"  and "commercial computer software  * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995) * and is provided to the U.S. Government only as a commercial end item. * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the  * source code with only those rights set forth herein. 
*//* * This is the private header file used by the CUBLAS library internally */#if !defined(CUBLAS_P_H_)#define CUBLAS_P_H_#if defined(__cplusplus)extern "C" {#endif#if defined (__GNUC__)#include <stdint.h>#endif#include "cublas.h"#include "cuComplex.h"/* CUBLAS context */struct cublasContext {    int cublasIsInitialized;    cublasStatus cublasLastError;};/* the next three macro definitions trigger * code generation when tlsHook.h is included */ #define __tlsHookIdentifier cublasThreadContext#define __tlsHookType       struct cublasContext#define __tlsHookExtern#include <tlshook.h>#define CUBLAS_GET_CTX() \__tlsHookInitTlsValueForcublasThreadContext(cublasInitCtx, cublasShutDownCtx)#define CUBLAS_THREAD_BUNDLE_SIZE        (32)#define CUBLAS_CTA_MAX_DIM               (65535)#define CUBLAS_FASTIMUL_F_MAX_DIM        (2000) /* float */#define CUBLAS_FASTIMUL_D_MAX_DIM        (1410) /* double, complex */#define CUBLAS_SMALL_SGEMM_MAT_MAX_ELEMS (6400)#define CUBLAS_WORD_ALIGN                (64)   /* alignment for 32-bit word */#define CUBLAS_LONG_ALIGN                (128)  /* alignment for 64-bit long */#define CUBLAS_1DBUF_ALIGN               (256)  /* alignment for 1D buffer */#define CUBLAS_MAX_1DBUF_SIZE            ((1<<27)-CUBLAS_1DBUF_ALIGN)#define CUBLAS_SAXPY_CTAS_MIN           (1)#define CUBLAS_SAXPY_CTAS_MAX           (80)#define CUBLAS_SAXPY_THREAD_MIN         (CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_SAXPY_THREAD_MAX         (128)#define CUBLAS_SCOPY_CTAS_MIN           (1)#define CUBLAS_SCOPY_CTAS_MAX           (80)#define CUBLAS_SCOPY_THREAD_MIN         (CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_SCOPY_THREAD_MAX         (128)#define CUBLAS_SSCAL_CTAS_MIN           (1)#define CUBLAS_SSCAL_CTAS_MAX           (96)#define CUBLAS_SSCAL_THREAD_MIN         (CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_SSCAL_THREAD_MAX         (128)#define CUBLAS_SSWAP_CTAS_MIN           (1)#define CUBLAS_SSWAP_CTAS_MAX           (80)#define CUBLAS_SSWAP_THREAD_MIN         
(CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_SSWAP_THREAD_MAX         (128)#define CUBLAS_SROT_CTAS_MIN            (1)#define CUBLAS_SROT_CTAS_MAX            (64)#define CUBLAS_SROT_THREAD_MIN          (CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_SROT_THREAD_MAX          (128)#define CUBLAS_CSROT_CTAS_MIN           (1)#define CUBLAS_CSROT_CTAS_MAX           (64)#define CUBLAS_CSROT_THREAD_MIN         (CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_CSROT_THREAD_MAX         (128)#define CUBLAS_CROT_CTAS_MIN            (1)#define CUBLAS_CROT_CTAS_MAX            (64)#define CUBLAS_CROT_THREAD_MIN          (CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_CROT_THREAD_MAX          (128)#define CUBLAS_SROTM_CTAS_MIN           (1)#define CUBLAS_SROTM_CTAS_MAX           (64)#define CUBLAS_SROTM_THREAD_MIN         (CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_SROTM_THREAD_MAX         (128)#define CUBLAS_SROTM_PARAM_VEC_LEN      (5)#define CUBLAS_SDOT_LOG_THREAD_COUNT    (7)#define CUBLAS_SDOT_THREAD_COUNT        (1 << CUBLAS_SDOT_LOG_THREAD_COUNT)#define CUBLAS_SDOT_CTAS                (80)#define CUBLAS_SASUM_LOG_THREAD_COUNT   (7)#define CUBLAS_SASUM_THREAD_COUNT       (1 << CUBLAS_SASUM_LOG_THREAD_COUNT)#define CUBLAS_SASUM_CTAS               (96)#define CUBLAS_ISAMAX_LOG_THREAD_COUNT  (7)#define CUBLAS_ISAMAX_THREAD_COUNT      (1 << CUBLAS_ISAMAX_LOG_THREAD_COUNT)#define CUBLAS_ISAMAX_CTAS              (80)#define CUBLAS_ICAMAX_LOG_THREAD_COUNT  (7)#define CUBLAS_ICAMAX_THREAD_COUNT      (1 << CUBLAS_ICAMAX_LOG_THREAD_COUNT)#define CUBLAS_ICAMAX_CTAS              (80)#define CUBLAS_ISAMIN_LOG_THREAD_COUNT  (7)#define CUBLAS_ISAMIN_THREAD_COUNT      (1 << CUBLAS_ISAMIN_LOG_THREAD_COUNT)#define CUBLAS_ISAMIN_CTAS              (80)#define CUBLAS_ICAMIN_LOG_THREAD_COUNT  (7)#define CUBLAS_ICAMIN_THREAD_COUNT      (1 << CUBLAS_ICAMIN_LOG_THREAD_COUNT)#define CUBLAS_ICAMIN_CTAS              (80)#define CUBLAS_SNRM2_LOG_THREAD_COUNT   (7)#define CUBLAS_SNRM2_THREAD_COUNT       (1 << 
CUBLAS_SNRM2_LOG_THREAD_COUNT)#define CUBLAS_SNRM2_CTAS               (64)#define CUBLAS_SCNRM2_LOG_THREAD_COUNT  (7)#define CUBLAS_SCNRM2_THREAD_COUNT      (1 << CUBLAS_SCNRM2_LOG_THREAD_COUNT)#define CUBLAS_SCNRM2_CTAS              (64)#define CUBLAS_SGEMM_LOG_LARGE_THREAD_COUNT   (9)#define CUBLAS_SGEMM_LOG_SMALL_THREAD_COUNT   (8)#define CUBLAS_SGEMM_LARGE_THREAD_COUNT (1 << CUBLAS_SGEMM_LOG_LARGE_THREAD_COUNT)#define CUBLAS_SGEMM_SMALL_THREAD_COUNT (1 << CUBLAS_SGEMM_LOG_SMALL_THREAD_COUNT)#define CUBLAS_SGEMM_GRIDW_LOG          (2)#define CUBLAS_SGEMM_GRIDW              (1 << CUBLAS_SGEMM_GRIDW_LOG)#define CUBLAS_SGEMM_GRIDH_LOG          (2)#define CUBLAS_SGEMM_GRIDH              (1 << CUBLAS_SGEMM_GRIDH_LOG)#define CUBLAS_SSYMM_LOG_THREAD_COUNT   (9)#define CUBLAS_SSYMM_THREAD_COUNT       (1 << CUBLAS_SSYMM_LOG_THREAD_COUNT)#define CUBLAS_SSYMM_GRIDW_LOG          (2)#define CUBLAS_SSYMM_GRIDW              (1 << CUBLAS_SSYMM_GRIDW_LOG)#define CUBLAS_SSYMM_GRIDH_LOG          (2)#define CUBLAS_SSYMM_GRIDH              (1 << CUBLAS_SSYMM_GRIDH_LOG)#define CUBLAS_SSYRK_LOG_THREAD_COUNT   (9)#define CUBLAS_SSYRK_THREAD_COUNT       (1 << CUBLAS_SSYRK_LOG_THREAD_COUNT)#define CUBLAS_SSYRK_GRIDW_LOG          (2)#define CUBLAS_SSYRK_GRIDW              (1 << CUBLAS_SSYRK_GRIDW_LOG)#define CUBLAS_SSYRK_GRIDH_LOG          (2)#define CUBLAS_SSYRK_GRIDH              (1 << CUBLAS_SSYRK_GRIDH_LOG)#define CUBLAS_SSYR2K_LOG_THREAD_COUNT  (9)#define CUBLAS_SSYR2K_THREAD_COUNT      (1 << CUBLAS_SSYR2K_LOG_THREAD_COUNT)#define CUBLAS_SSYR2K_GRIDW_LOG         (2)#define CUBLAS_SSYR2K_GRIDW             (1 << CUBLAS_SSYR2K_GRIDW_LOG)#define CUBLAS_SSYR2K_GRIDH_LOG         (2)#define CUBLAS_SSYR2K_GRIDH             (1 << CUBLAS_SSYR2K_GRIDH_LOG)#define CUBLAS_SSYMV_LOG_THREAD_COUNT   (7)#define CUBLAS_SSYMV_THREAD_COUNT       (1 << CUBLAS_SSYMV_LOG_THREAD_COUNT)#define CUBLAS_SSYMV_CTAS               (64)#define CUBLAS_SGBMV_LOG_THREAD_COUNT   (7)#define CUBLAS_SGBMV_THREAD_COUNT     
  (1 << CUBLAS_SGBMV_LOG_THREAD_COUNT)#define CUBLAS_SGBMV_CTAS               (64)#define CUBLAS_SSBMV_LOG_THREAD_COUNT   (7)#define CUBLAS_SSBMV_THREAD_COUNT       (1 << CUBLAS_SSBMV_LOG_THREAD_COUNT)#define CUBLAS_SSBMV_CTAS               (64)#define CUBLAS_SSPMV_LOG_THREAD_COUNT   (7)#define CUBLAS_SSPMV_THREAD_COUNT       (1 << CUBLAS_SSPMV_LOG_THREAD_COUNT)#define CUBLAS_SSPMV_CTAS               (64)#define CUBLAS_SGEMVN_LOG_THREAD_COUNT  (7)#define CUBLAS_SGEMVN_THREAD_COUNT      (1 << CUBLAS_SGEMVN_LOG_THREAD_COUNT)#define CUBLAS_SGEMVN_CTAS              (64)#define CUBLAS_SGEMVT_LOG_THREAD_COUNT  (7)#define CUBLAS_SGEMVT_THREAD_COUNT      (1 << CUBLAS_SGEMVT_LOG_THREAD_COUNT)#define CUBLAS_SGEMVT_CTAS              (64)#define CUBLAS_STRSM_LOG_THREAD_COUNT   (9)#define CUBLAS_STRSM_THREAD_COUNT       (1 << CUBLAS_STRSM_LOG_THREAD_COUNT)#define CUBLAS_STRSM_CTAS               (16)#define CUBLAS_STRMM_LOG_THREAD_COUNT   (9)#define CUBLAS_STRMM_THREAD_COUNT       (1 << CUBLAS_STRMM_LOG_THREAD_COUNT)#define CUBLAS_STRMM_CTAS               (16)#define CUBLAS_SSYR_LOG_THREAD_COUNT    (9)#define CUBLAS_SSYR_THREAD_COUNT        (1 << CUBLAS_SSYR_LOG_THREAD_COUNT)#define CUBLAS_SSYR_GRIDW_LOG           (2)#define CUBLAS_SSYR_GRIDW               (1 << CUBLAS_SSYR_GRIDW_LOG)#define CUBLAS_SSYR_GRIDH_LOG           (2)#define CUBLAS_SSYR_GRIDH               (1 << CUBLAS_SSYR_GRIDH_LOG)#define CUBLAS_SSPR_LOG_THREAD_COUNT    (9)#define CUBLAS_SSPR_THREAD_COUNT        (1 << CUBLAS_SSPR_LOG_THREAD_COUNT)#define CUBLAS_SSPR_GRIDW_LOG           (2)#define CUBLAS_SSPR_GRIDW               (1 << CUBLAS_SSPR_GRIDW_LOG)#define CUBLAS_SSPR_GRIDH_LOG           (2)#define CUBLAS_SSPR_GRIDH               (1 << CUBLAS_SSPR_GRIDH_LOG)#define CUBLAS_SGER_LOG_THREAD_COUNT    (8)#define CUBLAS_SGER_THREAD_COUNT        (1 << CUBLAS_SGER_LOG_THREAD_COUNT)#define CUBLAS_SGER_GRIDW_LOG           (2)#define CUBLAS_SGER_GRIDW               (1 << CUBLAS_SGER_GRIDW_LOG)#define 
CUBLAS_SGER_GRIDH_LOG           (2)#define CUBLAS_SGER_GRIDH               (1 << CUBLAS_SGER_GRIDH_LOG)#define CUBLAS_SSYR2_LOG_THREAD_COUNT   (9)#define CUBLAS_SSYR2_THREAD_COUNT       (1 << CUBLAS_SSYR2_LOG_THREAD_COUNT)#define CUBLAS_SSYR2_GRIDW_LOG          (2)#define CUBLAS_SSYR2_GRIDW              (1 << CUBLAS_SSYR2_GRIDW_LOG)#define CUBLAS_SSYR2_GRIDH_LOG          (2)#define CUBLAS_SSYR2_GRIDH              (1 << CUBLAS_SSYR2_GRIDH_LOG)#define CUBLAS_SSPR2_LOG_THREAD_COUNT   (9)#define CUBLAS_SSPR2_THREAD_COUNT       (1 << CUBLAS_SSPR2_LOG_THREAD_COUNT)#define CUBLAS_SSPR2_GRIDW_LOG          (2)#define CUBLAS_SSPR2_GRIDW              (1 << CUBLAS_SSPR2_GRIDW_LOG)#define CUBLAS_SSPR2_GRIDH_LOG          (2)#define CUBLAS_SSPR2_GRIDH              (1 << CUBLAS_SSPR2_GRIDH_LOG)#define CUBLAS_STRSV_LOG_THREAD_COUNT   (9)#define CUBLAS_STRSV_THREAD_COUNT       (1 << CUBLAS_STRSV_LOG_THREAD_COUNT)#define CUBLAS_STRSV_CTAS               (1)#define CUBLAS_STRSV_MAX_DIM            (4070)#define CUBLAS_STPSV_LOG_THREAD_COUNT   (9)#define CUBLAS_STPSV_THREAD_COUNT       (1 << CUBLAS_STPSV_LOG_THREAD_COUNT)#define CUBLAS_STPSV_CTAS               (1)#define CUBLAS_STPSV_MAX_DIM            (4070)#define CUBLAS_STBSV_LOG_THREAD_COUNT   (9)#define CUBLAS_STBSV_THREAD_COUNT       (1 << CUBLAS_STBSV_LOG_THREAD_COUNT)#define CUBLAS_STBSV_CTAS               (1)#define CUBLAS_STBSV_MAX_DIM            (4070)#define CUBLAS_STRMV_LOG_THREAD_COUNT   (9)#define CUBLAS_STRMV_THREAD_COUNT       (1 << CUBLAS_STRMV_LOG_THREAD_COUNT)#define CUBLAS_STRMV_CTAS               (1)#define CUBLAS_STRMV_MAX_DIM            (4070)#define CUBLAS_STBMV_LOG_THREAD_COUNT   (9)#define CUBLAS_STBMV_THREAD_COUNT       (1 << CUBLAS_STRMV_LOG_THREAD_COUNT)#define CUBLAS_STBMV_CTAS               (1)#define CUBLAS_STBMV_MAX_DIM            (4070)#define CUBLAS_STPMV_LOG_THREAD_COUNT   (9)#define CUBLAS_STPMV_THREAD_COUNT       (1 << CUBLAS_STPMV_LOG_THREAD_COUNT)#define CUBLAS_STPMV_CTAS               (1)#define 
CUBLAS_STPMV_MAX_DIM            (4070)#define CUBLAS_CAXPY_CTAS_MIN           (1)#define CUBLAS_CAXPY_CTAS_MAX           (80)#define CUBLAS_CAXPY_THREAD_MIN         (CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_CAXPY_THREAD_MAX         (128)#define CUBLAS_CCOPY_CTAS_MIN           (1)#define CUBLAS_CCOPY_CTAS_MAX           (80)#define CUBLAS_CCOPY_THREAD_MIN         (CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_CCOPY_THREAD_MAX         (128)#define CUBLAS_CSCAL_CTAS_MIN           (1)#define CUBLAS_CSCAL_CTAS_MAX           (96)#define CUBLAS_CSCAL_THREAD_MIN         (CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_CSCAL_THREAD_MAX         (128)#define CUBLAS_CSSCAL_CTAS_MIN          (1)#define CUBLAS_CSSCAL_CTAS_MAX          (96)#define CUBLAS_CSSCAL_THREAD_MIN        (CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_CSSCAL_THREAD_MAX        (128)#define CUBLAS_CSWAP_CTAS_MIN           (1)#define CUBLAS_CSWAP_CTAS_MAX           (80)#define CUBLAS_CSWAP_THREAD_MIN         (CUBLAS_THREAD_BUNDLE_SIZE)#define CUBLAS_CSWAP_THREAD_MAX         (128)#define CUBLAS_CDOTU_LOG_THREAD_COUNT   (7)#define CUBLAS_CDOTU_THREAD_COUNT       (1 << CUBLAS_CDOTU_LOG_THREAD_COUNT)#define CUBLAS_CDOTU_CTAS               (64)#define CUBLAS_CDOTC_LOG_THREAD_COUNT   (7)#define CUBLAS_CDOTC_THREAD_COUNT       (1 << CUBLAS_CDOTC_LOG_THREAD_COUNT)#define CUBLAS_CDOTC_CTAS               (64)#define CUBLAS_SCASUM_LOG_THREAD_COUNT  (7)#define CUBLAS_SCASUM_THREAD_COUNT      (1 << CUBLAS_SASUM_LOG_THREAD_COUNT)#define CUBLAS_SCASUM_CTAS              (96)#define CUBLAS_CGEMM_LOG_THREAD_COUNT   (8)#define CUBLAS_CGEMM_THREAD_COUNT       (1 << CUBLAS_CGEMM_LOG_THREAD_COUNT)#define CUBLAS_CGEMM_GRIDW_LOG          (2)#define CUBLAS_CGEMM_GRIDW              (1 << CUBLAS_CGEMM_GRIDW_LOG)#define CUBLAS_CGEMM_GRIDH_LOG          (2)#define CUBLAS_CGEMM_GRIDH              (1 << CUBLAS_CGEMM_GRIDH_LOG)struct cublasSaxpyParams {    const float *sx;    float *sy;    int   n;    float sa;    int   incx;    int   incy;    int   texXOfs;  
  int   texYOfs;};struct cublasScopyParams {    const float *sx;    float *sy;    int   n;    int   incx;    int   incy;    int   texXOfs;};struct cublasSswapParams {    float *sx;    float *sy;    int   n;    int   incx;    int   incy;    int   texXOfs;    int   texYOfs;};struct cublasSscalParams {    float *sx;    int   n;    float sa;    int   incx;    int   texXOfs;};struct cublasSasumParams {    const float *sx;    float *result;    int   n;    int   incx;    int   texXOfs;    };struct cublasIsamaxParams {    const float *sx;    float *resMax;    int   *resPos;    int   n;    int   incx;    int   texXOfs;};struct cublasIcamaxParams {    const cuComplex *cx;    float *resMax;    int   *resPos;    int   n;    int   incx;    int   texXOfs;};struct cublasIsaminParams {    const float *sx;    float *resMin;    int   *resPos;    int   n;    int   incx;    int   texXOfs;};struct cublasIcaminParams {    const cuComplex *cx;    float *resMin;    int   *resPos;    int   n;    int   incx;    int   texXOfs;};struct cublasSdotParams {    const float *sx;    const float *sy;    float *result;    int   n;    int   incx;    int   incy;    int   texXOfs;    int   texYOfs;};struct cublasCdotcParams {    const cuComplex *cx;    const cuComplex *cy;    cuComplex *result;    int   n;    int   incx;    int   incy;    int   texXOfs;    int   texYOfs;};struct cublasCdotuParams {    const cuComplex *cx;    const cuComplex *cy;    cuComplex *result;    int   n;    int   incx;    int   incy;    int   texXOfs;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -