fast_cgemm.cu

来自「NVIDIA提供的CUDA的BLAS库源码」· CU 代码 · 共 1,967 行 · 第 1/4 页

CU
1,967
字号
/* NOTE(review): the lines down to the first closing brace are the tail of a
 * kernel whose beginning lies before this chunk; they are kept as found and
 * only split onto one directive per line.
 */
#define TRANSA            0
#define TRANSB            1
#define CONJGA            0
#define CONJGB            0
#include "cgemm.h"
}

/*
 * Every kernel below follows the same pattern: the function body is the
 * text of cgemm.h, included after eight configuration macros have been
 * reset (#undef) and redefined for that particular variant.
 *
 * Going by the per-kernel formula comments, TRANSA/TRANSB select
 * transposition of A/B and CONJGA/CONJGB select conjugation of A/B.
 * USE_TEX is 0 for the "_gld" kernels and 1 for the "_tex" kernels;
 * FULL_TILES_ONLY is 1 for the "_fulltile" kernels and 0 otherwise.
 * FAST_IMUL and USE_MIXED_STEPPER are 1 everywhere in this section; their
 * exact effect is defined by cgemm.h (not visible here).
 *
 * Each preprocessor directive must stay on its own line.
 */

/* ---- "gld", full-tile variants (USE_TEX = 0, FULL_TILES_ONLY = 1) ---- */

__global__ void fast_cgemm_6_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*conjg(transpose(A))*conjg(transpose(B)) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           0
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            1
#define CONJGA            1
#define CONJGB            1
#include "cgemm.h"
}

__global__ void fast_cgemm_7_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*conjg(transpose(A))*transpose(B) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           0
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            1
#define CONJGA            1
#define CONJGB            0
#include "cgemm.h"
}

__global__ void fast_cgemm_8_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*transpose(A)*conjg(transpose(B)) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           0
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            1
#define CONJGA            0
#define CONJGB            1
#include "cgemm.h"
}

__global__ void fast_cgemm_9_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*transpose(A)*transpose(B) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           0
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            1
#define CONJGA            0
#define CONJGB            0
#include "cgemm.h"
}

/* ---- "tex" variants (USE_TEX = 1, FULL_TILES_ONLY = 0) ---- */

__global__ void fast_cgemm_1_main_sw_tex (struct cublasCgemmParams parms)
{
    /* C = alpha*A*B + beta*C. */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   0
#define USE_MIXED_STEPPER 1
#define TRANSA            0
#define TRANSB            0
#define CONJGA            0
#define CONJGB            0
#include "cgemm.h"
}

__global__ void fast_cgemm_2_main_sw_tex (struct cublasCgemmParams parms)
{
    /* C = alpha*conj(transpose(A))*B + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   0
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            0
#define CONJGA            1
#define CONJGB            0
#include "cgemm.h"
}

__global__ void fast_cgemm_3_main_sw_tex (struct cublasCgemmParams parms)
{
    /* C = alpha*transpose(A)*B + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   0
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            0
#define CONJGA            0
#define CONJGB            0
#include "cgemm.h"
}

__global__ void fast_cgemm_4_main_sw_tex (struct cublasCgemmParams parms)
{
    /* C = alpha*A*conj(transpose(B)) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   0
#define USE_MIXED_STEPPER 1
#define TRANSA            0
#define TRANSB            1
#define CONJGA            0
#define CONJGB            1
#include "cgemm.h"
}

__global__ void fast_cgemm_5_main_sw_tex (struct cublasCgemmParams parms)
{
    /* C = alpha*A*transpose(B) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   0
#define USE_MIXED_STEPPER 1
#define TRANSA            0
#define TRANSB            1
#define CONJGA            0
#define CONJGB            0
#include "cgemm.h"
}

__global__ void fast_cgemm_6_main_sw_tex (struct cublasCgemmParams parms)
{
    /* C = alpha*conjg(transpose(A))*conjg(transpose(B)) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   0
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            1
#define CONJGA            1
#define CONJGB            1
#include "cgemm.h"
}

__global__ void fast_cgemm_7_main_sw_tex (struct cublasCgemmParams parms)
{
    /* C = alpha*conjg(transpose(A))*transpose(B) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   0
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            1
#define CONJGA            1
#define CONJGB            0
#include "cgemm.h"
}

__global__ void fast_cgemm_8_main_sw_tex (struct cublasCgemmParams parms)
{
    /* C = alpha*transpose(A)*conjg(transpose(B)) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   0
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            1
#define CONJGA            0
#define CONJGB            1
#include "cgemm.h"
}

__global__ void fast_cgemm_9_main_sw_tex (struct cublasCgemmParams parms)
{
    /* C = alpha*transpose(A)*transpose(B) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   0
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            1
#define CONJGA            0
#define CONJGB            0
#include "cgemm.h"
}

/* ---- "tex", full-tile variants (USE_TEX = 1, FULL_TILES_ONLY = 1) ---- */

__global__ void fast_cgemm_1_main_sw_tex_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*A*B + beta*C. */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            0
#define TRANSB            0
#define CONJGA            0
#define CONJGB            0
#include "cgemm.h"
}

__global__ void fast_cgemm_2_main_sw_tex_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*conj(transpose(A))*B + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            0
#define CONJGA            1
#define CONJGB            0
#include "cgemm.h"
}

__global__ void fast_cgemm_3_main_sw_tex_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*transpose(A)*B + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            0
#define CONJGA            0
#define CONJGB            0
#include "cgemm.h"
}

__global__ void fast_cgemm_4_main_sw_tex_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*A*conj(transpose(B)) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            0
#define TRANSB            1
#define CONJGA            0
#define CONJGB            1
#include "cgemm.h"
}

__global__ void fast_cgemm_5_main_sw_tex_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*A*transpose(B) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            0
#define TRANSB            1
#define CONJGA            0
#define CONJGB            0
#include "cgemm.h"
}

__global__ void fast_cgemm_6_main_sw_tex_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*conjg(transpose(A))*conjg(transpose(B)) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            1
#define CONJGA            1
#define CONJGB            1
#include "cgemm.h"
}

__global__ void fast_cgemm_7_main_sw_tex_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*conjg(transpose(A))*transpose(B) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            1
#define CONJGA            1
#define CONJGB            0
#include "cgemm.h"
}

__global__ void fast_cgemm_8_main_sw_tex_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*transpose(A)*conjg(transpose(B)) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            1
#define CONJGA            0
#define CONJGB            1
#include "cgemm.h"
}

__global__ void fast_cgemm_9_main_sw_tex_fulltile (struct cublasCgemmParams parms)
{
    /* C = alpha*transpose(A)*transpose(B) + beta*C */
#undef  USE_TEX
#undef  FAST_IMUL
#undef  FULL_TILES_ONLY
#undef  USE_MIXED_STEPPER
#undef  TRANSA
#undef  TRANSB
#undef  CONJGA
#undef  CONJGB
#define USE_TEX           1
#define FAST_IMUL         1
#define FULL_TILES_ONLY   1
#define USE_MIXED_STEPPER 1
#define TRANSA            1
#define TRANSB            1
#define CONJGA            0
#define CONJGB            0
#include "cgemm.h"
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?