📄 cgemm.cu
字号:
/* NOTE(review): the statements down to the first lone '}' are the tail of a
 * host routine that begins before this excerpt. They are reproduced
 * token-for-token; only line breaks and indentation were restored.
 * When texture use was enabled, both texture references are unbound; a
 * failing unbind records CUBLAS_STATUS_INTERNAL_ERROR on ctx and the code
 * carries on with the next statement. */
if (useTexture) {
    if ((cudaStat = cudaUnbindTexture (texA)) != cudaSuccess) {
        cublasSetError (ctx, CUBLAS_STATUS_INTERNAL_ERROR);
    }
    if ((cudaStat = cudaUnbindTexture (texB)) != cudaSuccess) {
        cublasSetError (ctx, CUBLAS_STATUS_INTERNAL_ERROR);
    }
}
}

/*
 * CGEMM kernel instantiations.
 *
 * Every kernel below has the same shape: it clears the eight configuration
 * macros, redefines them for one variant, and textually includes "cgemm.h"
 * as the kernel body. The header is not part of this excerpt, so how each
 * macro is consumed is not described here. What this file itself shows:
 *
 *   *_main_sw_gld           USE_MIXED_STEPPER 1, FULL_TILES_ONLY 0
 *   *_main_hw_gld           USE_MIXED_STEPPER 0, FULL_TILES_ONLY 0
 *   *_main_sw_gld_fulltile  USE_MIXED_STEPPER 1, FULL_TILES_ONLY 1
 *
 *   USE_TEX and FAST_IMUL are 0 in every variant in this section.
 *   The index 1..9 in the kernel name selects the TRANSA / TRANSB /
 *   CONJGA / CONJGB combination given in the comment above each kernel.
 *
 * Each preprocessor directive has to start on its own line; do not join
 * these lines.
 */

/* ------------------------------------------------------------------ */
/* sw_gld variants                                                    */
/* ------------------------------------------------------------------ */

/* C = alpha*A*B + beta*C */
__global__ void cgemm_1_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 0
#define TRANSB 0
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*conj(transpose(A))*B + beta*C */
__global__ void cgemm_2_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 0
#define CONJGA 1
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*transpose(A)*B + beta*C */
__global__ void cgemm_3_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 0
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*A*conj(transpose(B)) + beta*C */
__global__ void cgemm_4_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 0
#define TRANSB 1
#define CONJGA 0
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*A*transpose(B) + beta*C */
__global__ void cgemm_5_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 0
#define TRANSB 1
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*conjg(transpose(A))*conjg(transpose(B)) + beta*C */
__global__ void cgemm_6_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 1
#define CONJGA 1
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*conjg(transpose(A))*transpose(B) + beta*C */
__global__ void cgemm_7_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 1
#define CONJGA 1
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*transpose(A)*conjg(transpose(B)) + beta*C */
__global__ void cgemm_8_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 1
#define CONJGA 0
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*transpose(A)*transpose(B) + beta*C */
__global__ void cgemm_9_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 1
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* ------------------------------------------------------------------ */
/* hw_gld variants                                                    */
/* ------------------------------------------------------------------ */

/* C = alpha*A*B + beta*C */
__global__ void cgemm_1_main_hw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define TRANSA 0
#define TRANSB 0
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*conj(transpose(A))*B + beta*C */
__global__ void cgemm_2_main_hw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define TRANSA 1
#define TRANSB 0
#define CONJGA 1
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*transpose(A)*B + beta*C */
__global__ void cgemm_3_main_hw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define TRANSA 1
#define TRANSB 0
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*A*conj(transpose(B)) + beta*C */
__global__ void cgemm_4_main_hw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define TRANSA 0
#define TRANSB 1
#define CONJGA 0
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*A*transpose(B) + beta*C */
__global__ void cgemm_5_main_hw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define TRANSA 0
#define TRANSB 1
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*conjg(transpose(A))*conjg(transpose(B)) + beta*C */
__global__ void cgemm_6_main_hw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define TRANSA 1
#define TRANSB 1
#define CONJGA 1
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*conjg(transpose(A))*transpose(B) + beta*C */
__global__ void cgemm_7_main_hw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define TRANSA 1
#define TRANSB 1
#define CONJGA 1
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*transpose(A)*conjg(transpose(B)) + beta*C */
__global__ void cgemm_8_main_hw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define TRANSA 1
#define TRANSB 1
#define CONJGA 0
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*transpose(A)*transpose(B) + beta*C */
__global__ void cgemm_9_main_hw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define TRANSA 1
#define TRANSB 1
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* ------------------------------------------------------------------ */
/* sw_gld_fulltile variants                                           */
/* ------------------------------------------------------------------ */

/* C = alpha*A*B + beta*C */
__global__ void cgemm_1_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define TRANSA 0
#define TRANSB 0
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*conj(transpose(A))*B + beta*C */
__global__ void cgemm_2_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 0
#define CONJGA 1
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*transpose(A)*B + beta*C */
__global__ void cgemm_3_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 0
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*A*conj(transpose(B)) + beta*C */
__global__ void cgemm_4_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define TRANSA 0
#define TRANSB 1
#define CONJGA 0
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*A*transpose(B) + beta*C */
__global__ void cgemm_5_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define TRANSA 0
#define TRANSB 1
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*conjg(transpose(A))*conjg(transpose(B)) + beta*C */
/* NOTE(review): this definition runs past the end of the excerpt. It is
 * reproduced only up to the last directive that is visible; the remaining
 * directives and the closing brace are expected to follow. */
__global__ void cgemm_6_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 0
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 1
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -