📄 fast_cgemm.cu
字号:
/* NOTE(review): the lines up to the first closing brace are the tail of a
 * kernel whose header lies above this excerpt. They are reproduced unchanged
 * apart from restoring one preprocessor directive per physical line. */
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 1
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define TRANSA 0
#define TRANSB 0
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/*
 * How every kernel in this file is built
 * --------------------------------------
 * Each __global__ function has no code of its own.  It resets the eight
 * configuration macros (USE_TEX, FAST_IMUL, FULL_TILES_ONLY,
 * USE_MIXED_STEPPER, TRANSA, TRANSB, CONJGA, CONJGB) and then textually
 * includes "cgemm.h", which supplies the function body for that
 * configuration.  Every preprocessor directive must sit on its own line;
 * the #undef lines are required because the header is included repeatedly
 * with different settings.
 *
 * In the kernels below TRANSx is 1 exactly when the formula comment applies
 * transpose() to that operand, and CONJGx is 1 exactly when it applies
 * conj()/conjg() to it.
 *
 * NOTE(review): the precise meaning of USE_TEX, FAST_IMUL, FULL_TILES_ONLY
 * and USE_MIXED_STEPPER is defined by cgemm.h, which is not visible here.
 * The launch configuration expected by these kernels is likewise defined
 * elsewhere - confirm against the host-side dispatcher.
 */

/* ------------------------------------------------------------------------
 * Family "main_hw_tex_fulltile":
 *   USE_TEX=1, FAST_IMUL=1, FULL_TILES_ONLY=1, USE_MIXED_STEPPER=0
 * ---------------------------------------------------------------------- */

/* C = alpha*conj(transpose(A))*B + beta*C */
__global__ void fast_cgemm_2_main_hw_tex_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 1
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define TRANSA 1
#define TRANSB 0
#define CONJGA 1
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*transpose(A)*B + beta*C */
__global__ void fast_cgemm_3_main_hw_tex_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 1
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define TRANSA 1
#define TRANSB 0
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*A*conj(transpose(B)) + beta*C */
__global__ void fast_cgemm_4_main_hw_tex_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 1
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define TRANSA 0
#define TRANSB 1
#define CONJGA 0
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*A*transpose(B) + beta*C */
__global__ void fast_cgemm_5_main_hw_tex_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 1
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define TRANSA 0
#define TRANSB 1
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*conjg(transpose(A))*conjg(transpose(B)) + beta*C */
__global__ void fast_cgemm_6_main_hw_tex_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 1
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define TRANSA 1
#define TRANSB 1
#define CONJGA 1
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*conjg(transpose(A))*transpose(B) + beta*C */
__global__ void fast_cgemm_7_main_hw_tex_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 1
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define TRANSA 1
#define TRANSB 1
#define CONJGA 1
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*transpose(A)*conjg(transpose(B)) + beta*C */
__global__ void fast_cgemm_8_main_hw_tex_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 1
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define TRANSA 1
#define TRANSB 1
#define CONJGA 0
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*transpose(A)*transpose(B) + beta*C */
__global__ void fast_cgemm_9_main_hw_tex_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 1
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define TRANSA 1
#define TRANSB 1
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* ------------------------------------------------------------------------
 * Family "main_sw_gld":
 *   USE_TEX=0, FAST_IMUL=1, FULL_TILES_ONLY=0, USE_MIXED_STEPPER=1
 * ---------------------------------------------------------------------- */

/* C = alpha*A*B + beta*C. */
__global__ void fast_cgemm_1_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 0
#define TRANSB 0
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*conj(transpose(A))*B + beta*C */
__global__ void fast_cgemm_2_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 0
#define CONJGA 1
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*transpose(A)*B + beta*C */
__global__ void fast_cgemm_3_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 0
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*A*conj(transpose(B)) + beta*C */
__global__ void fast_cgemm_4_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 0
#define TRANSB 1
#define CONJGA 0
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*A*transpose(B) + beta*C */
__global__ void fast_cgemm_5_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 0
#define TRANSB 1
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*conjg(transpose(A))*conjg(transpose(B)) + beta*C */
__global__ void fast_cgemm_6_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 1
#define CONJGA 1
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*conjg(transpose(A))*transpose(B) + beta*C */
__global__ void fast_cgemm_7_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 1
#define CONJGA 1
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*transpose(A)*conjg(transpose(B)) + beta*C */
__global__ void fast_cgemm_8_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 1
#define CONJGA 0
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*transpose(A)*transpose(B) + beta*C */
__global__ void fast_cgemm_9_main_sw_gld (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 1
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* ------------------------------------------------------------------------
 * Family "main_sw_gld_fulltile":
 *   USE_TEX=0, FAST_IMUL=1, FULL_TILES_ONLY=1, USE_MIXED_STEPPER=1
 * ---------------------------------------------------------------------- */

/* C = alpha*A*B + beta*C. */
__global__ void fast_cgemm_1_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define TRANSA 0
#define TRANSB 0
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*conj(transpose(A))*B + beta*C */
__global__ void fast_cgemm_2_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 0
#define CONJGA 1
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*transpose(A)*B + beta*C */
__global__ void fast_cgemm_3_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define TRANSA 1
#define TRANSB 0
#define CONJGA 0
#define CONJGB 0
#include "cgemm.h"
}

/* C = alpha*A*conj(transpose(B)) + beta*C */
__global__ void fast_cgemm_4_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define TRANSA 0
#define TRANSB 1
#define CONJGA 0
#define CONJGB 1
#include "cgemm.h"
}

/* C = alpha*A*transpose(B) + beta*C */
__global__ void fast_cgemm_5_main_sw_gld_fulltile (struct cublasCgemmParams parms)
{
#undef USE_TEX
#undef FAST_IMUL
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef TRANSA
#undef TRANSB
#undef CONJGA
#undef CONJGB
#define USE_TEX 0
#define FAST_IMUL 1
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
/* NOTE(review): the visible excerpt ends here; the remaining configuration
 * macros, the cgemm.h include and the closing brace of this kernel are not
 * shown and have deliberately not been reconstructed. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -