ssyrk.cu
来自「NVIDIA提供的CUDA的BLAS库源码」· CU 代码 · 共 867 行 · 第 1/2 页
CU
867 行
// NOTE(review): the directives down to the first closing brace finish a kernel
// whose opening (signature, #undef list, FULL_TILES_ONLY setting) lies before
// this chunk. They are reproduced unchanged, one directive per line.
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 0
#define UPPER 1
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

/*
 * SSYRK kernel instantiations.
 *
 * Every kernel below has the same shape: the six configuration macros are
 * reset, given the values for that variant, and then "ssyrk.h" is included
 * *inside* the function body, so the header supplies the kernel's code under
 * the selected configuration.
 *
 * Each preprocessor directive must sit on its own line: a directive extends
 * to the end of the physical line, so fusing several onto one line turns the
 * remainder into the replacement text of the first #define.
 *
 * Kernel names track the macro values as follows:
 *   up / lo     -> UPPER 1 / 0
 *   tr          -> TRANSA 1, TRANSB 0
 *   nt          -> TRANSA 0, TRANSB 1
 *   sw / hw     -> USE_MIXED_STEPPER 1 / 0
 *   fast_       -> FAST_IMUL 1
 *   _fulltile   -> FULL_TILES_ONLY 1
 * What each macro changes inside the body is defined by ssyrk.h.
 */

// lo, nt, hw
__global__ void ssyrk_lo_nt_main_hw (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 0
#define UPPER 0
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

// fast, up, tr, sw
__global__ void fast_ssyrk_up_tr_main_sw (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define FAST_IMUL 1
#define UPPER 1
#define TRANSA 1
#define TRANSB 0
#include "ssyrk.h"
}

// fast, lo, tr, sw
__global__ void fast_ssyrk_lo_tr_main_sw (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define FAST_IMUL 1
#define UPPER 0
#define TRANSA 1
#define TRANSB 0
#include "ssyrk.h"
}

// fast, up, nt, sw
__global__ void fast_ssyrk_up_nt_main_sw (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define FAST_IMUL 1
#define UPPER 1
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

// fast, lo, nt, sw
__global__ void fast_ssyrk_lo_nt_main_sw (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 1
#define FAST_IMUL 1
#define UPPER 0
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

// fast, up, tr, hw
__global__ void fast_ssyrk_up_tr_main_hw (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 1
#define UPPER 1
#define TRANSA 1
#define TRANSB 0
#include "ssyrk.h"
}

// fast, lo, tr, hw
__global__ void fast_ssyrk_lo_tr_main_hw (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 1
#define UPPER 0
#define TRANSA 1
#define TRANSB 0
#include "ssyrk.h"
}

// fast, up, nt, hw
__global__ void fast_ssyrk_up_nt_main_hw (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 1
#define UPPER 1
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

// fast, lo, nt, hw
__global__ void fast_ssyrk_lo_nt_main_hw (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 0
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 1
#define UPPER 0
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

// up, tr, sw, fulltile
__global__ void ssyrk_up_tr_main_sw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define FAST_IMUL 0
#define UPPER 1
#define TRANSA 1
#define TRANSB 0
#include "ssyrk.h"
}

// lo, tr, sw, fulltile
__global__ void ssyrk_lo_tr_main_sw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define FAST_IMUL 0
#define UPPER 0
#define TRANSA 1
#define TRANSB 0
#include "ssyrk.h"
}

// up, nt, sw, fulltile
__global__ void ssyrk_up_nt_main_sw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define FAST_IMUL 0
#define UPPER 1
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

// lo, nt, sw, fulltile
__global__ void ssyrk_lo_nt_main_sw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define FAST_IMUL 0
#define UPPER 0
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

// up, tr, hw, fulltile
__global__ void ssyrk_up_tr_main_hw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 0
#define UPPER 1
#define TRANSA 1
#define TRANSB 0
#include "ssyrk.h"
}

// lo, tr, hw, fulltile
__global__ void ssyrk_lo_tr_main_hw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 0
#define UPPER 0
#define TRANSA 1
#define TRANSB 0
#include "ssyrk.h"
}

// up, nt, hw, fulltile
__global__ void ssyrk_up_nt_main_hw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 0
#define UPPER 1
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

// lo, nt, hw, fulltile
__global__ void ssyrk_lo_nt_main_hw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 0
#define UPPER 0
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

// fast, up, tr, sw, fulltile
__global__ void fast_ssyrk_up_tr_main_sw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define FAST_IMUL 1
#define UPPER 1
#define TRANSA 1
#define TRANSB 0
#include "ssyrk.h"
}

// fast, lo, tr, sw, fulltile
__global__ void fast_ssyrk_lo_tr_main_sw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define FAST_IMUL 1
#define UPPER 0
#define TRANSA 1
#define TRANSB 0
#include "ssyrk.h"
}

// fast, up, nt, sw, fulltile
__global__ void fast_ssyrk_up_nt_main_sw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define FAST_IMUL 1
#define UPPER 1
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

// fast, lo, nt, sw, fulltile
__global__ void fast_ssyrk_lo_nt_main_sw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 1
#define FAST_IMUL 1
#define UPPER 0
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

// fast, up, tr, hw, fulltile
__global__ void fast_ssyrk_up_tr_main_hw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 1
#define UPPER 1
#define TRANSA 1
#define TRANSB 0
#include "ssyrk.h"
}

// fast, lo, tr, hw, fulltile
__global__ void fast_ssyrk_lo_tr_main_hw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 1
#define UPPER 0
#define TRANSA 1
#define TRANSB 0
#include "ssyrk.h"
}

// fast, up, nt, hw, fulltile
__global__ void fast_ssyrk_up_nt_main_hw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 1
#define UPPER 1
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}

// fast, lo, nt, hw, fulltile
__global__ void fast_ssyrk_lo_nt_main_hw_fulltile (struct cublasSsyrkParams parms)
{
#undef FULL_TILES_ONLY
#undef USE_MIXED_STEPPER
#undef FAST_IMUL
#undef UPPER
#undef TRANSA
#undef TRANSB
#define FULL_TILES_ONLY 1
#define USE_MIXED_STEPPER 0
#define FAST_IMUL 1
#define UPPER 0
#define TRANSA 0
#define TRANSB 1
#include "ssyrk.h"
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?