⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 matrix.h

📁 C++编写的高性能矩阵乘法的Stranssen算法
💻 H
字号:
/* Avoid multiple inclusion of this header file */#ifndef PRISM_MATRIX_H  #define PRISM_MATRIX_H  #include <math.h>#include <time.h>#include <stdio.h>#include <malloc.h>#if PRISM_T3D || PRISM_CRAY#include <fortran.h>#include <strings.h>#endif#if PRISM_MEM_CHK# define MALLOC(i) trmalloc(i, lineno, file)#else# define MALLOC(i) malloc(i)#endif#define MAX(x,y) ( (x) > (y) ? (x) : (y))#define MIN(x,y) ( (x) < (y) ? (x) : (y))/* Set recursion cutoff.  As long as the matrix size passes the cutoff test, * the Strassen recursion will continue.  Once the cutoff has been reached, the * matrices are multiplied using the conventional method.  These values have  * been chosen based on empirical results.  The default is sufficiently good for * most machines, but is certainly not optimal. * * Listed below are the values for the square cutoff (STRAS_CUTOFF), the rectangular * test cutoffs (STRAS_CUTOFF_M, STRAS_CUTOFF_N, STRAS_CUTOFF_K) for general * values of alpha & beta, and cutoffs (STRAS_CUTOFF_M1, STRAS_CUTOFF_N1,  * STRAS_CUTOFF_K1) for alpha = 1 & beta = 0.  We have determined that there is  * a slight performance gain in distinguishing between these values for  * rectangular matrices, but have only measured these for the RS/60000.  For  * all other machines listed, the values are the same for both cases. */#if PRISM_CRAY#define STRAS_CUTOFF    128#define STRAS_CUTOFF_M   80#define STRAS_CUTOFF_N   20#define STRAS_CUTOFF_K   45#define STRAS_CUTOFF_M1  80#define STRAS_CUTOFF_N1  20#define STRAS_CUTOFF_K1  45#elif PRISM_T3D#define STRAS_CUTOFF    325#define STRAS_CUTOFF_M  125#define STRAS_CUTOFF_N  109#define STRAS_CUTOFF_K   75#define STRAS_CUTOFF_M1 125#define STRAS_CUTOFF_N1 109#define STRAS_CUTOFF_K1  75#elif PRISM_SP#define STRAS_CUTOFF    199#define STRAS_CUTOFF_M   75#define STRAS_CUTOFF_N   80#define STRAS_CUTOFF_K  100#define STRAS_CUTOFF_M1  75#define STRAS_CUTOFF_N1  95#define STRAS_CUTOFF_K1 125#elif PRISM_SUN#define STRAS_CUTOFF     64#define STRAS_CUTOFF_M   15#define STRAS_CUTOFF_N   15#define STRAS_CUTOFF_K   15#define STRAS_CUTOFF_M1  15#define STRAS_CUTOFF_N1  15#define STRAS_CUTOFF_K1  15#else/* Default value for any other machines */#define STRAS_CUTOFF    1024#define STRAS_CUTOFF_M   59#define STRAS_CUTOFF_N   59#define STRAS_CUTOFF_K   59#define STRAS_CUTOFF_M1  59#define STRAS_CUTOFF_N1  59#define STRAS_CUTOFF_K1  59#endif/* Take care of character strings between FORTRAN and C for Cray */#if PRISM_CRAY || PRISM_T3D#define FCHAR _fcd_fcd cf1, cf2;#else#define FCHAR char*#endif#if PRISM_CRAY || PRISM_T3D/* Defines for CRAY or T3D. Use upper case; single  * precision is 64 bits; no trailing underscore */#define dgemm_          SGEMM#define dcopy_          SCOPY#define dscal_          SSCAL#define daxpy_          SAXPY#define dger_           SGER#define dgemv_          SGEMV#define matrix_fadd_    MATRIX_FADD#define matrix_flinear_ MATRIX_FLINEAR#define matrix_fscalcp_ MATRIX_FSCALCP#define matrix_fsub_    MATRIX_FSUB#elif PRISM_SP/* IBM does not have trailing underscores for FORTRAN/BLAS routines */#define dgemm_          dgemm#define dcopy_          dcopy#define dscal_          dscal#define daxpy_          daxpy#define dger_           dger#define dgemv_          dgemv#endif/* Give mapping for C routines callable from FORTRAN programs */#if PRISM_SUN/* SUN needs trailing underscores */#define DGEFMM          dgefmm_#define DGEFMM_MEM      dgefmm_mem_#define TMP_DGEFMM_MEM  tmp_dgefmm_mem_#elif PRISM_SP/* IBM needs lower case names */#define DGEFMM          dgefmm#define DGEFMM_MEM      dgefmm_mem#define TMP_DGEFMM_MEM  tmp_dgefmm_mem/* Cray keeps names as uppercase without any underscores */#endifint cutoff,in_cutoff_m,in_cutoff_n,in_cutoff_k,cutoff_m,cutoff_n,cutoff_k;/* Use shifted name so we can automatically get filename and line# from calling   routine */extern void s_generror(char c_error_text[],char *filename,int lineno);#define generror(A) s_generror(A,__FILE__,__LINE__)extern void s_matrix_alloc(int rows,int cols,double **a,int *lda,char *filename,			   int lineno);#define matrix_alloc(A,B,C,D) s_matrix_alloc(A,B,C,D,__FILE__,__LINE__)/* Regular ANSI C declarations */extern void submatrix(double *a,int lda,int oldrl,int oldcl,double **b);extern void matrix_add(int m,int n,double alpha,double *a,int lda,double *b,		       int ldb,double *c,int ldc);extern void matrix_sub(int m,int n,double alpha,double *a,int lda,double *b,		       int ldb,double *c,int ldc);extern void matrix_prod(char c_transa,char c_transb,int m,int n,int k,			double alpha,double *a,int lda,double *b,int ldb,double beta,			double *c,int ldc);extern void matrix_lin_update(int m,int n,double alpha,double *a,int lda,			      double beta,double *c,int ldc);extern void free_matrix(double *m);extern void strassen_internal(char c_transa,char c_transb,int m,int n,int k,			      double alpha,double *a,int lda,double *b,			      int ldb,double beta,double *c,int ldc,			      double *d_aux,int i_naux);extern void fmm_internal(char c_transa,char c_transb,int m,int n,int k,double alpha,			 double *a,int lda,double *b,int ldb,double beta,double *c,			 int ldc,double *d_aux,int i_naux);extern void fmm(char c_transa,char c_transb,int m,int n,int k,double alpha,		double *a,int lda,double *b,int ldb,double beta,double *c,int ldc,		double *d_aux,int i_naux);extern void DGEFMM_MEM(char *c_transa,char *c_transb,int *m,int *n,int *k,double *alpha,		  double *a,int *lda,double *b,int *ldb,double *beta,double *c,int *ldc,		   double *d_aux,int *i_naux);extern void DGEFMM(char *c_transa,char *c_transb,int *m,int *n,int *k,double *alpha,		   double *a,int *lda,double *b,int *ldb,double *beta,double *c,		   int *ldc);extern void fixup_internal(char c_transa,char c_transb,int m,int n,int k,int m_mod,			   int n_mod,int k_mod,int r1,int s1,int t1,double alpha,			   double *a,int lda,double *b,int ldb,double beta,double *c,			   int ldc);extern int tmp_fmm(int m,int n,int k,double alpha,double beta);extern int TMP_DGEFMM_MEM(int *m,int *n,int *k,double *alpha,double *beta);extern int no_recursion(int m,int n,int k,double alpha,double beta);/* BLAS */extern void dgemm_(FCHAR transa,FCHAR transb,int *m,int *n,int *k,		   double *alpha,double *a,int *lda,double *b,		   int *ldb,double *beta,double *c,int *ldc);extern void dgemv_(FCHAR trans,int *m,int *n,double *alpha,double *a,int *lda,		   double *X,int *incx,double *beta,double *Y,int *incy);#if PRISM_CRAY || PRISM_T3D/* Take care of character data between C and FORTRAN */#define SGEMM(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13)\  cf1=_cptofcd(a1,strlen(a1));cf2=_cptofcd(a2,strlen(a2));SGEMM(cf1,cf2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13)#define SGEMV(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11)\  cf1=_cptofcd(a1,strlen(a1));SGEMV(cf1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11)#endifextern void dcopy_(int *n,double *dx,int *incx,double *dy,int *incy);extern void daxpy_(int *n,double *alpha,double *X,int *incx,		   double *Y,int *incy);extern void dger_(int *m,int *n,double *alpha,double *X,int *incx,		  double *Y,int *incy,double *a,int *lda);#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -