📄 matrix.c
字号:
#include "matrix.h"#if STRAS_TIME_PARTS || PRISM_MEM_CHK#include "matrix_test.h"#endifdouble one=1.0,m_one=-1.0; /* Needed as FORTRAN parameter */void s_matrix_alloc(int rows, int cols, double **a, int *lda, char file[], int lineno) /* Allocates a matrix A of size rows x cols and returns the leading dimension * * NOTE: Call this routine by invoking matrix_alloc() where * the CPP definitions in matrix.h do the name shift and * add the last two arguments automatically. * */{ if (rows == 0) { *a = NULL; return; } else { #if STRAS_TIME_PARTS CLOCK_START(gt0);#endif /* Allocate memory for array */ *a = (double *) MALLOC((unsigned) (rows*cols)*sizeof(double)); if (*a == NULL) s_generror("allocation failure in s_matrix_alloc()", file, lineno); #if STRAS_TIME_PARTS CLOCK_UPDATE(time_matrix,gt0,d_correct1,i_sum_clock);#endif *lda = rows; } }void submatrix(double *a, int lda, int rl, int cl, double **b) /* Returns a pointer to submatrix B which points to A[c1][r1]. */{#if STRAS_TIME_PARTS CLOCK_START(gt0);#endif /* Set pointer to starting location of submatrix */ *b = a+(cl-1)*lda + rl - 1;#if STRAS_TIME_PARTS CLOCK_UPDATE(time_submatrix,gt0,d_correct1,i_sum_clock);#endif}void free_matrix(double *a) /* Frees a double matrix of size mem_size allocated by matrix_alloc(). Parameter mem_size used for profiling only. */ {#if STRAS_TIME_PARTS CLOCK_START(gt0);#endif free(a);#if STRAS_TIME_PARTS CLOCK_UPDATE(time_free_matrix,gt0,d_correct1,i_sum_clock);#endif}void matrix_add(int m, int n, double alpha, double *a, int lda, double *b, int ldb, double *c, int ldc) /* Matrix Add. A, B, & C are m x n matrices. * * C = alpha * (A + B) * * NOTE: We pass lda, ldb, & ldc so that when we call Fortran routine we have * these values. Not needed by others but passed for consistency. * * NOTE: Cases that are optimized are used by our fmm routines. */{ int i,j,inc=1; double *p_a,*p_b,*p_c; #if STRAS_TIME_PARTS CLOCK_START(gt0);#endif#if STRAS_BLAS_ADD if (b != c) { /* If aliasing was not used */ if (alpha == 1.0) { /* Compute C = A + B */#pragma _CRI ivdep for (j = 0; j < n; j++) { dcopy_(&m,a+j*lda,&inc,c+j*ldc,&inc); daxpy_(&m,&one,b+j*ldb,&inc,c+j*ldc,&inc); } } else { /* Compute C = alpha * (A + B) */#pragma _CRI ivdep for (j = 0; j < n; j++) { dcopy_(&m,a+j*lda,&inc,c+j*ldc,&inc); dscal_(&m,&alpha,c+j*ldc,&inc); daxpy_(&m,&alpha,b+j*ldb,&inc,c+j*ldc,&inc); } } } else { /* If aliasing was used */ if (alpha == 1.0) { /* Compute C = A + C */#pragma _CRI ivdep for (j = 0; j < n; j++) { daxpy_(&m,&one,a+j*lda,&inc,c+j*ldc,&inc); } } else { /* Compute C = alpha * (A + C) */#pragma _CRI ivdep for (j = 0; j < n; j++) { daxpy_(&m,&one,a+j*lda,&inc,c+j*ldc,&inc); dscal_(&m,&alpha,c+j*ldc,&inc); } } } #elif STRAS_FORTRAN_ADD matrix_fadd_(&m,&n,&alpha,a,&lda,b,&ldb,c,&ldc);#elif STRAS_ESSL_ADD if (alpha == 1.0) /* Compute C = A + B */ dgeadd_(a,&lda,"N",b,&ldb,"N",c,&ldc,&m,&n); else /* Compute C = alpha * (A + B) + C */ matrix_fadd_(&m,&n,&alpha,a,&lda,b,&ldb,c,&ldc); #else/* Use pointer addition by default */ for (j = 0; j < n; j++) { p_a = a+j*lda; p_b = b+j*ldb; p_c = c+j*ldc;#pragma _CRI ivdep for (i = 0; i < m; i++) { *p_c++ = alpha * (*p_a++ + *p_b++); } }#endif #if STRAS_TIME_PARTS CLOCK_UPDATE(time_matrix_add,gt0,d_correct1,i_sum_clock);#endif}void matrix_sub(int m, int n, double alpha, double *a, int lda, double *b, int ldb, double *c, int ldc) /* Matrix subtraction. A, B & C are m x n matrices. * * C = alpha * (A - B) */{ int i,j,inc=1; double *p_a,*p_b,*p_c,m_alpha;#if STRAS_TIME_PARTS CLOCK_START(gt0);#endif#if STRAS_BLAS_SUB if (b != c) { /* If aliasing was not used */ if (alpha == 1.0) /* Compute C = A - B */#pragma _CRI ivdep for (j = 0; j < n; j++) { dcopy_(&m,a+j*lda,&inc,c+j*ldc,&inc); daxpy_(&m,&m_one,b+j*ldb,&inc,c+j*ldc,&inc); } else { /* Compute C = alpha * (A - B) */ m_alpha = -alpha;#pragma _CRI ivdep for (j = 0; j < n; j++) { dcopy_(&m,a+j*lda,&inc,c+j*ldc,&inc); dscal_(&m,&alpha,c+j*ldc,&inc); daxpy_(&m,&m_alpha,b+j*ldb,&inc,c+j*ldc,&inc); } } } else { /* If aliasing was used */ if (alpha == 1.0) /* Compute C = A - C */#pragma _CRI ivdep for (j = 0; j < n; j++) { dscal_(&m,&m_one,c+j*ldc,&inc); daxpy_(&m,&one,a+j*lda,&inc,c+j*ldc,&inc); } else { /* Compute C = alpha * (A - C) */ m_alpha = -alpha;#pragma _CRI ivdep for (j = 0; j < n; j++) { dscal_(&m,&m_alpha,c+j*ldc,&inc); daxpy_(&m,&alpha,a+j*lda,&inc,c+j*ldc,&inc); } } } #elif STRAS_FORTRAN_SUB matrix_fsub_(&m,&n,&alpha,a,&lda,b,&ldb,c,&ldc);#elif STRAS_ESSL_SUB if (alpha == 1.0) /* Compute C = A - B */ dgesub_(a,&lda,"N",b,&ldb,"N",c,&ldc,&m,&n); else /* Compute C = alpha * (A - B) */ matrix_fsub_(&m,&n,&alpha,a,&lda,b,&ldb,c,&ldc);#else/* Use pointer subtraction by default */ for (j = 0; j < n; j++) { p_a = a+j*lda; p_b = b+j*ldb; p_c = c+j*ldc;#pragma _CRI ivdep for (i = 0; i < m; i++) { *p_c++ = alpha * (*p_a++ - *p_b++); } }#endif#if STRAS_TIME_PARTS CLOCK_UPDATE(time_matrix_sub,gt0,d_correct1,i_sum_clock);#endif}void matrix_lin_update(int m, int n, double alpha, double *a, int lda, double beta, double *c, int ldc) /* Matrix Linear Combination. A & C are m x n matrices. * * C = alpha * A + beta * C * * NOTE: Cases that are optimized are used by our fmm routines. */{ int i,j,inc=1; double *p_a,*p_c; #if STRAS_TIME_PARTS CLOCK_START(gt0);#endif#if STRAS_BLAS_ADD if (beta == 0.0) { if (alpha == 1.0) {#pragma _CRI ivdep for (j = 0; j < n; j++) { dcopy_(&m,a+j*lda,&inc,c+j*ldc,&inc); } } else {#pragma _CRI ivdep for (j = 0; j < n; j++) { dcopy_(&m,a+j*lda,&inc,c+j*ldc,&inc); dscal_(&m,&alpha,c+j*ldc,&inc); } } } else if (beta == 1.0)#pragma _CRI ivdep for (j = 0; j < n; j++) { daxpy_(&m,&alpha,a+j*lda,&inc,c+j*ldc,&inc); } else#pragma _CRI ivdep for (j = 0; j < n; j++) { dscal_(&m,&beta,c+j*ldc,&inc); daxpy_(&m,&alpha,a+j*lda,&inc,c+j*ldc,&inc); }#elif STRAS_FORTRAN_ADD || STRAS_ESSL_ADD if (alpha == 1.0) { if (beta == 0.0) {#pragma _CRI ivdep for (j = 0; j < n; j++) { dcopy_(&m,a+j*lda,&inc,c+j*ldc,&inc); } } else if (beta == -1.0) { matrix_fsub_(&m,&n,&one,a,&lda,c,&ldc,c,&ldc); } else { matrix_flinear_(&m,&n,&one,a,&lda,&beta,c,&ldc); } } else if (beta == 1.0) { if (alpha == -1.0) { matrix_fsub_(&m,&n,&one,c,&ldc,a,&lda,c,&ldc); } else {#pragma _CRI ivdep for (j = 0; j < n; j++) { daxpy_(&m,&alpha,a+j*lda,&inc,c+j*ldc,&inc); } } } else if (beta == -1.0) { matrix_flinear_(&m,&n,&alpha,a,&lda,&m_one,c,&ldc); } else { matrix_flinear_(&m,&n,&alpha,a,&lda,&beta,c,&ldc); }#else/* Use pointer update by default */ for (j = 0; j < n; j++) { p_a = a+j*lda; p_c = c+j*ldc;#pragma _CRI ivdep for (i = 0; i < m; i++) { /* Some compilers (Cray mostly) handle *pc++ on RHS differently and give the wrong answer; split into 2 separate statements.*/ *p_c = beta * *p_c + alpha * *p_a++; p_c++; } }#endif#if STRAS_TIME_PARTS CLOCK_UPDATE(time_matrix_linear,gt0,d_correct1,i_sum_clock);#endif}void matrix_prod(char c_transa, char c_transb, int m, int n, int k, double alpha, double *a, int lda, double *b, int ldb, double beta, double *c, int ldc) /* * Matrix product. A is an m x k matrix and B is a k x n matrix. * * C = alpha * (A * B) + beta * C */{#if STRAS_TIME_PARTS CLOCK_START(gt0);#endif dgemm_(&c_transa, &c_transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);#if STRAS_TIME_PARTS CLOCK_UPDATE(time_matrix_prod,gt0,d_correct1,i_sum_clock);#endif}int tmp_fmm(int m, int n, int k, double alpha, double beta) /* Compute amount of memory needed for temporary workspace by * Strassen/Winograd. */{ int i_tmp,j,i_min,m2,n2,k2,i_max; double tmp1; /* Compute j = number of levels of recursion performed */ j = 0; m2 = m; n2 = n; k2 = k; i_min = MIN(MIN(m,n),k); i_max = MAX(MAX(m,n),k); /* Since the routine no_recursion can be called with varying values of alpha & * beta, this routine must calculate the max possible memeory needed, which * requires the smallest values for the rectangular cutoffs. * * If rectangular cutoff values were not given, use the default values. */ if (in_cutoff_m <= 0) { cutoff_m = MIN(STRAS_CUTOFF_M, STRAS_CUTOFF_M1); cutoff_n = MIN(STRAS_CUTOFF_N, STRAS_CUTOFF_N1); cutoff_k = MIN(STRAS_CUTOFF_K, STRAS_CUTOFF_K1); } while (((((double)m2 * (double)n2 * (double)k2) > cutoff_m*(double)n2*(double)k2+cutoff_n*(double)m2*(double)k2+ cutoff_k*(double)n2*(double)m2) || (i_min > cutoff)) && (i_max > cutoff)) { m2 = m2 >> 1; n2 = n2 >> 1; k2 = k2 >> 1; i_max = MAX(MAX(m2,n2),k2); i_min = MIN(MIN(m2,n2),k2); j++; } /* Compute memory needed based on number of levels of recursion */ tmp1 = (1./3.) * (1 - pow(.25, (double) j)); if (beta != 0.0) i_tmp = m*k + k*n + m*n; else i_tmp = m*MAX(k,n) + k*n; i_tmp = ceil((tmp1 * i_tmp)); return i_tmp;}int TMP_DGEFMM_MEM(int *m, int *n, int *k, double *alpha, double *beta) /* Compute amount of memory needed for temporary workspace by * Strassen/Winograd. This routine has a Fortran style interface. */{ int i_tmp; /* Call the C routine */ i_tmp = tmp_fmm(*m, *n, *k, *alpha, *beta); return i_tmp;}int no_recursion(int m, int n, int k, double alpha, double beta) /* Determine if recursion should continue based on matrix size(s) */{ int stop_recursion = 0,i_min,i_max; double r_mnk; i_min = MIN(MIN(m,n),k); i_max = MAX(MAX(m,n),k); /* If rectangular cutoff values were not given, use the default values. */ if (in_cutoff_m <= 0) { if (alpha == 1.0 && beta == 0.0) { cutoff_m = STRAS_CUTOFF_M1; cutoff_n = STRAS_CUTOFF_N1; cutoff_k = STRAS_CUTOFF_K1; } else { cutoff_m = STRAS_CUTOFF_M; cutoff_n = STRAS_CUTOFF_N; cutoff_k = STRAS_CUTOFF_K; } } r_mnk = (double) m * (double) n * (double) k; if (((r_mnk <= cutoff_m*(double)n*(double)k+cutoff_n*(double)m*(double)k+ cutoff_k*(double)n*(double)m) && (i_min <= cutoff)) || (i_max <= cutoff)) stop_recursion = 1; return stop_recursion;}void s_generror(char c_error_text[], char file[], int lineno) /* Routine to print error message. * * NOTE: Call this routine by invoking generror() where * the CPP definitions in matrix.h do the name shift * and add the last two arguments automatically. * */{ char buf[150] ; sprintf(buf, "\n*** Run-time error in file %s on line %d ***\n", file, lineno); strcat(buf, c_error_text);#if PRISM_SP printf("%s\n\n", buf);#else fprintf(stderr, "%s***\n\n", buf);#endif exit(99);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -