⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fixup_internal.c

📁 C++编写的高性能矩阵乘法的Strassen算法
💻 C
字号:
/*============================================================================
 Internal routine for fmm that performs cleanup of the remaining row(s)/col(s)
 that were peeled off to give an optimum even-sized matrix for the Strassen
 recursion.

 Inputs
  c_transa,c_transb : characters specifying the form of A or B (transposed or
                      not); forwarded unchanged to the BLAS routines
  m,n,k             : matrix dimensions
  m_mod,n_mod,k_mod : dimensions of the even-sized matrices sent to Strassen
  r1,s1,t1          : number of leftover rows of op(A) (r1), columns of op(B)
                      (s1) and inner-dimension entries (t1)
  alpha,beta        : scalars
  a                 : m x k matrix A
  b                 : k x n matrix B
  lda,ldb,ldc       : matrix leading dimensions

 Outputs
  c                 : m x n matrix, C = alpha*A*B + beta*C

 Matrices are addressed the way the Fortran BLAS expects them: element (i,j)
 of a stored matrix X lives at x[i + j*ldx].
============================================================================*/

#include "matrix.h"
#if STRAS_TIME_PARTS
#include "matrix_test.h"
#endif

/* Returns nonzero when a BLAS transpose flag selects the transposed operand.
 * The reference BLAS accepts 'T'/'t' and, for real data, treats 'C'/'c' the
 * same way, so the offset arithmetic below must agree with all four
 * spellings; otherwise offsets would be computed for one layout while the
 * BLAS call (which receives the original flag) uses the other.
 */
static int fixup_is_trans(char flag)
{
  return flag == 'T' || flag == 't' || flag == 'C' || flag == 'c';
}

void fixup_internal(char c_transa,char c_transb,int m,int n,int k,int m_mod,
                    int n_mod,int k_mod,int r1,int s1,int t1,double alpha,
                    double *a,int lda,double *b,int ldb,double beta,double *c,
                    int ldc)
{
  int i_one = 1,               /* Needed as FORTRAN parameter   */
      a_start, b_start,        /* Offset into A & B             */
      a_inc, b_inc,            /* Step between elements of the A & B vectors */
      rows, cols;              /* Stored matrix size handed to DGEMV */
  double one = 1.0;            /* Needed as FORTRAN parameter   */
  char c_trans;                /* Needed for call to DGEMV      */
  const int a_is_trans = fixup_is_trans(c_transa);
  const int b_is_trans = fixup_is_trans(c_transb);

#if STRAS_TIME_PARTS
  CLOCK_START(time_fixup_s);
#endif

  /* Compute contribution of extra col(s) of A & extra row(s) of B, if any:
   *
   *              C11 = alpha * (a12 * b21) + C11
   *
   * C11 is accumulated into (beta fixed at 1.0 for DGEMM; DGER has no beta),
   * so this step only adds the missing inner-dimension terms.
   */
  if (k_mod != k) {
    if (a_is_trans) {
      a_start = k_mod;       /* Points to row since transpose */
      a_inc = lda;
    }
    else {
      a_start = k_mod*lda;   /* Points to start of a12 */
      a_inc = 1;
    }

    if (b_is_trans) {
      b_start = k_mod*ldb;   /* Points to column since transpose */
      b_inc = 1;
    }
    else {
      b_start = k_mod;       /* Points to start of b21 */
      b_inc = ldb;
    }

    if (t1 == 1) {
      /* A single leftover inner index is a rank-1 update. */
      dger_(&m_mod,&n_mod,&alpha,a+a_start,&a_inc,b+b_start,&b_inc,c,&ldc);
    }
    else {
      dgemm_(&c_transa,&c_transb,&m_mod,&n_mod,&t1,&alpha,a+a_start,&lda,
             b+b_start,&ldb,&one,c,&ldc);
    }
  }

  /* Compute contribution of extra col(s) of B, if any:
   *
   *              c12 = alpha * [(A11*b12)+(a12*b22)] + beta * c12
   *
   *          |     |         |     |     |   | b12 |        |     |
   *          | c12 | = alpha | A11 | a12 | X | ___ | + beta | c12 |
   *          |     |         |     |     |   |     |        |     |
   *                                          | b22 |
   */
  if (n_mod != n) {
    /* rows/cols describe A as stored, which is what DGEMV expects. */
    if (a_is_trans) {
      rows = k;
      cols = m_mod;
    }
    else {
      rows = m_mod;
      cols = k;
    }

    if (b_is_trans) {
      b_start = n_mod;       /* Points to row since transpose */
      b_inc = ldb;
    }
    else {
      b_start = n_mod*ldb;   /* Points to start of b12 */
      b_inc = 1;
    }

    if (s1 == 1) {
      /* One leftover column: matrix-vector product into column n_mod of C. */
      dgemv_(&c_transa,&rows,&cols,&alpha,a,&lda,b+b_start,&b_inc,&beta,
             c+n_mod*ldc,&i_one);
    }
    else {
      dgemm_(&c_transa,&c_transb,&m_mod,&s1,&k,&alpha,a,&lda,b+b_start,&ldb,
             &beta,c+n_mod*ldc,&ldc);
    }
  }

  /* Compute contribution of extra row(s) of A, if any:
   *
   *              c21 = alpha * [(a21*B11)+(a22*b21)] + beta * c21
   *              c22 = alpha * [(a21*b12)+(a22*b22)] + beta * c22
   *
   *                                          |       |   |
   *    |     |     |         |     |     |   |  B11  |b12|        |     |     |
   *    | c21 | c22 | = alpha | a21 | a22 | X |       |   | + beta | c21 | c22 |
   *    |     |     |         |     |     |   |_______|___|        |     |     |
   *                                          |  b21  |b22|
   */
  if (m_mod != m) {
    if (a_is_trans) {
      a_start = m_mod*lda;   /* Points to column since transpose */
      a_inc = 1;
    }
    else {
      a_start = m_mod;       /* Points to start of a21 */
      a_inc = lda;
    }

    /* The row of C is computed as y = alpha * op(B)' * x, so DGEMV gets the
     * opposite transpose flag from the one that applies to B in the product.
     */
    if (b_is_trans) {
      c_trans = 'N';
      rows = n;
      cols = k;
    }
    else {
      c_trans = 'T';
      rows = k;
      cols = n;
    }

    if (r1 == 1) {
      /* One leftover row: result is written along row m_mod of C (stride ldc). */
      dgemv_(&c_trans,&rows,&cols,&alpha,b,&ldb,a+a_start,&a_inc,&beta,
             c+m_mod,&ldc);
    }
    else {
      dgemm_(&c_transa,&c_transb,&r1,&n,&k,&alpha,a+a_start,&lda,b,&ldb,&beta,
             c+m_mod,&ldc);
    }
  }

#if STRAS_TIME_PARTS
  CLOCK_UPDATE(time_fixup_e,time_fixup_s,d_correct1,i_trash);
#endif
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -