📄 fixup_internal.c
字号:
/*============================================================================
Internal routine for fmm that performs cleanup of remaining row(s)/col(s)
removed to give optimum even-sized matrix for Strassen recursion.

Inputs
  c_transa,c_transb : characters specifying form of A or B (transpose or not);
                      'T', 't', 'C' and 'c' are all treated as "transposed",
                      matching what the BLAS routines called below accept
  m,n,k             : matrix dimensions
  m_mod,n_mod,k_mod : dimension of the even-sized matrices sent to Strassen
  t1,s1,r1          : dimension of remaining portion of matrices
                      (t1 is used as the leftover inner dimension, s1 as the
                      leftover column count of B/C, r1 as the leftover row
                      count of A/C)
  alpha,beta        : scalars
  A                 : m x k matrix
  B                 : k x n matrix
  ldb,lda,ldc       : matrix leading dimensions

Outputs
  C                 : m x n matrix, C = alpha*A*B+beta*C
============================================================================*/
#include "matrix.h"
#if STRAS_TIME_PARTS
#include "matrix_test.h"
#endif

/* Returns non-zero when a BLAS-style transpose flag selects the transposed
 * operand.  For real matrices the BLAS treats 'C'/'c' exactly like 'T'/'t',
 * so all four spellings must produce the same offsets and strides here. */
static int fixup_is_trans(char flag)
{
    return flag == 'T' || flag == 't' || flag == 'C' || flag == 'c';
}

void fixup_internal(char c_transa, char c_transb, int m, int n, int k, int m_mod,
                    int n_mod, int k_mod, int r1, int s1, int t1, double alpha,
                    double *a, int lda, double *b, int ldb, double beta, double *c,
                    int ldc)
{
    int i_one = 1;          /* Needed as FORTRAN parameter */
    int a_start, b_start;   /* Offset into A & B */
    int a_inc, b_inc;       /* Step between consecutive vector elements of A & B */
    int rows, cols;         /* Stored matrix size handed to DGEMV */
    double one = 1.0;       /* Needed as FORTRAN parameter */
    char c_trans;           /* Needed for call to DGEMV */
    const int a_is_trans = fixup_is_trans(c_transa);
    const int b_is_trans = fixup_is_trans(c_transb);

#if STRAS_TIME_PARTS
    CLOCK_START(time_fixup_s);
#endif

    /* Compute contribution of extra col(s) of A & extra row(s) of B, if any:
     *
     *   C11 = alpha * (a12 * b21) + C11
     *
     * The update accumulates into C11 (BLAS beta of 1.0); presumably the
     * caller has already formed the even-sized product there -- confirm
     * against the caller.
     */
    if (k_mod != k) {
        if (a_is_trans) {
            a_start = k_mod;            /* Points to row since transpose */
            a_inc = lda;
        } else {
            a_start = k_mod * lda;      /* Points to start of a12 */
            a_inc = 1;
        }
        if (b_is_trans) {
            b_start = k_mod * ldb;      /* Points to column since transpose */
            b_inc = 1;
        } else {
            b_start = k_mod;            /* Points to start of b21 */
            b_inc = ldb;
        }
        if (t1 == 1) {
            /* Single leftover column/row: rank-1 update is enough. */
            dger_(&m_mod, &n_mod, &alpha, a + a_start, &a_inc, b + b_start, &b_inc,
                  c, &ldc);
        } else {
            dgemm_(&c_transa, &c_transb, &m_mod, &n_mod, &t1, &alpha, a + a_start,
                   &lda, b + b_start, &ldb, &one, c, &ldc);
        }
    }

    /* Compute contribution of extra col(s) of B, if any:
     *
     *   c12 = alpha * [(A11*b12)+(a12*b22)] + beta * c12
     *
     *   |     |         |     |     |     | b12 |        |     |
     *   | c12 | = alpha | A11 | a12 |  X  | ___ | + beta | c12 |
     *   |     |         |     |     |     | b22 |        |     |
     */
    if (n_mod != n) {
        /* DGEMV wants the dimensions of A as stored, not of op(A). */
        if (a_is_trans) {
            rows = k;
            cols = m_mod;
        } else {
            rows = m_mod;
            cols = k;
        }
        if (b_is_trans) {
            b_start = n_mod;            /* Points to row since transpose */
            b_inc = ldb;
        } else {
            b_start = n_mod * ldb;      /* Points to start of b12 */
            b_inc = 1;
        }
        if (s1 == 1) {
            dgemv_(&c_transa, &rows, &cols, &alpha, a, &lda, b + b_start, &b_inc,
                   &beta, c + n_mod * ldc, &i_one);
        } else {
            dgemm_(&c_transa, &c_transb, &m_mod, &s1, &k, &alpha, a, &lda,
                   b + b_start, &ldb, &beta, c + n_mod * ldc, &ldc);
        }
    }

    /* Compute contribution of extra row(s) of A, if any:
     *
     *   c21 = alpha * [(a21*B11)+(a22*b21)] + beta * c21
     *   c22 = alpha * [(a21*b12)+(a22*b22)] + beta * c22
     *
     *   |     |     |         |     |     |     |  B11  |b12|        |     |     |
     *   | c21 | c22 | = alpha | a21 | a22 |  X  |_______|___| + beta | c21 | c22 |
     *   |     |     |         |     |     |     |  b21  |b22|        |     |     |
     */
    if (m_mod != m) {
        if (a_is_trans) {
            a_start = m_mod * lda;      /* Points to column since transpose */
            a_inc = 1;
        } else {
            a_start = m_mod;            /* Points to start of a21 */
            a_inc = lda;
        }
        /* The row vector times matrix product is evaluated as
         * op(B)' * a, so the DGEMV flag is the opposite of c_transb. */
        if (b_is_trans) {
            c_trans = 'N';
            rows = n;
            cols = k;
        } else {
            c_trans = 'T';
            rows = k;
            cols = n;
        }
        if (r1 == 1) {
            dgemv_(&c_trans, &rows, &cols, &alpha, b, &ldb, a + a_start, &a_inc,
                   &beta, c + m_mod, &ldc);
        } else {
            dgemm_(&c_transa, &c_transb, &r1, &n, &k, &alpha, a + a_start, &lda, b,
                   &ldb, &beta, c + m_mod, &ldc);
        }
    }

#if STRAS_TIME_PARTS
    CLOCK_UPDATE(time_fixup_e, time_fixup_s, d_correct1, i_trash);
#endif
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -