📄 cxmatmul.cpp
字号:
else if( a != d )
{
int c_step0 = 1;
if( c == zero )
{
c_step0 = 0;
c_step = 1;
}
for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
{
double t0 = a[0]*b[0] + a[1]*b[b_step];
double t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step];
d[0] = t0*alpha + c[0]*beta;
d[d_step] = t1*alpha + c[c_step]*beta;
}
}
else
break;
EXIT;
case 3:
if( len == d_size.width && b != d )
{
for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
{
double t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2];
double t1 = a[0]*b[1] + a[1]*b[b_step+1] + a[2]*b[b_step*2+1];
double t2 = a[0]*b[2] + a[1]*b[b_step+2] + a[2]*b[b_step*2+2];
d[0] = t0*alpha + c[0]*beta;
d[1] = t1*alpha + c[1]*beta;
d[2] = t2*alpha + c[2]*beta;
}
}
else if( a != d )
{
int c_step0 = 1;
if( c == zero )
{
c_step0 = 0;
c_step = 1;
}
for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
{
double t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2];
double t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step] + a[a_step+2]*b[b_step*2];
double t2 = a[a_step*2]*b[0] + a[a_step*2+1]*b[b_step] + a[a_step*2+2]*b[b_step*2];
d[0] = t0*alpha + c[0]*beta;
d[d_step] = t1*alpha + c[c_step]*beta;
d[d_step*2] = t2*alpha + c[c_step*2]*beta;
}
}
else
break;
EXIT;
case 4:
if( len == d_size.width && b != d )
{
for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
{
double t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2] + a[3]*b[b_step*3];
double t1 = a[0]*b[1] + a[1]*b[b_step+1] + a[2]*b[b_step*2+1] + a[3]*b[b_step*3+1];
double t2 = a[0]*b[2] + a[1]*b[b_step+2] + a[2]*b[b_step*2+2] + a[3]*b[b_step*3+2];
double t3 = a[0]*b[3] + a[1]*b[b_step+3] + a[2]*b[b_step*2+3] + a[3]*b[b_step*3+3];
d[0] = t0*alpha + c[0]*beta;
d[1] = t1*alpha + c[1]*beta;
d[2] = t2*alpha + c[2]*beta;
d[3] = t3*alpha + c[3]*beta;
}
}
else if( d_size.width <= 16 && a != d )
{
int c_step0 = 1;
if( c == zero )
{
c_step0 = 0;
c_step = 1;
}
for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
{
double t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2] + a[3]*b[b_step*3];
double t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step] +
a[a_step+2]*b[b_step*2] + a[a_step+3]*b[b_step*3];
double t2 = a[a_step*2]*b[0] + a[a_step*2+1]*b[b_step] +
a[a_step*2+2]*b[b_step*2] + a[a_step*2+3]*b[b_step*3];
double t3 = a[a_step*3]*b[0] + a[a_step*3+1]*b[b_step] +
a[a_step*3+2]*b[b_step*2] + a[a_step*3+3]*b[b_step*3];
d[0] = t0*alpha + c[0]*beta;
d[d_step] = t1*alpha + c[c_step]*beta;
d[d_step*2] = t2*alpha + c[c_step*2]*beta;
d[d_step*3] = t3*alpha + c[c_step*3]*beta;
}
}
else
break;
EXIT;
}
}
if( type == CV_32F )
{
float* d = D->data.fl;
const float *a = A->data.fl, *b = B->data.fl, *c = C->data.fl;
size_t d_step = D->step/sizeof(d[0]),
a_step = A->step/sizeof(a[0]),
b_step = B->step/sizeof(b[0]),
c_step = C->step/sizeof(c[0]);
if( !c )
c = zerof;
switch( len )
{
case 2:
if( len == d_size.width && b != d )
{
for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
{
float t0 = a[0]*b[0] + a[1]*b[b_step];
float t1 = a[0]*b[1] + a[1]*b[b_step+1];
d[0] = (float)(t0*alpha + c[0]*beta);
d[1] = (float)(t1*alpha + c[1]*beta);
}
}
else if( a != d )
{
int c_step0 = 1;
if( c == zerof )
{
c_step0 = 0;
c_step = 1;
}
for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
{
float t0 = a[0]*b[0] + a[1]*b[b_step];
float t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step];
d[0] = (float)(t0*alpha + c[0]*beta);
d[d_step] = (float)(t1*alpha + c[c_step]*beta);
}
}
else
break;
EXIT;
case 3:
if( len == d_size.width && b != d )
{
for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
{
float t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2];
float t1 = a[0]*b[1] + a[1]*b[b_step+1] + a[2]*b[b_step*2+1];
float t2 = a[0]*b[2] + a[1]*b[b_step+2] + a[2]*b[b_step*2+2];
d[0] = (float)(t0*alpha + c[0]*beta);
d[1] = (float)(t1*alpha + c[1]*beta);
d[2] = (float)(t2*alpha + c[2]*beta);
}
}
else if( a != d )
{
int c_step0 = 1;
if( c == zerof )
{
c_step0 = 0;
c_step = 1;
}
for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
{
float t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2];
float t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step] + a[a_step+2]*b[b_step*2];
float t2 = a[a_step*2]*b[0] + a[a_step*2+1]*b[b_step] + a[a_step*2+2]*b[b_step*2];
d[0] = (float)(t0*alpha + c[0]*beta);
d[d_step] = (float)(t1*alpha + c[c_step]*beta);
d[d_step*2] = (float)(t2*alpha + c[c_step*2]*beta);
}
}
else
break;
EXIT;
case 4:
if( len == d_size.width && b != d )
{
for( i = 0; i < d_size.height; i++, d += d_step, a += a_step, c += c_step )
{
float t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2] + a[3]*b[b_step*3];
float t1 = a[0]*b[1] + a[1]*b[b_step+1] + a[2]*b[b_step*2+1] + a[3]*b[b_step*3+1];
float t2 = a[0]*b[2] + a[1]*b[b_step+2] + a[2]*b[b_step*2+2] + a[3]*b[b_step*3+2];
float t3 = a[0]*b[3] + a[1]*b[b_step+3] + a[2]*b[b_step*2+3] + a[3]*b[b_step*3+3];
d[0] = (float)(t0*alpha + c[0]*beta);
d[1] = (float)(t1*alpha + c[1]*beta);
d[2] = (float)(t2*alpha + c[2]*beta);
d[3] = (float)(t3*alpha + c[3]*beta);
}
}
else if( len <= 16 && a != d )
{
int c_step0 = 1;
if( c == zerof )
{
c_step0 = 0;
c_step = 1;
}
for( i = 0; i < d_size.width; i++, d++, b++, c += c_step0 )
{
float t0 = a[0]*b[0] + a[1]*b[b_step] + a[2]*b[b_step*2] + a[3]*b[b_step*3];
float t1 = a[a_step]*b[0] + a[a_step+1]*b[b_step] +
a[a_step+2]*b[b_step*2] + a[a_step+3]*b[b_step*3];
float t2 = a[a_step*2]*b[0] + a[a_step*2+1]*b[b_step] +
a[a_step*2+2]*b[b_step*2] + a[a_step*2+3]*b[b_step*3];
float t3 = a[a_step*3]*b[0] + a[a_step*3+1]*b[b_step] +
a[a_step*3+2]*b[b_step*2] + a[a_step*3+3]*b[b_step*3];
d[0] = (float)(t0*alpha + c[0]*beta);
d[d_step] = (float)(t1*alpha + c[c_step]*beta);
d[d_step*2] = (float)(t2*alpha + c[c_step*2]*beta);
d[d_step*3] = (float)(t3*alpha + c[c_step*3]*beta);
}
}
else
break;
EXIT;
}
}
}
{
int b_step = B->step;
CvGEMMSingleMulFunc single_mul_func;
CvMat tmat, *D0 = D;
icvBLAS_GEMM_32f_t blas_func = 0;
if( !inittab )
{
icvInitGEMMTable( &single_mul_tab, &block_mul_tab, &store_tab );
inittab = 1;
}
single_mul_func = (CvGEMMSingleMulFunc)single_mul_tab.fn_2d[type];
if( !single_mul_func )
CV_ERROR( CV_StsUnsupportedFormat, "" );
if( D->data.ptr == A->data.ptr || D->data.ptr == B->data.ptr )
{
int buf_size = d_size.width*d_size.height*CV_ELEM_SIZE(type);
if( d_size.width <= CV_MAX_LOCAL_MAT_SIZE )
{
buffer = (uchar*)cvStackAlloc( buf_size );
local_alloc = 1;
}
else
CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -