📄 vec_mpy_08.c
字号:
/*
 * vec_mpy - element-wise multiply of two vectors of 16-bit values, with each
 * product shifted right by `shift` and its low 16 bits stored to the output.
 *
 * a, b   Input vectors of `len` shorts. They are read four elements at a
 *        time through a `const double *` cast.
 * c      Output vector of `len` shorts, written four elements at a time
 *        through a `double *` cast. `restrict` promises it does not overlap
 *        a or b.
 * len    Number of elements to process. Values <= 0 process nothing.
 * shift  Right-shift applied to every 32-bit product before packing.
 *        NOTE(review): no range check is done; presumably 0..31 - confirm.
 *
 * NOTE(review): the 64-bit accesses presumably require a, b and c to be
 * 8-byte aligned on the target - confirm against the callers.
 *
 * Elements are handled in groups of four with the packed-data intrinsics.
 * Any 1-3 elements left over when `len` is not a multiple of four are
 * handled one at a time, so nothing outside [0, len) is read or written.
 */
void vec_mpy(const short *restrict a, const short *restrict b,
             short *restrict c, int len, int shift)
{
    int i;
    /* Largest multiple of four that fits inside len (0 when len <= 0). */
    int packed_len = (len > 0) ? (len - len % 4) : 0;

    for (i = 0; i < packed_len; i += 4) {
        unsigned a3_a2, a1_a0;        /* Packed 16-bit values            */
        unsigned b3_b2, b1_b0;        /* Packed 16-bit values            */
        double c3_c2_dbl, c1_c0_dbl;  /* 32-bit prods in 64-bit pairs    */
        int c3, c2, c1, c0;           /* Separate 32-bit products        */
        unsigned c3_c2, c1_c0;        /* Packed 16-bit values            */

        /* Fetch four elements of each input as two 32-bit halves. */
        a3_a2 = _hi(*(const double *) &a[i]);
        a1_a0 = _lo(*(const double *) &a[i]);
        b3_b2 = _hi(*(const double *) &b[i]);
        b1_b0 = _lo(*(const double *) &b[i]);

        /* Multiply elements together, producing four products. */
        c3_c2_dbl = _mpy2(a3_a2, b3_b2);
        c1_c0_dbl = _mpy2(a1_a0, b1_b0);

        /* Shift each of the four products right by our shift amount. */
        c3 = _hi(c3_c2_dbl) >> shift;
        c2 = _lo(c3_c2_dbl) >> shift;
        c1 = _hi(c1_c0_dbl) >> shift;
        c0 = _lo(c1_c0_dbl) >> shift;

        /* Pack the results back together into packed 16-bit format. */
        c3_c2 = _pack2(c3, c2);
        c1_c0 = _pack2(c1, c0);

        /* Store the four results with a single 64-bit write. */
        *(double *) &c[i] = _itod(c3_c2, c1_c0);
    }

    /*
     * Scalar tail for the remaining len % 4 elements.
     * NOTE(review): TI documents _hi/_lo as returning unsigned, which makes
     * the shift in the packed path a logical one; the tail shifts an
     * unsigned product so both paths yield the same low 16 bits - confirm.
     */
    for (; i < len; i++) {
        unsigned prod = (unsigned) ((int) a[i] * (int) b[i]);

        /* Keep the low 16 bits, as the packed path does via _pack2. */
        c[i] = (short) ((prod >> shift) & 0xFFFFu);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -