📄 vec_cx_mpy_12.c
字号:
/*
 * vec_cx_mpy - element-wise multiply of two vectors of packed 16-bit
 * complex numbers, written with TI C6000 compiler intrinsics.
 *
 * a, b   Input vectors of 16-bit values. Each group of four shorts is
 *        read with a single 64-bit (double) load and treated as two
 *        complex numbers.
 * c      Output vector; four shorts are written per iteration with a
 *        single 64-bit (double) store.
 * len    Number of 16-bit elements to process. The loop advances four
 *        elements at a time with no tail handling, so len is expected
 *        to be a multiple of 4.
 * shift  Currently unused: every product is scaled by a fixed >> 16.
 *        NOTE(review): confirm whether the scaling was meant to use
 *        this parameter.
 *
 * NOTE(review): because a[], b[] and c[] are accessed through double
 * pointers, they presumably must be 8-byte aligned on the target --
 * confirm against the caller.
 */
void vec_cx_mpy(const short *restrict a, const short *restrict b,
                short *restrict c, int len, int shift)
{
    int i;
    unsigned a3_a2, a1_a0;      /* Packed 16-bit values      */
    unsigned b3_b2, b1_b0;      /* Packed 16-bit values      */
    short a3, a2, a1, a0;       /* Separate 16-bit elements  */
    short b3, b2, b1, b0;       /* Separate 16-bit elements  */
    short c3, c2, c1, c0;       /* Separate 16-bit results   */
    unsigned c3_c2, c1_c0;      /* Packed 16-bit values      */

    for (i = 0; i < len; i += 4) {
        /* Load two complex numbers from the a[] array.             */
        /* The complex values loaded are represented as             */
        /* 'a3 + a2 * j' and 'a1 + a0 * j'.  That is, the real      */
        /* components are a3 and a1, and the imaginary components   */
        /* are a2 and a0.                                           */
        a3_a2 = _hi(*(const double *) &a[i]);
        a1_a0 = _lo(*(const double *) &a[i]);

        /* Load two complex numbers from the b[] array.             */
        b3_b2 = _hi(*(const double *) &b[i]);
        b1_b0 = _lo(*(const double *) &b[i]);

        /* Separate the 16-bit coefficients so that the complex     */
        /* multiply may be performed.  The upper halfword is taken  */
        /* with an arithmetic shift, the lower halfword with a      */
        /* sign-extending extract.  This portion needs further      */
        /* optimization.                                            */
        a3 = ((signed) a3_a2) >> 16;
        a2 = _ext(a3_a2, 16, 16);
        a1 = ((signed) a1_a0) >> 16;
        a0 = _ext(a1_a0, 16, 16);

        /* The b operands must be unpacked from the b words; taking */
        /* them from the a words would silently compute a * a.      */
        b3 = ((signed) b3_b2) >> 16;
        b2 = _ext(b3_b2, 16, 16);
        b1 = ((signed) b1_b0) >> 16;
        b0 = _ext(b1_b0, 16, 16);

        /* Perform the complex multiplies using 16x16 multiplies.   */
        /* c3/c1 hold the cross terms and c2/c0 hold the            */
        /* difference of the straight terms, each scaled by >> 16.  */
        /* NOTE(review): with the real/imaginary convention stated  */
        /* above, this places the imaginary result in the upper     */
        /* halfword and the real result in the lower halfword,      */
        /* i.e. the opposite of the input layout -- confirm that    */
        /* this is what callers expect.                             */
        c3 = (b3 * a2 + b2 * a3) >> 16;
        c2 = (b3 * a3 - b2 * a2) >> 16;
        c1 = (b1 * a0 + b0 * a1) >> 16;
        c0 = (b1 * a1 - b0 * a0) >> 16;

        /* Pack the 16-bit results into 32-bit words.               */
        c3_c2 = _pack2(c3, c2);
        c1_c0 = _pack2(c1, c0);

        /* Store the results.                                       */
        *(double *) &c[i] = _itod(c3_c2, c1_c0);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -