📄 x86-64.c
字号:
void dsp_x86_64_iirf (float *fpVect, long lDataLength, const float *fpCoeff, float *fpX, float *fpY){ long lDataCntr; X86_64_ASM ( "movss %0, %%xmm1\n\t" \ "movss %1, %%xmm2\n\t" \ "movss %2, %%xmm3\n\t" \ "movss %3, %%xmm4\n\t" \ "prefetchnta %4\n\t" : : "m" (fpX[1]), "m" (fpX[2]), "m" (fpY[0]), "m" (fpY[1]), "m" (fpCoeff[0]) : "xmm1", "xmm2", "xmm3", "xmm4", "memory"); for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++) { X86_64_ASM ( "movss %%xmm1, %%xmm0\n\t" \ "movss %%xmm2, %%xmm1\n\t" \ "movss %1, %%xmm2\n\t" \ \ "movss %2, %%xmm5\n\t" \ "mulss %%xmm2, %%xmm5\n\t" \ "movss %3, %%xmm6\n\t" \ "mulss %%xmm1, %%xmm6\n\t" \ "addss %%xmm6, %%xmm5\n\t" \ "movss %4, %%xmm6\n\t" \ "mulss %%xmm0, %%xmm6\n\t" \ "addss %%xmm6, %%xmm5\n\t" \ \ "movss %5, %%xmm6\n\t" \ "mulss %%xmm4, %%xmm6\n\t" \ "movss %6, %%xmm7\n\t" \ "mulss %%xmm3, %%xmm7\n\t" \ "addss %%xmm7, %%xmm6\n\t" \ \ "addss %%xmm5, %%xmm6\n\t" \ "movss %%xmm4, %%xmm3\n\t" \ "movss %%xmm6, %%xmm4\n\t" \ \ "movss %%xmm6, %0\n\t" : "=m" (fpVect[lDataCntr]) : "m0" (fpVect[lDataCntr]), "m" (fpCoeff[0]), "m" (fpCoeff[1]), "m" (fpCoeff[2]), "m" (fpCoeff[3]), "m" (fpCoeff[4]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"); } X86_64_ASM ( "movss %%xmm0, %0\n\t" \ "movss %%xmm1, %1\n\t" \ "movss %%xmm2, %2\n\t" \ "movss %%xmm3, %3\n\t" \ "movss %%xmm4, %4\n\t" : "=m" (fpX[0]), "=m" (fpX[1]), "=m" (fpX[2]), "=m" (fpY[0]), "=m" (fpY[1]) : : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory");}void dsp_x86_64_iir (double *dpVect, long lDataLength, const double *dpCoeff, double *dpX, double *dpY){ long lDataCntr; X86_64_ASM ( "movsd %0, %%xmm1\n\t" \ "movsd %1, %%xmm2\n\t" \ "movsd %2, %%xmm3\n\t" \ "movsd %3, %%xmm4\n\t" \ "prefetchnta %4\n\t" \ "prefetchnta %5\n\t" : : "m" (dpX[1]), "m" (dpX[2]), "m" (dpY[0]), "m" (dpY[1]), "m" (dpCoeff[0]), "m" (dpCoeff[3]) : "xmm1", "xmm2", "xmm3", "xmm4", "memory"); for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++) { X86_64_ASM ( "movsd %%xmm1, %%xmm0\n\t" \ "movsd %%xmm2, %%xmm1\n\t" \ "movsd %1, %%xmm2\n\t" \ \ "movsd %2, %%xmm5\n\t" \ "mulsd %%xmm2, %%xmm5\n\t" \ "movsd %3, %%xmm6\n\t" \ "mulsd %%xmm1, %%xmm6\n\t" \ "addsd %%xmm6, %%xmm5\n\t" \ "movsd %4, %%xmm6\n\t" \ "mulsd %%xmm0, %%xmm6\n\t" \ "addsd %%xmm6, %%xmm5\n\t" \ \ "movsd %5, %%xmm6\n\t" \ "mulsd %%xmm4, %%xmm6\n\t" \ "movsd %6, %%xmm7\n\t" \ "mulsd %%xmm3, %%xmm7\n\t" \ "addsd %%xmm7, %%xmm6\n\t" \ \ "addsd %%xmm5, %%xmm6\n\t" \ "movsd %%xmm4, %%xmm3\n\t" \ "movsd %%xmm6, %%xmm4\n\t" \ \ "movsd %%xmm6, %0\n\t" : "=m" (dpVect[lDataCntr]) : "m0" (dpVect[lDataCntr]), "m" (dpCoeff[0]), "m" (dpCoeff[1]), "m" (dpCoeff[2]), "m" (dpCoeff[3]), "m" (dpCoeff[4]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"); } X86_64_ASM ( "movsd %%xmm0, %0\n\t" \ "movsd %%xmm1, %1\n\t" \ "movsd %%xmm2, %2\n\t" \ "movsd %%xmm3, %3\n\t" \ "movsd %%xmm4, %4\n\t" : "=m" (dpX[0]), "=m" (dpX[1]), "=m" (dpX[2]), "=m" (dpY[0]), "=m" (dpY[1]) : : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory");}void dsp_x86_64_iirf_nip (float *fpDest, const float *fpSrc, long lDataLength, const float *fpCoeff, float *fpX, float *fpY){ long lDataCntr; X86_64_ASM ( "movss %0, %%xmm1\n\t" \ "movss %1, %%xmm2\n\t" \ "movss %2, %%xmm3\n\t" \ "movss %3, %%xmm4\n\t" \ "prefetchnta %4\n\t" : : "m" (fpX[1]), "m" (fpX[2]), "m" (fpY[0]), "m" (fpY[1]), "m" (fpCoeff[0]) : "xmm1", "xmm2", "xmm3", "xmm4", "memory"); for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++) { X86_64_ASM ( "movss %%xmm1, %%xmm0\n\t" \ "movss %%xmm2, %%xmm1\n\t" \ "movss %1, %%xmm2\n\t" \ \ "movss %2, %%xmm5\n\t" \ "mulss %%xmm2, %%xmm5\n\t" \ "movss %3, %%xmm6\n\t" \ "mulss %%xmm1, %%xmm6\n\t" \ "addss %%xmm6, %%xmm5\n\t" \ "movss %4, %%xmm6\n\t" \ "mulss %%xmm0, %%xmm6\n\t" \ "addss %%xmm6, %%xmm5\n\t" \ \ "movss %5, %%xmm6\n\t" \ "mulss %%xmm4, %%xmm6\n\t" \ "movss %6, %%xmm7\n\t" \ "mulss %%xmm3, %%xmm7\n\t" \ "addss %%xmm7, %%xmm6\n\t" \ \ "addss %%xmm5, %%xmm6\n\t" \ "movss %%xmm4, %%xmm3\n\t" \ "movss %%xmm6, %%xmm4\n\t" \ \ "movss %%xmm6, %0\n\t" : "=m" (fpDest[lDataCntr]) : "m" (fpSrc[lDataCntr]), "m" (fpCoeff[0]), "m" (fpCoeff[1]), "m" (fpCoeff[2]), "m" (fpCoeff[3]), "m" (fpCoeff[4]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"); } X86_64_ASM ( "movss %%xmm0, %0\n\t" \ "movss %%xmm1, %1\n\t" \ "movss %%xmm2, %2\n\t" \ "movss %%xmm3, %3\n\t" \ "movss %%xmm4, %4\n\t" : "=m" (fpX[0]), "=m" (fpX[1]), "=m" (fpX[2]), "=m" (fpY[0]), "=m" (fpY[1]) : : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory");}void dsp_x86_64_iir_nip (double *dpDest, const double *dpSrc, long lDataLength, const double *dpCoeff, double *dpX, double *dpY){ long lDataCntr; X86_64_ASM ( "movsd %0, %%xmm1\n\t" \ "movsd %1, %%xmm2\n\t" \ "movsd %2, %%xmm3\n\t" \ "movsd %3, %%xmm4\n\t" \ "prefetchnta %4\n\t" \ "prefetchnta %5\n\t" : : "m" (dpX[1]), "m" (dpX[2]), "m" (dpY[0]), "m" (dpY[1]), "m" (dpCoeff[0]), "m" (dpCoeff[3]) : "xmm1", "xmm2", "xmm3", "xmm4", "memory"); for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++) { X86_64_ASM ( "movsd %%xmm1, %%xmm0\n\t" \ "movsd %%xmm2, %%xmm1\n\t" \ "movsd %1, %%xmm2\n\t" \ \ "movsd %2, %%xmm5\n\t" \ "mulsd %%xmm2, %%xmm5\n\t" \ "movsd %3, %%xmm6\n\t" \ "mulsd %%xmm1, %%xmm6\n\t" \ "addsd %%xmm6, %%xmm5\n\t" \ "movsd %4, %%xmm6\n\t" \ "mulsd %%xmm0, %%xmm6\n\t" \ "addsd %%xmm6, %%xmm5\n\t" \ \ "movsd %5, %%xmm6\n\t" \ "mulsd %%xmm4, %%xmm6\n\t" \ "movsd %6, %%xmm7\n\t" \ "mulsd %%xmm3, %%xmm7\n\t" \ "addsd %%xmm7, %%xmm6\n\t" \ \ "addsd %%xmm5, %%xmm6\n\t" \ "movsd %%xmm4, %%xmm3\n\t" \ "movsd %%xmm6, %%xmm4\n\t" \ \ "movsd %%xmm6, %0\n\t" : "=m" (dpDest[lDataCntr]) : "m" (dpSrc[lDataCntr]), "m" (dpCoeff[0]), "m" (dpCoeff[1]), "m" (dpCoeff[2]), "m" (dpCoeff[3]), "m" (dpCoeff[4]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"); } X86_64_ASM ( "movsd %%xmm0, %0\n\t" \ "movsd %%xmm1, %1\n\t" \ "movsd %%xmm2, %2\n\t" \ "movsd %%xmm3, %3\n\t" \ "movsd %%xmm4, %4\n\t" : "=m" (dpX[0]), "=m" (dpX[1]), "=m" (dpX[2]), "=m" (dpY[0]), "=m" (dpY[1]) : : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory");}#ifdef __cplusplus}#endif#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -