📄 x86.c
字号:
int iDataCntr; for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movsd %1, %%xmm0\n\t" \ "mulsd %2, %%xmm0\n\t" \ "movsd %%xmm0, %0\n\t" : "=m" (dpDest[iDataCntr]) : "m" (dpSrc1[iDataCntr]), "m" (dpSrc2[iDataCntr]) : "xmm0", "memory"); }}
/*
 * dsp_x86_3dnow_cmulf - in-place complex multiply of a float vector by one
 * complex value, using 3DNow! instructions.
 *
 * fpDest      - input and output; accessed through a pv2sf pointer, one 64-bit
 *               element (two floats d0, d1) per iteration.
 * fpSrc       - the multiplier; 64 bits are loaded from &fpSrc[0] (s0, s1).
 * iDataLength - number of 64-bit (two-float) elements processed.
 *
 * Per element the asm computes
 *     low  = d0 * s0 - d1 * s1
 *     high = d0 * s1 + d1 * s0
 * i.e. a complex product with the low float as real part and the high float
 * as imaginary part.  pswapd builds the swapped copy (s1, s0), the two pfmul
 * form the four partial products and pfpnacc does the subtract/add.
 * The result is written with a non-temporal store (movntq); the trailing
 * femms leaves MMX state and sfence orders the non-temporal stores.
 *
 * NOTE(review): mm3 is loaded by a separate asm statement before the loop and
 * only read inside the loop's asm; this relies on the register keeping its
 * value between statements.  pv2sf and X86_ASM are defined outside this view.
 */
void dsp_x86_3dnow_cmulf (float *fpDest, const float *fpSrc, int iDataLength){ int iDataCntr; pv2sf m64pDest = (pv2sf) fpDest; X86_ASM ( "movq %0, %%mm3\n\t" : : "m" (fpSrc[0]) : "mm3", "memory"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movq %1, %%mm0\n\t" \ "movq %%mm3, %%mm1\n\t" \ "pswapd %%mm1, %%mm2\n\t" \ "pfmul %%mm0, %%mm1\n\t" \ "pfmul %%mm0, %%mm2\n\t" \ "pfpnacc %%mm2, %%mm1\n\t" \ "movntq %%mm1, %0\n\t" : "=m" (m64pDest[iDataCntr]) : "m0" (m64pDest[iDataCntr]) : "mm0", "mm1", "mm2", "mm3", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}
/*
 * dsp_x86_sse_cmulf - in-place complex multiply of a float vector by one
 * complex value, using scalar SSE instructions.
 *
 * fpDest      - input and output; floats at even index i and odd index i + 1
 *               are handled as one pair (d0, d1).
 * fpSrc       - multiplier; fpSrc[0] (s0) and fpSrc[1] (s1) are loaded once
 *               into xmm2 and xmm3 before the loop.
 * iDataLength - number of pairs; the loop runs over 2 * iDataLength floats.
 *
 * Per pair:
 *     fpDest[i]     = d0 * s0 - d1 * s1
 *     fpDest[i + 1] = d0 * s1 + d1 * s0
 *
 * NOTE(review): xmm2/xmm3 are set in a separate asm statement and read by the
 * loop's asm; this relies on the registers surviving between statements.
 */
void dsp_x86_sse_cmulf (float *fpDest, const float *fpSrc, int iDataLength){ int iDataCntr; int iDataCount; X86_ASM ( "movss %0, %%xmm2\n\t" \ "movss %1, %%xmm3\n\t" : : "m" (fpSrc[0]), "m" (fpSrc[1]) : "xmm2", "xmm3", "memory"); iDataCount = (iDataLength << 1); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2) { X86_ASM ( "movss %2, %%xmm0\n\t" \ "movss %%xmm0, %%xmm1\n\t" \ "movss %3, %%xmm4\n\t" \ \ "mulss %%xmm2, %%xmm0\n\t" \ "movss %%xmm4, %%xmm5\n\t" \ "mulss %%xmm3, %%xmm5\n\t" \ "subss %%xmm5, %%xmm0\n\t" \ \ "mulss %%xmm3, %%xmm1\n\t" \ "movss %%xmm4, %%xmm5\n\t" \ "mulss %%xmm2, %%xmm5\n\t" \ "addss %%xmm5, %%xmm1\n\t" \ \ "movss %%xmm0, %0\n\t" \ "movss %%xmm1, %1\n\t" : "=m" (fpDest[iDataCntr]), "=m" (fpDest[iDataCntr + 1]) : "m0" (fpDest[iDataCntr]), "m1" (fpDest[iDataCntr + 1]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory"); }}
/*
 * dsp_x86_sse_cmul - double-precision counterpart of dsp_x86_sse_cmulf
 * (movsd/mulsd/subsd/addsd instead of the single-precision forms).
 *
 * dpDest      - input and output; doubles at index i and i + 1 form a pair
 *               (d0, d1).
 * dpSrc       - multiplier; dpSrc[0] (s0) and dpSrc[1] (s1) are loaded once
 *               into xmm2 and xmm3 before the loop.
 * iDataLength - number of pairs; the loop runs over 2 * iDataLength doubles.
 *
 * Per pair:
 *     dpDest[i]     = d0 * s0 - d1 * s1
 *     dpDest[i + 1] = d0 * s1 + d1 * s0
 *
 * NOTE(review): relies on xmm2/xmm3 surviving between asm statements.
 */
void dsp_x86_sse_cmul (double *dpDest, const double *dpSrc, int iDataLength){ int iDataCntr; int iDataCount; X86_ASM ( "movsd %0, %%xmm2\n\t" \ "movsd %1, %%xmm3\n\t" : : "m" (dpSrc[0]), "m" (dpSrc[1]) : "xmm2", "xmm3", "memory"); iDataCount = (iDataLength << 1); for 
(iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2) { X86_ASM ( "movsd %2, %%xmm0\n\t" \ "movsd %%xmm0, %%xmm1\n\t" \ "movsd %3, %%xmm4\n\t" \ \ "mulsd %%xmm2, %%xmm0\n\t" \ "movsd %%xmm4, %%xmm5\n\t" \ "mulsd %%xmm3, %%xmm5\n\t" \ "subsd %%xmm5, %%xmm0\n\t" \ \ "mulsd %%xmm3, %%xmm1\n\t" \ "movsd %%xmm4, %%xmm5\n\t" \ "mulsd %%xmm2, %%xmm5\n\t" \ "addsd %%xmm5, %%xmm1\n\t" \ \ "movsd %%xmm0, %0\n\t" \ "movsd %%xmm1, %1\n\t" : "=m" (dpDest[iDataCntr]), "=m" (dpDest[iDataCntr + 1]) : "m0" (dpDest[iDataCntr]), "m1" (dpDest[iDataCntr + 1]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory"); }}
/*
 * dsp_x86_3dnow_cmul2f - in-place element-wise complex multiply of two float
 * vectors, using 3DNow! instructions.
 *
 * fpDest      - input and output; accessed as 64-bit elements (d0, d1).
 * fpSrc       - second operand; accessed as 64-bit elements (s0, s1) at the
 *               same index as fpDest.
 * iDataLength - number of 64-bit (two-float) elements processed.
 *
 * Per element:
 *     low  = d0 * s0 - d1 * s1
 *     high = d0 * s1 + d1 * s0
 * Results are written with movntq; femms and sfence follow the loop.
 * pv2sf and X86_ASM are defined outside this view.
 */
void dsp_x86_3dnow_cmul2f (float *fpDest, const float *fpSrc, int iDataLength){ int iDataCntr; pv2sf m64pDest = (pv2sf) fpDest; pv2sf m64pSrc = (pv2sf) fpSrc; for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movq %1, %%mm0\n\t" \ "movq %2, %%mm1\n\t" \ "pswapd %%mm1, %%mm2\n\t" \ "pfmul %%mm0, %%mm1\n\t" \ "pfmul %%mm0, %%mm2\n\t" \ "pfpnacc %%mm2, %%mm1\n\t" \ "movntq %%mm1, %0\n\t" : "=m" (m64pDest[iDataCntr]) : "m0" (m64pDest[iDataCntr]), "m" (m64pSrc[iDataCntr]) : "mm0", "mm1", "mm2", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}
/*
 * dsp_x86_sse_cmul2f - in-place element-wise complex multiply of two float
 * vectors, using scalar SSE instructions.
 *
 * fpDest      - input and output; (fpDest[i], fpDest[i + 1]) = (d0, d1).
 * fpSrc       - second operand; (fpSrc[i], fpSrc[i + 1]) = (s0, s1), reloaded
 *               into xmm2/xmm3 on every iteration.
 * iDataLength - number of pairs; the loop runs over 2 * iDataLength floats.
 *
 * Per pair:
 *     fpDest[i]     = d0 * s0 - d1 * s1
 *     fpDest[i + 1] = d0 * s1 + d1 * s0
 */
void dsp_x86_sse_cmul2f (float *fpDest, const float *fpSrc, int iDataLength){ int iDataCntr; int iDataCount; iDataCount = (iDataLength << 1); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2) { X86_ASM ( "movss %4, %%xmm2\n\t" \ "movss %5, %%xmm3\n\t" \ \ "movss %2, %%xmm0\n\t" \ "movss %%xmm0, %%xmm1\n\t" \ "movss %3, %%xmm4\n\t" \ \ "mulss %%xmm2, %%xmm0\n\t" \ "movss %%xmm4, %%xmm5\n\t" \ "mulss %%xmm3, %%xmm5\n\t" \ "subss %%xmm5, %%xmm0\n\t" \ \ "mulss %%xmm3, %%xmm1\n\t" \ "movss %%xmm4, %%xmm5\n\t" \ "mulss %%xmm2, %%xmm5\n\t" \ "addss %%xmm5, %%xmm1\n\t" \ \ "movss %%xmm0, %0\n\t" \ "movss %%xmm1, %1\n\t" : "=m" (fpDest[iDataCntr]), "=m" (fpDest[iDataCntr + 1]) : "m0" (fpDest[iDataCntr]), "m1" (fpDest[iDataCntr + 1]), "m" (fpSrc[iDataCntr]), "m" (fpSrc[iDataCntr + 1]) : "xmm0", "xmm1", 
"xmm2", "xmm3", "xmm4", "xmm5", "memory"); }}
/*
 * dsp_x86_sse_cmul2 - double-precision counterpart of dsp_x86_sse_cmul2f.
 *
 * dpDest      - input and output; (dpDest[i], dpDest[i + 1]) = (d0, d1).
 * dpSrc       - second operand; (dpSrc[i], dpSrc[i + 1]) = (s0, s1).
 * iDataLength - number of pairs; the loop runs over 2 * iDataLength doubles.
 *
 * Per pair:
 *     dpDest[i]     = d0 * s0 - d1 * s1
 *     dpDest[i + 1] = d0 * s1 + d1 * s0
 */
void dsp_x86_sse_cmul2 (double *dpDest, const double *dpSrc, int iDataLength){ int iDataCntr; int iDataCount; iDataCount = (iDataLength << 1); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2) { X86_ASM ( "movsd %4, %%xmm2\n\t" \ "movsd %5, %%xmm3\n\t" \ \ "movsd %2, %%xmm0\n\t" \ "movsd %%xmm0, %%xmm1\n\t" \ "movsd %3, %%xmm4\n\t" \ \ "mulsd %%xmm2, %%xmm0\n\t" \ "movsd %%xmm4, %%xmm5\n\t" \ "mulsd %%xmm3, %%xmm5\n\t" \ "subsd %%xmm5, %%xmm0\n\t" \ \ "mulsd %%xmm3, %%xmm1\n\t" \ "movsd %%xmm4, %%xmm5\n\t" \ "mulsd %%xmm2, %%xmm5\n\t" \ "addsd %%xmm5, %%xmm1\n\t" \ \ "movsd %%xmm0, %0\n\t" \ "movsd %%xmm1, %1\n\t" : "=m" (dpDest[iDataCntr]), "=m" (dpDest[iDataCntr + 1]) : "m0" (dpDest[iDataCntr]), "m1" (dpDest[iDataCntr + 1]), "m" (dpSrc[iDataCntr]), "m" (dpSrc[iDataCntr + 1]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory"); }}
/*
 * dsp_x86_3dnow_cmul3f - element-wise complex multiply of two float vectors
 * into a separate destination, using 3DNow! instructions.
 *
 * fpDest      - output only; written as 64-bit elements with movntq.
 * fpSrc1      - first operand, 64-bit elements (a0, a1).
 * fpSrc2      - second operand, 64-bit elements (b0, b1).
 * iDataLength - number of 64-bit (two-float) elements processed.
 *
 * Per element:
 *     low  = a0 * b0 - a1 * b1
 *     high = a0 * b1 + a1 * b0
 * femms and sfence follow the loop.  pv2sf and X86_ASM are defined outside
 * this view.
 */
void dsp_x86_3dnow_cmul3f (float *fpDest, const float *fpSrc1, const float *fpSrc2, int iDataLength){ int iDataCntr; pv2sf m64pDest = (pv2sf) fpDest; pv2sf m64pSrc1 = (pv2sf) fpSrc1; pv2sf m64pSrc2 = (pv2sf) fpSrc2; for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movq %1, %%mm0\n\t" \ "movq %2, %%mm1\n\t" \ "pswapd %%mm1, %%mm2\n\t" \ "pfmul %%mm0, %%mm1\n\t" \ "pfmul %%mm0, %%mm2\n\t" \ "pfpnacc %%mm2, %%mm1\n\t" \ "movntq %%mm1, %0\n\t" : "=m" (m64pDest[iDataCntr]) : "m" (m64pSrc1[iDataCntr]), "m" (m64pSrc2[iDataCntr]) : "mm0", "mm1", "mm2", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}
/*
 * dsp_x86_sse_cmul3f - element-wise complex multiply of two float vectors
 * into a separate destination, using scalar SSE instructions.
 *
 * fpDest      - output only; fpDest[i] and fpDest[i + 1] are written.
 * fpSrc1      - first operand; (fpSrc1[i], fpSrc1[i + 1]) = (a0, a1).
 * fpSrc2      - second operand; (fpSrc2[i], fpSrc2[i + 1]) = (b0, b1).
 * iDataLength - number of pairs; the loop runs over 2 * iDataLength floats.
 *
 * Per pair:
 *     fpDest[i]     = a0 * b0 - a1 * b1
 *     fpDest[i + 1] = a0 * b1 + a1 * b0
 */
void dsp_x86_sse_cmul3f (float *fpDest, const float *fpSrc1, const float *fpSrc2, int iDataLength){ int iDataCntr; int iDataCount; iDataCount = (iDataLength << 1); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2) { X86_ASM ( "movss %4, %%xmm2\n\t" \ "movss %5, %%xmm3\n\t" \ \ "movss %2, %%xmm0\n\t" \ "movss %%xmm0, %%xmm1\n\t" \ "movss %3, %%xmm4\n\t" \ \ "mulss %%xmm2, %%xmm0\n\t" \ "movss %%xmm4, %%xmm5\n\t" \ "mulss %%xmm3, %%xmm5\n\t" \ "subss 
%%xmm5, %%xmm0\n\t" \ \ "mulss %%xmm3, %%xmm1\n\t" \ "movss %%xmm4, %%xmm5\n\t" \ "mulss %%xmm2, %%xmm5\n\t" \ "addss %%xmm5, %%xmm1\n\t" \ \ "movss %%xmm0, %0\n\t" \ "movss %%xmm1, %1\n\t" : "=m" (fpDest[iDataCntr]), "=m" (fpDest[iDataCntr + 1]) : "m" (fpSrc1[iDataCntr]), "m" (fpSrc1[iDataCntr + 1]), "m" (fpSrc2[iDataCntr]), "m" (fpSrc2[iDataCntr + 1]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory"); }}
/*
 * dsp_x86_sse_cmul3 - double-precision counterpart of dsp_x86_sse_cmul3f.
 *
 * dpDest      - output only; dpDest[i] and dpDest[i + 1] are written.
 * dpSrc1      - first operand; (dpSrc1[i], dpSrc1[i + 1]) = (a0, a1).
 * dpSrc2      - second operand; (dpSrc2[i], dpSrc2[i + 1]) = (b0, b1).
 * iDataLength - number of pairs; the loop runs over 2 * iDataLength doubles.
 *
 * Per pair:
 *     dpDest[i]     = a0 * b0 - a1 * b1
 *     dpDest[i + 1] = a0 * b1 + a1 * b0
 */
void dsp_x86_sse_cmul3 (double *dpDest, const double *dpSrc1, const double *dpSrc2, int iDataLength){ int iDataCntr; int iDataCount; iDataCount = (iDataLength << 1); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2) { X86_ASM ( "movsd %4, %%xmm2\n\t" \ "movsd %5, %%xmm3\n\t" \ \ "movsd %2, %%xmm0\n\t" \ "movsd %%xmm0, %%xmm1\n\t" \ "movsd %3, %%xmm4\n\t" \ \ "mulsd %%xmm2, %%xmm0\n\t" \ "movsd %%xmm4, %%xmm5\n\t" \ "mulsd %%xmm3, %%xmm5\n\t" \ "subsd %%xmm5, %%xmm0\n\t" \ \ "mulsd %%xmm3, %%xmm1\n\t" \ "movsd %%xmm4, %%xmm5\n\t" \ "mulsd %%xmm2, %%xmm5\n\t" \ "addsd %%xmm5, %%xmm1\n\t" \ \ "movsd %%xmm0, %0\n\t" \ "movsd %%xmm1, %1\n\t" : "=m" (dpDest[iDataCntr]), "=m" (dpDest[iDataCntr + 1]) : "m" (dpSrc1[iDataCntr]), "m" (dpSrc1[iDataCntr + 1]), "m" (dpSrc2[iDataCntr]), "m" (dpSrc2[iDataCntr + 1]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory"); }}
/*
 * dsp_x86_3dnow_maf - in-place multiply-add on a float vector,
 * fpVect[i] = fpVect[i] * fMul + fAdd, using 3DNow! instructions.
 *
 * fpVect      - input and output vector.
 * fMul, fAdd  - scalar factor and offset; each is duplicated into both floats
 *               of a local stm64 and loaded into mm1 / mm2 before the loop.
 * iDataLength - number of floats.
 *
 * The main loop handles iDataLength >> 1 two-float elements with
 * pfmul/pfadd and stores them with movntq.  When iDataLength is odd, the
 * last float (index iDataLength - 1) is processed separately with 32-bit
 * movd load/store.  femms and sfence are issued at the end.
 *
 * NOTE(review): mm1/mm2 are loaded in a separate asm statement and read by
 * the later ones; this relies on the registers surviving between statements.
 * pv2sf, stm64 and X86_ASM are defined outside this view.
 */
void dsp_x86_3dnow_maf (float *fpVect, float fMul, float fAdd, int iDataLength){ int iDataCntr; int iDataCount; pv2sf m64pVect = (pv2sf) fpVect; stm64 m64Mul; stm64 m64Add; m64Mul.f[0] = m64Mul.f[1] = fMul; m64Add.f[0] = m64Add.f[1] = fAdd; iDataCount = (iDataLength >> 1); X86_ASM ( "movq %0, %%mm1\n\t" \ "movq %1, %%mm2\n\t" : : "m" (m64Mul), "m" (m64Add) : "mm1", "mm2", "memory"); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++) { X86_ASM ( "movq %1, %%mm0\n\t" \ "pfmul %%mm1, %%mm0\n\t" \ "pfadd %%mm2, %%mm0\n\t" \ "movntq %%mm0, %0\n\t" : "=m" (m64pVect[iDataCntr]) : "m0" (m64pVect[iDataCntr]) : "mm0", "mm1", "mm2", "memory"); } if (iDataLength & 0x1) { X86_ASM ( "movd %1, 
%%mm0\n\t" \ "pfmul %%mm1, %%mm0\n\t" \ "pfadd %%mm2, %%mm0\n\t" \ "movd %%mm0, %0\n\t" : "=m" (fpVect[iDataLength - 1]) : "m0" (fpVect[iDataLength - 1]) : "mm0", "mm1", "mm2", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}
/*
 * dsp_x86_sse_maf - in-place multiply-add on a float vector,
 * fpVect[i] = fpVect[i] * fMul + fAdd, using scalar SSE instructions.
 *
 * fpVect      - input and output vector.
 * fMul, fAdd  - scalar factor and offset, loaded once into xmm1 / xmm2.
 * iDataLength - number of floats processed, one per iteration.
 *
 * NOTE(review): relies on xmm1/xmm2 surviving between asm statements.
 */
void dsp_x86_sse_maf (float *fpVect, float fMul, float fAdd, int iDataLength){ int iDataCntr; X86_ASM ( "movss %0, %%xmm1\n\t" \ "movss %1, %%xmm2\n\t" : : "m" (fMul), "m" (fAdd) : "xmm1", "xmm2", "memory"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movss %1, %%xmm0\n\t" \ "mulss %%xmm1, %%xmm0\n\t" \ "addss %%xmm2, %%xmm0\n\t" \ "movss %%xmm0, %0\n\t" : "=m" (fpVect[iDataCntr]) : "m0" (fpVect[iDataCntr]) : "xmm0", "xmm1", "xmm2", "memory"); }}
/*
 * dsp_x86_sse_ma - double-precision counterpart of dsp_x86_sse_maf:
 * dpVect[i] = dpVect[i] * dMul + dAdd, one double per iteration
 * (movsd/mulsd/addsd).
 *
 * dpVect      - input and output vector.
 * dMul, dAdd  - scalar factor and offset, loaded once into xmm1 / xmm2.
 * iDataLength - number of doubles processed.
 *
 * NOTE(review): relies on xmm1/xmm2 surviving between asm statements.
 */
void dsp_x86_sse_ma (double *dpVect, double dMul, double dAdd, int iDataLength){ int iDataCntr; X86_ASM ( "movsd %0, %%xmm1\n\t" \ "movsd %1, %%xmm2\n\t" : : "m" (dMul), "m" (dAdd) : "xmm1", "xmm2", "memory"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movsd %1, %%xmm0\n\t" \ "mulsd %%xmm1, %%xmm0\n\t" \ "addsd %%xmm2, %%xmm0\n\t" \ "movsd %%xmm0, %0\n\t" : "=m" (dpVect[iDataCntr]) : "m0" (dpVect[iDataCntr]) : "xmm0", "xmm1", "xmm2", "memory"); }}
/*
 * dsp_x86_3dnow_ma2f - multiply-add from a source vector into a separate
 * destination, fpDest[i] = fpSrc[i] * fMul + fAdd, using 3DNow! instructions.
 *
 * fpDest      - output vector.
 * fpSrc       - input vector.
 * fMul, fAdd  - scalar factor and offset; each is duplicated into both floats
 *               of a local stm64 and loaded into mm1 / mm2 before the loop.
 * iDataLength - number of floats.
 *
 * The main loop handles iDataLength >> 1 two-float elements and stores them
 * with movntq.  When iDataLength is odd, the last float (index
 * iDataLength - 1) is processed separately with 32-bit movd load/store.
 * femms and sfence are issued at the end.
 *
 * NOTE(review): relies on mm1/mm2 surviving between asm statements.
 * pv2sf, stm64 and X86_ASM are defined outside this view.
 */
void dsp_x86_3dnow_ma2f (float *fpDest, const float *fpSrc, float fMul, float fAdd, int iDataLength){ int iDataCntr; int iDataCount; pv2sf m64pDest = (pv2sf) fpDest; pv2sf m64pSrc = (pv2sf) fpSrc; stm64 m64Mul; stm64 m64Add; m64Mul.f[0] = m64Mul.f[1] = fMul; m64Add.f[0] = m64Add.f[1] = fAdd; iDataCount = (iDataLength >> 1); X86_ASM ( "movq %0, %%mm1\n\t" \ "movq %1, %%mm2\n\t" : : "m" (m64Mul), "m" (m64Add) : "mm1", "mm2", "memory"); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++) { X86_ASM ( "movq %1, %%mm0\n\t" \ "pfmul %%mm1, %%mm0\n\t" \ "pfadd %%mm2, %%mm0\n\t" \ "movntq %%mm0, %0\n\t" : "=m" (m64pDest[iDataCntr]) : "m" (m64pSrc[iDataCntr]) : "mm0", "mm1", "mm2", "memory"); } if (iDataLength & 0x1) { X86_ASM ( "movd %1, %%mm0\n\t" \ "pfmul %%mm1, %%mm0\n\t" \ "pfadd %%mm2, %%mm0\n\t" \ 
"movd %%mm0, %0\n\t" : "=m" (fpDest[iDataLength - 1]) : "m" (fpSrc[iDataLength - 1]) : "mm0", "mm1", "mm2", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}
void dsp_x86_sse_ma2f (float *fpDest, const float *fpSrc, float fMul, float fAdd, int iDataLength){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -