📄 x86.c
字号:
int iDataCntr; X86_ASM ( "movss %0, %%xmm1\n\t" \ "movss %1, %%xmm2\n\t" : : "m" (fMul), "m" (fAdd) : "xmm1", "xmm2", "memory"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movss %1, %%xmm0\n\t" \ "mulss %%xmm1, %%xmm0\n\t" \ "addss %%xmm2, %%xmm0\n\t" \ "movss %%xmm0, %0\n\t" : "=m" (fpDest[iDataCntr]) : "m" (fpSrc[iDataCntr]) : "xmm0", "xmm1", "xmm2", "memory"); }}void dsp_x86_sse_ma2 (double *dpDest, const double *dpSrc, double dMul, double dAdd, int iDataLength){ int iDataCntr; X86_ASM ( "movsd %0, %%xmm1\n\t" \ "movsd %1, %%xmm2\n\t" : : "m" (dMul), "m" (dAdd) : "xmm1", "xmm2", "memory"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movsd %1, %%xmm0\n\t" \ "mulsd %%xmm1, %%xmm0\n\t" \ "addsd %%xmm2, %%xmm0\n\t" \ "movsd %%xmm0, %0\n\t" : "=m" (dpDest[iDataCntr]) : "m" (dpSrc[iDataCntr]) : "xmm0", "xmm1", "xmm2", "memory"); }}void dsp_x86_3dnow_cmaf (float *fpDest, const float *fpSrc1, const float *fpSrc2, int iDataLength){ int iDataCntr; pv2sf m64pDest = (pv2sf) fpDest; pv2sf m64pSrc1 = (pv2sf) fpSrc1; pv2sf m64pSrc2 = (pv2sf) fpSrc2; for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movq %2, %%mm0\n\t" \ "movq %3, %%mm1\n\t" \ "movq %1, %%mm3\n\t" \ "pswapd %%mm1, %%mm2\n\t" \ "pfmul %%mm0, %%mm1\n\t" \ "pfmul %%mm0, %%mm2\n\t" \ "pfpnacc %%mm2, %%mm1\n\t" \ "pfadd %%mm1, %%mm3\n\t" \ "movntq %%mm3, %0\n\t" : "=m" (m64pDest[iDataCntr]) : "m0" (m64pDest[iDataCntr]), "m" (m64pSrc1[iDataCntr]), "m" (m64pSrc2[iDataCntr]) : "mm0", "mm1", "mm2", "mm3", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}void dsp_x86_sse_cmaf (float *fpDest, const float *fpSrc1, const float *fpSrc2, int iDataLength){ int iDataCntr; int iDataCount; iDataCount = (iDataLength << 1); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2) { X86_ASM ( "movss %6, %%xmm2\n\t" \ "movss %7, %%xmm3\n\t" \ \ "movss %4, %%xmm0\n\t" \ "movss %%xmm0, %%xmm1\n\t" \ "movss %5, %%xmm4\n\t" \ \ "movss %2, %%xmm6\n\t" \ "movss %3, %%xmm7\n\t" \ \ "mulss %%xmm2, %%xmm0\n\t" \ "movss %%xmm4, %%xmm5\n\t" \ "mulss %%xmm3, %%xmm5\n\t" \ "subss %%xmm5, %%xmm0\n\t" \ \ "mulss %%xmm3, %%xmm1\n\t" \ "movss %%xmm4, %%xmm5\n\t" \ "mulss %%xmm2, %%xmm5\n\t" \ "addss %%xmm5, %%xmm1\n\t" \ \ "addss %%xmm0, %%xmm6\n\t" \ "addss %%xmm1, %%xmm7\n\t" \ \ "movss %%xmm6, %0\n\t" \ "movss %%xmm7, %1\n\t" : "=m" (fpDest[iDataCntr]), "=m" (fpDest[iDataCntr + 1]) : "m0" (fpDest[iDataCntr]), "m1" (fpDest[iDataCntr + 1]), "m" (fpSrc1[iDataCntr]), "m" (fpSrc1[iDataCntr + 1]), "m" (fpSrc2[iDataCntr]), "m" (fpSrc2[iDataCntr + 1]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"); }}void dsp_x86_sse_cma (double *dpDest, const double *dpSrc1, const double *dpSrc2, int iDataLength){ int iDataCntr; int iDataCount; iDataCount = (iDataLength << 1); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2) { X86_ASM ( "movsd %6, %%xmm2\n\t" \ "movsd %7, %%xmm3\n\t" \ \ "movsd %4, %%xmm0\n\t" \ "movsd %%xmm0, %%xmm1\n\t" \ "movsd %5, %%xmm4\n\t" \ \ "movsd %2, %%xmm6\n\t" \ "movsd %3, %%xmm7\n\t" \ \ "mulsd %%xmm2, %%xmm0\n\t" \ "movsd %%xmm4, %%xmm5\n\t" \ "mulsd %%xmm3, %%xmm5\n\t" \ "subsd %%xmm5, %%xmm0\n\t" \ \ "mulsd %%xmm3, %%xmm1\n\t" \ "movsd %%xmm4, %%xmm5\n\t" \ "mulsd %%xmm2, %%xmm5\n\t" \ "addsd %%xmm5, %%xmm1\n\t" \ \ "addsd %%xmm0, %%xmm6\n\t" \ "addsd %%xmm1, %%xmm7\n\t" \ \ "movsd %%xmm6, %0\n\t" \ "movsd %%xmm7, %1\n\t" : "=m" (dpDest[iDataCntr]), "=m" (dpDest[iDataCntr + 1]) : "m0" (dpDest[iDataCntr]), "m1" (dpDest[iDataCntr + 1]), "m" (dpSrc1[iDataCntr]), "m" (dpSrc1[iDataCntr + 1]), "m" (dpSrc2[iDataCntr]), "m" (dpSrc2[iDataCntr + 1]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"); }}void dsp_x86_3dnow_amf (float *fpVect, float fAdd, float fMul, int iDataLength){ int iDataCntr; int iDataCount; pv2sf m64pVect = (pv2sf) fpVect; stm64 m64Add; stm64 m64Mul; m64Add.f[0] = m64Add.f[1] = fAdd; m64Mul.f[0] = m64Mul.f[1] = fMul; iDataCount = (iDataLength >> 1); X86_ASM ( "movq %0, %%mm1\n\t" \ "movq %1, %%mm2\n\t" : : "m" (m64Add), "m" (m64Mul) : "mm1", "mm2", "memory"); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++) { X86_ASM ( "movq %1, %%mm0\n\t" \ "pfadd %%mm1, %%mm0\n\t" \ "pfmul %%mm2, %%mm0\n\t" \ "movntq %%mm0, %0\n\t" : "=m" (m64pVect[iDataCntr]) : "m0" (m64pVect[iDataCntr]) : "mm0", "mm1", "mm2", "memory"); } if (iDataLength & 0x1) { X86_ASM ( "movd %1, %%mm0\n\t" \ "pfadd %%mm1, %%mm0\n\t" \ "pfmul %%mm2, %%mm0\n\t" \ "movd %%mm0, %0\n\t" : "=m" (fpVect[iDataLength - 1]) : "m0" (fpVect[iDataLength - 1]) : "mm0", "mm1", "mm2", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}void dsp_x86_sse_amf (float *fpVect, float fAdd, float fMul, int iDataLength){ int iDataCntr; X86_ASM ( "movss %0, %%xmm1\n\t" \ "movss %1, %%xmm2\n\t" : : "m" (fAdd), "m" (fMul) : "xmm1", "xmm2", "memory"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movss %1, %%xmm0\n\t" \ "addss %%xmm1, %%xmm0\n\t" \ "mulss %%xmm2, %%xmm0\n\t" \ "movss %%xmm0, %0\n\t" : "=m" (fpVect[iDataCntr]) : "m0" (fpVect[iDataCntr]) : "xmm0", "xmm1", "xmm2", "memory"); }}float dsp_x86_3dnow_macf (const float *fpSrc1, const float *fpSrc2, int iDataLength){ int iDataCntr; int iDataCount; float fRes; pv2sf m64pSrc1 = (pv2sf) fpSrc1; pv2sf m64pSrc2 = (pv2sf) fpSrc2; iDataCount = (iDataLength >> 1); X86_ASM ( "pxor %%mm0, %%mm0\n\t" : : : "mm0"); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++) { X86_ASM ( "movq %0, %%mm1\n\t" \ "movq %1, %%mm2\n\t" \ "pfmul %%mm2, %%mm1\n\t" \ "pfacc %%mm1, %%mm0\n\t" : : "m" (m64pSrc1[iDataCntr]), "m" (m64pSrc2[iDataCntr]) : "mm0", "mm1", "mm2", "memory"); } if (iDataLength & 0x1) { X86_ASM ( "movd %0, %%mm1\n\t" \ "movd %1, %%mm2\n\t" \ "pfmul %%mm2, %%mm1\n\t" \ "pfacc %%mm1, %%mm0\n\t" : : "m" (fpSrc1[iDataLength - 1]), "m" (fpSrc2[iDataLength - 1]) : "mm0", "mm1", "mm2", "memory"); } X86_ASM ( "pfacc %%mm0, %%mm0\n\t" \ "movd %%mm0, %0\n\t" : "=m" (fRes) : : "mm0", "memory"); X86_ASM ("femms\n\t"); return fRes;}float dsp_x86_sse_macf (const float *fpSrc1, const float *fpSrc2, int iDataLength){ int iDataCntr; float fRes; X86_ASM ( "xorps %%xmm0, %%xmm0\n\t" : : : "xmm0"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movss %0, %%xmm1\n\t" \ "mulss %1, %%xmm1\n\t" \ "addss %%xmm1, %%xmm0\n\t" : : "m" (fpSrc1[iDataCntr]), "m" (fpSrc2[iDataCntr]) : "xmm0", "xmm1", "xmm2", "memory"); } X86_ASM ( "movss %%xmm0, %0\n\t" : "=m" (fRes) : : "xmm0"); return fRes;}double dsp_x86_sse_mac (const double *dpSrc1, const double *dpSrc2, int iDataLength){ int iDataCntr; double dRes; X86_ASM ( "xorpd %%xmm0, %%xmm0\n\t" : : : "xmm0"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movsd %0, %%xmm1\n\t" \ "mulsd %1, %%xmm1\n\t" \ "addsd %%xmm1, %%xmm0\n\t" : : "m" (dpSrc1[iDataCntr]), "m" (dpSrc2[iDataCntr]) : "xmm0", "xmm1", "xmm2", "memory"); } X86_ASM ( "movsd %%xmm0, %0\n\t" : "=m" (dRes) : : "xmm0"); return dRes;}void dsp_x86_3dnow_minmaxf (float *fpMin, float *fpMax, const float *fpSrc, int iDataLength){ int iDataCntr; int iDataCount; stm64 m64Min; stm64 m64Max; pv2sf m64pSrc = (pv2sf) fpSrc; m64Min.f[0] = m64Min.f[1] = FLT_MAX; m64Max.f[0] = m64Max.f[1] = -FLT_MAX; iDataCount = (iDataLength >> 1); X86_ASM ( "movq %0, %%mm1\n\t" \ "movq %1, %%mm2\n\t" : : "m" (m64Min), "m" (m64Max) : "mm1", "mm2", "memory"); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++) { X86_ASM ( "movq %0, %%mm0\n\t" \ "pfmin %%mm0, %%mm1\n\t" \ "pfmax %%mm0, %%mm2\n\t" : : "m" (m64pSrc[iDataCntr]) : "mm0", "mm1", "mm2", "memory"); } if (iDataLength & 0x1) { X86_ASM ( "movd %0, %%mm0\n\t" \ "pfmin %%mm0, %%mm1\n\t" \ "pfmax %%mm0, %%mm2\n\t" : : "m" (fpSrc[iDataLength - 1]) : "mm0", "mm1", "mm2", "memory"); } X86_ASM ( "pswapd %%mm1, %%mm3\n\t" \ "pfmin %%mm3, %%mm1\n\t" \ "pswapd %%mm2, %%mm3\n\t" \ "pfmax %%mm3, %%mm2\n\t" \ "movd %%mm1, %0\n\t" \ "movd %%mm2, %1\n\t" : "=m" (*fpMin), "=m" (*fpMax) : : "mm1", "mm2", "mm3", "memory"); X86_ASM ("femms\n\t");}void dsp_x86_sse_minmaxf (float *fpMin, float *fpMax, const float *fpSrc, int iDataLength){ int iDataCntr; *fpMin = FLT_MAX; *fpMax = -FLT_MAX; X86_ASM ( "movss %0, %%xmm0\n\t" \ "movss %1, %%xmm1\n\t" : : "m" (*fpMin), "m" (*fpMax) : "xmm0", "xmm1", "memory"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movss %0, %%xmm2\n\t" \ "minss %%xmm2, %%xmm0\n\t" \ "maxss %%xmm2, %%xmm1\n\t" : : "m" (fpSrc[iDataCntr]) : "xmm0", "xmm1", "xmm2", "memory"); } X86_ASM ( "movss %%xmm0, %0\n\t" \ "movss %%xmm1, %1\n\t" : "=m" (*fpMin), "=m" (*fpMax) : : "xmm0", "xmm1", "memory");}void dsp_x86_sse_minmax (double *dpMin, double *dpMax, const double *dpSrc, int iDataLength){ int iDataCntr; *dpMin = FLT_MAX; *dpMax = -FLT_MAX; X86_ASM ( "movsd %0, %%xmm0\n\t" \ "movsd %1, %%xmm1\n\t" : : "m" (*dpMin), "m" (*dpMax) : "xmm0", "xmm1", "memory"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movsd %0, %%xmm2\n\t" \ "minsd %%xmm2, %%xmm0\n\t" \ "maxsd %%xmm2, %%xmm1\n\t" : : "m" (dpSrc[iDataCntr]) : "xmm0", "xmm1", "xmm2", "memory"); } X86_ASM ( "movss %%xmm0, %0\n\t" \ "movss %%xmm1, %1\n\t" : "=m" (*dpMin), "=m" (*dpMax) : : "xmm0", "xmm1", "memory");}float dsp_x86_3dnow_crosscorrf (const float *fpSrc1, const float *fpSrc2, int iDataLength)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -