📄 x86.c
字号:
: "=m" (fpVect[iDataCntr]) : "m0" (fpVect[iDataCntr]) : "xmm0", "xmm1", "memory"); }}void dsp_x86_sse_add (double *dpVect, double dSrc, int iDataLength){ int iDataCntr; X86_ASM ( "movsd %0, %%xmm1\n\t" : : "m" (dSrc) : "xmm1", "memory"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movsd %1, %%xmm0\n\t" \ "addsd %%xmm1, %%xmm0\n\t" \ "movsd %%xmm0, %0\n\t" : "=m" (dpVect[iDataCntr]) : "m0" (dpVect[iDataCntr]) : "xmm0", "xmm1", "memory"); }}void dsp_x86_3dnow_mulf (float *fpVect, float fSrc, int iDataLength){ int iDataCntr; int iDataCount; pv2sf m64pVect = (pv2sf) fpVect; stm64 m64Src; m64Src.f[0] = m64Src.f[1] = fSrc; iDataCount = (iDataLength >> 1); X86_ASM ( "movq %0, %%mm1\n\t" : : "m" (m64Src) : "mm1", "memory"); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++) { X86_ASM ( "movq %1, %%mm0\n\t" \ "pfmul %%mm1, %%mm0\n\t" \ "movntq %%mm0, %0\n\t" : "=m" (m64pVect[iDataCntr]) : "m0" (m64pVect[iDataCntr]) : "mm0", "mm1", "memory"); } if (iDataLength & 0x1) { X86_ASM ( "movd %1, %%mm0\n\t" \ "pfmul %%mm1, %%mm0\n\t" \ "movd %%mm0, %0\n\t" : "=m" (fpVect[iDataLength - 1]) : "m0" (fpVect[iDataLength - 1]) : "mm0", "mm1", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}void dsp_x86_sse_mulf (float *fpVect, float fSrc, int iDataLength){ int iDataCntr; X86_ASM ( "movss %0, %%xmm1\n\t" : : "m" (fSrc) : "xmm1", "memory"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movss %1, %%xmm0\n\t" \ "mulss %%xmm1, %%xmm0\n\t" \ "movss %%xmm0, %0\n\t" : "=m" (fpVect[iDataCntr]) : "m0" (fpVect[iDataCntr]) : "xmm0", "xmm1", "memory"); }}void dsp_x86_sse_mul (double *dpVect, double dSrc, int iDataLength){ int iDataCntr; X86_ASM ( "movsd %0, %%xmm1\n\t" : : "m" (dSrc) : "xmm1", "memory"); for (iDataCntr = 0; iDataCntr <iDataLength; iDataCntr++) { X86_ASM ( "movsd %1, %%xmm0\n\t" \ "mulsd %%xmm1, %%xmm0\n\t" \ "movsd %%xmm0, %0\n\t" : "=m" (dpVect[iDataCntr]) : "m0" (dpVect[iDataCntr]) : "xmm0", "xmm1", "memory"); }}void dsp_x86_3dnow_mulf_nip (float *fpDest, const float *fpSrc1, float fSrc2, int iDataLength){ int iDataCntr; int iDataCount; pv2sf m64pDest = (pv2sf) fpDest; pv2sf m64pSrc1 = (pv2sf) fpSrc1; stm64 m64Src2; m64Src2.f[0] = m64Src2.f[1] = fSrc2; iDataCount = (iDataLength >> 1); X86_ASM ( "movq %0, %%mm1\n\t" : : "m" (m64Src2) : "mm1", "memory"); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++) { X86_ASM ( "movq %1, %%mm0\n\t" \ "pfmul %%mm1, %%mm0\n\t" \ "movntq %%mm0, %0\n\t" : "=m" (m64pDest[iDataCntr]) : "m" (m64pSrc1[iDataCntr]) : "mm0", "mm1", "memory"); } if (iDataLength & 0x1) { X86_ASM ( "movd %1, %%mm0\n\t" \ "pfmul %%mm1, %%mm0\n\t" \ "movd %%mm0, %0\n\t" : "=m" (fpDest[iDataLength - 1]) : "m" (fpSrc1[iDataLength - 1]) : "mm0", "mm1", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}void dsp_x86_sse_mulf_nip (float *fpDest, const float *fpSrc1, float fSrc2, int iDataLength){ int iDataCntr; X86_ASM ( "movss %0, %%xmm1\n\t" : : "m" (fSrc2) : "xmm1", "memory"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movss %1, %%xmm0\n\t" \ "mulss %%xmm1, %%xmm0\n\t" \ "movss %%xmm0, %0\n\t" : "=m" (fpDest[iDataCntr]) : "m" (fpSrc1[iDataCntr]) : "xmm0", "xmm1", "memory"); }}void dsp_x86_sse_mul_nip (double *dpDest, const double *dpSrc1, double dSrc2, int iDataLength){ int iDataCntr; X86_ASM ( "movsd %0, %%xmm1\n\t" : : "m" (dSrc2) : "xmm1", "memory"); for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movsd %1, %%xmm0\n\t" \ "mulsd %%xmm1, %%xmm0\n\t" \ "movsd %%xmm0, %0\n\t" : "=m" (dpDest[iDataCntr]) : "m" (dpSrc1[iDataCntr]) : "xmm0", "xmm1", "memory"); }}void dsp_x86_3dnow_add2f (float *fpDest, const float *fpSrc, int iDataLength){ int iDataCntr; int iDataCount; pv2sf m64pDest = (pv2sf) fpDest; pv2sf m64pSrc = (pv2sf) fpSrc; iDataCount = (iDataLength >> 1); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++) { X86_ASM ( "movq %1, %%mm0\n\t" \ "movq %2, %%mm1\n\t" \ "pfadd %%mm1, %%mm0\n\t" \ "movntq %%mm0, %0\n\t" : "=m" (m64pDest[iDataCntr]) : "m0" (m64pDest[iDataCntr]), "m" (m64pSrc[iDataCntr]) : "mm0", "mm1", "memory"); } if (iDataLength & 0x1) { X86_ASM ( "movd %1, %%mm0\n\t" \ "movd %2, %%mm1\n\t" \ "pfadd %%mm1, %%mm0\n\t" \ "movd %%mm0, %0\n\t" : "=m" (fpDest[iDataLength - 1]) : "m0" (fpDest[iDataLength - 1]), "m" (fpSrc[iDataLength - 1]) : "mm0", "mm1", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}void dsp_x86_sse_add2f (float *fpDest, const float *fpSrc, int iDataLength){ int iDataCntr; for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movss %1, %%xmm0\n\t" \ "addss %2, %%xmm0\n\t" \ "movss %%xmm0, %0\n\t" : "=m" (fpDest[iDataCntr]) : "m0" (fpDest[iDataCntr]), "m" (fpSrc[iDataCntr]) : "xmm0", "memory"); }}void dsp_x86_sse_add2 (double *dpDest, const double *dpSrc, int iDataLength){ int iDataCntr; for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movsd %1, %%xmm0\n\t" \ "addsd %2, %%xmm0\n\t" \ "movsd %%xmm0, %0\n\t" : "=m" (dpDest[iDataCntr]) : "m0" (dpDest[iDataCntr]), "m" (dpSrc[iDataCntr]) : "xmm0", "memory"); }}void dsp_x86_3dnow_mul2f (float *fpDest, const float *fpSrc, int iDataLength){ int iDataCntr; int iDataCount; pv2sf m64pDest = (pv2sf) fpDest; pv2sf m64pSrc = (pv2sf) fpSrc; iDataCount = (iDataLength >> 1); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++) { X86_ASM ( "movq %1, %%mm0\n\t" \ "movq %2, %%mm1\n\t" \ "pfmul %%mm1, %%mm0\n\t" \ "movntq %%mm0, %0\n\t" : "=m" (m64pDest[iDataCntr]) : "m0" (m64pDest[iDataCntr]), "m" (m64pSrc[iDataCntr]) : "mm0", "mm1", "memory"); } if (iDataLength & 0x1) { X86_ASM ( "movd %1, %%mm0\n\t" \ "movd %2, %%mm1\n\t" \ "pfmul %%mm1, %%mm0\n\t" \ "movd %%mm0, %0\n\t" : "=m" (fpDest[iDataLength - 1]) : "m0" (fpDest[iDataLength - 1]), "m" (fpSrc[iDataLength - 1]) : "mm0", "mm1", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}void dsp_x86_sse_mul2f (float *fpDest, const float *fpSrc, int iDataLength){ int iDataCntr; for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movss %1, %%xmm0\n\t" \ "mulss %2, %%xmm0\n\t" \ "movss %%xmm0, %0\n\t" : "=m" (fpDest[iDataCntr]) : "m0" (fpDest[iDataCntr]), "m" (fpSrc[iDataCntr]) : "xmm0", "memory"); }}void dsp_x86_sse_mul2 (double *dpDest, const double *dpSrc, int iDataLength){ int iDataCntr; for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movsd %1, %%xmm0\n\t" \ "mulsd %2, %%xmm0\n\t" \ "movsd %%xmm0, %0\n\t" : "=m" (dpDest[iDataCntr]) : "m0" (dpDest[iDataCntr]), "m" (dpSrc[iDataCntr]) : "xmm0", "memory"); }}void dsp_x86_3dnow_add3f (float *fpDest, const float *fpSrc1, const float *fpSrc2, int iDataLength){ int iDataCntr; int iDataCount; pv2sf m64pDest = (pv2sf) fpDest; pv2sf m64pSrc1 = (pv2sf) fpSrc1; pv2sf m64pSrc2 = (pv2sf) fpSrc2; iDataCount = (iDataLength >> 1); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++) { X86_ASM ( "movq %1, %%mm0\n\t" \ "movq %2, %%mm1\n\t" \ "pfadd %%mm1, %%mm0\n\t" \ "movntq %%mm0, %0\n\t" : "=m" (m64pDest[iDataCntr]) : "m" (m64pSrc1[iDataCntr]), "m" (m64pSrc2[iDataCntr]) : "mm0", "mm1", "memory"); } if (iDataLength & 0x1) { X86_ASM ( "movd %1, %%mm0\n\t" \ "movd %2, %%mm1\n\t" \ "pfadd %%mm1, %%mm0\n\t" \ "movd %%mm0, %0\n\t" : "=m" (fpDest[iDataLength - 1]) : "m" (fpSrc1[iDataLength - 1]), "m" (fpSrc2[iDataLength - 1]) : "mm0", "mm1", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}void dsp_x86_sse_add3f (float *fpDest, const float *fpSrc1, const float *fpSrc2, int iDataLength){ int iDataCntr; for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movss %1, %%xmm0\n\t" \ "addss %2, %%xmm0\n\t" \ "movss %%xmm0, %0\n\t" : "=m" (fpDest[iDataCntr]) : "m" (fpSrc1[iDataCntr]), "m" (fpSrc2[iDataCntr]) : "xmm0", "memory"); }}void dsp_x86_sse_add3 (double *dpDest, const double *dpSrc1, const double *dpSrc2, int iDataLength){ int iDataCntr; for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movsd %1, %%xmm0\n\t" \ "addsd %2, %%xmm0\n\t" \ "movsd %%xmm0, %0\n\t" : "=m" (dpDest[iDataCntr]) : "m" (dpSrc1[iDataCntr]), "m" (dpSrc2[iDataCntr]) : "xmm0", "memory"); }}void dsp_x86_3dnow_mul3f (float *fpDest, const float *fpSrc1, const float *fpSrc2, int iDataLength){ int iDataCntr; int iDataCount; pv2sf m64pDest = (pv2sf) fpDest; pv2sf m64pSrc1 = (pv2sf) fpSrc1; pv2sf m64pSrc2 = (pv2sf) fpSrc2; iDataCount = (iDataLength >> 1); for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++) { X86_ASM ( "movq %1, %%mm0\n\t" \ "movq %2, %%mm1\n\t" \ "pfmul %%mm1, %%mm0\n\t" \ "movntq %%mm0, %0\n\t" : "=m" (m64pDest[iDataCntr]) : "m" (m64pSrc1[iDataCntr]), "m" (m64pSrc2[iDataCntr]) : "mm0", "mm1", "memory"); } if (iDataLength & 0x1) { X86_ASM ( "movd %1, %%mm0\n\t" \ "movd %2, %%mm1\n\t" \ "pfmul %%mm1, %%mm0\n\t" \ "movd %%mm0, %0\n\t" : "=m" (fpDest[iDataLength - 1]) : "m" (fpSrc1[iDataLength - 1]), "m" (fpSrc2[iDataLength - 1]) : "mm0", "mm1", "memory"); } X86_ASM ( "femms\n\t" \ "sfence\n\t");}void dsp_x86_sse_mul3f (float *fpDest, const float *fpSrc1, const float *fpSrc2, int iDataLength){ int iDataCntr; for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++) { X86_ASM ( "movss %1, %%xmm0\n\t" \ "mulss %2, %%xmm0\n\t" \ "movss %%xmm0, %0\n\t" : "=m" (fpDest[iDataCntr]) : "m" (fpSrc1[iDataCntr]), "m" (fpSrc2[iDataCntr]) : "xmm0", "memory"); }}void dsp_x86_sse_mul3 (double *dpDest, const double *dpSrc1, const double *dpSrc2, int iDataLength){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -