📄 x86-64.c
字号:
/* x86-64 specific optimized assembler dsp routines Copyright (C) 2003-2004 Jussi Laako This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/#if (defined(DSP_X86_64) && (!defined(DSP_X86)))#include <stdio.h>#include <string.h>#include <limits.h>#include <math.h>#include <float.h>#include "dsp/X86-64.h"#ifdef __cplusplusextern "C"{#endifvoid dsp_x86_64_minmaxf (float *fpMin, float *fpMax, const float *fpSrc, long lDataLength){ long lDataCntr; *fpMin = FLT_MAX; *fpMax = -FLT_MAX; X86_64_ASM ( "movss %0, %%xmm0\n\t" \ "movss %1, %%xmm1\n\t" : : "m" (*fpMin), "m" (*fpMax) : "xmm0", "xmm1", "memory"); for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++) { X86_64_ASM ( "movss %0, %%xmm2\n\t" \ "minss %%xmm2, %%xmm0\n\t" \ "maxss %%xmm2, %%xmm1\n\t" : : "m" (fpSrc[lDataCntr]) : "xmm0", "xmm1", "xmm2", "memory"); } X86_64_ASM ( "movss %%xmm0, %0\n\t" \ "movss %%xmm1, %1\n\t" : "=m" (*fpMin), "=m" (*fpMax) : : "xmm0", "xmm1", "memory");}void dsp_x86_64_minmax (double *dpMin, double *dpMax, const double *dpSrc, long lDataLength){ long lDataCntr; *dpMin = FLT_MAX; *dpMax = -FLT_MAX; X86_64_ASM ( "movsd %0, %%xmm0\n\t" \ "movsd %1, %%xmm1\n\t" : : "m" (*dpMin), "m" (*dpMax) : "xmm0", "xmm1", "memory"); for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++) { X86_64_ASM ( "movsd %0, %%xmm2\n\t" \ "minsd %%xmm2, %%xmm0\n\t" \ "maxsd %%xmm2, %%xmm1\n\t" : : "m" (dpSrc[lDataCntr]) : "xmm0", "xmm1", "xmm2", "memory"); } X86_64_ASM ( "movss %%xmm0, %0\n\t" \ "movss %%xmm1, %1\n\t" : "=m" (*dpMin), "=m" (*dpMax) : : "xmm0", "xmm1", "memory");}float dsp_x86_64_crosscorrf (const float *fpSrc1, const float *fpSrc2, long lDataLength){ long lDataCntr; float fScale; float fNormFact; float fProdSum; float fSqSum1; float fSqSum2; float fRes; X86_64_ASM ( "xorps %%xmm0, %%xmm0\n\t" \ "xorps %%xmm1, %%xmm1\n\t" \ "xorps %%xmm2, %%xmm2\n\t" : : : "xmm0", "xmm1", "xmm2"); for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++) { X86_64_ASM ( "movss %3, %%xmm3\n\t" \ "movss %4, %%xmm4\n\t" \ \ "movss %%xmm4, %%xmm5\n\t" \ "mulss %%xmm3, %%xmm5\n\t" \ "addss %%xmm5, %%xmm0\n\t" \ \ "movss %%xmm3, %%xmm5\n\t" \ "mulss %%xmm3, %%xmm5\n\t" \ "addss %%xmm5, %%xmm1\n\t" \ \ "movss %%xmm4, %%xmm5\n\t" \ "mulss %%xmm4, %%xmm5\n\t" \ "addss %%xmm5, %%xmm2\n\t" \ \ "movss %%xmm0, %0\n\t" \ "movss %%xmm1, %1\n\t" \ "movss %%xmm2, %2\n\t" : "=m" (fProdSum), "=m" (fSqSum1), "=m" (fSqSum2) : "m" (fpSrc1[lDataCntr]), "m" (fpSrc2[lDataCntr]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } fScale = 1.0F / lDataLength; fNormFact = sqrtf(fSqSum1 * fSqSum2) * fScale; fRes = (fProdSum * fScale) / fNormFact; return fRes;}double dsp_x86_64_crosscorr (const double *dpSrc1, const double *dpSrc2, long lDataLength){ long lDataCntr; double dScale; double dNormFact; double dProdSum; double dSqSum1; double dSqSum2; double dRes; X86_64_ASM ( "xorpd %%xmm0, %%xmm0\n\t" \ "xorpd %%xmm1, %%xmm1\n\t" \ "xorpd %%xmm2, %%xmm2\n\t" : : : "xmm0", "xmm1", "xmm2"); for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++) { X86_64_ASM ( "movsd %3, %%xmm3\n\t" \ "movsd %4, %%xmm4\n\t" \ \ "movsd %%xmm4, %%xmm5\n\t" \ "mulsd %%xmm3, %%xmm5\n\t" \ "addsd %%xmm5, %%xmm0\n\t" \ \ "movsd %%xmm3, %%xmm5\n\t" \ "mulsd %%xmm3, %%xmm5\n\t" \ "addsd %%xmm5, %%xmm1\n\t" \ \ "movsd %%xmm4, %%xmm5\n\t" \ "mulsd %%xmm4, %%xmm5\n\t" \ "addsd %%xmm5, %%xmm2\n\t" \ \ "movsd %%xmm0, %0\n\t" \ "movsd %%xmm1, %1\n\t" \ "movsd %%xmm2, %2\n\t" : "=m" (dProdSum), "=m" (dSqSum1), "=m" (dSqSum2) : "m" (dpSrc1[lDataCntr]), "m" (dpSrc2[lDataCntr]) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } dScale = 1.0 / lDataLength; dNormFact = sqrt(dSqSum1 * dSqSum2) * dScale; dRes = (dProdSum * dScale) / dNormFact; return dRes;}void dsp_x86_64_firf (float *fpDest, const float *fpSrc, long lDataLength, const float *fpCoeff, long lCoeffLength){ long lDestCntr; long lSrcCntr; long lCoeffCntr; long lSrcCount; lDestCntr = 0; lSrcCount = lDataLength + lCoeffLength; for (lSrcCntr = lCoeffLength; lSrcCntr < lSrcCount; lSrcCntr++) { X86_64_ASM ( "xorps %%xmm0, %%xmm0\n\t" : : : "xmm0"); for (lCoeffCntr = 0; lCoeffCntr < lCoeffLength; lCoeffCntr++) { X86_64_ASM ( "movss %0, %%xmm1\n\t" "mulss %1, %%xmm1\n\t" "addss %%xmm1, %%xmm0\n\t" : : "m" (fpSrc[lSrcCntr - lCoeffCntr]), "m" (fpCoeff[lCoeffCntr]) : "xmm0", "xmm1", "memory"); } X86_64_ASM ( "movss %%xmm0, %0\n\t" : "=m" (fpDest[lDestCntr++]) : : "xmm0", "memory"); }}void dsp_x86_64_fir (double *dpDest, const double *dpSrc, long lDataLength, const double *dpCoeff, long lCoeffLength){ long lDestCntr; long lSrcCntr; long lCoeffCntr; long lSrcCount; lDestCntr = 0; lSrcCount = lDataLength + lCoeffLength; for (lSrcCntr = lCoeffLength; lSrcCntr < lSrcCount; lSrcCntr++) { X86_64_ASM ( "xorpd %%xmm0, %%xmm0\n\t" : : : "xmm0"); for (lCoeffCntr = 0; lCoeffCntr < lCoeffLength; lCoeffCntr++) { X86_64_ASM ( "movsd %0, %%xmm1\n\t" "mulsd %1, %%xmm1\n\t" "addsd %%xmm1, %%xmm0\n\t" : : "m" (dpSrc[lSrcCntr - lCoeffCntr]), "m" (dpCoeff[lCoeffCntr]) : "xmm0", "xmm1", "memory"); } X86_64_ASM ( "movsd %%xmm0, %0\n\t" : "=m" (dpDest[lDestCntr++]) : : "xmm0", "memory"); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -