⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 x86.c

📁 各种工程计算的库函数
💻 C
📖 第 1 页 / 共 5 页
字号:
{    int iDataCntr;    int iDataCount;    float fRes;    pv2sf m64pSrc1 = (pv2sf) fpSrc1;    pv2sf m64pSrc2 = (pv2sf) fpSrc2;        iDataCount = (iDataLength >> 1);    X86_ASM (        "pxor %%mm3, %%mm3\n\t" \        "pxor %%mm4, %%mm4\n\t" \        "pxor %%mm5, %%mm5\n\t"        :        :        : "mm3", "mm4", "mm5");    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %0, %%mm0\n\t" \            "movq %1, %%mm1\n\t" \            "movq %%mm1, %%mm2\n\t" \            "pfmul %%mm0, %%mm2\n\t" \            "pfacc %%mm2, %%mm5\n\t" \            "pfmul %%mm0, %%mm0\n\t" \            "pfacc %%mm0, %%mm3\n\t" \            "pfmul %%mm1, %%mm1\n\t" \            "pfacc %%mm1, %%mm4\n\t"            :            : "m" (m64pSrc1[iDataCntr]),              "m" (m64pSrc2[iDataCntr])            : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %0, %%mm0\n\t" \            "movd %1, %%mm1\n\t" \            "movq %%mm1, %%mm2\n\t" \            "pfmul %%mm0, %%mm2\n\t" \            "pfacc %%mm2, %%mm5\n\t" \            "pfmul %%mm0, %%mm0\n\t" \            "pfacc %%mm0, %%mm3\n\t" \            "pfmul %%mm1, %%mm1\n\t" \            "pfacc %%mm1, %%mm4\n\t"            :            : "m" (fpSrc1[iDataLength - 1]),              "m" (fpSrc2[iDataLength - 1])            : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "memory");    }    X86_ASM (        "pfacc %%mm3, %%mm3\n\t" \        "pfacc %%mm4, %%mm4\n\t" \        "pfacc %%mm5, %%mm5\n\t" \        \        "movd %1, %%mm6\n\t" \        "pswapd %%mm6, %%mm7\n\t" \        "paddd %%mm7, %%mm6\n\t" \        "pi2fd %%mm6, %%mm7\n\t" \        \        "pfrcp %%mm7, %%mm6\n\t" \        "pfrcpit1 %%mm6, %%mm7\n\t" \        "pfrcpit2 %%mm6, %%mm7\n\t" \        \        "pfmul %%mm3, %%mm4\n\t" \        \        "movq %%mm4, %%mm0\n\t" \        "pfrsqrt %%mm4, %%mm1\n\t" \        "movq %%mm1, %%mm2\n\t" \        "pfmul %%mm1, %%mm1\n\t" \        "pfrsqit1 %%mm4, %%mm1\n\t" \        "pfrcpit2 %%mm2, %%mm1\n\t" \        "pfmul %%mm1, %%mm4\n\t" \        \        "pfmul %%mm6, %%mm4\n\t" \        \        "pfrcp %%mm4, %%mm0\n\t" \        "pfrcpit1 %%mm0, %%mm4\n\t" \        "pfrcpit2 %%mm0, %%mm4\n\t" \        \        "pfmul %%mm6, %%mm5\n\t" \        "pfmul %%mm4, %%mm5\n\t" \        "movd %%mm5, %0\n\t"        : "=m" (fRes)        : "m" (iDataLength)        : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "memory");    X86_ASM ("femms\n\t");    return fRes;}float dsp_x86_sse_crosscorrf (const float *fpSrc1, const float *fpSrc2,    int iDataLength){    int iDataCntr;    float fScale;    float fNormFact;    float fProdSum;    float fSqSum1;    float fSqSum2;    float fRes;        X86_ASM (        "xorps %%xmm0, %%xmm0\n\t" \        "xorps %%xmm1, %%xmm1\n\t" \        "xorps %%xmm2, %%xmm2\n\t"        :        :        : "xmm0", "xmm1", "xmm2");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %3, %%xmm3\n\t" \            "movss %4, %%xmm4\n\t" \            \            "movss %%xmm4, %%xmm5\n\t" \            "mulss %%xmm3, %%xmm5\n\t" \            "addss %%xmm5, %%xmm0\n\t" \            \            "movss %%xmm3, %%xmm5\n\t" \            "mulss %%xmm3, %%xmm5\n\t" \            "addss %%xmm5, %%xmm1\n\t" \            \            "movss %%xmm4, %%xmm5\n\t" \            "mulss %%xmm4, %%xmm5\n\t" \            "addss %%xmm5, %%xmm2\n\t" \            \            "movss %%xmm0, %0\n\t" \            "movss %%xmm1, %1\n\t" \            "movss %%xmm2, %2\n\t"            : "=m" (fProdSum),              "=m" (fSqSum1),              "=m" (fSqSum2)            : "m" (fpSrc1[iDataCntr]),              "m" (fpSrc2[iDataCntr])            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");    }    fScale = 1.0F / iDataLength;    fNormFact = sqrtf(fSqSum1 * fSqSum2) * fScale;    fRes = (fProdSum * fScale) / fNormFact;    return fRes;}double dsp_x86_sse_crosscorr (const double *dpSrc1, const double *dpSrc2,    int iDataLength){    int iDataCntr;    double dScale;    double dNormFact;    double dProdSum;    double dSqSum1;    double dSqSum2;    double dRes;        X86_ASM (        "xorpd %%xmm0, %%xmm0\n\t" \        "xorpd %%xmm1, %%xmm1\n\t" \        "xorpd %%xmm2, %%xmm2\n\t"        :        :        : "xmm0", "xmm1", "xmm2");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movsd %3, %%xmm3\n\t" \            "movsd %4, %%xmm4\n\t" \            \            "movsd %%xmm4, %%xmm5\n\t" \            "mulsd %%xmm3, %%xmm5\n\t" \            "addsd %%xmm5, %%xmm0\n\t" \            \            "movsd %%xmm3, %%xmm5\n\t" \            "mulsd %%xmm3, %%xmm5\n\t" \            "addsd %%xmm5, %%xmm1\n\t" \            \            "movsd %%xmm4, %%xmm5\n\t" \            "mulsd %%xmm4, %%xmm5\n\t" \            "addsd %%xmm5, %%xmm2\n\t" \            \            "movsd %%xmm0, %0\n\t" \            "movsd %%xmm1, %1\n\t" \            "movsd %%xmm2, %2\n\t"            : "=m" (dProdSum),              "=m" (dSqSum1),              "=m" (dSqSum2)            : "m" (dpSrc1[iDataCntr]),              "m" (dpSrc2[iDataCntr])            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");    }    dScale = 1.0 / iDataLength;    dNormFact = sqrt(dSqSum1 * dSqSum2) * dScale;    dRes = (dProdSum * dScale) / dNormFact;    return dRes;}void dsp_x86_3dnow_i16tof (float *fpDest, const short *ipSrc, int iDataLength,    int iIntMax){    int iDataCntr;    float fScale;        X86_ASM (        "movd %1, %%mm1\n\t" \        "pswapd %%mm1, %%mm2\n\t" \        "paddd %%mm2, %%mm1\n\t" \        "pi2fd %%mm1, %%mm1\n\t" \        "pfrcp %%mm1, %%mm2\n\t" \        "pfrcpit1 %%mm2, %%mm1\n\t" \        "pfrcpit2 %%mm2, %%mm1\n\t" \        "movd %%mm1, %0\n\t"        : "=m" (fScale)        : "m" (iIntMax)        : "mm1", "mm2", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr += 2)    {        X86_ASM (            "movd %1, %%mm0\n\t" \            "punpcklwd %%mm0, %%mm0\n\t" \            "pi2fw %%mm0, %%mm0\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (fpDest[iDataCntr])            : "m" (ipSrc[iDataCntr])            : "mm0", "mm1", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");    if ((iDataLength % 2) != 0)    {        fpDest[iDataLength - 1] = ((float) ipSrc[iDataLength - 1]) * fScale;    }}void dsp_x86_3dnow_i32tof (float *fpDest, const int *ipSrc, int iDataLength,    int iIntMax){    int iDataCntr;    float fScale;        X86_ASM (        "movd %1, %%mm1\n\t" \        "pswapd %%mm1, %%mm2\n\t" \        "paddd %%mm2, %%mm1\n\t" \        "pi2fd %%mm1, %%mm1\n\t" \        "pfrcp %%mm1, %%mm2\n\t" \        "pfrcpit1 %%mm2, %%mm1\n\t" \        "pfrcpit2 %%mm2, %%mm1\n\t" \        "movd %%mm1, %0\n\t"        : "=m" (fScale)        : "m" (iIntMax)        : "mm1", "mm2", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr += 2)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "pi2fd %%mm0, %%mm0\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (fpDest[iDataCntr])            : "m" (ipSrc[iDataCntr])            : "mm0", "mm1", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");    if ((iDataLength % 2) != 0)    {        fpDest[iDataLength - 1] = ((float) ipSrc[iDataLength - 1]) * fScale;    }}void dsp_x86_3dnow_firf (float *fpDest, const float *fpSrc, int iDataLength,     const float *fpCoeff, int iCoeffLength){    int iSrcCntr;    int iDestCntr;    int iCoeffCntr;    int iSrcCount;    pv2sf m64pDest = (pv2sf) fpDest;    iDestCntr = 0;    iSrcCount = iDataLength + iCoeffLength;    for (iSrcCntr = iCoeffLength;         iSrcCntr < iSrcCount;         iSrcCntr += 2)    {        X86_ASM (            "pxor %%mm0, %%mm0\n\t"             :            :            : "mm0");        for (iCoeffCntr = 0;             iCoeffCntr < iCoeffLength;            iCoeffCntr++)        {            X86_ASM (                "movq %0, %%mm1\n\t" \                "movd %1, %%mm2\n\t" \                "pswapd %%mm2, %%mm3\n\t" \                "pfadd %%mm3, %%mm2\n\t" \                "pfmul %%mm2, %%mm1\n\t" \                "pfadd %%mm1, %%mm0\n\t"                 :                : "m" (fpSrc[iSrcCntr - iCoeffCntr]),                  "m" (fpCoeff[iCoeffCntr])                : "mm0", "mm1", "mm2", "mm3", "memory");        }        X86_ASM (            "movntq %%mm0, %0\n\t"            : "=m" (m64pDest[iDestCntr++])            :            : "mm0", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "pxor %%mm0, %%mm0\n\t"             :            :            : "mm0");        for (iCoeffCntr = 0;             iCoeffCntr < iCoeffLength;            iCoeffCntr++)        {            X86_ASM (                "movd %0, %%mm1\n\t" \                "movd %1, %%mm2\n\t" \                "pfmul %%mm2, %%mm1\n\t" \                "pfadd %%mm1, %%mm0\n\t"                 :                : "m" (fpSrc[iDataLength - 1 - iCoeffCntr]),                  "m" (fpCoeff[iCoeffCntr])                : "mm0", "mm1", "mm2", "memory");        }        X86_ASM (            "movd %%mm0, %0\n\t"            : "=m" (fpDest[iDataLength - 1])            :            : "mm0", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_firf (float *fpDest, const float *fpSrc, int iDataLength,     const float *fpCoeff, int iCoeffLength){    int iDestCntr;    int iSrcCntr;    int iCoeffCntr;    int iSrcCount;    iDestCntr = 0;    iSrcCount = iDataLength + iCoeffLength;    for (iSrcCntr = iCoeffLength;        iSrcCntr < iSrcCount;        iSrcCntr++)    {        X86_ASM (            "xorps %%xmm0, %%xmm0\n\t"            :            :            : "xmm0");        for (iCoeffCntr = 0;            iCoeffCntr < iCoeffLength;            iCoeffCntr++)        {            X86_ASM (                "movss %0, %%xmm1\n\t"                "mulss %1, %%xmm1\n\t"                "addss %%xmm1, %%xmm0\n\t"                :                : "m" (fpSrc[iSrcCntr - iCoeffCntr]),                  "m" (fpCoeff[iCoeffCntr])                : "xmm0", "xmm1", "memory");        }        X86_ASM (            "movss %%xmm0, %0\n\t"            : "=m" (fpDest[iDestCntr++])            :            : "xmm0", "memory");    }}void dsp_x86_sse_fir (double *dpDest, const double *dpSrc, int iDataLength,     const double *dpCoeff, int iCoeffLength){    int iDestCntr;    int iSrcCntr;    int iCoeffCntr;    int iSrcCount;    iDestCntr = 0;    iSrcCount = iDataLength + iCoeffLength;    for (iSrcCntr = iCoeffLength;        iSrcCntr < iSrcCount;        iSrcCntr++)    {        X86_ASM (            "xorpd %%xmm0, %%xmm0\n\t"            :            :            : "xmm0");        for (iCoeffCntr = 0;            iCoeffCntr < iCoeffLength;            iCoeffCntr++)        {            X86_ASM (                "movsd %0, %%xmm1\n\t"                "mulsd %1, %%xmm1\n\t"                "addsd %%xmm1, %%xmm0\n\t"                :                : "m" (dpSrc[iSrcCntr - iCoeffCntr]),                  "m" (dpCoeff[iCoeffCntr])                : "xmm0", "xmm1", "memory");        }        X86_ASM (            "movsd %%xmm0, %0\n\t"            : "=m" (dpDest[iDestCntr++])            :            : "xmm0", "memory");    }}void dsp_x86_3dnow_iirf (float *fpVect, int iDataLength, const float *fpCoeff,     float *fpX, float *fpY){    int iDataCntr;    pv2sf m64pCoeff = (pv2sf) &fpCoeff[1];    pv2sf m64pCoeff2 = (pv2sf) &fpCoeff[3];    pv2sf m64pX = (pv2sf) fpX;    pv2sf m64pY = (pv2sf) fpY;    X86_ASM (        "movq %0, %%mm0\n\t" \        "pswapd %%mm0, %%mm2\n\t" \        "movd %1, %%mm3\n\t" \        "movq %2, %%mm0\n\t" \        "pswapd %%mm0, %%mm4\n\t" \        "movq %3, %%mm5\n\t" \        "movq %4, %%mm7\n\t" \        :        : "m" (*m64pCoeff),          "m" (fpCoeff[0]),          "m" (*m64pCoeff2),          "m" (*m64pX),          "m" (*m64pY)        : "mm0", "mm2", "mm3", "mm4", "mm5", "mm7", "memory");    for (iDataCntr = 0;         iDataCntr < iDataLength;         iDataCntr++)    {        X86_ASM (            "pxor %%mm0, %%mm0\n\t" \            "movd %1, %%mm6\n\t" \            "movq %%mm5, %%mm1\n\t" \            "pfmul %%mm2, %%mm1\n\t" \            "pfacc %%mm1, %%mm0\n\t" \            "movq %%mm6, %%mm1\n\t" \            "pfmul %%mm3, %%mm1\n\t" \            "pfacc %%mm1, %%mm0\n\t" \            "movq %%mm7, %%mm1\n\t" \            "pfmul %%mm4, %%mm1\n\t" \            "pfacc %%mm1, %%mm0\n\t" \            "pfacc %%mm0, %%mm0\n\t" \            \            "pswapd %%mm7, %%mm1\n\t" \            "movq %%mm1, %%mm7\n\t" \            "punpckldq %%mm0, %%mm7\n\t" \            \            "pswapd %%mm5, %%mm1\n\t" \            "movq %%mm1, %%mm5\n\t" \            "movq %%mm6, %%mm1\n\t" \            "punpckldq %%mm1, %%mm5\n\t" \            \            "movd %%mm0, %0\n\t"            : "=m" (fpVect[iDataCntr])            : "m0

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -