⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 x86.c

📁 各种工程计算的库函数
💻 C
📖 第 1 页 / 共 5 页
字号:
    int iDataCntr;        X86_ASM (        "movss %0, %%xmm1\n\t" \        "movss %1, %%xmm2\n\t"        :        : "m" (fMul),          "m" (fAdd)        : "xmm1", "xmm2", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %1, %%xmm0\n\t" \            "mulss %%xmm1, %%xmm0\n\t" \            "addss %%xmm2, %%xmm0\n\t" \            "movss %%xmm0, %0\n\t"            : "=m" (fpDest[iDataCntr])            : "m" (fpSrc[iDataCntr])            : "xmm0", "xmm1", "xmm2", "memory");    }}void dsp_x86_sse_ma2 (double *dpDest, const double *dpSrc,     double dMul, double dAdd, int iDataLength){    int iDataCntr;        X86_ASM (        "movsd %0, %%xmm1\n\t" \        "movsd %1, %%xmm2\n\t"        :        : "m" (dMul),          "m" (dAdd)        : "xmm1", "xmm2", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movsd %1, %%xmm0\n\t" \            "mulsd %%xmm1, %%xmm0\n\t" \            "addsd %%xmm2, %%xmm0\n\t" \            "movsd %%xmm0, %0\n\t"            : "=m" (dpDest[iDataCntr])            : "m" (dpSrc[iDataCntr])            : "xmm0", "xmm1", "xmm2", "memory");    }}void dsp_x86_3dnow_cmaf (float *fpDest, const float *fpSrc1,     const float *fpSrc2, int iDataLength){    int iDataCntr;    pv2sf m64pDest = (pv2sf) fpDest;    pv2sf m64pSrc1 = (pv2sf) fpSrc1;    pv2sf m64pSrc2 = (pv2sf) fpSrc2;        for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movq %2, %%mm0\n\t" \            "movq %3, %%mm1\n\t" \            "movq %1, %%mm3\n\t" \            "pswapd %%mm1, %%mm2\n\t" \            "pfmul %%mm0, %%mm1\n\t" \            "pfmul %%mm0, %%mm2\n\t" \            "pfpnacc %%mm2, %%mm1\n\t" \            "pfadd %%mm1, %%mm3\n\t" \            "movntq %%mm3, %0\n\t"            : "=m" (m64pDest[iDataCntr])            : "m0" (m64pDest[iDataCntr]),              "m" (m64pSrc1[iDataCntr]),              "m" (m64pSrc2[iDataCntr])            : "mm0", "mm1", "mm2", "mm3", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_cmaf (float *fpDest, const float *fpSrc1,     const float *fpSrc2, int iDataLength){    int iDataCntr;    int iDataCount;        iDataCount = (iDataLength << 1);    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2)    {        X86_ASM (            "movss %6, %%xmm2\n\t" \            "movss %7, %%xmm3\n\t" \            \            "movss %4, %%xmm0\n\t" \            "movss %%xmm0, %%xmm1\n\t" \            "movss %5, %%xmm4\n\t" \            \            "movss %2, %%xmm6\n\t" \            "movss %3, %%xmm7\n\t" \            \            "mulss %%xmm2, %%xmm0\n\t" \            "movss %%xmm4, %%xmm5\n\t" \            "mulss %%xmm3, %%xmm5\n\t" \            "subss %%xmm5, %%xmm0\n\t" \            \            "mulss %%xmm3, %%xmm1\n\t" \            "movss %%xmm4, %%xmm5\n\t" \            "mulss %%xmm2, %%xmm5\n\t" \            "addss %%xmm5, %%xmm1\n\t" \            \            "addss %%xmm0, %%xmm6\n\t" \            "addss %%xmm1, %%xmm7\n\t" \            \            "movss %%xmm6, %0\n\t" \            "movss %%xmm7, %1\n\t"            : "=m" (fpDest[iDataCntr]),              "=m" (fpDest[iDataCntr + 1])            : "m0" (fpDest[iDataCntr]),              "m1" (fpDest[iDataCntr + 1]),              "m" (fpSrc1[iDataCntr]),              "m" (fpSrc1[iDataCntr + 1]),              "m" (fpSrc2[iDataCntr]),              "m" (fpSrc2[iDataCntr + 1])            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",              "memory");    }}void dsp_x86_sse_cma (double *dpDest, const double *dpSrc1,     const double *dpSrc2, int iDataLength){    int iDataCntr;    int iDataCount;        iDataCount = (iDataLength << 1);    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2)    {        X86_ASM (            "movsd %6, %%xmm2\n\t" \            "movsd %7, %%xmm3\n\t" \            \            "movsd %4, %%xmm0\n\t" \            "movsd %%xmm0, %%xmm1\n\t" \            "movsd %5, %%xmm4\n\t" \            \            "movsd %2, %%xmm6\n\t" \            "movsd %3, %%xmm7\n\t" \            \            "mulsd %%xmm2, %%xmm0\n\t" \            "movsd %%xmm4, %%xmm5\n\t" \            "mulsd %%xmm3, %%xmm5\n\t" \            "subsd %%xmm5, %%xmm0\n\t" \            \            "mulsd %%xmm3, %%xmm1\n\t" \            "movsd %%xmm4, %%xmm5\n\t" \            "mulsd %%xmm2, %%xmm5\n\t" \            "addsd %%xmm5, %%xmm1\n\t" \            \            "addsd %%xmm0, %%xmm6\n\t" \            "addsd %%xmm1, %%xmm7\n\t" \            \            "movsd %%xmm6, %0\n\t" \            "movsd %%xmm7, %1\n\t"            : "=m" (dpDest[iDataCntr]),              "=m" (dpDest[iDataCntr + 1])            : "m0" (dpDest[iDataCntr]),              "m1" (dpDest[iDataCntr + 1]),              "m" (dpSrc1[iDataCntr]),              "m" (dpSrc1[iDataCntr + 1]),              "m" (dpSrc2[iDataCntr]),              "m" (dpSrc2[iDataCntr + 1])            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",              "memory");    }}void dsp_x86_3dnow_amf (float *fpVect, float fAdd, float fMul, int iDataLength){    int iDataCntr;    int iDataCount;    pv2sf m64pVect = (pv2sf) fpVect;    stm64 m64Add;    stm64 m64Mul;    m64Add.f[0] = m64Add.f[1] = fAdd;    m64Mul.f[0] = m64Mul.f[1] = fMul;    iDataCount = (iDataLength >> 1);    X86_ASM (        "movq %0, %%mm1\n\t" \        "movq %1, %%mm2\n\t"        :        : "m" (m64Add),          "m" (m64Mul)        : "mm1", "mm2", "memory");    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "pfadd %%mm1, %%mm0\n\t" \            "pfmul %%mm2, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (m64pVect[iDataCntr])            : "m0" (m64pVect[iDataCntr])            : "mm0", "mm1", "mm2", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %1, %%mm0\n\t" \            "pfadd %%mm1, %%mm0\n\t" \            "pfmul %%mm2, %%mm0\n\t" \            "movd %%mm0, %0\n\t"            : "=m" (fpVect[iDataLength - 1])            : "m0" (fpVect[iDataLength - 1])            : "mm0", "mm1", "mm2", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_amf (float *fpVect, float fAdd, float fMul, int iDataLength){    int iDataCntr;    X86_ASM (        "movss %0, %%xmm1\n\t" \        "movss %1, %%xmm2\n\t"        :        : "m" (fAdd),          "m" (fMul)        : "xmm1", "xmm2", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %1, %%xmm0\n\t" \            "addss %%xmm1, %%xmm0\n\t" \            "mulss %%xmm2, %%xmm0\n\t" \            "movss %%xmm0, %0\n\t"            : "=m" (fpVect[iDataCntr])            : "m0" (fpVect[iDataCntr])            : "xmm0", "xmm1", "xmm2", "memory");    }}float dsp_x86_3dnow_macf (const float *fpSrc1, const float *fpSrc2,     int iDataLength){    int iDataCntr;    int iDataCount;    float fRes;    pv2sf m64pSrc1 = (pv2sf) fpSrc1;    pv2sf m64pSrc2 = (pv2sf) fpSrc2;    iDataCount = (iDataLength >> 1);    X86_ASM (        "pxor %%mm0, %%mm0\n\t"        :        :        : "mm0");    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %0, %%mm1\n\t" \            "movq %1, %%mm2\n\t" \            "pfmul %%mm2, %%mm1\n\t" \            "pfacc %%mm1, %%mm0\n\t"            :            : "m" (m64pSrc1[iDataCntr]),              "m" (m64pSrc2[iDataCntr])            : "mm0", "mm1", "mm2", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %0, %%mm1\n\t" \            "movd %1, %%mm2\n\t" \            "pfmul %%mm2, %%mm1\n\t" \            "pfacc %%mm1, %%mm0\n\t"            :            : "m" (fpSrc1[iDataLength - 1]),              "m" (fpSrc2[iDataLength - 1])            : "mm0", "mm1", "mm2", "memory");    }    X86_ASM (        "pfacc %%mm0, %%mm0\n\t" \        "movd %%mm0, %0\n\t"        : "=m" (fRes)        :        : "mm0", "memory");    X86_ASM ("femms\n\t");    return fRes;}float dsp_x86_sse_macf (const float *fpSrc1, const float *fpSrc2,    int iDataLength){    int iDataCntr;    float fRes;        X86_ASM (        "xorps %%xmm0, %%xmm0\n\t"        :        :        : "xmm0");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %0, %%xmm1\n\t" \            "mulss %1, %%xmm1\n\t" \            "addss %%xmm1, %%xmm0\n\t"            :            : "m" (fpSrc1[iDataCntr]),              "m" (fpSrc2[iDataCntr])            : "xmm0", "xmm1", "xmm2", "memory");    }    X86_ASM (        "movss %%xmm0, %0\n\t"        : "=m" (fRes)        :        : "xmm0");    return fRes;}double dsp_x86_sse_mac (const double *dpSrc1, const double *dpSrc2,    int iDataLength){    int iDataCntr;    double dRes;        X86_ASM (        "xorpd %%xmm0, %%xmm0\n\t"        :        :        : "xmm0");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movsd %0, %%xmm1\n\t" \            "mulsd %1, %%xmm1\n\t" \            "addsd %%xmm1, %%xmm0\n\t"            :            : "m" (dpSrc1[iDataCntr]),              "m" (dpSrc2[iDataCntr])            : "xmm0", "xmm1", "xmm2", "memory");    }    X86_ASM (        "movsd %%xmm0, %0\n\t"        : "=m" (dRes)        :        : "xmm0");    return dRes;}void dsp_x86_3dnow_minmaxf (float *fpMin, float *fpMax, const float *fpSrc,     int iDataLength){    int iDataCntr;    int iDataCount;    stm64 m64Min;    stm64 m64Max;    pv2sf m64pSrc = (pv2sf) fpSrc;        m64Min.f[0] = m64Min.f[1] = FLT_MAX;    m64Max.f[0] = m64Max.f[1] = -FLT_MAX;    iDataCount = (iDataLength >> 1);    X86_ASM (        "movq %0, %%mm1\n\t" \        "movq %1, %%mm2\n\t"        :        : "m" (m64Min),          "m" (m64Max)        : "mm1", "mm2", "memory");    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %0, %%mm0\n\t" \            "pfmin %%mm0, %%mm1\n\t" \            "pfmax %%mm0, %%mm2\n\t"            :            : "m" (m64pSrc[iDataCntr])            : "mm0", "mm1", "mm2", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %0, %%mm0\n\t" \            "pfmin %%mm0, %%mm1\n\t" \            "pfmax %%mm0, %%mm2\n\t"            :            : "m" (fpSrc[iDataLength - 1])            : "mm0", "mm1", "mm2", "memory");    }    X86_ASM (        "pswapd %%mm1, %%mm3\n\t" \        "pfmin %%mm3, %%mm1\n\t" \        "pswapd %%mm2, %%mm3\n\t" \        "pfmax %%mm3, %%mm2\n\t" \        "movd %%mm1, %0\n\t" \        "movd %%mm2, %1\n\t"        : "=m" (*fpMin),          "=m" (*fpMax)        :        : "mm1", "mm2", "mm3", "memory");    X86_ASM ("femms\n\t");}void dsp_x86_sse_minmaxf (float *fpMin, float *fpMax, const float *fpSrc,     int iDataLength){    int iDataCntr;    *fpMin = FLT_MAX;    *fpMax = -FLT_MAX;    X86_ASM (        "movss %0, %%xmm0\n\t" \        "movss %1, %%xmm1\n\t"        :        : "m" (*fpMin),          "m" (*fpMax)        : "xmm0", "xmm1", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %0, %%xmm2\n\t" \            "minss %%xmm2, %%xmm0\n\t" \            "maxss %%xmm2, %%xmm1\n\t"            :            : "m" (fpSrc[iDataCntr])            : "xmm0", "xmm1", "xmm2", "memory");    }    X86_ASM (        "movss %%xmm0, %0\n\t" \        "movss %%xmm1, %1\n\t"        : "=m" (*fpMin),          "=m" (*fpMax)        :        : "xmm0", "xmm1", "memory");}void dsp_x86_sse_minmax (double *dpMin, double *dpMax, const double *dpSrc,     int iDataLength){    int iDataCntr;    *dpMin = FLT_MAX;    *dpMax = -FLT_MAX;    X86_ASM (        "movsd %0, %%xmm0\n\t" \        "movsd %1, %%xmm1\n\t"        :        : "m" (*dpMin),          "m" (*dpMax)        : "xmm0", "xmm1", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movsd %0, %%xmm2\n\t" \            "minsd %%xmm2, %%xmm0\n\t" \            "maxsd %%xmm2, %%xmm1\n\t"            :            : "m" (dpSrc[iDataCntr])            : "xmm0", "xmm1", "xmm2", "memory");    }    X86_ASM (        "movss %%xmm0, %0\n\t" \        "movss %%xmm1, %1\n\t"        : "=m" (*dpMin),          "=m" (*dpMax)        :        : "xmm0", "xmm1", "memory");}float dsp_x86_3dnow_crosscorrf (const float *fpSrc1, const float *fpSrc2,    int iDataLength)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -