⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 x86.c

📁 各种工程计算的库函数
💻 C
📖 第 1 页 / 共 5 页
字号:
    int iDataCntr;        for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movsd %1, %%xmm0\n\t" \            "mulsd %2, %%xmm0\n\t" \            "movsd %%xmm0, %0\n\t"            : "=m" (dpDest[iDataCntr])            : "m" (dpSrc1[iDataCntr]),              "m" (dpSrc2[iDataCntr])            : "xmm0", "memory");    }}void dsp_x86_3dnow_cmulf (float *fpDest, const float *fpSrc, int iDataLength){    int iDataCntr;    pv2sf m64pDest = (pv2sf) fpDest;        X86_ASM (        "movq %0, %%mm3\n\t"        :        : "m" (fpSrc[0])        : "mm3", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "movq %%mm3, %%mm1\n\t" \            "pswapd %%mm1, %%mm2\n\t" \            "pfmul %%mm0, %%mm1\n\t" \            "pfmul %%mm0, %%mm2\n\t" \            "pfpnacc %%mm2, %%mm1\n\t" \            "movntq %%mm1, %0\n\t"            : "=m" (m64pDest[iDataCntr])            : "m0" (m64pDest[iDataCntr])            : "mm0", "mm1", "mm2", "mm3", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_cmulf (float *fpDest, const float *fpSrc, int iDataLength){    int iDataCntr;    int iDataCount;        X86_ASM (        "movss %0, %%xmm2\n\t" \        "movss %1, %%xmm3\n\t"        :        : "m" (fpSrc[0]),          "m" (fpSrc[1])        : "xmm2", "xmm3", "memory");    iDataCount = (iDataLength << 1);    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2)    {        X86_ASM (            "movss %2, %%xmm0\n\t" \            "movss %%xmm0, %%xmm1\n\t" \            "movss %3, %%xmm4\n\t" \            \            "mulss %%xmm2, %%xmm0\n\t" \            "movss %%xmm4, %%xmm5\n\t" \            "mulss %%xmm3, %%xmm5\n\t" \            "subss %%xmm5, %%xmm0\n\t" \            \            "mulss %%xmm3, %%xmm1\n\t" \            "movss %%xmm4, %%xmm5\n\t" \            "mulss %%xmm2, %%xmm5\n\t" \            "addss %%xmm5, %%xmm1\n\t" \            \            "movss %%xmm0, %0\n\t" \            "movss %%xmm1, %1\n\t"            : "=m" (fpDest[iDataCntr]),              "=m" (fpDest[iDataCntr + 1])            : "m0" (fpDest[iDataCntr]),              "m1" (fpDest[iDataCntr + 1])            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory");    }}void dsp_x86_sse_cmul (double *dpDest, const double *dpSrc, int iDataLength){    int iDataCntr;    int iDataCount;        X86_ASM (        "movsd %0, %%xmm2\n\t" \        "movsd %1, %%xmm3\n\t"        :        : "m" (dpSrc[0]),          "m" (dpSrc[1])        : "xmm2", "xmm3", "memory");    iDataCount = (iDataLength << 1);    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2)    {        X86_ASM (            "movsd %2, %%xmm0\n\t" \            "movsd %%xmm0, %%xmm1\n\t" \            "movsd %3, %%xmm4\n\t" \            \            "mulsd %%xmm2, %%xmm0\n\t" \            "movsd %%xmm4, %%xmm5\n\t" \            "mulsd %%xmm3, %%xmm5\n\t" \            "subsd %%xmm5, %%xmm0\n\t" \            \            "mulsd %%xmm3, %%xmm1\n\t" \            "movsd %%xmm4, %%xmm5\n\t" \            "mulsd %%xmm2, %%xmm5\n\t" \            "addsd %%xmm5, %%xmm1\n\t" \            \            "movsd %%xmm0, %0\n\t" \            "movsd %%xmm1, %1\n\t"            : "=m" (dpDest[iDataCntr]),              "=m" (dpDest[iDataCntr + 1])            : "m0" (dpDest[iDataCntr]),              "m1" (dpDest[iDataCntr + 1])            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory");    }}void dsp_x86_3dnow_cmul2f (float *fpDest, const float *fpSrc, int iDataLength){    int iDataCntr;    pv2sf m64pDest = (pv2sf) fpDest;    pv2sf m64pSrc = (pv2sf) fpSrc;        for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "movq %2, %%mm1\n\t" \            "pswapd %%mm1, %%mm2\n\t" \            "pfmul %%mm0, %%mm1\n\t" \            "pfmul %%mm0, %%mm2\n\t" \            "pfpnacc %%mm2, %%mm1\n\t" \            "movntq %%mm1, %0\n\t"            : "=m" (m64pDest[iDataCntr])            : "m0" (m64pDest[iDataCntr]),              "m" (m64pSrc[iDataCntr])            : "mm0", "mm1", "mm2", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_cmul2f (float *fpDest, const float *fpSrc, int iDataLength){    int iDataCntr;    int iDataCount;        iDataCount = (iDataLength << 1);    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2)    {        X86_ASM (            "movss %4, %%xmm2\n\t" \            "movss %5, %%xmm3\n\t" \            \            "movss %2, %%xmm0\n\t" \            "movss %%xmm0, %%xmm1\n\t" \            "movss %3, %%xmm4\n\t" \            \            "mulss %%xmm2, %%xmm0\n\t" \            "movss %%xmm4, %%xmm5\n\t" \            "mulss %%xmm3, %%xmm5\n\t" \            "subss %%xmm5, %%xmm0\n\t" \            \            "mulss %%xmm3, %%xmm1\n\t" \            "movss %%xmm4, %%xmm5\n\t" \            "mulss %%xmm2, %%xmm5\n\t" \            "addss %%xmm5, %%xmm1\n\t" \            \            "movss %%xmm0, %0\n\t" \            "movss %%xmm1, %1\n\t"            : "=m" (fpDest[iDataCntr]),              "=m" (fpDest[iDataCntr + 1])            : "m0" (fpDest[iDataCntr]),              "m1" (fpDest[iDataCntr + 1]),              "m" (fpSrc[iDataCntr]),              "m" (fpSrc[iDataCntr + 1])            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory");    }}void dsp_x86_sse_cmul2 (double *dpDest, const double *dpSrc, int iDataLength){    int iDataCntr;    int iDataCount;        iDataCount = (iDataLength << 1);    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2)    {        X86_ASM (            "movsd %4, %%xmm2\n\t" \            "movsd %5, %%xmm3\n\t" \            \            "movsd %2, %%xmm0\n\t" \            "movsd %%xmm0, %%xmm1\n\t" \            "movsd %3, %%xmm4\n\t" \            \            "mulsd %%xmm2, %%xmm0\n\t" \            "movsd %%xmm4, %%xmm5\n\t" \            "mulsd %%xmm3, %%xmm5\n\t" \            "subsd %%xmm5, %%xmm0\n\t" \            \            "mulsd %%xmm3, %%xmm1\n\t" \            "movsd %%xmm4, %%xmm5\n\t" \            "mulsd %%xmm2, %%xmm5\n\t" \            "addsd %%xmm5, %%xmm1\n\t" \            \            "movsd %%xmm0, %0\n\t" \            "movsd %%xmm1, %1\n\t"            : "=m" (dpDest[iDataCntr]),              "=m" (dpDest[iDataCntr + 1])            : "m0" (dpDest[iDataCntr]),              "m1" (dpDest[iDataCntr + 1]),              "m" (dpSrc[iDataCntr]),              "m" (dpSrc[iDataCntr + 1])            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory");    }}void dsp_x86_3dnow_cmul3f (float *fpDest, const float *fpSrc1,     const float *fpSrc2, int iDataLength){    int iDataCntr;    pv2sf m64pDest = (pv2sf) fpDest;    pv2sf m64pSrc1 = (pv2sf) fpSrc1;    pv2sf m64pSrc2 = (pv2sf) fpSrc2;        for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "movq %2, %%mm1\n\t" \            "pswapd %%mm1, %%mm2\n\t" \            "pfmul %%mm0, %%mm1\n\t" \            "pfmul %%mm0, %%mm2\n\t" \            "pfpnacc %%mm2, %%mm1\n\t" \            "movntq %%mm1, %0\n\t"            : "=m" (m64pDest[iDataCntr])            : "m" (m64pSrc1[iDataCntr]),              "m" (m64pSrc2[iDataCntr])            : "mm0", "mm1", "mm2", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_cmul3f (float *fpDest, const float *fpSrc1,     const float *fpSrc2, int iDataLength){    int iDataCntr;    int iDataCount;        iDataCount = (iDataLength << 1);    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2)    {        X86_ASM (            "movss %4, %%xmm2\n\t" \            "movss %5, %%xmm3\n\t" \            \            "movss %2, %%xmm0\n\t" \            "movss %%xmm0, %%xmm1\n\t" \            "movss %3, %%xmm4\n\t" \            \            "mulss %%xmm2, %%xmm0\n\t" \            "movss %%xmm4, %%xmm5\n\t" \            "mulss %%xmm3, %%xmm5\n\t" \            "subss %%xmm5, %%xmm0\n\t" \            \            "mulss %%xmm3, %%xmm1\n\t" \            "movss %%xmm4, %%xmm5\n\t" \            "mulss %%xmm2, %%xmm5\n\t" \            "addss %%xmm5, %%xmm1\n\t" \            \            "movss %%xmm0, %0\n\t" \            "movss %%xmm1, %1\n\t"            : "=m" (fpDest[iDataCntr]),              "=m" (fpDest[iDataCntr + 1])            : "m" (fpSrc1[iDataCntr]),              "m" (fpSrc1[iDataCntr + 1]),              "m" (fpSrc2[iDataCntr]),              "m" (fpSrc2[iDataCntr + 1])            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory");    }}void dsp_x86_sse_cmul3 (double *dpDest, const double *dpSrc1,     const double *dpSrc2, int iDataLength){    int iDataCntr;    int iDataCount;        iDataCount = (iDataLength << 1);    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr += 2)    {        X86_ASM (            "movsd %4, %%xmm2\n\t" \            "movsd %5, %%xmm3\n\t" \            \            "movsd %2, %%xmm0\n\t" \            "movsd %%xmm0, %%xmm1\n\t" \            "movsd %3, %%xmm4\n\t" \            \            "mulsd %%xmm2, %%xmm0\n\t" \            "movsd %%xmm4, %%xmm5\n\t" \            "mulsd %%xmm3, %%xmm5\n\t" \            "subsd %%xmm5, %%xmm0\n\t" \            \            "mulsd %%xmm3, %%xmm1\n\t" \            "movsd %%xmm4, %%xmm5\n\t" \            "mulsd %%xmm2, %%xmm5\n\t" \            "addsd %%xmm5, %%xmm1\n\t" \            \            "movsd %%xmm0, %0\n\t" \            "movsd %%xmm1, %1\n\t"            : "=m" (dpDest[iDataCntr]),              "=m" (dpDest[iDataCntr + 1])            : "m" (dpSrc1[iDataCntr]),              "m" (dpSrc1[iDataCntr + 1]),              "m" (dpSrc2[iDataCntr]),              "m" (dpSrc2[iDataCntr + 1])            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory");    }}void dsp_x86_3dnow_maf (float *fpVect, float fMul, float fAdd, int iDataLength){    int iDataCntr;    int iDataCount;    pv2sf m64pVect = (pv2sf) fpVect;    stm64 m64Mul;    stm64 m64Add;    m64Mul.f[0] = m64Mul.f[1] = fMul;    m64Add.f[0] = m64Add.f[1] = fAdd;    iDataCount = (iDataLength >> 1);    X86_ASM (        "movq %0, %%mm1\n\t" \        "movq %1, %%mm2\n\t"        :        : "m" (m64Mul),          "m" (m64Add)        : "mm1", "mm2", "memory");    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "pfadd %%mm2, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (m64pVect[iDataCntr])            : "m0" (m64pVect[iDataCntr])            : "mm0", "mm1", "mm2", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %1, %%mm0\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "pfadd %%mm2, %%mm0\n\t" \            "movd %%mm0, %0\n\t"            : "=m" (fpVect[iDataLength - 1])            : "m0" (fpVect[iDataLength - 1])            : "mm0", "mm1", "mm2", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_maf (float *fpVect, float fMul, float fAdd, int iDataLength){    int iDataCntr;        X86_ASM (        "movss %0, %%xmm1\n\t" \        "movss %1, %%xmm2\n\t"        :        : "m" (fMul),          "m" (fAdd)        : "xmm1", "xmm2", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %1, %%xmm0\n\t" \            "mulss %%xmm1, %%xmm0\n\t" \            "addss %%xmm2, %%xmm0\n\t" \            "movss %%xmm0, %0\n\t"            : "=m" (fpVect[iDataCntr])            : "m0" (fpVect[iDataCntr])            : "xmm0", "xmm1", "xmm2", "memory");    }}void dsp_x86_sse_ma (double *dpVect, double dMul, double dAdd, int iDataLength){    int iDataCntr;        X86_ASM (        "movsd %0, %%xmm1\n\t" \        "movsd %1, %%xmm2\n\t"        :        : "m" (dMul),          "m" (dAdd)        : "xmm1", "xmm2", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movsd %1, %%xmm0\n\t" \            "mulsd %%xmm1, %%xmm0\n\t" \            "addsd %%xmm2, %%xmm0\n\t" \            "movsd %%xmm0, %0\n\t"            : "=m" (dpVect[iDataCntr])            : "m0" (dpVect[iDataCntr])            : "xmm0", "xmm1", "xmm2", "memory");    }}void dsp_x86_3dnow_ma2f (float *fpDest, const float *fpSrc,    float fMul, float fAdd, int iDataLength){    int iDataCntr;    int iDataCount;    pv2sf m64pDest = (pv2sf) fpDest;    pv2sf m64pSrc = (pv2sf) fpSrc;    stm64 m64Mul;    stm64 m64Add;    m64Mul.f[0] = m64Mul.f[1] = fMul;    m64Add.f[0] = m64Add.f[1] = fAdd;    iDataCount = (iDataLength >> 1);    X86_ASM (        "movq %0, %%mm1\n\t" \        "movq %1, %%mm2\n\t"        :        : "m" (m64Mul),          "m" (m64Add)        : "mm1", "mm2", "memory");    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "pfadd %%mm2, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (m64pDest[iDataCntr])            : "m" (m64pSrc[iDataCntr])            : "mm0", "mm1", "mm2", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %1, %%mm0\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "pfadd %%mm2, %%mm0\n\t" \            "movd %%mm0, %0\n\t"            : "=m" (fpDest[iDataLength - 1])            : "m" (fpSrc[iDataLength - 1])            : "mm0", "mm1", "mm2", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_ma2f (float *fpDest, const float *fpSrc,     float fMul, float fAdd, int iDataLength){

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -