⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 x86.c

📁 各种工程计算的库函数
💻 C
📖 第 1 页 / 共 5 页
字号:
            : "=m" (fpVect[iDataCntr])            : "m0" (fpVect[iDataCntr])            : "xmm0", "xmm1", "memory");    }}void dsp_x86_sse_add (double *dpVect, double dSrc, int iDataLength){    int iDataCntr;        X86_ASM (        "movsd %0, %%xmm1\n\t"        :        : "m" (dSrc)        : "xmm1", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movsd %1, %%xmm0\n\t" \            "addsd %%xmm1, %%xmm0\n\t" \            "movsd %%xmm0, %0\n\t"            : "=m" (dpVect[iDataCntr])            : "m0" (dpVect[iDataCntr])            : "xmm0", "xmm1", "memory");    }}void dsp_x86_3dnow_mulf (float *fpVect, float fSrc, int iDataLength){    int iDataCntr;    int iDataCount;    pv2sf m64pVect = (pv2sf) fpVect;    stm64 m64Src;    m64Src.f[0] = m64Src.f[1] = fSrc;    iDataCount = (iDataLength >> 1);    X86_ASM (        "movq %0, %%mm1\n\t"        :        : "m" (m64Src)        : "mm1", "memory");    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (m64pVect[iDataCntr])            : "m0" (m64pVect[iDataCntr])            : "mm0", "mm1", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %1, %%mm0\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "movd %%mm0, %0\n\t"            : "=m" (fpVect[iDataLength - 1])            : "m0" (fpVect[iDataLength - 1])            : "mm0", "mm1", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_mulf (float *fpVect, float fSrc, int iDataLength){    int iDataCntr;    X86_ASM (        "movss %0, %%xmm1\n\t"        :        : "m" (fSrc)        : "xmm1", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %1, %%xmm0\n\t" \            "mulss %%xmm1, %%xmm0\n\t" \            "movss %%xmm0, %0\n\t"            : "=m" (fpVect[iDataCntr])            : "m0" (fpVect[iDataCntr])            : "xmm0", "xmm1", "memory");    }}void dsp_x86_sse_mul (double *dpVect, double dSrc, int iDataLength){    int iDataCntr;        X86_ASM (        "movsd %0, %%xmm1\n\t"        :        : "m" (dSrc)        : "xmm1", "memory");    for (iDataCntr = 0; iDataCntr <iDataLength; iDataCntr++)    {        X86_ASM (            "movsd %1, %%xmm0\n\t" \            "mulsd %%xmm1, %%xmm0\n\t" \            "movsd %%xmm0, %0\n\t"            : "=m" (dpVect[iDataCntr])            : "m0" (dpVect[iDataCntr])            : "xmm0", "xmm1", "memory");    }}void dsp_x86_3dnow_mulf_nip (float *fpDest, const float *fpSrc1, float fSrc2,     int iDataLength){    int iDataCntr;    int iDataCount;    pv2sf m64pDest = (pv2sf) fpDest;    pv2sf m64pSrc1 = (pv2sf) fpSrc1;    stm64 m64Src2;    m64Src2.f[0] = m64Src2.f[1] = fSrc2;    iDataCount = (iDataLength >> 1);    X86_ASM (        "movq %0, %%mm1\n\t"        :        : "m" (m64Src2)        : "mm1", "memory");    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (m64pDest[iDataCntr])            : "m" (m64pSrc1[iDataCntr])            : "mm0", "mm1", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %1, %%mm0\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "movd %%mm0, %0\n\t"            : "=m" (fpDest[iDataLength - 1])            : "m" (fpSrc1[iDataLength - 1])            : "mm0", "mm1", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_mulf_nip (float *fpDest, const float *fpSrc1, float fSrc2,    int iDataLength){    int iDataCntr;    X86_ASM (        "movss %0, %%xmm1\n\t"        :        : "m" (fSrc2)        : "xmm1", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %1, %%xmm0\n\t" \            "mulss %%xmm1, %%xmm0\n\t" \            "movss %%xmm0, %0\n\t"            : "=m" (fpDest[iDataCntr])            : "m" (fpSrc1[iDataCntr])            : "xmm0", "xmm1", "memory");    }}void dsp_x86_sse_mul_nip (double *dpDest, const double *dpSrc1, double dSrc2,    int iDataLength){    int iDataCntr;        X86_ASM (        "movsd %0, %%xmm1\n\t"        :        : "m" (dSrc2)        : "xmm1", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movsd %1, %%xmm0\n\t" \            "mulsd %%xmm1, %%xmm0\n\t" \            "movsd %%xmm0, %0\n\t"            : "=m" (dpDest[iDataCntr])            : "m" (dpSrc1[iDataCntr])            : "xmm0", "xmm1", "memory");    }}void dsp_x86_3dnow_add2f (float *fpDest, const float *fpSrc, int iDataLength){    int iDataCntr;    int iDataCount;    pv2sf m64pDest = (pv2sf) fpDest;    pv2sf m64pSrc = (pv2sf) fpSrc;    iDataCount = (iDataLength >> 1);    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "movq %2, %%mm1\n\t" \            "pfadd %%mm1, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (m64pDest[iDataCntr])            : "m0" (m64pDest[iDataCntr]),              "m" (m64pSrc[iDataCntr])            : "mm0", "mm1", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %1, %%mm0\n\t" \            "movd %2, %%mm1\n\t" \            "pfadd %%mm1, %%mm0\n\t" \            "movd %%mm0, %0\n\t"            : "=m" (fpDest[iDataLength - 1])            : "m0" (fpDest[iDataLength - 1]),              "m" (fpSrc[iDataLength - 1])            : "mm0", "mm1", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_add2f (float *fpDest, const float *fpSrc, int iDataLength){    int iDataCntr;        for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %1, %%xmm0\n\t" \            "addss %2, %%xmm0\n\t" \            "movss %%xmm0, %0\n\t"            : "=m" (fpDest[iDataCntr])            : "m0" (fpDest[iDataCntr]),              "m" (fpSrc[iDataCntr])            : "xmm0", "memory");    }}void dsp_x86_sse_add2 (double *dpDest, const double *dpSrc, int iDataLength){    int iDataCntr;        for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movsd %1, %%xmm0\n\t" \            "addsd %2, %%xmm0\n\t" \            "movsd %%xmm0, %0\n\t"            : "=m" (dpDest[iDataCntr])            : "m0" (dpDest[iDataCntr]),              "m" (dpSrc[iDataCntr])            : "xmm0", "memory");    }}void dsp_x86_3dnow_mul2f (float *fpDest, const float *fpSrc, int iDataLength){    int iDataCntr;    int iDataCount;    pv2sf m64pDest = (pv2sf) fpDest;    pv2sf m64pSrc = (pv2sf) fpSrc;    iDataCount = (iDataLength >> 1);    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "movq %2, %%mm1\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (m64pDest[iDataCntr])            : "m0" (m64pDest[iDataCntr]),              "m" (m64pSrc[iDataCntr])            : "mm0", "mm1", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %1, %%mm0\n\t" \            "movd %2, %%mm1\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "movd %%mm0, %0\n\t"            : "=m" (fpDest[iDataLength - 1])            : "m0" (fpDest[iDataLength - 1]),              "m" (fpSrc[iDataLength - 1])            : "mm0", "mm1", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_mul2f (float *fpDest, const float *fpSrc, int iDataLength){    int iDataCntr;        for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %1, %%xmm0\n\t" \            "mulss %2, %%xmm0\n\t" \            "movss %%xmm0, %0\n\t"            : "=m" (fpDest[iDataCntr])            : "m0" (fpDest[iDataCntr]),              "m" (fpSrc[iDataCntr])            : "xmm0", "memory");    }}void dsp_x86_sse_mul2 (double *dpDest, const double *dpSrc, int iDataLength){    int iDataCntr;        for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movsd %1, %%xmm0\n\t" \            "mulsd %2, %%xmm0\n\t" \            "movsd %%xmm0, %0\n\t"            : "=m" (dpDest[iDataCntr])            : "m0" (dpDest[iDataCntr]),              "m" (dpSrc[iDataCntr])            : "xmm0", "memory");    }}void dsp_x86_3dnow_add3f (float *fpDest, const float *fpSrc1,     const float *fpSrc2, int iDataLength){    int iDataCntr;    int iDataCount;    pv2sf m64pDest = (pv2sf) fpDest;    pv2sf m64pSrc1 = (pv2sf) fpSrc1;    pv2sf m64pSrc2 = (pv2sf) fpSrc2;    iDataCount = (iDataLength >> 1);    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "movq %2, %%mm1\n\t" \            "pfadd %%mm1, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (m64pDest[iDataCntr])            : "m" (m64pSrc1[iDataCntr]),              "m" (m64pSrc2[iDataCntr])            : "mm0", "mm1", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %1, %%mm0\n\t" \            "movd %2, %%mm1\n\t" \            "pfadd %%mm1, %%mm0\n\t" \            "movd %%mm0, %0\n\t"            : "=m" (fpDest[iDataLength - 1])            : "m" (fpSrc1[iDataLength - 1]),              "m" (fpSrc2[iDataLength - 1])            : "mm0", "mm1", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_add3f (float *fpDest, const float *fpSrc1,     const float *fpSrc2, int iDataLength){    int iDataCntr;        for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %1, %%xmm0\n\t" \            "addss %2, %%xmm0\n\t" \            "movss %%xmm0, %0\n\t"            : "=m" (fpDest[iDataCntr])            : "m" (fpSrc1[iDataCntr]),              "m" (fpSrc2[iDataCntr])            : "xmm0", "memory");    }}void dsp_x86_sse_add3 (double *dpDest, const double *dpSrc1,     const double *dpSrc2, int iDataLength){    int iDataCntr;        for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movsd %1, %%xmm0\n\t" \            "addsd %2, %%xmm0\n\t" \            "movsd %%xmm0, %0\n\t"            : "=m" (dpDest[iDataCntr])            : "m" (dpSrc1[iDataCntr]),              "m" (dpSrc2[iDataCntr])            : "xmm0", "memory");    }}void dsp_x86_3dnow_mul3f (float *fpDest, const float *fpSrc1,     const float *fpSrc2, int iDataLength){    int iDataCntr;    int iDataCount;    pv2sf m64pDest = (pv2sf) fpDest;    pv2sf m64pSrc1 = (pv2sf) fpSrc1;    pv2sf m64pSrc2 = (pv2sf) fpSrc2;    iDataCount = (iDataLength >> 1);    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "movq %2, %%mm1\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (m64pDest[iDataCntr])            : "m" (m64pSrc1[iDataCntr]),              "m" (m64pSrc2[iDataCntr])            : "mm0", "mm1", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %1, %%mm0\n\t" \            "movd %2, %%mm1\n\t" \            "pfmul %%mm1, %%mm0\n\t" \            "movd %%mm0, %0\n\t"            : "=m" (fpDest[iDataLength - 1])            : "m" (fpSrc1[iDataLength - 1]),              "m" (fpSrc2[iDataLength - 1])            : "mm0", "mm1", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_mul3f (float *fpDest, const float *fpSrc1,     const float *fpSrc2, int iDataLength){    int iDataCntr;        for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %1, %%xmm0\n\t" \            "mulss %2, %%xmm0\n\t" \            "movss %%xmm0, %0\n\t"            : "=m" (fpDest[iDataCntr])            : "m" (fpSrc1[iDataCntr]),              "m" (fpSrc2[iDataCntr])            : "xmm0", "memory");    }}void dsp_x86_sse_mul3 (double *dpDest, const double *dpSrc1,     const double *dpSrc2, int iDataLength){

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -