⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 x86.c

📁 各种工程计算的库函数
💻 C
📖 第 1 页 / 共 5 页
字号:
/*    x86 specific optimized assembler dsp routines    Copyright (C) 2001-2005 Jussi Laako    This program is free software; you can redistribute it and/or modify    it under the terms of the GNU General Public License as published by    the Free Software Foundation; either version 2 of the License, or    (at your option) any later version.    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    GNU General Public License for more details.    You should have received a copy of the GNU General Public License    along with this program; if not, write to the Free Software    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/#ifdef DSP_X86#include <stdio.h>#include <string.h>#include <limits.h>#include <math.h>#include <float.h>#include "dsp/X86.h"#ifndef DSP_X86_64static char cpCPUid[13];#endif#ifdef __cplusplusextern "C"{#endif#ifndef DSP_X86_64const char *dsp_x86_cpuid (){    unsigned int *ipCPUid = (unsigned int *) cpCPUid;        X86_ASM (        "pushl %%ebx\n\t" \        "xorl %%eax, %%eax\n\t" \        "cpuid\n\t" \        "movl %%ebx, %0\n\t" \        "movl %%ecx, %2\n\t" \        "movl %%edx, %1\n\t" \        "popl %%ebx\n\t" \        : "=m" (ipCPUid[0]),          "=m" (ipCPUid[1]),          "=m" (ipCPUid[2])        :        : "eax", "ecx", "edx", "memory");    cpCPUid[12] = '\0';    return cpCPUid;}unsigned int dsp_x86_features (){    unsigned int uiFeatures = 0;        X86_ASM (        "pushl %%ebx\n\t" \        "movl $1, %%eax\n\t" \        "cpuid\n\t" \        "movl %%edx, %0\n\t" \        "popl %%ebx\n\t" \        : "=m" (uiFeatures)        :        : "eax", "ecx", "edx", "memory");        return uiFeatures;}unsigned int dsp_x86_amd_features (){    unsigned int uiFunction = 0x80000001;    unsigned int uiFeatures = 0;        X86_ASM (        "pushl %%ebx\n\t" \        "movl %1, %%eax\n\t" \        "cpuid\n\t" \        "movl %%edx, %0\n\t" \        "popl %%ebx\n\t" \        : "=m" (uiFeatures)        : "m" (uiFunction)        : "eax", "ecx", "edx", "memory");        return uiFeatures;}#endifextern int dsp_x86_have_e3dnow (){    #ifndef DSP_X86_64    unsigned int uiExtSup = 0;    unsigned int uiFeatures;    X86_ASM (        "pushl %%ebx\n\t" \        "movl $0x80000000, %%eax\n\t" \        "cpuid\n\t" \        "cmpl $0x80000001, %%eax\n\t" \        "jl have3dnowxit\n\t" \        "movl $1, %0\n\t" \        "have3dnowxit:\n\t" \        "popl %%ebx\n\t"        : "=m" (uiExtSup)        :        : "eax", "ecx", "edx", "memory");    if (uiExtSup)    {        uiFeatures = dsp_x86_amd_features();        if ((uiFeatures & (1 << 31)) && (uiFeatures & (1 << 30)))            return 1;    }    return 0;    #else    return 1;    #endif}extern int dsp_x86_have_sse2 (){    #ifndef DSP_X86_64    unsigned int uiFeatures;        uiFeatures = dsp_x86_features();    if ((uiFeatures & (1 << 25)) && (uiFeatures & (1 << 26)))        return 1;    return 0;    #else    return 1;    #endif}// --- inline code snippetsinline void dsp_x86_prefetchntf_init (const float *fpSrc){    stpm64 m64pSrc = (stpm64) fpSrc;    X86_ASM (        "prefetchnta %0\n\t" \        "prefetchnta %1\n\t" \        "prefetchnta %2\n\t" \        "prefetchnta %3\n\t"        :        : "m" (m64pSrc[0]),          "m" (m64pSrc[8]),          "m" (m64pSrc[16]),          "m" (m64pSrc[24]));}inline void dsp_x86_prefetchnt_init (const double *dpSrc){    stpm64 m64pSrc = (stpm64) dpSrc;    X86_ASM (        "prefetchnta %0\n\t" \        "prefetchnta %1\n\t" \        "prefetchnta %2\n\t" \        "prefetchnta %3\n\t"        :        : "m" (m64pSrc[0]),          "m" (m64pSrc[8]),          "m" (m64pSrc[16]),          "m" (m64pSrc[24]));}inline void dsp_x86_prefetchtf_init (const float *fpSrc){    stpm64 m64pSrc = (stpm64) fpSrc;    X86_ASM (        "prefetcht0 %0\n\t" \        "prefetcht0 %1\n\t" \        "prefetcht0 %2\n\t" \        "prefetcht0 %3\n\t"        :        : "m" (m64pSrc[0]),          "m" (m64pSrc[8]),          "m" (m64pSrc[16]),          "m" (m64pSrc[24]));}inline void dsp_x86_prefetcht_init (const double *dpSrc){    stpm64 m64pSrc = (stpm64) dpSrc;    X86_ASM (        "prefetcht0 %0\n\t" \        "prefetcht0 %1\n\t" \        "prefetcht0 %2\n\t" \        "prefetcht0 %3\n\t"        :        : "m" (m64pSrc[0]),          "m" (m64pSrc[8]),          "m" (m64pSrc[16]),          "m" (m64pSrc[24]));}inline void dsp_x86_prefetchntf_next (const float *fpSrc){    stpm64 m64pSrc = (stpm64) fpSrc;    X86_ASM (        "prefetchnta %0\n\t"        :        : "m" (m64pSrc[32]));}inline void dsp_x86_prefetchnt_next (const double *dpSrc){    stpm64 m64pSrc = (stpm64) dpSrc;    X86_ASM (        "prefetchnta %0\n\t"        :        : "m" (m64pSrc[32]));}inline void dsp_x86_prefetchtf_next (const float *fpSrc){    stpm64 m64pSrc = (stpm64) fpSrc;    X86_ASM (        "prefetcht0 %0\n\t"        :        : "m" (m64pSrc[32]));}inline void dsp_x86_prefetcht_next (const double *dpSrc){    stpm64 m64pSrc = (stpm64) dpSrc;    X86_ASM (        "prefetcht0 %0\n\t"        :        : "m" (m64pSrc[32]));}// ---void dsp_x86_3dnow_copyf (float *fpDest, const float *fpSrc, int iDataLength){    int iStartIdx;    int iDataCntr;    int iDataCount;    pv2sf m64pDest = (pv2sf) fpDest;    pv2sf m64pSrc = (pv2sf) fpSrc;        iStartIdx = 0;    X86_ASM (        "prefetchnta %0\n\t" \        "prefetchnta %1\n\t" \        "prefetchnta %2\n\t" \        "prefetchnta %3\n\t"        :        : "m" (m64pSrc[0]),          "m" (m64pSrc[8]),          "m" (m64pSrc[16]),          "m" (m64pSrc[24]));    iDataCount = ((iDataLength & 0xfffffff0) >> 1);    for (iDataCntr = iStartIdx; iDataCntr < iDataCount; iDataCntr += 8)    {        X86_ASM (            "prefetchnta %16\n\t" \            "movq %8, %%mm0\n\t" \            "movq %9, %%mm1\n\t" \            "movq %10, %%mm2\n\t" \            "movq %11, %%mm3\n\t" \            "movq %12, %%mm4\n\t" \            "movq %13, %%mm5\n\t" \            "movq %14, %%mm6\n\t" \            "movq %15, %%mm7\n\t" \            "movntq %%mm0, %0\n\t" \            "movntq %%mm1, %1\n\t" \            "movntq %%mm2, %2\n\t" \            "movntq %%mm3, %3\n\t" \            "movntq %%mm4, %4\n\t" \            "movntq %%mm5, %5\n\t" \            "movntq %%mm6, %6\n\t" \            "movntq %%mm7, %7\n\t"            : "=m" (m64pDest[iDataCntr]),              "=m" (m64pDest[iDataCntr + 1]),              "=m" (m64pDest[iDataCntr + 2]),              "=m" (m64pDest[iDataCntr + 3]),              "=m" (m64pDest[iDataCntr + 4]),              "=m" (m64pDest[iDataCntr + 5]),              "=m" (m64pDest[iDataCntr + 6]),              "=m" (m64pDest[iDataCntr + 7])            : "m" (m64pSrc[iDataCntr]),              "m" (m64pSrc[iDataCntr + 1]),              "m" (m64pSrc[iDataCntr + 2]),              "m" (m64pSrc[iDataCntr + 3]),              "m" (m64pSrc[iDataCntr + 4]),              "m" (m64pSrc[iDataCntr + 5]),              "m" (m64pSrc[iDataCntr + 6]),              "m" (m64pSrc[iDataCntr + 7]),              "m" (m64pSrc[iDataCntr + 32])            : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "memory");    }    iStartIdx = iDataCount;    iDataCount = ((iDataLength & 0xfffffffe) >> 1);    for (iDataCntr = iStartIdx; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "prefetchnta %2\n\t" \            "movq %1, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (m64pDest[iDataCntr])            : "m" (m64pSrc[iDataCntr]),              "m" (m64pSrc[iDataCntr + 32])            : "mm0", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %1, %%mm0\n\t" \            "movd %%mm0, %0\n\t"            : "=m" (fpDest[iDataLength - 1])            : "m" (fpSrc[iDataLength - 1])            : "mm0", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_3dnow_copyd (double *dpDest, const double *dpSrc, int iDataLength){    int iStartIdx;    int iDataCntr;    int iDataCount;        iStartIdx = 0;    X86_ASM (        "prefetchnta %0\n\t" \        "prefetchnta %1\n\t" \        "prefetchnta %2\n\t" \        "prefetchnta %3\n\t"        :        : "m" (dpSrc[0]),          "m" (dpSrc[8]),          "m" (dpSrc[16]),          "m" (dpSrc[24]));    iDataCount = (iDataLength & 0xfffffff8);    for (iDataCntr = iStartIdx; iDataCntr < iDataCount; iDataCntr += 8)    {        X86_ASM (            "prefetchnta %16\n\t" \            "movq %8, %%mm0\n\t" \            "movq %9, %%mm1\n\t" \            "movq %10, %%mm2\n\t" \            "movq %11, %%mm3\n\t" \            "movq %12, %%mm4\n\t" \            "movq %13, %%mm5\n\t" \            "movq %14, %%mm6\n\t" \            "movq %15, %%mm7\n\t" \            "movntq %%mm0, %0\n\t" \            "movntq %%mm1, %1\n\t" \            "movntq %%mm2, %2\n\t" \            "movntq %%mm3, %3\n\t" \            "movntq %%mm4, %4\n\t" \            "movntq %%mm5, %5\n\t" \            "movntq %%mm6, %6\n\t" \            "movntq %%mm7, %7\n\t"            : "=m" (dpDest[iDataCntr]),              "=m" (dpDest[iDataCntr + 1]),              "=m" (dpDest[iDataCntr + 2]),              "=m" (dpDest[iDataCntr + 3]),              "=m" (dpDest[iDataCntr + 4]),              "=m" (dpDest[iDataCntr + 5]),              "=m" (dpDest[iDataCntr + 6]),              "=m" (dpDest[iDataCntr + 7])            : "m" (dpSrc[iDataCntr]),              "m" (dpSrc[iDataCntr + 1]),              "m" (dpSrc[iDataCntr + 2]),              "m" (dpSrc[iDataCntr + 3]),              "m" (dpSrc[iDataCntr + 4]),              "m" (dpSrc[iDataCntr + 5]),              "m" (dpSrc[iDataCntr + 6]),              "m" (dpSrc[iDataCntr + 7]),              "m" (dpSrc[iDataCntr + 32])            : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "memory");    }    iStartIdx = iDataCount;    iDataCount = iDataLength;    for (iDataCntr = iStartIdx; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "prefetchnta %2\n\t" \            "movq %1, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (dpDest[iDataCntr])            : "m" (dpSrc[iDataCntr]),              "m" (dpSrc[iDataCntr + 32])            : "mm0", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_3dnow_addf (float *fpVect, float fSrc, int iDataLength){    int iDataCntr;    int iDataCount;    pv2sf m64pVect = (pv2sf) fpVect;    stm64 m64Src;    m64Src.f[0] = m64Src.f[1] = fSrc;    iDataCount = (iDataLength >> 1);    X86_ASM (        "movq %0, %%mm1\n\t"        :        : "m" (m64Src)        : "mm1", "memory");    for (iDataCntr = 0; iDataCntr < iDataCount; iDataCntr++)    {        X86_ASM (            "movq %1, %%mm0\n\t" \            "pfadd %%mm1, %%mm0\n\t" \            "movntq %%mm0, %0\n\t"            : "=m" (m64pVect[iDataCntr])            : "m0" (m64pVect[iDataCntr])            : "mm0", "mm1", "memory");    }    if (iDataLength & 0x1)    {        X86_ASM (            "movd %1, %%mm0\n\t" \            "pfadd %%mm1, %%mm0\n\t" \            "movd %%mm0, %0\n\t"            : "=m" (fpVect[iDataLength - 1])            : "m0" (fpVect[iDataLength - 1])            : "mm0", "mm1", "memory");    }    X86_ASM (        "femms\n\t" \        "sfence\n\t");}void dsp_x86_sse_addf (float *fpVect, float fSrc, int iDataLength){    int iDataCntr;        X86_ASM (        "movss %0, %%xmm1\n\t"        :        : "m" (fSrc)        : "xmm1", "memory");    for (iDataCntr = 0; iDataCntr < iDataLength; iDataCntr++)    {        X86_ASM (            "movss %1, %%xmm0\n\t" \            "addss %%xmm1, %%xmm0\n\t" \            "movss %%xmm0, %0\n\t"

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -