⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mmx_win.cpp

📁 Audacity是一款用於錄音和編輯聲音的、免費的開放源碼軟體。它可以執行於Mac OS X、Microsoft Windows、GNU/Linux和其它作業系統
💻 CPP
📖 第 1 页 / 共 2 页
字号:
        ; to improve CPU-level parallellization.        ;        ; Load [eax] into mm0 and mm1        ; Load [ebx] into mm3        ; unpack words of mm0, mm1 and mm3 into mm0 and mm1        ; multiply-add mm0*mm6 and mm1*mm7, store results into mm0 and mm1        ; divide mm0 and mm1 by 512 (=right-shift by overlapDividerBits)        ; pack the result into mm0 and store into [edx]        ;        ; Load [eax+8] into mm2 and mm3        ; Load [ebx+8] into mm4        ; unpack words of mm2, mm3 and mm4 into mm2 and mm3        ; multiply-add mm2*mm6 and mm3*mm7, store results into mm2 and mm3        ; divide mm2 and mm3 by 512 (=right-shift by overlapDividerBits)        ; pack the result into mm2 and store into [edx+8]                        movq        mm0, qword ptr [eax]    ; mm0 = m1l m1r m0l m0r        add         edx, 16        movq        mm3, qword ptr [ebx]    ; mm3 = i1l i1r i0l i0r        movq        mm1, mm0                ; mm1 = m1l m1r m0l m0r        movq        mm2, qword ptr [eax+8]  ; mm2 = m3l m3r m2l m2r        punpcklwd   mm0, mm3                ; mm0 = i0l m0l i0r m0r        movq        mm4, qword ptr [ebx+8]  ; mm4 = i3l i3r i2l i2r        punpckhwd   mm1, mm3                ; mm1 = i1l m1l i1r m1r        movq        mm3, mm2                ; mm3 = m3l m3r m2l m2r        punpcklwd   mm2, mm4                ; mm2 = i2l m2l i2r m2r        pmaddwd     mm0, mm6                ; mm0 = i0l*m63+m0l*m62 i0r*m61+m0r*m60        punpckhwd   mm3, mm4                ; mm3 = i3l m3l i3r m3r        movd        mm4, esi                ; mm4 = overlapDividerBits        pmaddwd     mm1, mm7                ; mm1 = i1l*m73+m1l*m72 i1r*m71+m1r*m70        paddw       mm6, mm5        paddw       mm7, mm5        psrad       mm0, mm4                ; mmo >>= overlapDividerBits        pmaddwd     mm2, mm6                ; mm2 = i2l*m63+m2l*m62 i2r*m61+m2r*m60        psrad       mm1, mm4                ; mm1 >>= overlapDividerBits        pmaddwd     mm3, mm7                ; mm3 = i3l*m73+m3l*m72 i3r*m71+m3r*m70        psrad       mm2, mm4                ; mm2 >>= overlapDividerBits        packssdw    mm0, mm1                ; mm0 = mm1h mm1l mm0h mm0l        psrad       mm3, mm4                ; mm3 >>= overlapDividerBits        add         eax, 16        paddw       mm6, mm5        packssdw    mm2, mm3                ; mm2 = mm2h mm2l mm3h mm3l        paddw       mm7, mm5        movq        qword ptr [edx-16], mm0        add         ebx, 16        movq        qword ptr [edx-8], mm2        dec         ecx            jnz         loop1        emms    }}////////////////////////////////////////////////////////////////////////////////// implementation of MMX optimized functions of class 'FIRFilter'////////////////////////////////////////////////////////////////////////////////#include "FIRFilter.h"FIRFilterMMX::FIRFilterMMX() : FIRFilter(){    filterCoeffsUnalign = NULL;}FIRFilterMMX::~FIRFilterMMX(){    delete[] filterCoeffsUnalign;}// (overloaded) Calculates filter coefficients for MMX routinevoid FIRFilterMMX::setCoefficients(const short *coeffs, uint newLength, uint uResultDivFactor){    uint i;    FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor);    // Ensure that filter coeffs array is aligned to 16-byte boundary    delete[] filterCoeffsUnalign;    filterCoeffsUnalign = new short[2 * newLength + 8];    filterCoeffsAlign = (short *)(((uint)filterCoeffsUnalign + 15) & -16);    // rearrange the filter coefficients for mmx routines     for (i = 0;i < length; i += 4)     {        filterCoeffsAlign[2 * i + 0] = coeffs[i + 0];        filterCoeffsAlign[2 * i + 1] = coeffs[i + 2];        filterCoeffsAlign[2 * i + 2] = coeffs[i + 0];        filterCoeffsAlign[2 * i + 3] = coeffs[i + 2];        filterCoeffsAlign[2 * i + 4] = coeffs[i + 1];        filterCoeffsAlign[2 * i + 5] = coeffs[i + 3];        filterCoeffsAlign[2 * i + 6] = coeffs[i + 1];        filterCoeffsAlign[2 * i + 7] = coeffs[i + 3];    }}// mmx-optimized version of the filter routine for stereo sounduint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, const uint numSamples) const{    // Create stack copies of the needed member variables for asm routines :    uint local_length = length;    uint local_lengthDiv8 = lengthDiv8;    uint local_resultDivider = resultDivFactor;    short *local_filterCoeffs = (short*)filterCoeffsAlign;    _asm     {        ; Load (num_samples-aa_filter_length)/2 to edi as a i        ; Load a pointer to samples to esi        ; Load a pointer to destination to edx        mov         edi, numSamples        mov         esi, dword ptr src        sub         edi, local_length        mov         edx, dword ptr dest        sar         edi, 1        ; Load filter length/8 to ecx        ; Load pointer to samples from esi to ebx        ; Load counter from edi to ecx        ; Load [ebx] to mm3        ; Load pointer to filter coefficients to eaxloop1:        mov         ebx, esi        pxor        mm0, mm0        mov         ecx, local_lengthDiv8        pxor        mm7, mm7        movq        mm1, [ebx]              ; mm1 = l1 r1 l0 r0        mov         eax, local_filterCoeffsloop2:        movq        mm2, [ebx+8]            ; mm2 = l3 r3 l2 r2        movq        mm4, mm1                ; mm4 = l1 r1 l0 r0        movq        mm3, [ebx+16]           ; mm3 = l5 r5 l4 r4        punpckhwd   mm1, mm2                ; mm1 = l3 l1 r3 r1        movq        mm6, mm2                ; mm6 = l3 r3 l2 r2        punpcklwd   mm4, mm2                ; mm4 = l2 l0 r2 r0        movq        mm2, qword ptr [eax]    ; mm2 = f2 f0 f2 f0        movq        mm5, mm1                ; mm5 = l3 l1 r3 r1        punpcklwd   mm6, mm3                ; mm6 = l4 l2 r4 r2        pmaddwd     mm4, mm2                ; mm4 = l2*f2+l0*f0 r2*f2+r0*f0        pmaddwd     mm5, mm2                ; mm5 = l3*f2+l1*f0 r3*f2+l1*f0        movq        mm2, qword ptr [eax+8]  ; mm2 = f3 f1 f3 f1        paddd       mm0, mm4                ; mm0 += s02*f02        movq        mm4, mm3                ; mm4 = l1 r1 l0 r0        pmaddwd     mm1, mm2                ; mm1 = l3*f3+l1*f1 r3*f3+l1*f1        paddd       mm7, mm5                ; mm7 += s13*f02        pmaddwd     mm6, mm2                ; mm6 = l4*f3+l2*f1 r4*f3+f4*f1        movq        mm2, [ebx+24]           ; mm2 = l3 r3 l2 r2        paddd       mm0, mm1                ; mm0 += s31*f31        movq        mm1, [ebx+32]           ; mm1 = l5 r5 l4 r4        paddd       mm7, mm6                ; mm7 += s42*f31        punpckhwd   mm3, mm2                ; mm3 = l3 l1 r3 r1        movq        mm6, mm2                ; mm6 = l3 r3 l2 r2        punpcklwd   mm4, mm2                ; mm4 = l2 l0 r2 r0        movq        mm2, qword ptr [eax+16] ; mm2 = f2 f0 f2 f0        movq        mm5, mm3                ; mm5 = l3 l1 r3 r1        punpcklwd   mm6, mm1                ; mm6 = l4 l2 r4 r2        add         eax, 32        pmaddwd     mm4, mm2                ; mm4 = l2*f2+l0*f0 r2*f2+r0*f0        add         ebx, 32        pmaddwd     mm5, mm2                ; mm5 = l3*f2+l1*f0 r3*f2+l1*f0        movq        mm2, qword ptr [eax-8]  ; mm2 = f3 f1 f3 f1        paddd       mm0, mm4                ; mm0 += s02*f02        pmaddwd     mm3, mm2                ; mm3 = l3*f3+l1*f1 r3*f3+l1*f1        paddd       mm7, mm5                ; mm7 += s13*f02        pmaddwd     mm6, mm2                ; mm6 = l4*f3+l2*f1 r4*f3+f4*f1        paddd       mm0, mm3                ; mm0 += s31*f31        paddd       mm7, mm6                ; mm7 += s42*f31        dec         ecx        jnz         loop2        ; Divide mm0 and mm7 by 8192 (= right-shift by 13),        ; pack and store to [edx]        movd        mm4, local_resultDivider;        psrad       mm0, mm4                ; divider the result        add         edx, 8        psrad       mm7, mm4                ; divider the result        add         esi, 8        packssdw    mm0, mm7        movq        qword ptr [edx-8], mm0        dec         edi        jnz         loop1        emms    }    return (numSamples & 0xfffffffe) - local_length;}#endif  // ALLOW_MMX

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -