⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mmx_win.cpp

📁 Audacity是一款用於錄音和編輯聲音的、免費的開放源碼軟體。它可以執行於Mac OS X、Microsoft Windows、GNU/Linux和其它作業系統
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*****************************************************************************
 *
 * Win32 version of the MMX optimized routines. All MMX optimized functions
 * have been gathered into this single source code file, regardless to their
 * class or original source code file, in order to ease porting the library
 * to other compiler and processor platforms.
 *
 * This file is to be compiled in Windows platform with Microsoft Visual C++
 * Compiler. Please see 'mmx_gcc.cpp' for the gcc compiler version for all
 * GNU platforms.
 *
 * Author        : Copyright (c) Olli Parviainen
 * Author e-mail : oparviai @ iki.fi
 * File created  : 13-Jan-2002
 *
 * Last changed  : $Date: 2004/10/26 19:09:37 $
 * File revision : $Revision: 1.2 $
 *
 * $Id: mmx_win.cpp,v 1.2 2004/10/26 19:09:37 vjohnson Exp $
 *
 * License :
 *
 *  SoundTouch sound processing library
 *  Copyright (c) Olli Parviainen
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *****************************************************************************/

#include "STTypes.h"

#ifndef WIN32
#error "wrong platform - this source code file is exclusively for Win32 platform"
#endif

using namespace soundtouch;

#ifdef ALLOW_MMX
// MMX routines available only with integer sample type

//////////////////////////////////////////////////////////////////////////////
//
// implementation of MMX optimized functions of class 'TDStretchMMX'
//
//////////////////////////////////////////////////////////////////////////////

#include "TDStretch.h"
#include <limits.h>

// these are declared in 'TDStretch.cpp'
extern int scanOffsets[4][24];


// Calculates cross correlation of two buffers.
//
// pV1, pV2 : buffers of 16-bit samples. The routine runs overlapLength / 8
//            rounds and reads 32 bytes from each buffer per round.
// Returns  : the accumulated sum of the products pV1[i] * pV2[i]. Every
//            partial sum of four products is arithmetically shifted right by
//            overlapDividerBits before it is added to the accumulator.
//
// The MMX state is left active on return; clearCrossCorrState() issues EMMS.
//
// NOTE(review): the main loop is a dec/jnz loop whose body runs before the
// counter is tested, and the counter starts at overlapLength / 8 - 1. An
// overlapLength below 16 would therefore start the counter at 0 (or wrap it),
// and any remainder of overlapLength modulo 8 is dropped by the shift.
// Presumably the caller guarantees a multiple of 8 that is >= 16 - confirm.
long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
{
    long corr;
    uint local_overlapLength = overlapLength;
    uint local_overlapDividerBits = overlapDividerBits;

    _asm
    {
        ; Calculate cross-correlation between the buffers pV1 and pV2.
        ;
        ; Process 4 parallel batches of 2 * stereo samples each during one
        ; round to improve CPU-level parallelization.
        ;
        ; load address of the pV1 buffer to eax
        ; load address of the pV2 buffer to ebx
        ; load counter to ecx = overlapLength / 8 - 1
        ; empty the mm0 (accumulator)
        ; load the shift count overlapDividerBits to mm5
        ;
        ; prepare to the first round by loading
        ; load mm1 = eax[0]
        ; load mm2 = eax[1]

        mov         eax, dword ptr pV1
        mov         ebx, dword ptr pV2

        movq        mm1, qword ptr [eax]
        mov         ecx, local_overlapLength

        movq        mm2, qword ptr [eax+8]
        shr         ecx, 3

        pxor        mm0, mm0
        sub         ecx, 1

        movd        mm5, local_overlapDividerBits

    loop1:
        ; multiply-add mm1 = mm1 * ebx[0]
        ; multiply-add mm2 = mm2 * ebx[1]
        ;
        ; add mm2 += mm1
        ; mm2 >>= mm5 (=overlapDividerBits)
        ; add mm0 += mm2
        ;
        ; load mm3 = eax[2]
        ; multiply-add mm3 = mm3 * ebx[2]
        ;
        ; load mm4 = eax[3]
        ; multiply-add mm4 = mm4 * ebx[3]
        ;
        ; add mm3 += mm4
        ; mm3 >>= mm5 (=overlapDividerBits)
        ; add mm0 += mm3
        ;
        ; add eax += 4 (qwords, i.e. 32 bytes)
        ; add ebx += 4 (qwords, i.e. 32 bytes)
        ; load mm1 = eax[0] (~eax[4])
        ; load mm2 = eax[1] (~eax[5])
        ;
        ; loop

        pmaddwd     mm1, qword ptr [ebx]
        movq        mm3, qword ptr [eax+16]

        pmaddwd     mm2, qword ptr [ebx+8]
        movq        mm4, qword ptr [eax+24]

        pmaddwd     mm3, qword ptr [ebx+16]
        paddd       mm2, mm1

        pmaddwd     mm4, qword ptr [ebx+24]
        movq        mm1, qword ptr [eax+32]     ; preload for the next round

        psrad       mm2, mm5
        add         eax, 32

        paddd       mm3, mm4
        paddd       mm0, mm2

        movq        mm2, qword ptr [eax+8]      ; preload for the next round
        psrad       mm3, mm5

        add         ebx, 32
        paddd       mm0, mm3

        dec         ecx
        jnz         loop1

        ; Finalize the last partial loop: mm1 and mm2 were already loaded
        ; by the previous round, so only mm3 and mm4 are fetched here.

        movq        mm3, qword ptr [eax+16]
        pmaddwd     mm1, qword ptr [ebx]

        movq        mm4, qword ptr [eax+24]
        pmaddwd     mm2, qword ptr [ebx+8]

        pmaddwd     mm3, qword ptr [ebx+16]
        paddd       mm2, mm1

        pmaddwd     mm4, qword ptr [ebx+24]
        psrad       mm2, mm5

        paddd       mm3, mm4
        paddd       mm0, mm2

        psrad       mm3, mm5
        paddd       mm0, mm3

        ; copy hi-dword of mm0 to lo-dword of mm1, then sum mm0+mm1
        ; and finally store the result into the variable "corr"

        movq        mm1, mm0
        psrlq       mm1, 32
        paddd       mm0, mm1
        movd        corr, mm0
    }

    return corr;

    // Note: Warning about the missing EMMS instruction is harmless
    // as it'll be called elsewhere.
}


// Executes EMMS to clear the MMX state that calcCrossCorrStereo() leaves
// active.
void TDStretchMMX::clearCrossCorrState()
{
    _asm EMMS;
}


// MMX-optimized version of the function overlapStereo
//
// NOTE(review): only the register set-up of this function is present in this
// excerpt; the loop body and the end of the function are not visible here, so
// the descriptions below cover the set-up code only.
void TDStretchMMX::overlapStereo(short *output, const short *input) const
{
    short *local_midBuffer = pMidBuffer;
    uint local_overlapLength = overlapLength;
    uint local_overlapDividerBits = overlapDividerBits;

    _asm
    {
        ; load sliding mixing value counter to mm6 and mm7
        ; load counter value to ecx = overlapLength / 4
        ; load divider-shifter value to esi
        ; load mixing value adder to mm5
        ; load address of midBuffer to eax
        ; load address of input to ebx
        ; load address of output to edx

        mov         eax, local_overlapLength        ; eax = 0x0000 OVL_
        mov         edi, 0x0002fffe     ; edi = 0x0002 fffe (words +2 and -2)
        mov         esi, local_overlapDividerBits
        movd        mm6, eax            ; mm6 = 0x0000 0000 0000 OVL_
        mov         ecx, eax
        sub         eax, 1
        punpckldq   mm6, mm6            ; mm6 = 0x0000 OVL_ 0000 OVL_
        mov         edx, output
        or          eax, 0x00010000     ; eax = 0x0001 overlapLength-1
        mov         ebx, dword ptr input
        movd        mm5, edi            ; mm5 = 0x0000 0000 0002 fffe
        movd        mm7, eax            ; mm7 = 0x0000 0000 0001 01ff (example for overlapLength = 0x200)
        mov         eax, dword ptr local_midBuffer
        punpckldq   mm5, mm5            ; mm5 = 0x0002 fffe 0002 fffe
        shr         ecx, 2              ; ecx = overlapLength / 4
        punpckldq   mm7, mm7            ; mm7 = 0x0001 01ff 0001 01ff (same example)

    loop1:
        ; Process two parallel batches of 2+2 stereo samples during each round
        ; NOTE(review): the source excerpt ends here; the remainder of this
        ; function is not available in this part of the file.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -