📄 mmxresample.c
字号:
psrad mm2, 3
pmaddwd mm0, [eax+32]
paddd mm7, mm2
movq mm1, [edi+RBYTEOFF+32]
psrad mm3, 3
pmaddwd mm1, [edx+32]
paddd mm7, mm3
movq mm2, [esi+RBYTEOFF+40]
psrad mm0, 2
pmaddwd mm2, [eax+40]
paddd mm7, mm0
movq mm3, [edi+RBYTEOFF+40]
psrad mm1, 2
pmaddwd mm3, [edx+40]
paddd mm7, mm1
movq mm0, [esi+RBYTEOFF+48]
psrad mm2, 1
pmaddwd mm0, [eax+48]
paddd mm7, mm2
movq mm1, [edi+RBYTEOFF+48]
psrad mm3, 1
pmaddwd mm1, [edx+48]
paddd mm7, mm3
/* acc shift */
psrad mm7, 8
movq mm2, [esi+RBYTEOFF+56]
psrad mm0, 8
pmaddwd mm2, [eax+56]
paddd mm7, mm0
movq mm3, [edi+RBYTEOFF+56]
psrad mm1, 8
pmaddwd mm3, [edx+56]
paddd mm7, mm1
movq mm0, [esi+RBYTEOFF+64]
psrad mm2, 7
pmaddwd mm0, [eax+64]
paddd mm7, mm2
movq mm1, [edi+RBYTEOFF+64]
psrad mm3, 7
pmaddwd mm1, [edx+64]
paddd mm7, mm3
movq mm2, [esi+RBYTEOFF+72]
psrad mm0, 6
pmaddwd mm2, [eax+72]
paddd mm7, mm0
movq mm3, [edi+RBYTEOFF+72]
psrad mm1, 6
pmaddwd mm3, [edx+72]
paddd mm7, mm1
movq mm0, [esi+RBYTEOFF+80]
psrad mm2, 5
pmaddwd mm0, [eax+80]
paddd mm7, mm2
movq mm1, [edi+RBYTEOFF+80]
psrad mm3, 5
pmaddwd mm1, [edx+80]
paddd mm7, mm3
movq mm2, [esi+RBYTEOFF+88]
psrad mm0, 4
pmaddwd mm2, [eax+88]
paddd mm7, mm0
movq mm3, [edi+RBYTEOFF+88]
psrad mm1, 4
pmaddwd mm3, [edx+88]
paddd mm7, mm1
/* flush */
psrad mm2, 1
paddd mm7, mm2
psrad mm3, 1
paddd mm7, mm3
/* BOTH CHANNELS */
/* horizontal add */
movq mm0, mm6
punpckhdq mm6, mm7
punpckldq mm0, mm7
paddd mm6, mm0
/* round, shift, clip */
paddd mm6, mm5
psrad mm6, 14
packssdw mm6, mm6
/* bump to next state */
mov edx, nextstate
mov ebx, [edx+4*ebx] // phase = nextstate[phase]
lea edx, [ebx+ebx]
and edx, 0xff<<1
add esi, edx // pcmptr += pcmstep
sub edi, edx // revptr -= pcmstep
mov edx, ebx
shr ebx, 20 // phase
shr edx, 3
and edx, 0xfff<<5 // 32 * idx
lea edx, [edx+2*edx] // 96 * idx
add edx, filter // lwingptr = filter[idx]
movd [ecx], mm6 // *outptr++
// add ecx, 4
add ecx,outstride
lea eax, [ebx+2*ebx] // 3 * idx
shl eax, 5 // 96 * idx
add eax, filter // rwingptr = filter[idx]
jmp conv_loop
conv_done:
sub ecx, outbuf // outsamps = outptr - outbuf
shr ecx, 1
mov outsamps, ecx
mov pcmptr, esi
mov revptr, edi
mov phase, ebx
mov rwingptr, eax
mov lwingptr, edx
emms
}
/* save filter state */
s->phase = phase;
s->offset = pcmptr - pcmend;
s->rwingptr = rwingptr;
s->lwingptr = lwingptr;
memmove(s->pcmleft-NTAPS, s->pcmleft-NTAPS + insamps/2, NTAPS * sizeof(short));
memmove(s->pcmrght-NTAPS, s->pcmrght-NTAPS + insamps/2, NTAPS * sizeof(short));
// TOCK(outsamps);
return outsamps;
}
int
RAResampleMonoMMX(void *inbuf, int insamps, tConverter *pCvt, short *outbuf, int outstride, void *inst)
{
state_t *s = (state_t *)inst;
short *pcmptr, *revptr, *pcmend;
short *rwingptr, *lwingptr;
int i, outsamps;
/* local copies */
filtwing *filter = s->filter;
int *nextstate = s->nextstate;
int phase = s->phase;
int round13[2] = { 1<<13, 1<<13 };
// TICK();
outstride *= sizeof(short) ;
/* copy/convert new input */
insamps = pCvt->pfCvt(s->pcmleft,inbuf,insamps,pCvt->pStateMachine) ;
/* create revbuf */
for (i = -insamps; i < NTAPS; i++)
s->revrght[i] = s->pcmleft[-i-1];
/* restore filter state */
pcmptr = s->pcmleft + s->offset - (NTAPS-1);
revptr = s->revrght - s->offset - 1;
pcmend = s->pcmleft + insamps - (NTAPS-1);
rwingptr = s->rwingptr;
lwingptr = s->lwingptr;
__asm {
mov esi, pcmptr
mov edi, revptr
mov ebx, phase
mov ecx, outbuf
mov eax, rwingptr
mov edx, lwingptr
movq mm5, round13
conv_loop: /* while (pcmptr < pcmend) */
cmp esi, pcmend
jae conv_done
/* prime */
movq mm7, [esi+0]
pmaddwd mm7, [eax+0]
movq mm1, [edi+0]
pmaddwd mm1, [edx+0]
movq mm2, [esi+8]
psrad mm7, 8
pmaddwd mm2, [eax+8]
//paddd mm7, mm0
movq mm3, [edi+8]
psrad mm1, 8
pmaddwd mm3, [edx+8]
paddd mm7, mm1
/* kernel */
movq mm0, [esi+16]
psrad mm2, 6
pmaddwd mm0, [eax+16]
paddd mm7, mm2
movq mm1, [edi+16]
psrad mm3, 6
pmaddwd mm1, [edx+16]
paddd mm7, mm3
movq mm2, [esi+24]
psrad mm0, 4
pmaddwd mm2, [eax+24]
paddd mm7, mm0
movq mm3, [edi+24]
psrad mm1, 4
pmaddwd mm3, [edx+24]
paddd mm7, mm1
/* end kernel */
movq mm0, [esi+32]
psrad mm2, 3
pmaddwd mm0, [eax+32]
paddd mm7, mm2
movq mm1, [edi+32]
psrad mm3, 3
pmaddwd mm1, [edx+32]
paddd mm7, mm3
movq mm2, [esi+40]
psrad mm0, 2
pmaddwd mm2, [eax+40]
paddd mm7, mm0
movq mm3, [edi+40]
psrad mm1, 2
pmaddwd mm3, [edx+40]
paddd mm7, mm1
movq mm0, [esi+48]
psrad mm2, 1
pmaddwd mm0, [eax+48]
paddd mm7, mm2
movq mm1, [edi+48]
psrad mm3, 1
pmaddwd mm1, [edx+48]
paddd mm7, mm3
/* acc shift */
psrad mm7, 8
movq mm2, [esi+56]
psrad mm0, 8
pmaddwd mm2, [eax+56]
paddd mm7, mm0
movq mm3, [edi+56]
psrad mm1, 8
pmaddwd mm3, [edx+56]
paddd mm7, mm1
movq mm0, [esi+64]
psrad mm2, 7
pmaddwd mm0, [eax+64]
paddd mm7, mm2
movq mm1, [edi+64]
psrad mm3, 7
pmaddwd mm1, [edx+64]
paddd mm7, mm3
movq mm2, [esi+72]
psrad mm0, 6
pmaddwd mm2, [eax+72]
paddd mm7, mm0
movq mm3, [edi+72]
psrad mm1, 6
pmaddwd mm3, [edx+72]
paddd mm7, mm1
movq mm0, [esi+80]
psrad mm2, 5
pmaddwd mm0, [eax+80]
paddd mm7, mm2
movq mm1, [edi+80]
psrad mm3, 5
pmaddwd mm1, [edx+80]
paddd mm7, mm3
movq mm2, [esi+88]
psrad mm0, 4
pmaddwd mm2, [eax+88]
paddd mm7, mm0
movq mm3, [edi+88]
psrad mm1, 4
pmaddwd mm3, [edx+88]
paddd mm7, mm1
/* flush */
psrad mm2, 1
paddd mm7, mm2
psrad mm3, 1
paddd mm7, mm3
/* horizontal add */
movq mm0, mm7
psrlq mm7, 32
paddd mm7, mm0
/* round, shift, clip */
paddd mm7, mm5
psrad mm7, 14
packssdw mm7, mm7
movd eax, mm7
/* bump to next state */
mov edx, nextstate
mov ebx, [edx+4*ebx] // phase = nextstate[phase]
lea edx, [ebx+ebx]
and edx, 0xff<<1
add esi, edx // pcmptr += pcmstep
sub edi, edx // revptr -= pcmstep
mov edx, ebx
shr ebx, 20 // phase
shr edx, 3
and edx, 0xfff<<5 // 32 * idx
lea edx, [edx+2*edx] // 96 * idx
add edx, filter // lwingptr = filter[idx]
mov word ptr [ecx], ax // *outptr++
// add ecx, 2
add ecx, outstride
lea eax, [ebx+2*ebx] // 3 * idx
shl eax, 5 // 96 * idx
add eax, filter // rwingptr = filter[idx]
jmp conv_loop
conv_done:
sub ecx, outbuf // outsamps = outptr - outbuf
shr ecx, 1
mov outsamps, ecx
mov pcmptr, esi
mov revptr, edi
mov phase, ebx
mov rwingptr, eax
mov lwingptr, edx
emms
}
/* save filter state */
s->phase = phase;
s->offset = pcmptr - pcmend;
s->rwingptr = rwingptr;
s->lwingptr = lwingptr;
memmove(s->pcmleft-NTAPS, s->pcmleft-NTAPS + insamps, NTAPS * sizeof(short));
// TOCK(outsamps);
return outsamps;
}
#include "mymath.h"
int
RAGetMaxOutputMMX(int insamps, void *inst)
{
state_t *s = (state_t *)inst;
int inframes, outframes, outsamps;
inframes = (insamps + (s->nchans-1)) / s->nchans;
outframes = (int) myCeil((double)inframes * s->up / s->dn);
outsamps = outframes * s->nchans;
return outsamps;
}
int
RAGetMinInputMMX(int outsamps, void *inst)
{
state_t *s = (state_t *)inst;
int inframes, outframes, insamps;
outframes = (outsamps + (s->nchans-1)) / s->nchans;
inframes = (int) myCeil((double)outframes * s->dn / s->up);
insamps = inframes * s->nchans;
return insamps;
}
int
RAGetDelayMMX(void *inst)
{
state_t *s = (state_t *)inst;
return (int)(NWING * (float)s->up / s->dn) ;
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -