📄 iir32_emac.s
字号:
addq.l #1,d1
bra .FORi4 ;//jumping to .FORi4
;-----------------------------------------------------------------------
; Block path (outer loop #3): every pass builds four output samples at
; once in ACC0..ACC3. This first part accumulates the input-sample terms.
; The test that selects this path is above this excerpt; the annotations
; below give it as N>=4, i.e. at least 7 coefficients.
;
; Register use in this part, as shown by the instructions below (names
; taken from the existing annotations):
;   d0 = N, d7 = n, d1 = i (outer index), d2 = k (inner counter)
;   a2 = pIIR; the longword at (a2) is the coefficient table pointer
;   68(a7) = pX
;   a1 = pCurX, a3 = pCurCoef, a6 = coefficient currently being applied
;   d3-d6 = sliding window of four input samples
; All loop compares are unsigned (bhi / beq).
; In the table the coefficients for input and output samples alternate
; after the first entry, so every second entry is stepped over here.
;-----------------------------------------------------------------------
.NMORE3: ;}//end if #6
;//taken when the number of coefficients is greater than or equal to 7
move.l d0,d1 ;if (N>=4) { //if #7
addq.l #4,d1 ;//i=N+4
.FORi3: ;for (i=N+4; i<=n; i+=4){ //begin of outer loop #3
cmp.l d7,d1 ;//comparing i with n
bhi .ENDFORi3 ;//if (i>n) then jump to .ENDFORi3
move.l (a2),a3 ;pCurCoef=pIIR->pIirCoef;
move.l 68(a7),a6 ;pCurX=pX+i-4;
lea (-16,a6,d1.l*4),a1 ;//a1 = pX + 4*i - 16 bytes, i.e. &pX[i-4]
movem.l (a1),d3-d6 ;d3=pCurX[0]; d4=pCurX[1]; d5=pCurX[2]; d6=pCurX[3]; //a1 itself is not advanced
move.l (a3)+,a6 ;a6=*pCurCoef++; //first coefficient
move.l d0,d2 ;d2=N%4+4;
andi.l #3,d2
addq.l #4,d2
cmpi.l #4,d2 ;if (d2==4){ //if #8
bne .CYCLE_BEGIN2
addq.l #1,d2 ;d2+=1;
;}//end if #8
;//k now starts at 5, 5, 6 or 7 for N%4 = 0, 1, 2, 3. With the k<=N test and
;//the step of 4 below, inner loop #9 then runs (N-1)/4 times (integer division).
.CYCLE_BEGIN2:
;//multiplying 4 input samples by the first coefficient
;//(the load operand of a mac.l replaces the register after the old value has been used, as the pseudo-code shows)
mac.l a6,d6,<<,-(a1),d6,ACC3 ;ACC3+=a6*d6; d6=*--pCurX;
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d3,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d3; a6=*pCurCoef++;
;//loop multiplying 8 input samples by 4 coefficients per iteration
;//the roles of d3-d6 rotate from group to group instead of moving data between registers
.FORk4: ;for(k=d2; k<=N; k+=4) { //begin of inner loop #9
cmp.l d0,d2 ;//comparing k with N
bhi .ENDFORk4 ;//if (k>N) then jump to .ENDFORk4
adda.l #4,a3 ;pCurCoef++; //skip the coefficient for output sample
mac.l a6,d5,<<,-(a1),d5,ACC3 ;ACC3+=a6*d5; d5=*--pCurX;
mac.l a6,d4,<<,ACC2 ;ACC2+=a6*d4;
mac.l a6,d3,<<,ACC1 ;ACC1+=a6*d3;
mac.l a6,d6,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d6; a6=*pCurCoef++;
adda.l #4,a3 ;pCurCoef++; //skip the coefficient for output sample
mac.l a6,d4,<<,-(a1),d4,ACC3 ;ACC3+=a6*d4; d4=*--pCurX;
mac.l a6,d3,<<,ACC2 ;ACC2+=a6*d3;
mac.l a6,d6,<<,ACC1 ;ACC1+=a6*d6;
mac.l a6,d5,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d5; a6=*pCurCoef++;
adda.l #4,a3 ;pCurCoef++; //skip the coefficient for output sample
mac.l a6,d3,<<,-(a1),d3,ACC3 ;ACC3+=a6*d3; d3=*--pCurX;
mac.l a6,d6,<<,ACC2 ;ACC2+=a6*d6;
mac.l a6,d5,<<,ACC1 ;ACC1+=a6*d5;
mac.l a6,d4,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d4; a6=*pCurCoef++;
adda.l #4,a3 ;pCurCoef++; //skip the coefficient for output sample
mac.l a6,d6,<<,-(a1),d6,ACC3 ;ACC3+=a6*d6; d6=*--pCurX;
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d3,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d3; a6=*pCurCoef++;
addq.l #4,d2 ;//k+=4;
bra .FORk4 ;//jumping to .FORk4
.ENDFORk4: ;}//end of inner loop #9
move.l d0,d2 ;//k=(N-1)%4 - coefficients for input samples still to be applied
subq.l #1,d2
andi.l #3,d2
;//loop multiplying 4 input samples by 1 coefficient per iteration
;//here the window is shifted with explicit register moves after each coefficient
.FORk5: ;for(k=(N-1)%4; k>0; k--) { //begin of inner loop #10
cmpi.l #0,d2 ;//comparing k with 0
beq .ENDFORk5 ;//if (k==0) then jump to .ENDFORk5
adda.l #4,a3 ;pCurCoef++; //skip the coefficient for output sample
mac.l a6,d5,<<,ACC3 ;ACC3+=a6*d5;
mac.l a6,d4,<<,ACC2 ;ACC2+=a6*d4;
mac.l a6,d3,<<,ACC1 ;ACC1+=a6*d3;
mac.l a6,d6,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d6; a6=*pCurCoef++;
move.l d4,d5 ;d5=d4;
move.l d3,d4 ;d4=d3;
move.l d6,d3 ;d3=d6;
move.l -(a1),d6 ;d6=*--pCurX;
subq.l #1,d2 ;//decrementing k
bra .FORk5 ;//jumping to .FORk5
.ENDFORk5: ;}//end of inner loop #10
;-----------------------------------------------------------------------
; Second part of an outer loop #3 pass: accumulate the terms that use
; previously stored output samples. The coefficient table is walked
; backwards from its end, stepping over the interleaved coefficients
; for input samples, while the output samples are read forwards.
;   d0 = N, d1 = i, d2 = scratch / k, a2 = pIIR, 72(a7) = pY
;   a5 = pPredY, a3 = pCurCoef, a6 = coefficient currently being applied
;   d3-d6 = sliding window of four earlier output samples
; (names taken from the existing annotations)
;-----------------------------------------------------------------------
move.l 72(a7),a6 ;pPredY=pY+i-N-3;
move.l d1,d2
sub.l d0,d2 ;//d2 = i-N
lea (-12,a6,d2.l*4),a5 ;//a5 = pY + 4*(i-N) - 12 bytes, i.e. &pY[i-N-3]
move.l (a2),a6 ;pCurCoef=pIIR->pIirCoef+N*2-1;
lsl.l #1,d0 ;//d0 is doubled only to form the index ...
lea (-4,a6,d0.l*4),a3 ;//a3 = pIirCoef + 4*(2*N) - 4 bytes, i.e. &pIirCoef[2*N-1]
lsr.l #1,d0 ;//... and halved again so that d0 holds N for the compares below
movem.l (a5),d3-d6 ;d3=pPredY[0]; d4=pPredY[1]; d5=pPredY[2]; d6=pPredY[3]; //a5 itself is not advanced here
adda.l #16,a5 ;pPredY+=4; //step past the four samples just loaded
move.l -(a3),a6 ;a6=*--pCurCoef;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for input sample
move.l #8,d2 ;//k=8
;//loop multiplying 8 output samples by 4 coefficients per iteration
;//the roles of d3-d6 rotate from group to group instead of moving data between registers
.FORk6: ;for(k=8; k<=N; k+=4) { //begin of inner loop #11
cmp.l d0,d2 ;//comparing k with N
bhi .ENDFORk6 ;//if (k>N) then jump to .ENDFORk6
mac.l a6,d3,<<,(a5)+,d3,ACC0 ;ACC0+=a6*d3; d3=*pPredY++;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
mac.l a6,d6,<<,-(a3),a6,ACC3 ;ACC3+=a6*d6; a6=*--pCurCoef;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for input sample
mac.l a6,d4,<<,(a5)+,d4,ACC0 ;ACC0+=a6*d4; d4=*pPredY++;
mac.l a6,d5,<<,ACC1 ;ACC1+=a6*d5;
mac.l a6,d6,<<,ACC2 ;ACC2+=a6*d6;
mac.l a6,d3,<<,-(a3),a6,ACC3 ;ACC3+=a6*d3; a6=*--pCurCoef;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for input sample
mac.l a6,d5,<<,(a5)+,d5,ACC0 ;ACC0+=a6*d5; d5=*pPredY++;
mac.l a6,d6,<<,ACC1 ;ACC1+=a6*d6;
mac.l a6,d3,<<,ACC2 ;ACC2+=a6*d3;
mac.l a6,d4,<<,-(a3),a6,ACC3 ;ACC3+=a6*d4; a6=*--pCurCoef;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for input sample
mac.l a6,d6,<<,(a5)+,d6,ACC0 ;ACC0+=a6*d6; d6=*pPredY++;
mac.l a6,d3,<<,ACC1 ;ACC1+=a6*d3;
mac.l a6,d4,<<,ACC2 ;ACC2+=a6*d4;
mac.l a6,d5,<<,-(a3),a6,ACC3 ;ACC3+=a6*d5; a6=*--pCurCoef;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for input sample
addq.l #4,d2 ;//k+=4;
bra .FORk6 ;//jumping to .FORk6
.ENDFORk6: ;}//end of inner loop #11
move.l d0,d2 ;d2=(N-4)%4; //the mask keeps the low two bits, so this equals N%4
subq.l #4,d2
andi.l #3,d2
;//loop multiplying 4 output samples by 1 coefficient per iteration
;//here the window is shifted with explicit register moves after each coefficient
.FORk7: ;for(k=d2; k>0; k--) { //begin of inner loop #12
cmpi.l #0,d2 ;//comparing k with 0
beq .ENDFORk7 ;//if (k==0) then jump to .ENDFORk7
mac.l a6,d3,<<,ACC0 ;ACC0+=a6*d3;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
mac.l a6,d6,<<,-(a3),a6,ACC3 ;ACC3+=a6*d6; a6=*--pCurCoef;
move.l d4,d3 ;d3=d4;
move.l d5,d4 ;d4=d5;
move.l d6,d5 ;d5=d6;
move.l (a5)+,d6 ;d6=*pPredY++;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for input sample
subq.l #1,d2 ;//decrementing k
bra .FORk7 ;//jumping to .FORk7
.ENDFORk7: ;} //end of inner loop #12
;-----------------------------------------------------------------------
; Final stage of an outer loop #3 pass. Three coefficients remain; call
; them cA (in a6 on entry, copied to d6), cB (loaded next, copied to d3)
; and cC (loaded last, left in a6). d3, d4, d5 hold earlier output samples.
; The four results y0..y3 depend on each other, so they are finished and
; stored strictly in order:
;   ACC0 += cA*d3 + cB*d4 + cC*d5   -> y0
;   ACC1 += cA*d4 + cB*d5 + cC*y0   -> y1
;   ACC2 += cA*d5 + cB*y0 + cC*y1   -> y2
;   ACC3 += cA*y0 + cB*y1 + cC*y2   -> y3
; Each movclr.l leaves its accumulator at zero for the next pass.
; a0 = pCurY (output write pointer), a3 = pCurCoef, d1 = i, d0 = N, d7 = n.
;-----------------------------------------------------------------------
;//preparing final multiplications
move.l a6,d6 ;d6=a6; //keep cA for the ACC3 term below
mac.l a6,d3,<<,ACC0 ;ACC0+=a6*d3;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d5,<<,-(a3),a6,ACC2 ;ACC2+=a6*d5; a6=*--pCurCoef;
move.l a6,d3 ;d3=a6; //keep cB; the sample that was in d3 is no longer needed
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for input sample
mac.l a6,d4,<<,ACC0 ;ACC0+=a6*d4;
mac.l a6,d5,<<,-(a3),a6,ACC1 ;ACC1+=a6*d5; a6=*--pCurCoef;
mac.l a6,d5,<<,ACC0 ;ACC0+=a6*d5;
;//multiplying the samples being computed right now by the coefficients and storing the results
movclr.l ACC0,d4 ;d4=ACC0; ACC0=0; //y0
move.l d4,(a0)+ ;(*pCurY++)=d4;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
movclr.l ACC1,d5 ;d5=ACC1; ACC1=0; //y1
move.l d5,(a0)+ ;(*pCurY++)=d5;
mac.l d3,d4,<<,ACC2 ;ACC2+=d3*d4;
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
movclr.l ACC2,d2 ;d2=ACC2; ACC2=0; //y2
move.l d2,(a0)+ ;(*pCurY++)=d2;
mac.l d6,d4,<<,ACC3 ;ACC3+=d6*d4;
mac.l d3,d5,<<,ACC3 ;ACC3+=d3*d5;
mac.l a6,d2,<<,ACC3 ;ACC3+=a6*d2;
movclr.l ACC3,d6 ;d6=ACC3; ACC3=0; //y3
move.l d6,(a0)+ ;(*pCurY++)=d6;
addq.l #4,d1 ;//i+=4
bra .FORi3 ;//jumping to .FORi3
.ENDFORi3: ;}//end of outer loop #3
;//start index for the one-sample-at-a-time loop that follows:
;//(n-N)%4 samples are left over after the passes of four
move.l d7,d5 ;d5=(n-N)%4;
sub.l d0,d5
andi.l #3,d5
move.l d7,d1 ;d1=n-d5+1;
sub.l d5,d1
addq.l #1,d1
;}//end if #7
;-----------------------------------------------------------------------
; Outer loop #4: computes the remaining output samples one at a time,
; for i = d1 .. n (unsigned compare), using ACC0 only.
; Per pass: the first table entry multiplies pX[i-1]; inner loop #13 then
; takes coefficients in pairs, the first applied to the next older input
; sample and the second to the next older output sample, starting at
; pY[i-2].
;   d0 = N, d7 = n, d1 = i, d2 = k, a2 = pIIR, 68(a7) = pX, 72(a7) = pY
;   a1 = pCurX, a5 = pPredY, a3 = pCurCoef, a0 = pCurY (write pointer)
; (names taken from the existing annotations)
; NOTE(review): d4 is always loaded one coefficient ahead, so the last
; load in a pass reads the table entry after the last one used and the
; value is discarded - confirm the table may be read one entry past the end.
;-----------------------------------------------------------------------
;//computing a "tail" of output samples
.FORi4: ;for(i=d1; i<=n; i++) { //begin of outer loop #4
cmp.l d7,d1 ;//comparing i with n
bhi .ENDFORi4 ;//if (i>n) then jump to .ENDFORi4
move.l 68(a7),a6 ;pCurX=pX+i;
lea (0,a6,d1.l*4),a1
move.l 72(a7),a6 ;pPredY=pY+i-1;
lea (-4,a6,d1.l*4),a5
move.l (a2),a3 ;pCurCoef=pIIR->pIirCoef;
move.l -(a1),d3 ;d3=*--pCurX;
move.l (a3)+,d4 ;d4=*pCurCoef++;
mac.l d3,d4,<<,ACC0 ;ACC0+=d3*d4;
moveq #1,d2 ;//k=1
move.l (a3)+,d4 ;d4=*pCurCoef++;
.FORk8: ;for(k=1; k<N; k++) { //begin of inner loop #13
cmp.l d0,d2 ;//comparing k with N
bcc .ENDFORk8 ;//if (k>=N) then jump to .ENDFORk8 (unsigned)
move.l -(a1),d3 ;d3=*--pCurX;
mac.l d3,d4,<<,(a3)+,d4,ACC0 ;ACC0+=d3*d4; d4=*pCurCoef++;
move.l -(a5),d3 ;d3=*--pPredY;
mac.l d3,d4,<<,(a3)+,d4,ACC0 ;ACC0+=d3*d4; d4=*pCurCoef++;
addq.l #1,d2 ;//Incrementing k
bra .FORk8 ;//jumping to .FORk8
.ENDFORk8: ;}//end of inner loop #13
movclr.l ACC0,d6 ;d6=ACC0; ACC0=0;
move.l d6,(a0)+ ;(*pCurY++)=d6;
addq.l #1,d1 ;//Incrementing i
bra .FORi4 ;//Jumping to .FORi4
.ENDFORi4: ;}//end of outer loop #4
;---====== Begin of History Buffer Loading ======---
; Copies N-1 longword pairs into the buffer whose pointer is at 8(a2).
; Each pair is one longword read through a0 (starting at pY+n-N+1)
; followed by one read through a1 (starting at pX+n-N+1). The longword at
; 4(a2), minus 1, is then stored to 12(a2).
; d7 is overwritten here: first with n-N, later with the stored count.
; a0 is reused as a read pointer into pY.
move.l 68(a7),a6 ;pCurX=pX+n-N+1;
sub.l d0,d7 ;//d7 = n-N
lea (4,a6,d7.l*4),a1
move.l 72(a7),a6 ;pCurY=pY+n-N+1;
lea (4,a6,d7.l*4),a0
move.l 8(a2),a4 ;pCurHistory=pIIR->pIirHistory;
moveq #1,d1 ;for(i=1;i<N;i++) { //begin of outer loop #5
.FORbuf:
cmp.l d0,d1 ;//Comparing i with N
bcc .ENDbuf ;//If (i>=N) then jump to .ENDbuf (unsigned)
move.l a0,a6 ;*pCurHistory++=*pCurY++; //done in three steps: copy the pointer, advance it, then move the data
addq.l #4,a0
move.l (a6),(a4)+
move.l (a1)+,(a4)+ ;*pCurHistory++=*pCurX++;
addq.l #1,d1 ;//Incrementing i
bra .FORbuf ;//Jumping to .FORbuf
.ENDbuf: ;}//end of outer loop #5
move.l 4(a2),d7 ;d7=pIIR->iIirCoefCount;
subq.l #1,d7 ;d7-=1;
move.l d7,12(a2) ;pIIR->iIirHistoryCount=pIIR->iIirCoefCount-1;
; ---====== End of History Buffer Loading ======--
;-=Restoring values of used registers=-
; Reloads MACSR from the longword just below the current stack pointer,
; then reloads d0-d7/a0-a6 (15 registers, 60 bytes) from the stack,
; releases that area and returns.
; NOTE(review): the matching save code is above this excerpt. The MACSR
; value is fetched from -4(a7), i.e. from below the stack pointer as it
; stood during the routine; presumably the prologue left it there. Confirm
; that nothing (for example an exception frame pushed on this stack) can
; overwrite that longword while the routine runs.
lea -4(a7),a7 ;//step down to the longword below the register save area
move.l (a7),d0 ;//d0 = value to put back into MACSR
move.l d0,MACSR ;//restore the MAC status register
lea 4(a7),a7 ;//a7 points at the register save area again
movem.l (a7),d0-d7/a0-a6; reload all fifteen registers; a7 is not advanced by this addressing mode
lea 60(a7),a7 ;//release the 60-byte register save area
rts
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -