; File: iir16_emac.s
; [code-viewer page residue: "Font size:" control label - not part of the source]
;---- Start-up tail: samples that may still need the saved history ----
; Entered at .EndD4 with i (d7) and tmp (d6) set by code above this point
; (not visible here). After i += 8 the phase is skipped when i == 0;
; otherwise one output sample is produced per pass and i drops by 2 per
; pass until it is no longer positive. Each sample is accumulated in ACC0:
;   1. first coefficient * current input sample,
;   2. coefficient pairs applied to earlier samples of the current X/Y
;      buffers while j = tmp - i is positive (j drops by 2 per pair),
;   3. if IIR->iIirHistoryCount (12(a2)) is non-zero and i > 0, coefficient
;      pairs applied to the saved history buffer (j = i, 2 per pair).
; Register use visible in this phase: a0 = pX, a1 = pY, a2 = IIR structure,
; a6 = pIirCoef, a3 = pCurCoef, a4 = pCurX / pCurHistory, a5 = pCurY,
; d4 = coefficient pair (upper word multiplies X, lower word multiplies Y).
; NOTE(review): the units of tmp and i here (apparently 2 per coefficient
; pair) are set outside this chunk - confirm against the code above.
.EndD4:
addq.l #8, d7 ; i += 8;
beq .EndTailH ;while(i)
;{
;---=Computation of N % 4 last output samples==--
.BegTailH:
;--== Next input samples loading ==--
move.w (a0), d2 ;iCurX0 = *pX;
movea.l a0, a4 ;pCurX = pX;
lea 2(a0), a0 ;pX++;
;--== Setting output samples pointer ==--
movea.l a1, a5 ;pCurY = pY;
;--== Next coefficients loading ==--
move.l a6, a3 ;pCurCoef = pIirCoef;
move.l (a3), d4 ;iCurA = *pCurCoef; (upper word of d4; lower word is not used here)
lea 2(a3), a3 ;pCurCoef++; (advance by one 16-bit coefficient)
;--== Input samples and first coefficient multiplications ==--
mac.w d4.u, d2.l, <<, ACC0 ;iOut0 = iCurA * iCurX0
;--== The count of inner loops calculation ==--
move.l d6, d5 ;j = tmp;
sub.l d7, d5 ;j = tmp - i;
ble .EndIn1TailH ;while(j > 0)
;{
.ForIn1TailH:
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
;--== Next input sample loading ==--
move.w -(a4), d2 ;iCurX0 = *--pCurX;
;--== Next output sample loading ==--
move.w -(a5), d0 ;iCurY0 = *--pCurY;
;--== Two multiplications for each output sample ==--
mac.w d4.u, d2.l, <<, ACC0 ;iOut0 += iCurA * iCurX0
mac.w d4.l, d0.l, <<, ACC0 ;iOut0 += iCurB * iCurY0
subq.l #2, d5 ;j-= 2;
bgt .ForIn1TailH ;} // while(j > 0)
.EndIn1TailH:
move.l 12(a2), d5 ;if(pIIR->iIirHistoryCount)
beq .EndInHTail ;{
;--== The count of inner loops calculation ==--
move.l d7, d5 ;j = i
ble .EndInHTail ;if(j > 0)
;{
;--== Current history buffer pointer initialization==--
move.l 8(a2), a4 ;pCurHistory = pIirHistory + tmp * 2 (byte offset)
adda.l d6, a4 ;
adda.l d6, a4 ;while(j > 0)
;{
.ForInHTail:
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
;--== Next input sample loading ==--
move.w -(a4), d2 ;iCurX0 = *--pCurHistory;
;--== Next output sample loading ==--
move.w -(a4), d0 ;iCurY0 = *--pCurHistory;
;--== Two multiplications for each output sample ==--
mac.w d4.u, d2.l, <<, ACC0 ;iOut0 += iCurA * iCurX0
mac.w d4.l, d0.l, <<, ACC0 ;iOut0 += iCurB * iCurY0
subq.l #2, d5 ;j -= 2;
bgt .ForInHTail ;}//while (j > 0)
.EndInHTail: ;}//if(pIIR->iIirHistoryCount)
;--==Storing computed sample into the memory==--
movclr.l ACC0, d0 ;d0 = iOut0; ACC0 is cleared for the next sample
;#ifndef __FRACT_M
; swap d0 ;*pY++ = iOut0;
;#endif
move.w d0, (a1)+ ;*pY++ = low 16 bits of d0
subq.l #2, d7 ;i -= 2;
bgt .BegTailH ;}
.EndTailH: ;}//if(i)
;---====== Begin of cycle of getting Y[N+1]..Y[n] (N = (pIIR->iIirCoefCount - 1) / 2)======---
; Main loop: four output samples per pass, one per accumulator (ACC0..ACC3).
; Pass count is (n - tmp) >> 2 with tmp = (iIirCoefCount - 1) >> 1.
; Lane layout of the data registers inside the loop:
;   d2.u / d2.l / d3.u / d3.l = input samples feeding ACC0 / ACC1 / ACC2 / ACC3
;   d0.u / d0.l / d1.u / d1.l = output samples feeding ACC0 / ACC1 / ACC2 / ACC3
;   d4.u = iCurA (multiplies inputs), d4.l = iCurB (multiplies outputs)
; The four outputs of the current group are not yet known while the inner
; loop runs, so d0/d1 start at zero and the terms that depend on those
; outputs are added afterwards, from .NextPre onwards.
move.l 4(a2), d6 ;tmp = IIR->iIirCoefCount;
move.l 80(a7), d7 ;i = n;
subq.l #1, d6 ;tmp = (tmp - 1) / 2;
asr.l #1, d6 ;
sub.l d6, d7 ;i -= tmp;
asr.l #2, d7 ; i /= 4
beq .BegTail2 ;while(i != 0)
;{
;--==Y[N+1]..Y[k] computation, where k is (N + 1) + ((n - N - 1) % 4)
.BegD4E: ;Begin of inner loop
;--== Next input samples loading ==--
move.l (a0), d2 ;iCurX0 = pX[0]; iCurX1 = pX[1];
move.l 4(a0), d3 ;iCurX2 = pX[2]; iCurX3 = pX[3];
movea.l a0, a4 ;pCurX = pX;
lea 8(a0), a0 ;pX += 4;
;--== Make four previous output samples zero==--
clr.l d0 ;iCurY0 = iCurY1 = 0;
clr.l d1 ;iCurY2 = iCurY3 = 0;
movea.l a1, a5 ;pCurY = pY;
;--== Next coefficient loading ==--
move.l a6, a3 ;pCurCoef = pIirCoef;
move.l (a3), d4 ;iCurA = *pCurCoef; (upper word of d4; lower word is not used here)
lea 2(a3), a3 ;pCurCoef++; (advance by one 16-bit coefficient)
;--== Input samples and first coefficient multiplications ==--
mac.w d4.u, d2.u, <<, ACC0 ;iOut0 = iCurA * iCurX0
mac.w d4.u, d2.l, <<, ACC1 ;iOut1 = iCurA * iCurX1
mac.w d4.u, d3.u, <<, ACC2 ;iOut2 = iCurA * iCurX2
mac.w d4.u, d3.l, <<, ACC3 ;iOut3 = iCurA * iCurX3
;--== The count of inner loops calculation ==--
move.l d6, d5 ;j = tmp;
btst #0, d5 ;test bit 0 of j
beq .ForIn1EBeg ;if(j & 1)
;{
;--== This multiplications executed only when j is odd ==--
; One coefficient pair is applied with every lane shifted back by one
; sample: after the move/swap sequences below, d2:d3 hold
; x[-1], x[0], x[1], x[2] and d0:d1 hold y[-1], 0, 0, 0 (relative to the
; first sample of this group).
;--== Next coefficient loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
;--== Next input sample loading ==--
move.w d2, d3 ;low word of d3 = old iCurX1
move.w -(a4), d2 ;low word of d2 = *--pCurX;
swap d2 ;d2 = { *--pCurX, old iCurX0 }
swap d3 ;d3 = { old iCurX1, old iCurX2 }
;--== Next output sample loading ==--
move.w d0, d1 ;low word of d1 = old iCurY1
move.w -(a5), d0 ;low word of d0 = *--pCurY;
swap d0 ;d0 = { *--pCurY, old iCurY0 }
swap d1 ;d1 = { old iCurY1, old iCurY2 }
mac.w d4.u, d2.u, <<, ACC0 ;iOut0 += iCurA * iCurX0
mac.w d4.u, d2.l, <<, ACC1 ;iOut1 += iCurA * iCurX1
mac.w d4.u, d3.u, <<, ACC2 ;iOut2 += iCurA * iCurX2
mac.w d4.u, d3.l, <<, ACC3 ;iOut3 += iCurA * iCurX3
mac.w d4.l, d0.u, <<, ACC0 ;iOut0 += iCurB * iCurY0
mac.w d4.l, d0.l, <<, ACC1 ;iOut1 += iCurB * iCurY1
mac.w d4.l, d1.u, <<, ACC2 ;iOut2 += iCurB * iCurY2
mac.w d4.l, d1.l, <<, ACC3 ;iOut3 += iCurB * iCurY3
subq.l #1, d5 ;j--; (j is even from here on)
beq .EndIn1E ;}
.ForIn1EBeg:
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
; Each pass of .ForIn1E consumes two coefficient pairs: the first pair is
; applied to ACC1..ACC3 before the registers are shifted by two samples and
; to ACC0 afterwards; the second pair is applied to all four accumulators.
.ForIn1E:
subq.l #2, d5 ;for(;j >= 0; j -=2)
blt .EndIn1E ;{
;--== Three first input samples and coefficients multiplication ==--
mac.w d4.u, d2.u, <<, ACC1 ;iOut1 += iCurA * iCurX0
mac.w d4.u, d2.l, <<, ACC2 ;iOut2 += iCurA * iCurX1
mac.w d4.u, d3.u, <<, ACC3 ;iOut3 += iCurA * iCurX2
;--== Three first output samples and coefficients multiplication ==--
mac.w d4.l, d0.u, <<, ACC1 ;iOut1 += iCurB * iCurY0
mac.w d4.l, d0.l, <<, ACC2 ;iOut2 += iCurB * iCurY1
mac.w d4.l, d1.u, <<, ACC3 ;iOut3 += iCurB * iCurY2
;--== Loading two next input samples ==--
move.l d2, d3 ;iCurX3 = iCurX1; iCurX2 = iCurX0;
move.l -(a4), d2 ;iCurX1 = *--pCurX; iCurX0 = *--pCurX;
;--== Loading two next output samples ==--
move.l d0, d1 ;iCurY3 = iCurY1; iCurY2 = iCurY0;
move.l -(a5), d0 ;iCurY1 = *--pCurY; iCurY0 = *--pCurY;
;--== Fourth input sample and coefficient multiplication ==--
mac.w d4.u, d2.l, <<, ACC0 ;iOut0 += iCurA * iCurX1
;--== Fourth output sample and coefficient multiplication ==--
mac.w d4.l, d0.l, <<, ACC0 ;iOut0 += iCurB * iCurY1
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
;--== Next Four input sample and coefficients multiplication ==--
mac.w d4.u, d2.u, <<, ACC0 ;iOut0 += iCurA * iCurX0
mac.w d4.u, d2.l, <<, ACC1 ;iOut1 += iCurA * iCurX1
mac.w d4.u, d3.u, <<, ACC2 ;iOut2 += iCurA * iCurX2
mac.w d4.u, d3.l, <<, ACC3 ;iOut3 += iCurA * iCurX3
;--== Next Four output sample and coefficients multiplication ==--
mac.w d4.l, d0.u, <<, ACC0 ;iOut0 += iCurB * iCurY0
mac.w d4.l, d0.l, <<, ACC1 ;iOut1 += iCurB * iCurY1
mac.w d4.l, d1.u, <<, ACC2 ;iOut2 += iCurB * iCurY2
;--== Last mac instruction with next coefficients loading ==--
; NOTE(review): on the final pass this parallel load fetches a coefficient
; pair that is never used; it appears to read 4 bytes past the last
; coefficient - confirm the coefficient buffer tolerates that.
mac.w d4.l, d1.l, <<, (a3)+, d4, ACC3 ;iOut3 += iCurB * iCurY3
;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
bra .ForIn1E ;}
.EndIn1E:
;--==Storing computed samples into the memory==--
; Preload the coefficients needed for the in-group output terms, each only
; when tmp is large enough: d4.u = pIirCoef[2] (tmp >= 1),
; d4.l = pIirCoef[4] (tmp >= 2), d5.u = pIirCoef[6] (tmp >= 3).
; d3 is used as a scratch copy of tmp for these range checks.
move.l d6, d3
move.l a6, a3 ;pCurCoef = pIirCoef;
subq.l #1, d3
blt .NextPre
move.l 4(a3), d4 ;d4.u = pIirCoef[2];
subq.l #1, d3
blt .NextPre
move.w 8(a3), d4 ;d4.l = pIirCoef[4];
subq.l #1, d3
blt .NextPre
move.l 12(a3), d5 ;d5.u = pIirCoef[6];
.NextPre:
move.l d6, d3
movclr.l ACC0, d0 ;d0 = iOut0; ACC0 is cleared
;#ifndef __FRACT_M
; swap d0 ;*pY++ = iOut0
;#endif
move.w d0, (a1)+ ;*pY++ = low 16 bits of d0
subq.l #1, d3
blt .NextY1
mac.w d4.u, d0.l, <<, ACC1 ;iOut1 += pIirCoef[2] * iOut0
.NextY1:
move.l d6, d3
movclr.l ACC1, d1 ;d1 = iOut1; ACC1 is cleared
;#ifndef __FRACT_M
; swap d1 ;*pY++ = iOut1
;#endif
move.w d1, (a1)+ ;*pY++ = low 16 bits of d1
subq.l #1, d3
blt .NextY2
mac.w d4.u, d1.l, <<, ACC2 ;iOut2 += pIirCoef[2] * iOut1
subq.l #1, d3
blt .NextY2
mac.w d4.l, d0.l, <<, ACC2 ;iOut2 += pIirCoef[4] * iOut0
.NextY2:
move.l d6, d3
movclr.l ACC2, d2 ;d2 = iOut2; ACC2 is cleared
;#ifndef __FRACT_M
; swap d2 ;*pY++ = iOut2
;#endif
move.w d2, (a1)+ ;*pY++ = low 16 bits of d2
subq.l #1, d3
blt .NextY3
mac.w d4.u, d2.l, <<, ACC3 ;iOut3 += pIirCoef[2] * iOut2
subq.l #1, d3
blt .NextY3
mac.w d4.l, d1.l, <<, ACC3 ;iOut3 += pIirCoef[4] * iOut1
subq.l #1, d3
blt .NextY3
mac.w d5.u, d0.l, <<, ACC3 ;iOut3 += pIirCoef[6] * iOut0
.NextY3:
movclr.l ACC3, d0 ;d0 = iOut3; ACC3 is cleared
;#ifndef __FRACT_M
; swap d0 ;*pY++ = iOut3
;#endif
move.w d0, (a1)+ ;*pY++ = low 16 bits of d0
subq.l #1, d7 ;i --;
bne .BegD4E ;}
.EndD4E:
;---=Computation of N % 4 last output samples==--
; Handles the (n - tmp) & 3 samples left over after the four-at-a-time
; loop, one sample per pass in ACC0. tmp is recomputed as
; (iIirCoefCount - 1) >> 1 and is also the number of coefficient pairs
; applied per sample; all earlier samples are read from the current X/Y
; buffers (no history buffer access in this phase).
; NOTE(review): the inner loop is a do-while that exits only when j reaches
; exactly 0, so it assumes tmp >= 1 - confirm iIirCoefCount >= 3 is
; guaranteed by the caller.
.BegTail2:
move.l 4(a2), d6 ;tmp = IIR->iIirCoefCount;
move.l 80(a7), d7 ;i = n;
subq.l #1, d6 ;tmp = (tmp - 1) / 2;
asr.l #1, d6 ;
sub.l d6, d7 ;i -= tmp;
andi.l #3, d7 ;i &= 3;
beq .EndTail2 ;while(i)
;{
.BegOutTail2:
;--== Next input samples loading ==--
move.w (a0), d2 ;iCurX0 = *pX;
movea.l a0, a4 ;pCurX = pX;
lea 2(a0), a0 ;pX++;
movea.l a1, a5 ;pCurY = pY;
;--== Next coefficient loading ==--
move.l a6, a3 ;pCurCoef = pIirCoef;
move.l (a3), d4 ;iCurA = *pCurCoef; (upper word of d4; lower word is not used here)
lea 2(a3), a3 ;pCurCoef++; (advance by one 16-bit coefficient)
;--== Input samples and first coefficient multiplications ==--
mac.w d4.u, d2.l, <<, ACC0 ;iOut0 += iCurA * iCurX0
;--== The count of inner loops calculation ==--
move.l d6, d5 ;j = tmp;
.ForInTail2: ;do{
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
;--== Next input samples loading ==--
move.w -(a4), d2 ;iCurX0 = *--pCurX;
;--== Next output samples loading ==--
move.w -(a5), d0 ;iCurY0 = *--pCurY;
;--== Two multiplications for each output sample ==--
mac.w d4.u, d2.l, <<, ACC0 ;iOut0 += iCurA * iCurX0
mac.w d4.l, d0.l, <<, ACC0 ;iOut0 += iCurB * iCurY0
subq.l #1, d5 ;j--;
bne .ForInTail2 ;}while(j);
.EndInTail2:
;--==Storing computed sample into the memory==--
movclr.l ACC0, d0 ;d0 = iOut0; ACC0 is cleared for the next sample
;#ifndef __FRACT_M
; swap d0 ;*pY++ = iOut0;
;#endif
move.w d0, (a1)+ ;*pY++ = low 16 bits of d0
subq.l #1, d7 ;i--;
bne .BegOutTail2 ;}//while(i)
.EndTail2:
;--==Filling history buffer==--
; Saves the most recent input/output samples for the next call and records
; iIirHistoryCount = iIirCoefCount - 1. The buffer is filled from its end
; (pIirHistory + 2 * (iIirCoefCount - 1) bytes) downwards; each pass stores
; one input word followed by one output word, walking pX and pY backwards.
; NOTE(review): the loop exits only when i reaches exactly 0 in steps of 4,
; so (iIirCoefCount - 1) * 2 must be a positive multiple of 4, i.e.
; iIirCoefCount odd and >= 3 - confirm this is guaranteed by the caller.
move.l 8(a2), a3 ;pCurHistory = IIR->pIirHistory;
move.l 4(a2), d0 ;i = IIR->iIirCoefCount;
subq.l #1, d0 ;i--;
move.l d0, 12(a2) ;IIR->iIirHistoryCount = i;
lsl.l #1, d0 ;i *= 2; (byte offset of the end of the history)
adda.l d0, a3 ;pCurHistory += i; (byte address arithmetic)
;do{
.NextHist:
move.w -(a0), -(a3) ;*--pCurHistory = *--pX;
move.w -(a1), -(a3) ;*--pCurHistory = *--pY;
subq.l #4, d0 ;i -= 4; (two 16-bit words were stored)
bne .NextHist ;}while(i);
;--==Restoring old MAC status register==--
; The stack frame read here (15 registers at 0..59(a7), MACSR value at
; 60(a7)) is presumably built by the prologue, which is not visible in
; this part of the file.
move.l 60(a7), d0 ;d0 = MACSR value stored at 60(a7)
move.l d0, MACSR ;MACSR = d0
movem.l (a7), d0-d7/a0-a6 ;reload d0-d7/a0-a6 from the frame
lea 64(a7), a7 ;release the 64-byte frame
rts
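; ---- Code-viewer keyboard shortcuts (web page residue, not part of the program) ----
;   Copy code             Ctrl + C
;   Search code           Ctrl + F
;   Full-screen mode      F11
;   Toggle theme          Ctrl + Shift + D
;   Show shortcuts        ?
;   Increase font size    Ctrl + =
;   Decrease font size    Ctrl + -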