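; File: icfftr2_dif.asm
; ("Font size:" -- residue of the web code viewer this listing was copied from; not part of the source)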
; _icfftr2_dif -- entry point of the routine.
; Three arguments are taken (x, w, n); the prolog copies them out of A4, B4
; and A6 respectively.  x is read and written in place as (real, imag) pairs of
; single-precision floats; w supplies (c, s) pairs read through wptrB.
; NOTE(review): the file name suggests an inverse complex radix-2 DIF FFT;
; confirm ordering of x/w and the minimum legal n against the caller.
_icfftr2_dif: ; .cproc x, w, n
; Declarations carried over from the higher-level formulation of the routine:
; short n2, i, k, l, nd2, n2A;
; float rtemp, itemp, s, c, xr, xi, yr, yi, Xr, Xi, Yr, Yi;
; float *wptrB, *xinptrA, *xoutptrB, *xoutptrA, p1r, p2r, p1i;
; float p2i;
; short n2p1;
;
; Symbolic register names.  Several physical registers are shared:
;   * k and l are both A2,
;   * n2 is B3, which is also the register the routine returns through,
;   * c/yr (A4/A6) and xr (B4) reuse the registers the arguments arrive in,
;   * A0 and B13 double as scratch in the function prolog/epilog.
p1r .set A0 ; p1r = c*rtemp
p .set A1 ; down-counter reloaded from n2; [!p] advances xoutptrB by n2As
k .set A2 ; outer-loop predicate, copied from tmpk
l .set A2 ; inner-loop branch counter (same register as k)
p2r .set A3 ; p2r = s*itemp
c .set A4 ; first word of the (c, s) pair loaded through wptrB
s .set A5 ; second word of the (c, s) pair loaded through wptrB
yr .set A6 ; real part loaded from xinptrA[n2]
yi .set A7 ; imag part loaded from xinptrA[n2]
rtemp .set A8 ; rtemp = xr - yr
itemp .set A9 ; itemp = xi - yi
Yr .set A10 ; Yr = p1r - p2r
Yi .set A11 ; Yi = p1i + p2i
wptrB .set A12 ; read pointer into w
xoutptrB .set A13 ; write pointer used for the Xi/Xr stores
p1i .set A14 ; p1i = c*itemp
p2i .set A15 ; p2i = s*rtemp
j .set B0 ; down-counter reloaded from n2; [!j] advances xoutptrA by n2As
i .set B1 ; down-counter reloaded from n2; [!i] advances xinptrA by n2As
m .set B2 ; delayed copy of i (via mpy i,1,m); [!m] gates the (c, s) reload
n2 .set B3 ; starts at 1, doubled once per outer pass
xr .set B4 ; real part loaded from *xinptrA
xi .set B5 ; imag part loaded from *xinptrA
x .set B6 ; base pointer of the data array
w .set B7 ; base pointer of the (c, s) table
n .set B8 ; third argument
Xr .set B9 ; Xr = xr + yr
Xi .set B10 ; Xi = xi + yi
xinptrA .set B11 ; read pointer into x
xoutptrA .set B12 ; write pointer used for the Yr/Yi stores
n2As .set B13 ; n2As = n2 << 3 (byte offset of n2 doublewords)
tmpk .set B14 ; starts at n >> 2, halved each outer pass, source of k
; ----------------- function prolog --------------------
; Pushes A10-A15, B10-B14 and B3 (12 words), clears CSR bit 0 so the rest of
; the routine runs with global interrupts disabled, and copies arg1/arg2 out
; of their incoming registers.
; Two stack cursors are used so that two words can be stored per cycle:
; B15 itself (decremented by 2 words per store) and A0 = B15 - 4.
; The "; f" / "; o" / "; p" / "; e" tags in the comment column are kept from
; the original listing (presumably function / outer loop / pipeline prolog /
; pipeline epilog).
; registers saved here are reloaded by the function epilog
sub B15, 4, A0
stw .D2 A10, *B15--[2] ; f
|| stw .D1 B10, *A0--[2] ; f
stw .D2 A11, *B15--[2] ; f
|| stw .D1 B11, *A0--[2] ; f
stw .D2 A12, *B15--[2] ; f
|| stw .D1 B12, *A0--[2] ; f
stw .D2 A13, *B15--[2] ; f
|| stw .D1 B13, *A0--[2] ; f caller's B13 is stored in the same packet that starts reusing B13
|| mvc .S2 CSR,B13 ; f B13 = CSR
stw .D2 A14, *B15--[2] ; f
|| stw .D1 B14, *A0--[2] ; f
|| and .L2 -2,B13,B13 ; f clear bit 0 of the CSR copy
stw .D2 A15, *B15--[2] ; f
|| stw .D1 B3, *A0--[2] ; f B3 is saved because n2 is kept in B3
|| mvc .S2 B13,CSR ; f disable global interrupts
mv .L2x A4, xinptrA ; f xinptrA = arg1 (copied to x in the next packet)
|| mv .D2 B4, w ; f move arg2 to w
|| mvk .S2 1, n2 ; o n2 = 1;
; ----------------- prolog for loopl --------------------
; First-pass setup before falling into loopk: initialises the pointers and
; counters for n2 = 1 and issues the first pair of data loads.  The same
; setup is repeated for later passes by the "o"/"p" instructions in c20-c27.
mv .L1x w, wptrB ; o wptrB = w;
|| mv .L2 xinptrA, x ; o x = xinptrA; (x keeps the base pointer)
|| lddw .D2 *+xinptrA[n2], A7:A6 ; p yr = xinptrA[2*n2];
; yi = xinptrA[2*n2 + 1];
; (A6 is also arg3: the mv below reads it in this same packet)
|| mpy .M2 n2, 1, i ; o i = n2;
|| mv .S2x A6, n ; f move arg3 to n
mv .S1X x, xoutptrB ; o xoutptrB = x;
|| lddw .D2 *xinptrA++, B5:B4 ; p xr = *xinptrA++
; xi = *xinptrA++
|| shr .S2 n, 2, tmpk ; o tmpk = n >> 2
shr .S1x n, 1, l ; o l = n/2;
|| shl .S2 n2, 3, n2As ; o n2As = n2<<3;
||[i] sub .L2 i, 1, i ; p i = i - 1;
add .L2X xoutptrB, n2As, xoutptrA; o xoutptrA = xoutptrB + n2As (xoutptrB still equals x here)
|| add .D1 xoutptrB, 4, xoutptrB ; o xoutptrB = 4 + xoutptrB
|| mv .D2 n2, j ; o j = n2;
|| mv .S1X n2, p ; o p = n2
|| sub .L1 l, 2, l ; o l = l - 2
||[!i] add .S2 xinptrA, n2As, xinptrA ; p if(i==0)xinptrA=xinptrA+n2As
; for(k=n; k > 1; k >>= 1)
; {
;------------------ outer loop - loopk ------------------------- ; for (l=0; l<n/2; l++)
; Entry of one outer pass and pipeline fill for the inner loop loopl.
; Each group of four cycles (c0-c3, c4-c7, ...) issues the same load /
; subtract / add pattern as the kernel; the multiply, Yr/Yi and store stages
; are switched on progressively.  The "@" marks in the comment column are
; kept from the original listing (they appear to count how many iterations
; ahead of the oldest in-flight one an instruction belongs to).
; Per element the computation, taken from the instruction comments, is:
;   Xr = xr + yr            Xi = xi + yi
;   rtemp = xr - yr         itemp = xi - yi
;   Yr = c*rtemp - s*itemp  Yi = c*itemp + s*rtemp
; Do not reorder: results are consumed a fixed number of cycles after issue.
loopk:
c0:
lddw .D2 *+xinptrA[n2], A7:A6 ; @ yr = xinptrA[2*n2];
; yi = xinptrA[2*n2 + 1];
|| mpy .M2 i, 1, m ; m = i;
c1:
lddw .D2 *xinptrA++, B5:B4 ; @ xr = *xinptrA++;
; xi = *xinptrA++;
|| lddw .D1 *wptrB++, A5:A4 ; c = *wptrB++; s = *wptrB++; (first pair, unconditional)
||[!i] mv .S2 n2, i ; @ if (i==0) i = n2;
c2:
subsp .L1x xr, yr, rtemp ; rtemp = xr - yr;
||[i] sub .S2 i, 1, i ; @ i = i - 1;
c3:
subsp .L1x xi, yi, itemp ; itemp = xi - yi;
|| addsp .L2x xi, yi, Xi ; Xi = xi + yi;
||[!i] add .S2 xinptrA, n2As, xinptrA ; @ if (i==0)
; xinptrA = xinptrA + n2As;
c4:
lddw .D2 *+xinptrA[n2], A7:A6 ; @@ yr = xinptrA[2*n2];
; yi = xinptrA[2*n2 + 1];
|| addsp .L2x xr, yr, Xr ; Xr = xr + yr;
|| mpy .M2 i, 1, m ; m = i;
c5:
lddw .D2 *xinptrA++, B5:B4 ; @@ xr = *xinptrA++;
; xi = *xinptrA++;
||[!m] lddw .D1 *wptrB++, A5:A4 ; @ if (m==0) {c=*wptrB++;
; s=*wptrB++;}
||[!i] mv .S2 n2, i ; @@ if (i==0) i = n2;
c6:
subsp .L1x xr, yr, rtemp ; @ rtemp = xr - yr;
|| mpysp .M1 c, rtemp, p1r ; p1r = c*rtemp;
||[i] sub .S2 i, 1, i ; @@ i = i - 1;
c7:
subsp .L1x xi, yi, itemp ; @ itemp = xi - yi;
|| addsp .L2x xi, yi, Xi ; @ Xi = xi + yi;
|| mpysp .M1 s, itemp, p2r ; p2r = s*itemp;
||[!i] add .S2 xinptrA, n2As, xinptrA ; @@ if (i==0)
; xinptrA = xinptrA + n2As;
c8:
lddw .D2 *+xinptrA[n2], A7:A6 ; @@@ yr = xinptrA[2*n2]
; yi = xinptrA[2*n2 + 1];
|| addsp .L2x xr, yr, Xr ; @ Xr = xr + yr;
|| mpysp .M1 s, rtemp, p2i ; p2i = s*rtemp;
|| mpy .M2 i, 1, m ; @ m = i;
c9:
lddw .D2 *xinptrA++, B5:B4 ; @@@ xr = *xinptrA++;
; xi = *xinptrA++;
|| mpysp .M1 c, itemp, p1i ; p1i = c*itemp;
||[!m] lddw .D1 *wptrB++, A5:A4 ; @@ if (m==0) {c=*wptrB++;
; s=*wptrB++;}
||[!i] mv .S2 n2, i ; @@@ if (i==0) i = n2;
c10:
subsp .L1x xr, yr, rtemp ; @@ rtemp = xr - yr;
|| mpysp .M1 c, rtemp, p1r ; @ p1r = c*rtemp;
|| stw .D1 Xi, *xoutptrB++[2] ; *xoutptrB = Xi;
; xoutptrB=xoutptrB+2;
; (xoutptrB was set to x + 4, i.e. it addresses the imag word)
||[i] sub .S2 i, 1, i ; @@@ i = i - 1;
c11:
subsp .L1x xi, yi, itemp ; @@ itemp = xi - yi;
|| addsp .L2x xi, yi, Xi ; @@ Xi = xi + yi;
|| mpysp .M1 s, itemp, p2r ; @ p2r = s*itemp;
|| stw .D1 Xr, *-xoutptrB[3] ; *(xoutptrB-3) = Xr; (word just below the Xi stored in c10)
||[!i] add .S2 xinptrA, n2As, xinptrA ; @@@ if (i==0)
; xinptrA = xinptrA + n2As;
||[p] sub .S1 p, 1, p ; p = p - 1;
c12:
lddw .D2 *+xinptrA[n2], A7:A6 ; @@@@ yr = xinptrA[2*n2];
; yi = xinptrA[2*n2 + 1];
|| addsp .L2x xr, yr, Xr ; @@ Xr = xr + yr;
|| mpysp .M1 s, rtemp, p2i ; @ p2i = s*rtemp;
|| subsp .L1 p1r, p2r, Yr ; Yr = p1r - p2r;
||[!p] add .S1x xoutptrB, n2As, xoutptrB; if (p==0)
; xoutptrB = xoutptrB + n2As;
|| mpy .M2 i, 1, m ; @@ m = i;
||[l] sub .D1 l, 1, l ; if (l!=0) l = l -1;
c13:
lddw .D2 *xinptrA++, B5:B4 ; @@@@ xr = *xinptrA++;
; xi = *xinptrA++;
|| mpysp .M1 c, itemp, p1i ; @ p1i = c*itemp;
|| addsp .L1 p1i, p2i, Yi ; Yi = p1i + p2i;
||[!m] lddw .D1 *wptrB++, A5:A4 ; @@@ if (m==0) {c=*wptrB++;
; s=*wptrB++;}
||[!i] mv .S2 n2, i ; @@@@ if (i==0), i = n2;
||[!p] mv .S1x n2, p ; if (p==0), p = n2;
c14:
subsp .L1x xr, yr, rtemp ; @@@ rtemp = xr - yr;
|| mpysp .M1 c, rtemp, p1r ; @@ p1r = c*rtemp;
|| stw .D1 Xi, *xoutptrB++[2] ; @ *xoutptrB=Xi;
; xoutptrB=xoutptrB+2;
||[i] sub .S2 i, 1, i ; @@@@ i = i - 1;
||[l] b .S1 loopl ; if (l!=0) branch to loopl
; (first of the kernel branches; issued ahead of the kernel so it
; takes effect after the kernel's first trip through c16-c19)
c15:
subsp .L1x xi, yi, itemp ; @@@ itemp = xi - yi;
|| addsp .L2x xi, yi, Xi ; @@@ Xi = xi + yi;
|| mpysp .M1 s, itemp, p2r ; @@ p2r = s*itemp;
|| stw .D1 Xr, *-xoutptrB[3] ; *(xoutptrB-3) = Xr;
||[!i] add .S2 xinptrA, n2As, xinptrA ; @@@@ if (i==0)
; xinptrA = xinptrA + n2As;
||[p] sub .S1 p, 1, p ; @ p = p - 1;
; ----------------- end prolog for inner loop - loopl ------------------
;------------------ inner loop - loopl loop code -----------------------
; Steady-state kernel: four execute packets (c16-c19) per trip.  Each trip
; issues the loads for a new element, the subtract/add and multiply stages
; for elements already in flight, and the Xi/Xr and Yr/Yi stores for the
; oldest ones.  The counters i, p and j (all reloaded from n2) decide when
; xinptrA, xoutptrB and xoutptrA skip ahead by n2As; m, a delayed copy of i,
; decides when the next (c, s) pair is fetched.  The loop repeats while l is
; non-zero ([l] b in c18).
; Do not reorder: operands are picked up a fixed number of cycles after the
; producing instruction was issued.
loopl:
c16:
lddw .D2 *+xinptrA[n2], A7:A6 ; @@@@@ yr = xinptrA[2*n2];
; yi = xinptrA[2*n2 + 1];
|| addsp .L2x xr, yr, Xr ; @@@ Xr = xr + yr;
|| mpysp .M1 s, rtemp, p2i ; @@ p2i = s*rtemp;
|| subsp .L1 p1r, p2r, Yr ; @ Yr = p1r - p2r;
||[!p] add .S1x xoutptrB, n2As, xoutptrB; @ if (p==0)
; xoutptrB = xoutptrB + n2As;
||[l] sub .D1 l, 1, l ; @ if (l!=0) l = l -1;
|| mpy .M2 i, 1, m ; @@@ m = i;
||[!j] add .S2 xoutptrA, n2As, xoutptrA; if (j==0)
; xoutptrA = xoutptrA + n2As;
c17:
lddw .D2 *xinptrA++, B5:B4 ; @@@@@ xr = *xinptrA++;
; xi = *xinptrA++;
|| mpysp .M1 c, itemp, p1i ; @@ p1i = c*itemp;
|| addsp .L1 p1i, p2i, Yi ; @ Yi = p1i + p2i;
||[!m] lddw .D1 *wptrB++, A5:A4 ; @@@@ if (m==0) {c=*wptrB++;
; s=*wptrB++;}
||[!i] mv .S2 n2, i ; @@@@@ if (i==0) i = n2;
||[!p] mv .S1x n2, p ; @ if (p==0) p = n2;
||[j] sub .L2 j, 1, j ; j = j - 1;
c18:
subsp .L1x xr, yr, rtemp ; @@@@ rtemp = xr - yr;
|| mpysp .M1 c, rtemp, p1r ; @@@ p1r = c*rtemp;
|| stw .D2 Yr, *xoutptrA++ ; *xoutptrA++ = Yr;
|| stw .D1 Xi, *xoutptrB++[2] ; @ *xoutptrB=Xi;
; xoutptrB=xoutptrB+2;
||[i] sub .S2 i, 1, i ; @@@@@ i = i - 1;
||[l] b .S1 loopl ; @ if (l!=0) branch to loopl
c19:
subsp .L1x xi, yi, itemp ; @@@@ itemp = xi - yi;
|| addsp .L2x xi, yi, Xi ; @@@@ Xi = xi + yi;
|| mpysp .M1 s, itemp, p2r ; @@@ p2r = s*itemp;
|| stw .D2 Yi, *xoutptrA++ ; *xoutptrA++ = Yi;
|| stw .D1 Xr, *-xoutptrB[3] ; @ *(xoutptrB-3) = Xr;
||[!i] add .S2 xinptrA, n2As, xinptrA ; @@@@@ if (i==0)
; xinptrA = xinptrA + n2As;
||[p] sub .S1 p, 1, p ; @@ p = p - 1;
||[!j] mpy .M2 n2, 1, j ; if (j==0) j = n2;
loopl_end:
;------------------ end of inner loop - loopl loop code ----------------
; ----------------- epilog for inner loop - loopl ----------------------
; Drains the Yr/Yi results still in flight ("e" instructions) and, in the
; same packets, prepares the next outer pass ("o"/"p" instructions):
; n2 is doubled, n2As recomputed, pointers reset to x / w, the counters
; i, j, p, l reloaded and the first two data loads of the next pass issued.
; The outer branch [k] b loopk is issued in c22 so that c23-c27 execute
; before control reaches loopk (or falls through to the function epilog
; when k == 0).
; NOTE(review): k and l are the same register (A2).  c24 writes l, so the
; [k] predicate in c25 tests the freshly written l value rather than the k
; value copied from tmpk in c20 -- confirm this is intended.
c20:
subsp .L1 p1r, p2r, Yr ; e Yr = p1r - p2r;
||[!j] add .D2 xoutptrA, n2As, xoutptrA; e if (j==0)
; xoutptrA = xoutptrA + n2As;
|| mv .S1x tmpk, k ; o k = tmpk
|| mv .L2 x, xinptrA ; o xinptrA = x;
|| shl .S2 n2, 1, n2 ; o n2 = n2 << 1;
c21:
addsp .L1 p1i, p2i, Yi ; e Yi = p1i + p2i;
||[j] sub .L2 j, 1, j ; e j = j - 1;
|| mv .S1x x, xoutptrB ; o xoutptrB = x;
c22:
stw .D2 Yr, *xoutptrA++ ; e *xoutptrA++ = Yr;
||[k] b .S1 loopk ; o if (k!=0) branch to loopk
c23:
stw .D2 Yi, *xoutptrA++ ; e *xoutptrA++ = Yi;
||[!j] mpy .M2 n2, 1, j ; e if (j==0) j = n2;
|| mv .S1x w, wptrB ; o wptrB = w;
c24:
[!j] add .L2 xoutptrA, n2As, xoutptrA; e if (j==0)
; xoutptrA = xoutptrA + n2As; (n2As is still the old value in this packet)
|| shr .S1x n, 1, l ; o l = n/2
|| shl .S2 n2, 3, n2As ; o n2As = n2<<3;
|| lddw .D2 *+xinptrA[n2], A7:A6 ; p yr = xinptrA[2*n2];
; yi = xinptrA[2*n2 + 1];
c25:
[k] shr .S2 tmpk, 1, tmpk ; o tmpk = tmpk >> 1;
|| sub l, 2, l ; o l = l - 2;
|| mv .L2 n2, i ; o i = n2;
|| lddw .D2 *xinptrA++, B5:B4 ; p xr = *xinptrA++;
; xi = *xinptrA++;
c26:
stw .D2 Yr, *xoutptrA++ ; e *xoutptrA++ = Yr;
|| mv .L2 n2, j ; o j = n2;
|| mv n2, p ; o p = n2
||[i] sub .S2 i, 1, i ; p i = i - 1;
c27:
stw .D2 Yi, *xoutptrA ; e *xoutptrA = Yi; (no post-increment: xoutptrA is reloaded in this packet)
|| add .S2X xoutptrB, n2As, xoutptrA; o xoutptrA = xoutptrB + n2As;
|| add xoutptrB, 4, xoutptrB ; o xoutptrB = xoutptrB + 4;
||[!i] add .L2 xinptrA, n2As, xinptrA ; p if (i==0)
; xinptrA = xinptrA + n2As;
loopk_end:
; ----------------- end of epilog for inner loop - loopl ---------------
; ----------------- end of outer loop - loopk --------------------------
; ---------------------------- function epilog ------------------------
; Reached by falling out of the outer loop (the [k] branch in c22 not taken).
; Reloads the twelve words pushed by the function prolog in reverse order,
; sets CSR bit 0 again and returns through B3.
;
; Stack cursors mirror the prolog: B15 is pre-incremented by two words per
; load, and A0 = B15 - 4 walks the interleaved words.
;
; B13 is used as CSR scratch while its own reload is in flight: the ldw that
; restores B13 shares a packet with the mvc that consumes the scratch value,
; so the mvc still reads the scratch copy.
;
; The branch is issued together with the last two loads; the trailing
; "nop 5" fills its delay slots, during which the remaining loads complete.
;
; Change vs. the original listing: a stand-alone "mvc CSR, B13" that preceded
; the "sub" was removed.  Its result was overwritten by the mvc in the next
; load packet before being read, so it only added a cycle.
;
; NOTE(review): bit 0 of CSR is set unconditionally, so global interrupts are
; enabled on return even if the caller entered with them disabled.  Fixing
; that needs the entry CSR value to be saved by the prolog; not changed here.
        sub             B15, 4, A0              ; f A0 = B15 - 4 (second stack cursor)
        ldw     .D1     *++A0[2], B3            ; f return address register (was used as n2)
||      ldw     .D2     *++B15[2], A15          ; f
||      mvc     .S2     CSR, B13                ; f B13 = CSR
        ldw     .D1     *++A0[2], B14           ; f
||      ldw     .D2     *++B15[2], A14          ; f
||      or      .L2     B13, 1, B13             ; f set bit 0 in the CSR copy
        ldw     .D1     *++A0[2], B13           ; f caller's B13 (arrives after the mvc has read B13)
||      ldw     .D2     *++B15[2], A13          ; f
||      mvc     .S2     B13,CSR                 ; f enable global interrupts
        ldw     .D1     *++A0[2], B12           ; f
||      ldw     .D2     *++B15[2], A12          ; f
        ldw     .D1     *++A0[2], B11           ; f
||      ldw     .D2     *++B15[2], A11          ; f
        ldw     .D2     *++B15[2], A10          ; f B15 is back at its entry value after this load
||      ldw     .D1     *++A0[2], B10           ; f
||      b       .S2     B3                      ; f return();
        nop             5                       ; f branch delay slots
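; ---------------------------------------------------------------------------
; Residue of the web code viewer this listing was copied from (not part of
; the source).  Keyboard shortcuts:
;   Copy code            Ctrl + C
;   Search code          Ctrl + F
;   Full-screen mode     F11
;   Switch theme         Ctrl + Shift + D
;   Show shortcuts       ?
;   Increase font size   Ctrl + =
;   Decrease font size   Ctrl + -
; ---------------------------------------------------------------------------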