📄 fft512pt.asm
字号:
//----------------------------------------------------------------------------------------------
// fft512pt (excerpt): end of the multiply/transpose loop, the three "horizontal" FFT stages,
// and the routine epilogue.
//
// Reading notes
//   * One instruction line per physical line.  `;` separates the slots of an instruction
//     line and `;;` terminates it; a `//` comment runs to the end of the physical line, so
//     an instruction must never share a physical line with a preceding comment.
//   * The trailing `//| F.. | MPY.. | M.. | AS.. | S.. |` tables are the original author's
//     software-pipeline annotations.  Presumed meaning (TODO confirm): F = fetch,
//     MPY = multiply issue, M = move of the multiplier result, AS = add/subtract,
//     S = store; each '+' marks a later pipeline iteration.
//   * Symbols _MultXposeLoopOuter, _k_modifies, _twiddles16 and _twiddles32 are defined
//     outside this excerpt.
//----------------------------------------------------------------------------------------------

// ---- Tail of the _MultXposeLoopOuter loop body (the loop label is above this excerpt) ----
// Each line either stores a quad through j1 or fetches two register pairs through j0/k2 with
// circular-buffer (cb) addressing, while moving mr1:0 out and issuing the next multiply.
    Q[j1+=16]=yr11:8;                            r28=mr1:0, mr1:0+=r2**r18(CR);;
    Q[j1+=16]=yr15:12;                           r25=mr1:0, mr1:0+=r3**r19(CR);;
    Q[j1+=16]=xr11:8;                            r29=mr1:0, mr1:0+=r4**r20(CR);;
    Q[j1+=-44]=xr15:12;                          r26=mr1:0, mr1:0+=r5**r21(CR);;
    r1:0=cb Q[j0+=32]; r17:16=cb Q[k2+=32];      r30=mr1:0, mr1:0+=r6**r22(CR);;
    r3:2=cb Q[j0+=32]; r19:18=cb Q[k2+=32];      r27=mr1:0, mr1:0+=r7**r23(CR);;
    r5:4=cb Q[j0+=32]; r21:20=cb Q[k2+=32];      r31=mr1:0, mr1:0+=r0**r16(CR);;
    r7:6=cb Q[j0+=32]; r23:22=cb Q[k2+=32];      r8=mr1:0, mr1:0+=r1**r17(CR);;
    Q[j1+=16]=yr27:24;                           r12=mr1:0, mr1:0+=r2**r18(CR);;
    Q[j1+=16]=yr31:28;                           r9=mr1:0, mr1:0+=r3**r19(CR);;
    Q[j1+=16]=xr27:24;                           r13=mr1:0, mr1:0+=r4**r20(CR);;
    Q[j1+=-44]=xr31:28;                          r10=mr1:0, mr1:0+=r5**r21(CR);;
    r1:0=cb Q[j0+=32]; r17:16=cb Q[k2+=32];      r14=mr1:0, mr1:0+=r6**r22(CR);;
    r3:2=cb Q[j0+=32]; r19:18=cb Q[k2+=32];      r11=mr1:0, mr1:0+=r7**r23(CR);;
    r5:4=cb Q[j0+=32]; r21:20=cb Q[k2+=32];      r15=mr1:0, mr1:0+=r0**r16(CR);;
    r7:6=cb Q[j0+=32]; r23:22=cb Q[k2+=32];      r24=mr1:0, mr1:0+=r1**r17(CR);;
    Q[j1+=16]=yr11:8;                            r28=mr1:0, mr1:0+=r2**r18(CR);;
    Q[j1+=16]=yr15:12;                           r25=mr1:0, mr1:0+=r3**r19(CR);;
    Q[j1+=16]=xr11:8;                            r29=mr1:0, mr1:0+=r4**r20(CR);;
    Q[j1+=-44]=xr15:12;                          r26=mr1:0, mr1:0+=r5**r21(CR);;
    r1:0=cb Q[j0+=32]; r17:16=cb Q[k2+=32];      r30=mr1:0, mr1:0+=r6**r22(CR);;
    r3:2=cb Q[j0+=32]; r19:18=cb Q[k2+=32];      r27=mr1:0, mr1:0+=r7**r23(CR);;
    r5:4=cb Q[j0+=32]; r21:20=cb Q[k2+=32];      r31=mr1:0, mr1:0+=r0**r16(CR);;
    r7:6=cb Q[j0+=32]; r23:22=cb Q[k2+=32];      r8=mr1:0, mr1:0+=r1**r17(CR);;
    Q[j1+=16]=yr27:24;                           r12=mr1:0, mr1:0+=r2**r18(CR);;
    Q[j1+=16]=yr31:28;                           r9=mr1:0, mr1:0+=r3**r19(CR);;
    Q[j1+=16]=xr27:24;                           r13=mr1:0, mr1:0+=r4**r20(CR);;
.align_code 4;
    // Loop back while loop counter 1 has not expired; the last store of the pass shares the line.
    if NLC1E, jump _MultXposeLoopOuter; Q[j1+=4]=xr31:28; r10=mr1:0, mr1:0+=r5**r21(CR);;

//**********************************************************************************************
// HORIZONTAL FFTs
//**********************************************************************************************

//*************************************** Stage 1 **********************************************
// From j0->_ping_pong_buffer2 to k3->_ping_pong_buffer1
//
// Four read pointers j0..j3 are derived from j6 (offsets 0, 256, 128, 256+128) and fetched with
// bit-reversed (br) addressing; results are stored through k3 using the modifiers loaded into
// k7:4 from _k_modifies+4.  LC0 (=2) counts the inner loop, LC1 (=4) the outer repetition.
.align_code 4;
    j0=j31+j6;          k7:4=Q[k31+(_k_modifies+4)];;
    j1=j0+256;          j8=64;;
    j2=j0+128;          k3=j5;;
    j3=j0+(256+128);    LC0=2;;
    // ----------------------------------
    r5:4 =br Q[j2+=64];;                                                                                                                         //| F1    |       |     |       |    |
    r7:6 =br Q[j3+=j8]; r31=0x80000000;;                                                                                                         //| F2    |       |     |       |    |
    r1:0 =br Q[j0+=j8]; k2=k31+(_twiddles16+2);;                                                                                                 //| F3    |       |     |       |    |
    r3:2 =br Q[j1+=j8]; kB3=k3+4; sr13:12=r5:4+r7:6, sr15:14=r5:4-r7:6;;                                                                         //| F4    |       |     | AS1   |    |
    // ----------------------------------
    r5:4 =br Q[j2+=j8]; kB2=k31+_twiddles16;;                                                                                                    //| F1+   |       |     |       |    |
    r7:6 =br Q[j3+=j8]; j9=-108; mr1:0+=r14**r31(CR); sr9:8=r1:0+r3:2, sr11:10=r1:0-r3:2;;                                                       //| F2+   | MPY1  |     | AS2   |    |
    r1:0 =br Q[j0+=j8]; kL2=k31+4*16; r24=mr1:0, mr1:0+=r15**r31(CR);;                                                                           //| F3+   | MPY2  | M1  |       |    |
    r3:2 =br Q[j1+=j8]; LC1=4; r25=mr1:0, mr1:0+=r15**r31(CR); sr29:28=r5:4+r7:6, sr15:14=r5:4-r7:6;;                                            //| F4+   |       | M2  | AS1+  |    |
    // ----------------------------------
    r5:4 =br Q[j2+=j8]; jB0=kB3; sr17:16=r9:8+r13:12, sr21:20=r9:8-r13:12;;                                                                      //| F1++  |       |     | AS3   |    |
.align_code 4;
// ----------------------------------
_HorFFTStage1Inner:
// ----------------------------------
    r7:6 =br Q[j3+=j8]; cb Q[k3+=k5]=r17:16; mr1:0+=r14**r31(CR); sr9:8=r1:0+r3:2, sr27:26=r1:0-r3:2;;                                           //| F2++  | MPY1+ |     | AS2+  | S1 |
    // NOTE(review): this is the only Q[k3+=k6] store in Stage 1 without the `cb` qualifier; every
    // other occurrence below is `cb Q[k3+=k6]`.  Left exactly as found - confirm it is intentional.
    r1:0 =br Q[j0+=j8]; Q[k3+=k6]=r21:20; r24=mr1:0, mr1:0+=r15**r31(CR); sr19:18=r11:10+r25:24, sr23:22=r11:10-r25:24;;                         //| F3++  | MPY2+ | M1+ | AS4   | S2 |
    r3:2 =br Q[j1+=j8]; cb Q[k3+=k5]=r19:18; r25=mr1:0, mr1:0+=r15**r31(CR); sr13:12=r5:4+r7:6, sr15:14=r5:4-r7:6;;                              //| F4++  |       | M2+ | AS1++ | S3 |
    // ----------------------------------
    r5:4 =br Q[j2+=j8]; cb Q[k3+=k7]=r23:22; sr17:16=r9:8+r29:28, sr21:20=r9:8-r29:28;;                                                          //| F1+++ |       |     | AS3+  | S4 |
    r7:6 =br Q[j3+=j8]; cb Q[k3+=k5]=r17:16; mr1:0+=r14**r31(CR); sr9:8=r1:0+r3:2, sr11:10=r1:0-r3:2;;                                           //| F2+++ | MPY1++|     | AS2++ | S1+|
    r1:0 =br Q[j0+=j8]; cb Q[k3+=k6]=r21:20; r24=mr1:0, mr1:0+=r15**r31(CR); sr19:18=r27:26+r25:24, sr23:22=r27:26-r25:24;;                      //| F3+++ | MPY2++| M1++| AS4+  | S2+|
    r3:2 =br Q[j1+=j8]; cb Q[k3+=k5]=r19:18; r25=mr1:0, mr1:0+=r15**r31(CR); sr29:28=r5:4+r7:6, sr15:14=r5:4-r7:6;;                              //| F4+++ |       | M2++| AS1+++| S3+|
.align_code 4;
    if NLC0E, jump _HorFFTStage1Inner;
    r5:4 =br Q[j2+=j8]; cb Q[k3+=k7]=r23:22; sr17:16=r9:8+r13:12, sr21:20=r9:8-r13:12;;                                                          //| F1++++|       |     | AS3++ | S4+|

    // Inner loop fell through (LC0 expired): same butterfly pattern, unrolled once more.
    r7:6 =br Q[j3+=j8]; cb Q[k3+=k5]=r17:16; mr1:0+=r14**r31(CR); sr9:8=r1:0+r3:2, sr27:26=r1:0-r3:2;;
    r1:0 =br Q[j0+=j8]; cb Q[k3+=k6]=r21:20; r24=mr1:0, mr1:0+=r15**r31(CR); sr19:18=r11:10+r25:24, sr23:22=r11:10-r25:24;;
    r3:2 =br Q[j1+=j8]; cb Q[k3+=k5]=r19:18; r25=mr1:0, mr1:0+=r15**r31(CR); sr13:12=r5:4+r7:6, sr15:14=r5:4-r7:6;;     // F4
    // The next four fetches are plain (non-br) and post-modify by j9 (= -108) instead of j8.
    r5:4 =   Q[j2+=j9]; cb Q[k3+=k7]=r23:22; sr17:16=r9:8+r29:28, sr21:20=r9:8-r29:28;;
    r7:6 =   Q[j3+=j9]; cb Q[k3+=k5]=r17:16; mr1:0+=r14**r31(CR); sr9:8=r1:0+r3:2, sr11:10=r1:0-r3:2;;
    r1:0 =   Q[j0+=j9]; cb Q[k3+=k6]=r21:20; r24=mr1:0, mr1:0+=r15**r31(CR); sr19:18=r27:26+r25:24, sr23:22=r27:26-r25:24;;
    r3:2 =   Q[j1+=j9]; cb Q[k3+=k5]=r19:18; r25=mr1:0, mr1:0+=r15**r31(CR); sr29:28=r5:4+r7:6, sr15:14=r5:4-r7:6;;     // F4
    r5:4 =br Q[j2+=j8]; cb Q[k3+=k7]=r23:22; sr17:16=r9:8+r13:12, sr21:20=r9:8-r13:12;;
    LC0=2;;                                     // re-arm the inner loop counter for the next outer pass
    r7:6 =br Q[j3+=j8]; cb Q[k3+=k5]=r17:16; mr1:0+=r14**r31(CR); sr9:8=r1:0+r3:2, sr27:26=r1:0-r3:2;;
    r1:0 =br Q[j0+=j8]; cb Q[k3+=k6]=r21:20; r24=mr1:0, mr1:0+=r15**r31(CR); sr19:18=r11:10+r25:24, sr23:22=r11:10-r25:24;;
    r3:2 =br Q[j1+=j8]; cb Q[k3+=k5]=r19:18; r25=mr1:0, mr1:0+=r15**r31(CR); sr13:12=r5:4+r7:6, sr15:14=r5:4-r7:6;;     // F4
    r5:4 =br Q[j2+=j8]; cb Q[k3+=k7]=r23:22; sr17:16=r9:8+r29:28, sr21:20=r9:8-r29:28;;
    r7:6 =br Q[j3+=j8]; cb Q[k3+=k5]=r17:16; mr1:0+=r14**r31(CR); sr9:8=r1:0+r3:2, sr11:10=r1:0-r3:2;;
    r1:0 =br Q[j0+=j8]; cb Q[k3+=k6]=r21:20; r24=mr1:0, mr1:0+=r15**r31(CR); sr19:18=r27:26+r25:24, sr23:22=r27:26-r25:24;;
    r3:2 =br Q[j1+=j8]; cb Q[k3+=k5]=r19:18; r25=mr1:0, mr1:0+=r15**r31(CR); sr29:28=r5:4+r7:6, sr15:14=r5:4-r7:6;;     // F4
.align_code 4;
    // Outer repetition: re-enter the inner loop while LC1 has not expired.
    if NLC1E, jump _HorFFTStage1Inner; r5:4 =br Q[j2+=j8]; cb Q[k3+=k7]=r23:22; sr17:16=r9:8+r13:12, sr21:20=r9:8-r13:12;;

//***************************************** Stage 2 ***********************************************
// From j0->_ping_pong_buffer1 to k1->_ping_pong_buffer2
//
// Data is read through j0 (modifiers j1 = -4*16 and j2 = 28*16), twiddle pairs through k2 with
// L[] loads, and results are written through k1 with modifier k6 (= 4*16).  LC0 = 15 counts the
// _HorFFTStage2 loop.  The pipeline tables on the prologue lines are reproduced as found; several
// repeat the same "F3+ | MPY2 | M1" text and may be stale (NOTE(review): confirm).
.align_code 4;
    jB0=j5+4;;
    j0=j5+12*16; j1=-4*16;;
    r7:6 =cb Q[j0+=-4*16]; r31:30= L[k2+=-2];;                                                   //| F1    |       |     |       |    |
    r5:4 =cb Q[j0+=-4*16]; r29:28= L[k2+=6]; mr1:0+=r7**r31(CR);;
    r3:2 =   Q[j0+=-4*16]; LC0=15;    r15=mr1:0, mr1:0+=r6**r31(CR);;                            //| F3+   | MPY2  | M1  |       |    |
    r1:0 =cb Q[j0+=28*16]; j2=28*16;  r14=mr1:0, mr1:0+=r5**r30(CR);;                            //| F3+   | MPY2  | M1  |       |    |
    r7:6 =cb Q[j0+=-4*16]; k1=j6;     r13=mr1:0, mr1:0+=r4**r30(CR);;                            //| F3+   | MPY2  | M1  |       |    |
    r5:4 =cb Q[j0+=j1];               r12=mr1:0, mr1:0+=r3**r29(CR);;                            //| F3+   | MPY2  | M1  |       |    |
                                      r11=mr1:0, mr1:0+=r2**r29(CR);;                            //| F3+   | MPY2  | M1  |       |    |
                                      r10=mr1:0, mr1:0+=r1**r28(CR);;                            //| F3+   | MPY2  | M1  |       |    |
    r3:2 =   Q[j0+=j1];               r9=mr1:0, mr1:0+=r0**r28(CR);;                             //| F3+   | MPY2  | M1  |       |    |
    r1:0 =cb Q[j0+=28*16];            r8=mr1:0, mr1:0+=r7**r31(CR); sr21:20=r13:12+r15:14, sr23:22=r13:12-r15:14;;  // AS1
    kB1=k1+4;                         r15=mr1:0, mr1:0+=r6**r31(CR);;
                                      r14=mr1:0, mr1:0+=r5**r30(CR); sr17:16=r9:8 +r11:10, sr19:18=r9:8 -r11:10;;   // AS2
    r7:6 =cb Q[j0+=j1]; r8=r23;       r13=mr1:0, mr1:0+=r4**r30(CR); sr9=-r23;;                                     // A2
    r5:4 =cb Q[j0+=j1]; r31:30= L[k2+=-2];r12=mr1:0, mr1:0+=r3**r29(CR); sr23=-r22;;                                // A1
                                      r11=mr1:0, mr1:0+=r2**r29(CR); lr9:8=rot r9:8 by -16;;                        // R2
    k6=k31+4*16;                      r10=mr1:0, mr1:0+=r1**r28(CR); lr23:22=rot r23:22 by -16;;                    // R1
.align_code 4;
// ----------------------------------
_HorFFTStage2:
// ----------------------------------
    r3:2 =   Q[j0+=j1]; r23=r8;            r9=mr1:0, mr1:0+=r0**r28(CR); sr17:16=r17:16+r21:20, sr21:20=r17:16-r21:20;;   // AS3
    r1:0 =cb Q[j0+=j2]; r29:28= L[k2+=6];  r8=mr1:0, mr1:0+=r7**r31(CR); sr25:24=r13:12+r15:14, sr27:26=r13:12-r15:14;;   // AS1+
    cb Q[k1+=k6]=r17:16;                   r15=mr1:0, mr1:0+=r6**r31(CR); sr19:18=r19:18+r23:22, sr23:22=r19:18-r23:22;;  // AS4
    cb Q[k1+=k6]=r19:18;                   r14=mr1:0, mr1:0+=r5**r30(CR); sr17:16=r9:8 +r11:10, sr19:18=r9:8 -r11:10;;    // AS2+
    r7:6 =cb Q[j0+=j1]; r8=r27;            r13=mr1:0, mr1:0+=r4**r30(CR); sr9=-r27;;                                      // A2+
    r5:4 =cb Q[j0+=j1];                    r12=mr1:0, mr1:0+=r3**r29(CR); sr27=-r26;;                                     // A1+
    cb Q[k1+=k6]=r21:20;                   r11=mr1:0, mr1:0+=r2**r29(CR); lr9:8=rot r9:8 by -16;;                         // R2+
    cb Q[k1+=k6]=r23:22;                   r10=mr1:0, mr1:0+=r1**r28(CR); lr27:26=rot r27:26 by -16;;                     // R1+
    r3:2 =   Q[j0+=j1]; r27=r8;            r9=mr1:0, mr1:0+=r0**r28(CR); sr17:16=r17:16+r25:24, sr25:24=r17:16-r25:24;;   // AS3
    r1:0 =cb Q[j0+=j2];                    r8=mr1:0, mr1:0+=r7**r31(CR); sr21:20=r13:12+r15:14, sr23:22=r13:12-r15:14;;   // AS1
    cb Q[k1+=k6]=r17:16;                   r15=mr1:0, mr1:0+=r6**r31(CR); sr19:18=r19:18+r27:26, sr27:26=r19:18-r27:26;;  // AS4+
    cb Q[k1+=k6]=r19:18;                   r14=mr1:0, mr1:0+=r5**r30(CR); sr17:16=r9:8 +r11:10, sr19:18=r9:8 -r11:10;;    // AS2
    r7:6 =cb Q[j0+=j1]; r8=r23;            r13=mr1:0, mr1:0+=r4**r30(CR); sr9=-r23;;                                      // A2
    r5:4 =cb Q[j0+=j1]; r31:30= L[k2+=-2];r12=mr1:0, mr1:0+=r3**r29(CR); sr23=-r22;;                                      // A1
    cb Q[k1+=k6]=r25:24;                   r11=mr1:0, mr1:0+=r2**r29(CR); lr9:8=rot r9:8 by -16;;                         // R2
.align_code 4;
    // ----------------------------------
    if NLC0E, jump _HorFFTStage2;
    // ----------------------------------
    cb Q[k1+=k6]=r27:26;                   r10=mr1:0, mr1:0+=r1**r28(CR); lr23:22=rot r23:22 by -16;;                     // R1

    // Stage 2 drain, interleaved with Stage 3 setup: j2 is pointed at _twiddles32, j0/j1 at
    // j6 and j6+256, k3 at j7, and the first Stage 3 fetches/multiplies are issued here.
    r3:2 =   Q[j0+=j1]; r23=r8;            r9=mr1:0, mr1:0+=r0**r28(CR); sr17:16=r17:16+r21:20, sr21:20=r17:16-r21:20;;   // AS3
    r1:0 =cb Q[j0+=j2]; jB0=j6;            r8=mr1:0, mr1:0+=r7**r31(CR); sr25:24=r13:12+r15:14, sr27:26=r13:12-r15:14;;   // AS1+
    cb Q[k1+=k6]=r17:16; j2=j31+_twiddles32;                             sr19:18=r19:18+r23:22, sr23:22=r19:18-r23:22;;   // AS4
    cb Q[k1+=k6]=r19:18; j1=j6+256;                                      sr17:16=r9:8 +r11:10, sr19:18=r9:8 -r11:10;;     // AS2+
    cb Q[k1+=k6]=r21:20; r30=[j2+=1];;
    r3:2 =   Q[j1+=4];   r8=r27;                                         sr9=-r27;;                                       // A2+
    cb Q[k1+=k6]=r23:22; j0=j31+j6;                                      sr27=-r26;;
    r1:0 =   Q[j0+=4];   k3=j7;                                          lr9:8=rot r9:8 by -16;;                          // R2+
    r7:6 =   Q[j1+=4];                     mr1:0+=r2**r30(CR);           lr27:26=rot r27:26 by -16;;                      // R1+
    r5:4 =   Q[j0+=4];   r27=r8;           r2=mr1:0, mr1:0+=r3**r30(CR); sr17:16=r17:16+r25:24, sr25:24=r17:16-r25:24;;   // AS3+
    cb Q[k1+=k6]=r17:16; jB1=j6+256;                                     sr19:18=r19:18+r27:26, sr27:26=r19:18-r27:26;;   // AS4+
    cb Q[k1+=k6]=r19:18; jL1=j31+512;;
    cb Q[k1+=k6]=r25:24; jL0=j31+512;;
    cb Q[k1+=k6]=r27:26;;

//***************************************** Stage 3 **********************************************
// From j0,j1->ping_pong_buffer2 to k3,k2->output
//
// The loop body is unrolled twice; the two halves alternate between the twiddle words held in
// r30 and r31, each reloaded from [j2+=1].  k2 is k3+256.  LC1 = 8 counts the loop.
    r11:10=  Q[j1+=4];            r3=mr1:0, mr1:0+=r6**r30(CR);;
    r9:8  =  Q[j0+=4];;
                                  r6=mr1:0, mr1:0+=r7**r30(CR);;                                          // F2
    r15:14=  Q[j1+=4]; LC1=8;     r7=mr1:0, mr1:0+=r10**r30(CR); sr1:0=r1:0+r3:2, sr3:2=r1:0-r3:2;;       // F3
    r13:12=  Q[j0+=4]; k2=k3+256; r10=mr1:0, mr1:0+=r11**r30(CR);;                                        // F4
.align_code 4;
_HorFFTStage3Outer:
    r31=[j2+=1];;
    r3:2  =cb Q[j1+=4]; Q[k2+=4]=r3:2;   r11=mr1:0, mr1:0+=r14**r30(CR); sr5:4=r5:4+r7:6, sr7:6=r5:4-r7:6;;
    r1:0  =cb Q[j0+=4]; Q[k3+=4]=r1:0;   r14=mr1:0, mr1:0+=r15**r30(CR);;
    r7:6  =cb Q[j1+=4]; Q[k2+=4]=r7:6;   r15=mr1:0, mr1:0+=r2**r31(CR); sr9:8=r9:8+r11:10, sr11:10=r9:8-r11:10;;
    r5:4  =cb Q[j0+=4]; Q[k3+=4]=r5:4;   r2=mr1:0, mr1:0+=r3**r31(CR);;
    r11:10=cb Q[j1+=4]; Q[k2+=4]=r11:10; r3=mr1:0, mr1:0+=r6**r31(CR); sr13:12=r13:12+r15:14, sr15:14=r13:12-r15:14;;
    r9:8  =cb Q[j0+=4]; Q[k3+=4]=r9:8;   r6=mr1:0, mr1:0+=r7**r31(CR);;                                   // F2
    r15:14=cb Q[j1+=4]; Q[k2+=4]=r15:14; r7=mr1:0, mr1:0+=r10**r31(CR); sr1:0=r1:0+r3:2, sr3:2=r1:0-r3:2;; // F3
    r13:12=cb Q[j0+=4]; Q[k3+=4]=r13:12; r10=mr1:0, mr1:0+=r11**r31(CR);;                                 // F4
    r30=[j2+=1];;
    r3:2  =cb Q[j1+=4]; Q[k2+=4]=r3:2;   r11=mr1:0, mr1:0+=r14**r31(CR); sr5:4=r5:4+r7:6, sr7:6=r5:4-r7:6;;
    r1:0  =cb Q[j0+=4]; Q[k3+=4]=r1:0;   r14=mr1:0, mr1:0+=r15**r31(CR);;
    r7:6  =cb Q[j1+=4]; Q[k2+=4]=r7:6;   r15=mr1:0, mr1:0+=r2**r30(CR); sr9:8=r9:8+r11:10, sr11:10=r9:8-r11:10;;
    r5:4  =cb Q[j0+=4]; Q[k3+=4]=r5:4;   r2=mr1:0, mr1:0+=r3**r30(CR);;
    r11:10=cb Q[j1+=4]; Q[k2+=4]=r11:10; r3=mr1:0, mr1:0+=r6**r30(CR); sr13:12=r13:12+r15:14, sr15:14=r13:12-r15:14;;
    r9:8  =cb Q[j0+=4]; Q[k3+=4]=r9:8;   r6=mr1:0, mr1:0+=r7**r30(CR);;                                   // F2
    r15:14=cb Q[j1+=4]; Q[k2+=4]=r15:14; r7=mr1:0, mr1:0+=r10**r30(CR); sr1:0=r1:0+r3:2, sr3:2=r1:0-r3:2;; // F3
.align_code 4;
    if NLC1E, jump _HorFFTStage3Outer; r13:12=cb Q[j0+=4]; Q[k3+=4]=r13:12; r10=mr1:0, mr1:0+=r11**r30(CR);; // F4

/******************************************* Done ***********************************************/
// EPILOGUE
// Reload xr/yr 24..31 and J27:24 / K27:24 from memory addressed via J27/K27 and J26/K26, then
// return with an absolute computed jump.  (Presumably these are the registers the prologue,
// which is outside this excerpt, saved on the stacks - TODO confirm against the prologue.)
YR27:24 = q[K27 + 16]; XR27:24 = q[J27 + 24];;
YR31:28 = q[K27 + 12]; XR31:28 = q[J27 + 20];;
cjmp (ABS); J27:24=q[J26+68]; K27:24=q[K26+68]; nop;;
_fft512pt.end:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -