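// File: idct_xvid.asm
// (A "font size:" label from the code-viewer page followed here; it is not part of the source.)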
.section L1_data_b;
.align 4;
// Table of six 16-bit IDCT rotation constants.
// The entries are laid out in the order in which _idct_int32 reads them
// through I3 (post-incrementing halfword loads): W7, W1, W3, W5, W6, W2.
// Wk = 2048*sqrt(2)*cos(k*pi/16), rounded to an integer.
//.global _idct_coeff;
.byte2 _idct_coeff[6] = {
    565,    /* W7 = 2048*sqrt(2)*cos(7*pi/16) */
    2841,   /* W1 = 2048*sqrt(2)*cos(1*pi/16) */
    2408,   /* W3 = 2048*sqrt(2)*cos(3*pi/16) */
    1609,   /* W5 = 2048*sqrt(2)*cos(5*pi/16) */
    1108,   /* W6 = 2048*sqrt(2)*cos(6*pi/16) */
    2676    /* W2 = 2048*sqrt(2)*cos(2*pi/16) */
};
.section L1_code;
.align 4;
//----------------------------------------------------------------------
// void idct_int32(short *const block);
//
// In-place 8x8 inverse DCT over 64 consecutive 16-bit words.
// Pass 1 (row_s..row_e) transforms each of the 8 rows; pass 2
// (col_s..col_e) transforms each of the 8 columns and limits the range
// of the results.
//
// In:    R0 = address of block (copied to B0 and P1 below).
// Out:   block is overwritten with the transformed values.
// Saved/restored on the stack: R7:4, P5:3, RETS.
//        (P3-P5 are not otherwise referenced in this routine, and the
//        routine contains no CALL.)
// Written and not restored: R0-R3, P0-P2, I0-I3, B0, B3, M0-M3,
//        L0-L3 (left at 0), A0, A1 and the loop-0 registers via LSETUP.
//
// NOTE(review): the per-line comments assume the usual Blackfin
// semantics - every slot of a "||" parallel-issue line reads register
// values from before that line, and (IS) selects signed-integer
// multiply/accumulate. Confirm against the ISA reference when editing.
//
// Notation: b0..b7 = the 8 words of the current row (pass 1),
//           c0..c7 = the 8 words of the current column (pass 2),
//           W1..W7 = entries of _idct_coeff.
//----------------------------------------------------------------------
.global _idct_int32;
// Author's note: 1182 cycles (measurement conditions not stated).
_idct_int32:
[--SP] = (R7:4, P5:3);
[--SP] = RETS;
// Zero the length registers paired with I0-I3
// (presumably so the I-register accesses below are plain linear addressing).
L0 = 0;
L1 = 0;
L2 = 0;
L3 = 0;
// B0 keeps the block base; it is read back before the column pass.
B0 = R0;
P1 = R0;        // P1 -> b0 of row 0
R0 += 2;
I0 = R0;        // I0 -> b1 of row 0
R0 += 12;
I1 = R0;        // I1 -> b7 of row 0
R0 += -8;
I2 = R0;        // I2 -> b3 of row 0
// B3 holds the coefficient table address so I3 can be rewound each iteration.
B3.L = _idct_coeff;
B3.H = _idct_coeff;
I3 = B3;
P0 = 8;         // iteration count, used by both LSETUPs
// Byte steps used at the end of each row iteration:
M0 = 12;        // I0: element 3 -> element 1 of the next row
M1 = 22;        // I1: element 4 -> element 7 of the next row
M2 = 16;        // I2: one row
P2 = 16;        // P1: one row
LSETUP(row_s, row_e) LC0=P0;
row_s:
// ---- Pass 1: one row per iteration --------------------------------
// Element indices on entry: I0=1, I1=7, I2=3.
R4.L = W[I0--] || R0.L = W[I3++];       // R4.L = b1, R0.L = W7
R4.H = W[I1--] || R0.H = W[I3++];       // R4.H = b7, R0.H = W1
// Now I0=0, I1=6, I2=3.
// A0 = W7*b7, A1 = W7*b1; R0.L <- W3; I1 steps back to element 5.
A0 = R0.L*R4.H, A1 = R0.L*R4.L(IS) || R0.L = W[I3++] || I1 -= 2;
// Now I0=0, I1=5, I2=3.
// R4 = X4 = W7*b7 + W1*b1, R5 = X5 = W7*b1 - W1*b7; R6.L = b5, R6.H = b3.
R4 = (A0 += R0.H*R4.L), R5 = (A1 -= R0.H*R4.H)(IS) || R6.L = W[I1++] || R6.H = W[I2--];
// Now I0=0, I1=6, I2=2.
// A0 = W3*b3, A1 = W3*b5; R0.H <- W5; R2.L = b6.
A0 = R0.L*R6.H, A1 = R0.L*R6.L(IS) || R0.H = W[I3++] || R2.L = W[I1++];
// Now I0=0, I1=7, I2=2.
// R6 = X6 = W3*b3 + W5*b5, R7 = X7 = W3*b5 - W5*b3; R0.L <- W6; R2.H = b2.
R6 = (A0 += R0.H*R6.L), R7 = (A1 -= R0.H*R6.H)(IS) || R0.L = W[I3++] || R2.H = W[I2++];
// Now I0=0, I1=7, I2=3.
// A0 = W6*b2, A1 = W6*b6; R0.H <- W2; R1 = b4 (sign-extended).
A0 = R0.L*R2.H, A1 = R0.L*R2.L(IS) || R0.H = W[I3++] || R1 = W[P1+8](X);
// R2 = X2 = W6*b2 - W2*b6, R3 = X3 = W6*b6 + W2*b2; R0 = b0 (sign-extended).
R2 = (A0 -= R0.H*R2.L), R3 = (A1 += R0.H*R2.H)(IS) || R0 = W[P1++P2](X);
// P1 now points at b0 of the next row.
// R0 = ((b0*16) + 1) * 128 = (b0 << 11) + 128 (DC term plus rounding bias).
R0 <<= 4;
R0 += 1;
R0 <<= 7;
R1 <<= 11;      // R1 = b4 << 11
// Butterflies: each line forms the sum and difference of one register pair.
R1 = R0 + R1, R0 = R0 - R1; // R1 = X8 = X0 + X1, R0 = X0 - X1
R6 = R4 + R6, R4 = R4 - R6; // R6 = X1 = X4 + X6, R4 = X4 - X6
R7 = R5 + R7, R5 = R5 - R7; // R7 = X6 = X5 + X7, R5 = X5 - X7
R3 = R1 + R3, R1 = R1 - R3; // R3 = X7 = X8 + X3, R1 = X8 - X3
R2 = R0 + R2, R0 = R0 - R2; // R2 = X3 = X0 + X2, R0 = X0 - X2
R5 = R4 + R5, R4 = R4 - R5; // R5 = X4 + X5, R4 = X4 - X5 (scaled by 181 below)
R3 = R3 + R6, R6 = R3 - R6; // R3 = X7 + X1, R6 = X7 - X1
R3 >>>= 8;
R6 >>>= 8;
W[I0++] = R3.L;             // row[0] = (X7 + X1) >> 8
W[I1--] = R6.L;             // row[7] = (X7 - X1) >> 8
// R6/R3 are free now; reuse them as the constants 181 and 128.
R6 = 181(Z);
R3 = 128(Z);
R5 *= R6;
R4 *= R6;
R5 = R5 + R3;
R4 = R4 + R3;
R5 >>>= 8;                  // R5 = X2 = (181*(X4 + X5) + 128) >> 8
R4 >>>= 8;                  // R4 = X4 = (181*(X4 - X5) + 128) >> 8
R2 = R2 + R5, R5 = R2 - R5; // R2 = X3 + X2, R5 = X3 - X2
R2 >>>= 8;
R5 >>>= 8;
W[I0++] = R2.L;             // row[1]
W[I1--] = R5.L;             // row[6]
R0 = R0 + R4, R4 = R0 - R4; // R0 = X0 + X4, R4 = X0 - X4
R0 >>>= 8;
R4 >>>= 8;
W[I0++] = R0.L;             // row[2]
W[I1--] = R4.L || I2 += M2; // row[5]; I2 -> element 3 of the next row
R1 = R1 + R7, R7 = R1 - R7; // R1 = X8 + X6, R7 = X8 - X6
R1 >>>= 8;
R7 >>>= 8;
W[I0] = R1.L || I0 += M0;   // row[3]; I0 -> element 1 of the next row (3 + 6)
W[I1] = R7.L || I1 += M1;   // row[4]; I1 -> element 7 of the next row (4 + 11)
row_e:
// row_e labels this instruction, so it is the last instruction of every
// row iteration: rewind I3 to the start of the coefficient table.
I3 = B3;
// ---- Set-up for pass 2 (columns) -----------------------------------
R0 = B0;        // block base saved at entry
P1 = R0;        // P1 -> row 0, column 0
R0 += 16;
I0 = R0;        // I0 -> row 1, column 0
R0 += 32;
I2 = R0;        // I2 -> row 3, column 0
R0 += 16;
P2 = R0;        // P2 -> row 4, column 0
R0 += 48;
I1 = R0;        // I1 -> row 7, column 0
M0 = 16;        // one row, in bytes
M1 = 32;        // two rows
M2 = 30;        // I0: row 3 -> row 1 of the next column (-(2 rows) + 2 bytes)
M3 = 50;        // I1: row 4 -> row 7 of the next column (3 rows + 2 bytes)
LSETUP(col_s, col_e) LC0 = P0;
col_s:
// ---- Pass 2: one column per iteration ------------------------------
// Row positions on entry: I0=8*1, I1=8*7, I2=8*3.
R4.L = W[I0] || R0.L = W[I3++];         // R4.L = c1, R0.L = W7
R4.H = W[I1] || R0.H = W[I3++];         // R4.H = c7, R0.H = W1
// Still I0=8*1, I1=8*7, I2=8*3.
// A0 = W7*c7, A1 = W7*c1; R0.L <- W3; I1 moves up two rows to row 5.
A0 = R0.L*R4.H, A1 = R0.L*R4.L(IS) || R0.L = W[I3++] || I1 -= M1;
// Now I0=8*1, I1=8*5, I2=8*3.
// R4 = W7*c7 + W1*c1, R5 = W7*c1 - W1*c7; R6.L = c5, R6.H = c3.
R4 = (A0 += R0.H*R4.L), R5 = (A1 -= R0.H*R4.H)(IS) || R6.L = W[I1] || R6.H = W[I2];
// Round and drop 3 bits: X4 = (R4 + 4) >> 3, X5 = (R5 + 4) >> 3.
R4 += 4;
R5 += 4;
R4 >>>= 3;
R5 >>>= 3;
// A0 = W3*c3, A1 = W3*c5; R0.H <- W5; I1 moves down one row to row 6.
A0 = R0.L*R6.H, A1 = R0.L*R6.L(IS) || R0.H = W[I3++] || I1 += M0;
// Now I0=8*1, I1=8*6, I2=8*3.
// R6 = W3*c3 + W5*c5, R7 = W3*c5 - W5*c3; R0.L <- W6; I2 moves up to row 2.
R6 = (A0 += R0.H*R6.L), R7 = (A1 -= R0.H*R6.H)(IS) || R0.L = W[I3++] || I2 -=M0;
// Now I0=8*1, I1=8*6, I2=8*2.
// X6 = (R6 + 4) >> 3, X7 = (R7 + 4) >> 3.
R6 += 4;
R7 += 4;
R6 >>>= 3;
R7 >>>= 3;
// P1 -> row 0 and P2 -> row 4 of the current column.
R2.L = W[I1] || R2.H = W[I2];           // R2.L = c6, R2.H = c2
// A0 = W6*c2, A1 = W6*c6; R1 = c4 (sign-extended); R0.H <- W2.
A0 = R0.L*R2.H, A1 = R0.L*R2.L(IS) || R1 = W[P2++](X) || R0.H = W[I3++];
// R2 = W6*c2 - W2*c6, R3 = W6*c6 + W2*c2; R0 = c0 (sign-extended).
R2 = (A0 -= R0.H*R2.L), R3 = (A1 += R0.H*R2.H)(IS) || R0 = W[P1++](X);
// P1 and P2 have each advanced one word, i.e. to the next column.
// X2 = (R2 + 4) >> 3, X3 = (R3 + 4) >> 3.
R2 += 4;
R3 += 4;
R2 >>>= 3;
R3 >>>= 3;
// R0 = (c0 + 32) << 8 = (c0 << 8) + 8192 (DC term plus rounding bias).
R0 += 32;
R0 <<= 8;
R1 <<= 8;       // R1 = c4 << 8
// Same butterfly sequence as in the row pass.
R1 = R0 + R1, R0 = R0 - R1; // R1 = X8 = X0 + X1, R0 = X0 - X1
R6 = R4 + R6, R4 = R4 - R6; // R6 = X1 = X4 + X6, R4 = X4 - X6
R7 = R5 + R7, R5 = R5 - R7; // R7 = X6 = X5 + X7, R5 = X5 - X7
R3 = R1 + R3, R1 = R1 - R3; // R3 = X7 = X8 + X3, R1 = X8 - X3
R2 = R0 + R2, R0 = R0 - R2; // R2 = X3 = X0 + X2, R0 = X0 - X2
R5 = R4 + R5, R4 = R4 - R5; // R5 = X4 + X5, R4 = X4 - X5 (scaled by 181 below)
R3 = R3 + R6, R6 = R3 - R6; // R3 = X7 + X1, R6 = X7 - X1
// Currently I0=8*1, I1=8*6, I2=8*2.
// Each output is shifted left by 9 with (S), then arithmetically right
// by 23: a net >> 14. Assuming (S) saturates the 32-bit shift result,
// the stored value is limited to [-256, 255] - TODO confirm against the ISA.
R3 = R3 << 9(S) || I0 -=M0;
R3 >>>= 23;
R6 = R6 << 9(S) || I1 +=M0;
// Now I0=8*0, I1=8*7, I2=8*2.
R6 >>>= 23;
W[I0] = R3.L || I0 += M0;   // col[0] = (X7 + X1) >> 14; I0 -> row 1
W[I1] = R6.L || I1 -= M0;   // col[7] = (X7 - X1) >> 14; I1 -> row 6
// Now I0=8*1, I1=8*6, I2=8*2.
// R6/R3 are free now; reuse them as the constants 181 and 128.
R6 = 181(Z);
R3 = 128(Z);
R5 *= R6;
R4 *= R6;
R5 = R5 + R3;
R4 = R4 + R3;
R5 >>>= 8;                  // R5 = X2 = (181*(X4 + X5) + 128) >> 8
R4 >>>= 8;                  // R4 = X4 = (181*(X4 - X5) + 128) >> 8
R2 = R2 + R5, R5 = R2 - R5; // R2 = X3 + X2, R5 = X3 - X2
R2 = R2 << 9(S) || I2 += M0;
// Now I0=8*1, I1=8*6, I2=8*3 (I2 is back on row 3).
R2 >>>= 23;
R5 = R5 << 9(S) || I2 += 2;
// Now I0=8*1, I1=8*6, I2=8*3+1 (I2 is ready for the next column).
R5 >>>= 23;
W[I0] = R2.L || I0 += M0;   // col[1]; I0 -> row 2
W[I1] = R5.L || I1 -= M0;   // col[6]; I1 -> row 5
// Now I0=8*2, I1=8*5, I2=8*3+1.
R0 = R0 + R4, R4 = R0 - R4; // R0 = X0 + X4, R4 = X0 - X4
R0 = R0 << 9(S);
R0 >>>= 23;
R4 = R4 << 9(S);
R4 >>>= 23;
W[I0] = R0.L || I0 += M0;   // col[2]; I0 -> row 3
W[I1] = R4.L || I1 -= M0;   // col[5]; I1 -> row 4
// Now I0=8*3, I1=8*4, I2=8*3+1.
R1 = R1 + R7, R7 = R1 - R7; // R1 = X8 + X6, R7 = X8 - X6
R1 = R1 << 9(S);
R1 >>>= 23;
R7 = R7 << 9(S);
R7 >>>= 23;
W[I0] = R1.L || I0 -= M2;   // col[3]; I0 -> row 1 of the next column
W[I1] = R7.L || I1 += M3;   // col[4]; I1 -> row 7 of the next column
// Now I0=8*1+1, I1=8*7+1, I2=8*3+1.
col_e:
// col_e labels this instruction, so it is the last instruction of every
// column iteration: rewind I3 to the start of the coefficient table.
I3 = B3;
_idct_int32.end:
// Epilogue: restore in the reverse order of the prologue pushes.
RETS = [SP++];
(R7:4, P5:3) = [SP++];
RTS;
NOP;
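// ---------------------------------------------------------------------
// The text below came from the code-viewer web page (keyboard-shortcut
// help); it is not part of the assembly source.
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Full-screen mode:     F11
//   Toggle theme:         Ctrl + Shift + D
//   Show shortcuts:       ?
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -
// ---------------------------------------------------------------------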