; File: idct1.sa (copy)
B8_SIZE .equ 8
Y_SHIFT .equ 4 ; each row holds 8 * 2 bytes (short elements) = 2^4 bytes, so row byte offset = yy << 4
X_SHIFT .equ 1 ; elements are shorts, so column byte offset = xx << 1
; ---------------------------------------------------------------------------
; CLIP3 data,offset
;
; Implements the reference expression
;       (Clip3(-32768, 32703, data) + 64) >> 7
; and stores the 16-bit result at halfword index `offset` relative to `addr`.
;
;   data   - register holding the butterfly sum; it is overwritten.
;   offset - halfword index (register or constant) used by the store.
;
; The macro is not self-contained: it uses the following registers of the
; enclosing procedure, which must be set up before the macro is expanded:
;   lt, gt        - lower / upper clip bounds
;   flag1, flag2  - scratch predicate registers
;   addr          - base address for the store
;
; The clamp is applied BEFORE the +64 rounding and the >>7 scaling, matching
; the reference. Clamping after the shift would leave out-of-range sums
; effectively unclipped.
; ---------------------------------------------------------------------------
CLIP3   .macro  data,offset
        cmplt   data,lt,flag1           ; data < lower bound ?
 [flag1] mv     lt,data                 ;   yes -> clamp to lower bound
        cmpgt   data,gt,flag2           ; data > upper bound ?
 [flag2] mv     gt,data                 ;   yes -> clamp to upper bound
        addk    64,data                 ; rounding term for the shift by 7
        shr     data,7,data             ; arithmetic shift: divide by 128
        sth     data,*+addr[offset]     ; store the low 16 bits
        .endm
; ---------------------------------------------------------------------------
; _inv_transform_B8 - in-place 8x8 inverse transform (C6000 linear assembly)
;
; In:   curr_blk - address of the 8x8 coefficient block. The block is accessed
;                  with ldh/sth and a row stride of 1 << Y_SHIFT bytes, i.e. as
;                  8 rows of 8 halfwords.
; Out:  nothing is returned; the block is overwritten with the result.
;
; Pass 1 (yy_loop): transform each row,    store (sum + 4) >> 3.
; Pass 2 (xx_loop): transform each column, store through the CLIP3 macro.
;
; Both passes use the same butterfly; the pseudo-C in the end-of-line comments
; names its terms tmp[] and b[]. Intermediate values are held in 32-bit
; registers here.
;
; CLIP3 (defined earlier in this file) reads the registers lt, gt and addr and
; uses flag1/flag2 as scratch, so those must be valid wherever it is expanded.
; ---------------------------------------------------------------------------
        .global _inv_transform_B8
_inv_transform_B8: .cproc curr_blk
        .reg    xx,yy
        .reg    tmp0,tmp1,tmp2,tmp3,tmp4,tmp5,tmp6,tmp7
        .reg    bb0,bb1,bb2,bb3,bb4,bb5,bb6,bb7
        .reg    t0,t1,t2,t3,t4,t5,t6,t7         ; temporaries for intermediate results
        .reg    addr
        .reg    loop_flag,flag1,flag2
        .reg    lt,gt

; ===================== Pass 1: horizontal (row) transform ==================
        mvk     0,yy

yy_loop: .trip 8,8,2
        shl     yy,Y_SHIFT,addr                 ; addr = &curr_blk[yy][0]
        add     addr,curr_blk,addr

        ; Reorder: even-indexed inputs to tmp0..3, odd-indexed to tmp4..7
        ldh     *+addr[0],tmp0                  ; tmp[0]=curr_blk1[yy][0];
        ldh     *+addr[4],tmp1                  ; tmp[1]=curr_blk1[yy][4];
        ldh     *+addr[2],tmp2                  ; tmp[2]=curr_blk1[yy][2];
        ldh     *+addr[6],tmp3                  ; tmp[3]=curr_blk1[yy][6];
        ldh     *+addr[1],tmp4                  ; tmp[4]=curr_blk1[yy][1];
        ldh     *+addr[3],tmp5                  ; tmp[5]=curr_blk1[yy][3];
        ldh     *+addr[5],tmp6                  ; tmp[6]=curr_blk1[yy][5];
        ldh     *+addr[7],tmp7                  ; tmp[7]=curr_blk1[yy][7];

        ; Down-left butterfly, first stage (bb4..bb7 serve as scratch here)
        sub     tmp4,tmp7,bb4                   ; b[0] = ((tmp[4] - tmp[7])<<1) + tmp[4];
        shl     bb4,1,bb4
        add     tmp4,bb4,bb0
        add     tmp5,tmp6,bb5                   ; b[1] = ((tmp[5] + tmp[6])<<1) + tmp[5];
        shl     bb5,1,bb5
        add     tmp5,bb5,bb1
        sub     tmp5,tmp6,bb6                   ; b[2] = ((tmp[5] - tmp[6])<<1) - tmp[6];
        shl     bb6,1,bb6
        sub     bb6,tmp6,bb2
        add     tmp4,tmp7,bb7                   ; b[3] = ((tmp[4] + tmp[7])<<1) + tmp[7];
        shl     bb7,1,bb7
        add     tmp7,bb7,bb3

        ; Down-left butterfly, second stage
        add     bb0,bb1,bb4                     ; b[4] = ((b[0] + b[1] + b[3])<<1) + b[1];
        add     bb3,bb4,bb4
        shl     bb4,1,bb4
        add     bb4,bb1,bb4
        sub     bb0,bb1,bb5                     ; b[5] = ((b[0] - b[1] + b[2])<<1) + b[0];
        add     bb2,bb5,bb5
        shl     bb5,1,bb5
        add     bb5,bb0,bb5
        add     bb1,bb2,bb6                     ; b[6] = ((-b[1] - b[2] + b[3])<<1)+ b[3];
        sub     bb3,bb6,bb6
        shl     bb6,1,bb6
        add     bb6,bb3,bb6
        sub     bb0,bb2,bb7                     ; b[7] = ((b[0] - b[2] - b[3])<<1) - b[2];
        sub     bb7,bb3,bb7
        shl     bb7,1,bb7
        sub     bb7,bb2,bb7

        ; Up-left butterfly (bb0..bb3 are free again and reused as scratch)
        mpy     tmp2,10,bb0                     ; t=((tmp[2]*10)+(tmp[3]<<2));
        shl     tmp3,2,bb1
        add     bb0,bb1,bb0
        shl     tmp2,2,bb2                      ; tmp[3]=((tmp[2]<<2)-(tmp[3]*10));
        mpy     tmp3,10,bb3
        sub     bb2,bb3,tmp3
        mv      bb0,tmp2                        ; tmp[2]=t;
        add     tmp0,tmp1,bb0                   ; t=(tmp[0]+tmp[1])<<3;
        shl     bb0,3,bb0
        sub     tmp0,tmp1,bb2                   ; tmp[1]=(tmp[0]-tmp[1])<<3;
        shl     bb2,3,tmp1
        mv      bb0,tmp0                        ; tmp[0]=t;
        add     tmp0,tmp2,bb0                   ; b[0]=tmp[0]+tmp[2];
        add     tmp1,tmp3,bb1                   ; b[1]=tmp[1]+tmp[3];
        sub     tmp1,tmp3,bb2                   ; b[2]=tmp[1]-tmp[3];
        sub     tmp0,tmp2,bb3                   ; b[3]=tmp[0]-tmp[2];

        ; Last butterfly: round (+4), scale (>>3) and write the row back
        add     bb0,bb4,t0                      ; curr_blk1[yy][0]=((b[0]+b[4])+4)>>3;
        add     t0,4,t0
        shr     t0,3,t0
        sth     t0,*+addr[0]
        add     bb1,bb5,t1                      ; curr_blk1[yy][1]=((b[1]+b[5])+4)>>3;
        add     t1,4,t1
        shr     t1,3,t1
        sth     t1,*+addr[1]
        add     bb2,bb6,t2                      ; curr_blk1[yy][2]=((b[2]+b[6])+4)>>3;
        add     t2,4,t2
        shr     t2,3,t2
        sth     t2,*+addr[2]
        add     bb3,bb7,t3                      ; curr_blk1[yy][3]=((b[3]+b[7])+4)>>3;
        add     t3,4,t3
        shr     t3,3,t3
        sth     t3,*+addr[3]
        sub     bb3,bb7,t4                      ; curr_blk1[yy][4]=((b[3]-b[7])+4)>>3;
        add     t4,4,t4
        shr     t4,3,t4
        sth     t4,*+addr[4]
        sub     bb2,bb6,t5                      ; curr_blk1[yy][5]=((b[2]-b[6])+4)>>3;
        add     t5,4,t5
        shr     t5,3,t5
        sth     t5,*+addr[5]
        sub     bb1,bb5,t6                      ; curr_blk1[yy][6]=((b[1]-b[5])+4)>>3;
        add     t6,4,t6
        shr     t6,3,t6
        sth     t6,*+addr[6]
        sub     bb0,bb4,t7                      ; curr_blk1[yy][7]=((b[0]-b[4])+4)>>3;
        add     t7,4,t7
        shr     t7,3,t7
        sth     t7,*+addr[7]

        add     yy,1,yy                         ; for(yy=0; yy<8; yy++)
        cmplt   yy,B8_SIZE,loop_flag
 [loop_flag] b  yy_loop

; ===================== Pass 2: vertical (column) transform =================
        mvk     0,xx

        ; Clip bounds read by CLIP3. They never change, so they are loaded once
        ; here instead of on every column iteration.
        mvkl    -32768,lt
        mvkh    -32768,lt
        mvkl    32703,gt
        mvkh    32703,gt

xx_loop: .trip 8,8,2
        shl     xx,X_SHIFT,t0                   ; addr = &curr_blk[0][xx]; addr is not
        add     curr_blk,t0,addr                ; written again, so the stores reuse it

        ; Halfword index of row r in this column is r * 8 (8 shorts per row).
        mvk     32,t1                           ; row 4
        mvk     16,t2                           ; row 2
        mvk     48,t3                           ; row 6
        mvk     8,t4                            ; row 1
        mvk     24,t5                           ; row 3
        mvk     40,t6                           ; row 5
        mvk     56,t7                           ; row 7

        ; Reorder: even-indexed inputs to tmp0..3, odd-indexed to tmp4..7
        ldh     *+addr[0],tmp0                  ; tmp[0]=curr_blk1[0][xx];
        ldh     *+addr[t1],tmp1                 ; tmp[1]=curr_blk1[4][xx];
        ldh     *+addr[t2],tmp2                 ; tmp[2]=curr_blk1[2][xx];
        ldh     *+addr[t3],tmp3                 ; tmp[3]=curr_blk1[6][xx];
        ldh     *+addr[t4],tmp4                 ; tmp[4]=curr_blk1[1][xx];
        ldh     *+addr[t5],tmp5                 ; tmp[5]=curr_blk1[3][xx];
        ldh     *+addr[t6],tmp6                 ; tmp[6]=curr_blk1[5][xx];
        ldh     *+addr[t7],tmp7                 ; tmp[7]=curr_blk1[7][xx];

        ; Down-left butterfly, first stage (bb4..bb7 serve as scratch here)
        sub     tmp4,tmp7,bb4                   ; b[0] = ((tmp[4] - tmp[7])<<1) + tmp[4];
        shl     bb4,1,bb4
        add     tmp4,bb4,bb0
        add     tmp5,tmp6,bb5                   ; b[1] = ((tmp[5] + tmp[6])<<1) + tmp[5];
        shl     bb5,1,bb5
        add     tmp5,bb5,bb1
        sub     tmp5,tmp6,bb6                   ; b[2] = ((tmp[5] - tmp[6])<<1) - tmp[6];
        shl     bb6,1,bb6
        sub     bb6,tmp6,bb2
        add     tmp4,tmp7,bb7                   ; b[3] = ((tmp[4] + tmp[7])<<1) + tmp[7];
        shl     bb7,1,bb7
        add     tmp7,bb7,bb3

        ; Down-left butterfly, second stage
        add     bb0,bb1,bb4                     ; b[4] = ((b[0] + b[1] + b[3])<<1) + b[1];
        add     bb3,bb4,bb4
        shl     bb4,1,bb4
        add     bb4,bb1,bb4
        sub     bb0,bb1,bb5                     ; b[5] = ((b[0] - b[1] + b[2])<<1) + b[0];
        add     bb2,bb5,bb5
        shl     bb5,1,bb5
        add     bb5,bb0,bb5
        add     bb1,bb2,bb6                     ; b[6] = ((-b[1] - b[2] + b[3])<<1)+ b[3];
        sub     bb3,bb6,bb6
        shl     bb6,1,bb6
        add     bb6,bb3,bb6
        sub     bb0,bb2,bb7                     ; b[7] = ((b[0] - b[2] - b[3])<<1) - b[2];
        sub     bb7,bb3,bb7
        shl     bb7,1,bb7
        sub     bb7,bb2,bb7

        ; Up-left butterfly (bb0..bb3 are free again and reused as scratch)
        mpy     tmp2,10,bb0                     ; t=((tmp[2]*10)+(tmp[3]<<2));
        shl     tmp3,2,bb1
        add     bb0,bb1,bb0
        shl     tmp2,2,bb2                      ; tmp[3]=((tmp[2]<<2)-(tmp[3]*10));
        mpy     tmp3,10,bb3
        sub     bb2,bb3,tmp3
        mv      bb0,tmp2                        ; tmp[2]=t;
        add     tmp0,tmp1,bb0                   ; t=(tmp[0]+tmp[1])<<3;
        shl     bb0,3,bb0
        sub     tmp0,tmp1,bb2                   ; tmp[1]=(tmp[0]-tmp[1])<<3;
        shl     bb2,3,tmp1
        mv      bb0,tmp0                        ; tmp[0]=t;
        add     tmp0,tmp2,bb0                   ; b[0]=tmp[0]+tmp[2];
        add     tmp1,tmp3,bb1                   ; b[1]=tmp[1]+tmp[3];
        sub     tmp1,tmp3,bb2                   ; b[2]=tmp[1]-tmp[3];
        sub     tmp0,tmp2,bb3                   ; b[3]=tmp[0]-tmp[2];

        ; tmp0..tmp7 no longer hold data; reuse them as the halfword store
        ; offsets of rows 0..7 (row * 8) passed to CLIP3.
        mvk     0,tmp0
        mvk     8,tmp1
        mvk     16,tmp2
        mvk     24,tmp3
        mvk     32,tmp4
        mvk     40,tmp5
        mvk     48,tmp6
        mvk     56,tmp7

        ; Last butterfly: CLIP3 turns each sum into the final value and stores
        ; it at addr[offset].
        ; curr_blk1[0][xx]=(Clip3(-32768,32703,b[0]+b[4])+64)>>7;
        add     bb0,bb4,t0
        CLIP3   t0,tmp0
        ; curr_blk1[1][xx]=(Clip3(-32768,32703,b[1]+b[5])+64)>>7;
        add     bb1,bb5,t1
        CLIP3   t1,tmp1
        ; curr_blk1[2][xx]=(Clip3(-32768,32703,b[2]+b[6])+64)>>7;
        add     bb2,bb6,t2
        CLIP3   t2,tmp2
        ; curr_blk1[3][xx]=(Clip3(-32768,32703,b[3]+b[7])+64)>>7;
        add     bb3,bb7,t3
        CLIP3   t3,tmp3
        ; curr_blk1[4][xx]=(Clip3(-32768,32703,b[3]-b[7])+64)>>7;
        sub     bb3,bb7,t4
        CLIP3   t4,tmp4
        ; curr_blk1[5][xx]=(Clip3(-32768,32703,b[2]-b[6])+64)>>7;
        sub     bb2,bb6,t5
        CLIP3   t5,tmp5
        ; curr_blk1[6][xx]=(Clip3(-32768,32703,b[1]-b[5])+64)>>7;
        sub     bb1,bb5,t6
        CLIP3   t6,tmp6
        ; curr_blk1[7][xx]=(Clip3(-32768,32703,b[0]-b[4])+64)>>7;
        sub     bb0,bb4,t7
        CLIP3   t7,tmp7

        add     xx,1,xx                         ; for(xx=0; xx<8; xx++)
        cmplt   xx,B8_SIZE,loop_flag
 [loop_flag] b  xx_loop

        .endproc
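; Reference C implementation that the linear assembly above follows.
; NOTE: the prototype below declares int elements, whereas the assembly accesses
; the block with ldh/sth and a 16-byte row stride, i.e. as 16-bit (short) elements.
;void inv_transform_B8(int curr_blk1[B8_SIZE][B8_SIZE] // block to be inverse transformed.
; )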
; {
; short int xx=0, yy=0;
; short int tmp[8];
; short int t=0;
; short int b[8];
; for(yy=0; yy<8; yy++)
; {
; // Horizontal inverse transform
; // Reorder
; tmp[0]=curr_blk1[yy][0];
; tmp[1]=curr_blk1[yy][4];
; tmp[2]=curr_blk1[yy][2];
; tmp[3]=curr_blk1[yy][6];
; tmp[4]=curr_blk1[yy][1];
; tmp[5]=curr_blk1[yy][3];
; tmp[6]=curr_blk1[yy][5];
; tmp[7]=curr_blk1[yy][7];
;
; // Downleft Butterfly
;/*Lou Change*/
; b[0] = ((tmp[4] - tmp[7])<<1) + tmp[4];
; b[1] = ((tmp[5] + tmp[6])<<1) + tmp[5];
; b[2] = ((tmp[5] - tmp[6])<<1) - tmp[6];
; b[3] = ((tmp[4] + tmp[7])<<1) + tmp[7];
;
; b[4] = ((b[0] + b[1] + b[3])<<1) + b[1];
; b[5] = ((b[0] - b[1] + b[2])<<1) + b[0];
; b[6] = ((-b[1] - b[2] + b[3])<<1)+ b[3];
; b[7] = ((b[0] - b[2] - b[3])<<1) - b[2];
;/*Lou End*/
;
; // Upleft Butterfly
; /*Lou Change*/
; t=((tmp[2]*10)+(tmp[3]<<2));
; tmp[3]=((tmp[2]<<2)-(tmp[3]*10));
; tmp[2]=t;
;
; t=(tmp[0]+tmp[1])<<3;
; tmp[1]=(tmp[0]-tmp[1])<<3;
; tmp[0]=t;
; /*Lou End*/
;
; b[0]=tmp[0]+tmp[2];
; b[1]=tmp[1]+tmp[3];
; b[2]=tmp[1]-tmp[3];
; b[3]=tmp[0]-tmp[2];
;
; // Last Butterfly
; /*Lou Change*/
; curr_blk1[yy][0]=((b[0]+b[4])+4)>>3;
; curr_blk1[yy][1]=((b[1]+b[5])+4)>>3;
; curr_blk1[yy][2]=((b[2]+b[6])+4)>>3;
; curr_blk1[yy][3]=((b[3]+b[7])+4)>>3;
; curr_blk1[yy][7]=((b[0]-b[4])+4)>>3;
; curr_blk1[yy][6]=((b[1]-b[5])+4)>>3;
; curr_blk1[yy][5]=((b[2]-b[6])+4)>>3;
; curr_blk1[yy][4]=((b[3]-b[7])+4)>>3;
; /*Lou End*/
; }
; // Vertical inverse transform
; for(xx=0; xx<8; xx++)
; {
;
; // Reorder
; tmp[0]=curr_blk1[0][xx];
; tmp[1]=curr_blk1[4][xx];
; tmp[2]=curr_blk1[2][xx];
; tmp[3]=curr_blk1[6][xx];
; tmp[4]=curr_blk1[1][xx];
; tmp[5]=curr_blk1[3][xx];
; tmp[6]=curr_blk1[5][xx];
; tmp[7]=curr_blk1[7][xx];
;
; // Downleft Butterfly
;/*Lou Change*/
;
; b[0] = ((tmp[4] - tmp[7])<<1) + tmp[4];
; b[1] = ((tmp[5] + tmp[6])<<1) + tmp[5];
; b[2] = ((tmp[5] - tmp[6])<<1) - tmp[6];
; b[3] = ((tmp[4] + tmp[7])<<1) + tmp[7];
;
; b[4] = ((b[0] + b[1] + b[3])<<1) + b[1];
; b[5] = ((b[0] - b[1] + b[2])<<1) + b[0];
; b[6] = ((-b[1] - b[2] + b[3])<<1) + b[3];
; b[7] = ((b[0] - b[2] - b[3])<<1) - b[2];
;/*Lou End*/
;
; // Upleft Butterfly
; /*Lou Change*/
; t=((tmp[2]*10)+(tmp[3]<<2));
; tmp[3]=((tmp[2]<<2)-(tmp[3]*10));
; tmp[2]=t;
;
; t=(tmp[0]+tmp[1])<<3;
; tmp[1]=(tmp[0]-tmp[1])<<3;
; tmp[0]=t;
; /*Lou End*/
;
; b[0]=tmp[0]+tmp[2];
; b[1]=tmp[1]+tmp[3];
; b[2]=tmp[1]-tmp[3];
; b[3]=tmp[0]-tmp[2];
;
; // Last Butterfly
; curr_blk1[0][xx]=/*(b[0]+b[4]+64)>>7;*/(Clip3(-32768,32703,b[0]+b[4])+64)>>7;
; curr_blk1[1][xx]=/*(b[1]+b[5]+64)>>7;*/(Clip3(-32768,32703,b[1]+b[5])+64)>>7;
; curr_blk1[2][xx]=/*(b[2]+b[6]+64)>>7;*/(Clip3(-32768,32703,b[2]+b[6])+64)>>7;
; curr_blk1[3][xx]=/*(b[3]+b[7]+64)>>7;*/(Clip3(-32768,32703,b[3]+b[7])+64)>>7;
; curr_blk1[7][xx]=/*(b[0]-b[4]+64)>>7;*/(Clip3(-32768,32703,b[0]-b[4])+64)>>7;
; curr_blk1[6][xx]=/*(b[1]-b[5]+64)>>7;*/(Clip3(-32768,32703,b[1]-b[5])+64)>>7;
; curr_blk1[5][xx]=/*(b[2]-b[6]+64)>>7;*/(Clip3(-32768,32703,b[2]-b[6])+64)>>7;
; curr_blk1[4][xx]=/*(b[3]-b[7]+64)>>7;*/(Clip3(-32768,32703,b[3]-b[7])+64)>>7;
;
; }
;
;}
;
; (end of file)