; Listing copied from a code-download web site (page navigation text removed).
; File:    copy of idct1.sa
; Project: TI DSP TMS320DM642 - measuring function execution time with a timer
; Type:    SA

; Block geometry constants used by _inv_transform_B8.
; B8_SIZE: loop bound for both the row loop and the column loop.
B8_SIZE 	.equ 	8	
Y_SHIFT		.equ 	4		; each row is 8*2 bytes (short elements) = 2^4, so yy << Y_SHIFT is the byte offset of row yy
X_SHIFT		.equ 	1       ; short elements: shifting xx left by 1 gives the byte offset of column xx

;-----------------------------------------------------------------------
; CLIP3 data,offset
;
; Implements the reference expression
;     addr[offset] = (Clip3(-32768, 32703, data) + 64) >> 7
;
; Parameters:
;   data   - register holding the value to process; it is overwritten.
;   offset - register holding the halfword index used in *+addr[offset].
;
; Symbols taken from the expansion context (not parameters):
;   lt, gt       - lower / upper clip bound (read only)
;   flag1, flag2 - scratch predicate registers (overwritten)
;   addr         - base address for the halfword store (read only)
;
; The clamp must run BEFORE the rounding add and the shift: the upper
; bound 32703 is 32767 - 64, so the +64 rounding term cannot push a
; clamped value past the 16-bit maximum.
;-----------------------------------------------------------------------
CLIP3 .macro data,offset
	cmplt 		data,lt,flag1			; flag1 = (data < lt)
[flag1]	mv		lt,data					; clamp to the lower bound
	cmpgt		data,gt,flag2			; flag2 = (data > gt)
[flag2] mv 		gt,data					; clamp to the upper bound
	addk		64,data					; rounding term for the >> 7 below
	shr			data,7,data				; arithmetic shift right by 7
	sth			data,*+addr[offset]		; store the low halfword, addr is not modified
   .endm   
   
    .global     _inv_transform_B8 


;-----------------------------------------------------------------------
; _inv_transform_B8 (curr_blk)
;
; In-place 8x8 integer inverse transform, written as a linear-assembly
; procedure (.cproc / .endproc).
;
;   Pass 1 (yy_loop): 1-D transform of each of the 8 rows; every result
;                     is rounded with (+4) >> 3 and stored back into the
;                     same row.
;   Pass 2 (xx_loop): 1-D transform of each of the 8 columns; every
;                     result is handed to the CLIP3 macro, which stores
;                     it back into the same column.
;
; In:   curr_blk - address of the block. Elements are accessed as
;                  halfwords (ldh/sth); a row is 1 << Y_SHIFT bytes.
; Out:  no return value; the block is overwritten.
;
; CLIP3 (defined earlier in this file) reads lt/gt, overwrites
; flag1/flag2 and stores through addr, so those registers are declared
; here even though this procedure body only initialises lt/gt/addr.
;
; NOTE(review): the reference C kept in the comments at the end of this
; file declares the block as int[8][8] and its temporaries as short int.
; This code addresses 16-bit elements and keeps intermediates in
; registers without narrowing them - confirm callers pass a 16-bit block.
;-----------------------------------------------------------------------
_inv_transform_B8:    .cproc    	curr_blk
    .reg      	xx,yy
	.reg      	tmp0,tmp1,tmp2,tmp3,tmp4,tmp5,tmp6,tmp7
	.reg 		bb0,bb1,bb2,bb3,bb4,bb5,bb6,bb7
	.reg		t0,t1,t2,t3,t4,t5,t6,t7   ; intermediate results / temporaries
	.reg 		addr
	.reg 		loop_flag,flag1,flag2
	.reg		lt,gt	

;----------------------------------------------------------------------
; Pass 1: horizontal (row) inverse transform
;----------------------------------------------------------------------
	mvk 		0,yy
	
yy_loop: .trip 8,8,2
	shl 		yy,Y_SHIFT,addr			; byte offset of row yy
	add			addr,curr_blk,addr		; addr = &curr_blk[yy][0]

	; Reorder: even-indexed inputs go to tmp0..tmp3, odd-indexed to tmp4..tmp7
	ldh			*+addr[0],tmp0	;     tmp[0]=curr_blk1[yy][0];	              
	ldh			*+addr[4],tmp1	;     tmp[1]=curr_blk1[yy][4];	           
	ldh			*+addr[2],tmp2	;     tmp[2]=curr_blk1[yy][2];	           
	ldh			*+addr[6],tmp3	;     tmp[3]=curr_blk1[yy][6];	           
	ldh			*+addr[1],tmp4	;     tmp[4]=curr_blk1[yy][1];	           
	ldh			*+addr[3],tmp5	;     tmp[5]=curr_blk1[yy][3];	           
	ldh			*+addr[5],tmp6	;     tmp[6]=curr_blk1[yy][5];	           
	ldh			*+addr[7],tmp7	;     tmp[7]=curr_blk1[yy][7];	           

	; Down-left butterfly (odd part). bb4..bb7 serve as scratch here
	; before they receive their final b[4]..b[7] values below.
	sub			tmp4,tmp7,bb4	;	b[0] = ((tmp[4] - tmp[7])<<1) + tmp[4];
	shl			bb4,1,bb4
	add			tmp4,bb4,bb0
	
	add			tmp5,tmp6,bb5	;	b[1] = ((tmp[5] + tmp[6])<<1) + tmp[5];
	shl			bb5,1,bb5
	add			tmp5,bb5,bb1
	
	sub			tmp5,tmp6,bb6	;	b[2] = ((tmp[5] - tmp[6])<<1) - tmp[6];
	shl			bb6,1,bb6
	sub			bb6,tmp6,bb2		

	add			tmp4,tmp7,bb7	;	b[3] = ((tmp[4] + tmp[7])<<1) + tmp[7];
	shl			bb7,1,bb7
	add			tmp7,bb7,bb3
	
	add 		bb0,bb1,bb4		;	b[4] = ((b[0] + b[1] + b[3])<<1) + b[1];
	add			bb3,bb4,bb4
	shl			bb4,1,bb4
	add			bb4,bb1,bb4
	
	sub			bb0,bb1,bb5		;	b[5] = ((b[0] - b[1] + b[2])<<1) + b[0];
	add			bb2,bb5,bb5
	shl			bb5,1,bb5
	add 		bb5,bb0,bb5
	
	add			bb1,bb2,bb6		;	b[6] = ((-b[1] - b[2] + b[3])<<1)+ b[3];
	sub			bb3,bb6,bb6		;	(computed as b[3] - (b[1] + b[2]))
	shl 		bb6,1,bb6
	add			bb6,bb3,bb6
	
	sub 		bb0,bb2,bb7		;	b[7] = ((b[0] - b[2] - b[3])<<1) - b[2];
	sub 		bb7,bb3,bb7
	shl 		bb7,1,bb7
	sub 		bb7,bb2,bb7

	; Up-left butterfly (even part). bb0..bb3 are free again from here on
	; and are reused as scratch before receiving the new b[0]..b[3].
	mpy			tmp2,10,bb0		;	 t=((tmp[2]*10)+(tmp[3]<<2));
	shl			tmp3,2,bb1
	add 		bb0,bb1,bb0

	shl			tmp2,2,bb2		;	 tmp[3]=((tmp[2]<<2)-(tmp[3]*10));
	mpy			tmp3,10,bb3
	sub			bb2,bb3,tmp3
	
	mv 			bb0,tmp2 		;     tmp[2]=t;

	add			tmp0,tmp1,bb0	;     t=(tmp[0]+tmp[1])<<3;
	shl 		bb0,3,bb0
	
	sub			tmp0,tmp1,bb2	;     tmp[1]=(tmp[0]-tmp[1])<<3;
	shl			bb2,3,tmp1
	
	mv 			bb0,tmp0			;     tmp[0]=t;

	add			tmp0,tmp2,bb0	;     b[0]=tmp[0]+tmp[2];
	add			tmp1,tmp3,bb1	;     b[1]=tmp[1]+tmp[3];
	sub			tmp1,tmp3,bb2	;     b[2]=tmp[1]-tmp[3];	
	sub			tmp0,tmp2,bb3	;     b[3]=tmp[0]-tmp[2];

	; Last butterfly: combine, round with +4, shift right by 3, store in place
	add			bb0,bb4,t0		;	 curr_blk1[yy][0]=((b[0]+b[4])+4)>>3;
	add			t0,4,t0
	shr 		t0,3,t0
	sth			t0,*+addr[0]
	
	add			bb1,bb5,t1		;	 curr_blk1[yy][1]=((b[1]+b[5])+4)>>3;
	add			t1,4,t1
	shr			t1,3,t1
	sth			t1,*+addr[1]
	
	add			bb2,bb6,t2		;	 curr_blk1[yy][2]=((b[2]+b[6])+4)>>3;
	add			t2,4,t2
	shr			t2,3,t2
	sth			t2,*+addr[2]
	
	add			bb3,bb7,t3		;	 curr_blk1[yy][3]=((b[3]+b[7])+4)>>3;
	add			t3,4,t3
	shr			t3,3,t3
	sth			t3,*+addr[3]

	sub			bb3,bb7,t4		;	 curr_blk1[yy][4]=((b[3]-b[7])+4)>>3;
	add			t4,4,t4
	shr			t4,3,t4
	sth			t4,*+addr[4]	
	
	sub			bb2,bb6,t5		;	 curr_blk1[yy][5]=((b[2]-b[6])+4)>>3;
	add			t5,4,t5
	shr			t5,3,t5
	sth			t5,*+addr[5]	
	
	sub			bb1,bb5,t6		;	 curr_blk1[yy][6]=((b[1]-b[5])+4)>>3;
	add			t6,4,t6
	shr			t6,3,t6
	sth			t6,*+addr[6]
	
	sub			bb0,bb4,t7		;	 curr_blk1[yy][7]=((b[0]-b[4])+4)>>3;
	add			t7,4,t7
	shr			t7,3,t7
	sth			t7,*+addr[7]
	
	add			yy,1,yy			;   for(yy=0; yy<8; yy++)
	cmplt		yy,B8_SIZE,loop_flag
[loop_flag]	b 	yy_loop
	
;----------------------------------------------------------------------
; Pass 2: vertical (column) inverse transform
;----------------------------------------------------------------------
	; Clip bounds read by CLIP3. They never change, so build them once
	; here instead of on every column iteration.
	mvkl		-32768,lt
	mvkh		-32768,lt
	mvkl		32703,gt
	mvkh		32703,gt

	mvk			0,xx
		
xx_loop: .trip 8,8,2	
	shl			xx,X_SHIFT,t0			; byte offset of column xx
	add 		curr_blk,t0,addr		; addr = &curr_blk[0][xx]

	; Halfword index of row r relative to addr is r*8 (8 halfwords per row)
	mvk 		32,t1					; row 4
	mvk 		16,t2					; row 2
	mvk 		48,t3					; row 6
	mvk 		8,t4					; row 1
	mvk 		24,t5					; row 3
	mvk 		40,t6					; row 5
	mvk 		56,t7 					; row 7
	ldh			*+addr[0],tmp0		;     tmp[0]=curr_blk1[0][xx];
	ldh			*+addr[t1],tmp1		;     tmp[1]=curr_blk1[4][xx];
	ldh			*+addr[t2],tmp2		;     tmp[2]=curr_blk1[2][xx];
	ldh			*+addr[t3],tmp3		;     tmp[3]=curr_blk1[6][xx];
	ldh			*+addr[t4],tmp4		;     tmp[4]=curr_blk1[1][xx];
	ldh			*+addr[t5],tmp5		;	  tmp[5]=curr_blk1[3][xx];
	ldh			*+addr[t6],tmp6		;     tmp[6]=curr_blk1[5][xx];
	ldh			*+addr[t7],tmp7		;     tmp[7]=curr_blk1[7][xx];
	
	; Down-left butterfly (odd part), same arithmetic as in pass 1
	sub			tmp4,tmp7,bb4	;	b[0] = ((tmp[4] - tmp[7])<<1) + tmp[4];
	shl			bb4,1,bb4
	add			tmp4,bb4,bb0
	
	add			tmp5,tmp6,bb5	;	b[1] = ((tmp[5] + tmp[6])<<1) + tmp[5];
	shl			bb5,1,bb5
	add			tmp5,bb5,bb1
	
	sub			tmp5,tmp6,bb6	;	b[2] = ((tmp[5] - tmp[6])<<1) - tmp[6];
	shl			bb6,1,bb6
	sub			bb6,tmp6,bb2		

	add			tmp4,tmp7,bb7	;	b[3] = ((tmp[4] + tmp[7])<<1) + tmp[7];
	shl			bb7,1,bb7
	add			tmp7,bb7,bb3
	
	add 		bb0,bb1,bb4		;	b[4] = ((b[0] + b[1] + b[3])<<1) + b[1];
	add			bb3,bb4,bb4
	shl			bb4,1,bb4
	add			bb4,bb1,bb4
	
	sub			bb0,bb1,bb5		;	b[5] = ((b[0] - b[1] + b[2])<<1) + b[0];
	add			bb2,bb5,bb5
	shl			bb5,1,bb5
	add 		bb5,bb0,bb5
	
	add			bb1,bb2,bb6		;	b[6] = ((-b[1] - b[2] + b[3])<<1)+ b[3];
	sub			bb3,bb6,bb6		;	(computed as b[3] - (b[1] + b[2]))
	shl 		bb6,1,bb6
	add			bb6,bb3,bb6
	
	sub 		bb0,bb2,bb7		;	b[7] = ((b[0] - b[2] - b[3])<<1) - b[2];
	sub 		bb7,bb3,bb7
	shl 		bb7,1,bb7
	sub 		bb7,bb2,bb7

	; Up-left butterfly (even part), same arithmetic as in pass 1
	mpy			tmp2,10,bb0		;	 t=((tmp[2]*10)+(tmp[3]<<2));
	shl			tmp3,2,bb1
	add 		bb0,bb1,bb0

	shl			tmp2,2,bb2		;	 tmp[3]=((tmp[2]<<2)-(tmp[3]*10));
	mpy			tmp3,10,bb3
	sub			bb2,bb3,tmp3
	
	mv 			bb0,tmp2 		;     tmp[2]=t;

	add			tmp0,tmp1,bb0	;     t=(tmp[0]+tmp[1])<<3;
	shl 		bb0,3,bb0
	
	sub			tmp0,tmp1,bb2	;     tmp[1]=(tmp[0]-tmp[1])<<3;
	shl			bb2,3,tmp1
	
	mv 			bb0,tmp0			;     tmp[0]=t;

	add			tmp0,tmp2,bb0	;     b[0]=tmp[0]+tmp[2];
	add			tmp1,tmp3,bb1	;     b[1]=tmp[1]+tmp[3];
	sub			tmp1,tmp3,bb2	;     b[2]=tmp[1]-tmp[3];	
	sub			tmp0,tmp2,bb3	;     b[3]=tmp[0]-tmp[2];

	; addr still holds &curr_blk[0][xx]: every access above uses the
	; non-modifying *+addr[...] form, so it is not recomputed here.
	; tmp0..tmp7 are dead now and are reused as the halfword row offsets
	; (row r -> r*8) passed to CLIP3.
	mvk 		0,tmp0
	mvk 		8,tmp1
	mvk 		16,tmp2
	mvk 		24,tmp3
	mvk 		32,tmp4
	mvk 		40,tmp5
	mvk 		48,tmp6
	mvk 		56,tmp7

	; Last butterfly. Each sum/difference is passed to CLIP3 together
	; with the row offset it has to be stored at.
;     reference C: curr_blk1[0][xx]=(Clip3(-32768,32703,b[0]+b[4])+64)>>7;
	add			bb0,bb4,t0
	CLIP3		t0,tmp0
;     reference C: curr_blk1[1][xx]=(Clip3(-32768,32703,b[1]+b[5])+64)>>7;
	add			bb1,bb5,t1
	CLIP3		t1,tmp1
;     reference C: curr_blk1[2][xx]=(Clip3(-32768,32703,b[2]+b[6])+64)>>7;
	add			bb2,bb6,t2
	CLIP3		t2,tmp2
;     reference C: curr_blk1[3][xx]=(Clip3(-32768,32703,b[3]+b[7])+64)>>7;
	add			bb3,bb7,t3
	CLIP3		t3,tmp3
;     reference C: curr_blk1[4][xx]=(Clip3(-32768,32703,b[3]-b[7])+64)>>7;
	sub			bb3,bb7,t4
	CLIP3		t4,tmp4	
;     reference C: curr_blk1[5][xx]=(Clip3(-32768,32703,b[2]-b[6])+64)>>7;
	sub			bb2,bb6,t5
	CLIP3		t5,tmp5		
;     reference C: curr_blk1[6][xx]=(Clip3(-32768,32703,b[1]-b[5])+64)>>7;
	sub			bb1,bb5,t6
	CLIP3		t6,tmp6
;     reference C: curr_blk1[7][xx]=(Clip3(-32768,32703,b[0]-b[4])+64)>>7;
	sub			bb0,bb4,t7
	CLIP3		t7,tmp7		

	add			xx,1,xx			;   for(xx=0; xx<8; xx++)
	cmplt		xx,B8_SIZE,loop_flag
[loop_flag]	b 	xx_loop	
	.endproc

;void inv_transform_B8(int curr_blk1[B8_SIZE][B8_SIZE]  // block to be inverse transformed.
;   )
; {
;   short int xx=0, yy=0;
;   short int tmp[8];
;   short int t=0;
;   short  int b[8];
	     
;   for(yy=0; yy<8; yy++)
;   {
;     // Horizontal inverse transform
;     // Reorder
;     tmp[0]=curr_blk1[yy][0];
;     tmp[1]=curr_blk1[yy][4];
;     tmp[2]=curr_blk1[yy][2];
;     tmp[3]=curr_blk1[yy][6];
;     tmp[4]=curr_blk1[yy][1];
;     tmp[5]=curr_blk1[yy][3];
;     tmp[6]=curr_blk1[yy][5];
;     tmp[7]=curr_blk1[yy][7];
;     
;     // Downleft Butterfly
;/*Lou Change*/
;	b[0] = ((tmp[4] - tmp[7])<<1) + tmp[4];
;	b[1] = ((tmp[5] + tmp[6])<<1) + tmp[5];
;	b[2] = ((tmp[5] - tmp[6])<<1) - tmp[6];
;	b[3] = ((tmp[4] + tmp[7])<<1) + tmp[7];
;
;	b[4] = ((b[0] + b[1] + b[3])<<1) + b[1];
;	b[5] = ((b[0] - b[1] + b[2])<<1) + b[0];
;	b[6] = ((-b[1] - b[2] + b[3])<<1)+ b[3];
;	b[7] = ((b[0] - b[2] - b[3])<<1) - b[2];
;/*Lou End*/
;     
;     // Upleft Butterfly
;     /*Lou Change*/
;	 t=((tmp[2]*10)+(tmp[3]<<2));
;	 tmp[3]=((tmp[2]<<2)-(tmp[3]*10));
;     tmp[2]=t;
;     
;     t=(tmp[0]+tmp[1])<<3;
;     tmp[1]=(tmp[0]-tmp[1])<<3;
;     tmp[0]=t;
;     /*Lou End*/
;     
;     b[0]=tmp[0]+tmp[2];
;     b[1]=tmp[1]+tmp[3];
;     b[2]=tmp[1]-tmp[3];
;     b[3]=tmp[0]-tmp[2];	 
;     
;     // Last Butterfly
;	 /*Lou Change*/
;	 curr_blk1[yy][0]=((b[0]+b[4])+4)>>3;
;	 curr_blk1[yy][1]=((b[1]+b[5])+4)>>3;
;	 curr_blk1[yy][2]=((b[2]+b[6])+4)>>3;
;	 curr_blk1[yy][3]=((b[3]+b[7])+4)>>3;
;	 curr_blk1[yy][7]=((b[0]-b[4])+4)>>3;
;	 curr_blk1[yy][6]=((b[1]-b[5])+4)>>3;
;	 curr_blk1[yy][5]=((b[2]-b[6])+4)>>3;
;	 curr_blk1[yy][4]=((b[3]-b[7])+4)>>3;
;	 /*Lou End*/
;   }
;   // Vertical inverse transform
;   for(xx=0; xx<8; xx++)
;   {
;     
;     // Reorder
;     tmp[0]=curr_blk1[0][xx];
;     tmp[1]=curr_blk1[4][xx];
;     tmp[2]=curr_blk1[2][xx];
;     tmp[3]=curr_blk1[6][xx];
;     tmp[4]=curr_blk1[1][xx];
;     tmp[5]=curr_blk1[3][xx];
;     tmp[6]=curr_blk1[5][xx];
;     tmp[7]=curr_blk1[7][xx];
;     
;     // Downleft Butterfly
;/*Lou Change*/
;
;	b[0] = ((tmp[4] - tmp[7])<<1) + tmp[4];
;	b[1] = ((tmp[5] + tmp[6])<<1) + tmp[5];
;	b[2] = ((tmp[5] - tmp[6])<<1) - tmp[6];
;	b[3] = ((tmp[4] + tmp[7])<<1) + tmp[7];
;
;	b[4] = ((b[0] + b[1] + b[3])<<1) + b[1];
;	b[5] = ((b[0] - b[1] + b[2])<<1) + b[0];
;	b[6] = ((-b[1] - b[2] + b[3])<<1) + b[3];
;	b[7] = ((b[0] - b[2] - b[3])<<1) - b[2];
;/*Lou End*/
;     
;     // Upleft Butterfly
;     /*Lou Change*/
;	 t=((tmp[2]*10)+(tmp[3]<<2));
;	 tmp[3]=((tmp[2]<<2)-(tmp[3]*10));
;     tmp[2]=t;
;     
;     t=(tmp[0]+tmp[1])<<3;
;     tmp[1]=(tmp[0]-tmp[1])<<3;
;     tmp[0]=t;
;     /*Lou End*/
;     
;     b[0]=tmp[0]+tmp[2];
;     b[1]=tmp[1]+tmp[3];
;     b[2]=tmp[1]-tmp[3];
;     b[3]=tmp[0]-tmp[2];
;     
;     // Last Butterfly
;     curr_blk1[0][xx]=/*(b[0]+b[4]+64)>>7;*/(Clip3(-32768,32703,b[0]+b[4])+64)>>7;
;     curr_blk1[1][xx]=/*(b[1]+b[5]+64)>>7;*/(Clip3(-32768,32703,b[1]+b[5])+64)>>7;
;     curr_blk1[2][xx]=/*(b[2]+b[6]+64)>>7;*/(Clip3(-32768,32703,b[2]+b[6])+64)>>7;
;     curr_blk1[3][xx]=/*(b[3]+b[7]+64)>>7;*/(Clip3(-32768,32703,b[3]+b[7])+64)>>7;
;     curr_blk1[7][xx]=/*(b[0]-b[4]+64)>>7;*/(Clip3(-32768,32703,b[0]-b[4])+64)>>7;
;     curr_blk1[6][xx]=/*(b[1]-b[5]+64)>>7;*/(Clip3(-32768,32703,b[1]-b[5])+64)>>7;
;     curr_blk1[5][xx]=/*(b[2]-b[6]+64)>>7;*/(Clip3(-32768,32703,b[2]-b[6])+64)>>7;
;     curr_blk1[4][xx]=/*(b[3]-b[7]+64)>>7;*/(Clip3(-32768,32703,b[3]-b[7])+64)>>7;
;	

;   }
;     
;}
;

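; Keyboard shortcuts of the web code viewer this listing was copied from:
;   copy code         Ctrl + C
;   search code       Ctrl + F
;   full screen       F11
;   switch theme      Ctrl + Shift + D
;   show shortcuts    ?
;   larger font       Ctrl + =
;   smaller font      Ctrl + -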