⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 milli64.s

📁 linux下的gcc编译器
💻 S
📖 第 1 页 / 共 5 页
字号:
   When z is a power of 2 minus 1, then the division by z is slightly   more complicated, involving an iterative solution.   The code presented here solves division by 1 through 17, except for   11 and 13. There are algorithms for both signed and unsigned   quantities given.   TIMINGS (cycles)   divisor  positive  negative	unsigned   .   1	2	   2	     2   .   2	4	   4	     2   .   3       19	  21	    19   .   4	4	   4	     2   .   5       18	  22	    19   .   6       19	  22	    19   .   8	4	   4	     2   .  10       18	  19	    17   .  12       18	  20	    18   .  15       16	  18	    16   .  16	4	   4	     2   .  17       16	  18	    16   Now, the algorithm for 7, 9, and 14 is an iterative one.  That is,   a loop body is executed until the tentative quotient is 0.  The   number of times the loop body is executed varies depending on the   dividend, but is never more than two times.	If the dividend is   less than the divisor, then the loop body is not executed at all.   Each iteration adds 4 cycles to the timings.   divisor  positive  negative	unsigned   .   7       19+4n	 20+4n	   20+4n    n = number of iterations   .   9       21+4n	 22+4n	   21+4n   .  14       21+4n	 22+4n	   20+4n   To give an idea of how the number of iterations varies, here is a   table of dividend versus number of iterations when dividing by 7.   smallest	 largest       required   dividend	dividend      iterations   .	0	     6		    0   .	7	 0x6ffffff	    1   0x1000006	0xffffffff	    2   There is some overlap in the range of numbers requiring 1 and 2   iterations.	*/RDEFINE(t2,r1)RDEFINE(x2,arg0)	/*  r26 */RDEFINE(t1,arg1)	/*  r25 */RDEFINE(x1,ret1)	/*  r29 */	SUBSPA_MILLI_DIV	ATTR_MILLI	.proc	.callinfo	millicode	.entry/* NONE of these routines require a stack frame   ALL of these routines are unwindable from millicode	*/GSYM($$divide_by_constant)	.export $$divide_by_constant,millicode/*  Provides a "nice" label for the code covered by the unwind descriptor    for things like gprof.  *//* DIVISION BY 2 (shift by 1) */GSYM($$divI_2)	.export		$$divI_2,millicode	comclr,>=	arg0,0,0	addi		1,arg0,arg0	MILLIRET	extrs		arg0,30,31,ret1/* DIVISION BY 4 (shift by 2) */GSYM($$divI_4)	.export		$$divI_4,millicode	comclr,>=	arg0,0,0	addi		3,arg0,arg0	MILLIRET	extrs		arg0,29,30,ret1/* DIVISION BY 8 (shift by 3) */GSYM($$divI_8)	.export		$$divI_8,millicode	comclr,>=	arg0,0,0	addi		7,arg0,arg0	MILLIRET	extrs		arg0,28,29,ret1/* DIVISION BY 16 (shift by 4) */GSYM($$divI_16)	.export		$$divI_16,millicode	comclr,>=	arg0,0,0	addi		15,arg0,arg0	MILLIRET	extrs		arg0,27,28,ret1/******************************************************************************	DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these**	includes 3,5,15,17 and also 6,10,12*****************************************************************************//* DIVISION BY 3 (use z = 2**32; a = 55555555) */GSYM($$divI_3)	.export		$$divI_3,millicode	comb,<,N	x2,0,LREF(neg3)	addi		1,x2,x2		/* this can not overflow	*/	extru		x2,1,2,x1	/* multiply by 5 to get started */	sh2add		x2,x2,x2	b		LREF(pos)	addc		x1,0,x1LSYM(neg3)	subi		1,x2,x2		/* this can not overflow	*/	extru		x2,1,2,x1	/* multiply by 5 to get started */	sh2add		x2,x2,x2	b		LREF(neg)	addc		x1,0,x1GSYM($$divU_3)	.export		$$divU_3,millicode	addi		1,x2,x2		/* this CAN overflow */	addc		0,0,x1	shd		x1,x2,30,t1	/* multiply by 5 to get started */	sh2add		x2,x2,x2	b		LREF(pos)	addc		x1,t1,x1/* DIVISION BY 5 (use z = 2**32; a = 33333333) */GSYM($$divI_5)	.export		$$divI_5,millicode	comb,<,N	x2,0,LREF(neg5)	addi		3,x2,t1		/* this can not overflow	*/	sh1add		x2,t1,x2	/* multiply by 3 to get started */	b		LREF(pos)	addc		0,0,x1LSYM(neg5)	sub		0,x2,x2		/* negate x2			*/	addi		1,x2,x2		/* this can not overflow	*/	shd		0,x2,31,x1	/* get top bit (can be 1)	*/	sh1add		x2,x2,x2	/* multiply by 3 to get started */	b		LREF(neg)	addc		x1,0,x1GSYM($$divU_5)	.export		$$divU_5,millicode	addi		1,x2,x2		/* this CAN overflow */	addc		0,0,x1	shd		x1,x2,31,t1	/* multiply by 3 to get started */	sh1add		x2,x2,x2	b		LREF(pos)	addc		t1,x1,x1/* DIVISION BY	6 (shift to divide by 2 then divide by 3) */GSYM($$divI_6)	.export		$$divI_6,millicode	comb,<,N	x2,0,LREF(neg6)	extru		x2,30,31,x2	/* divide by 2			*/	addi		5,x2,t1		/* compute 5*(x2+1) = 5*x2+5	*/	sh2add		x2,t1,x2	/* multiply by 5 to get started */	b		LREF(pos)	addc		0,0,x1LSYM(neg6)	subi		2,x2,x2		/* negate, divide by 2, and add 1 */					/* negation and adding 1 are done */					/* at the same time by the SUBI   */	extru		x2,30,31,x2	shd		0,x2,30,x1	sh2add		x2,x2,x2	/* multiply by 5 to get started */	b		LREF(neg)	addc		x1,0,x1GSYM($$divU_6)	.export		$$divU_6,millicode	extru		x2,30,31,x2	/* divide by 2 */	addi		1,x2,x2		/* can not carry */	shd		0,x2,30,x1	/* multiply by 5 to get started */	sh2add		x2,x2,x2	b		LREF(pos)	addc		x1,0,x1/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */GSYM($$divU_10)	.export		$$divU_10,millicode	extru		x2,30,31,x2	/* divide by 2 */	addi		3,x2,t1		/* compute 3*(x2+1) = (3*x2)+3	*/	sh1add		x2,t1,x2	/* multiply by 3 to get started */	addc		0,0,x1LSYM(pos)	shd		x1,x2,28,t1	/* multiply by 0x11 */	shd		x2,0,28,t2	add		x2,t2,x2	addc		x1,t1,x1LSYM(pos_for_17)	shd		x1,x2,24,t1	/* multiply by 0x101 */	shd		x2,0,24,t2	add		x2,t2,x2	addc		x1,t1,x1	shd		x1,x2,16,t1	/* multiply by 0x10001 */	shd		x2,0,16,t2	add		x2,t2,x2	MILLIRET	addc		x1,t1,x1GSYM($$divI_10)	.export		$$divI_10,millicode	comb,<		x2,0,LREF(neg10)	copy		0,x1	extru		x2,30,31,x2	/* divide by 2 */	addib,TR	1,x2,LREF(pos)	/* add 1 (can not overflow)     */	sh1add		x2,x2,x2	/* multiply by 3 to get started */LSYM(neg10)	subi		2,x2,x2		/* negate, divide by 2, and add 1 */					/* negation and adding 1 are done */					/* at the same time by the SUBI   */	extru		x2,30,31,x2	sh1add		x2,x2,x2	/* multiply by 3 to get started */LSYM(neg)	shd		x1,x2,28,t1	/* multiply by 0x11 */	shd		x2,0,28,t2	add		x2,t2,x2	addc		x1,t1,x1LSYM(neg_for_17)	shd		x1,x2,24,t1	/* multiply by 0x101 */	shd		x2,0,24,t2	add		x2,t2,x2	addc		x1,t1,x1	shd		x1,x2,16,t1	/* multiply by 0x10001 */	shd		x2,0,16,t2	add		x2,t2,x2	addc		x1,t1,x1	MILLIRET	sub		0,x1,x1/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */GSYM($$divI_12)	.export		$$divI_12,millicode	comb,<		x2,0,LREF(neg12)	copy		0,x1	extru		x2,29,30,x2	/* divide by 4			*/	addib,tr	1,x2,LREF(pos)	/* compute 5*(x2+1) = 5*x2+5    */	sh2add		x2,x2,x2	/* multiply by 5 to get started */LSYM(neg12)	subi		4,x2,x2		/* negate, divide by 4, and add 1 */					/* negation and adding 1 are done */					/* at the same time by the SUBI   */	extru		x2,29,30,x2	b		LREF(neg)	sh2add		x2,x2,x2	/* multiply by 5 to get started */GSYM($$divU_12)	.export		$$divU_12,millicode	extru		x2,29,30,x2	/* divide by 4   */	addi		5,x2,t1		/* can not carry */	sh2add		x2,t1,x2	/* multiply by 5 to get started */	b		LREF(pos)	addc		0,0,x1/* DIVISION BY 15 (use z = 2**32; a = 11111111) */GSYM($$divI_15)	.export		$$divI_15,millicode	comb,<		x2,0,LREF(neg15)	copy		0,x1	addib,tr	1,x2,LREF(pos)+4	shd		x1,x2,28,t1LSYM(neg15)	b		LREF(neg)	subi		1,x2,x2GSYM($$divU_15)	.export		$$divU_15,millicode	addi		1,x2,x2		/* this CAN overflow */	b		LREF(pos)	addc		0,0,x1/* DIVISION BY 17 (use z = 2**32; a =  f0f0f0f) */GSYM($$divI_17)	.export		$$divI_17,millicode	comb,<,n	x2,0,LREF(neg17)	addi		1,x2,x2		/* this can not overflow */	shd		0,x2,28,t1	/* multiply by 0xf to get started */	shd		x2,0,28,t2	sub		t2,x2,x2	b		LREF(pos_for_17)	subb		t1,0,x1LSYM(neg17)	subi		1,x2,x2		/* this can not overflow */	shd		0,x2,28,t1	/* multiply by 0xf to get started */	shd		x2,0,28,t2	sub		t2,x2,x2	b		LREF(neg_for_17)	subb		t1,0,x1GSYM($$divU_17)	.export		$$divU_17,millicode	addi		1,x2,x2		/* this CAN overflow */	addc		0,0,x1	shd		x1,x2,28,t1	/* multiply by 0xf to get started */LSYM(u17)	shd		x2,0,28,t2	sub		t2,x2,x2	b		LREF(pos_for_17)	subb		t1,x1,x1/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these   includes 7,9 and also 14   z = 2**24-1   r = z mod x = 0   so choose b = 0   Also, in order to divide by z = 2**24-1, we approximate by dividing   by (z+1) = 2**24 (which is easy), and then correcting.   (ax) = (z+1)q' + r   .	= zq' + (q'+r)   So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)   Then the true remainder of (ax)/z is (q'+r).  Repeat the process   with this new remainder, adding the tentative quotients together,   until a tentative quotient is 0 (and then we are done).  There is   one last correction to be done.  It is possible that (q'+r) = z.   If so, then (q'+r)/(z+1) = 0 and it looks like we are done.	But,   in fact, we need to add 1 more to the quotient.  Now, it turns   out that this happens if and only if the original value x is   an exact multiple of y.  So, to avoid a three instruction test at   the end, instead use 1 instruction to add 1 to x at the beginning.  *//* DIVISION BY 7 (use z = 2**24-1; a = 249249) */GSYM($$divI_7)	.export		$$divI_7,millicode	comb,<,n	x2,0,LREF(neg7)LSYM(7)	addi		1,x2,x2		/* can not overflow */	shd		0,x2,29,x1	sh3add		x2,x2,x2	addc		x1,0,x1LSYM(pos7)	shd		x1,x2,26,t1	shd		x2,0,26,t2	add		x2,t2,x2	addc		x1,t1,x1	shd		x1,x2,20,t1	shd		x2,0,20,t2	add		x2,t2,x2	addc		x1,t1,t1	/* computed <t1,x2>.  Now divide it by (2**24 - 1)	*/	copy		0,x1	shd,=		t1,x2,24,t1	/* tentative quotient  */LSYM(1)	addb,tr		t1,x1,LREF(2)	/* add to previous quotient   */	extru		x2,31,24,x2	/* new remainder (unadjusted) */	MILLIRETNLSYM(2)	addb,tr		t1,x2,LREF(1)	/* adjust remainder */	extru,=		x2,7,8,t1	/* new quotient     */LSYM(neg7)	subi		1,x2,x2		/* negate x2 and add 1 */LSYM(8)	shd		0,x2,29,x1	sh3add		x2,x2,x2	addc		x1,0,x1LSYM(neg7_shift)	shd		x1,x2,26,t1

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -