📄 milli64.s

📁 linux下的gcc编译器
💻 S
📖 第 1 页 / 共 5 页
字号:
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  9th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  10th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  11th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  12th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  13th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  14th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  15th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  16th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  17th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  18th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  19th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  20th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  21st divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  22nd divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  23rd divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  24th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  25th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 tmp,arg1, tmp		/*  26th divide step */	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */	ds		 
tmp,arg1, tmp		/*  27th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  28th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  29th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  30th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  31st divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  32nd divide step, */
	addc		retreg,retreg,retreg	/*  shift last bit into retreg */
	movb,>=,n	 tmp,retreg,LREF(finish) /*  branch if pos.  tmp */
	add,<		arg1,0,0		/*  if arg1 > 0, add arg1 */
	add,tr		 tmp,arg1,retreg	/*    for correcting remainder tmp */
	sub		 tmp,arg1,retreg	/*  else add absolute value arg1 */
LSYM(finish)
	add,>=		arg0,0,0		/*  set sign of remainder */
	sub		0,retreg,retreg		/*    to sign of dividend */
	MILLIRET
	nop
	.exit
	.procend
#ifdef milliext
	.origin 0x00000200
#endif
	.end
#endif

#ifdef L_remU
/* ROUTINE:	$$remU
   .	Single precision divide for remainder with unsigned binary integers.
   .
   .	The remainder must be dividend-(dividend/divisor)*divisor.
   .	Divide by zero is trapped.

   INPUT REGISTERS:
   .	arg0 ==	dividend
   .	arg1 == divisor
   .	mrp  == return pc
   .	sr0  == return space when called externally

   OUTPUT REGISTERS:
   .	arg0 =	undefined
   .	arg1 =	undefined
   .	ret1 =	remainder

   OTHER REGISTERS AFFECTED:
   .	r1   =	undefined

   SIDE EFFECTS:
   .	Causes a trap under the following conditions:  DIVIDE BY ZERO
   .	Changes memory at the following places:  NONE

   PERMISSIBLE CONTEXT:
   .	Unwindable.
   .	Does not create a stack frame.
   .	Suitable for internal or external millicode.
   .	Assumes the special millicode register conventions.

   DISCUSSION:
   .	Calls other millicode routines using mrp: NONE
   .	Calls other millicode routines: NONE

   CODE PATHS:
   .	1) arg1 & (arg1-1) == 0:  the result is arg0 & (arg1-1).  A zero
   .	   divisor also satisfies this test; it is caught by the ADDIT trap.
   .	2) regular_seq:  32 DS divide steps followed by one correction add.
   .	3) special_case:  taken when 0 >= arg1 as a signed compare (COMIB);
   .	   the remainder is then either arg0-arg1 or arg0 itself.  */

RDEFINE(temp,r1)
RDEFINE(rmndr,ret1)	/*  r29 */
	SUBSPA_MILLI
	ATTR_MILLI
	.export $$remU,millicode
	.proc
	.callinfo	millicode
	.entry
GSYM($$remU)
	ldo	-1(arg1),temp		/*  is there at most one bit set ? */
	and,=	arg1,temp,r0		/*  if not, don't use power of 2 */
	b	LREF(regular_seq)
	/* NOTE(review): the ADDIT follows the branch directly, so it looks
	   like it runs on both paths (delay slot when the branch is taken,
	   fall-through when the branch is nullified) -- confirm.  */
	addit,=	0,arg1,r0		/*  trap on div by zero */
	and	arg0,temp,rmndr		/*  get the result for power of 2 */
	MILLIRETN
LSYM(regular_seq)
	comib,>=,n  0,arg1,LREF(special_case)	/*  0 >= arg1 (signed): no divide steps */
	subi	0,arg1,rmndr		/*  clear carry, negate the divisor */
	ds	r0,rmndr,r0		/*  set V-bit to 1 */
	add	arg0,arg0,temp		/*  shift msb bit into carry */
	ds	r0,arg1,rmndr		/*  1st divide step, if no carry */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  2nd divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  3rd divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  4th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  5th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  6th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  7th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  8th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  9th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  10th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  11th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  12th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  13th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  14th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  15th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  16th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  17th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  18th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  19th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  20th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  21st divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  22nd divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  23rd divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  24th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  25th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  26th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  27th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  28th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  29th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  30th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  31st divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  32nd divide step, */
	comiclr,<= 0,rmndr,r0		/*  skip the correction when 0 <= rmndr */
	  add	rmndr,arg1,rmndr	/*  correction */
	MILLIRETN
	nop

/* Putting >= on the last DS and deleting COMICLR does not work!  */
LSYM(special_case)
	sub,>>=	arg0,arg1,rmndr		/*  rmndr = arg0-arg1; keep it if arg0 >>= arg1 */
	  copy	arg0,rmndr		/*  otherwise the dividend is the remainder */
	MILLIRETN
	nop
	.exit
	.procend
	.end
#endif

#ifdef L_div_const
/* ROUTINE:	$$divI_2
   .		$$divI_3	$$divU_3
   .		$$divI_4
   .		$$divI_5	$$divU_5
   .		$$divI_6	$$divU_6
   .		$$divI_7	$$divU_7
   .		$$divI_8
   .		$$divI_9	$$divU_9
   .		$$divI_10	$$divU_10
   .
   .		$$divI_12	$$divU_12
   .
   .		$$divI_14	$$divU_14
   .		$$divI_15	$$divU_15
   .		$$divI_16
   .		$$divI_17	$$divU_17
   .
   .	Divide by selected constants for single precision binary integers.

   INPUT REGISTERS:
   .	arg0 ==	dividend
   .	mrp  == return pc
   .	sr0  == return space when called externally

   OUTPUT REGISTERS:
   .	arg0 =	undefined
   .	arg1 =	undefined
   .	ret1 =	quotient

   OTHER REGISTERS AFFECTED:
   .	r1   =	undefined

   SIDE EFFECTS:
   .	Causes a trap under the following conditions: NONE
   .	Changes memory at the following places:  NONE

   PERMISSIBLE CONTEXT:
   .	Unwindable.
   .	Does not create a stack frame.
   .	Suitable for internal or external millicode.
   .	Assumes the special millicode register conventions.

   DISCUSSION:
   .	Calls other millicode routines using mrp:  NONE
   .	Calls other millicode routines:  NONE  */


/* TRUNCATED DIVISION BY SMALL INTEGERS

   We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
   (with y fixed).

   Let a = floor(z/y), for some choice of z.  Note that z will be
   chosen so that division by z is cheap.

   Let r be the remainder(z/y).  In other words, r = z - ay.

   Now, our method is to choose a value for b such that

   q'(x) = floor((ax+b)/z)

   is equal to q(x) over as large a range of x as possible.  If the
   two are equal over a sufficiently large range, and if it is easy to
   form the product (ax), and it is easy to divide by z, then we can
   perform the division much faster than the general division algorithm.

   So, we want the following to be true:

   .	For x in the following range:
   .
   .	    ky <= x < (k+1)y
   .
   .	implies that
   .
   .	    k <= (ax+b)/z < (k+1)

   We want to determine b such that this is true for all k in the
   range {0..K} for some maximum K.

   Since (ax+b) is an increasing function of x, we can take each
   bound separately to determine the "best" value for b.

   (ax+b)/z < (k+1)	       implies

   a((k+1)y-1)+b < (k+1)z      implies

   b < a + (k+1)(z-ay)	       implies

   b < a + (k+1)r

   This needs to be true for all k in the range {0..K}.  In
   particular, it is true for k = 0 and this leads to a maximum
   acceptable value for b.

   b < a+r   or   b <= a+r-1

   Taking the other bound, we have

   k <= (ax+b)/z	       implies

   k <= (aky+b)/z	       implies

   k(z-ay) <= b		       implies

   kr <= b

   Clearly, the largest range for k will be achieved by maximizing b,
   when r is not zero.  When r is zero, then the simplest choice for b
   is 0.  When r is not 0, set

   .	b = a+r-1

   Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
   for all x in the range:

   .	0 <= x < (K+1)y

   We need to determine what K is.  Of our two bounds,

   .	b < a+(k+1)r	is satisfied for all k >= 0, by construction.

   The other bound is

   .	kr <= b

   This is always true if r = 0.  If r is not 0 (the usual case), then
   K = floor((a+r-1)/r), is the maximum value for k.

   Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
   answer for q(x) = floor(x/y) when x is in the range

   0 <= x <= (K+1)y-1	       K = floor((a+r-1)/r)

   To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
   the formula for q'(x) yields the correct value of q(x) for all x
   representable by a single word in HPPA.

   We are also constrained in that computing the product (ax), adding
   b, and dividing by z must all be done quickly, otherwise we will be
   better off going through the general algorithm using the DS
   instruction, which uses approximately 70 cycles.

   For each y, there is a choice of z which satisfies the constraints
   for (K+1)y >= 2**32.
   We may not, however, be able to satisfy the
   timing constraints for arbitrary y.  It seems that z being equal to
   a power of 2 or a power of 2 minus 1 is as good as we can do, since
   it minimizes the time to do division by z.  We want the choice of z
   to also result in a value for (a) that minimizes the computation of
   the product (ax).  This is best achieved if (a) has a regular bit
   pattern (so the multiplication can be done with shifts and adds).
   The value of (a) also needs to be less than 2**32 so the product is
   always guaranteed to fit in 2 words.

   In actual practice, the following should be done:

   1) For negative x, you should take the absolute value and remember
   .  the fact so that the result can be negated.  This obviously does
   .  not apply in the unsigned case.
   2) For even y, you should factor out the power of 2 that divides y
   .  and divide x by it.  You can then proceed by dividing by the
   .  odd factor of y.

   Here is a table of some odd values of y, and corresponding choices
   for z which are "good".

    y	  z	  r	 a (hex)     max x (hex)

    3	2**32	  1	55555555      100000001
    5	2**32	  1	33333333      100000003
    7  2**24-1	  0	  249249     (infinite)
    9  2**24-1	  0	  1c71c7     (infinite)
   11  2**20-1	  0	   1745d     (infinite)
   13  2**24-1	  0	  13b13b     (infinite)
   15	2**32	  1	11111111      10000000d
   17	2**32	  1	 f0f0f0f      10000000f

   If r is 1, then b = a+r-1 = a.  This simplifies the computation
   of (ax+b), since you can compute (x+1)(a) instead.  If r is 0,
   then b = 0 is ok to use which simplifies (ax+b).

   The bit patterns for 55555555, 33333333, and 11111111 are obviously
   very regular.
   The bit patterns for the other values of a above are:

    y	   (hex)	  (binary)

    7	  249249  001001001001001001001001  << regular >>
    9	  1c71c7  000111000111000111000111  << regular >>
   11	   1745d  000000010111010001011101  << irregular >>
   13	  13b13b  000100111011000100111011  << irregular >>

   The bit patterns for (a) corresponding to (y) of 11 and 13 may be
   too irregular to warrant using this method.
💿 文件大小 30755 K
👤 上传用户 xfzzp_0321
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#linux #gcc #编译器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -