📄 ccmmath_cpu.s
字号:
#undef EXP
#undef F

/*
 * CVMCCMruntimeI2F / CVMCCMruntimeI2F_C
 *
 * Convert a signed 32-bit integer to a single-precision bit pattern.
 *
 * In:     r0 = signed integer
 * Out:    r0 = sign (bit 31) | biased exponent (from bit 23) | fraction,
 *              rounded to nearest, ties to even
 * Uses:   r1 (F), r2 (EXP) as scratch; condition flags are modified
 * Return: via "mov pc, lr"; no stack is used
 */
        ENTRY(CVMCCMruntimeI2F)
ENTRY1( CVMCCMruntimeI2F )
        ENTRY(CVMCCMruntimeI2F_C)
ENTRY1( CVMCCMruntimeI2F_C )
        /*
         * Keep the original argument in r0 (it has the sign)
         * Working registers are F (Fraction) and EXP (exponent)
         */
#define F       r1
#define EXP     r2
        adds    F, r0, #0               /* absolute value to F */
        moveq   pc, lr                  /* if zero, return zero */
        rsblt   F, F, #0                /* negative input: F = -r0 */
        /*
         * 31 is the maximum number of shifts required to get
         * a '1' in the high-order bit of F.
         * 0x7f is the exponent of 1.0 (i.e. the bias).
         * -1 is to account for the implied high-order bit. Rather
         * than masking it out, we just subtract it out here.
         */
        mov     EXP, #(31+0x7f-1)
        /*
         * use shifts to get the high-order bit set.
         * diminish EXP by the amount shifted.
         * The compares are unsigned (lo), so F = 0x80000000
         * (from the most negative input) is never shifted.
         */
        /* get bits into high-order 16 bits */
        cmp     F, #0x10000
        movlo   F, F, LSL #16
        sublo   EXP, EXP, #16
        /* get bits into high-order 8 bits */
        cmp     F, #0x1000000
        movlo   F, F, LSL #8
        sublo   EXP, EXP, #8
        /* get bits into high-order 4 bits */
        cmp     F, #0x10000000
        movlo   F, F, LSL #4
        sublo   EXP, EXP, #4
        /* get bits into high-order 2 bits */
        cmp     F, #0x40000000
        movlo   F, F, LSL #2
        sublo   EXP, EXP, #2
        /* get bit into high-order bit */
        cmp     F, #0x80000000
        movlo   F, F, LSL #1
        sublo   EXP, EXP, #1
        /* insert sign and exponent */
        and     r0, r0, #0x80000000     /* get the original sign */
        add     r0, r0, EXP, LSL #23    /* insert the exponent */
        add     r0, r0, F, LSR #8       /* insert the fraction */
        /* rounding is necessary if any bits were lost */
        ands    F, F, #0xff
        moveq   pc, lr                  /* no bits lost (most likely case) */
        cmp     F, #0x80
        addhs   r0, r0, #1              /* round up ... */
        biceq   r0, r0, #1              /* ... or to even in case of 1/2 LSB */
        mov     pc, lr                  /* return */
#undef EXP
#undef F

/*
 * CVMCCMruntimeI2D / CVMCCMruntimeI2D_C
 *
 * Convert a signed 32-bit integer to a double-precision bit pattern.
 *
 * In:     r0 = signed integer
 * Out:    H = sign | biased exponent (from bit 20) | high fraction bits,
 *         F = low fraction bits, where H/F are r0/r1 or r1/r0 as
 *         selected by CVM_DOUBLE_ENDIANNESS below
 * Uses:   r2 (EXP) as scratch; condition flags are modified
 * Return: via "mov pc, lr"; no stack is used
 *
 * No rounding step is present: all 32 bits of F are kept
 * (the top 21 go to H via LSR #11, the rest to F via LSL #21).
 */
        ENTRY(CVMCCMruntimeI2D)
ENTRY1( CVMCCMruntimeI2D )
        ENTRY(CVMCCMruntimeI2D_C)
ENTRY1( CVMCCMruntimeI2D_C )
        /*
         * Keep the original argument in H (it has the sign)
         * Working registers are F (Fraction) and EXP (exponent)
         */
#if CVM_DOUBLE_ENDIANNESS == CVM_BIG_ENDIAN
#define H       r0
#define F       r1
#elif CVM_DOUBLE_ENDIANNESS == CVM_LITTLE_ENDIAN
#define H       r1
#define F       r0
#endif
#define EXP     r2
#if CVM_DOUBLE_ENDIANNESS == CVM_LITTLE_ENDIAN
        mov     H, r0                   /* argument arrives in r0; H is r1 here */
#endif
        adds    F, H, #0                /* absolute value to F */
        moveq   F, #0                   /* if zero, return zero */
        moveq   pc, lr                  /* if zero, return zero */
        rsblt   F, F, #0                /* negative input: F = -H */
        /*
         * 31 is the maximum number of shifts required to get
         * a '1' in the high-order bit of F.
         * 0x3ff is the exponent of 1.0 (i.e. the bias).
         * -1 is to account for the implied high-order bit. Rather
         * than masking it out, we just subtract it out here.
         */
        /*
         * NOTE(review): L_EXP_CONSTANT is defined outside this excerpt;
         * per the comment above it presumably holds (31+0x3ff-1) - confirm.
         */
        ldr     EXP, L_EXP_CONSTANT
        /*
         * use shifts to get the high-order bit set.
         * diminish EXP by the amount shifted.
         */
        /* get bits into high-order 16 bits */
        cmp     F, #0x10000
        movlo   F, F, LSL #16
        sublo   EXP, EXP, #16
        /* get bits into high-order 8 bits */
        cmp     F, #0x1000000
        movlo   F, F, LSL #8
        sublo   EXP, EXP, #8
        /* get bits into high-order 4 bits */
        cmp     F, #0x10000000
        movlo   F, F, LSL #4
        sublo   EXP, EXP, #4
        /* get bits into high-order 2 bits */
        cmp     F, #0x40000000
        movlo   F, F, LSL #2
        sublo   EXP, EXP, #2
        /* get bit into high-order bit */
        cmp     F, #0x80000000
        movlo   F, F, LSL #1
        sublo   EXP, EXP, #1
        /* insert sign and exponent */
        and     H, H, #0x80000000       /* get the original sign */
        add     H, H, EXP, LSL #20      /* insert exponent */
        add     H, H, F, LSR #11        /* insert high 21 bits of the fraction */
        mov     F, F, LSL #21           /* low bits of the fraction here */
        mov     pc, lr                  /* no rounding, so just go */
#undef H
#undef F
#undef EXP

/*
 * CVMCCMruntimeD2I / CVMCCMruntimeD2I_C
 *
 * Convert a double-precision bit pattern to a signed 32-bit integer.
 *
 * In:     H/L = high/low words of the double (r0/r1 or r1/r0 as
 *         selected by CVM_DOUBLE_ENDIANNESS below)
 * Out:    r0 = integer result:
 *           - de-biased exponent negative (magnitude below 1.0): 0
 *           - NaN: 0
 *           - de-biased exponent >= 31 (including infinity):
 *             0x80000000 if H is negative, otherwise 0x7fffffff
 *           - otherwise the value with its fraction bits discarded
 *             (logical right shift of the magnitude, then the sign)
 * Uses:   r2 (F), r3 (EXP) as scratch; condition flags are modified
 * Return: via "mov pc, lr"; no stack is used
 */
        ENTRY(CVMCCMruntimeD2I)
ENTRY1( CVMCCMruntimeD2I )
        ENTRY(CVMCCMruntimeD2I_C)
ENTRY1( CVMCCMruntimeD2I_C )
        /*
         * Keep the original argument in H/L (it has the sign)
         * Working registers are F (Fraction) and EXP (exponent)
         */
#if CVM_DOUBLE_ENDIANNESS == CVM_BIG_ENDIAN
#define H       r0
#define L       r1
#elif CVM_DOUBLE_ENDIANNESS == CVM_LITTLE_ENDIAN
#define H       r1
#define L       r0
#endif
#define F       r2
#define EXP     r3
        bic     F, H, #0x80000000       /* strip sign */
        /*
         * The bias (0x3ff << 20) is subtracted in two steps.
         * EXP keeps the high fraction bits below bit 20.
         */
        subs    EXP, F, #0x3f000000     /* de-bias exponent */
        subs    EXP, EXP, #0x00f00000
        blt     _d2iTooLittle
        cmp     EXP, #(31<<20)
        bhs     _d2iTooBig
        /* shift fraction up to put high-order explicit bit in bit 30 */
        /*
         * The exponent bits still present in F are shifted out of the
         * top, except its lowest bit, which lands in bit 31 and is
         * overwritten by the implicit bit inserted below.
         */
        mov     F, F, LSL #11
        orr     F, F, L, LSR #21
        orr     F, F, #0x80000000       /* insert implicit high order bit */
        mov     EXP, EXP, LSR #20
        /* now need to shift F right by 31 - exponent */
        rsb     EXP, EXP, #31
        mov     F, F, LSR EXP
        /* apply the sign and return */
        cmp     H, #0
        rsblt   F, F, #0
        mov     r0, F
        mov     pc, lr

LABEL(_d2iTooBig)
        /* test for NaN, which returns zero */
        cmp     EXP, #(0x7ff00000-0x3ff00000)
        cmpeq   L, #0                   /* any non-0 fraction bits indicate NaN */
        bhi     _d2iTooLittle
        cmp     H, #0
        mov     r0, #0x80000000         /* largest negative */
        mvngt   r0, r0                  /* is the complement of the largest positive */
        mov     pc, lr

LABEL(_d2iTooLittle)
        mov     r0, #0
        mov     pc, lr
#undef H
#undef L
#undef F
#undef EXP

/*
 * Definitions and conventions shared by double-precision multiply
 * and add/subtract.
 * Operands arrive in A1/A2 and B1/B2,
 * Result is returned in A1/A2.
 * The exponent of A and B are unpacked into EXPA and EXPB, respectively.
 * The result exponent is developed in EXPA.
 * FLAGS store various state bits. The sign bit of FLAGS is the
 * sign of the result.
 * EXPMASK contains the value DOUBLE_EXPVAL, which is both a mask and
 * the exponent for infinity.
 *
 * When the shared rounding and packing code is entered, at _double_check_guard,
 * the resulting fraction has been moved into position in A1/A2,
 * the resulting exponent is in EXPA, and the guard bit is the high order bit
 * of RESULTX. The other bits of that register are the
 * sticky bits which will help determine rounding direction.
 * The fraction and exponent have the following relation at this point: either
 * - the implicit bit is explicitly present (in what is normally the
 *   low-order bit of the exponent) OR
 * - the exponent is incremented by 1.
 */
#define DOUBLE_SAVE_SET {r4-r9, lr}
#define DOUBLE_RESTORE_SET {r4-r9, pc}

/* Register definitions for the two double-word endiannesses. */
#if CVM_DOUBLE_ENDIANNESS == CVM_BIG_ENDIAN
#define A1 r0
#define A2 r1
#define B1 r2
#define B2 r3
#define EXPA r4
#define EXPB r5
#define RESULTX r6
#define EXPMASK r7
#elif CVM_DOUBLE_ENDIANNESS == CVM_LITTLE_ENDIAN
#define A1 r1
#define A2 r0
#define B1 r3
#define B2 r2
#define EXPA r5
#define EXPB r4
#define RESULTX r7
#define EXPMASK r6
#endif
#define FLAGS lr
#define EXPSHIFT 20
#define DOUBLE_EXPVAL 0x7ff

/*
 * The macros used for unpacking the two operands
 * have three parts. This allows us to place the less usual
 * code for dealing with denormalized values and NaNs out of line,
 * rather than disrupting the flow of normal computation.
 */
/*
 * DOUBLE_UNPACK:      ExpReg = exponent field of HiSrc; the field is cleared
 *                     in HiSrc. Branches to DenormalDest when the field is
 *                     zero and to ExceptionalDest when it equals EXPMASK;
 *                     otherwise sets the implicit bit (1<<EXPSHIFT) in HiSrc.
 * DOUBLE_EXCEPTIONAL: branches to _double_deliver_NaN when HiSrc/LoSrc are
 *                     not both zero; otherwise ORs Infflag into FLAGS and
 *                     branches to Lreturn.
 * DOUBLE_NORMALIZE:   when HiSrc/LoSrc are both zero, ORs Zflag into FLAGS
 *                     and branches to Lreturn; otherwise shifts HiSrc:LoSrc
 *                     left one bit at a time, decrementing ExpReg after each
 *                     shift that leaves HiSrc below (1<<EXPSHIFT), then
 *                     branches to Lreturn.
 * All three modify the condition flags.
 * Two spellings follow: preprocessor macros when __RVCT__ is not defined,
 * MACRO/MEND definitions with thin preprocessor wrappers when it is.
 */
#ifndef __RVCT__
#define DOUBLE_UNPACK( HiSrc, ExpReg, DenormalDest, ExceptionalDest )\
        ands    ExpReg, EXPMASK, HiSrc, LSR #EXPSHIFT; \
        bic     HiSrc, HiSrc, EXPMASK, LSL #EXPSHIFT; \
        beq     DenormalDest; \
        cmp     ExpReg, EXPMASK; \
        beq     ExceptionalDest; \
        orr     HiSrc, HiSrc, #(1<<EXPSHIFT)

#define DOUBLE_EXCEPTIONAL( HiSrc, LoSrc, Infflag, Lreturn )\
        cmp     HiSrc, #0; \
        cmpeq   LoSrc, #0; \
        bne     _double_deliver_NaN; \
        orr     FLAGS, FLAGS, Infflag; \
        b       Lreturn

#define DOUBLE_NORMALIZE( HiSrc, LoSrc, ExpReg, Lreturn, Zflag )\
        cmp     HiSrc, #0; \
        cmpeq   LoSrc, #0; \
        orreq   FLAGS, FLAGS, Zflag; \
        beq     Lreturn; \
1: \
        adds    LoSrc, LoSrc, LoSrc; \
        adc     HiSrc, HiSrc, HiSrc; \
        cmp     HiSrc, #(1<<EXPSHIFT);\
        sublt   ExpReg, ExpReg, #1; \
        blt     1b; \
        b       Lreturn

#else

        MACRO
        DOUBLE_UNPACK0 $HiSrc, $ExpReg, $DenormalDest, $ExceptionalDest
        ands    $ExpReg, EXPMASK, $HiSrc, LSR #EXPSHIFT
        bic     $HiSrc, $HiSrc, EXPMASK, LSL #EXPSHIFT
        beq     $DenormalDest
        cmp     $ExpReg, EXPMASK
        beq     $ExceptionalDest
        orr     $HiSrc, $HiSrc, #(1<<EXPSHIFT)
        MEND

        MACRO
        DOUBLE_EXCEPTIONAL0 $HiSrc, $LoSrc, $Infflag, $Lreturn
        cmp     $HiSrc, #0
        cmpeq   $LoSrc, #0
        bne     _double_deliver_NaN
        orr     FLAGS, FLAGS, $Infflag
        b       $Lreturn
        MEND

        MACRO
        DOUBLE_NORMALIZE0 $HiSrc, $LoSrc, $ExpReg, $Lreturn, $Zflag
        cmp     $HiSrc, #0
        cmpeq   $LoSrc, #0
        orreq   FLAGS, FLAGS, $Zflag
        beq     $Lreturn
1
        adds    $LoSrc, $LoSrc, $LoSrc
        adc     $HiSrc, $HiSrc, $HiSrc
        cmp     $HiSrc, #(1<<EXPSHIFT)
        sublt   $ExpReg, $ExpReg, #1
        blt     %b1
        b       $Lreturn
        MEND

#define DOUBLE_UNPACK( HiSrc, ExpReg, DenormalDest, ExceptionalDest )\
        DOUBLE_UNPACK0 HiSrc, ExpReg, DenormalDest, ExceptionalDest

#define DOUBLE_EXCEPTIONAL( HiSrc, LoSrc, Infflag, Lreturn )\
        DOUBLE_EXCEPTIONAL0 HiSrc, LoSrc, Infflag, Lreturn

#define DOUBLE_NORMALIZE( HiSrc, LoSrc, ExpReg, Lreturn, Zflag )\
        DOUBLE_NORMALIZE0 HiSrc, LoSrc, ExpReg, Lreturn, Zflag
#endif

/*
 * Multiplication and addition/subtraction treat denormalized numbers
 * differently. For multiply we want to normalize by shifting.
* For addition, we just leave the exponent at zero and refrain from * inserting an implicit bit, because there is none. *//* * Entry point for double precision floating multiplication. * On entry, multiplicand (call it A) is in r0/r1, * and multiplier (call it B) is in r2/r3. * On exit, product will be in r0/r1. */ ENTRY(CVMCCMruntimeDMul)ENTRY1( CVMCCMruntimeDMul ) ENTRY(CVMCCMruntimeDMul_C)ENTRY1( CVMCCMruntimeDMul_C )/* IAI-06 */#ifdef IAI_DMUL#define SIGN_FLAG r8#define PROD2 r8#define PROD3 ipLABEL(_dmul_judge_A_zero) orrs ip, A2, A1, LSL #1 bne _dmul_judge_A_INF_NaN LABEL(_dmul_A_equal_zero) mov ip, B1, LSL #1 mov ip, ip, ASR $(EXPSHIFT+1) adds ip, ip, #1 beq _dmul_A_equal_zero_B_equal_INF_or_NaN LABEL(_dmul_A_equal_zero_B_NOT_equal_INF_NaN)LABEL(_dmul_A_equal_INF_B_NOT_equal_zero_INF_NaN)LABEL(_dmul_A_equal_INF_B_equal_INF) and B1, B1, #0x80000000 /* get sign of B */ eor A1, A1, B1 /* deliver value of A */ mov pc, lr /* deliver sign of (A eor B) */ LABEL(_dmul_judge_A_INF_NaN) mov ip, A1, LSL #1 mov ip, ip, ASR $(EXPSHIFT+1) adds ip, ip, #1 bne _dmul_judge_B_zero LABEL(_dmul_A_equal_INF_NaN) orrs ip, A2, A1, LSL #12 bne _dmul_A_equal_NaN orrs ip, B2, B1, LSL #1 beq _dmul_A_equal_INF_B_equal_zero mov ip, B1, LSL #1 mov ip, ip, ASR $(EXPSHIFT+1) adds ip, ip, #1 bne _dmul_A_equal_INF_B_NOT_equal_zero_INF_NaN orrs ip, B2, B1, LSL #12 beq _dmul_A_equal_INF_B_eq
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -