📄 fpsoft.s
字号:
rs_cp_1w_fpr12:
        mfc1    t2,$f12; b load_rs_done; nop
rs_cp_1w_fpr13:
        mfc1    t2,$f13; b load_rs_done; nop
rs_cp_1w_fpr14:
        mfc1    t2,$f14; b load_rs_done; nop
rs_cp_1w_fpr15:
        mfc1    t2,$f15; b load_rs_done; nop
rs_cp_1w_fpr16:
        mfc1    t2,$f16; b load_rs_done; nop
rs_cp_1w_fpr17:
        mfc1    t2,$f17; b load_rs_done; nop
rs_cp_1w_fpr18:
        mfc1    t2,$f18; b load_rs_done; nop
rs_cp_1w_fpr19:
        mfc1    t2,$f19; b load_rs_done; nop
rs_cp_1w_fpr20:
        mfc1    t2,$f20; b load_rs_done; nop
rs_cp_1w_fpr21:
        mfc1    t2,$f21; b load_rs_done; nop
rs_cp_1w_fpr22:
        mfc1    t2,$f22; b load_rs_done; nop
rs_cp_1w_fpr23:
        mfc1    t2,$f23; b load_rs_done; nop
rs_cp_1w_fpr24:
        mfc1    t2,$f24; b load_rs_done; nop
rs_cp_1w_fpr25:
        mfc1    t2,$f25; b load_rs_done; nop
rs_cp_1w_fpr26:
        mfc1    t2,$f26; b load_rs_done; nop
rs_cp_1w_fpr27:
        mfc1    t2,$f27; b load_rs_done; nop
rs_cp_1w_fpr28:
        mfc1    t2,$f28; b load_rs_done; nop
rs_cp_1w_fpr29:
        mfc1    t2,$f29; b load_rs_done; nop
rs_cp_1w_fpr30:
        mfc1    t2,$f30; b load_rs_done; nop
rs_cp_1w_fpr31:
        mfc1    t2,$f31; b load_rs_done; nop

        .set    reorder

/*******************************************************************************
*
* rs_cp_2w - fetch a two-word RS operand directly from the coprocessor
*
* Load the two words from the coprocessor for the FPR register specified by
* the RS (v1) field into GPR registers t2,t3.
*
* In:       v1 = FPR number from the RS field
* Out:      _WRS_FP_REGISTER_SIZE == 4:  t3 = even register of the pair,
*                                        t2 = the odd register that follows it
*           _WRS_FP_REGISTER_SIZE == 8:  t2 = upper 32 bits of the register,
*                                        t3 = lower 32 bits
* Clobbers: v1, t9
* Exit:     branches to load_rs_done; never returns to the dispatcher
*
* Dispatch works by jumping into rs_cp_2w_tab, a table of "b <stub>; nop"
* pairs (8 bytes each), so the index is scaled by 8 before it is added to
* the table base.
*/
rs_cp_2w:
#if (_WRS_FP_REGISTER_SIZE == 4)
        srl     v1, v1, 1               /* only allow even numbered registers */
#endif /* _WRS_FP_REGISTER_SIZE */
        sll     v1, v1, 3               /* 8 bytes per entry */
        la      t9, rs_cp_2w_tab        /* load table */
        addu    v1, t9, v1              /* get entry address */
        j       v1

        .set    noreorder
rs_cp_2w_tab:
#if (_WRS_FP_REGISTER_SIZE == 4)
        /* one entry per even/odd register pair */
        b       rs_cp_2w_fpr0;  nop
        b       rs_cp_2w_fpr2;  nop
        b       rs_cp_2w_fpr4;  nop
        b       rs_cp_2w_fpr6;  nop
        b       rs_cp_2w_fpr8;  nop
        b       rs_cp_2w_fpr10; nop
        b       rs_cp_2w_fpr12; nop
        b       rs_cp_2w_fpr14; nop
        b       rs_cp_2w_fpr16; nop
        b       rs_cp_2w_fpr18; nop
        b       rs_cp_2w_fpr20; nop
        b       rs_cp_2w_fpr22; nop
        b       rs_cp_2w_fpr24; nop
        b       rs_cp_2w_fpr26; nop
        b       rs_cp_2w_fpr28; nop
        b       rs_cp_2w_fpr30; nop
#elif (_WRS_FP_REGISTER_SIZE == 8)
        /* one entry per register */
        b       rs_cp_2w_fpr0;  nop
        b       rs_cp_2w_fpr1;  nop
        b       rs_cp_2w_fpr2;  nop
        b       rs_cp_2w_fpr3;  nop
        b       rs_cp_2w_fpr4;  nop
        b       rs_cp_2w_fpr5;  nop
        b       rs_cp_2w_fpr6;  nop
        b       rs_cp_2w_fpr7;  nop
        b       rs_cp_2w_fpr8;  nop
        b       rs_cp_2w_fpr9;  nop
        b       rs_cp_2w_fpr10; nop
        b       rs_cp_2w_fpr11; nop
        b       rs_cp_2w_fpr12; nop
        b       rs_cp_2w_fpr13; nop
        b       rs_cp_2w_fpr14; nop
        b       rs_cp_2w_fpr15; nop
        b       rs_cp_2w_fpr16; nop
        b       rs_cp_2w_fpr17; nop
        b       rs_cp_2w_fpr18; nop
        b       rs_cp_2w_fpr19; nop
        b       rs_cp_2w_fpr20; nop
        b       rs_cp_2w_fpr21; nop
        b       rs_cp_2w_fpr22; nop
        b       rs_cp_2w_fpr23; nop
        b       rs_cp_2w_fpr24; nop
        b       rs_cp_2w_fpr25; nop
        b       rs_cp_2w_fpr26; nop
        b       rs_cp_2w_fpr27; nop
        b       rs_cp_2w_fpr28; nop
        b       rs_cp_2w_fpr29; nop
        b       rs_cp_2w_fpr30; nop
        b       rs_cp_2w_fpr31; nop
#else /* _WRS_FP_REGISTER_SIZE */
#error "invalid _WRS_FP_REGISTER_SIZE value"
#endif /* _WRS_FP_REGISTER_SIZE */

#if (_WRS_FP_REGISTER_SIZE == 4)
        /* 32-bit registers: read the even register into t3, the odd into t2 */
rs_cp_2w_fpr0:
        mfc1    t3,$f0; mfc1 t2,$f1; b load_rs_done
        nop
rs_cp_2w_fpr2:
        mfc1    t3,$f2; mfc1 t2,$f3; b load_rs_done
        nop
rs_cp_2w_fpr4:
        mfc1    t3,$f4; mfc1 t2,$f5; b load_rs_done
        nop
rs_cp_2w_fpr6:
        mfc1    t3,$f6; mfc1 t2,$f7; b load_rs_done
        nop
rs_cp_2w_fpr8:
        mfc1    t3,$f8; mfc1 t2,$f9; b load_rs_done
        nop
rs_cp_2w_fpr10:
        mfc1    t3,$f10; mfc1 t2,$f11; b load_rs_done
        nop
rs_cp_2w_fpr12:
        mfc1    t3,$f12; mfc1 t2,$f13; b load_rs_done
        nop
rs_cp_2w_fpr14:
        mfc1    t3,$f14; mfc1 t2,$f15; b load_rs_done
        nop
rs_cp_2w_fpr16:
        mfc1    t3,$f16; mfc1 t2,$f17; b load_rs_done
        nop
rs_cp_2w_fpr18:
        mfc1    t3,$f18; mfc1 t2,$f19; b load_rs_done
        nop
rs_cp_2w_fpr20:
        mfc1    t3,$f20; mfc1 t2,$f21; b load_rs_done
        nop
rs_cp_2w_fpr22:
        mfc1    t3,$f22; mfc1 t2,$f23; b load_rs_done
        nop
rs_cp_2w_fpr24:
        mfc1    t3,$f24; mfc1 t2,$f25; b load_rs_done
        nop
rs_cp_2w_fpr26:
        mfc1    t3,$f26; mfc1 t2,$f27; b load_rs_done
        nop
rs_cp_2w_fpr28:
        mfc1    t3,$f28; mfc1 t2,$f29; b load_rs_done
        nop
rs_cp_2w_fpr30:
        mfc1    t3,$f30; mfc1 t2,$f31; b load_rs_done
        nop
#elif (_WRS_FP_REGISTER_SIZE == 8)
        /*
         * 64-bit registers: each stub branches to the common tail at 0:
         * and reads the whole register into t3 from the branch delay slot.
         */
rs_cp_2w_fpr0:
        b       0f; dmfc1 t3,$f0
rs_cp_2w_fpr1:
        b       0f; dmfc1 t3,$f1
rs_cp_2w_fpr2:
        b       0f; dmfc1 t3,$f2
rs_cp_2w_fpr3:
        b       0f; dmfc1 t3,$f3
rs_cp_2w_fpr4:
        b       0f; dmfc1 t3,$f4
rs_cp_2w_fpr5:
        b       0f; dmfc1 t3,$f5
rs_cp_2w_fpr6:
        b       0f; dmfc1 t3,$f6
rs_cp_2w_fpr7:
        b       0f; dmfc1 t3,$f7
rs_cp_2w_fpr8:
        b       0f; dmfc1 t3,$f8
rs_cp_2w_fpr9:
        b       0f; dmfc1 t3,$f9
rs_cp_2w_fpr10:
        b       0f; dmfc1 t3,$f10
rs_cp_2w_fpr11:
        b       0f; dmfc1 t3,$f11
rs_cp_2w_fpr12:
        b       0f; dmfc1 t3,$f12
rs_cp_2w_fpr13:
        b       0f; dmfc1 t3,$f13
rs_cp_2w_fpr14:
        b       0f; dmfc1 t3,$f14
rs_cp_2w_fpr15:
        b       0f; dmfc1 t3,$f15
rs_cp_2w_fpr16:
        b       0f; dmfc1 t3,$f16
rs_cp_2w_fpr17:
        b       0f; dmfc1 t3,$f17
rs_cp_2w_fpr18:
        b       0f; dmfc1 t3,$f18
rs_cp_2w_fpr19:
        b       0f; dmfc1 t3,$f19
rs_cp_2w_fpr20:
        b       0f; dmfc1 t3,$f20
rs_cp_2w_fpr21:
        b       0f; dmfc1 t3,$f21
rs_cp_2w_fpr22:
        b       0f; dmfc1 t3,$f22
rs_cp_2w_fpr23:
        b       0f; dmfc1 t3,$f23
rs_cp_2w_fpr24:
        b       0f; dmfc1 t3,$f24
rs_cp_2w_fpr25:
        b       0f; dmfc1 t3,$f25
rs_cp_2w_fpr26:
        b       0f; dmfc1 t3,$f26
rs_cp_2w_fpr27:
        b       0f; dmfc1 t3,$f27
rs_cp_2w_fpr28:
        b       0f; dmfc1 t3,$f28
rs_cp_2w_fpr29:
        b       0f; dmfc1 t3,$f29
rs_cp_2w_fpr30:
        b       0f; dmfc1 t3,$f30
rs_cp_2w_fpr31:
        b       0f; dmfc1 t3,$f31

        .set    reorder
0:
        /*
         * The dmfc1 that wrote t3 executed in the delay slot of the branch
         * that brought us here, so the very next instruction would consume
         * t3.  Use the same coprocessor-read barrier that load_rs_done uses
         * before touching the value.
         */
        HAZARD_CP_READ
        dsrl32  t2,t3,0                 /* t2 = upper 32 bits of the register */
        srlv    t3,t3,zero              /* shift by zero: t3 keeps the low word */
        b       load_rs_done
        .set    noreorder
#else /* _WRS_FP_REGISTER_SIZE */
#error "invalid _WRS_FP_REGISTER_SIZE value"
#endif /* _WRS_FP_REGISTER_SIZE */

        .set    reorder

/*
 * At this point the floating-point value for the specified FPR register
 * in the RS field (v1) will be loaded from the task control block (tcb)
 * of the current process for FMT specified (v0).  Also the floating-point
 * control and status register is loaded into gp register a3.
*/
rs_tcb:
        la      t9, rs_tcb_fmt_tab              /* base of the FMT dispatch table */
        lw      a3, FRAMEA3(softFp)(sp)         /* a3 = pFpContext saved in the frame */
        lw      a3, FPCSR(a3)                   /* a3 = fpcsr read from the context */
        and     t8, a3, CSR_RM_MASK             /* isolate current rounding mode */
        sw      t8, RM_OFFSET(sp)               /* and save it on the stack */
        addu    t9, v0, t9                      /* entry address = table + v0 */
        j       t9

        .set    noreorder
rs_tcb_fmt_tab:
        /* each entry is a branch plus its delay-slot nop */
        b       rs_tcb_s;  nop
        b       rs_tcb_d;  nop
        b       illfpinst; nop
        b       illfpinst; nop
        b       rs_tcb_w;  nop
        b       rs_tcb_l;  nop
        .set    reorder

/*
 * One-word formats: read a single word of the selected register from the
 * saved context into t2, then continue at load_rs_done.
 * Clobbers v1 (turned into the register's address).
 */
rs_tcb_s:
rs_tcb_w:
        lw      t2, FRAMEA3(softFp)(sp)         /* t2 = pFpContext */
#if (_WRS_FP_REGISTER_SIZE == 4)
        sll     v1, v1, 2                       /* 4 bytes per register */
#elif (_WRS_FP_REGISTER_SIZE == 8)
        sll     v1, v1, 3                       /* 8 bytes per register */
#else /* _WRS_FP_REGISTER_SIZE */
#error "invalid _WRS_FP_REGISTER_SIZE value"
#endif /* _WRS_FP_REGISTER_SIZE */
        addu    v1, v1, t2                      /* v1 = address of the register */
        lw      t2, (v1)                        /* read the register */
        b       load_rs_done

/*
 * Two-word formats: read the word at the register's address into t2 and
 * the word 4 bytes above it into t3, then fall through into load_rs_done.
 * Clobbers v1 (turned into the register's address).
 *
 * NOTE(review): here t2 comes from the lower address and t3 from the
 * higher one, whereas rs_cp_2w with 4-byte registers places the odd
 * (higher numbered) register in t2.  Whether the two paths agree depends
 * on how the context save area orders the words -- confirm against the
 * context save code.
 */
rs_tcb_d:
rs_tcb_l:
        lw      t2, FRAMEA3(softFp)(sp)         /* t2 = pFpContext */
#if (_WRS_FP_REGISTER_SIZE == 4)
        sll     v1, v1, 2                       /* 4 bytes per register */
#elif (_WRS_FP_REGISTER_SIZE == 8)
        sll     v1, v1, 3                       /* 8 bytes per register */
#else /* _WRS_FP_REGISTER_SIZE */
#error "invalid _WRS_FP_REGISTER_SIZE value"
#endif /* _WRS_FP_REGISTER_SIZE */
        addu    v1, v1, t2                      /* v1 = address of the register */
        lw      t2, (v1)                        /* first word */
        lw      t3, 4(v1)                       /* word that follows it */

/*
 * At this point the floating-point value for the specified FPR register
 * in the RS field has been loaded into GPR registers and the C1_SR has
 * been loaded into the GPR register (a3).  First the exception field is
 * cleared in the C1_SR.  What is done next is to decode the FUNC field.
 * If this is a dyadic operation then the floating-point value specified
 * by the FPR register in the RT field will be loaded into GPR registers
 * before the instruction is further decoded.  If this is a monadic
 * operation, the instruction is decoded to be emulated.
*/
load_rs_done:
        HAZARD_CP_READ          /* many branches to this point are preceded
                                 * by a coprocessor read (mfc1/dmfc1) */
#ifdef DEBUG
        sw      t2, _fp_rs
        sw      t3, _fp_rs+4
#endif
        and     a3, a3, ~CSR_EXCEPT             /* clear the exception field */
        and     t8, a1, C1_FUNC_MASK            /* t8 = FUNC field of the instruction */
#ifdef DEBUG
        sw      t8, _fp_val
#endif
        /* operations that also need the RT operand */
        ble     t8, C1_FUNC_DIV, load_rt
        bge     t8, C1_FUNC_1stCMP, load_rt
        /* conversions */
        bgt     t8, C1_FUNC_CVTL, illfpinst
        bge     t8, C1_FUNC_CVTS, conv
        /* rounding conversions */
        bgt     t8, C1_FUNC_FLOORW, illfpinst
        bge     t8, C1_FUNC_ROUNDL, conv_round
        bgt     t8, C1_FUNC_NEG, illfpinst
        /* t8 is >= 4 and <= 7 */
        la      t9, mon_func_tab
        subu    t8, t8, 4                       /* make the index zero based */
        sll     t8, t8, 3                       /* 8 bytes per table entry */
        addu    t9, t8, t9
        j       t9

        .set    noreorder
        nop
mon_func_tab:
        b       func_sqrt; nop
        b       func_abs;  nop
        b       func_mov;  nop
        b       func_neg;  nop
        .set    reorder

/* func_sqrt: pick the square root routine for the format offset in v0 */
func_sqrt:
        la      v1, sqrt_fmt_tab
        addu    v1, v0, v1
        j       v1

        .set    noreorder
        nop
sqrt_fmt_tab:
        b       sqrt_s;    nop
        b       sqrt_d;    nop
        b       sqrt_e;    nop
        b       sqrt_q;    nop
        b       illfpinst; nop
        b       illfpinst; nop
        .set    reorder

/*******************************************************************************
*
* sqrt_s - Square root single
*
*/
FUNC_LABEL(sqrt_s)
        /*
         * Break out the operand into its fields (sign,exp,fraction) and
         * handle a NaN operand by calling rs_breakout_s().
         */
        li      t9, C1_FMT_SINGLE*4
        move    v1, zero
        jal     rs_breakout_s

        /* Check for sqrt of infinity, and produce the correct action if so */
        bne     t1, SEXP_INF, 4f        /* is RS an infinity? */
        /* RS is an infinity */
        beq     t0, zero, 3f            /* zero sign: +infinity, handled at 3: */

        /*
         * This is -infinity so this is an invalid operation for sqrt so set
         * the invalid exception in the C1_SR (a3) and setup the result
         * depending if the enable for the invalid exception is set.
         */
1:
        or      a3, a3, INVALID_EXC
        and     v0, a3, INVALID_ENABLE
        beq     v0, zero, 2f

        /*
         * The invalid trap was enabled so signal a SIGFPE and leave the
         * result register unmodified.
         */
        li      v0, IV_FPA_INV_VEC
        jal     post_signal
        li      v0, 1
        b       store_C1_SR

        /*
         * The invalid trap was NOT enabled so the result is a quiet NaN.
         * So use the default quiet NaN and exit softFp().
         */
2:
        li      t2, SQUIETNAN_LEAST
        move    v0, zero
        b       rd_1w

        /*
         * This is +infinity so the result is just +infinity.
         */
3:
        sll     t2, t1, SEXP_SHIFT
        move    v0, zero
        b       rd_1w

4:
        /* Check for the sqrt of zero and produce the correct action if so */
        bne     t1, zero, 5f    /* check RS for a zero value (first the exp) */
        bne     t2, zero, 5f    /* then the high part of the fraction */
        /* Now RS is known to be zero so just return it */
        move    t2, t0          /* get the sign of the zero */
        move    v0, zero
        b       rd_1w

5:
        /* Check for sqrt of a negative number; if so it is invalid */
        bne     t0, zero, 1b

        /*
         * Now that all the NaN, infinity, zero and negative cases have
         * been taken care of what is left is a value that the sqrt can be
         * taken.  So get the value into a format that can be used.  For
         * normalized numbers set the implied one and remove the exponent
         * bias.  For denormalized numbers convert to normalized numbers
         * with the correct exponent.
         */
        bne     t1, zero, 1f            /* check for RS being denormalized */
        li      t1, -SEXP_BIAS+1        /* set denorms exponent */
        jal     rs_renorm_s             /* normalize it */
        b       2f
1:
        subu    t1, t1, SEXP_BIAS       /* - if RS is not denormalized then remove the */
        or      t2, t2, SIMP_1BIT       /*   exponent bias, and set the implied 1 bit */
2:
        /*
         * Now take the sqrt of the value.  Written by George Tayor.
         *  t1          -- twos comp exponent
         *  t2          -- 24-bit fraction
         *  t8, t9      -- temps
         *  v0          -- trial subtraction
         *  t4          -- remainder
         *  t6          -- 25-bit result
         *  t8          -- sticky
         * (t8 is first loaded with the cycle count of 25 below.)
         */
        andi    t9, t1, 1               /* last bit of unbiased exponent */
        sra     t1, t1, 1               /* divide exponent by 2 */
        addi    t1, t1, -1              /* subtract 1, deliver 25-bit result */
        beq     t9, zero, 1f
        sll     t2, t2, 1               /* shift operand left by 1 */
                                        /*   if exponent was odd */
1:
        li      t6, 1                   /* initialize answer msw */
        /* NOTE(review): this clear is overwritten by the srl right after it */
        move    t4, zero                /* initialize remainder msw */
        srl     t4, t2, 23              /* shift operand left by 9 so that */
        sll     t2, t2, 9               /*   2 bits go into remainder */
        li      t8, 25                  /* set cycle counter */
2:
        subu    v0, t4, t6              /* trial subtraction */
        sll     t6, t6, 1               /* shift answer left by 1 */
        li      t9, -4                  /* put 01 back in low order bits */
        and     t6, t6, t9              /*   using 0xfffffffc mask */
        or      t6, t6, 1
        bltz    v0, 3f                  /* branch on sign of trial subtract */
        ori     t6, t6, 4               /* set new bit of answer */
        sll     t4, v0, 2               /* shift trial result left by 2 */
                                        /*   and put in remainder */
        b       4f
3:
        sll     t4, t4, 2               /* shift remainder left by 2 */
4:
        srl     t9, t2, 30              /* shift operand left by 2 */
        or      t4, t4, t9
        sll     t2, t2, 2
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -