📄 softfp.s
字号:
/* @(#)softfp.s 4.4 (ULTRIX) 2/12/91 *//* ------------------------------------------------------------------ *//* | Copyright Unpublished, MIPS Computer Systems, Inc. All Rights | *//* | Reserved. This software contains proprietary and confidential | *//* | information of MIPS and its suppliers. Use, disclosure or | *//* | reproduction is prohibited without the prior express written | *//* | consent of MIPS. | *//* ------------------------------------------------------------------ *//* * softfp.s -- floating point software emulation *//* Revision History * * * 19-Dec-90 -- jaw * sendsig change to mask... * * 29-Mar-90 -- gmm/jaw * Call setsoftnet to schedule psignal() rather than calling psignal() * directly. This is the result of changing splihigh() and spl6() to * be same as splclock(). * * 13-Oct-89 -- gmm * Made fpowner a per cpu variable in cpudata * */#ifdef MOXIE# define LOCORE# include "mips/cpu.h"# include "mips/fpu.h"# include "mips/reg.h"# include "mips/regdef.h"# include "mips/asm.h"# include "mips/pcb.h"# include "assym.h"# include "h/signal.h"# include "h/user.h"# include "softfp.h"#else#include "../machine/param.h"#include "../machine/cpu.h"#include "../machine/fpu.h"#include "../machine/reg.h"#include "../machine/regdef.h"#include "../machine/asm.h"#include "../machine/pcb.h"#include "../machine/vmparam.h"#include "../machine/softfp.h"#include "../h/signal.h"#include "assym.h"#endif MOXIE/* * The software floating-point emulator is called from either the floating- * point coprocessor unusable exception handler (in softfp_unusable.s) * or the floating-point interrupt handler (in fp_intr.s). This routine only * emulates floating-point operations and compares. It does not emulate nor * does it detect loads/stores, move to/from and branch on condition * instructions. 
*
 * This is the state on entry to this routine:
 *	Register setup
 *		a0 -- exception frame pointer
 *		a1 -- fp instruction to be emulated
 *		a2 -- fptype_word
 * The normal calling convention is assumed with the appropriate registers
 * saved by the caller as if it were a high level language routine.
 *
 * The floating-point coprocessor revision word is in fptype_word and is zero
 * if there is no floating-point coprocessor.  If it is non-zero then this
 * routine was called from the floating-point interrupt handler.  In this
 * case the values of the floating point registers are still in the coprocessor
 * and the pointer to the proc structure for the process which executed the fp
 * instruction is in fpowner.
 *
 * If fptype_word is zero then this routine was called from the coprocessor
 * unusable handler.  In this case the values of the floating point registers
 * are in the pcb for the current process and the pointer to the proc structure
 * for the current process is at u+U_PROCP.
 *
 * This routine returns a non-zero value in v0 if there was a signal posted
 * to the process as the result of an exception.  Otherwise v0 will be zero.
 */

/*
 * Stack frame layout.  All offsets are relative to sp after the prologue's
 * "subu sp,FRAME_SIZE".
 *
 *	A0_OFFSET..A3_OFFSET evaluate to 40, 44, 48, 52, i.e. they start at
 *	sp+FRAME_SIZE and so lie above the FRAME_SIZE bytes allocated here
 *	(presumably the caller-provided argument save area -- confirm
 *	against the calling convention in use).
 *
 *	T0_OFFSET, T1_OFFSET, T2_OFFSET, T3_OFFSET, T8_OFFSET and RA_OFFSET
 *	evaluate to 36, 32, 28, 24, 20 and 16: consecutive words allocated
 *	downward from the top of the frame, below LOCAL_SIZE bytes of locals
 *	(LOCAL_SIZE is currently 0).
 */
#define FRAME_SIZE 40
#define LOCAL_SIZE 0
#define A0_OFFSET FRAME_SIZE+4*0
#define A1_OFFSET FRAME_SIZE+4*1
#define A2_OFFSET FRAME_SIZE+4*2
#define A3_OFFSET FRAME_SIZE+4*3
#define T0_OFFSET FRAME_SIZE-LOCAL_SIZE-4*1
#define T1_OFFSET FRAME_SIZE-LOCAL_SIZE-4*2
#define T2_OFFSET FRAME_SIZE-LOCAL_SIZE-4*3
#define T3_OFFSET FRAME_SIZE-LOCAL_SIZE-4*4
#define T8_OFFSET FRAME_SIZE-LOCAL_SIZE-4*5
#define RA_OFFSET FRAME_SIZE-LOCAL_SIZE-4*6

/*
 * Outside the MOXIE build, define the assembler symbol u as UADDR; it is
 * used in this file as the base for pcb accesses such as u+PCB_FPC_CSR.
 */
#ifndef MOXIE
	u = UADDR
#endif MOXIE

/*
 * Entry point.  Allocate the frame and save ra.  The .mask offset
 * -(FRAME_SIZE - 4*4) is -24; taken relative to the top of the frame
 * (sp+FRAME_SIZE) that is sp+16, the same slot as RA_OFFSET, so the two
 * must be kept in step if the frame layout changes.
 */
NESTED(softfp, FRAME_SIZE, ra)
	.mask 0x80000000, -(FRAME_SIZE - 4*4)
	subu	sp,FRAME_SIZE
	sw	ra,RA_OFFSET(sp)
/*
 * In decoding the instruction it is assumed that the opcode field is COP1
 * and that bit 25 is set (since only floating-point ops are supposed to
 * be emulated in this routine) and therefore these are not checked.
* The following fields are fully decoded and reserved encodings will result in
 * an illegal instruction signal (SIGILL) being posted:
 *	FMT -- (bits 24-21)
 *	FUNC -- (bits 5-0)
 */
	/*
	 * Check the FMT field for reserved encodings and leave it right
	 * justified in register v0 times 4.  Shifting by C1_FMT_SHIFT-2
	 * (rather than C1_FMT_SHIFT) is what leaves the field pre-multiplied
	 * by 4, ready for use as a byte offset into the word-sized jump
	 * tables below.
	 */
	srl	v0,a1,C1_FMT_SHIFT-2
	and	v0,C1_FMT_MASK<<2
	bgt	v0,C1_FMT_MAX<<2,illfpinst

	/*
	 * Load the floating point value from the register specified by the
	 * RS field into gp registers.  The gp registers which are used for
	 * the value specified by the RS field are dependent on the FMT (v0)
	 * as follows:
	 *	single		t2
	 *	double		t2,t3
	 *	extended	t2,t3,s2,s3	(where t3 is really zero)
	 *	quad		t2,t3,s2,s3
	 *
	 * Also load the value of the floating-point control and status
	 * register (fpc_csr) into gp register a3.
	 */
load_rs:
	srl	v1,a1,RS_SHIFT-2	# get the RS field times 4 right
	and	v1,RS_FPRMASK<<2	# justified into v1 with the last bit
					# of the field cleared.
	/*
	 * If fptype_word (a2) is non-zero then the floating-point values
	 * are loaded from the coprocessor else they are loaded from the pcb.
	 */
	beq	a2,zero,rs_pcb

	/*
	 * At this point the floating-point value for the specified FPR register
	 * in the RS field (v1) will be loaded from the coprocessor registers for
	 * the FMT specified (v0).  Also the floating-point control and status
	 * register (fpc_csr) is loaded into gp register a3.
	 */
	cfc1	a3,fpc_csr		# setup to branch to the code to load
	lw	t9,cp_rs_fmt_tab(v0)	# the right number of words from the
	j	t9			# cp for the specified format.

	/*
	 * Jump table indexed by v0 (FMT times 4): five word-sized entries.
	 * Entries 0 and 4 load one word, entry 1 loads two words, and
	 * entries 2 and 3 go to illfpinst.
	 */
	.rdata
cp_rs_fmt_tab:
	.word	rs_cp_1w:1, rs_cp_2w:1, illfpinst:1, illfpinst:1, rs_cp_1w:1
	.text

/*
 * Load the one word from the coprocessor for the FPR register specified by
 * the RS (v1) field into GPR register t2.
*/
rs_cp_1w:
	/*
	 * v1 holds the RS register number, low bit cleared, times 4 (set up
	 * at load_rs).  Halving it gives the byte offset of that register's
	 * entry in the table of 16 word-sized jump targets below (one entry
	 * per even-numbered FPR).  v1 is then reused to hold the target.
	 */
	srl	v1,1
	lw	v1,rs_cp_1w_tab(v1)
	j	v1

	.rdata
rs_cp_1w_tab:
	.word	rs_cp_1w_fpr0:1,  rs_cp_1w_fpr2:1,  rs_cp_1w_fpr4:1
	.word	rs_cp_1w_fpr6:1,  rs_cp_1w_fpr8:1,  rs_cp_1w_fpr10:1
	.word	rs_cp_1w_fpr12:1, rs_cp_1w_fpr14:1, rs_cp_1w_fpr16:1
	.word	rs_cp_1w_fpr18:1, rs_cp_1w_fpr20:1, rs_cp_1w_fpr22:1
	.word	rs_cp_1w_fpr24:1, rs_cp_1w_fpr26:1, rs_cp_1w_fpr28:1
	.word	rs_cp_1w_fpr30:1
	.text

	/*
	 * One stub per even FPR: the register number is part of the mfc1
	 * instruction itself, so each register needs its own copy.  Every
	 * stub moves the register into t2 and rejoins at load_rs_done.
	 */
rs_cp_1w_fpr0:
	mfc1	t2,$f0;		b	load_rs_done
rs_cp_1w_fpr2:
	mfc1	t2,$f2;		b	load_rs_done
rs_cp_1w_fpr4:
	mfc1	t2,$f4;		b	load_rs_done
rs_cp_1w_fpr6:
	mfc1	t2,$f6;		b	load_rs_done
rs_cp_1w_fpr8:
	mfc1	t2,$f8;		b	load_rs_done
rs_cp_1w_fpr10:
	mfc1	t2,$f10;	b	load_rs_done
rs_cp_1w_fpr12:
	mfc1	t2,$f12;	b	load_rs_done
rs_cp_1w_fpr14:
	mfc1	t2,$f14;	b	load_rs_done
rs_cp_1w_fpr16:
	mfc1	t2,$f16;	b	load_rs_done
rs_cp_1w_fpr18:
	mfc1	t2,$f18;	b	load_rs_done
rs_cp_1w_fpr20:
	mfc1	t2,$f20;	b	load_rs_done
rs_cp_1w_fpr22:
	mfc1	t2,$f22;	b	load_rs_done
rs_cp_1w_fpr24:
	mfc1	t2,$f24;	b	load_rs_done
rs_cp_1w_fpr26:
	mfc1	t2,$f26;	b	load_rs_done
rs_cp_1w_fpr28:
	mfc1	t2,$f28;	b	load_rs_done
rs_cp_1w_fpr30:
	mfc1	t2,$f30;	b	load_rs_done

/*
 * Load the two words from the coprocessor for the FPR register specified by
 * the RS (v1) field into GPR registers t2,t3.
*/
rs_cp_2w:
	/*
	 * v1 holds the RS register number, low bit cleared, times 4 (set up
	 * at load_rs).  Halving it gives the byte offset of that register
	 * pair's entry in the table of 16 word-sized jump targets below.
	 * v1 is then reused to hold the target.
	 */
	srl	v1,1
	lw	v1,rs_cp_2w_tab(v1)
	j	v1

	.rdata
rs_cp_2w_tab:
	.word	rs_cp_2w_fpr0:1,  rs_cp_2w_fpr2:1,  rs_cp_2w_fpr4:1
	.word	rs_cp_2w_fpr6:1,  rs_cp_2w_fpr8:1,  rs_cp_2w_fpr10:1
	.word	rs_cp_2w_fpr12:1, rs_cp_2w_fpr14:1, rs_cp_2w_fpr16:1
	.word	rs_cp_2w_fpr18:1, rs_cp_2w_fpr20:1, rs_cp_2w_fpr22:1
	.word	rs_cp_2w_fpr24:1, rs_cp_2w_fpr26:1, rs_cp_2w_fpr28:1
	.word	rs_cp_2w_fpr30:1
	.text

	/*
	 * One stub per even/odd FPR pair.  In every stub t3 receives the
	 * even-numbered register and t2 the following odd-numbered one,
	 * then control rejoins at load_rs_done.
	 */
rs_cp_2w_fpr0:
	mfc1	t3,$f0;		mfc1	t2,$f1;		b	load_rs_done
rs_cp_2w_fpr2:
	mfc1	t3,$f2;		mfc1	t2,$f3;		b	load_rs_done
rs_cp_2w_fpr4:
	mfc1	t3,$f4;		mfc1	t2,$f5;		b	load_rs_done
rs_cp_2w_fpr6:
	mfc1	t3,$f6;		mfc1	t2,$f7;		b	load_rs_done
rs_cp_2w_fpr8:
	mfc1	t3,$f8;		mfc1	t2,$f9;		b	load_rs_done
rs_cp_2w_fpr10:
	mfc1	t3,$f10;	mfc1	t2,$f11;	b	load_rs_done
rs_cp_2w_fpr12:
	mfc1	t3,$f12;	mfc1	t2,$f13;	b	load_rs_done
rs_cp_2w_fpr14:
	mfc1	t3,$f14;	mfc1	t2,$f15;	b	load_rs_done
rs_cp_2w_fpr16:
	mfc1	t3,$f16;	mfc1	t2,$f17;	b	load_rs_done
rs_cp_2w_fpr18:
	mfc1	t3,$f18;	mfc1	t2,$f19;	b	load_rs_done
rs_cp_2w_fpr20:
	mfc1	t3,$f20;	mfc1	t2,$f21;	b	load_rs_done
rs_cp_2w_fpr22:
	mfc1	t3,$f22;	mfc1	t2,$f23;	b	load_rs_done
rs_cp_2w_fpr24:
	mfc1	t3,$f24;	mfc1	t2,$f25;	b	load_rs_done
rs_cp_2w_fpr26:
	mfc1	t3,$f26;	mfc1	t2,$f27;	b	load_rs_done
rs_cp_2w_fpr28:
	mfc1	t3,$f28;	mfc1	t2,$f29;	b	load_rs_done
rs_cp_2w_fpr30:
	mfc1	t3,$f30;	mfc1	t2,$f31;	b	load_rs_done

/*
 * At this point the floating-point value for the specified FPR register
 * in the RS field (v1) will be loaded from the process control block (pcb)
 * of the current process for FMT specified (v0).  Also the floating-point
 * control and status register is loaded into gp register a3.
*/
rs_pcb:
	lw	a3,u+PCB_FPC_CSR
	lw	t9,rs_pcb_fmt_tab(v0)
	j	t9

	/*
	 * Jump table indexed by v0 (FMT times 4): five word-sized entries.
	 * Entries 2 and 3 go to illfpinst.
	 */
	.rdata
rs_pcb_fmt_tab:
	.word	rs_pcb_s:1, rs_pcb_d:1, illfpinst:1, illfpinst:1, rs_pcb_w:1
	.text

	/*
	 * v1 (RS register number, low bit cleared, times 4) is used directly
	 * as the byte offset into the saved registers at u+PCB_FPREGS.
	 * rs_pcb_s and rs_pcb_w share the same one-word load.
	 */
rs_pcb_s:
rs_pcb_w:
	lw	t2,u+PCB_FPREGS(v1)
	b	load_rs_done
rs_pcb_d:
	/* Two words: t3 from the slot at v1, t2 from the slot after it. */
	lw	t3,u+PCB_FPREGS(v1)
	lw	t2,u+PCB_FPREGS+4(v1)
	/* no branch: falls through into load_rs_done */
/*
 * At this point the floating-point value for the specified FPR register
 * in the RS field has been loaded into GPR registers and the fpc_csr has
 * been loaded into the GPR register (a3).  First the exception field is
 * cleared in the fpc_csr.  What is done next is to decode the FUNC field.
 * If this is a dyadic operation then the floating-point value specified
 * by the FPR register in the RT field will be loaded into GPR registers
 * before the instruction is further decoded.  If this is a monadic
 * instruction it is decoded to be emulated.
 */
load_rs_done:
	and	a3,~CSR_EXCEPT
	and	t8,a1,C1_FUNC_MASK
	/*
	 * The order of the range checks below matters: each test relies on
	 * the ones before it having removed the lower/upper codes.
	 *	FUNC <= C1_FUNC_DIV		-> load_rt
	 *	FUNC >= C1_FUNC_1stCMP		-> load_rt
	 *	FUNC >  C1_FUNC_CVTW		-> illfpinst
	 *	FUNC >= C1_FUNC_CVTS		-> conv
	 *	FUNC >  C1_FUNC_NEG		-> illfpinst
	 * What remains indexes mon_func_tab.
	 */
	ble	t8,C1_FUNC_DIV,load_rt
	bge	t8,C1_FUNC_1stCMP,load_rt
	bgt	t8,C1_FUNC_CVTW,illfpinst
	bge	t8,C1_FUNC_CVTS,conv
	bgt	t8,C1_FUNC_NEG,illfpinst
	/*
	 * Convert FUNC into a byte offset into mon_func_tab: (FUNC - 4) * 4.
	 * NOTE(review): the literal 4 is presumably the FUNC code of the
	 * first table entry (sqrt) -- confirm against the C1_FUNC_*
	 * definitions.
	 */
	subu	t8,4
	sll	t8,2
	lw	t9,mon_func_tab(t8)
	j	t9

	.rdata
mon_func_tab:
	.word	func_sqrt:1, func_abs:1, func_mov:1, func_neg:1
	.text

	/* Square root: dispatch again on the format, v0 (FMT times 4). */
func_sqrt:
	lw	v1,sqrt_fmt_tab(v0)
	j	v1

	.rdata
sqrt_fmt_tab:
	.word	sqrt_s:1, sqrt_d:1, sqrt_e:1, sqrt_q:1, illfpinst:1
	.text

/*
 * Square root single
 */
sqrt_s:
	/*
	 * Break out the operand into its fields (sign,exp,fraction) and
	 * handle a NaN operand by calling rs_breakout_s() .
	 * The code below reads the sign from t0, the exponent from t1 and
	 * the fraction from t2 after the call.  t9 and v1 are set up before
	 * the call, presumably as inputs to rs_breakout_s (defined
	 * elsewhere) -- confirm there.
	 */
	li	t9,C1_FMT_SINGLE*4
	move	v1,zero
	jal	rs_breakout_s

	# Check for sqrt of infinity, and produce the correct action if so
	bne	t1,SEXP_INF,4f		# is RS an infinity?
	# RS is an infinity
	beq	t0,zero,3f		# check for -infinity
	/*
	 * This is -infinity so this is an invalid operation for sqrt so set
	 * the invalid exception in the fpc_csr (a3) and setup the result
	 * depending if the enable for the invalid exception is set.
	 * (This label is also the target of the later "1b" branch taken for
	 * a negative operand.)
	 */
1:	or	a3,INVALID_EXC
	and	v0,a3,INVALID_ENABLE
	beq	v0,zero,2f
	/*
	 * The invalid trap was enabled so signal a SIGFPE and leave the
	 * result register unmodified.
*/
	li	v0,SIGFPE
	jal	post_signal
	li	v0,1			# non-zero v0: a signal was posted
	b	store_fpc_csr
	/*
	 * The invalid trap was NOT enabled so the result is a quiet NaN.
	 * So use the default quiet NaN and exit softfp().
	 */
2:	li	t2,SQUIETNAN_LEAST
	move	v0,zero
	b	rd_1w
	/*
	 * This is +infinity so the result is just +infinity.
	 */
3:	sll	t2,t1,SEXP_SHIFT
	move	v0,zero
	b	rd_1w
4:	# Check for the sqrt of zero and produce the correct action if so
	bne	t1,zero,5f	# check RS for a zero value (first the exp)
	bne	t2,zero,5f	# then the high part of the fraction
	# Now RS is known to be zero so just return it
	move	t2,t0		# get the sign of the zero
	move	v0,zero
	b	rd_1w
5:	# Check for sqrt of a negative number if so it is an invalid
	bne	t0,zero,1b	# 1b = the invalid-operation code above

	/*
	 * Now that all the NaN, infinity and zero and negative cases have
	 * been taken care of what is left is a value that the sqrt can be
	 * taken.  So get the value into a format that can be used.  For
	 * normalized numbers set the implied one and remove the exponent
	 * bias.  For denormalized numbers convert to normalized numbers
	 * with the correct exponent.
	 */
	bne	t1,zero,1f	# check for RS being denormalized
	li	t1,-SEXP_BIAS+1	# set denorm's exponent
	jal	rs_renorm_s	# normalize it
	b	2f
1:	subu	t1,SEXP_BIAS	# if RS is not denormalized then remove the
	or	t2,SIMP_1BIT	# exponent bias, and set the implied 1 bit
2:
	/*
	 * Now take the sqrt of the value.  Written by George Tayor.
	 *  t1		-- two's comp exponent
	 *  t2		-- 24-bit fraction
	 *  t9		-- temp
	 *  v0		-- trial subtraction
	 *  t4		-- remainder
	 *  t6		-- 25-bit result
	 *  t8		-- cycle counter during the loop, then sticky
	 *
	 * On exit to norm_s: t1 = result exponent, t2 = result fraction
	 * (copied from t6), t8 = sticky (copied from the final remainder).
	 */
	andi	t9, t1, 1	/* last bit of unbiased exponent */
	sra	t1, 1		/* divide exponent by 2 */
	addi	t1, -1		/* subtract 1, deliver 25-bit result */
	beq	t9, zero, 1f
	sll	t2, 1		/* shift operand left by 1 */
				/* if exponent was odd */
1:	li	t6, 1		/* initialize answer msw */
	/*
	 * The remainder (t4) needs no separate clearing: the srl below
	 * writes the whole register from t2.
	 */
	srl	t4, t2, 23	/* shift operand left by 9 so that */
	sll	t2, 9		/* 2 bits go into remainder */
	li	t8, 25		/* set cycle counter */

	/*
	 * One answer bit per pass: try subtracting the answer-so-far (with
	 * its trailing 01) from the remainder, record whether it fit, then
	 * bring the next two operand bits down into the remainder.
	 */
2:	subu	v0, t4, t6	/* trial subtraction */
	sll	t6, 1		/* shift answer left by 1 */
	li	t9, -4		/* put 01 back in low order bits */
	and	t6, t9		/* using 0xfffffffc mask */
	or	t6, 1
	bltz	v0, 3f		/* branch on sign of trial subtract */
	ori	t6, 4		/* set new bit of answer */
	sll	t4, v0, 2	/* shift trial result left by 2 */
				/* and put in remainder */
	b	4f
3:	sll	t4, 2		/* shift remainder left by 2 */
4:	srl	t9, t2, 30	/* shift operand left by 2 */
	or	t4, t9
	sll	t2, 2
	addi	t8, -1
	bne	t8, zero, 2b

	srl	t6, 2		/* shift answer right by 2 */
				/* to eliminate extra bits */
	move	t8, t4		/* form sticky bit */
	move	t2, t6
	b	norm_s

/*
 * Square root double
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -