📄 i860.c
字号:
instruction which precedes the loop, but we need to have it done two times before we enter the loop because of the bizarre semantics of the bla instruction. */ output_asm_insn ("adds %5,%2,%2", xoperands); /* Check for the case where the original count was less than or equal to zero. Avoid going through the loop at all if the original count was indeed less than or equal to zero. Note that we treat the count as if it were a signed 32-bit quantity here, rather than an unsigned one, even though we really shouldn't. We have to do this because of the semantics of the `ble' instruction, which assume that the count is a signed 32-bit value. Anyway, in practice it won't matter because nobody is going to try to do a memcpy() of more than half of the entire address space (i.e. 2 gigabytes) anyway. */ output_asm_insn ("bc .Le%3", xoperands); /* Make available a register which is a temporary. */ xoperands[6] = operands[6]; /* Now the actual loop. In xoperands, elements 1 and 0 are the input and output vectors. Element 2 is the loop index. Element 5 is the increment. */ output_asm_insn ("subs %1,%5,%1", xoperands); output_asm_insn ("bla %5,%2,.Lm%3", xoperands); output_asm_insn ("adds %0,%2,%6", xoperands); output_asm_insn ("\n.Lm%3:", xoperands); /* Label for bla above. */ output_asm_insn ("\n.Ls%3:", xoperands); /* Loop start label. */ output_asm_insn ("adds %5,%6,%6", xoperands); /* NOTE: The code here which is supposed to handle the cases where the sources and destinations are known to start on a 4 or 2 byte boundary are currently broken. They fail to do anything about the overflow bytes which might still need to be copied even after we have copied some number of words or halfwords. Thus, for now we use the lowest common denominator, i.e. the code which just copies some number of totally unaligned individual bytes. (See the calculation of chunk_size above. */ if (chunk_size == 4) { output_asm_insn ("ld.l %2(%1),%?r31", xoperands); output_asm_insn ("bla %5,%2,.Ls%3", xoperands); output_asm_insn ("st.l %?r31,8(%6)", xoperands); } else if (chunk_size == 2) { output_asm_insn ("ld.s %2(%1),%?r31", xoperands); output_asm_insn ("bla %5,%2,.Ls%3", xoperands); output_asm_insn ("st.s %?r31,4(%6)", xoperands); } else /* chunk_size == 1 */ { output_asm_insn ("ld.b %2(%1),%?r31", xoperands); output_asm_insn ("bla %5,%2,.Ls%3", xoperands); output_asm_insn ("st.b %?r31,2(%6)", xoperands); } output_asm_insn ("\n.Le%3:", xoperands); /* Here if count <= 0. */ return "";}/* Output a delayed branch insn with the delay insn in its branch slot. The delayed branch insn template is in TEMPLATE, with operands OPERANDS. The insn in its delay slot is INSN. As a special case, since we know that all memory transfers are via ld/st insns, if we see a (MEM (SYMBOL_REF ...)) we divide the memory reference around the branch as orh ha%x,%?r0,%?r31 b ... ld/st l%x(%?r31),... As another special case, we handle loading (SYMBOL_REF ...) and other large constants around branches as well: orh h%x,%?r0,%0 b ... or l%x,%0,%1 */char *output_delayed_branch (template, operands, insn) char *template; rtx *operands; rtx insn;{ rtx src = XVECEXP (PATTERN (insn), 0, 1); rtx dest = XVECEXP (PATTERN (insn), 0, 0); /* See if we are doing some branch together with setting some register to some 32-bit value which does (or may) have some of the high-order 16 bits set. If so, we need to set the register in two stages. One stage must be done before the branch, and the other one can be done in the delay slot. */ if ( (GET_CODE (src) == CONST_INT && ((unsigned) INTVAL (src) & (unsigned) 0xffff0000) != (unsigned) 0) || (GET_CODE (src) == SYMBOL_REF) || (GET_CODE (src) == LABEL_REF) || (GET_CODE (src) == CONST)) { rtx xoperands[2]; xoperands[0] = dest; xoperands[1] = src; CC_STATUS_PARTIAL_INIT; /* Output the `orh' insn. */ output_asm_insn ("orh %H1,%?r0,%0", xoperands); /* Output the branch instruction next. */ output_asm_insn (template, operands); /* Now output the `or' insn. */ output_asm_insn ("or %L1,%0,%0", xoperands); } else if ((GET_CODE (src) == MEM && CONSTANT_ADDRESS_P (XEXP (src, 0))) || (GET_CODE (dest) == MEM && CONSTANT_ADDRESS_P (XEXP (dest, 0)))) { rtx xoperands[2]; char *split_template; xoperands[0] = dest; xoperands[1] = src; /* Output the `orh' insn. */ if (GET_CODE (src) == MEM) { if (! ((cc_prev_status.flags & CC_KNOW_HI_R31) && (cc_prev_status.flags & CC_HI_R31_ADJ) && cc_prev_status.mdep == XEXP (operands[1], 0))) { CC_STATUS_INIT; output_asm_insn ("orh %h1,%?r0,%?r31", xoperands); } split_template = load_opcode (GET_MODE (dest), "%L1(%?r31),%0", dest); } else { if (! ((cc_prev_status.flags & CC_KNOW_HI_R31) && (cc_prev_status.flags & CC_HI_R31_ADJ) && cc_prev_status.mdep == XEXP (operands[0], 0))) { CC_STATUS_INIT; output_asm_insn ("orh %h0,%?r0,%?r31", xoperands); } split_template = store_opcode (GET_MODE (dest), "%r1,%L0(%?r31)", src); } /* Output the branch instruction next. */ output_asm_insn (template, operands); /* Now output the load or store. No need to do a CC_STATUS_INIT, because we are branching anyway. */ output_asm_insn (split_template, xoperands); } else { int insn_code_number; rtx pat = gen_rtx (SET, VOIDmode, dest, src); rtx delay_insn = gen_rtx (INSN, VOIDmode, 0, 0, 0, pat, -1, 0, 0); int i; /* Output the branch instruction first. */ output_asm_insn (template, operands); /* Now recognize the insn which we put in its delay slot. We must do this after outputting the branch insn, since operands may just be a pointer to `recog_operand'. */ INSN_CODE (delay_insn) = insn_code_number = recog (pat, delay_insn); if (insn_code_number == -1) abort (); for (i = 0; i < insn_n_operands[insn_code_number]; i++) { if (GET_CODE (recog_operand[i]) == SUBREG) recog_operand[i] = alter_subreg (recog_operand[i]); } insn_extract (delay_insn); if (! constrain_operands (insn_code_number, 1)) fatal_insn_not_found (delay_insn); template = insn_template[insn_code_number]; if (template == 0) template = (*insn_outfun[insn_code_number]) (recog_operand, delay_insn); output_asm_insn (template, recog_operand); } CC_STATUS_INIT; return "";}/* Output a newly constructed insn DELAY_INSN. */char *output_delay_insn (delay_insn) rtx delay_insn;{ char *template; int insn_code_number; int i; /* Now recognize the insn which we put in its delay slot. We must do this after outputting the branch insn, since operands may just be a pointer to `recog_operand'. */ insn_code_number = recog_memoized (delay_insn); if (insn_code_number == -1) abort (); /* Extract the operands of this delay insn. */ INSN_CODE (delay_insn) = insn_code_number; insn_extract (delay_insn); /* It is possible that this insn has not been properly scanned by final yet. If this insn's operands don't appear in the peephole's actual operands, then they won't be fixed up by final, so we make sure they get fixed up here. -- This is a kludge. */ for (i = 0; i < insn_n_operands[insn_code_number]; i++) { if (GET_CODE (recog_operand[i]) == SUBREG) recog_operand[i] = alter_subreg (recog_operand[i]); }#ifdef REGISTER_CONSTRAINTS if (! constrain_operands (insn_code_number)) abort ();#endif cc_prev_status = cc_status; /* Update `cc_status' for this instruction. The instruction's output routine may change it further. If the output routine for a jump insn needs to depend on the cc status, it should look at cc_prev_status. */ NOTICE_UPDATE_CC (PATTERN (delay_insn), delay_insn); /* Now get the template for what this insn would have been, without the branch. */ template = insn_template[insn_code_number]; if (template == 0) template = (*insn_outfun[insn_code_number]) (recog_operand, delay_insn); output_asm_insn (template, recog_operand); return "";}/* Special routine to convert an SFmode value represented as a CONST_DOUBLE into its equivalent unsigned long bit pattern. We convert the value from a double precision floating-point value to single precision first, and thence to a bit-wise equivalent unsigned long value. This routine is used when generating an immediate move of an SFmode value directly into a general register because the svr4 assembler doesn't grok floating literals in instruction operand contexts. */unsigned longsfmode_constant_to_ulong (x) rtx x;{ REAL_VALUE_TYPE d; union { float f; unsigned long i; } u2; if (GET_CODE (x) != CONST_DOUBLE || GET_MODE (x) != SFmode) abort ();#if TARGET_FLOAT_FORMAT != HOST_FLOAT_FORMAT error IEEE emulation needed#endif REAL_VALUE_FROM_CONST_DOUBLE (d, x); u2.f = d; return u2.i;}/* This function generates the assembly code for function entry. The macro FUNCTION_PROLOGUE in i860.h is defined to call this function. ASM_FILE is a stdio stream to output the code to. SIZE is an int: how many units of temporary storage to allocate. Refer to the array `regs_ever_live' to determine which registers to save; `regs_ever_live[I]' is nonzero if register number I is ever used in the function. This macro is responsible for knowing which registers should not be saved even if used. NOTE: `frame_lower_bytes' is the count of bytes which will lie between the new `fp' value and the new `sp' value after the prologue is done. `frame_upper_bytes' is the count of bytes that will lie between the new `fp' and the *old* `sp' value after the new `fp' is setup (in the prologue). The upper part of each frame always includes at least 2 words (8 bytes) to hold the saved frame pointer and the saved return address. The svr4 ABI for the i860 now requires that the values of the stack pointer and frame pointer registers be kept aligned to 16-byte boundaries at all times. We obey that restriction here. The svr4 ABI for the i860 is entirely vague when it comes to specifying exactly where the "preserved" registers should be saved. The native svr4 C compiler I now have doesn't help to clarify the requirements very much because it is plainly out-of-date and non-ABI-compliant (in at least one important way, i.e. how it generates function epilogues). The native svr4 C compiler saves the "preserved" registers (i.e. r4-r15 and f2-f7) in the lower part of a frame (i.e. at negative offsets from the frame pointer). Previous versions of GCC also saved the "preserved" registers in the "negative" part of the frame, but they saved them using positive offsets from the (adjusted) stack pointer (after it had been adjusted to allocate space for the new frame). That's just plain wrong because if the current function calls alloca(), the stack pointer will get moved, and it will be impossible to restore the registers properly again after that. Both compilers handled parameter registers (i.e. r16-r27 and f8-f15) by copying their values either into various "preserved" registers or into stack slots in the lower part of the current frame (as seemed appropriate, depending upon subsequent usage of these values). Here we want to save the preserved registers at some offset from the frame pointer register so as to avoid any possible problems arising from calls to alloca(). We can either save them at small positive offsets from the frame pointer, or at small negative offsets from the frame pointer. If we save them at small negative offsets from the frame pointer (i.e. in the lower part of the frame) then we must tell the rest of GCC (via STARTING_FRAME_OFFSET) exactly how many bytes of space we plan to use in the lower part of the frame for this purpose. Since other parts of the compiler reference the value of STARTING_FRAME_OFFSET long before final() calls this function, we would have to go ahead and assume the worst-case storage requirements for saving all of the "preserved" registers (and use that number, i.e. `80', to define STARTING_FRAME_OFFSET) if we wanted to save them in the lower part of the frame. That could potentially be very wasteful, and that wastefulness could really hamper people compiling for embedded i860 targets with very tight limits on stack space. Thus, we choose here to save the preserved registers in the upper part of the frame, so that we can decide at the very last minute how much (or how little) space we must allocate for this purpose. To satisfy the needs of the svr4 ABI "tdesc" scheme, preserved registers must always be saved so that the saved values of registers with higher numbers are at higher addresses. We obey that restriction here. There are two somewhat different ways that you can generate prologues here... i.e. pedantically ABI-compliant, and the "other" way. The "other" way is more consistent with what is currently generated by the "native" svr4 C compiler for the i860. That's important if you want to use the current (as of 8/91) incarnation of svr4 SDB for the i860. The SVR4 SDB for the i860 insists on having function prologues be non-ABI-compliant! To get fully ABI-compliant prologues, define I860_STRICT_ABI_PROLOGUES in the i860svr4.h file. (By default this is *not* defined). The differences between the ABI-compliant and non-ABI-compliant prologues are that (a) the ABI version seems to require the use of *signed* (rather than unsigned) adds and subtracts, and (b) the ordering of the various steps (e.g. saving preserved registers, saving the return address, setting up the new frame pointer value) is different. For strict ABI compliance, it seems to be the case that the very last thing that is supposed to happen in the prologue is getting the frame pointer set to its new value (but only after everything else has already been properly setup). We do that here, but only if the symbol I860_STRICT_ABI_PROLOGUES is defined.*/#ifndef STACK_ALIGNMENT#define STACK_ALIGNMENT 16#endifextern char call_used_regs[];extern int leaf_function_p ();char *current_function_original_name;static int must_preserve_r1;static unsigned must_preserve_bytes;voidfunction_prologue (asm_file, local_bytes) register FILE *asm_file; register unsigned local_bytes;{ register unsigned frame_lower_bytes; register unsigned frame_upper_bytes; register unsigned total_fsize; register unsigned preserved_reg_bytes = 0; register unsigned i; register unsigned preserved_so_far = 0; must_preserve_r1 = (optimize < 2 || ! leaf_function_p ()); must_preserve_bytes = 4 + (must_preserve_r1 ? 4 : 0); /* Count registers that need preserving. Ignore r0. It never needs preserving. */ for (i = 1; i < FIRST_PSEUDO_REGISTER; i++) { if (regs_ever_live[i] && ! call_used_regs[i]) preserved_reg_bytes += 4; } /* Round-up the frame_lower_bytes so that it's a multiple of 16. */ frame_lower_bytes = (local_bytes + STACK_ALIGNMENT - 1) & -STACK_ALIGNMENT; /* The upper part of each frame will contain the saved fp, the saved r1, and stack slots for all of the other "preserved" registers that we find we will need to save & restore. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -