📄 mcore.c
字号:
int localregarg; int localreg; int outbounds; unsigned int growths; int step; /* Might have to spill bytes to re-assemble a big argument that was passed partially in registers and partially on the stack. */ nbytes = current_function_pretend_args_size; /* Determine how much space for spilled anonymous args (e.g., stdarg). */ if (current_function_anonymous_args) nbytes += (NPARM_REGS - number_of_regs_before_varargs) * UNITS_PER_WORD; infp->arg_size = nbytes; /* How much space to save non-volatile registers we stomp. */ infp->reg_mask = calc_live_regs (& n); infp->reg_size = n * 4; /* And the rest of it... locals and space for overflowed outbounds. */ infp->local_size = get_frame_size (); infp->outbound_size = current_function_outgoing_args_size; /* Make sure we have a whole number of words for the locals. */ if (infp->local_size % STACK_BYTES) infp->local_size = (infp->local_size + STACK_BYTES - 1) & ~ (STACK_BYTES -1); /* Only thing we know we have to pad is the outbound space, since we've aligned our locals assuming that base of locals is aligned. */ infp->pad_local = 0; infp->pad_reg = 0; infp->pad_outbound = 0; if (infp->outbound_size % STACK_BYTES) infp->pad_outbound = STACK_BYTES - (infp->outbound_size % STACK_BYTES); /* Now we see how we want to stage the prologue so that it does the most appropriate stack growth and register saves to either: (1) run fast, (2) reduce instruction space, or (3) reduce stack space. */ for (i = 0; i < ARRAY_SIZE (infp->growth); i++) infp->growth[i] = 0; regarg = infp->reg_size + infp->arg_size; localregarg = infp->local_size + regarg; localreg = infp->local_size + infp->reg_size; outbounds = infp->outbound_size + infp->pad_outbound; growths = 0; /* XXX: Consider one where we consider localregarg + outbound too! */ /* Frame of <= 32 bytes and using stm would get <= 2 registers. use stw's with offsets and buy the frame in one shot. */ if (localregarg <= ADDI_REACH && (infp->reg_size <= 8 || (infp->reg_mask & 0xc000) != 0xc000)) { /* Make sure we'll be aligned. */ if (localregarg % STACK_BYTES) infp->pad_reg = STACK_BYTES - (localregarg % STACK_BYTES); step = localregarg + infp->pad_reg; infp->reg_offset = infp->local_size; if (outbounds + step <= ADDI_REACH && !frame_pointer_needed) { step += outbounds; infp->reg_offset += outbounds; outbounds = 0; } infp->arg_offset = step - 4; infp->growth[growths++] = step; infp->reg_growth = growths; infp->local_growth = growths; /* If we haven't already folded it in. */ if (outbounds) infp->growth[growths++] = outbounds; goto finish; } /* Frame can't be done with a single subi, but can be done with 2 insns. If the 'stm' is getting <= 2 registers, we use stw's and shift some of the stack purchase into the first subi, so both are single instructions. */ if (localregarg <= STORE_REACH && (infp->local_size > ADDI_REACH) && (infp->reg_size <= 8 || (infp->reg_mask & 0xc000) != 0xc000)) { int all; /* Make sure we'll be aligned; use either pad_reg or pad_local. */ if (localregarg % STACK_BYTES) infp->pad_reg = STACK_BYTES - (localregarg % STACK_BYTES); all = localregarg + infp->pad_reg + infp->pad_local; step = ADDI_REACH; /* As much up front as we can. */ if (step > all) step = all; /* XXX: Consider whether step will still be aligned; we believe so. */ infp->arg_offset = step - 4; infp->growth[growths++] = step; infp->reg_growth = growths; infp->reg_offset = step - infp->pad_reg - infp->reg_size; all -= step; /* Can we fold in any space required for outbounds? */ if (outbounds + all <= ADDI_REACH && !frame_pointer_needed) { all += outbounds; outbounds = 0; } /* Get the rest of the locals in place. */ step = all; infp->growth[growths++] = step; infp->local_growth = growths; all -= step; assert (all == 0); /* Finish off if we need to do so. */ if (outbounds) infp->growth[growths++] = outbounds; goto finish; } /* Registers + args is nicely aligned, so we'll buy that in one shot. Then we buy the rest of the frame in 1 or 2 steps depending on whether we need a frame pointer. */ if ((regarg % STACK_BYTES) == 0) { infp->growth[growths++] = regarg; infp->reg_growth = growths; infp->arg_offset = regarg - 4; infp->reg_offset = 0; if (infp->local_size % STACK_BYTES) infp->pad_local = STACK_BYTES - (infp->local_size % STACK_BYTES); step = infp->local_size + infp->pad_local; if (!frame_pointer_needed) { step += outbounds; outbounds = 0; } infp->growth[growths++] = step; infp->local_growth = growths; /* If there's any left to be done. */ if (outbounds) infp->growth[growths++] = outbounds; goto finish; } /* XXX: optimizations that we'll want to play with.... -- regarg is not aligned, but it's a small number of registers; use some of localsize so that regarg is aligned and then save the registers. */ /* Simple encoding; plods down the stack buying the pieces as it goes. -- does not optimize space consumption. -- does not attempt to optimize instruction counts. -- but it is safe for all alignments. */ if (regarg % STACK_BYTES != 0) infp->pad_reg = STACK_BYTES - (regarg % STACK_BYTES); infp->growth[growths++] = infp->arg_size + infp->reg_size + infp->pad_reg; infp->reg_growth = growths; infp->arg_offset = infp->growth[0] - 4; infp->reg_offset = 0; if (frame_pointer_needed) { if (infp->local_size % STACK_BYTES != 0) infp->pad_local = STACK_BYTES - (infp->local_size % STACK_BYTES); infp->growth[growths++] = infp->local_size + infp->pad_local; infp->local_growth = growths; infp->growth[growths++] = outbounds; } else { if ((infp->local_size + outbounds) % STACK_BYTES != 0) infp->pad_local = STACK_BYTES - ((infp->local_size + outbounds) % STACK_BYTES); infp->growth[growths++] = infp->local_size + infp->pad_local + outbounds; infp->local_growth = growths; } /* Anything else that we've forgotten?, plus a few consistency checks. */ finish: assert (infp->reg_offset >= 0); assert (growths <= MAX_STACK_GROWS); for (i = 0; i < growths; i++) { if (infp->growth[i] % STACK_BYTES) { fprintf (stderr,"stack growth of %d is not %d aligned\n", infp->growth[i], STACK_BYTES); abort (); } }}/* Define the offset between two registers, one to be eliminated, and the other its replacement, at the start of a routine. */intmcore_initial_elimination_offset (from, to) int from; int to;{ int above_frame; int below_frame; struct mcore_frame fi; layout_mcore_frame (& fi); /* fp to ap */ above_frame = fi.local_size + fi.pad_local + fi.reg_size + fi.pad_reg; /* sp to fp */ below_frame = fi.outbound_size + fi.pad_outbound; if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM) return above_frame; if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) return above_frame + below_frame; if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) return below_frame; abort (); return 0;}/* Keep track of some information about varargs for the prolog. */voidmcore_setup_incoming_varargs (args_so_far, mode, type, ptr_pretend_size) CUMULATIVE_ARGS args_so_far; enum machine_mode mode; tree type; int * ptr_pretend_size ATTRIBUTE_UNUSED;{ current_function_anonymous_args = 1; /* We need to know how many argument registers are used before the varargs start, so that we can push the remaining argument registers during the prologue. */ number_of_regs_before_varargs = args_so_far + mcore_num_arg_regs (mode, type); /* There is a bug somwehere in the arg handling code. Until I can find it this workaround always pushes the last named argument onto the stack. */ number_of_regs_before_varargs = args_so_far; /* The last named argument may be split between argument registers and the stack. Allow for this here. */ if (number_of_regs_before_varargs > NPARM_REGS) number_of_regs_before_varargs = NPARM_REGS;}voidmcore_expand_prolog (){ struct mcore_frame fi; int space_allocated = 0; int growth = 0; /* Find out what we're doing. */ layout_mcore_frame (&fi); space_allocated = fi.arg_size + fi.reg_size + fi.local_size + fi.outbound_size + fi.pad_outbound + fi.pad_local + fi.pad_reg; if (TARGET_CG_DATA) { /* Emit a symbol for this routine's frame size. */ rtx x; x = DECL_RTL (current_function_decl); if (GET_CODE (x) != MEM) abort (); x = XEXP (x, 0); if (GET_CODE (x) != SYMBOL_REF) abort (); if (mcore_current_function_name) free (mcore_current_function_name); mcore_current_function_name = xstrdup (XSTR (x, 0)); ASM_OUTPUT_CG_NODE (asm_out_file, mcore_current_function_name, space_allocated); if (current_function_calls_alloca) ASM_OUTPUT_CG_EDGE (asm_out_file, mcore_current_function_name, "alloca", 1); /* 970425: RBE: We're looking at how the 8byte alignment affects stack layout and where we had to pad things. This emits information we can extract which tells us about frame sizes and the like. */ fprintf (asm_out_file, "\t.equ\t__$frame$info$_%s_$_%d_%d_x%x_%d_%d_%d,0\n", mcore_current_function_name, fi.arg_size, fi.reg_size, fi.reg_mask, fi.local_size, fi.outbound_size, frame_pointer_needed); } if (mcore_naked_function_p ()) return; /* Handle stdarg+regsaves in one shot: can't be more than 64 bytes. */ output_stack_adjust (-1, fi.growth[growth++]); /* grows it */ /* If we have a parameter passed partially in regs and partially in memory, the registers will have been stored to memory already in function.c. So we only need to do something here for varargs functions. */ if (fi.arg_size != 0 && current_function_pretend_args_size == 0) { int offset; int rn = FIRST_PARM_REG + NPARM_REGS - 1; int remaining = fi.arg_size; for (offset = fi.arg_offset; remaining >= 4; offset -= 4, rn--, remaining -= 4) { emit_insn (gen_movsi (gen_rtx (MEM, SImode, plus_constant (stack_pointer_rtx, offset)), gen_rtx (REG, SImode, rn))); } } /* Do we need another stack adjustment before we do the register saves? */ if (growth < fi.reg_growth) output_stack_adjust (-1, fi.growth[growth++]); /* grows it */ if (fi.reg_size != 0) { int i; int offs = fi.reg_offset; for (i = 15; i >= 0; i--) { if (offs == 0 && i == 15 && ((fi.reg_mask & 0xc000) == 0xc000)) { int first_reg = 15; while (fi.reg_mask & (1 << first_reg)) first_reg--; first_reg++; emit_insn (gen_store_multiple (gen_rtx (MEM, SImode, stack_pointer_rtx), gen_rtx (REG, SImode, first_reg), GEN_INT (16 - first_reg))); i -= (15 - first_reg); offs += (16 - first_reg) * 4; } else if (fi.reg_mask & (1 << i)) { emit_insn (gen_movsi (gen_rtx (MEM, SImode, plus_constant (stack_pointer_rtx, offs)), gen_rtx (REG, SImode, i))); offs += 4; } } } /* Figure the locals + outbounds. */ if (frame_pointer_needed) { /* If we haven't already purchased to 'fp'. */ if (growth < fi.local_growth) output_stack_adjust (-1, fi.growth[growth++]); /* grows it */ emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx)); /* ... and then go any remaining distance for outbounds, etc. */ if (fi.growth[growth]) output_stack_adjust (-1, fi.growth[growth++]); } else { if (growth < fi.local_growth) output_stack_adjust (-1, fi.growth[growth++]); /* grows it */ if (fi.growth[growth]) output_stack_adjust (-1, fi.growth[growth++]); }}voidmcore_expand_epilog (){ struct mcore_frame fi; int i; int offs; int growth = MAX_STACK_GROWS - 1 ; /* Find out what we're doing. */ layout_mcore_frame(&fi); if (mcore_naked_function_p ()) return; /* If we had a frame pointer, restore the sp from that. */ if (frame_pointer_needed) { emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx)); growth = fi.local_growth - 1; } else { /* XXX: while loop should accumulate and do a single sell. */ while (growth >= fi.local_growth) { if (fi.growth[growth] != 0) output_stack_adjust (1, fi.growth[growth]); growth--; } } /* Make sure we've shrunk stack back to the point where the registers were laid down. This is typically 0/1 iterations. Then pull the register save information back off the stack. */ while (growth >= fi.reg_growth) output_stack_adjust ( 1, fi.growth[growth--]); offs = fi.reg_offset; for (i = 15; i >= 0; i--) { if (offs == 0 && i == 15 && ((fi.reg_mask & 0xc000) == 0xc000)) { int first_reg; /* Find the starting register. */ first_reg = 15; while (fi.reg_mask & (1 << first_reg)) first_reg--; first_reg++; emit_insn (gen_load_multiple (gen_rtx (REG, SImode, first_reg), gen_rtx (MEM, SImode, stack_pointer_rtx), GEN_INT (16 - first_reg))); i -= (15 - first_reg); offs += (16 - first_reg) * 4; } else if (fi.reg_mask & (1 << i)) { emit_insn (gen_movsi (gen_rtx (REG, SImode, i), gen_rtx (MEM, SImode, plus_constant (stack_pointer_rtx, offs)))); offs += 4; } } /* Give back anything else. */ /* XXX: Should accumuate total and then give it back. */ while (growth >= 0) output_stack_adjust ( 1, fi.growth[growth--]);}/* This code is borrowed from the SH port. *//* The MCORE cannot load a large constant into a register, constants have to come from a pc relative load. The reference of a pc relative load instruction must be less than 1k infront of the instruction. This means that we often have to dump a constant inside a function, and generate code to branch around it. It is important to minimize this, since the branches will slow things down and make things bigger. Worst case code looks like: lrw L1,r0 br L2 align L1: .long value L2: .. lrw L3,r0 br L4 align L3: .long value L4: .. We fix this by performing
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -