📄 sh.c
字号:
Worst case code looks like: mov.l L1,rn bra L2 nop align L1: .long value L2: .. mov.l L3,rn bra L4 nop align L3: .long value L4: .. We fix this by performing a scan before scheduling, which notices which instructions need to have their operands fetched from the constant table and builds the table. The algorithm is: scan, find an instruction which needs a pcrel move. Look forward, find the last barrier which is within MAX_COUNT bytes of the requirement. If there isn't one, make one. Process all the instructions between the find and the barrier. In the above example, we can tell that L3 is within 1k of L1, so the first move can be shrunk from the 3 insn+constant sequence into just 1 insn, and the constant moved to L3 to make: mov.l L1,rn .. mov.l L3,rn bra L4 nop align L3:.long value L4:.long value Then the second move becomes the target for the shortening process. */typedef struct{ rtx value; /* Value in table. */ rtx label; /* Label of value. */ enum machine_mode mode; /* Mode of value. */} pool_node;/* The maximum number of constants that can fit into one pool, since the pc relative range is 0...1020 bytes and constants are at least 4 bytes long. */#define MAX_POOL_SIZE (1020/4)static pool_node pool_vector[MAX_POOL_SIZE];static int pool_size;/* ??? If we need a constant in HImode which is the truncated value of a constant we need in SImode, we could combine the two entries thus saving two bytes. Is this common enough to be worth the effort of implementing it? *//* ??? This stuff should be done at the same time that we shorten branches. As it is now, we must assume that all branches are the maximum size, and this causes us to almost always output constant pools sooner than necessary. *//* Add a constant to the pool and return its label. */static rtxadd_constant (x, mode) rtx x; enum machine_mode mode;{ int i; rtx lab; /* First see if we've already got it. */ for (i = 0; i < pool_size; i++) { if (x->code == pool_vector[i].value->code && mode == pool_vector[i].mode) { if (x->code == CODE_LABEL) { if (XINT (x, 3) != XINT (pool_vector[i].value, 3)) continue; } if (rtx_equal_p (x, pool_vector[i].value)) return pool_vector[i].label; } } /* Need a new one. */ pool_vector[pool_size].value = x; lab = gen_label_rtx (); pool_vector[pool_size].mode = mode; pool_vector[pool_size].label = lab; pool_size++; return lab;}/* Output the literal table. */static voiddump_table (scan) rtx scan;{ int i; int need_align = 1; /* Do two passes, first time dump out the HI sized constants. */ for (i = 0; i < pool_size; i++) { pool_node *p = &pool_vector[i]; if (p->mode == HImode) { if (need_align) { scan = emit_insn_after (gen_align_2 (), scan); need_align = 0; } scan = emit_label_after (p->label, scan); scan = emit_insn_after (gen_consttable_2 (p->value), scan); } } need_align = 1; for (i = 0; i < pool_size; i++) { pool_node *p = &pool_vector[i]; switch (p->mode) { case HImode: break; case SImode: if (need_align) { need_align = 0; scan = emit_label_after (gen_label_rtx (), scan); scan = emit_insn_after (gen_align_4 (), scan); } scan = emit_label_after (p->label, scan); scan = emit_insn_after (gen_consttable_4 (p->value), scan); break; case DImode: if (need_align) { need_align = 0; scan = emit_label_after (gen_label_rtx (), scan); scan = emit_insn_after (gen_align_4 (), scan); } scan = emit_label_after (p->label, scan); scan = emit_insn_after (gen_consttable_8 (p->value), scan); break; default: abort (); break; } } scan = emit_insn_after (gen_consttable_end (), scan); scan = emit_barrier_after (scan); pool_size = 0;}/* Return non-zero if constant would be an ok source for a mov.w instead of a mov.l. */static inthi_const (src) rtx src;{ return (GET_CODE (src) == CONST_INT && INTVAL (src) >= -32768 && INTVAL (src) <= 32767);}/* Non-zero if the insn is a move instruction which needs to be fixed. *//* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't need to fix it if the input value is CONST_OK_FOR_I. */static intbroken_move (insn) rtx insn;{ if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SET /* We can load any 8 bit value if we don't care what the high order bits end up as. */ && GET_MODE (SET_DEST (PATTERN (insn))) != QImode && CONSTANT_P (SET_SRC (PATTERN (insn))) && (GET_CODE (SET_SRC (PATTERN (insn))) != CONST_INT || ! CONST_OK_FOR_I (INTVAL (SET_SRC (PATTERN (insn)))))) return 1; return 0;}/* Find the last barrier from insn FROM which is close enough to hold the constant pool. If we can't find one, then create one near the end of the range. *//* ??? It would be good to put constant pool tables between a case jump and the jump table. This fails for two reasons. First, there is no barrier after the case jump. This is a bug in the casesi pattern. Second, inserting the table here may break the mova instruction that loads the jump table address, by moving the jump table too far away. We fix that problem by never outputting the constant pool between a mova and its label. */static rtxfind_barrier (from) rtx from;{ int count_si = 0; int count_hi = 0; int found_hi = 0; int found_si = 0; rtx found_barrier = 0; rtx found_mova = 0; /* For HImode: range is 510, add 4 because pc counts from address of second instruction after this one, subtract 2 for the jump instruction that we may need to emit before the table. This gives 512. For SImode: range is 1020, add 4 because pc counts from address of second instruction after this one, subtract 2 in case pc is 2 byte aligned, subtract 2 for the jump instruction that we may need to emit before the table. This gives 1020. */ while (from && count_si < 1020 && count_hi < 512) { int inc = get_attr_length (from); if (GET_CODE (from) == BARRIER) found_barrier = from; if (broken_move (from)) { rtx src = SET_SRC (PATTERN (from)); if (hi_const (src)) { found_hi = 1; /* We put the short constants before the long constants, so we must count the length of short constants in the range for the long constants. */ /* ??? This isn't optimal, but is easy to do. */ if (found_si) count_si += 2; } else found_si = 1; } if (GET_CODE (from) == INSN && GET_CODE (PATTERN (from)) == SET && GET_CODE (SET_SRC (PATTERN (from))) == UNSPEC && XINT (SET_SRC (PATTERN (from)), 1) == 1) found_mova = from; else if (GET_CODE (from) == JUMP_INSN && (GET_CODE (PATTERN (from)) == ADDR_VEC || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)) found_mova = 0; if (found_si) count_si += inc; if (found_hi) count_hi += inc; from = NEXT_INSN (from); } /* Insert the constant pool table before the mova instruction, to prevent the mova label reference from going out of range. */ if (found_mova) from = found_mova; if (! found_barrier) { /* We didn't find a barrier in time to dump our stuff, so we'll make one. */ rtx label = gen_label_rtx (); /* If we exceeded the range, then we must back up over the last instruction we looked at. Otherwise, we just need to undo the NEXT_INSN at the end of the loop. */ if (count_hi > 512 || count_si > 1020) from = PREV_INSN (PREV_INSN (from)); else from = PREV_INSN (from); /* Walk back to be just before any jump or label. Putting it before a label reduces the number of times the branch around the constant pool table will be hit. Putting it before a jump makes it more likely that the bra delay slot will be filled. */ while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE || GET_CODE (from) == CODE_LABEL) from = PREV_INSN (from); from = emit_jump_insn_after (gen_jump (label), from); JUMP_LABEL (from) = label; LABEL_NUSES (label) = 1; found_barrier = emit_barrier_after (from); emit_label_after (label, found_barrier); } return found_barrier;}/* Exported to toplev.c. Scan the function looking for move instructions which have to be changed to pc-relative loads and insert the literal tables. */voidmachine_dependent_reorg (first) rtx first;{ rtx insn; for (insn = first; insn; insn = NEXT_INSN (insn)) { if (broken_move (insn)) { rtx scan; /* Scan ahead looking for a barrier to stick the constant table behind. */ rtx barrier = find_barrier (insn); /* Now find all the moves between the points and modify them. */ for (scan = insn; scan != barrier; scan = NEXT_INSN (scan)) { if (broken_move (scan)) { rtx pat = PATTERN (scan); rtx src = SET_SRC (pat); rtx dst = SET_DEST (pat); enum machine_mode mode = GET_MODE (dst); rtx lab; rtx newinsn; rtx newsrc; if (mode == SImode && hi_const (src)) { int offset = 0; mode = HImode; while (GET_CODE (dst) == SUBREG) { offset += SUBREG_WORD (dst); dst = SUBREG_REG (dst); } dst = gen_rtx (REG, HImode, REGNO (dst) + offset); } lab = add_constant (src, mode); newsrc = gen_rtx (MEM, mode, gen_rtx (LABEL_REF, VOIDmode, lab)); RTX_UNCHANGING_P (newsrc) = 1; newinsn = emit_insn_after (gen_rtx (SET, VOIDmode, dst, newsrc), scan); delete_insn (scan); scan = newinsn; } } dump_table (barrier); } }}/* Dump out instruction addresses, which is useful for debugging the constant pool table stuff. *//* ??? This is unnecessary, and probably should be deleted. This makes the insn_addresses declaration above unnecessary. *//* ??? The addresses printed by this routine for insns are nonsense for insns which are inside of a sequence where none of the inner insns have variable length. This is because the second pass of shorten_branches does not bother to update them. */voidfinal_prescan_insn (insn, opvec, noperands) rtx insn; rtx *opvec; int noperands;{ if (TARGET_DUMPISIZE) fprintf (asm_out_file, "\n! at %04x\n", insn_addresses[INSN_UID (insn)]);}/* Dump out any constants accumulated in the final pass. These will will only be labels. */char *output_jump_label_table (){ int i; if (pool_size) { fprintf (asm_out_file, "\t.align 2\n"); for (i = 0; i < pool_size; i++) { pool_node *p = &pool_vector[i]; ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (p->label)); output_asm_insn (".long %O0", &p->value); } pool_size = 0; } return "";}/* A full frame looks like: arg-5 arg-4 [ if current_function_anonymous_args arg-3 arg-2 arg-1 arg-0 ] saved-fp saved-r10 saved-r11 saved-r12 saved-pr local-n .. local-1 local-0 <- fp points here. *//* Number of bytes pushed for anonymous args, used to pass information between expand_prologue and expand_epilogue. */static int extra_push;/* Adjust the stack and return the number of bytes taken to do it. */static voidoutput_stack_adjust (size, reg) int size; rtx reg;{ if (size) { rtx val = GEN_INT (size); rtx insn; if (! CONST_OK_FOR_I (size)) { rtx reg = gen_rtx (REG, SImode, 3); emit_insn (gen_movsi (reg, val)); val = reg; } insn = gen_addsi3 (reg, reg, val); emit_insn (insn); }}/* Output RTL to push register RN onto the stack. */static voidpush (rn) int rn;{ rtx x; x = emit_insn (gen_push (gen_rtx (REG, SImode, rn))); REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC, gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);}/* Output RTL to pop register RN from the stack. */static voidpop (rn)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -