/* ia64.c */
/* Emit an integral vector widening sum.  operands[1] is the narrow
   vector to be summed, operands[2] is the wide accumulator, and
   operands[0] receives the widened result.  */

void
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
  rtx l, h, x, s;
  enum machine_mode wmode, mode;
  rtx (*unpack_l) (rtx, rtx, rtx);
  rtx (*unpack_h) (rtx, rtx, rtx);
  rtx (*plus) (rtx, rtx, rtx);

  wmode = GET_MODE (operands[0]);
  mode = GET_MODE (operands[1]);

  switch (mode)
    {
    case V8QImode:
      unpack_l = gen_unpack1_l;
      unpack_h = gen_unpack1_h;
      plus = gen_addv4hi3;
      break;
    case V4HImode:
      unpack_l = gen_unpack2_l;
      unpack_h = gen_unpack2_h;
      plus = gen_addv2si3;
      break;
    default:
      gcc_unreachable ();
    }

  /* Fill in x with the sign extension of each element in op1.  */
  if (unsignedp)
    x = CONST0_RTX (mode);
  else
    {
      bool neg;

      x = gen_reg_rtx (mode);
      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
					CONST0_RTX (mode));
      gcc_assert (!neg);
    }

  l = gen_reg_rtx (wmode);
  h = gen_reg_rtx (wmode);
  s = gen_reg_rtx (wmode);

  emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
  emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
  emit_insn (plus (s, l, operands[2]));
  emit_insn (plus (operands[0], h, s));
}

/* Emit a signed or unsigned V8QI dot product operation.  */

void
ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
{
  rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;

  /* Fill in x1 and x2 with the sign extension of each element.  */
  if (unsignedp)
    x1 = x2 = CONST0_RTX (V8QImode);
  else
    {
      bool neg;

      x1 = gen_reg_rtx (V8QImode);
      x2 = gen_reg_rtx (V8QImode);

      neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
					CONST0_RTX (V8QImode));
      gcc_assert (!neg);
      neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
					CONST0_RTX (V8QImode));
      gcc_assert (!neg);
    }

  l1 = gen_reg_rtx (V4HImode);
  l2 = gen_reg_rtx (V4HImode);
  h1 = gen_reg_rtx (V4HImode);
  h2 = gen_reg_rtx (V4HImode);

  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));

  p1 = gen_reg_rtx (V2SImode);
  p2 = gen_reg_rtx (V2SImode);
  p3 = gen_reg_rtx (V2SImode);
  p4 = gen_reg_rtx (V2SImode);
  emit_insn (gen_pmpy2_r (p1, l1, l2));
  emit_insn (gen_pmpy2_l (p2, l1, l2));
  emit_insn (gen_pmpy2_r (p3, h1, h2));
  emit_insn (gen_pmpy2_l (p4, h1, h2));

  s1 = gen_reg_rtx (V2SImode);
  s2 = gen_reg_rtx (V2SImode);
  s3 = gen_reg_rtx (V2SImode);
  emit_insn (gen_addv2si3 (s1, p1, p2));
  emit_insn (gen_addv2si3 (s2, p3, p4));
  emit_insn (gen_addv2si3 (s3, s1, operands[3]));
  emit_insn (gen_addv2si3 (operands[0], s2, s3));
}
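/* Illustrative sketch (not part of the original file): a scalar
   reference model of the dot-product expansion above, for the signed
   case.  It is kept under #if 0 so it is never compiled.  How the
   partial products are distributed between the two SImode lanes is
   determined by the unpack1_l/unpack1_h and pmpy2_r/pmpy2_l patterns;
   the even/odd pairing below is an assumption made for illustration.
   What the expansion does guarantee is that each a[i]*b[i] term is
   accumulated into exactly one of the two result lanes.  */
#if 0
static void
ref_dot_prod_v8qi (const signed char a[8], const signed char b[8],
		   const int acc[2], int dst[2])
{
  int i, sum[2] = { 0, 0 };

  /* Accumulate each byte product into one of the two 32-bit lanes;
     the lane assignment here is hypothetical.  */
  for (i = 0; i < 8; i++)
    sum[(i / 2) % 2] += a[i] * b[i];

  dst[0] = acc[0] + sum[0];
  dst[1] = acc[1] + sum[1];
}
#endif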
/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
		  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nogp (addr);
      else if (! retval)
	insn = gen_call_nogp (addr, b0);
      else
	insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	insn = gen_sibcall_gp (addr);
      else if (! retval)
	insn = gen_call_gp (addr, b0);
      else
	insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
}

/* Reload the GP (r1) from the place it was saved: either a call-saved
   register or a stack slot addressed relative to the frame or stack
   pointer.  */

void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
		+ current_frame_info.spill_size);
      if (frame_pointer_needed)
	{
	  tmp = hard_frame_pointer_rtx;
	  offset = -offset;
	}
      else
	{
	  tmp = stack_pointer_rtx;
	  offset = current_frame_info.total_size - offset;
	}

      if (CONST_OK_FOR_I (offset))
	emit_insn (gen_adddi3 (pic_offset_table_rtx,
			       tmp, GEN_INT (offset)));
      else
	{
	  emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
	  emit_insn (gen_adddi3 (pic_offset_table_rtx,
				 pic_offset_table_rtx, tmp));
	}

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}

void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
		 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
	 we can legitimately change the global lifetime of the GP
	 (in the form of killing where previously live).  This is
	 because a call through a descriptor doesn't use the previous
	 value of the GP, while a direct call does, and we do not
	 commit to either form until the split here.

	 That said, this means that we lack precise life info for
	 whether ADDR is dead after this call.  This is not terribly
	 important, since we can fix things up essentially for free
	 with the POST_DEC below, but it's nice to not use it when we
	 can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
					    REGNO (addr)))
		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
	 revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
	tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
	tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}
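/* Illustrative sketch (not part of the original file): the structure
   that the POST_INC/POST_DEC loads in ia64_split_call walk through.
   Under the IA-64 software conventions, an indirect function pointer
   refers to a function descriptor rather than directly to code; the
   field names below are hypothetical.  */
#if 0
struct ia64_fdesc
{
  unsigned long entry;	/* code address, loaded into scratch_b above  */
  unsigned long gp;	/* callee's GP, loaded into r1 above	      */
};
#endif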
/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.

   This differs from the generic code in that we know about the zero-extending
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
   also know that ld.acq+cmpxchg.rel equals a full barrier.

   The loop we want to generate looks like

	cmp_reg = mem;
      label:
	old_reg = cmp_reg;
	new_reg = cmp_reg op val;
	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
	if (cmp_reg != old_reg)
	  goto label;

   Note that we only do the plain load from memory once.  Subsequent
   iterations use the value loaded by the compare-and-swap pattern.  */

void
ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
		       rtx old_dst, rtx new_dst)
{
  enum machine_mode mode = GET_MODE (mem);
  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
  enum insn_code icode;

  /* Special case for using fetchadd.  */
  if ((mode == SImode || mode == DImode)
      && (code == PLUS || code == MINUS)
      && fetchadd_operand (val, mode))
    {
      if (code == MINUS)
	val = GEN_INT (-INTVAL (val));

      if (!old_dst)
	old_dst = gen_reg_rtx (mode);

      emit_insn (gen_memory_barrier ());

      if (mode == SImode)
	icode = CODE_FOR_fetchadd_acq_si;
      else
	icode = CODE_FOR_fetchadd_acq_di;
      emit_insn (GEN_FCN (icode) (old_dst, mem, val));

      if (new_dst)
	{
	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
					 true, OPTAB_WIDEN);
	  if (new_reg != new_dst)
	    emit_move_insn (new_dst, new_reg);
	}
      return;
    }

  /* Because of the volatile mem read, we get an ld.acq, which is the
     front half of the full barrier.  The end half is the cmpxchg.rel.  */
  gcc_assert (MEM_VOLATILE_P (mem));

  old_reg = gen_reg_rtx (DImode);
  cmp_reg = gen_reg_rtx (DImode);
  label = gen_label_rtx ();

  if (mode != DImode)
    {
      val = simplify_gen_subreg (DImode, val, mode, 0);
      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
    }
  else
    emit_move_insn (cmp_reg, mem);

  emit_label (label);

  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (old_reg, cmp_reg);
  emit_move_insn (ar_ccv, cmp_reg);

  if (old_dst)
    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));

  new_reg = cmp_reg;
  if (code == NOT)
    {
      new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
      code = AND;
    }
  new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
				 true, OPTAB_DIRECT);

  if (mode != DImode)
    new_reg = gen_lowpart (mode, new_reg);
  if (new_dst)
    emit_move_insn (new_dst, new_reg);

  switch (mode)
    {
    case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
    case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
    case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
    case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;
    default:
      gcc_unreachable ();
    }

  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));

  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
}

/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  /* Variable tracking should be run after all optimizations which
     change order of insns.  It also needs a valid CFG.  This can't be
     done in ia64_override_options, because flag_var_tracking is
     finalized after that.  */
  ia64_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  default_file_start ();
  emit_safe_across_calls ();
}

/* Emit a .pred.safe_across_calls directive listing the ranges of
   predicate registers that are preserved across calls (i.e. those
   not marked in call_used_regs).  */

void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
	  out_state = 1;
	}
      else
	fputc (',', asm_out_file);
      if (re == rs + 1)
	fprintf (asm_out_file, "p%u", rs);
      else
	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}
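/* For example (assuming the default IA-64 conventions, under which
   p1-p5 and p16-p63 are call-preserved), the loop above collapses the
   preserved predicates into ranges and prints a single line:

	.pred.safe_across_calls p1-p5,p16-p63

   If every predicate register were call-clobbered, out_state would
   stay zero and nothing would be emitted at all.  */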
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (int try_locals)
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{