📄 sh.md
字号:
(eq (symbol_ref "INSN_CODE (prev_nonnote_insn (insn))")
    (symbol_ref "code_for_indirect_jump_scratch")))
(if_then_else (eq_attr "braf_branch_p" "yes")
              (const_int 6)
              (const_int 10))
(eq_attr "braf_branch_p" "yes")
(const_int 10)
;; ??? using pc is not computed transitively.
(ne (match_dup 0) (match_dup 0))
(const_int 12)
(ne (symbol_ref ("flag_pic")) (const_int 0))
(const_int 22)
] (const_int 14))
(eq_attr "type" "pt_media")
(if_then_else (ne (symbol_ref "TARGET_SHMEDIA64") (const_int 0))
              (const_int 20) (const_int 12))
] (if_then_else (ne (symbol_ref "TARGET_SHMEDIA") (const_int 0))
                (const_int 4)
                (const_int 2))))

;; (define_function_unit {name} {num-units} {n-users} {test}
;;  {ready-delay} {issue-delay} [{conflict-list}])

;; Load and store instructions save a cycle if they are aligned on a
;; four byte boundary.  Using a function unit for stores encourages
;; gcc to separate load and store instructions by one instruction,
;; which makes it more likely that the linker will be able to word
;; align them when relaxing.

;; Loads have a latency of two.
;; However, call insns can have a delay slot, so that we want one more
;; insn to be scheduled between the load of the function address and the call.
;; This is equivalent to a latency of three.
;; We cannot use a conflict list for this, because we need to distinguish
;; between the actual call address and the function arguments.
;; ADJUST_COST can only properly handle reductions of the cost, so we
;; use a latency of three here.
;; We only do this for SImode loads of general registers, to make the work
;; for ADJUST_COST easier.
(define_function_unit "memory" 1 0
  (and (eq_attr "pipe_model" "sh1")
       (eq_attr "type" "load_si,pcload_si"))
  3 2)

(define_function_unit "memory" 1 0
  (and (eq_attr "pipe_model" "sh1")
       (eq_attr "type" "load,pcload,pload,store,pstore"))
  2 2)

(define_function_unit "int" 1 0
  (and (eq_attr "pipe_model" "sh1")
       (eq_attr "type" "arith3,arith3b"))
  3 3)

(define_function_unit "int" 1 0
  (and (eq_attr "pipe_model" "sh1")
       (eq_attr "type" "dyn_shift"))
  2 2)

(define_function_unit "int" 1 0
  (and (eq_attr "pipe_model" "sh1")
       (eq_attr "type" "!arith3,arith3b,dyn_shift"))
  1 1)

;; ??? These are approximations.
(define_function_unit "mpy" 1 0
  (and (eq_attr "pipe_model" "sh1")
       (eq_attr "type" "smpy"))
  2 2)

(define_function_unit "mpy" 1 0
  (and (eq_attr "pipe_model" "sh1")
       (eq_attr "type" "dmpy"))
  3 3)

(define_function_unit "fp" 1 0
  (and (eq_attr "pipe_model" "sh1")
       (eq_attr "type" "fp,fmove"))
  2 1)

(define_function_unit "fp" 1 0
  (and (eq_attr "pipe_model" "sh1")
       (eq_attr "type" "fdiv"))
  13 12)

;; SH-5 SHmedia scheduling
;; When executing SHmedia code, the SH-5 is a fairly straightforward
;; single-issue machine.  It has four pipelines, the branch unit (br),
;; the integer and multimedia unit (imu), the load/store unit (lsu), and
;; the floating point unit (fpu).
;; Here we model the instructions with a latency greater than one cycle.

;; Every instruction on SH-5 occupies the issue resource for at least one
;; cycle.
(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "!pt_media,ptabs_media,invalidate_line_media,dmpy_media,load_media,fload_media,fcmp_media,fmove_media,fparith_media,dfparith_media,fpconv_media,dfpconv_media,dfmul_media,store_media,fstore_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media"))
  1 1)

;; Specify the various types of instruction which have latency > 1
(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "mcmp_media"))
  2 1)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "dmpy_media,load_media,fcmp_media,mac_media"))
  3 1)
;; but see sh_adjust_cost for mac_media exception.

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "fload_media,fmove_media"))
  4 1)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "d2mpy_media"))
  4 2)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "pt_media,ptabs_media"))
  5 1)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "fparith_media,dfparith_media,fpconv_media,dfpconv_media"))
  6 1)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "invalidate_line_media"))
  7 7)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "dfmul_media"))
  9 4)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "atrans_media"))
  10 5)

;; Floating-point divide and square-root occupy an additional resource,
;; which is not internally pipelined.  However, other instructions
;; can continue to issue.
(define_function_unit "sh5fds" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "fdiv_media"))
  19 19)

(define_function_unit "sh5fds" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "dfdiv_media"))
  35 35)

; Definitions for filling branch delay slots.

(define_attr "needs_delay_slot" "yes,no" (const_string "no"))

;; "hit_stack" is "no" when find_regno_note (insn, REG_INC, SP_REG)
;; yields 0, and "yes" otherwise.
;; ??? This should be (nil) instead of (const_int 0)
(define_attr "hit_stack" "yes,no"
  (cond [(eq (symbol_ref "find_regno_note (insn, REG_INC, SP_REG)")
             (const_int 0))
         (const_string "no")]
        (const_string "yes")))

(define_attr "interrupt_function" "no,yes"
  (const (symbol_ref "current_function_interrupt")))

;; An insn qualifies for a delay slot only if it is not a cbranch, not a
;; pcload/pcload_si, does not itself need a delay slot, and has length 2.
(define_attr "in_delay_slot" "yes,no"
  (cond [(eq_attr "type" "cbranch") (const_string "no")
         (eq_attr "type" "pcload,pcload_si") (const_string "no")
         (eq_attr "needs_delay_slot" "yes") (const_string "no")
         (eq_attr "length" "2") (const_string "yes")
         ] (const_string "no")))

;; Currently follows "in_delay_slot" exactly.
(define_attr "cond_delay_slot" "yes,no"
  (cond [(eq_attr "in_delay_slot" "yes") (const_string "yes")
         ] (const_string "no")))

;; Numeric attributes: 1 when the insn type matches, 0 otherwise.
(define_attr "is_sfunc" ""
  (if_then_else (eq_attr "type" "sfunc") (const_int 1) (const_int 0)))

(define_attr "is_mac_media" ""
  (if_then_else (eq_attr "type" "mac_media") (const_int 1) (const_int 0)))

(define_attr "branch_zero" "yes,no"
  (cond [(eq_attr "type" "!cbranch") (const_string "no")
         (ne (symbol_ref "(next_active_insn (insn)\
                           == (prev_active_insn\
                               (XEXP (SET_SRC (PATTERN (insn)), 1))))\
                          && get_attr_length (next_active_insn (insn)) == 2")
             (const_int 0))
         (const_string "yes")]
        (const_string "no")))

;; SH4 Double-precision computation with double-precision result -
;; the two halves are ready at different times.
(define_attr "dfp_comp" "yes,no"
  (cond [(eq_attr "type" "dfp_arith,dfp_conv,dfdiv") (const_string "yes")]
        (const_string "no")))

;; Insns for which the latency of a preceding fp insn is decreased by one.
(define_attr "late_fp_use" "yes,no" (const_string "no"))

;; And feeding insns for which this is relevant.
(define_attr "any_fp_comp" "yes,no"
  (cond [(eq_attr "type" "fp,fdiv,ftrc_s,dfp_arith,dfp_conv,dfdiv")
         (const_string "yes")]
        (const_string "no")))

(define_attr "any_int_load" "yes,no"
  (cond [(eq_attr "type" "load,load_si,pcload,pcload_si")
         (const_string "yes")]
        (const_string "no")))

(define_delay
  (eq_attr "needs_delay_slot" "yes")
  [(eq_attr "in_delay_slot" "yes") (nil) (nil)])

;; On the SH and SH2, the rte instruction reads the return pc from the stack,
;; and thus we can't put a pop instruction in its delay slot.
;; ??? On the SH3, the rte instruction does not use the stack, so a pop
;; instruction can go in the delay slot.

;; Since a normal return (rts) implicitly uses the PR register,
;; we can't allow PR register loads in an rts delay slot.
(define_delay
  (eq_attr "type" "return")
  [(and (eq_attr "in_delay_slot" "yes")
        (ior (and (eq_attr "interrupt_function" "no")
                  (eq_attr "type" "!pload,prset"))
             (and (eq_attr "interrupt_function" "yes")
                  (ior
                   (ne (symbol_ref "TARGET_SH3") (const_int 0))
                   (eq_attr "hit_stack" "no"))))) (nil) (nil)])

;; Since a call implicitly uses the PR register, we can't allow
;; a PR register store in a jsr delay slot.
(define_delay
  (ior (eq_attr "type" "call") (eq_attr "type" "sfunc"))
  [(and (eq_attr "in_delay_slot" "yes")
        (eq_attr "type" "!pstore,prget")) (nil) (nil)])

;; Say that we have annulled true branches, since this gives smaller and
;; faster code when branches are predicted as not taken.
(define_delay
  (and (eq_attr "type" "cbranch")
       (ne (symbol_ref "TARGET_SH2") (const_int 0)))
  [(eq_attr "in_delay_slot" "yes") (eq_attr "cond_delay_slot" "yes") (nil)])

;; -------------------------------------------------------------------------
;; SImode signed integer comparisons
;; -------------------------------------------------------------------------

(define_insn ""
  [(set (reg:SI T_REG)
        (eq:SI (and:SI (match_operand:SI 0 "arith_reg_operand" "z,r")
                       (match_operand:SI 1 "arith_operand" "L,r"))
               (const_int 0)))]
  "TARGET_SH1"
  "tst %1,%0"
  [(set_attr "type" "mt_group")])

;; ??? Perhaps should only accept reg/constant if the register is reg 0.
;; That would still allow reload to create cmpi instructions, but would
;; perhaps allow forcing the constant into a register when that is better.
;; Probably should use r0 for mem/imm compares, but force constant into a
;; register for pseudo/imm compares.
(define_insn "cmpeqsi_t"
  [(set (reg:SI T_REG)
        (eq:SI (match_operand:SI 0 "arith_reg_operand" "r,z,r")
               (match_operand:SI 1 "arith_operand" "N,rI,r")))]
  "TARGET_SH1"
  "@
   tst %0,%0
   cmp/eq %1,%0
   cmp/eq %1,%0"
  [(set_attr "type" "mt_group")])

(define_insn "cmpgtsi_t"
  [(set (reg:SI T_REG)
        (gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
               (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))]
  "TARGET_SH1"
  "@
   cmp/gt %1,%0
   cmp/pl %0"
  [(set_attr "type" "mt_group")])

(define_insn "cmpgesi_t"
  [(set (reg:SI T_REG)
        (ge:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
               (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))]
  "TARGET_SH1"
  "@
   cmp/ge %1,%0
   cmp/pz %0"
  [(set_attr "type" "mt_group")])

;; -------------------------------------------------------------------------
;; SImode unsigned integer comparisons
;; -------------------------------------------------------------------------

(define_insn "cmpgeusi_t"
  [(set (reg:SI T_REG)
        (geu:SI (match_operand:SI 0 "arith_reg_operand" "r")
                (match_operand:SI 1 "arith_reg_operand" "r")))]
  "TARGET_SH1"
  "cmp/hs %1,%0"
  [(set_attr "type" "mt_group")])

(define_insn "cmpgtusi_t"
  [(set (reg:SI T_REG)
        (gtu:SI (match_operand:SI 0 "arith_reg_operand" "r")
                (match_operand:SI 1 "arith_reg_operand" "r")))]
  "TARGET_SH1"
  "cmp/hi %1,%0"
  [(set_attr "type" "mt_group")])

;; We save the compare operands in the cmpxx patterns and use them when
;; we generate the branch.
(define_expand "cmpsi"
  [(set (reg:SI T_REG)
        (compare (match_operand:SI 0 "arith_operand" "")
                 (match_operand:SI 1 "arith_operand" "")))]
  "TARGET_SH1"
  "{
  sh_compare_op0 = operands[0];
  sh_compare_op1 = operands[1];
  DONE;
}")

;; -------------------------------------------------------------------------
;; DImode signed integer comparisons
;; -------------------------------------------------------------------------

;; ??? Could get better scheduling by splitting the initial test from the
;; rest of the insn after reload.  However, the gain would hardly justify
;; the sh.md size increase necessary to do that.
(define_insn ""
  [(set (reg:SI T_REG)
        (eq:SI (and:DI (match_operand:DI 0 "arith_reg_operand" "r")
                       (match_operand:DI 1 "arith_operand" "r"))
               (const_int 0)))]
  "TARGET_SH1"
  "* return output_branchy_insn (EQ, \"tst\\t%S1,%S0\;bf\\t%l9\;tst\\t%R1,%R0\", insn, operands);"
  [(set_attr "length" "6")
   (set_attr "type" "arith3b")])

(define_insn "cmpeqdi_t"
  [(set (reg:SI T_REG)
        (eq:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
               (match_operand:DI 1 "arith_reg_or_0_operand" "N,r")))]
  "TARGET_SH1"
  "@
   tst %S0,%S0\;bf %,Ldi%=\;tst %R0,%R0\\n%,Ldi%=:
   cmp/eq %S1,%S0\;bf %,Ldi%=\;cmp/eq %R1,%R0\\n%,Ldi%=:"
  [(set_attr "length" "6")
   (set_attr "type" "arith3b")])

(define_split
  [(set (reg:SI T_REG)
        (eq:SI (match_operand:DI 0 "arith_reg_operand" "")
               (match_operand:DI 1 "arith_reg_or_0_operand" "")))]
;; If we applied this split when not optimizing, it would only be
;; applied during the machine-dependent reorg, when no new basic blocks
;; may be created.
  "TARGET_SH1 && reload_completed && optimize"
  [(set (reg:SI T_REG) (eq:SI (match_dup 2) (match_dup 3)))
   (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
                           (label_ref (match_dup 6))
                           (pc)))
   (set (reg:SI T_REG) (eq:SI (match_dup 4) (match_dup 5)))
   (match_dup 6)]
  "{
  operands[2]
    = gen_rtx_REG (SImode,
                   true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
  operands[3]
    = (operands[1] == const0_rtx
       ? const0_rtx
       : gen_rtx_REG (SImode,
                      true_regnum (operands[1])
                      + (TARGET_LITTLE_ENDIAN ? 1 : 0)));
  operands[4] = gen_lowpart (SImode, operands[0]);
  operands[5] = gen_lowpart (SImode, operands[1]);
  operands[6] = gen_label_rtx ();
}")

(define_insn "cmpgtdi_t"
  [(set (reg:SI T_REG)
        (gt:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
               (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
  "TARGET_SH2"
  "@
   cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/gt\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:
   tst\\t%S0,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/pl\\t%S0\;cmp/hi\\t%S0,%R0\\n%,Ldi%=:"
  [(set_attr "length" "8")
   (set_attr "type" "arith3")])

;; NOTE(review): the pattern below is cut off in this view; its output
;; template continues past the end of this chunk.
(define_insn "cmpgedi_t"
  [(set (reg:SI T_REG)
        (ge:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
               (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
  "TARGET_SH2"
  "@
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -