📄 sh.md
字号:
(and (eq_attr "issues" "2") (eq_attr "type" "load,pcload,pload")) 20 10)

;; The line above is the tail of a define_function_unit whose head lies
;; before this part of the file.
;;
;; Operand order of the define_function_unit forms below:
;;   (define_function_unit NAME MULTIPLICITY SIMULTANEITY TEST
;;                         READY-DELAY ISSUE-DELAY [CONFLICT-LIST])
;; Every unit in this group is restricted to (eq_attr "issues" "2").
;; NOTE(review): the comments below equate "two cycles" with the value 20,
;; so the delays appear to be scaled by ten per cycle -- confirm against
;; the definitions earlier in the file.

(define_function_unit "load_store" 1 0
  (and (eq_attr "issues" "2")
       (eq_attr "type" "load_si,pcload_si,load,pcload,pload,store,pstore,fmove"))
  10 10)

(define_function_unit "int" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "arith,dyn_shift")) 10 10)

;; Again, we have to pretend a lower latency for the "int" unit to avoid a
;; spurious FIFO constraint; the multiply instructions use the "int"
;; unit actually only for two cycles.
(define_function_unit "int" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 20 20)

;; We use a fictitious "mpy" unit to express the actual latency.
(define_function_unit "mpy" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 40 20)

;; Again, we have to pretend a lower latency for the "int" unit to avoid a
;; spurious FIFO constraint.
(define_function_unit "int" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "gp_fpul")) 10 10)

;; We use a fictitious "gp_fpul" unit to express the actual latency.
(define_function_unit "gp_fpul" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "gp_fpul")) 20 10)

;; ??? multiply uses the floating point unit, but with a two cycle delay.
;; Thus, a simple single-precision fp operation could finish if issued in
;; the very next cycle, but stalls when issued two or three cycles later.
;; Similarly, a divide / sqrt can work without stalls if issued in
;; the very next cycle, while it would have to block if issued two or
;; three cycles later.
;; There is no way to model this with gcc's function units.  This problem is
;; actually mentioned in md.texi.  Tackling this problem requires first that
;; it is possible to speak about the target in an open discussion.
;;
;; However, simple double-precision operations always conflict.
(define_function_unit "fp" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 40 40
  [(eq_attr "type" "dfp_cmp,dfp_conv,dfp_arith")])

;; The "fp" unit is for pipeline stages F1 and F2.

(define_function_unit "fp" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "fp")) 30 10)

;; Again, we have to pretend a lower latency for the "fp" unit to avoid a
;; spurious FIFO constraint; the bulk of the fdiv type insns executes in
;; the F3 stage.
(define_function_unit "fp" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "fdiv")) 30 10)

;; The "fdiv" function unit models the aggregate effect of the F1, F2 and F3
;; pipeline stages on the pipelining of fdiv/fsqrt insns.
;; We also use it to give the actual latency here.
;; fsqrt is actually one cycle faster than fdiv (and the value used here),
;; but that will hardly matter in practice for scheduling.
(define_function_unit "fdiv" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "fdiv")) 120 100)

;; There is again a late use of the "fp" unit by [d]fdiv type insns
;; that we can't express.

(define_function_unit "fp" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "dfp_cmp,dfp_conv")) 40 20)

(define_function_unit "fp" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "dfp_arith")) 80 60)

(define_function_unit "fp" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "dfdiv")) 230 10)

(define_function_unit "fdiv" 1 0
  (and (eq_attr "issues" "2") (eq_attr "type" "dfdiv")) 230 210)

; Definitions for filling branch delay slots.

;; Insns default to not needing a delay slot; patterns that do need one
;; are expected to override this attribute.
(define_attr "needs_delay_slot" "yes,no" (const_string "no"))

;; ???
;; This should be (nil) instead of (const_int 0)
;; "hit_stack" is "no" when find_regno_note finds no REG_INC note for
;; register 15 on the insn, and "yes" otherwise.
(define_attr "hit_stack" "yes,no"
  (cond [(eq (symbol_ref "find_regno_note (insn, REG_INC, 15)") (const_int 0))
         (const_string "no")]
        (const_string "yes")))

;; Constant attribute taken from the C variable pragma_interrupt.
(define_attr "interrupt_function" "no,yes"
  (const (symbol_ref "pragma_interrupt")))

;; An insn may go into a delay slot only if it is two bytes long and is
;; not a conditional branch, a pc-relative load, or an insn that itself
;; needs a delay slot.  The first matching clause wins.
(define_attr "in_delay_slot" "yes,no"
  (cond [(eq_attr "type" "cbranch") (const_string "no")
         (eq_attr "type" "pcload,pcload_si") (const_string "no")
         (eq_attr "needs_delay_slot" "yes") (const_string "no")
         (eq_attr "length" "2") (const_string "yes")
         ] (const_string "no")))

;; Numeric attribute: 1 for insns of type "sfunc", 0 otherwise.
(define_attr "is_sfunc" ""
  (if_then_else (eq_attr "type" "sfunc") (const_int 1) (const_int 0)))

;; Each define_delay below gives, for its single delay slot, three tests:
;; insns usable as an ordinary delay insn, insns usable when annulled if
;; the branch is true, and insns usable when annulled if the branch is
;; false.  (nil) means that kind of filling is not allowed.

(define_delay
  (eq_attr "needs_delay_slot" "yes")
  [(eq_attr "in_delay_slot" "yes") (nil) (nil)])

;; On the SH and SH2, the rte instruction reads the return pc from the stack,
;; and thus we can't put a pop instruction in its delay slot.
;; ??? On the SH3, the rte instruction does not use the stack, so a pop
;; instruction can go in the delay slot.

;; Since a normal return (rts) implicitly uses the PR register,
;; we can't allow PR register loads in an rts delay slot.

(define_delay
  (eq_attr "type" "return")
  [(and (eq_attr "in_delay_slot" "yes")
        (ior (and (eq_attr "interrupt_function" "no")
                  (eq_attr "type" "!pload"))
             (and (eq_attr "interrupt_function" "yes")
                  (eq_attr "hit_stack" "no")))) (nil) (nil)])

;; Since a call implicitly uses the PR register, we can't allow
;; a PR register store in a jsr delay slot.

(define_delay
  (ior (eq_attr "type" "call") (eq_attr "type" "sfunc"))
  [(and (eq_attr "in_delay_slot" "yes")
        (eq_attr "type" "!pstore")) (nil) (nil)])

;; Say that we have annulled true branches, since this gives smaller and
;; faster code when branches are predicted as not taken.

(define_delay
  (and (eq_attr "type" "cbranch")
       (ne (symbol_ref "TARGET_SH2") (const_int 0)))
  [(eq_attr "in_delay_slot" "yes") (eq_attr "in_delay_slot" "yes") (nil)])

;; -------------------------------------------------------------------------
;; SImode signed integer comparisons
;; -------------------------------------------------------------------------

;; All comparison patterns in this section set (reg:SI 18).
;; NOTE(review): register 18 is presumably the T bit -- confirm in sh.h.

;; Sets reg 18 to ((op0 & op1) == 0).
(define_insn ""
  [(set (reg:SI 18)
        (eq:SI (and:SI (match_operand:SI 0 "arith_reg_operand" "z,r")
                       (match_operand:SI 1 "arith_operand" "L,r"))
               (const_int 0)))]
  ""
  "tst %1,%0")

;; ??? Perhaps should only accept reg/constant if the register is reg 0.
;; That would still allow reload to create cmpi instructions, but would
;; perhaps allow forcing the constant into a register when that is better.
;; Probably should use r0 for mem/imm compares, but force constant into a
;; register for pseudo/imm compares.

;; In the "@" templates below there is one output line per constraint
;; alternative, in the same order as the constraint strings.

(define_insn "cmpeqsi_t"
  [(set (reg:SI 18)
        (eq:SI (match_operand:SI 0 "arith_reg_operand" "r,z,r")
               (match_operand:SI 1 "arith_operand" "N,rI,r")))]
  ""
  "@
   tst %0,%0
   cmp/eq %1,%0
   cmp/eq %1,%0")

(define_insn "cmpgtsi_t"
  [(set (reg:SI 18)
        (gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
               (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))]
  ""
  "@
   cmp/gt %1,%0
   cmp/pl %0")

(define_insn "cmpgesi_t"
  [(set (reg:SI 18)
        (ge:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
               (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))]
  ""
  "@
   cmp/ge %1,%0
   cmp/pz %0")

;; -------------------------------------------------------------------------
;; SImode unsigned integer comparisons
;; -------------------------------------------------------------------------

(define_insn "cmpgeusi_t"
  [(set (reg:SI 18)
        (geu:SI (match_operand:SI 0 "arith_reg_operand" "r")
                (match_operand:SI 1 "arith_reg_operand" "r")))]
  ""
  "cmp/hs %1,%0")

(define_insn "cmpgtusi_t"
  [(set (reg:SI 18)
        (gtu:SI (match_operand:SI 0 "arith_reg_operand" "r")
                (match_operand:SI 1 "arith_reg_operand" "r")))]
  ""
  "cmp/hi %1,%0")

;; We save the compare operands in the cmpxx patterns and use them when
;; we generate the branch.

;; The expander emits no RTL of its own: it stores both operands in
;; sh_compare_op0 / sh_compare_op1 and finishes with DONE.
(define_expand "cmpsi"
  [(set (reg:SI 18)
        (compare (match_operand:SI 0 "arith_operand" "")
                 (match_operand:SI 1 "arith_operand" "")))]
  ""
  "
{
  sh_compare_op0 = operands[0];
  sh_compare_op1 = operands[1];
  DONE;
}")
;; -------------------------------------------------------------------------
;; DImode signed integer comparisons
;; -------------------------------------------------------------------------

;; ??? Could get better scheduling by splitting the initial test from the
;; rest of the insn after reload.  However, the gain would hardly justify
;; the sh.md size increase necessary to do that.

;; Sets reg 18 to ((op0 & op1) == 0) for DImode operands.  The assembler
;; text is produced by output_branchy_insn, which is passed the template,
;; the insn and its operands.
(define_insn ""
  [(set (reg:SI 18)
        (eq:SI (and:DI (match_operand:DI 0 "arith_reg_operand" "r")
                       (match_operand:DI 1 "arith_operand" "r"))
               (const_int 0)))]
  ""
  "* return output_branchy_insn (EQ, \"tst\\t%S1,%S0\;bf\\t%l9\;tst\\t%R1,%R0\",
                                 insn, operands);"
  [(set_attr "length" "6")
   (set_attr "type" "arith3b")])

;; Alternative 0 compares against the constant matched by "N" and uses
;; tst; alternative 1 compares two registers and uses cmp/eq.
(define_insn "cmpeqdi_t"
  [(set (reg:SI 18)
        (eq:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
               (match_operand:DI 1 "arith_reg_or_0_operand" "N,r")))]
  ""
  "* return output_branchy_insn (EQ,
                                 (which_alternative
                                  ? \"cmp/eq\\t%S1,%S0\;bf\\t%l9\;cmp/eq\\t%R1,%R0\"
                                  : \"tst\\t%S0,%S0\;bf\\t%l9\;tst\\t%R0,%R0\"),
                                 insn, operands);"
  [(set_attr "length" "6")
   (set_attr "type" "arith3b")])

;; In the "@" templates below there is one output line per constraint
;; alternative, in the same order as the constraint strings.

(define_insn "cmpgtdi_t"
  [(set (reg:SI 18)
        (gt:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
               (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
  "TARGET_SH2"
  "@
   cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/gt\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:
   tst\\t%S0,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/pl\\t%S0\;cmp/hi\\t%S0,%R0\\n%,Ldi%=:"
  [(set_attr "length" "8")
   (set_attr "type" "arith3")])

(define_insn "cmpgedi_t"
  [(set (reg:SI 18)
        (ge:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
               (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
  "TARGET_SH2"
  "@
   cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/ge\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=:
   cmp/pz\\t%S0"
  [(set_attr "length" "8,2")
   (set_attr "type" "arith3,arith")])

;; -------------------------------------------------------------------------
;; DImode unsigned integer comparisons
;; -------------------------------------------------------------------------

(define_insn "cmpgeudi_t"
  [(set (reg:SI 18)
        (geu:SI (match_operand:DI 0 "arith_reg_operand" "r")
                (match_operand:DI 1 "arith_reg_operand" "r")))]
  "TARGET_SH2"
  "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hs\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=:"
  [(set_attr "length" "8")
   (set_attr "type" "arith3")])

(define_insn "cmpgtudi_t"
  [(set (reg:SI 18)
        (gtu:SI (match_operand:DI 0 "arith_reg_operand" "r")
                (match_operand:DI 1 "arith_reg_operand" "r")))]
  "TARGET_SH2"
  "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hi\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:"
  [(set_attr "length" "8")
   (set_attr "type" "arith3")])

;; We save the compare operands in the cmpxx patterns and use them when
;; we generate the branch.

;; The expander emits no RTL of its own: it stores both operands in
;; sh_compare_op0 / sh_compare_op1 and finishes with DONE.
(define_expand "cmpdi"
  [(set (reg:SI 18)
        (compare (match_operand:DI 0 "arith_operand" "")
                 (match_operand:DI 1 "arith_operand" "")))]
  "TARGET_SH2"
  "
{
  sh_compare_op0 = operands[0];
  sh_compare_op1 = operands[1];
  DONE;
}")

;; -------------------------------------------------------------------------
;; Addition instructions
;; -------------------------------------------------------------------------

;; ??? This should be a define expand.

;; The "#" template means this insn is never output directly; it relies
;; on the define_split that follows to break it up once reload is done.
(define_insn "adddi3"
  [(set (match_operand:DI 0 "arith_reg_operand" "=r")
        (plus:DI (match_operand:DI 1 "arith_reg_operand" "%0")
                 (match_operand:DI 2 "arith_reg_operand" "r")))
   (clobber (reg:SI 18))]
  ""
  "#"
  [(set_attr "length" "6")])

;; Split the DImode add into clrt, an addc on the low words and an addc1
;; on the high words.  The high word is taken from the second register
;; of the pair when TARGET_LITTLE_ENDIAN, and from the first otherwise.
(define_split
  [(set (match_operand:DI 0 "arith_reg_operand" "=r")
        (plus:DI (match_operand:DI 1 "arith_reg_operand" "%0")
                 (match_operand:DI 2 "arith_reg_operand" "r")))
   (clobber (reg:SI 18))]
  "reload_completed"
  [(const_int 0)]
  "
{
  rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]);
  high0 = gen_rtx (REG, SImode,
                   true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
  high2 = gen_rtx (REG, SImode,
                   true_regnum (operands[2]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
  emit_insn (gen_clrt ());
  emit_insn (gen_addc (low0, low0, gen_lowpart (SImode, operands[2])));
  emit_insn (gen_addc1 (high0, high0, high2));
  DONE;
}")

;; op0 = op1 + op2 + reg 18, and reg 18 is set from the unsigned
;; comparison (op1 + op2) < op1.
(define_insn "addc"
  [(set (match_operand:SI 0 "arith_reg_operand" "=r")
        (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0")
                          (match_operand:SI 2 "arith_reg_operand" "r"))
                 (reg:SI 18)))
   (set (reg:SI 18)
        (ltu:SI (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))]
  ""
  "addc %2,%0"
  [(set_attr "type" "arith")])

;; Same instruction as "addc", but reg 18 is only clobbered instead of
;; being given a defined value.
(define_insn "addc1"
  [(set (match_operand:SI 0 "arith_reg_operand" "=r")
        (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0")
                          (match_operand:SI 2 "arith_reg_operand" "r"))
                 (reg:SI 18)))
   (clobber (reg:SI 18))]
  ""
  "addc %2,%0"
  [(set_attr "type" "arith")])

(define_insn "addsi3"
  [(set (match_operand:SI 0 "arith_reg_operand" "=r")
        (plus:SI (match_operand:SI 1 "arith_operand" "%0")
                 (match_operand:SI 2 "arith_operand" "rI")))]
  ""
  "add %2,%0"
  [(set_attr "type" "arith")])

;; -------------------------------------------------------------------------
;; Subtraction instructions
;; -------------------------------------------------------------------------

;; ??? This should be a define expand.

;; The "#" template means this insn is never output directly; it relies
;; on the define_split that follows to break it up once reload is done.
(define_insn "subdi3"
  [(set (match_operand:DI 0 "arith_reg_operand" "=r")
        (minus:DI (match_operand:DI 1 "arith_reg_operand" "0")
                  (match_operand:DI 2 "arith_reg_operand" "r")))
   (clobber (reg:SI 18))]
  ""
  "#"
  [(set_attr "length" "6")])

;; NOTE(review): the preparation code of this split continues past the end
;; of this part of the file; only its head is reproduced here.
(define_split
  [(set (match_operand:DI 0 "arith_reg_operand" "=r")
        (minus:DI (match_operand:DI 1 "arith_reg_operand" "0")
                  (match_operand:DI 2 "arith_reg_operand" "r")))
   (clobber (reg:SI 18))]
  "reload_completed"
  [(const_int 0)]
  "
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -