📄 sh.md

📁 linux下的gcc编译器
💻 MD
📖 第 1 页 / 共 5 页
字号:
		     (eq (symbol_ref "INSN_CODE (prev_nonnote_insn (insn))")
			 (symbol_ref "code_for_indirect_jump_scratch")))
		(if_then_else (eq_attr "braf_branch_p" "yes")
			      (const_int 6)
			      (const_int 10))
		(eq_attr "braf_branch_p" "yes")
		(const_int 10)
;; ??? using pc is not computed transitively.
		(ne (match_dup 0) (match_dup 0))
		(const_int 12)
		(ne (symbol_ref ("flag_pic")) (const_int 0))
		(const_int 22)
		] (const_int 14))
	 (eq_attr "type" "pt_media")
	 (if_then_else (ne (symbol_ref "TARGET_SHMEDIA64") (const_int 0))
		       (const_int 20) (const_int 12))
	 ] (if_then_else (ne (symbol_ref "TARGET_SHMEDIA") (const_int 0))
			 (const_int 4)
			 (const_int 2))))
;; NOTE(review): the form closed just above opens before the visible part of
;; this file; its visible tail is reproduced unchanged, only the line breaks
;; were restored.

;; (define_function_unit {name} {num-units} {n-users} {test}
;;                       {ready-delay} {issue-delay} [{conflict-list}])
;; In every form below, the two trailing integers are therefore
;; {ready-delay} and {issue-delay}, in that order.

;; Load and store instructions save a cycle if they are aligned on a
;; four byte boundary.  Using a function unit for stores encourages
;; gcc to separate load and store instructions by one instruction,
;; which makes it more likely that the linker will be able to word
;; align them when relaxing.

;; Loads have a latency of two.
;; However, call insns can have a delay slot, so that we want one more
;; insn to be scheduled between the load of the function address and the call.
;; This is equivalent to a latency of three.
;; We cannot use a conflict list for this, because we need to distinguish
;; between the actual call address and the function arguments.
;; ADJUST_COST can only properly handle reductions of the cost, so we
;; use a latency of three here.
;; We only do this for SImode loads of general registers, to make the work
;; for ADJUST_COST easier.
(define_function_unit "memory" 1 0
  (and (eq_attr "pipe_model" "sh1")
       (eq_attr "type" "load_si,pcload_si"))
  3 2)
(define_function_unit "memory" 1 0
  (and (eq_attr "pipe_model" "sh1")
       (eq_attr "type" "load,pcload,pload,store,pstore"))
  2 2)

;; "int" unit for the sh1 pipe model: arith3/arith3b use 3/3, dyn_shift
;; uses 2/2, and every other type uses 1/1.
(define_function_unit "int"    1 0
  (and (eq_attr "pipe_model" "sh1") (eq_attr "type" "arith3,arith3b")) 3 3)

(define_function_unit "int"    1 0
  (and (eq_attr "pipe_model" "sh1") (eq_attr "type" "dyn_shift")) 2 2)

(define_function_unit "int"    1 0
  (and (eq_attr "pipe_model" "sh1") (eq_attr "type" "!arith3,arith3b,dyn_shift")) 1 1)

;; ??? These are approximations.
(define_function_unit "mpy"    1 0
  (and (eq_attr "pipe_model" "sh1") (eq_attr "type" "smpy")) 2 2)
(define_function_unit "mpy"    1 0
  (and (eq_attr "pipe_model" "sh1") (eq_attr "type" "dmpy")) 3 3)

(define_function_unit "fp"     1 0
  (and (eq_attr "pipe_model" "sh1") (eq_attr "type" "fp,fmove")) 2 1)
(define_function_unit "fp"     1 0
  (and (eq_attr "pipe_model" "sh1") (eq_attr "type" "fdiv")) 13 12)


;; SH-5 SHmedia scheduling
;; When executing SHmedia code, the SH-5 is a fairly straightforward
;; single-issue machine.  It has four pipelines, the branch unit (br),
;; the integer and multimedia unit (imu), the load/store unit (lsu), and
;; the floating point unit (fpu).
;; Here we model the instructions with a latency greater than one cycle.

;; Every instruction on SH-5 occupies the issue resource for at least one
;; cycle.
;; The negated type list below names exactly the types that get their own
;; "sh5issue" entry further down, plus the store types.
(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "!pt_media,ptabs_media,invalidate_line_media,dmpy_media,load_media,fload_media,fcmp_media,fmove_media,fparith_media,dfparith_media,fpconv_media,dfpconv_media,dfmul_media,store_media,fstore_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media")) 1 1)

;; Specify the various types of instruction which have latency > 1
(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "mcmp_media")) 2 1)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "dmpy_media,load_media,fcmp_media,mac_media")) 3 1)
;; but see sh_adjust_cost for mac_media exception.

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "fload_media,fmove_media")) 4 1)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "d2mpy_media")) 4 2)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "pt_media,ptabs_media")) 5 1)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "fparith_media,dfparith_media,fpconv_media,dfpconv_media")) 6 1)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media")
       (eq_attr "type" "invalidate_line_media")) 7 7)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "dfmul_media")) 9 4)

(define_function_unit "sh5issue" 1 0
  (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "atrans_media")) 10 5)

;; Floating-point divide and square-root occupy an additional resource,
;; which is not internally pipelined.  However, other instructions
;; can continue to issue.
(define_function_unit "sh5fds" 1 0
  (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "fdiv_media"))  19 19)

(define_function_unit "sh5fds" 1 0
  (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "dfdiv_media")) 35 35)

;; Definitions for filling branch delay slots.

;; Defaults to "no"; nothing in this part of the file sets it to "yes".
(define_attr "needs_delay_slot" "yes,no" (const_string "no"))

;; ???
;; This should be (nil) instead of (const_int 0)
;; "no" when find_regno_note (insn, REG_INC, SP_REG) evaluates to 0,
;; "yes" otherwise.
(define_attr "hit_stack" "yes,no"
	(cond [(eq (symbol_ref "find_regno_note (insn, REG_INC, SP_REG)")
		   (const_int 0))
	       (const_string "no")]
	      (const_string "yes")))

;; Constant attribute whose value is taken from current_function_interrupt.
(define_attr "interrupt_function" "no,yes"
  (const (symbol_ref "current_function_interrupt")))

;; An insn may occupy a delay slot only if its length is 2 and it is not a
;; cbranch, not a pcload/pcload_si, and does not itself need a delay slot.
;; The clauses are tested in order, so the exclusions win over the length test.
(define_attr "in_delay_slot" "yes,no"
  (cond [(eq_attr "type" "cbranch") (const_string "no")
	 (eq_attr "type" "pcload,pcload_si") (const_string "no")
	 (eq_attr "needs_delay_slot" "yes") (const_string "no")
	 (eq_attr "length" "2") (const_string "yes")
	 ] (const_string "no")))

;; Currently identical to in_delay_slot.
(define_attr "cond_delay_slot" "yes,no"
  (cond [(eq_attr "in_delay_slot" "yes") (const_string "yes")
	 ] (const_string "no")))

;; Numeric attributes: 1 when the insn type is sfunc / mac_media, else 0.
(define_attr "is_sfunc" ""
  (if_then_else (eq_attr "type" "sfunc") (const_int 1) (const_int 0)))

(define_attr "is_mac_media" ""
  (if_then_else (eq_attr "type" "mac_media") (const_int 1) (const_int 0)))

;; "yes" only for a cbranch whose next active insn is also the active insn
;; immediately preceding XEXP (SET_SRC (PATTERN (insn)), 1), and that insn
;; has length 2.
;; NOTE(review): XEXP (..., 1) is presumably the branch target label, which
;; would make this "branch over a single 2-byte insn" -- confirm.
(define_attr "branch_zero" "yes,no"
  (cond [(eq_attr "type" "!cbranch") (const_string "no")
	 (ne (symbol_ref "(next_active_insn (insn)\
			   == (prev_active_insn\
			       (XEXP (SET_SRC (PATTERN (insn)), 1))))\
			  && get_attr_length (next_active_insn (insn)) == 2")
	     (const_int 0))
	 (const_string "yes")]
	(const_string "no")))

;; SH4 Double-precision computation with double-precision result -
;; the two halves are ready at different times.
(define_attr "dfp_comp" "yes,no"
  (cond [(eq_attr "type" "dfp_arith,dfp_conv,dfdiv") (const_string "yes")]
	(const_string "no")))

;; Insns for which the latency of a preceding fp insn is decreased by one.
(define_attr "late_fp_use" "yes,no" (const_string "no"))
;; And feeding insns for which this is relevant.
(define_attr "any_fp_comp" "yes,no"
  (cond [(eq_attr "type" "fp,fdiv,ftrc_s,dfp_arith,dfp_conv,dfdiv")
	 (const_string "yes")]
	(const_string "no")))

;; "yes" for the integer load types load, load_si, pcload and pcload_si.
(define_attr "any_int_load" "yes,no"
  (cond [(eq_attr "type" "load,load_si,pcload,pcload_si")
	 (const_string "yes")]
	(const_string "no")))

;; Generic case: any insn that needs a delay slot accepts any insn that is
;; eligible for one; no annulling variants are provided.
(define_delay
  (eq_attr "needs_delay_slot" "yes")
  [(eq_attr "in_delay_slot" "yes") (nil) (nil)])

;; On the SH and SH2, the rte instruction reads the return pc from the stack,
;; and thus we can't put a pop instruction in its delay slot.
;; ??? On the SH3, the rte instruction does not use the stack, so a pop
;; instruction can go in the delay slot.

;; Since a normal return (rts) implicitly uses the PR register,
;; we can't allow PR register loads in an rts delay slot.

(define_delay
  (eq_attr "type" "return")
  [(and (eq_attr "in_delay_slot" "yes")
	(ior (and (eq_attr "interrupt_function" "no")
		  (eq_attr "type" "!pload,prset"))
	     (and (eq_attr "interrupt_function" "yes")
		  (ior
		   (ne (symbol_ref "TARGET_SH3") (const_int 0))
		   (eq_attr "hit_stack" "no"))))) (nil) (nil)])

;; Since a call implicitly uses the PR register, we can't allow
;; a PR register store in a jsr delay slot.

(define_delay
  (ior (eq_attr "type" "call") (eq_attr "type" "sfunc"))
  [(and (eq_attr "in_delay_slot" "yes")
	(eq_attr "type" "!pstore,prget")) (nil) (nil)])

;; Say that we have annulled true branches, since this gives smaller and
;; faster code when branches are predicted as not taken.

(define_delay
  (and (eq_attr "type" "cbranch")
       (ne (symbol_ref "TARGET_SH2") (const_int 0)))
  [(eq_attr "in_delay_slot" "yes") (eq_attr "cond_delay_slot" "yes") (nil)])

;; -------------------------------------------------------------------------
;; SImode signed integer comparisons
;; -------------------------------------------------------------------------

;; Sets T_REG to ((operand 0 & operand 1) == 0) with a single tst.
(define_insn ""
  [(set (reg:SI T_REG)
	(eq:SI (and:SI (match_operand:SI 0 "arith_reg_operand" "z,r")
		       (match_operand:SI 1 "arith_operand" "L,r"))
	       (const_int 0)))]
  "TARGET_SH1"
  "tst	%1,%0"
  [(set_attr "type" "mt_group")])

;; ???
;; Perhaps should only accept reg/constant if the register is reg 0.
;; That would still allow reload to create cmpi instructions, but would
;; perhaps allow forcing the constant into a register when that is better.
;; Probably should use r0 for mem/imm compares, but force constant into a
;; register for pseudo/imm compares.

;; T_REG = (operand 0 == operand 1).  Three alternatives, one output line
;; each: the "N" alternative tests the register against itself with tst,
;; the other two emit cmp/eq.
(define_insn "cmpeqsi_t"
  [(set (reg:SI T_REG)
	(eq:SI (match_operand:SI 0 "arith_reg_operand" "r,z,r")
	       (match_operand:SI 1 "arith_operand" "N,rI,r")))]
  "TARGET_SH1"
  "@
	tst	%0,%0
	cmp/eq	%1,%0
	cmp/eq	%1,%0"
   [(set_attr "type" "mt_group")])

;; T_REG = (operand 0 > operand 1), signed.  The "N" alternative uses the
;; one-operand cmp/pl form.
(define_insn "cmpgtsi_t"
  [(set (reg:SI T_REG)
	(gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
	       (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))]
  "TARGET_SH1"
  "@
	cmp/gt	%1,%0
	cmp/pl	%0"
   [(set_attr "type" "mt_group")])

;; T_REG = (operand 0 >= operand 1), signed.  The "N" alternative uses the
;; one-operand cmp/pz form.
(define_insn "cmpgesi_t"
  [(set (reg:SI T_REG)
	(ge:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
	       (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))]
  "TARGET_SH1"
  "@
	cmp/ge	%1,%0
	cmp/pz	%0"
   [(set_attr "type" "mt_group")])

;; -------------------------------------------------------------------------
;; SImode unsigned integer comparisons
;; -------------------------------------------------------------------------

;; T_REG = (operand 0 >= operand 1), unsigned; register operands only.
(define_insn "cmpgeusi_t"
  [(set (reg:SI T_REG)
	(geu:SI (match_operand:SI 0 "arith_reg_operand" "r")
		(match_operand:SI 1 "arith_reg_operand" "r")))]
  "TARGET_SH1"
  "cmp/hs	%1,%0"
   [(set_attr "type" "mt_group")])

;; T_REG = (operand 0 > operand 1), unsigned; register operands only.
(define_insn "cmpgtusi_t"
  [(set (reg:SI T_REG)
	(gtu:SI (match_operand:SI 0 "arith_reg_operand" "r")
		(match_operand:SI 1 "arith_reg_operand" "r")))]
  "TARGET_SH1"
  "cmp/hi	%1,%0"
   [(set_attr "type" "mt_group")])

;; We save the compare operands in the cmpxx patterns and use them when
;; we generate the branch.

;; Emits no RTL itself: it only records both operands in sh_compare_op0 /
;; sh_compare_op1 and finishes with DONE.
(define_expand "cmpsi"
  [(set (reg:SI T_REG)
	(compare (match_operand:SI 0 "arith_operand" "")
		 (match_operand:SI 1 "arith_operand" "")))]
  "TARGET_SH1"
  "
{
  sh_compare_op0 = operands[0];
  sh_compare_op1 = operands[1];
  DONE;
}")

;;
;; -------------------------------------------------------------------------
;; DImode signed integer comparisons
;; -------------------------------------------------------------------------

;; ??? Could get better scheduling by splitting the initial test from the
;; rest of the insn after reload.  However, the gain would hardly justify
;; the sh.md size increase necessary to do that.

;; T_REG = ((operand 0 & operand 1) == 0) for DImode operands: tst on the
;; %S halves, a bf to label %l9, then tst on the %R halves.  The sequence is
;; produced through output_branchy_insn.
(define_insn ""
  [(set (reg:SI T_REG)
	(eq:SI (and:DI (match_operand:DI 0 "arith_reg_operand" "r")
		       (match_operand:DI 1 "arith_operand" "r"))
	       (const_int 0)))]
  "TARGET_SH1"
  "* return output_branchy_insn (EQ, \"tst\\t%S1,%S0\;bf\\t%l9\;tst\\t%R1,%R0\",
				 insn, operands);"
  [(set_attr "length" "6")
   (set_attr "type" "arith3b")])

;; T_REG = (operand 0 == operand 1) for DImode operands.  Each alternative
;; compares the %S halves, branches to a local %,Ldi%= label when they
;; differ, and otherwise compares the %R halves.  The "N" alternative uses
;; tst of the register against itself instead of cmp/eq.
(define_insn "cmpeqdi_t"
  [(set (reg:SI T_REG)
	(eq:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
	       (match_operand:DI 1 "arith_reg_or_0_operand" "N,r")))]
  "TARGET_SH1"
  "@
	tst	%S0,%S0\;bf	%,Ldi%=\;tst	%R0,%R0\\n%,Ldi%=:
	cmp/eq	%S1,%S0\;bf	%,Ldi%=\;cmp/eq	%R1,%R0\\n%,Ldi%=:"
  [(set_attr "length" "6")
   (set_attr "type" "arith3b")])

;; After reload and when optimizing, split the DImode equality test into two
;; SImode equality tests (operands 2/3, then 4/5) joined by a conditional
;; jump to a freshly generated label (operand 6).
(define_split
  [(set (reg:SI T_REG)
	(eq:SI (match_operand:DI 0 "arith_reg_operand" "")
	       (match_operand:DI 1 "arith_reg_or_0_operand" "")))]
;; If we applied this split when not optimizing, it would only be
;; applied during the machine-dependent reorg, when no new basic blocks
;; may be created.
  "TARGET_SH1 && reload_completed && optimize"
  [(set (reg:SI T_REG) (eq:SI (match_dup 2) (match_dup 3)))
   (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
			   (label_ref (match_dup 6))
			   (pc)))
   (set (reg:SI T_REG) (eq:SI (match_dup 4) (match_dup 5)))
   (match_dup 6)]
  "
{
  /* Operands 2 and 3: the SImode register at true_regnum + 1 on
     little-endian targets, + 0 otherwise (operand 3 stays const0_rtx when
     operand 1 is zero).  */
  operands[2]
    = gen_rtx_REG (SImode,
		   true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
  operands[3]
    = (operands[1] == const0_rtx
       ? const0_rtx
       : gen_rtx_REG (SImode,
		      true_regnum (operands[1])
		      + (TARGET_LITTLE_ENDIAN ? 1 : 0)));
  /* Operands 4 and 5: the low parts, compared only if the first test
     did not already decide the result.  */
  operands[4] = gen_lowpart (SImode, operands[0]);
  operands[5] = gen_lowpart (SImode, operands[1]);
  operands[6] = gen_label_rtx ();
}")

;; T_REG = (operand 0 > operand 1), signed DImode.  Both alternatives use a
;; bf with the {.|/}s suffix to a local %,Ldi%= label, with the signed test
;; on the %S half (cmp/gt or cmp/pl) placed directly after it, followed by
;; an unsigned cmp/hi.
(define_insn "cmpgtdi_t"
  [(set (reg:SI T_REG)
	(gt:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
	       (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
  "TARGET_SH2"
  "@
	cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/gt\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:
	tst\\t%S0,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/pl\\t%S0\;cmp/hi\\t%S0,%R0\\n%,Ldi%=:"
  [(set_attr "length" "8")
   (set_attr "type" "arith3")])

;; NOTE(review): the pattern below is cut off at the end of the visible part
;; of this file; its visible head is reproduced unchanged.
(define_insn "cmpgedi_t"
  [(set (reg:SI T_REG)
	(ge:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
	       (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
  "TARGET_SH2"
  "@
💿 文件大小 30755 K
👤 上传用户 xfzzp_0321
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#linux #gcc #编译器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -