i386.md

来自「gcc3.2.1源代码」· Markdown 代码 · 共 2,041 行 · 第 1/5 页

MD
2,041
字号
;; Function-unit (pipeline) descriptions for the Pentium, Pentium Pro/PII
;; and AMD K6 schedulers.
;;
;; Operand layout of every entry below:
;;   (define_function_unit NAME MULTIPLICITY SIMULTANEITY
;;     TEST READY-DELAY ISSUE-DELAY [CONFLICT-LIST])
;; TEST selects the insns that use the unit; the two trailing numbers are
;; the ready delay and the issue delay, in cycles.

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "fmov")
            (ior (match_operand 1 "immediate_operand" "")
                 (eq_attr "memory" "store"))))
  2 2)

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "cld"))
  2 2)

(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "none,load")))
  1 1)

; Read/Modify/Write instructions usually take 3 cycles.
(define_function_unit "pent_u" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,alu1,ishift")
            (and (eq_attr "pent_pair" "pu")
                 (eq_attr "memory" "both"))))
  3 3)

(define_function_unit "pent_uv" 2 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,alu1,ishift")
            (and (eq_attr "pent_pair" "!np")
                 (eq_attr "memory" "both"))))
  3 3)

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,alu1,negnot,ishift")
            (and (eq_attr "pent_pair" "np")
                 (eq_attr "memory" "both"))))
  3 3)

; Read/Modify or Modify/Write instructions usually take 2 cycles.
(define_function_unit "pent_u" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,ishift")
            (and (eq_attr "pent_pair" "pu")
                 (eq_attr "memory" "load,store"))))
  2 2)

(define_function_unit "pent_uv" 2 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,ishift")
            (and (eq_attr "pent_pair" "!np")
                 (eq_attr "memory" "load,store"))))
  2 2)

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,ishift")
            (and (eq_attr "pent_pair" "np")
                 (eq_attr "memory" "load,store"))))
  2 2)

; Insns w/o memory operands and move instructions usually take one cycle.
(define_function_unit "pent_u" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "pent_pair" "pu"))
  1 1)

(define_function_unit "pent_v" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "pent_pair" "pv"))
  1 1)

(define_function_unit "pent_uv" 2 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "pent_pair" "!np"))
  1 1)

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "pent_pair" "np"))
  1 1)

; Pairable insns only conflict with other non-pairable insns.
(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,alu1,ishift")
            (and (eq_attr "pent_pair" "!np")
                 (eq_attr "memory" "both"))))
  3 3
  [(eq_attr "pent_pair" "np")])

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,alu1,ishift")
            (and (eq_attr "pent_pair" "!np")
                 (eq_attr "memory" "load,store"))))
  2 2
  [(eq_attr "pent_pair" "np")])

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "pent_pair" "!np"))
  1 1
  [(eq_attr "pent_pair" "np")])

; Floating point instructions usually block one cycle longer when combined
; with integer instructions, because of the unpaired fxch instruction.
(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp"))
  2 2
  [(eq_attr "type" "!fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp")])

(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fcmp,fxch,fsgn"))
  1 1)

; Addition takes 3 cycles; assume other random cruft does as well.
; ??? Trivial fp operations such as fabs or fchs take only one cycle.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fop,fop1,fistp"))
  3 1)

; Multiplication takes 3 cycles and is only half pipelined.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fmul"))
  3 1)

(define_function_unit "pent_mul" 1 1
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fmul"))
  2 2)

; ??? This is correct only for fdiv and sqrt -- sin/cos take 65-100 cycles.
; They can overlap with integer insns.  Only the last two cycles can overlap
; with other fp insns.  Only fsin/fcos can overlap with multiplies.
; Only the last two cycles of fsin/fcos can overlap with other instructions.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fdiv"))
  39 37)

(define_function_unit "pent_mul" 1 1
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fdiv"))
  39 39)

(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fpspc"))
  70 68)

(define_function_unit "pent_mul" 1 1
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fpspc"))
  70 70)

;; Pentium Pro/PII Scheduling
;;
;; The PPro has an out-of-order core, but the instruction decoders are
;; naturally in-order and asymmetric.  We get best performance by scheduling
;; for the decoders, for in doing so we give the oo execution unit the
;; most choices.

;; Categorize how many uops an ia32 instruction evaluates to:
;;   one --  an instruction with 1 uop can be decoded by any of the
;;           three decoders.
;;   few --  an instruction with 1 to 4 uops can be decoded only by
;;           decoder 0.
;;   many -- a complex instruction may take an unspecified number of
;;           cycles to decode in decoder 0.
;;
;; NOTE(review): "str" is listed under both the "many" and the "few" tests.
;; The cond arms are tried in order, so the "few" entry for "str" is never
;; reached.  Left as is; confirm which classification was intended.

(define_attr "ppro_uops" "one,few,many"
  (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str")
           (const_string "many")
         (eq_attr "type" "icmov,fcmov,str,cld")
           (const_string "few")
         (eq_attr "type" "imov")
           (if_then_else (eq_attr "memory" "store,both")
             (const_string "few")
             (const_string "one"))
         (eq_attr "memory" "!none")
           (const_string "few")
        ]
        (const_string "one")))

;; Rough readiness numbers.  Fine tuning happens in i386.c.
;;
;; p0   describes port 0.
;; p01  describes ports 0 and 1 as a pair; alu insns can issue to either.
;; p2   describes port 2 for loads.
;; p34  describes ports 3 and 4 for stores.
;; fpu  describes the fpu accessed via port 0.
;;      ??? It is less than clear if there are separate fadd and fmul units
;;      that could operate in parallel.
;;
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "ishift,lea,ibr,cld"))
  1 1)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "imul"))
  4 1)

;; ??? Does the divider lock out the pipe while it works,
;; or is there a disconnected unit?
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "idiv"))
  17 17)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fop,fop1,fsgn,fistp"))
  3 1)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fcmov"))
  2 1)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fcmp"))
  1 1)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fmov"))
  1 1)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fmul"))
  5 1)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fdiv,fpspc"))
  56 1)

(define_function_unit "ppro_p01" 2 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "!imov,fmov"))
  1 1)

(define_function_unit "ppro_p01" 2 0
  (and (and (eq_attr "cpu" "pentiumpro")
            (eq_attr "type" "imov,fmov"))
       (eq_attr "memory" "none"))
  1 1)

(define_function_unit "ppro_p2" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (ior (eq_attr "type" "pop")
            (eq_attr "memory" "load,both")))
  3 1)

(define_function_unit "ppro_p34" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (ior (eq_attr "type" "push")
            (eq_attr "memory" "store,both")))
  1 1)

(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fop,fop1,fsgn,fmov,fcmp,fcmov,fistp"))
  1 1)

(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fmul"))
  5 2)

(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fdiv,fpspc"))
  56 56)

;; imul uses the fpu.  ??? does it have the same throughput as fmul?
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "imul"))
  4 1)

;; AMD K6/K6-2 Scheduling
;;
;; The K6 has a similar architecture to the PPro.  The important difference
;; is that there are only two decoders and they seem to be much slower than
;; the execution units.  So we have to pay much more attention to proper
;; decoding for schedulers.  We share most of the scheduler code for PPro in
;; i386.c.
;;
;; The fp unit is not pipelined and does one operation per two cycles
;; including the FXCH.
;;
;; alu    describes both ALU units (ALU-X and ALU-Y).
;; alux   describes X alu unit
;; fpu    describes FPU unit
;; load   describes load unit.
;; branch describes branch unit.
;; store  describes store unit.  This unit is not modelled completely and only
;;        used to model lea operation.  Otherwise it lies outside of the
;;        critical path.
;;
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.

;; The decoder specification is in the PPro section above!

;; Shift instructions and certain arithmetic are issued only to X pipe.
(define_function_unit "k6_alux" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "ishift,alu1,negnot,cld"))
  1 1)

;; The QI mode arithmetic is issued to X pipe only.
(define_function_unit "k6_alux" 1 0
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec")
            (match_operand:QI 0 "general_operand" "")))
  1 1)

(define_function_unit "k6_alu" 2 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "ishift,alu1,negnot,alu,icmp,test,imovx,incdec,setcc,lea"))
  1 1)

(define_function_unit "k6_alu" 2 0
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "none")))
  1 1)

(define_function_unit "k6_branch" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "call,callv,ibr"))
  1 1)

;; The load unit has two cycle latency, but we take care of it in adjust_cost
(define_function_unit "k6_load" 1 0
  (and (eq_attr "cpu" "k6")
       (ior (eq_attr "type" "pop")
            (eq_attr "memory" "load,both")))
  1 1)

(define_function_unit "k6_load" 1 0
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both")))
  10 10)

;; Lea has two instructions, so latency is probably 2
(define_function_unit "k6_store" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "lea"))
  2 1)

(define_function_unit "k6_store" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "str"))
  10 10)

(define_function_unit "k6_store" 1 0
  (and (eq_attr "cpu" "k6")
       (ior (eq_attr "type" "push")
            (eq_attr "memory" "store,both")))
  1 1)

(define_function_unit "k6_fpu" 1 1
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "fop,fop1,fmov,fcmp,fistp"))
  2 2)

(define_function_unit "k6_fpu" 1 1
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "fmul"))
  2 2)

;; ??? Guess
(define_function_unit "k6_fpu" 1 1
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "fdiv,fpspc"))
  56 56)

(define_function_unit "k6_alu" 2 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "imul"))
  2 2)

(define_function_unit "k6_alux" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "imul"))
  2 2)

;; ??? Guess
(define_function_unit "k6_alu" 2 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "idiv"))
  17 17)

(define_function_unit "k6_alux" 1 0

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?