i386.md
来自「gcc3.2.1源代码」· Markdown 代码 · 共 2,041 行 · 第 1/5 页
MD
2,041 行
(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "fmov")
            (ior (match_operand 1 "immediate_operand" "")
                 (eq_attr "memory" "store"))))
  2 2)

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "cld"))
  2 2)

(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "none,load")))
  1 1)

; Read/Modify/Write instructions usually take 3 cycles.
(define_function_unit "pent_u" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,alu1,ishift")
            (and (eq_attr "pent_pair" "pu")
                 (eq_attr "memory" "both"))))
  3 3)

(define_function_unit "pent_uv" 2 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,alu1,ishift")
            (and (eq_attr "pent_pair" "!np")
                 (eq_attr "memory" "both"))))
  3 3)

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,alu1,negnot,ishift")
            (and (eq_attr "pent_pair" "np")
                 (eq_attr "memory" "both"))))
  3 3)

; Read/Modify or Modify/Write instructions usually take 2 cycles.
(define_function_unit "pent_u" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,ishift")
            (and (eq_attr "pent_pair" "pu")
                 (eq_attr "memory" "load,store"))))
  2 2)

(define_function_unit "pent_uv" 2 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,ishift")
            (and (eq_attr "pent_pair" "!np")
                 (eq_attr "memory" "load,store"))))
  2 2)

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,ishift")
            (and (eq_attr "pent_pair" "np")
                 (eq_attr "memory" "load,store"))))
  2 2)

; Insns w/o memory operands and move instructions usually take one cycle.
(define_function_unit "pent_u" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "pent_pair" "pu"))
  1 1)

(define_function_unit "pent_v" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "pent_pair" "pv"))
  1 1)

(define_function_unit "pent_uv" 2 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "pent_pair" "!np"))
  1 1)

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "pent_pair" "np"))
  1 1)

; Pairable insns only conflict with other non-pairable insns.
(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,alu1,ishift")
            (and (eq_attr "pent_pair" "!np")
                 (eq_attr "memory" "both"))))
  3 3
  [(eq_attr "pent_pair" "np")])

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (and (eq_attr "type" "alu,alu1,ishift")
            (and (eq_attr "pent_pair" "!np")
                 (eq_attr "memory" "load,store"))))
  2 2
  [(eq_attr "pent_pair" "np")])

(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "pent_pair" "!np"))
  1 1
  [(eq_attr "pent_pair" "np")])

; Floating point instructions usually block a cycle longer when combined with
; integer instructions, because of the unpaired fxch instruction.
(define_function_unit "pent_np" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp"))
  2 2
  [(eq_attr "type" "!fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp")])

(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fcmp,fxch,fsgn"))
  1 1)

; Addition takes 3 cycles; assume other random cruft does as well.
; ??? Trivial fp operations such as fabs or fchs take only one cycle.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fop,fop1,fistp"))
  3 1)

; Multiplication takes 3 cycles and is only half pipelined.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fmul"))
  3 1)

(define_function_unit "pent_mul" 1 1
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fmul"))
  2 2)

; ??? This is correct only for fdiv and sqrt -- sin/cos take 65-100 cycles.
; They can overlap with integer insns.  Only the last two cycles can overlap
; with other fp insns.  Only fsin/fcos can overlap with multiplies.
; Only the last two cycles of fsin/fcos can overlap with other instructions.
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fdiv"))
  39 37)

(define_function_unit "pent_mul" 1 1
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fdiv"))
  39 39)

(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fpspc"))
  70 68)

(define_function_unit "pent_mul" 1 1
  (and (eq_attr "cpu" "pentium")
       (eq_attr "type" "fpspc"))
  70 70)

;; Pentium Pro/PII Scheduling
;;
;; The PPro has an out-of-order core, but the instruction decoders are
;; naturally in-order and asymmetric.  We get best performance by scheduling
;; for the decoders, for in doing so we give the oo execution unit the
;; most choices.

;; Categorize how many uops an ia32 instruction evaluates to:
;;   one  -- an instruction with 1 uop can be decoded by any of the
;;           three decoders.
;;   few  -- an instruction with 1 to 4 uops can be decoded only by
;;           decoder 0.
;;   many -- a complex instruction may take an unspecified number of
;;           cycles to decode in decoder 0.

(define_attr "ppro_uops" "one,few,many"
  (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str")
           (const_string "many")
         ;; NOTE(review): "str" is already matched by the "many" test above,
         ;; so listing it again here appears to have no effect -- confirm.
         (eq_attr "type" "icmov,fcmov,str,cld")
           (const_string "few")
         (eq_attr "type" "imov")
           (if_then_else (eq_attr "memory" "store,both")
             (const_string "few")
             (const_string "one"))
         (eq_attr "memory" "!none")
           (const_string "few")
        ]
        (const_string "one")))

;; Rough readiness numbers.  Fine tuning happens in i386.c.
;;
;; p0   describes port 0.
;; p01  describes ports 0 and 1 as a pair; alu insns can issue to either.
;; p2   describes port 2 for loads.
;; p34  describes ports 3 and 4 for stores.
;; fpu  describes the fpu accessed via port 0.
;;      ??? It is less than clear if there are separate fadd and fmul units
;;      that could operate in parallel.
;;
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "ishift,lea,ibr,cld"))
  1 1)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "imul"))
  4 1)

;; ??? Does the divider lock out the pipe while it works,
;; or is there a disconnected unit?
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "idiv"))
  17 17)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fop,fop1,fsgn,fistp"))
  3 1)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fcmov"))
  2 1)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fcmp"))
  1 1)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fmov"))
  1 1)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fmul"))
  5 1)

(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fdiv,fpspc"))
  56 1)

(define_function_unit "ppro_p01" 2 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "!imov,fmov"))
  1 1)

(define_function_unit "ppro_p01" 2 0
  (and (and (eq_attr "cpu" "pentiumpro")
            (eq_attr "type" "imov,fmov"))
       (eq_attr "memory" "none"))
  1 1)

(define_function_unit "ppro_p2" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (ior (eq_attr "type" "pop")
            (eq_attr "memory" "load,both")))
  3 1)

(define_function_unit "ppro_p34" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (ior (eq_attr "type" "push")
            (eq_attr "memory" "store,both")))
  1 1)

(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fop,fop1,fsgn,fmov,fcmp,fcmov,fistp"))
  1 1)

(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fmul"))
  5 2)

(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "fdiv,fpspc"))
  56 56)

;; imul uses the fpu.  ??? does it have the same throughput as fmul?
(define_function_unit "fpu" 1 0
  (and (eq_attr "cpu" "pentiumpro")
       (eq_attr "type" "imul"))
  4 1)

;; AMD K6/K6-2 Scheduling
;;
;; The K6 has a similar architecture to the PPro.  The important difference
;; is that there are only two decoders and they seem to be much slower than
;; the execution units.  So we have to pay much more attention to proper
;; decoding for schedulers.  We share most of the scheduler code for PPro
;; in i386.c.
;;
;; The fp unit is not pipelined and does one operation per two cycles
;; including the FXCH.
;;
;; alu    describes both ALU units (ALU-X and ALU-Y).
;; alux   describes X alu unit
;; fpu    describes FPU unit
;; load   describes load unit.
;; branch describes branch unit.
;; store  describes store unit.  This unit is not modelled completely and
;;        is only used to model the lea operation.  Otherwise it lies
;;        outside of the critical path.
;;
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.

;; The decoder specification is in the PPro section above!

;; Shift instructions and certain arithmetic are issued only to X pipe.
(define_function_unit "k6_alux" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "ishift,alu1,negnot,cld"))
  1 1)

;; The QI mode arithmetic is issued to X pipe only.
(define_function_unit "k6_alux" 1 0
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec")
            (match_operand:QI 0 "general_operand" "")))
  1 1)

(define_function_unit "k6_alu" 2 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "ishift,alu1,negnot,alu,icmp,test,imovx,incdec,setcc,lea"))
  1 1)

(define_function_unit "k6_alu" 2 0
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "none")))
  1 1)

(define_function_unit "k6_branch" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "call,callv,ibr"))
  1 1)

;; Load unit has two cycle latency, but we take care of it in adjust_cost
(define_function_unit "k6_load" 1 0
  (and (eq_attr "cpu" "k6")
       (ior (eq_attr "type" "pop")
            (eq_attr "memory" "load,both")))
  1 1)

(define_function_unit "k6_load" 1 0
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both")))
  10 10)

;; Lea has two instructions, so latency is probably 2
(define_function_unit "k6_store" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "lea"))
  2 1)

(define_function_unit "k6_store" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "str"))
  10 10)

(define_function_unit "k6_store" 1 0
  (and (eq_attr "cpu" "k6")
       (ior (eq_attr "type" "push")
            (eq_attr "memory" "store,both")))
  1 1)

(define_function_unit "k6_fpu" 1 1
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "fop,fop1,fmov,fcmp,fistp"))
  2 2)

(define_function_unit "k6_fpu" 1 1
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "fmul"))
  2 2)

;; ??? Guess
(define_function_unit "k6_fpu" 1 1
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "fdiv,fpspc"))
  56 56)

(define_function_unit "k6_alu" 2 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "imul"))
  2 2)

(define_function_unit "k6_alux" 1 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "imul"))
  2 2)

;; ??? Guess
(define_function_unit "k6_alu" 2 0
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "idiv"))
  17 17)

(define_function_unit "k6_alux" 1 0
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?