📄 ppro.md
字号:
(eq_attr "type" "fmov")))) "decoder0,(p0+p4),(p0+p3)")
;; NOTE(review): the line above is the tail of a reservation whose
;; beginning lies before this section; it is reproduced unchanged.

;; fmul executes on port 0 with latency 5.  It has issue latency 2,
;; but we don't model this.
(define_insn_reservation "ppro_fmul" 5
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fmul")))
  "decoder0,p0*2")

(define_insn_reservation "ppro_fmul_load" 6
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "fmul")))
  "decoder0,p2+p0,p0")

;; fdiv latencies depend on the mode of the operands.  XFmode gives
;; a latency of 38 cycles, DFmode gives 32, and SFmode gives latency 18.
;; Division by a power of 2 takes only 9 cycles, but we cannot model
;; that.  Throughput is equal to latency - 1, which we model using the
;; ppro_div automaton.
;;
;; Each mode has a register form and a load form; the load form adds one
;; cycle of latency and additionally reserves p2 in the first cycle.
(define_insn_reservation "ppro_fdiv_SF" 18
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "decodern,p0+fdiv,fdiv*16")

(define_insn_reservation "ppro_fdiv_SF_load" 19
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "decoder0,p2+p0+fdiv,fdiv*16")

(define_insn_reservation "ppro_fdiv_DF" 32
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "decodern,p0+fdiv,fdiv*30")

(define_insn_reservation "ppro_fdiv_DF_load" 33
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "decoder0,p2+p0+fdiv,fdiv*30")

(define_insn_reservation "ppro_fdiv_XF" 38
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "decodern,p0+fdiv,fdiv*36")

(define_insn_reservation "ppro_fdiv_XF_load" 39
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "decoder0,p2+p0+fdiv,fdiv*36")

;; MMX instructions can execute on either port 0 or port 1 with a
;; throughput of 1/cycle.
;;   on port 0:	- ALU (latency 1)
;;		- Multiplier Unit (latency 3)
;;   on port 1:	- ALU (latency 1)
;;		- Shift Unit (latency 1)
;;
;; MMX instructions are either of the type reg-reg, or read-modify, and
;; except for mmxshft and mmxmul they can execute on port 0 or port 1,
;; so they behave as "simple" instructions that need no special modelling.
;; We only have to model mmxshft and mmxmul.
(define_insn_reservation "ppro_mmx_shft" 1
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxshft")))
  "decodern,p1")

;; The load form must test memory "load".  It previously tested "none",
;; which repeated the condition of ppro_mmx_shft and left the
;; memory-operand form of mmxshft without a reservation of its own.
(define_insn_reservation "ppro_mmx_shft_load" 2
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxshft")))
  "decoder0,p2+p1")

(define_insn_reservation "ppro_mmx_mul" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxmul")))
  "decodern,p0")

;; As for ppro_mmx_shft_load: test memory "load" rather than repeating
;; the "none" condition of the register form.
(define_insn_reservation "ppro_mmx_mul_load" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxmul")))
  "decoder0,p2+p0")

(define_insn_reservation "ppro_sse_mmxcvt" 4
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "mode" "DI")
            (eq_attr "type" "mmxcvt")))
  "decodern,p1")

;; FIXME: These are Pentium III only, but we cannot tell here if
;; we're generating code for PentiumPro/Pentium II or Pentium III
;; (define_insn_reservation "ppro_sse_mmxshft" 2
;;			 (and (eq_attr "cpu" "pentiumpro,generic32")
;;			      (and (eq_attr "mode" "DI")
;;				   (eq_attr "type" "mmxshft")))
;;			 "decodern,p0")

;; SSE is very complicated, and takes a bit more effort.
;; ???
;; I assumed that all SSE instructions decode on decoder0,
;; but is this correct?

;; The sfence instruction.
(define_insn_reservation "ppro_sse_sfence" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "unknown")
            (eq_attr "type" "sse")))
  "decoder0,p4+p3")

;; Scalar single-precision (SF mode) SSE reservations.

;; FIXME: This reservation is all wrong when we're scheduling sqrtss.
(define_insn_reservation "ppro_sse_SF" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "mode" "SF")
            (eq_attr "type" "sse")))
  "decodern,p0")

(define_insn_reservation "ppro_sse_add_SF" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "sseadd"))))
  "decodern,p1")

(define_insn_reservation "ppro_sse_add_SF_load" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "sseadd"))))
  "decoder0,p2+p1")

(define_insn_reservation "ppro_sse_cmp_SF" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "ssecmp"))))
  "decoder0,p1")

(define_insn_reservation "ppro_sse_cmp_SF_load" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "ssecmp"))))
  "decoder0,p2+p1")

(define_insn_reservation "ppro_sse_comi_SF" 1
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "ssecomi"))))
  "decodern,p0")

(define_insn_reservation "ppro_sse_comi_SF_load" 1
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "ssecomi"))))
  "decoder0,p2+p0")

(define_insn_reservation "ppro_sse_mul_SF" 4
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "ssemul"))))
  "decodern,p0")

(define_insn_reservation "ppro_sse_mul_SF_load" 4
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "ssemul"))))
  "decoder0,p2+p0")

;; FIXME: ssediv doesn't close p0 for 17 cycles, surely???
(define_insn_reservation "ppro_sse_div_SF" 18
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "ssediv"))))
  "decoder0,p0*17")

;; The load form must test memory "load".  It previously tested "none",
;; which repeated the condition of ppro_sse_div_SF and left the
;; memory-operand form of SF ssediv without a reservation of its own.
(define_insn_reservation "ppro_sse_div_SF_load" 18
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "ssediv"))))
  "decoder0,(p2+p0),p0*16")

(define_insn_reservation "ppro_sse_icvt_SF" 4
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "mode" "SF")
            (eq_attr "type" "sseicvt")))
  "decoder0,(p2+p1)*2")

(define_insn_reservation "ppro_sse_icvt_SI" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "mode" "SI")
            (eq_attr "type" "sseicvt")))
  "decoder0,(p2+p1)")

(define_insn_reservation "ppro_sse_mov_SF" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "ssemov"))))
  "decoder0,(p0|p1)")

(define_insn_reservation "ppro_sse_mov_SF_load" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "ssemov"))))
  "decoder0,p2+(p0|p1)")

(define_insn_reservation "ppro_sse_mov_SF_store" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "store")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "ssemov"))))
  "decoder0,p4+p3")

;; Packed single-precision (V4SF mode) SSE reservations.  Most of these
;; hold their execution port for two cycles ("*2").

(define_insn_reservation "ppro_sse_V4SF" 4
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "mode" "V4SF")
            (eq_attr "type" "sse")))
  "decoder0,p1*2")

(define_insn_reservation "ppro_sse_add_V4SF" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "sseadd"))))
  "decoder0,p1*2")

(define_insn_reservation "ppro_sse_add_V4SF_load" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "sseadd"))))
  "decoder0,(p2+p1)*2")

(define_insn_reservation "ppro_sse_cmp_V4SF" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "ssecmp"))))
  "decoder0,p1*2")

(define_insn_reservation "ppro_sse_cmp_V4SF_load" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "ssecmp"))))
  "decoder0,(p2+p1)*2")

(define_insn_reservation "ppro_sse_cvt_V4SF" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none,unknown")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "ssecvt"))))
  "decoder0,p1*2")

;; Counterpart of ppro_sse_cvt_V4SF for the remaining memory classes.
;; The type test previously read "ssecmp", so V4SF ssecvt insns with a
;; memory operand matched no reservation here; it now reads "ssecvt".
;; NOTE(review): "!none,unknown" is presumably "neither none nor
;; unknown" -- confirm against the eq_attr documentation.
(define_insn_reservation "ppro_sse_cvt_V4SF_other" 4
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "!none,unknown")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "ssecvt"))))
  "decoder0,p1,p4+p3")

(define_insn_reservation "ppro_sse_mul_V4SF" 5
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "ssemul"))))
  "decoder0,p0*2")

(define_insn_reservation "ppro_sse_mul_V4SF_load" 5
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "ssemul"))))
  "decoder0,(p2+p0)*2")

;; FIXME: p0 really closed this long???
(define_insn_reservation "ppro_sse_div_V4SF" 48
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "ssediv"))))
  "decoder0,p0*34")

(define_insn_reservation "ppro_sse_div_V4SF_load" 48
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "ssediv"))))
  "decoder0,(p2+p0)*2,p0*32")

(define_insn_reservation "ppro_sse_log_V4SF" 2
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "sselog,sselog1"))))
  "decodern,p1")

(define_insn_reservation "ppro_sse_log_V4SF_load" 2
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "sselog,sselog1"))))
  "decoder0,(p2+p1)")

(define_insn_reservation "ppro_sse_mov_V4SF" 1
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "ssemov"))))
  "decoder0,(p0|p1)*2")

(define_insn_reservation "ppro_sse_mov_V4SF_load" 2
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "ssemov"))))
  "decoder0,p2*2")

(define_insn_reservation "ppro_sse_mov_V4SF_store" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "store")
            (and (eq_attr "mode" "V4SF")
                 (eq_attr "type" "ssemov"))))
  "decoder0,(p4+p3)*2")

;; All other instructions are modelled as simple instructions.
;; We have already modelled all i387 floating point instructions, so all
;; other instructions execute on either port 0 or port 1.  This includes
;; the ALU units, and the MMX units.
;;
;; reg-reg instructions produce 1 uop so they can be decoded on any of
;; the three decoders.
(define_insn_reservation "ppro_insn" 1
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "none,unknown")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
  "decodern,(p0|p1)")

;; read-modify and register-memory instructions have 2 or three uops,
;; so they have to be decoded on decoder0.
(define_insn_reservation "ppro_insn_load" 3
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
  "decoder0,p2+(p0|p1)")

(define_insn_reservation "ppro_insn_store" 1
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
  "decoder0,(p0|p1),p4+p3")

;; read-modify-store instructions
;; produce 4 uops so they have to be
;; decoded on decoder0 as well.
;;
;; This reservation covers the same simple instruction types as the
;; other ppro_insn* reservations, for insns whose "memory" attribute is
;; "both".  Latency is 4.  The first cycle after decode reserves p2
;; together with p0 or p1; the following cycle reserves p4+p3.
;; NOTE(review): p2 is presumably the load port and p4+p3 the store
;; ports, as the *_load and *_store reservations suggest -- confirm
;; against the unit definitions at the top of the file.
(define_insn_reservation "ppro_insn_both" 4
  (and (eq_attr "cpu" "pentiumpro,generic32")
       (and (eq_attr "memory" "both")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
  "decoder0,p2+(p0|p1),p4+p3")
; APPLE LOCAL end mainline 2006-04-19 4434601
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -