📄 pa.md
字号:
"i1_7100lc,mem_7100lc")

;; The 7300 has no penalty for store-store or store-load.
(define_insn_reservation "Y10" 2
  (and (eq_attr "type" "store")
       (eq_attr "cpu" "7300"))
  "i1_7100lc")

(define_insn_reservation "Y11" 2
  (and (eq_attr "type" "fpstore")
       (eq_attr "cpu" "7300"))
  "i1_7100lc")

;; We have an "anti-bypass" for FP loads which feed an FP store.
(define_bypass 3 "Y3" "Y5,Y9,Y11" "hppa_fpstore_bypass_p")

;; Scheduling for the PA8000 is somewhat different than scheduling for a
;; traditional architecture.
;;
;; The PA8000 has a large (56) entry reorder buffer that is split between
;; memory and non-memory operations.
;;
;; The PA8000 can issue two memory and two non-memory operations per cycle
;; to the function units, with the exception of branches and multi-output
;; instructions.  The PA8000 can retire two non-memory operations per cycle
;; and two memory operations per cycle, only one of which may be a store.
;;
;; Given the large reorder buffer, the processor can hide most latencies.
;; According to HP, they've got the best results by scheduling for retirement
;; bandwidth with limited latency scheduling for floating point operations.
;; Latency for integer operations and memory references is ignored.
;;
;; We claim floating point operations have a 2 cycle latency and are
;; fully pipelined, except for div and sqrt which are not pipelined and
;; take from 17 to 31 cycles to complete.
;;
;; It's worth noting that there is no way to saturate all the functional
;; units on the PA8000 as there is not enough issue bandwidth.

(define_automaton "pa8000")

;; Unit naming: i* = issue slots, r* = retire slots; the "nm" units are
;; the non-memory halves and the "m" units the memory halves.  store_8000
;; models the single store-retire port; f*/fdivsqrt* are the FP units.
(define_cpu_unit "inm0_8000, inm1_8000, im0_8000, im1_8000" "pa8000")
(define_cpu_unit "rnm0_8000, rnm1_8000, rm0_8000, rm1_8000" "pa8000")
(define_cpu_unit "store_8000" "pa8000")
(define_cpu_unit "f0_8000, f1_8000" "pa8000")
(define_cpu_unit "fdivsqrt0_8000, fdivsqrt1_8000" "pa8000")

;; Either slot of each issue/retire pair may be used.
(define_reservation "inm_8000" "inm0_8000 | inm1_8000")
(define_reservation "im_8000" "im0_8000 | im1_8000")
(define_reservation "rnm_8000" "rnm0_8000 | rnm1_8000")
(define_reservation "rm_8000" "rm0_8000 | rm1_8000")
(define_reservation "f_8000" "f0_8000 | f1_8000")
(define_reservation "fdivsqrt_8000" "fdivsqrt0_8000 | fdivsqrt1_8000")

;; We can issue any two memops per cycle, but we can only retire
;; one memory store per cycle.  We assume that the reorder buffer
;; will hide any memory latencies per HP's recommendation.
(define_insn_reservation "Z0" 0
  (and (eq_attr "type" "load,fpload")
       (eq_attr "cpu" "8000"))
  "im_8000,rm_8000")

(define_insn_reservation "Z1" 0
  (and (eq_attr "type" "store,fpstore")
       (eq_attr "cpu" "8000"))
  "im_8000,rm_8000+store_8000")

;; We can issue and retire two non-memory operations per cycle with
;; a few exceptions (branches).  This group catches those we want
;; to assume have zero latency.
(define_insn_reservation "Z2" 0
  (and (eq_attr "type" "!load,fpload,store,fpstore,uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch,fpcc,fpalu,fpmulsgl,fpmuldbl,fpsqrtsgl,fpsqrtdbl,fpdivsgl,fpdivdbl")
       (eq_attr "cpu" "8000"))
  "inm_8000,rnm_8000")

;; Branches use both slots in the non-memory issue and
;; retirement unit.
(define_insn_reservation "Z3" 0
  (and (eq_attr "type" "uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
       (eq_attr "cpu" "8000"))
  "inm0_8000+inm1_8000,rnm0_8000+rnm1_8000")

;; We partial latency schedule the floating point units.
;; They can issue/retire two at a time in the non-memory
;; units.  We fix their latency at 2 cycles and they
;; are fully pipelined.
(define_insn_reservation "Z4" 1
  (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
       (eq_attr "cpu" "8000"))
  "inm_8000,f_8000,rnm_8000")

;; The fdivsqrt units are not pipelined and have a very long latency.
;; To keep the DFA from exploding, we do not show all the
;; reservations for the divsqrt unit.
(define_insn_reservation "Z5" 17
  (and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
       (eq_attr "cpu" "8000"))
  "inm_8000,fdivsqrt_8000*6,rnm_8000")

(define_insn_reservation "Z6" 31
  (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
       (eq_attr "cpu" "8000"))
  "inm_8000,fdivsqrt_8000*6,rnm_8000")

;; Compare instructions.
;; This controls RTL generation and register allocation.

;; We generate RTL for comparisons and branches by having the cmpxx
;; patterns store away the operands.  Then, the scc and bcc patterns
;; emit RTL for both the compare and the branch.
;;
(define_expand "cmpdi"
  [(set (reg:CC 0)
	(compare:CC (match_operand:DI 0 "reg_or_0_operand" "")
		    (match_operand:DI 1 "register_operand" "")))]
  "TARGET_64BIT"
  "{ hppa_compare_op0 = operands[0]; hppa_compare_op1 = operands[1]; hppa_branch_type = CMP_SI; DONE;}")

(define_expand "cmpsi"
  [(set (reg:CC 0)
	(compare:CC (match_operand:SI 0 "reg_or_0_operand" "")
		    (match_operand:SI 1 "arith5_operand" "")))]
  ""
  "{ hppa_compare_op0 = operands[0]; hppa_compare_op1 = operands[1]; hppa_branch_type = CMP_SI; DONE;}")

(define_expand "cmpsf"
  [(set (reg:CCFP 0)
	(compare:CCFP (match_operand:SF 0 "reg_or_0_operand" "")
		      (match_operand:SF 1 "reg_or_0_operand" "")))]
  "! TARGET_SOFT_FLOAT"
  "{ hppa_compare_op0 = operands[0]; hppa_compare_op1 = operands[1]; hppa_branch_type = CMP_SF; DONE;}")

(define_expand "cmpdf"
  [(set (reg:CCFP 0)
	(compare:CCFP (match_operand:DF 0 "reg_or_0_operand" "")
		      (match_operand:DF 1 "reg_or_0_operand" "")))]
  "! TARGET_SOFT_FLOAT"
  "{ hppa_compare_op0 = operands[0]; hppa_compare_op1 = operands[1]; hppa_branch_type = CMP_DF; DONE;}")

;; Single-precision FP compare.
(define_insn ""
  [(set (reg:CCFP 0)
	(match_operator:CCFP 2 "comparison_operator"
			     [(match_operand:SF 0 "reg_or_0_operand" "fG")
			      (match_operand:SF 1 "reg_or_0_operand" "fG")]))]
  "! TARGET_SOFT_FLOAT"
  "fcmp,sgl,%Y2 %f0,%f1"
  [(set_attr "length" "4")
   (set_attr "type" "fpcc")])

;; Double-precision FP compare.
(define_insn ""
  [(set (reg:CCFP 0)
	(match_operator:CCFP 2 "comparison_operator"
			     [(match_operand:DF 0 "reg_or_0_operand" "fG")
			      (match_operand:DF 1 "reg_or_0_operand" "fG")]))]
  "! TARGET_SOFT_FLOAT"
  "fcmp,dbl,%Y2 %f0,%f1"
  [(set_attr "length" "4")
   (set_attr "type" "fpcc")])

;; Provide a means to emit the movccfp0 and movccfp1 optimization
;; placeholders.  This is necessary in rare situations when a
;; placeholder is re-emitted (see PR 8705).
(define_expand "movccfp"
  [(set (reg:CCFP 0)
	(match_operand 0 "const_int_operand" ""))]
  "! TARGET_SOFT_FLOAT"
  "{ if ((unsigned HOST_WIDE_INT) INTVAL (operands[0]) > 1) FAIL;}")

;; The following patterns are optimization placeholders.  In almost
;; all cases, the user of the condition code will be simplified and the
;; original condition code setting insn should be eliminated.
(define_insn "*movccfp0"
  [(set (reg:CCFP 0)
	(const_int 0))]
  "! TARGET_SOFT_FLOAT"
  "fcmp,dbl,= %%fr0,%%fr0"
  [(set_attr "length" "4")
   (set_attr "type" "fpcc")])

(define_insn "*movccfp1"
  [(set (reg:CCFP 0)
	(const_int 1))]
  "! TARGET_SOFT_FLOAT"
  "fcmp,dbl,!= %%fr0,%%fr0"
  [(set_attr "length" "4")
   (set_attr "type" "fpcc")])

;; scc insns.

(define_expand "seq"
  [(set (match_operand:SI 0 "register_operand" "")
	(eq:SI (match_dup 1)
	       (match_dup 2)))]
  "!TARGET_64BIT"
  "{ /* fp scc patterns rarely match, and are not a win on the PA. */ if (hppa_branch_type != CMP_SI) FAIL; /* set up operands from compare. */ operands[1] = hppa_compare_op0; operands[2] = hppa_compare_op1; /* fall through and generate default code */}")

(define_expand "sne"
  [(set (match_operand:SI 0 "register_operand" "")
	(ne:SI (match_dup 1)
	       (match_dup 2)))]
  "!TARGET_64BIT"
  "{ /* fp scc patterns rarely match, and are not a win on the PA. */ if (hppa_branch_type != CMP_SI) FAIL; operands[1] = hppa_compare_op0; operands[2] = hppa_compare_op1;}")

(define_expand "slt"
  [(set (match_operand:SI 0 "register_operand" "")
	(lt:SI (match_dup 1)
	       (match_dup 2)))]
  "!TARGET_64BIT"
  "{ /* fp scc patterns rarely match, and are not a win on the PA. */ if (hppa_branch_type != CMP_SI) FAIL; operands[1] = hppa_compare_op0; operands[2] = hppa_compare_op1;}")

(define_expand "sgt"
  [(set (match_operand:SI 0 "register_operand" "")
	(gt:SI (match_dup 1)
	       (match_dup 2)))]
  "!TARGET_64BIT"
  "{ /* fp scc patterns rarely match, and are not a win on the PA. */ if (hppa_branch_type != CMP_SI) FAIL; operands[1] = hppa_compare_op0; operands[2] = hppa_compare_op1;}")

(define_expand "sle"
  [(set (match_operand:SI 0 "register_operand" "")
	(le:SI (match_dup 1)
	       (match_dup 2)))]
  "!TARGET_64BIT"
  "{ /* fp scc patterns rarely match, and are not a win on the PA. */ if (hppa_branch_type != CMP_SI) FAIL; operands[1] = hppa_compare_op0; operands[2] = hppa_compare_op1;}")

(define_expand "sge"
  [(set (match_operand:SI 0 "register_operand" "")
	(ge:SI (match_dup 1)
	       (match_dup 2)))]
  "!TARGET_64BIT"
  "{ /* fp scc patterns rarely match, and are not a win on the PA. */ if (hppa_branch_type != CMP_SI) FAIL; operands[1] = hppa_compare_op0; operands[2] = hppa_compare_op1;}")

(define_expand "sltu"
  [(set (match_operand:SI 0 "register_operand" "")
	(ltu:SI (match_dup 1)
		(match_dup 2)))]
  "!TARGET_64BIT"
  "{ if (hppa_branch_type != CMP_SI) FAIL; operands[1] = hppa_compare_op0; operands[2] = hppa_compare_op1;}")

(define_expand "sgtu"
  [(set (match_operand:SI 0 "register_operand" "")
	(gtu:SI (match_dup 1)
		(match_dup 2)))]
  "!TARGET_64BIT"
  "{ if (hppa_branch_type != CMP_SI) FAIL; operands[1] = hppa_compare_op0; operands[2] = hppa_compare_op1;}")

(define_expand "sleu"
  [(set (match_operand:SI 0 "register_operand" "")
	(leu:SI (match_dup 1)
		(match_dup 2)))]
  "!TARGET_64BIT"
  "{ if (hppa_branch_type != CMP_SI) FAIL; operands[1] = hppa_compare_op0; operands[2] = hppa_compare_op1;}")

(define_expand "sgeu"
  [(set (match_operand:SI 0 "register_operand" "")
	(geu:SI (match_dup 1)
		(match_dup 2)))]
  "!TARGET_64BIT"
  "{ if (hppa_branch_type != CMP_SI) FAIL; operands[1] = hppa_compare_op0; operands[2] = hppa_compare_op1;}")

;; Instruction canonicalization puts immediate operands second, which
;; is the reverse of what we want.

(define_insn "scc"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(match_operator:SI 3 "comparison_operator"
			   [(match_operand:SI 1 "register_operand" "r")
			    (match_operand:SI 2 "arith11_operand" "rI")]))]
  ""
  "{com%I2clr|cmp%I2clr},%B3 %2,%1,%0\;ldi 1,%0"
  [(set_attr "type" "binary")
   (set_attr "length" "8")])

(define_insn ""
  [(set (match_operand:DI 0 "register_operand" "=r")
	(match_operator:DI 3 "comparison_operator"
			   [(match_operand:DI 1 "register_operand" "r")
			    (match_operand:DI 2 "arith11_operand" "rI")]))]
  "TARGET_64BIT"
  "cmp%I2clr,*%B3 %2,%1,%0\;ldi 1,%0"
  [(set_attr "type" "binary")
   (set_attr "length" "8")])

(define_insn "iorscc"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(ior:SI (match_operator:SI 3 "comparison_operator"
				   [(match_operand:SI 1 "register_operand" "r")
				    (match_operand:SI 2 "arith11_operand" "rI")])
		(match_operator:SI 6 "comparison_operator"
				   [(match_operand:SI 4 "register_operand" "r")
				    (match_operand:SI 5 "arith11_operand" "rI")])))]
  ""
  "{com%I2clr|cmp%I2clr},%S3 %2,%1,%%r0\;{com%I5clr|cmp%I5clr},%B6 %5,%4,%0\;ldi 1,%0"
  [(set_attr "type" "binary")
   (set_attr "length" "12")])

(define_insn ""
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ior:DI (match_operator:DI 3 "comparison_operator"
				   [(match_operand:DI 1 "register_operand" "r")
				    (match_operand:DI 2 "arith11_operand" "rI")])
		(match_operator:DI 6 "comparison_operator"
				   [(match_operand:DI 4 "register_operand" "r")
				    (match_operand:DI 5 "arith11_operand" "rI")])))]
  "TARGET_64BIT"
  "cmp%I2clr,*%S3 %2,%1,%%r0\;cmp%I5clr,*%B6 %5,%4,%0\;ldi 1,%0"
  [(set_attr "type" "binary")
   (set_attr "length" "12")])

;; Combiner patterns for common operations performed with the output
;; from an scc insn (negscc and incscc).

(define_insn "negscc"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(neg:SI (match_operator:SI 3 "comparison_operator"
				   [(match_operand:SI 1 "register_operand" "r")
				    (match_operand:SI 2 "arith11_operand" "rI")])))]
  ""
  "{com%I2clr|cmp%I2clr},%B3 %2,%1,%0\;ldi -1,%0"
  [(set_attr "type" "binary")
   (set_attr "length" "8")])

(define_insn ""
  [(set (match_operand:DI 0 "register_operand" "=r")
	(neg:DI (match_operator:DI 3 "comparison_operator"
				   [(match_operand:DI 1 "register_operand" "r")
				    (match_operand:DI 2 "arith11_operand" "rI")])))]
  "TARGET_64BIT"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -