📄 pa.md
字号:
;;- Machine description for HP PA-RISC architecture for GNU C compiler
;; Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
;; 2002, 2003 Free Software Foundation, Inc.
;; Contributed by the Center for Software Science at the University
;; of Utah.

;; This file is part of GNU CC.

;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING. If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;; This gcc Version 2 machine description is inspired by sparc.md and
;; mips.md.

;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.

;; Insn type. Used to default other attribute values.

;; type "unary" insns have one input operand (1) and one output operand (0)
;; type "binary" insns have two input operands (1,2) and one output (0)

(define_attr "type"
  "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,parallel_branch"
  (const_string "binary"))

(define_attr "pa_combine_type"
  "fmpy,faddsub,uncond_branch,addmove,none"
  (const_string "none"))

;; Processor type (for scheduling, not code generation) -- this attribute
;; must exactly match the processor_type enumeration in pa.h.
;;
;; FIXME: Add 800 scheduling for completeness?

(define_attr "cpu" "700,7100,7100LC,7200,7300,8000"
  (const (symbol_ref "pa_cpu_attr")))

;; Length (in # of bytes).
;;
;; NOTE(review): the tests of a `cond' are tried in order, so the
;; "shift,nullshift" entries in the fourth test below appear to be
;; unreachable (the third test already matches those types).  Confirm
;; before changing; the clause is kept exactly as written.

(define_attr "length" ""
  (cond [(eq_attr "type" "load,fpload")
         (if_then_else (match_operand 1 "symbolic_memory_operand" "")
                       (const_int 8) (const_int 4))

         (eq_attr "type" "store,fpstore")
         (if_then_else (match_operand 0 "symbolic_memory_operand" "")
                       (const_int 8) (const_int 4))

         (eq_attr "type" "binary,shift,nullshift")
         (if_then_else (match_operand 2 "arith_operand" "")
                       (const_int 4) (const_int 12))

         (eq_attr "type" "move,unary,shift,nullshift")
         (if_then_else (match_operand 1 "arith_operand" "")
                       (const_int 4) (const_int 8))]

        (const_int 4)))

(define_asm_attributes
  [(set_attr "length" "4")
   (set_attr "type" "multi")])

;; Attributes for instruction and branch scheduling

;; For conditional branches.
(define_attr "in_branch_delay" "false,true"
  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
                     (eq_attr "length" "4"))
                (const_string "true")
                (const_string "false")))

;; Disallow instructions which use the FPU since they will tie up the FPU
;; even if the instruction is nullified.
(define_attr "in_nullified_branch_delay" "false,true"
  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch")
                     (eq_attr "length" "4"))
                (const_string "true")
                (const_string "false")))

;; For calls and millicode calls. Allow unconditional branches in the
;; delay slot.
(define_attr "in_call_delay" "false,true"
  (cond [(and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
              (eq_attr "length" "4"))
         (const_string "true")

         (eq_attr "type" "uncond_branch")
         (if_then_else (ne (symbol_ref "TARGET_JUMP_IN_DELAY")
                           (const_int 0))
                       (const_string "true")
                       (const_string "false"))]

        (const_string "false")))

;; Call delay slot description.
(define_delay (eq_attr "type" "call")
  [(eq_attr "in_call_delay" "true") (nil) (nil)])

;; Millicode call delay slot description.
(define_delay (eq_attr "type" "milli")
  [(eq_attr "in_call_delay" "true") (nil) (nil)])

;; Return and other similar instructions.
(define_delay (eq_attr "type" "branch,parallel_branch")
  [(eq_attr "in_branch_delay" "true") (nil) (nil)])

;; Floating point conditional branch delay slot description.
(define_delay (eq_attr "type" "fbranch")
  [(eq_attr "in_branch_delay" "true")
   (eq_attr "in_nullified_branch_delay" "true")
   (nil)])

;; Integer conditional branch delay slot description.
;; Nullification of conditional branches on the PA is dependent on the
;; direction of the branch. Forward branches nullify true and
;; backward branches nullify false. If the direction is unknown
;; then nullification is not allowed.
(define_delay (eq_attr "type" "cbranch")
  [(eq_attr "in_branch_delay" "true")
   (and (eq_attr "in_nullified_branch_delay" "true")
        (attr_flag "forward"))
   (and (eq_attr "in_nullified_branch_delay" "true")
        (attr_flag "backward"))])

(define_delay (and (eq_attr "type" "uncond_branch")
                   (eq (symbol_ref "following_call (insn)")
                       (const_int 0)))
  [(eq_attr "in_branch_delay" "true") (nil) (nil)])

;; Memory. Disregarding Cache misses, the Mustang memory times are:
;; load: 2, fpload: 3
;; store, fpstore: 3, no D-cache operations should be scheduled.

;; The Timex (aka 700) has two floating-point units: ALU, and MUL/DIV/SQRT.
;; Timings:
;; Instruction  Time  Unit     Minimum Distance (unit contention)
;; fcpy         3     ALU      2
;; fabs         3     ALU      2
;; fadd         3     ALU      2
;; fsub         3     ALU      2
;; fcmp         3     ALU      2
;; fcnv         3     ALU      2
;; fmpyadd      3     ALU,MPY  2
;; fmpysub      3     ALU,MPY  2
;; fmpycfxt     3     ALU,MPY  2
;; fmpy         3     MPY      2
;; fmpyi        3     MPY      2
;; fdiv,sgl     10    MPY      10
;; fdiv,dbl     12    MPY      12
;; fsqrt,sgl    14    MPY      14
;; fsqrt,dbl    18    MPY      18
;;
;; We don't model fmpyadd/fmpysub properly as those instructions
;; keep both the FP ALU and MPY units busy. Given that these
;; processors are obsolete, I'm not going to spend the time to
;; model those instructions correctly.

(define_automaton "pa700")
(define_cpu_unit "dummy_700,mem_700,fpalu_700,fpmpy_700" "pa700")

(define_insn_reservation "W0" 4
  (and (eq_attr "type" "fpcc")
       (eq_attr "cpu" "700"))
  "fpalu_700*2")

(define_insn_reservation "W1" 3
  (and (eq_attr "type" "fpalu")
       (eq_attr "cpu" "700"))
  "fpalu_700*2")

(define_insn_reservation "W2" 3
  (and (eq_attr "type" "fpmulsgl,fpmuldbl")
       (eq_attr "cpu" "700"))
  "fpmpy_700*2")

(define_insn_reservation "W3" 10
  (and (eq_attr "type" "fpdivsgl")
       (eq_attr "cpu" "700"))
  "fpmpy_700*10")

(define_insn_reservation "W4" 12
  (and (eq_attr "type" "fpdivdbl")
       (eq_attr "cpu" "700"))
  "fpmpy_700*12")

(define_insn_reservation "W5" 14
  (and (eq_attr "type" "fpsqrtsgl")
       (eq_attr "cpu" "700"))
  "fpmpy_700*14")

(define_insn_reservation "W6" 18
  (and (eq_attr "type" "fpsqrtdbl")
       (eq_attr "cpu" "700"))
  "fpmpy_700*18")

(define_insn_reservation "W7" 2
  (and (eq_attr "type" "load")
       (eq_attr "cpu" "700"))
  "mem_700")

(define_insn_reservation "W8" 2
  (and (eq_attr "type" "fpload")
       (eq_attr "cpu" "700"))
  "mem_700")

(define_insn_reservation "W9" 3
  (and (eq_attr "type" "store")
       (eq_attr "cpu" "700"))
  "mem_700*3")

(define_insn_reservation "W10" 3
  (and (eq_attr "type" "fpstore")
       (eq_attr "cpu" "700"))
  "mem_700*3")

(define_insn_reservation "W11" 1
  (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,load,fpload,store,fpstore")
       (eq_attr "cpu" "700"))
  "dummy_700")

;; We have a bypass for all computations in the FP unit which feed an
;; FP store as long as the sizes are the same.
(define_bypass 2 "W1,W2" "W10" "hppa_fpstore_bypass_p")
(define_bypass 9 "W3" "W10" "hppa_fpstore_bypass_p")
(define_bypass 11 "W4" "W10" "hppa_fpstore_bypass_p")
(define_bypass 13 "W5" "W10" "hppa_fpstore_bypass_p")
(define_bypass 17 "W6" "W10" "hppa_fpstore_bypass_p")

;; We have an "anti-bypass" for FP loads which feed an FP store.
(define_bypass 4 "W8" "W10" "hppa_fpstore_bypass_p")

;; Function units for the 7100 and 7150. The 7100/7150 can dual-issue
;; floating point computations with non-floating point computations (fp loads
;; and stores are not fp computations).
;;
;; Memory. Disregarding Cache misses, memory loads take two cycles; stores also
;; take two cycles, during which no Dcache operations should be scheduled.
;; Any special cases are handled in pa_adjust_cost. The 7100, 7150 and 7100LC
;; all have the same memory characteristics if one disregards cache misses.
;;
;; The 7100/7150 has three floating-point units: ALU, MUL, and DIV.
;; There's no value in modeling the ALU and MUL separately though
;; since there can never be a functional unit conflict given the
;; latency and issue rates for those units.
;;
;; Timings:
;; Instruction  Time  Unit     Minimum Distance (unit contention)
;; fcpy         2     ALU      1
;; fabs         2     ALU      1
;; fadd         2     ALU      1
;; fsub         2     ALU      1
;; fcmp         2     ALU      1
;; fcnv         2     ALU      1
;; fmpyadd      2     ALU,MPY  1
;; fmpysub      2     ALU,MPY  1
;; fmpycfxt     2     ALU,MPY  1
;; fmpy         2     MPY      1
;; fmpyi        2     MPY      1
;; fdiv,sgl     8     DIV      8
;; fdiv,dbl     15    DIV      15
;; fsqrt,sgl    8     DIV      8
;; fsqrt,dbl    15    DIV      15

(define_automaton "pa7100")
(define_cpu_unit "i_7100, f_7100,fpmac_7100,fpdivsqrt_7100,mem_7100" "pa7100")

(define_insn_reservation "X0" 2
  (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
       (eq_attr "cpu" "7100"))
  "f_7100,fpmac_7100")

(define_insn_reservation "X1" 8
  (and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
       (eq_attr "cpu" "7100"))
  "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*7")

(define_insn_reservation "X2" 15
  (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
       (eq_attr "cpu" "7100"))
  "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*14")

(define_insn_reservation "X3" 2
  (and (eq_attr "type" "load")
       (eq_attr "cpu" "7100"))
  "i_7100+mem_7100")

(define_insn_reservation "X4" 2
  (and (eq_attr "type" "fpload")
       (eq_attr "cpu" "7100"))
  "i_7100+mem_7100")

(define_insn_reservation "X5" 2
  (and (eq_attr "type" "store")
       (eq_attr "cpu" "7100"))
  "i_7100+mem_7100,mem_7100")

(define_insn_reservation "X6" 2
  (and (eq_attr "type" "fpstore")
       (eq_attr "cpu" "7100"))
  "i_7100+mem_7100,mem_7100")

(define_insn_reservation "X7" 1
  (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore")
       (eq_attr "cpu" "7100"))
  "i_7100")

;; We have a bypass for all computations in the FP unit which feed an
;; FP store as long as the sizes are the same.
(define_bypass 1 "X0" "X6" "hppa_fpstore_bypass_p")
(define_bypass 7 "X1" "X6" "hppa_fpstore_bypass_p")
(define_bypass 14 "X2" "X6" "hppa_fpstore_bypass_p")

;; We have an "anti-bypass" for FP loads which feed an FP store.
(define_bypass 3 "X4" "X6" "hppa_fpstore_bypass_p")

;; The 7100LC has three floating-point units: ALU, MUL, and DIV.
;; There's no value in modeling the ALU and MUL separately though
;; since there can never be a functional unit conflict that
;; can be avoided given the latency, issue rates and mandatory
;; one cycle cpu-wide lock for a double precision fp multiply.
;;
;; Timings:
;; Instruction   Time  Unit     Minimum Distance (unit contention)
;; fcpy          2     ALU      1
;; fabs          2     ALU      1
;; fadd          2     ALU      1
;; fsub          2     ALU      1
;; fcmp          2     ALU      1
;; fcnv          2     ALU      1
;; fmpyadd,sgl   2     ALU,MPY  1
;; fmpyadd,dbl   3     ALU,MPY  2
;; fmpysub,sgl   2     ALU,MPY  1
;; fmpysub,dbl   3     ALU,MPY  2
;; fmpycfxt,sgl  2     ALU,MPY  1
;; fmpycfxt,dbl  3     ALU,MPY  2
;; fmpy,sgl      2     MPY      1
;; fmpy,dbl      3     MPY      2
;; fmpyi         3     MPY      2
;; fdiv,sgl      8     DIV      8
;; fdiv,dbl      15    DIV      15
;; fsqrt,sgl     8     DIV      8
;; fsqrt,dbl     15    DIV      15
;;
;; The PA7200 is just like the PA7100LC except that there is
;; no store-store penalty.
;;
;; The PA7300 is just like the PA7200 except that there is
;; no store-load penalty.
;;
;; Note there are some aspects of the 7100LC we are not modeling
;; at the moment. I'll be reviewing the 7100LC scheduling info
;; shortly and updating this description.
;;
;; load-load pairs
;; store-store pairs
;; other issue modeling

(define_automaton "pa7100lc")
(define_cpu_unit "i0_7100lc, i1_7100lc, f_7100lc" "pa7100lc")
(define_cpu_unit "fpmac_7100lc" "pa7100lc")
(define_cpu_unit "mem_7100lc" "pa7100lc")

;; Double precision multiplies lock the entire CPU for one
;; cycle. There is no way to avoid this lock and trying to
;; schedule around the lock is pointless and thus there is no
;; value in trying to model this lock.
;;
;; Not modeling the lock allows us to treat fp multiplies just
;; like any other FP alu instruction. It allows for a smaller
;; DFA and may reduce register pressure.
(define_insn_reservation "Y0" 2
  (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
       (eq_attr "cpu" "7100LC,7200,7300"))
  "f_7100lc,fpmac_7100lc")

;; fp division and sqrt instructions lock the entire CPU for
;; 7 cycles (single precision) or 14 cycles (double precision).
;; There is no way to avoid this lock and trying to schedule
;; around the lock is pointless and thus there is no value in
;; trying to model this lock. Not modeling the lock allows
;; for a smaller DFA and may reduce register pressure.
(define_insn_reservation "Y1" 1
  (and (eq_attr "type" "fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
       (eq_attr "cpu" "7100LC,7200,7300"))
  "f_7100lc")

(define_insn_reservation "Y2" 2
  (and (eq_attr "type" "load")
       (eq_attr "cpu" "7100LC,7200,7300"))
  "i1_7100lc+mem_7100lc")

(define_insn_reservation "Y3" 2
  (and (eq_attr "type" "fpload")
       (eq_attr "cpu" "7100LC,7200,7300"))
  "i1_7100lc+mem_7100lc")

(define_insn_reservation "Y4" 2
  (and (eq_attr "type" "store")
       (eq_attr "cpu" "7100LC"))
  "i1_7100lc+mem_7100lc,mem_7100lc")

(define_insn_reservation "Y5" 2
  (and (eq_attr "type" "fpstore")
       (eq_attr "cpu" "7100LC"))
  "i1_7100lc+mem_7100lc,mem_7100lc")

(define_insn_reservation "Y6" 1
  (and (eq_attr "type" "shift,nullshift")
       (eq_attr "cpu" "7100LC,7200,7300"))
  "i1_7100lc")

(define_insn_reservation "Y7" 1
  (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore,shift,nullshift")
       (eq_attr "cpu" "7100LC,7200,7300"))
  "(i0_7100lc|i1_7100lc)")

;; The 7200 has a store-load penalty
(define_insn_reservation "Y8" 2
  (and (eq_attr "type" "store")
       (eq_attr "cpu" "7200"))
  "i1_7100lc,mem_7100lc")

;; NOTE(review): the visible text is cut off partway through the "Y9"
;; reservation.  The form below is reproduced exactly as far as it goes
;; (its reservation string and closing parenthesis are missing) and must
;; be completed from the upstream file rather than guessed.
(define_insn_reservation "Y9" 2
  (and (eq_attr "type" "fpstore")
       (eq_attr "cpu" "7200"))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -