⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pa.md

📁 linux下的gcc编译器
💻 MD
📖 第 1 页 / 共 5 页
字号:
;;- Machine description for HP PA-RISC architecture for GNU C compiler
;;   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
;;   2002, 2003 Free Software Foundation, Inc.
;;   Contributed by the Center for Software Science at the University
;;   of Utah.

;; This file is part of GNU CC.

;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;; This gcc Version 2 machine description is inspired by sparc.md and
;; mips.md.

;;- See file "rtl.def" for documentation on define_insn, match_*, et al.

;; Insn type.
;; Used to default other attribute values.

;; type "unary" insns have one input operand (1) and one output operand (0)
;; type "binary" insns have two input operands (1,2) and one output (0)

(define_attr "type"
  "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,parallel_branch"
  (const_string "binary"))

(define_attr "pa_combine_type"
  "fmpy,faddsub,uncond_branch,addmove,none"
  (const_string "none"))

;; Processor type (for scheduling, not code generation) -- this attribute
;; must exactly match the processor_type enumeration in pa.h.
;;
;; FIXME: Add 800 scheduling for completeness?

(define_attr "cpu" "700,7100,7100LC,7200,7300,8000"
  (const (symbol_ref "pa_cpu_attr")))

;; Length (in # of bytes).
;;
;; The clauses of the cond are tested in order and the first test that
;; matches supplies the value.  "shift" and "nullshift" are listed in both
;; the third and the fourth clause, so for those two types the third
;; clause (operand 2, lengths 4/12) always wins and the fourth clause is
;; only ever reached for "move" and "unary".
(define_attr "length" ""
  (cond [(eq_attr "type" "load,fpload")
         (if_then_else (match_operand 1 "symbolic_memory_operand" "")
                       (const_int 8) (const_int 4))

         (eq_attr "type" "store,fpstore")
         (if_then_else (match_operand 0 "symbolic_memory_operand" "")
                       (const_int 8) (const_int 4))

         (eq_attr "type" "binary,shift,nullshift")
         (if_then_else (match_operand 2 "arith_operand" "")
                       (const_int 4) (const_int 12))

         (eq_attr "type" "move,unary,shift,nullshift")
         (if_then_else (match_operand 1 "arith_operand" "")
                       (const_int 4) (const_int 8))]

        (const_int 4)))

;; Attribute values assumed for inline asm statements.
(define_asm_attributes
  [(set_attr "length" "4")
   (set_attr "type" "multi")])

;; Attributes for instruction and branch scheduling

;; For conditional branches.
;; The leading "!" in the type list negates the test: an insn qualifies
;; only if its type is NOT one of the listed values and its length is 4.
(define_attr "in_branch_delay" "false,true"
  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
                     (eq_attr "length" "4"))
                (const_string "true")
                (const_string "false")))

;; Disallow instructions which use the FPU since they will tie up the FPU
;; even if the instruction is nullified.
(define_attr "in_nullified_branch_delay" "false,true"
  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch")
                     (eq_attr "length" "4"))
                (const_string "true")
                (const_string "false")))

;; For calls and millicode calls.  Allow unconditional branches in the
;; delay slot.
(define_attr "in_call_delay" "false,true"
  (cond [(and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
              (eq_attr "length" "4"))
           (const_string "true")
         (eq_attr "type" "uncond_branch")
           (if_then_else (ne (symbol_ref "TARGET_JUMP_IN_DELAY")
                             (const_int 0))
                         (const_string "true")
                         (const_string "false"))]
        (const_string "false")))

;; Each define_delay below lists one delay slot as a triple:
;; (eligible for the slot, eligible if annulled-when-true,
;;  eligible if annulled-when-false); (nil) means annulling is not used.

;; Call delay slot description.
(define_delay (eq_attr "type" "call")
  [(eq_attr "in_call_delay" "true") (nil) (nil)])

;; Millicode call delay slot description.
(define_delay (eq_attr "type" "milli")
  [(eq_attr "in_call_delay" "true") (nil) (nil)])

;; Return and other similar instructions.
(define_delay (eq_attr "type" "branch,parallel_branch")
  [(eq_attr "in_branch_delay" "true") (nil) (nil)])

;; Floating point conditional branch delay slot description.
(define_delay (eq_attr "type" "fbranch")
  [(eq_attr "in_branch_delay" "true")
   (eq_attr "in_nullified_branch_delay" "true")
   (nil)])

;; Integer conditional branch delay slot description.
;; Nullification of conditional branches on the PA is dependent on the
;; direction of the branch.  Forward branches nullify true and
;; backward branches nullify false.
;; If the direction is unknown
;; then nullification is not allowed.
;;
;; The slot triple is (eligible, eligible if annulled-when-true,
;; eligible if annulled-when-false); the attr_flag tests restrict each
;; annulling form to forward or backward branches respectively.
(define_delay (eq_attr "type" "cbranch")
  [(eq_attr "in_branch_delay" "true")
   (and (eq_attr "in_nullified_branch_delay" "true")
        (attr_flag "forward"))
   (and (eq_attr "in_nullified_branch_delay" "true")
        (attr_flag "backward"))])

;; Unconditional branches get a delay slot only when the C expression
;; "following_call (insn)" evaluates to zero.
(define_delay (and (eq_attr "type" "uncond_branch")
                   (eq (symbol_ref "following_call (insn)")
                       (const_int 0)))
  [(eq_attr "in_branch_delay" "true") (nil) (nil)])

;; Memory. Disregarding Cache misses, the Mustang memory times are:
;; load: 2, fpload: 3
;; store, fpstore: 3, no D-cache operations should be scheduled.

;; The Timex (aka 700) has two floating-point units: ALU, and MUL/DIV/SQRT.
;; Timings:
;; Instruction  Time    Unit     Minimum Distance (unit contention)
;; fcpy         3       ALU      2
;; fabs         3       ALU      2
;; fadd         3       ALU      2
;; fsub         3       ALU      2
;; fcmp         3       ALU      2
;; fcnv         3       ALU      2
;; fmpyadd      3       ALU,MPY  2
;; fmpysub      3       ALU,MPY  2
;; fmpycfxt     3       ALU,MPY  2
;; fmpy         3       MPY      2
;; fmpyi        3       MPY      2
;; fdiv,sgl     10      MPY      10
;; fdiv,dbl     12      MPY      12
;; fsqrt,sgl    14      MPY      14
;; fsqrt,dbl    18      MPY      18
;;
;; We don't model fmpyadd/fmpysub properly as those instructions
;; keep both the FP ALU and MPY units busy.
;; Given that these
;; processors are obsolete, I'm not going to spend the time to
;; model those instructions correctly.

;; Reservation layout used throughout:
;;   (define_insn_reservation NAME DEFAULT-LATENCY CONDITION UNIT-REGEXP)
;; In a unit regexp "," advances to the next cycle, "+" reserves units in
;; the same cycle, and "*N" repeats the reservation for N cycles.

(define_automaton "pa700")
(define_cpu_unit "dummy_700,mem_700,fpalu_700,fpmpy_700" "pa700")

;; NOTE(review): if type "fpcc" corresponds to the fcmp row of the 700
;; timing table, that table gives 3 cycles while this latency is 4 --
;; confirm this is intentional.
(define_insn_reservation "W0" 4
  (and (eq_attr "type" "fpcc")
       (eq_attr "cpu" "700"))
  "fpalu_700*2")

(define_insn_reservation "W1" 3
  (and (eq_attr "type" "fpalu")
       (eq_attr "cpu" "700"))
  "fpalu_700*2")

(define_insn_reservation "W2" 3
  (and (eq_attr "type" "fpmulsgl,fpmuldbl")
       (eq_attr "cpu" "700"))
  "fpmpy_700*2")

(define_insn_reservation "W3" 10
  (and (eq_attr "type" "fpdivsgl")
       (eq_attr "cpu" "700"))
  "fpmpy_700*10")

(define_insn_reservation "W4" 12
  (and (eq_attr "type" "fpdivdbl")
       (eq_attr "cpu" "700"))
  "fpmpy_700*12")

(define_insn_reservation "W5" 14
  (and (eq_attr "type" "fpsqrtsgl")
       (eq_attr "cpu" "700"))
  "fpmpy_700*14")

(define_insn_reservation "W6" 18
  (and (eq_attr "type" "fpsqrtdbl")
       (eq_attr "cpu" "700"))
  "fpmpy_700*18")

(define_insn_reservation "W7" 2
  (and (eq_attr "type" "load")
       (eq_attr "cpu" "700"))
  "mem_700")

;; NOTE(review): the memory-timing comment for this processor says
;; "fpload: 3" but this reservation uses a latency of 2 -- confirm.
(define_insn_reservation "W8" 2
  (and (eq_attr "type" "fpload")
       (eq_attr "cpu" "700"))
  "mem_700")

(define_insn_reservation "W9" 3
  (and (eq_attr "type" "store")
       (eq_attr "cpu" "700"))
  "mem_700*3")

(define_insn_reservation "W10" 3
  (and (eq_attr "type" "fpstore")
       (eq_attr "cpu" "700"))
  "mem_700*3")

;; Catch-all: every type not covered by W0-W10 (note the leading "!").
(define_insn_reservation "W11" 1
  (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,load,fpload,store,fpstore")
       (eq_attr "cpu" "700"))
  "dummy_700")

;; We have a bypass for all computations in the FP unit which feed an
;; FP store as long as the sizes are the same.
;; (define_bypass LATENCY PRODUCERS CONSUMERS GUARD-FUNCTION)
(define_bypass 2 "W1,W2" "W10" "hppa_fpstore_bypass_p")
(define_bypass 9 "W3" "W10" "hppa_fpstore_bypass_p")
(define_bypass 11 "W4" "W10" "hppa_fpstore_bypass_p")
(define_bypass 13 "W5" "W10" "hppa_fpstore_bypass_p")
(define_bypass 17 "W6" "W10" "hppa_fpstore_bypass_p")

;; We have an
;; "anti-bypass" for FP loads which feed an FP store.
(define_bypass 4 "W8" "W10" "hppa_fpstore_bypass_p")

;; Function units for the 7100 and 7150.  The 7100/7150 can dual-issue
;; floating point computations with non-floating point computations (fp loads
;; and stores are not fp computations).
;;
;; Memory. Disregarding Cache misses, memory loads take two cycles; stores also
;; take two cycles, during which no Dcache operations should be scheduled.
;; Any special cases are handled in pa_adjust_cost.  The 7100, 7150 and 7100LC
;; all have the same memory characteristics if one disregards cache misses.
;;
;; The 7100/7150 has three floating-point units: ALU, MUL, and DIV.
;; There's no value in modeling the ALU and MUL separately though
;; since there can never be a functional unit conflict given the
;; latency and issue rates for those units.
;;
;; Timings:
;; Instruction  Time    Unit     Minimum Distance (unit contention)
;; fcpy         2       ALU      1
;; fabs         2       ALU      1
;; fadd         2       ALU      1
;; fsub         2       ALU      1
;; fcmp         2       ALU      1
;; fcnv         2       ALU      1
;; fmpyadd      2       ALU,MPY  1
;; fmpysub      2       ALU,MPY  1
;; fmpycfxt     2       ALU,MPY  1
;; fmpy         2       MPY      1
;; fmpyi        2       MPY      1
;; fdiv,sgl     8       DIV      8
;; fdiv,dbl     15      DIV      15
;; fsqrt,sgl    8       DIV      8
;; fsqrt,dbl    15      DIV      15

(define_automaton "pa7100")
(define_cpu_unit "i_7100, f_7100,fpmac_7100,fpdivsqrt_7100,mem_7100" "pa7100")

(define_insn_reservation "X0" 2
  (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
       (eq_attr "cpu" "7100"))
  "f_7100,fpmac_7100")

(define_insn_reservation "X1" 8
  (and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
       (eq_attr "cpu" "7100"))
  "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*7")

(define_insn_reservation "X2" 15
  (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
       (eq_attr "cpu" "7100"))
  "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*14")

(define_insn_reservation "X3" 2
  (and (eq_attr "type" "load")
       (eq_attr "cpu" "7100"))
  "i_7100+mem_7100")

(define_insn_reservation "X4" 2
  (and (eq_attr "type" "fpload")
       (eq_attr "cpu" "7100"))
  "i_7100+mem_7100")

(define_insn_reservation "X5" 2
  (and (eq_attr "type" "store")
       (eq_attr "cpu" "7100"))
  "i_7100+mem_7100,mem_7100")

(define_insn_reservation "X6" 2
  (and (eq_attr "type" "fpstore")
       (eq_attr "cpu" "7100"))
  "i_7100+mem_7100,mem_7100")

;; Catch-all: every type not covered by X0-X6 (note the leading "!").
(define_insn_reservation "X7" 1
  (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore")
       (eq_attr "cpu" "7100"))
  "i_7100")

;; We have a bypass for all computations in the FP unit which feed an
;; FP store as long as the sizes are the same.
(define_bypass 1 "X0" "X6" "hppa_fpstore_bypass_p")
(define_bypass 7 "X1" "X6" "hppa_fpstore_bypass_p")
(define_bypass 14 "X2" "X6" "hppa_fpstore_bypass_p")

;; We have an "anti-bypass" for FP loads which feed an FP store.
(define_bypass 3 "X4" "X6" "hppa_fpstore_bypass_p")

;; The 7100LC has three floating-point units: ALU, MUL, and DIV.
;; There's no value in modeling the ALU and MUL separately though
;; since there can never be a functional unit conflict that
;; can be avoided given the latency, issue rates and mandatory
;; one cycle cpu-wide lock for a double precision fp multiply.
;;
;; Timings:
;; Instruction   Time    Unit     Minimum Distance (unit contention)
;; fcpy          2       ALU      1
;; fabs          2       ALU      1
;; fadd          2       ALU      1
;; fsub          2       ALU      1
;; fcmp          2       ALU      1
;; fcnv          2       ALU      1
;; fmpyadd,sgl   2       ALU,MPY  1
;; fmpyadd,dbl   3       ALU,MPY  2
;; fmpysub,sgl   2       ALU,MPY  1
;; fmpysub,dbl   3       ALU,MPY  2
;; fmpycfxt,sgl  2       ALU,MPY  1
;; fmpycfxt,dbl  3       ALU,MPY  2
;; fmpy,sgl      2       MPY      1
;; fmpy,dbl      3       MPY      2
;; fmpyi         3       MPY      2
;; fdiv,sgl      8       DIV      8
;; fdiv,dbl      15      DIV      15
;; fsqrt,sgl     8       DIV      8
;; fsqrt,dbl     15      DIV      15
;;
;; The PA7200 is just like the PA7100LC except that there is
;; no store-store penalty.
;;
;; The PA7300 is just like the PA7200 except that there is
;; no store-load penalty.
;;
;; Note there are some aspects of the 7100LC we are not modeling
;; at the moment.
;; I'll be reviewing the 7100LC scheduling info
;; shortly and updating this description.
;;
;;   load-load pairs
;;   store-store pairs
;;   other issue modeling

;; One automaton is shared by the 7100LC, 7200 and 7300; the reservations
;; that use it select the processor through the "cpu" attribute.
(define_automaton "pa7100lc")
(define_cpu_unit "i0_7100lc, i1_7100lc, f_7100lc" "pa7100lc")
(define_cpu_unit "fpmac_7100lc" "pa7100lc")
(define_cpu_unit "mem_7100lc" "pa7100lc")

;; Double precision multiplies lock the entire CPU for one
;; cycle.  There is no way to avoid this lock and trying to
;; schedule around the lock is pointless and thus there is no
;; value in trying to model this lock.
;;
;; Not modeling the lock allows us to treat fp multiplies just
;; like any other FP alu instruction.  It allows for a smaller
;; DFA and may reduce register pressure.
(define_insn_reservation "Y0" 2
  (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
       (eq_attr "cpu" "7100LC,7200,7300"))
  "f_7100lc,fpmac_7100lc")

;; fp division and sqrt instructions lock the entire CPU for
;; 7 cycles (single precision) or 14 cycles (double precision).
;; There is no way to avoid this lock and trying to schedule
;; around the lock is pointless and thus there is no value in
;; trying to model this lock.
;; Not modeling the lock allows
;; for a smaller DFA and may reduce register pressure.
(define_insn_reservation "Y1" 1
  (and (eq_attr "type" "fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
       (eq_attr "cpu" "7100LC,7200,7300"))
  "f_7100lc")

(define_insn_reservation "Y2" 2
  (and (eq_attr "type" "load")
       (eq_attr "cpu" "7100LC,7200,7300"))
  "i1_7100lc+mem_7100lc")

(define_insn_reservation "Y3" 2
  (and (eq_attr "type" "fpload")
       (eq_attr "cpu" "7100LC,7200,7300"))
  "i1_7100lc+mem_7100lc")

;; Y4 and Y5 apply to the 7100LC only; the 7200 has its own store
;; reservations further down.
(define_insn_reservation "Y4" 2
  (and (eq_attr "type" "store")
       (eq_attr "cpu" "7100LC"))
  "i1_7100lc+mem_7100lc,mem_7100lc")

(define_insn_reservation "Y5" 2
  (and (eq_attr "type" "fpstore")
       (eq_attr "cpu" "7100LC"))
  "i1_7100lc+mem_7100lc,mem_7100lc")

;; Shifts are restricted to the i1 unit.
(define_insn_reservation "Y6" 1
  (and (eq_attr "type" "shift,nullshift")
       (eq_attr "cpu" "7100LC,7200,7300"))
  "i1_7100lc")

;; Catch-all: every type not covered above (note the leading "!") may
;; issue on either integer unit.
(define_insn_reservation "Y7" 1
  (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore,shift,nullshift")
       (eq_attr "cpu" "7100LC,7200,7300"))
  "(i0_7100lc|i1_7100lc)")

;; The 7200 has a store-load penalty
(define_insn_reservation "Y8" 2
  (and (eq_attr "type" "store")
       (eq_attr "cpu" "7200"))
  "i1_7100lc,mem_7100lc")

(define_insn_reservation "Y9" 2
  (and (eq_attr "type" "fpstore")
       (eq_attr "cpu" "7200"))

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -