📄 pa.md
字号:
;;- Machine description for HP PA-RISC architecture for GNU C compiler
;; Copyright (C) 1992, 93-98, 1999 Free Software Foundation, Inc.
;; Contributed by the Center for Software Science at the University
;; of Utah.

;; This file is part of GNU CC.

;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING. If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;; This gcc Version 2 machine description is inspired by sparc.md and
;; mips.md.

;;- See file "rtl.def" for documentation on define_insn, match_*, et al.

;; Insn type.
;; Used to default other attribute values.

;; type "unary" insns have one input operand (1) and one output operand (0)
;; type "binary" insns have two input operands (1,2) and one output (0)

(define_attr "type"
  "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,parallel_branch"
  (const_string "binary"))

(define_attr "pa_combine_type"
  "fmpy,faddsub,uncond_branch,addmove,none"
  (const_string "none"))

;; Processor type (for scheduling, not code generation) -- this attribute
;; must exactly match the processor_type enumeration in pa.h.
;;
;; FIXME: Add 800 scheduling for completeness?

(define_attr "cpu" "700,7100,7100LC,7200,8000"
  (const (symbol_ref "pa_cpu_attr")))

;; Length.
;; NOTE(review): this comment used to read "in # of insns", but the values
;; used are 4, 8 and 12 and the delay slot attributes below require a
;; length of exactly 4, so the unit appears to be bytes -- confirm.
;; NOTE(review): "shift,nullshift" in the last test are already matched by
;; the preceding test; assuming cond takes the first test that is true,
;; they never select the last arm -- confirm before relying on it.
(define_attr "length" ""
  (cond [(eq_attr "type" "load,fpload")
         (if_then_else (match_operand 1 "symbolic_memory_operand" "")
                       (const_int 8) (const_int 4))

         (eq_attr "type" "store,fpstore")
         (if_then_else (match_operand 0 "symbolic_memory_operand" "")
                       (const_int 8) (const_int 4))

         (eq_attr "type" "binary,shift,nullshift")
         (if_then_else (match_operand 2 "arith_operand" "")
                       (const_int 4) (const_int 12))

         (eq_attr "type" "move,unary,shift,nullshift")
         (if_then_else (match_operand 1 "arith_operand" "")
                       (const_int 4) (const_int 8))]

        (const_int 4)))

;; Attribute values assumed for inline asm statements.
(define_asm_attributes
  [(set_attr "length" "4")
   (set_attr "type" "multi")])

;; Attributes for instruction and branch scheduling

;; For conditional branches.
(define_attr "in_branch_delay" "false,true"
  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
                     (eq_attr "length" "4"))
                (const_string "true")
                (const_string "false")))

;; Disallow instructions which use the FPU since they will tie up the FPU
;; even if the instruction is nullified.
(define_attr "in_nullified_branch_delay" "false,true"
  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch")
                     (eq_attr "length" "4"))
                (const_string "true")
                (const_string "false")))

;; For calls and millicode calls. Allow unconditional branches in the
;; delay slot.
(define_attr "in_call_delay" "false,true"
  (cond [(and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
              (eq_attr "length" "4"))
         (const_string "true")

         (eq_attr "type" "uncond_branch")
         (if_then_else (ne (symbol_ref "TARGET_JUMP_IN_DELAY")
                           (const_int 0))
                       (const_string "true")
                       (const_string "false"))]

        (const_string "false")))

;; Each define_delay below gives three tests per delay slot: which insns
;; may fill the slot, which may fill it when it is annulled if the branch
;; is true, and which when it is annulled if the branch is false.  (nil)
;; means that form of annulling is not available.

;; Call delay slot description.
(define_delay (eq_attr "type" "call")
  [(eq_attr "in_call_delay" "true") (nil) (nil)])

;; millicode call delay slot description. Note it disallows delay slot
;; when TARGET_PORTABLE_RUNTIME is true.
(define_delay (eq_attr "type" "milli")
  [(and (eq_attr "in_call_delay" "true")
        (eq (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)))
   (nil) (nil)])

;; Return and other similar instructions.
(define_delay (eq_attr "type" "branch,parallel_branch")
  [(eq_attr "in_branch_delay" "true") (nil) (nil)])

;; Floating point conditional branch delay slot description.
(define_delay (eq_attr "type" "fbranch")
  [(eq_attr "in_branch_delay" "true")
   (eq_attr "in_nullified_branch_delay" "true")
   (nil)])

;; Integer conditional branch delay slot description.
;; Nullification of conditional branches on the PA is dependent on the
;; direction of the branch. Forward branches nullify true and
;; backward branches nullify false.
;; If the direction is unknown
;; then nullification is not allowed.
(define_delay (eq_attr "type" "cbranch")
  [(eq_attr "in_branch_delay" "true")
   (and (eq_attr "in_nullified_branch_delay" "true")
        (attr_flag "forward"))
   (and (eq_attr "in_nullified_branch_delay" "true")
        (attr_flag "backward"))])

;; Unconditional branch delay slot description.  It only applies when
;; "following_call (insn)" evaluates to zero; no nullified forms are
;; offered.
(define_delay (and (eq_attr "type" "uncond_branch")
                   (eq (symbol_ref "following_call (insn)")
                       (const_int 0)))
  [(eq_attr "in_branch_delay" "true") (nil) (nil)])

;; Function units of the HPPA. The following data is for the 700 CPUs
;; (Mustang CPU + Timex FPU aka PA-89) because that's what I have the docs for.
;; Scheduling instructions for PA-83 machines according to the Snake
;; constraints shouldn't hurt.

;; (define_function_unit {name} {num-units} {n-users} {test}
;;                       {ready-delay} {issue-delay} [{conflict-list}])

;; The integer ALU.
;; (Noted only for documentation; units that take one cycle do not need to
;; be specified.)

;; (define_function_unit "alu" 1 0
;;   (and (eq_attr "type" "unary,shift,nullshift,binary,move,address")
;;        (eq_attr "cpu" "700"))
;;   1 0)

;; Memory.
;; Disregarding Cache misses, the Mustang memory times are:
;; load: 2, fpload: 3
;; store, fpstore: 3, no D-cache operations should be scheduled.
;; NOTE(review): the text above says fpload takes 3, but the unit below
;; gives load and fpload the same ready-delay of 2 -- confirm which is
;; intended.

(define_function_unit "pa700memory" 1 0
  (and (eq_attr "type" "load,fpload")
       (eq_attr "cpu" "700")) 2 0)
(define_function_unit "pa700memory" 1 0
  (and (eq_attr "type" "store,fpstore")
       (eq_attr "cpu" "700")) 3 3)

;; The Timex (aka 700) has two floating-point units: ALU, and MUL/DIV/SQRT.
;; Timings:
;; Instruction   Time   Unit      Minimum Distance (unit contention)
;; fcpy          3      ALU       2
;; fabs          3      ALU       2
;; fadd          3      ALU       2
;; fsub          3      ALU       2
;; fcmp          3      ALU       2
;; fcnv          3      ALU       2
;; fmpyadd       3      ALU,MPY   2
;; fmpysub       3      ALU,MPY   2
;; fmpycfxt      3      ALU,MPY   2
;; fmpy          3      MPY       2
;; fmpyi         3      MPY       2
;; fdiv,sgl      10     MPY       10
;; fdiv,dbl      12     MPY       12
;; fsqrt,sgl     14     MPY       14
;; fsqrt,dbl     18     MPY       18

;; NOTE(review): the table lists fcmp at 3, while "fpcc" below uses a
;; ready-delay of 4 -- presumably deliberate; confirm.
(define_function_unit "pa700fp_alu" 1 0
  (and (eq_attr "type" "fpcc")
       (eq_attr "cpu" "700")) 4 2)
(define_function_unit "pa700fp_alu" 1 0
  (and (eq_attr "type" "fpalu")
       (eq_attr "cpu" "700")) 3 2)
(define_function_unit "pa700fp_mpy" 1 0
  (and (eq_attr "type" "fpmulsgl,fpmuldbl")
       (eq_attr "cpu" "700")) 3 2)
(define_function_unit "pa700fp_mpy" 1 0
  (and (eq_attr "type" "fpdivsgl")
       (eq_attr "cpu" "700")) 10 10)
(define_function_unit "pa700fp_mpy" 1 0
  (and (eq_attr "type" "fpdivdbl")
       (eq_attr "cpu" "700")) 12 12)
(define_function_unit "pa700fp_mpy" 1 0
  (and (eq_attr "type" "fpsqrtsgl")
       (eq_attr "cpu" "700")) 14 14)
(define_function_unit "pa700fp_mpy" 1 0
  (and (eq_attr "type" "fpsqrtdbl")
       (eq_attr "cpu" "700")) 18 18)

;; Function units for the 7100 and 7150. The 7100/7150 can dual-issue
;; floating point computations with non-floating point computations (fp loads
;; and stores are not fp computations).
;;
;; Memory. Disregarding Cache misses, memory loads take two cycles; stores also
;; take two cycles, during which no Dcache operations should be scheduled.
;; Any special cases are handled in pa_adjust_cost.
;; The 7100, 7150 and 7100LC
;; all have the same memory characteristics if one disregards cache misses.
(define_function_unit "pa7100memory" 1 0
  (and (eq_attr "type" "load,fpload")
       (eq_attr "cpu" "7100,7100LC")) 2 0)
(define_function_unit "pa7100memory" 1 0
  (and (eq_attr "type" "store,fpstore")
       (eq_attr "cpu" "7100,7100LC")) 2 2)

;; The 7100/7150 has three floating-point units: ALU, MUL, and DIV.
;; Timings:
;; Instruction   Time   Unit      Minimum Distance (unit contention)
;; fcpy          2      ALU       1
;; fabs          2      ALU       1
;; fadd          2      ALU       1
;; fsub          2      ALU       1
;; fcmp          2      ALU       1
;; fcnv          2      ALU       1
;; fmpyadd       2      ALU,MPY   1
;; fmpysub       2      ALU,MPY   1
;; fmpycfxt      2      ALU,MPY   1
;; fmpy          2      MPY       1
;; fmpyi         2      MPY       1
;; fdiv,sgl      8      DIV       8
;; fdiv,dbl      15     DIV       15
;; fsqrt,sgl     8      DIV       8
;; fsqrt,dbl     15     DIV       15

(define_function_unit "pa7100fp_alu" 1 0
  (and (eq_attr "type" "fpcc,fpalu")
       (eq_attr "cpu" "7100")) 2 1)
(define_function_unit "pa7100fp_mpy" 1 0
  (and (eq_attr "type" "fpmulsgl,fpmuldbl")
       (eq_attr "cpu" "7100")) 2 1)
(define_function_unit "pa7100fp_div" 1 0
  (and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
       (eq_attr "cpu" "7100")) 8 8)
(define_function_unit "pa7100fp_div" 1 0
  (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
       (eq_attr "cpu" "7100")) 15 15)

;; To encourage dual issue we define function units corresponding to
;; the instructions which can be dual issued. This is a rather crude
;; approximation, the "pa7100nonflop" test in particular could be refined.
(define_function_unit "pa7100flop" 1 1
  (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
       (eq_attr "cpu" "7100")) 1 1)

(define_function_unit "pa7100nonflop" 1 1
  (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
       (eq_attr "cpu" "7100")) 1 1)

;; Memory subsystem works just like 7100/7150 (except for cache miss times which
;; we don't model here).

;; The 7100LC has three floating-point units: ALU, MUL, and DIV.
;; Note divides and sqrt flops lock the cpu until the flop is
;; finished.
;; fmpy and xmpyu (fmpyi) lock the cpu for one cycle.
;; There's no way to avoid the penalty.
;; Timings:
;; Instruction    Time   Unit      Minimum Distance (unit contention)
;; fcpy           2      ALU       1
;; fabs           2      ALU       1
;; fadd           2      ALU       1
;; fsub           2      ALU       1
;; fcmp           2      ALU       1
;; fcnv           2      ALU       1
;; fmpyadd,sgl    2      ALU,MPY   1
;; fmpyadd,dbl    3      ALU,MPY   2
;; fmpysub,sgl    2      ALU,MPY   1
;; fmpysub,dbl    3      ALU,MPY   2
;; fmpycfxt,sgl   2      ALU,MPY   1
;; fmpycfxt,dbl   3      ALU,MPY   2
;; fmpy,sgl       2      MPY       1
;; fmpy,dbl       3      MPY       2
;; fmpyi          3      MPY       2
;; fdiv,sgl       8      DIV       8
;; fdiv,dbl       15     DIV       15
;; fsqrt,sgl      8      DIV       8
;; fsqrt,dbl      15     DIV       15

(define_function_unit "pa7100LCfp_alu" 1 0
  (and (eq_attr "type" "fpcc,fpalu")
       (eq_attr "cpu" "7100LC,7200")) 2 1)
(define_function_unit "pa7100LCfp_mpy" 1 0
  (and (eq_attr "type" "fpmulsgl")
       (eq_attr "cpu" "7100LC,7200")) 2 1)
(define_function_unit "pa7100LCfp_mpy" 1 0
  (and (eq_attr "type" "fpmuldbl")
       (eq_attr "cpu" "7100LC,7200")) 3 2)
(define_function_unit "pa7100LCfp_div" 1 0
  (and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
       (eq_attr "cpu" "7100LC,7200")) 8 8)
(define_function_unit "pa7100LCfp_div" 1 0
  (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
       (eq_attr "cpu" "7100LC,7200")) 15 15)

;; Define the various functional units for dual-issue.

;; There's only one floating point unit.
(define_function_unit "pa7100LCflop" 1 1
  (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
       (eq_attr "cpu" "7100LC,7200")) 1 1)

;; Shifts and memory ops execute in only one of the integer ALUs
(define_function_unit "pa7100LCshiftmem" 1 1
  (and (eq_attr "type" "shift,nullshift,load,fpload,store,fpstore")
       (eq_attr "cpu" "7100LC,7200")) 1 1)

;; We have two basic ALUs.
(define_function_unit "pa7100LCalu" 2 1
  (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
       (eq_attr "cpu" "7100LC,7200")) 1 1)

;; I don't have complete information on the PA7200; however, most of
;; what I've heard makes it look like a 7100LC without the store-store
;; penalty. So that's how we'll model it.

;; Memory.
;; Disregarding Cache misses, memory loads and stores take
;; two cycles. Any special cases are handled in pa_adjust_cost.
(define_function_unit "pa7200memory" 1 0
  (and (eq_attr "type" "load,fpload,store,fpstore")
       (eq_attr "cpu" "7200")) 2 0)

;; I don't have detailed information on the PA7200 FP pipeline, so I
;; treat it just like the 7100LC pipeline.
;; Similarly for the multi-issue fake units.

;;
;; Scheduling for the PA8000 is somewhat different than scheduling for a
;; traditional architecture.
;;
;; The PA8000 has a large (56) entry reorder buffer that is split between
;; memory and non-memory operations.
;;
;; The PA8000 can issue two memory and two non-memory operations per cycle to
;; the function units. Similarly, the PA8000 can retire two memory and two
;; non-memory operations per cycle.
;;
;; Given the large reorder buffer, the processor can hide most latencies.
;; According to HP, they've got the best results by scheduling for retirement
;; bandwidth with limited latency scheduling for floating point operations.
;; Latency for integer operations and memory references is ignored.
;;
;; We claim floating point operations have a 2 cycle latency and are
;; fully pipelined, except for div and sqrt which are not pipelined.
;;
;; It is not necessary to define the shifter and integer alu units.
;;
;; These first two define_function_unit descriptions model retirement from
;; the reorder buffer.
(define_function_unit "pa8000lsu" 2 1
  (and (eq_attr "type" "load,fpload,store,fpstore")
       (eq_attr "cpu" "8000")) 1 1)

(define_function_unit "pa8000alu" 2 1
  (and (eq_attr "type" "!load,fpload,store,fpstore")
       (eq_attr "cpu" "8000")) 1 1)

;; Claim floating point ops have a 2 cycle latency, excluding div and
;; sqrt, which are not pipelined and issue to different units.
(define_function_unit "pa8000fmac" 2 0
  (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
       (eq_attr "cpu" "8000")) 2 1)

(define_function_unit "pa8000fdiv" 2 1
  (and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
       (eq_attr "cpu" "8000")) 17 17)

(define_function_unit "pa8000fdiv" 2 1
  (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
       (eq_attr "cpu" "8000")) 31 31)

;; Compare instructions.
;; This controls RTL generation and register allocation.

;; We generate RTL for comparisons and branches by having the cmpxx
;; patterns store away the operands. Then, the scc and bcc patterns
;; emit RTL for both the compare and the branch.
;;

;; Records the two SImode operands and the comparison kind (CMP_SI) in
;; hppa_compare_op0, hppa_compare_op1 and hppa_branch_type, then finishes
;; with DONE.
(define_expand "cmpsi"
  [(set (reg:CC 0)
        (compare:CC (match_operand:SI 0 "reg_or_0_operand" "")
                    (match_operand:SI 1 "arith5_operand" "")))]
  ""
  "{ hppa_compare_op0 = operands[0]; hppa_compare_op1 = operands[1]; hppa_branch_type = CMP_SI; DONE;}")

(define_expand "cmpsf"
  [(set (reg:CCFP 0)
        (compare:CCFP (match_operand:SF 0 "reg_or_0_operand" "")
                      (match_operand:SF 1 "reg_or_0_operand" "")))]
  "! TARGET_SOFT_FLOAT"
  "{ hppa_compare_op0 = operands[0];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -