📄 sparc.md
字号:
;;- Machine description for SPARC chip for GNU C compiler
;;  Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
;;  1999 Free Software Foundation, Inc.
;;  Contributed by Michael Tiemann (tiemann@cygnus.com)
;;  64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
;;  at Cygnus Support.

;; This file is part of GNU CC.

;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;- See file "rtl.def" for documentation on define_insn, match_*, et al.

;; Uses of UNSPEC and UNSPEC_VOLATILE in this file:
;;
;; UNSPEC:           0   movsi_{lo_sum,high}_pic
;;                       pic_lo_sum_di
;;                       pic_sethi_di
;;                   1   update_return
;;                   2   get_pc
;;                   5   movsi_{,lo_sum_,high_}pic_label_ref
;;                   6   seth44
;;                   7   setm44
;;                   8   setl44
;;                   9   sethh
;;                   10  setlm
;;                   11  embmedany_sethi, embmedany_brsum
;;                   12  movsf_const_high
;;                   13  embmedany_textuhi
;;                   14  embmedany_texthi
;;                   15  embmedany_textulo
;;                   16  embmedany_textlo
;;                   17  movsf_const_lo
;;                   18  sethm
;;                   19  setlo
;;
;; UNSPEC_VOLATILE:  0   blockage
;;                   1   flush_register_windows
;;                   2   goto_handler_and_restore
;;                   3   goto_handler_and_restore_v9*
;;                   4   flush
;;                   5   nonlocal_goto_receiver
;;

;; The upper 32 fp regs on the v9 can't hold SFmode values.  To deal with this
;; a second register class, EXTRA_FP_REGS, exists for the v9 chip.  The name
;; is a bit of a misnomer as it covers all 64 fp regs.  The corresponding
;; constraint letter is 'e'.
;; To avoid any confusion, 'e' is used instead of
;; 'f' for all DF/TFmode values, including those that are specific to the v8.
;;
;; -mlive-g0 is *not* supported for TARGET_ARCH64, so we don't bother to
;; test TARGET_LIVE_G0 if we have TARGET_ARCH64.

;; Attribute for cpu type.
;; These must match the values for enum processor_type in sparc.h.
;; The value is a compile-time constant taken from `sparc_cpu_attr'.
(define_attr "cpu"
  "v7,cypress,v8,supersparc,sparclite,f930,f934,hypersparc,sparclite86x,sparclet,tsc701,v9,ultrasparc"
  (const (symbol_ref "sparc_cpu_attr")))

;; Attribute for the instruction set.
;; At present we only need to distinguish v9/!v9, but for clarity we
;; test TARGET_V8 too.
;; The tests are tried in order (V9, then V8, then SPARCLET); "v6" is the
;; default when none of them holds.
(define_attr "isa" "v6,v8,v9,sparclet"
  (const
    (cond [(symbol_ref "TARGET_V9") (const_string "v9")
           (symbol_ref "TARGET_V8") (const_string "v8")
           (symbol_ref "TARGET_SPARCLET") (const_string "sparclet")]
          (const_string "v6"))))

;; Architecture size.
(define_attr "arch" "arch32bit,arch64bit"
  (const
    (cond [(symbol_ref "TARGET_ARCH64") (const_string "arch64bit")]
          (const_string "arch32bit"))))

;; Whether -mlive-g0 is in effect.
(define_attr "live_g0" "no,yes"
  (const
    (cond [(symbol_ref "TARGET_LIVE_G0") (const_string "yes")]
          (const_string "no"))))

;; Insn type.
;; Used to default other attribute values.

;; type "unary" insns have one input operand (1) and one output operand (0)
;; type "binary" insns have two input operands (1,2) and one output (0)
;; type "compare" insns have one or two input operands (0,1) and no output
;; type "call_no_delay_slot" is a call followed by an unimp instruction.

;; Insns that do not set "type" explicitly default to "binary".
(define_attr "type"
  "move,unary,binary,compare,load,sload,store,ialu,shift,uncond_branch,branch,call,call_no_delay_slot,return,address,imul,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrt,cmove,multi,misc"
  (const_string "binary"))

;; Set true if insn uses call-clobbered intermediate register.
(define_attr "use_clobbered" "false,true"
  (if_then_else (and (eq_attr "type" "address")
                     (match_operand 0 "clobbered_register" ""))
                (const_string "true")
                (const_string "false")))

;; Length (in # of insns).
;; Derived from "type" and, for several types, from whether a given operand
;; satisfies a predicate; anything not listed has length 1.
(define_attr "length" ""
  (cond [(eq_attr "type" "load,sload,fpload")
         (if_then_else (match_operand 1 "symbolic_memory_operand" "")
                       (const_int 2) (const_int 1))

         (eq_attr "type" "store,fpstore")
         (if_then_else (match_operand 0 "symbolic_memory_operand" "")
                       (const_int 2) (const_int 1))

         (eq_attr "type" "address") (const_int 2)

         (eq_attr "type" "binary")
         (if_then_else (ior (match_operand 2 "arith_operand" "")
                            (match_operand 2 "arith_double_operand" ""))
                       (const_int 1) (const_int 3))

         (eq_attr "type" "multi") (const_int 2)

         (eq_attr "type" "move,unary")
         (if_then_else (ior (match_operand 1 "arith_operand" "")
                            (match_operand 1 "arith_double_operand" ""))
                       (const_int 1) (const_int 2))]

        (const_int 1)))

;; Attribute values assumed for inline asm statements.
(define_asm_attributes
  [(set_attr "length" "1")
   (set_attr "type" "multi")])

;; Attributes for instruction and branch scheduling

;; Whether an insn may be placed in the delay slot of a call.
(define_attr "in_call_delay" "false,true"
  (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,return,multi")
           (const_string "false")
         (eq_attr "type" "load,fpload,store,fpstore")
           (if_then_else (eq_attr "length" "1")
                         (const_string "true")
                         (const_string "false"))
         (eq_attr "type" "address")
           (if_then_else (eq_attr "use_clobbered" "false")
                         (const_string "true")
                         (const_string "false"))]
        (if_then_else (eq_attr "length" "1")
                      (const_string "true")
                      (const_string "false"))))

;; Calls have one delay slot; no annulling in either direction.
(define_delay (eq_attr "type" "call")
  [(eq_attr "in_call_delay" "true") (nil) (nil)])

(define_attr "leaf_function" "false,true"
  (const (symbol_ref "current_function_uses_only_leaf_regs")))

;; Whether an insn may be placed in the delay slot of a return.
(define_attr "in_return_delay" "false,true"
  (if_then_else (and (and (and (eq_attr "type" "move,load,sload,store,binary,ialu")
                               (eq_attr "length" "1"))
                          (eq_attr "leaf_function" "false"))
                     (match_insn "eligible_for_return_delay"))
                (const_string "true")
                (const_string "false")))

;; Return delay slots are only described for the v9 instruction set.
(define_delay (and (eq_attr "type" "return")
                   (eq_attr "isa" "v9"))
  [(eq_attr "in_return_delay" "true") (nil) (nil)])

;; ??? Should implement the notion of predelay slots for floating point
;; branches.  This would allow us to remove the nop always inserted before
;; a floating point branch.

;; ??? It is OK for fill_simple_delay_slots to put load/store instructions
;; in a delay slot, but it is not OK for fill_eager_delay_slots to do so.
;; This is because doing so will add several pipeline stalls to the path
;; that the load/store did not come from.  Unfortunately, there is no way
;; to prevent fill_eager_delay_slots from using load/store without completely
;; disabling them.
;; For the SPEC benchmark set, this is a serious lose,
;; because it prevents us from moving back the final store of inner loops.

;; The three attributes below currently share one condition: an insn is
;; eligible when its length is 1 and its type is none of
;; uncond_branch, branch, call, call_no_delay_slot, multi.

(define_attr "in_branch_delay" "false,true"
  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,call_no_delay_slot,multi")
                     (eq_attr "length" "1"))
                (const_string "true")
                (const_string "false")))

(define_attr "in_uncond_branch_delay" "false,true"
  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,call_no_delay_slot,multi")
                     (eq_attr "length" "1"))
                (const_string "true")
                (const_string "false")))

(define_attr "in_annul_branch_delay" "false,true"
  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,call_no_delay_slot,multi")
                     (eq_attr "length" "1"))
                (const_string "true")
                (const_string "false")))

;; Conditional branches: one delay slot, with an annul condition given by
;; in_annul_branch_delay in the third position of the triple.
(define_delay (eq_attr "type" "branch")
  [(eq_attr "in_branch_delay" "true")
   (nil) (eq_attr "in_annul_branch_delay" "true")])

;; Unconditional branches: one delay slot, no annulling.
(define_delay (eq_attr "type" "uncond_branch")
  [(eq_attr "in_uncond_branch_delay" "true")
   (nil) (nil)])

;; Function units of the SPARC

;; (define_function_unit {name} {num-units} {n-users} {test}
;;                       {ready-delay} {issue-delay} [{conflict-list}])

;; The integer ALU.
;; (Noted only for documentation; units that take one cycle do not need to
;; be specified.)

;; On the sparclite, integer multiply takes 1, 3, or 5 cycles depending on
;; the inputs.

;; (define_function_unit "alu" 1 0
;;  (eq_attr "type" "unary,binary,move,address") 1 0)

;; ---- cypress CY7C602 scheduling:
;; Memory with load-delay of 1 (i.e., 2 cycle load).

(define_function_unit "memory" 1 0
  (and (eq_attr "cpu" "cypress")
       (eq_attr "type" "load,sload,fpload"))
  2 2)

;; SPARC has two floating-point units: the FP ALU,
;; and the FP MUL/DIV/SQRT unit.
;; Instruction timings on the CY7C602 are as follows
;; FABSs    4
;; FADDs/d  5/5
;; FCMPs/d  4/4
;; FDIVs/d  23/37
;; FMOVs    4
;; FMULs/d  5/7
;; FNEGs    4
;; FSQRTs/d 34/63
;; FSUBs/d  5/5
;; FdTOi/s  5/5
;; FsTOi/d  5/5
;; FiTOs/d  9/5

;; The CY7C602 can only support 2 fp insns simultaneously.
;; More insns cause the chip to
;; stall.

;; cypress: floating-point units.

(define_function_unit "fp_alu" 1 0
  (and (eq_attr "cpu" "cypress")
       (eq_attr "type" "fp,fpmove"))
  5 5)

(define_function_unit "fp_mds" 1 0
  (and (eq_attr "cpu" "cypress")
       (eq_attr "type" "fpmul"))
  7 7)

(define_function_unit "fp_mds" 1 0
  (and (eq_attr "cpu" "cypress")
       (eq_attr "type" "fpdivs,fpdivd"))
  37 37)

(define_function_unit "fp_mds" 1 0
  (and (eq_attr "cpu" "cypress")
       (eq_attr "type" "fpsqrt"))
  63 63)

;; ----- The TMS390Z55 scheduling
;; The Supersparc can issue 1 - 3 insns per cycle: up to two integer,
;; one ld/st, one fp.
;; Memory delivers its result in one cycle to IU, zero cycles to FP

(define_function_unit "memory" 1 0
  (and (eq_attr "cpu" "supersparc")
       (eq_attr "type" "load,sload"))
  1 1)

(define_function_unit "memory" 1 0
  (and (eq_attr "cpu" "supersparc")
       (eq_attr "type" "fpload"))
  0 1)

(define_function_unit "memory" 1 0
  (and (eq_attr "cpu" "supersparc")
       (eq_attr "type" "store,fpstore"))
  1 1)

(define_function_unit "shift" 1 0
  (and (eq_attr "cpu" "supersparc")
       (eq_attr "type" "shift"))
  1 1)

;; There are only two write ports to the integer register file
;; A store also uses a write port

(define_function_unit "iwport" 2 0
  (and (eq_attr "cpu" "supersparc")
       (eq_attr "type" "load,sload,store,shift,ialu"))
  1 1)

;; Timings; throughput/latency
;; FADD   1/3    add/sub, format conv, compar, abs, neg
;; FMUL   1/3
;; FDIVs  4/6
;; FDIVd  7/9
;; FSQRTs 6/8
;; FSQRTd 10/12
;; IMUL   4/4

(define_function_unit "fp_alu" 1 0
  (and (eq_attr "cpu" "supersparc")
       (eq_attr "type" "fp,fpmove,fpcmp"))
  3 1)

(define_function_unit "fp_mds" 1 0
  (and (eq_attr "cpu" "supersparc")
       (eq_attr "type" "fpmul"))
  3 1)

(define_function_unit "fp_mds" 1 0
  (and (eq_attr "cpu" "supersparc")
       (eq_attr "type" "fpdivs"))
  6 4)

(define_function_unit "fp_mds" 1 0
  (and (eq_attr "cpu" "supersparc")
       (eq_attr "type" "fpdivd"))
  9 7)

(define_function_unit "fp_mds" 1 0
  (and (eq_attr "cpu" "supersparc")
       (eq_attr "type" "fpsqrt"))
  12 10)

;; Integer multiply is described on the "fp_mds" unit as well.
(define_function_unit "fp_mds" 1 0
  (and (eq_attr "cpu" "supersparc")
       (eq_attr "type" "imul"))
  4 4)

;; -----
;; hypersparc/sparclite86x scheduling
;; The Hypersparc can issue 1 - 2 insns per cycle.  The dual issue cases are:
;; L-Ld/St I-Int F-Float B-Branch LI/LF/LB/II/IF/IB/FF/FB
;; II/FF case is only when loading a 32 bit hi/lo constant
;; Single issue insns include call, jmpl, u/smul, u/sdiv, lda, sta, fcmp
;; Memory delivers its result in one cycle to IU

;; Every unit in this group applies to both the "hypersparc" and the
;; "sparclite86x" cpu values.

(define_function_unit "memory" 1 0
  (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
       (eq_attr "type" "load,sload,fpload"))
  1 1)

(define_function_unit "memory" 1 0
  (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
       (eq_attr "type" "store,fpstore"))
  2 1)

(define_function_unit "fp_alu" 1 0
  (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
       (eq_attr "type" "fp,fpmove,fpcmp"))
  1 1)

(define_function_unit "fp_mds" 1 0
  (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
       (eq_attr "type" "fpmul"))
  1 1)

(define_function_unit "fp_mds" 1 0
  (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
       (eq_attr "type" "fpdivs"))
  8 6)

(define_function_unit "fp_mds" 1 0
  (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
       (eq_attr "type" "fpdivd"))
  12 10)

(define_function_unit "fp_mds" 1 0
  (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
       (eq_attr "type" "fpsqrt"))
  17 15)

(define_function_unit "fp_mds" 1 0
  (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
       (eq_attr "type" "imul"))
  17 15)

;; ----- sparclet tsc701 scheduling
;; The tsc701 issues 1 insn per cycle.
;; Results may be written back out of order.

;; Loads take 2 extra cycles to complete and 4 can be buffered at a time.

(define_function_unit "tsc701_load" 4 1
  (and (eq_attr "cpu" "tsc701")
       (eq_attr "type" "load,sload"))
  3 1)

;; Stores take 2(?) extra cycles to complete.
;; It is desirable to not have any memory operation in the following 2 cycles.
;; (???
;; or 2 memory ops in the case of std).

;; The conflict list makes a store conflict with following loads and stores.
(define_function_unit "tsc701_store" 1 0
  (and (eq_attr "cpu" "tsc701")
       (eq_attr "type" "store"))
  3 3
  [(eq_attr "type" "load,sload,store")])

;; The multiply unit has a latency of 5.
(define_function_unit "tsc701_mul" 1 0
  (and (eq_attr "cpu" "tsc701")
       (eq_attr "type" "imul"))
  5 5)

;; ----- The UltraSPARC-1 scheduling
;; UltraSPARC has two integer units.  Shift instructions can only execute
;; on IE0.  Condition code setting instructions, call, and jmpl (including
;; the ret and retl pseudo-instructions) can only execute on IE1.
;; Branch on register uses IE1, but branch on condition code does not.
;; Conditional moves take 2 cycles.  No other instruction can issue in the
;; same cycle as a conditional move.
;; Multiply and divide take many cycles during which no other instructions
;; can issue.
;; Memory delivers its result in two cycles (except for signed loads,
;; which take one cycle more).  One memory instruction can be issued per
;; cycle.

(define_function_unit "memory" 1 0
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "load,fpload"))
  2 1)

;; Signed loads: one cycle more than plain loads.
(define_function_unit "memory" 1 0
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "sload"))
  3 1)

(define_function_unit "memory" 1 0
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "store,fpstore"))
  1 1)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -