📄 alpha.md
字号:
;; Machine description for DEC Alpha for GNU C compiler
;; Copyright (C) 1992, 93-98, 1999 Free Software Foundation, Inc.
;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

;; This file is part of GNU CC.

;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING. If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;- See file "rtl.def" for documentation on define_insn, match_*, et al.

;; Uses of UNSPEC in this file:
;;
;;      0       arg_home
;;      1       cttz
;;      2       insxh
;;      3       mskxh
;;      4       cvtlq
;;      5       cvtql
;;      6       nt_lda
;;
;; UNSPEC_VOLATILE:
;;
;;      0       imb
;;      1       blockage
;;      2       builtin_setjmp_receiver
;;      3       builtin_longjmp
;;      4       trapb
;;      5       prologue_stack_probe_loop
;;      6       realign
;;      7       exception_receiver

;; Processor type -- this attribute must exactly match the processor_type
;; enumeration in alpha.h.

(define_attr "cpu" "ev4,ev5,ev6"
  (const (symbol_ref "alpha_cpu")))

;; Define an insn type attribute. This is used in function unit delay
;; computations, among other purposes. For the most part, we use the names
;; defined in the EV4 documentation, but add a few that we have to know about
;; separately.

(define_attr "type"
  "ild,fld,ldsym,ist,fst,ibr,fbr,jsr,iadd,ilog,shift,icmov,fcmov,icmp,imul,fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi"
  (const_string "iadd"))

;; Describe a user's asm statement.

(define_asm_attributes
  [(set_attr "type" "multi")])

;; Define the operand size an insn operates on.
;; Used primarily by mul and div operations that have size dependent timings.

(define_attr "opsize" "si,di,udi"
  (const_string "di"))

;; The TRAP_TYPE attribute marks instructions that may generate traps
;; (which are imprecise and may need a trapb if software completion
;; is desired).

(define_attr "trap" "no,yes"
  (const_string "no"))

;; The length of an instruction sequence in bytes.

(define_attr "length" ""
  (const_int 4))

;; On EV4 there are two classes of resources to consider: resources needed
;; to issue, and resources needed to execute. IBUS[01] are in the first
;; category. ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second.
;; (There are a few other register-like resources, but ...)

; First, describe all of the issue constraints with single cycle delays.
; All insns need a bus, but all except loads require one or the other.

(define_function_unit "ev4_ibus0" 1 0
  (and (eq_attr "cpu" "ev4")
       (eq_attr "type" "fst,fbr,iadd,imul,ilog,shift,icmov,icmp"))
  1 1)

(define_function_unit "ev4_ibus1" 1 0
  (and (eq_attr "cpu" "ev4")
       (eq_attr "type" "ist,ibr,jsr,fadd,fcmov,fcpys,fmul,fdiv,misc"))
  1 1)

; Memory delivers its result in three cycles. Actually return one and
; take care of this in adjust_cost, since we want to handle user-defined
; memory latencies.

(define_function_unit "ev4_abox" 1 0
  (and (eq_attr "cpu" "ev4")
       (eq_attr "type" "ild,fld,ldsym,ist,fst"))
  1 1)

; Branches have no delay cost, but do tie up the unit for two cycles.

(define_function_unit "ev4_bbox" 1 1
  (and (eq_attr "cpu" "ev4")
       (eq_attr "type" "ibr,fbr,jsr"))
  2 2)

; Arithmetic insns normally have their results available after
; two cycles. There are a number of exceptions. They are encoded in
; ADJUST_COST.
; Some of the other insns have similar exceptions.

(define_function_unit "ev4_ebox" 1 0
  (and (eq_attr "cpu" "ev4")
       (eq_attr "type" "iadd,ilog,shift,icmov,icmp,misc"))
  2 1)

(define_function_unit "imul" 1 0
  (and (eq_attr "cpu" "ev4")
       (and (eq_attr "type" "imul")
            (eq_attr "opsize" "si")))
  21 19)

(define_function_unit "imul" 1 0
  (and (eq_attr "cpu" "ev4")
       (and (eq_attr "type" "imul")
            (eq_attr "opsize" "!si")))
  23 21)

(define_function_unit "ev4_fbox" 1 0
  (and (eq_attr "cpu" "ev4")
       (eq_attr "type" "fadd,fmul,fcpys,fcmov"))
  6 1)

(define_function_unit "fdiv" 1 0
  (and (eq_attr "cpu" "ev4")
       (and (eq_attr "type" "fdiv")
            (eq_attr "opsize" "si")))
  34 30)

(define_function_unit "fdiv" 1 0
  (and (eq_attr "cpu" "ev4")
       (and (eq_attr "type" "fdiv")
            (eq_attr "opsize" "di")))
  63 59)

;; EV5 scheduling. EV5 can issue 4 insns per clock.
;;
;; EV5 has two asymmetric integer units. Model this with E0 & E1 along
;; with the combined resource EBOX.

(define_function_unit "ev5_ebox" 2 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "!fbr,fcmov,fadd,fmul,fcpys,fdiv"))
  1 1)

; Memory takes at least 2 clocks.
; Return one from here and fix up with
; user-defined latencies in adjust_cost.

(define_function_unit "ev5_ebox" 2 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "ild,fld,ldsym"))
  1 1)

; Loads can dual issue with one another, but loads and stores do not mix.

(define_function_unit "ev5_e0" 1 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "ild,fld,ldsym"))
  1 1
  [(eq_attr "type" "ist,fst")])

; Stores, shifts, multiplies can only issue to E0

(define_function_unit "ev5_e0" 1 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "ist,fst,shift,imul"))
  1 1)

; Motion video insns also issue only to E0, and take two ticks.

(define_function_unit "ev5_e0" 1 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "mvi"))
  2 1)

; Conditional moves always take 2 ticks.

(define_function_unit "ev5_ebox" 2 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "icmov"))
  2 1)

; Branches can only issue to E1

(define_function_unit "ev5_e1" 1 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "ibr,jsr"))
  1 1)

; Multiplies also use the integer multiplier.
; ??? How to: "No instruction can be issued to pipe E0 exactly two
; cycles before an integer multiplication completes."

(define_function_unit "imul" 1 0
  (and (eq_attr "cpu" "ev5")
       (and (eq_attr "type" "imul")
            (eq_attr "opsize" "si")))
  8 4)

(define_function_unit "imul" 1 0
  (and (eq_attr "cpu" "ev5")
       (and (eq_attr "type" "imul")
            (eq_attr "opsize" "di")))
  12 8)

(define_function_unit "imul" 1 0
  (and (eq_attr "cpu" "ev5")
       (and (eq_attr "type" "imul")
            (eq_attr "opsize" "udi")))
  14 8)

;; Similarly for the FPU we have two asymmetric units.
;; But fcpys can issue
;; on either so we have to play the game again.

(define_function_unit "ev5_fbox" 2 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "fadd,fcmov,fmul,fcpys,fbr,fdiv"))
  4 1)

(define_function_unit "ev5_fm" 1 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "fmul"))
  4 1)

; Add and cmov as you would expect; fbr never produces a result;
; fdiv issues through fa to the divider,

(define_function_unit "ev5_fa" 1 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "fadd,fcmov,fbr,fdiv"))
  4 1)

; ??? How to: "No instruction can be issued to pipe FA exactly five
; cycles before a floating point divide completes."

(define_function_unit "fdiv" 1 0
  (and (eq_attr "cpu" "ev5")
       (and (eq_attr "type" "fdiv")
            (eq_attr "opsize" "si")))
  15 15)                                ; 15 to 31 data dependent

(define_function_unit "fdiv" 1 0
  (and (eq_attr "cpu" "ev5")
       (and (eq_attr "type" "fdiv")
            (eq_attr "opsize" "di")))
  22 22)                                ; 22 to 60 data dependent

;; EV6 scheduling. EV6 can issue 4 insns per clock.
;;
;; EV6 has two symmetric pairs ("clusters") of two asymmetric integer units
;; ("upper" and "lower"), yielding pipe names U0, U1, L0, L1.

;; Conditional moves decompose into two independent primitives, each
;; taking one cycle. Since ev6 is out-of-order, we can't see anything
;; but two cycles.

(define_function_unit "ev6_ebox" 4 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "icmov"))
  2 1)

(define_function_unit "ev6_ebox" 4 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "!fbr,fcmov,fadd,fmul,fcpys,fdiv,fsqrt"))
  1 1)

;; Integer loads take at least 3 clocks, and only issue to lower units.
;; Return one from here and fix up with user-defined latencies in adjust_cost.

(define_function_unit "ev6_l" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "ild,ldsym,ist,fst"))
  1 1)

;; FP loads take at least 4 clocks.
;; Return two from here...

(define_function_unit "ev6_l" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "fld"))
  2 1)

;; Motion video insns also issue only to U0, and take three ticks.

(define_function_unit "ev6_u0" 1 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "mvi"))
  3 1)

(define_function_unit "ev6_u" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "mvi"))
  3 1)

;; Shifts issue to either upper pipe.

(define_function_unit "ev6_u" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "shift"))
  1 1)

;; Multiplies issue only to U1, and all take 7 ticks.
;; Rather than create a new function unit just for U1, reuse IMUL

(define_function_unit "imul" 1 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "imul"))
  7 1)

(define_function_unit "ev6_u" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "imul"))
  7 1)

;; Branches issue to either upper pipe

(define_function_unit "ev6_u" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "ibr"))
  3 1)

;; Calls only issue to L0.

(define_function_unit "ev6_l0" 1 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "jsr"))
  1 1)

(define_function_unit "ev6_l" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "jsr"))
  1 1)

;; Ftoi/itof only issue to lower pipes

(define_function_unit "ev6_l" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "ftoi"))
  3 1)

(define_function_unit "ev6_l" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "itof"))
  4 1)

;; For the FPU we are very similar to EV5, except there's no insn that
;; can issue to fm & fa, so we get to leave that out.
(define_function_unit "ev6_fm" 1 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "fmul"))
  4 1)

(define_function_unit "ev6_fa" 1 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "fadd,fcpys,fbr,fdiv,fsqrt"))
  4 1)

(define_function_unit "ev6_fa" 1 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "fcmov"))
  8 1)

(define_function_unit "fdiv" 1 0
  (and (eq_attr "cpu" "ev6")
       (and (eq_attr "type" "fdiv")
            (eq_attr "opsize" "si")))
  12 10)

(define_function_unit "fdiv" 1 0
  (and (eq_attr "cpu" "ev6")
       (and (eq_attr "type" "fdiv")
            (eq_attr "opsize" "di")))
  15 13)

(define_function_unit "fsqrt" 1 0
  (and (eq_attr "cpu" "ev6")
       (and (eq_attr "type" "fsqrt")
            (eq_attr "opsize" "si")))
  16 14)

(define_function_unit "fsqrt" 1 0
  (and (eq_attr "cpu" "ev6")
       (and (eq_attr "type" "fsqrt")
            (eq_attr "opsize" "di")))
  32 30)

; ??? The FPU communicates with memory and the integer register file
; via two fp store units. We need a slot in the fst immediately, and
; a slot in LOW after the operand data is ready. At which point the
; data may be moved either to the store queue or the integer register
; file and the insn retired.

;; First define the arithmetic insns. Note that the 32-bit forms also
;; sign-extend.

;; Handle 32-64 bit extension from memory to a floating point register
;; specially, since this occurs frequently in int->double conversions.
;; This is done with a define_split after reload converting the plain
;; sign-extension into a load+unspec, which of course results in lds+cvtlq.
;;
;; Note that while we must retain the =f case in the insn for reload's
;; benefit, it should be eliminated after reload, so we should never emit
;; code for that case.
;; But we don't reject the possibility.

;; Sign-extend an SImode value to DImode. Three alternatives, matched
;; position-for-position by the constraints, the output template lines and
;; the "type" attribute:
;;   0: register source             -> addl with $31
;;   1: memory source, integer dest -> ldl
;;   2: memory source, FP dest      -> lds followed by cvtlq (two insns,
;;      hence length 8 instead of the default)
;; The "@" template must list one alternative per line.

(define_insn "extendsidi2"
  [(set (match_operand:DI 0 "register_operand" "=r,r,?f")
        (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,m")))]
  ""
  "@
   addl %1,$31,%0
   ldl %0,%1
   lds %0,%1\;cvtlq %0,%0"
  [(set_attr "type" "iadd,ild,fld")
   (set_attr "length" "*,*,8")])

;; Due to issues with CLASS_CANNOT_CHANGE_SIZE, we cannot use a subreg here.
;; After reload, split a memory->FP-register sign extension into an SImode
;; load followed by UNSPEC 4 (cvtlq in this file's UNSPEC table). Operand 2
;; is built as an SImode REG with the same register number as operand 0.

(define_split
  [(set (match_operand:DI 0 "hard_fp_register_operand" "")
        (sign_extend:DI (match_operand:SI 1 "memory_operand" "")))]
  "reload_completed"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (unspec:DI [(match_dup 2)] 4))]
  "operands[2] = gen_rtx_REG (SImode, REGNO (operands[0]));")

;; Anonymous pattern matching the UNSPEC 4 form produced by the split above;
;; both operands use the "f" constraint and it emits a single cvtlq.

(define_insn ""
  [(set (match_operand:DI 0 "register_operand" "=f")
        (unspec:DI [(match_operand:SI 1 "register_operand" "f")] 4))]
  ""
  "cvtlq %1,%0"
  [(set_attr "type" "fadd")])

;; Do addsi3 the way expand_binop would do if we didn't have one. This
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -