📄 alpha.md
字号:
;; Machine description for DEC Alpha for GNU C compiler
;; Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
;; 2000, 2001 Free Software Foundation, Inc.
;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
;;
;; This file is part of GNU CC.
;;
;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.

;; Uses of UNSPEC in this file:

(define_constants
  [(UNSPEC_ARG_HOME      0)
   (UNSPEC_CTTZ          1)
   (UNSPEC_INSXH         2)
   (UNSPEC_MSKXH         3)
   (UNSPEC_CVTQL         4)
   (UNSPEC_NT_LDA        5)
   (UNSPEC_UMK_LAUM      6)
   (UNSPEC_UMK_LALM      7)
   (UNSPEC_UMK_LAL       8)
   (UNSPEC_UMK_LOAD_CIW  9)
   (UNSPEC_LDGP2         10)
   (UNSPEC_LITERAL       11)
   (UNSPEC_LITUSE        12)
   (UNSPEC_SIBCALL       13)
   (UNSPEC_SYMBOL        14)
  ])

;; UNSPEC_VOLATILE:

(define_constants
  [(UNSPECV_IMB          0)
   (UNSPECV_BLOCKAGE     1)
   (UNSPECV_SETJMPR      2)     ; builtin_setjmp_receiver
   (UNSPECV_LONGJMP      3)     ; builtin_longjmp
   (UNSPECV_TRAPB        4)
   (UNSPECV_PSPL         5)     ; prologue_stack_probe_loop
   (UNSPECV_REALIGN      6)
   (UNSPECV_EHR          7)     ; exception_receiver
   (UNSPECV_MCOUNT       8)
   (UNSPECV_FORCE_MOV    9)
   (UNSPECV_LDGP1        10)
   (UNSPECV_PLDGP2       11)    ; prologue ldgp
  ])

;; Where necessary, the suffixes _le and _be are used to distinguish between
;; little-endian and big-endian patterns.
;;
;; Note that the Unicos/Mk assembler does not support the following
;; opcodes: mov, fmov, nop, fnop, unop.

;; Processor type -- this attribute must exactly match the processor_type
;; enumeration in alpha.h.

(define_attr "cpu" "ev4,ev5,ev6"
  (const (symbol_ref "alpha_cpu")))

;; Define an insn type attribute.  This is used in function unit delay
;; computations, among other purposes.  For the most part, we use the names
;; defined in the EV4 documentation, but add a few that we have to know about
;; separately.
;;
;; The value list is a single string; the backslash at the end of its first
;; line is a line continuation and must be followed directly by a newline.

(define_attr "type"
  "ild,fld,ldsym,ist,fst,ibr,fbr,jsr,iadd,ilog,shift,icmov,fcmov,icmp,imul,\
fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi"
  (const_string "iadd"))

;; Describe a user's asm statement.

(define_asm_attributes
  [(set_attr "type" "multi")])

;; Define the operand size an insn operates on.
;; Used primarily by mul
;; and div operations that have size dependent timings.

(define_attr "opsize" "si,di,udi"
  (const_string "di"))

;; The TRAP attribute marks instructions that may generate traps
;; (which are imprecise and may need a trapb if software completion
;; is desired).

(define_attr "trap" "no,yes"
  (const_string "no"))

;; The ROUND_SUFFIX attribute marks which instructions require a
;; rounding-mode suffix.  The value NONE indicates no suffix,
;; the value NORMAL indicates a suffix controlled by alpha_fprm.

(define_attr "round_suffix" "none,normal,c"
  (const_string "none"))

;; The TRAP_SUFFIX attribute marks instructions requiring a trap-mode suffix:
;;   NONE       no suffix
;;   SU         accepts only /su (cmpt et al)
;;   SUI        accepts only /sui (cvtqt and cvtqs)
;;   V_SV       accepts /v and /sv (cvtql only)
;;   V_SV_SVI   accepts /v, /sv and /svi (cvttq only)
;;   U_SU_SUI   accepts /u, /su and /sui (most fp instructions)
;;
;; The actual suffix emitted is controlled by alpha_fptm.

(define_attr "trap_suffix" "none,su,sui,v_sv,v_sv_svi,u_su_sui"
  (const_string "none"))

;; The length of an instruction sequence in bytes.

(define_attr "length" ""
  (const_int 4))

;; On EV4 there are two classes of resources to consider: resources needed
;; to issue, and resources needed to execute.  IBUS[01] are in the first
;; category.  ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second.
;; (There are a few other register-like resources, but ...)

; NOTE(review): per the GCC internals manual the operands of
; define_function_unit are: name, multiplicity, simultaneity, test,
; ready-delay, issue-delay and an optional conflict list -- confirm
; against the manual for the GCC version in use.

; First, describe all of the issue constraints with single cycle delays.
; All insns need a bus, but all except loads require one or the other.
(define_function_unit "ev4_ibus0" 1 0
  (and (eq_attr "cpu" "ev4")
       (eq_attr "type" "fst,fbr,iadd,imul,ilog,shift,icmov,icmp"))
  1 1)

(define_function_unit "ev4_ibus1" 1 0
  (and (eq_attr "cpu" "ev4")
       (eq_attr "type" "ist,ibr,jsr,fadd,fcmov,fcpys,fmul,fdiv,misc"))
  1 1)

; Memory delivers its result in three cycles.
; Actually return one and
; take care of this in adjust_cost, since we want to handle user-defined
; memory latencies.
(define_function_unit "ev4_abox" 1 0
  (and (eq_attr "cpu" "ev4")
       (eq_attr "type" "ild,fld,ldsym,ist,fst"))
  1 1)

; Branches have no delay cost, but do tie up the unit for two cycles.
(define_function_unit "ev4_bbox" 1 1
  (and (eq_attr "cpu" "ev4")
       (eq_attr "type" "ibr,fbr,jsr"))
  2 2)

; Arithmetic insns normally have their results available after
; two cycles.  There are a number of exceptions.  They are encoded in
; ADJUST_COST.  Some of the other insns have similar exceptions.
(define_function_unit "ev4_ebox" 1 0
  (and (eq_attr "cpu" "ev4")
       (eq_attr "type" "iadd,ilog,shift,icmov,icmp,misc"))
  2 1)

(define_function_unit "imul" 1 0
  (and (eq_attr "cpu" "ev4")
       (and (eq_attr "type" "imul")
            (eq_attr "opsize" "si")))
  21 19)

(define_function_unit "imul" 1 0
  (and (eq_attr "cpu" "ev4")
       (and (eq_attr "type" "imul")
            (eq_attr "opsize" "!si")))
  23 21)

(define_function_unit "ev4_fbox" 1 0
  (and (eq_attr "cpu" "ev4")
       (eq_attr "type" "fadd,fmul,fcpys,fcmov"))
  6 1)

(define_function_unit "fdiv" 1 0
  (and (eq_attr "cpu" "ev4")
       (and (eq_attr "type" "fdiv")
            (eq_attr "opsize" "si")))
  34 30)

(define_function_unit "fdiv" 1 0
  (and (eq_attr "cpu" "ev4")
       (and (eq_attr "type" "fdiv")
            (eq_attr "opsize" "di")))
  63 59)

;; EV5 scheduling.  EV5 can issue 4 insns per clock.
;;
;; EV5 has two asymmetric integer units.  Model this with E0 & E1 along
;; with the combined resource EBOX.

(define_function_unit "ev5_ebox" 2 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "!fbr,fcmov,fadd,fmul,fcpys,fdiv"))
  1 1)

; Memory takes at least 2 clocks.
; Return one from here and fix up with
; user-defined latencies in adjust_cost.
(define_function_unit "ev5_ebox" 2 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "ild,fld,ldsym"))
  1 1)

; Loads can dual issue with one another, but loads and stores do not mix.
(define_function_unit "ev5_e0" 1 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "ild,fld,ldsym"))
  1 1
  [(eq_attr "type" "ist,fst")])

; Stores, shifts, multiplies can only issue to E0
(define_function_unit "ev5_e0" 1 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "ist,fst,shift,imul"))
  1 1)

; Motion video insns also issue only to E0, and take two ticks.
(define_function_unit "ev5_e0" 1 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "mvi"))
  2 1)

; Conditional moves always take 2 ticks.
(define_function_unit "ev5_ebox" 2 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "icmov"))
  2 1)

; Branches can only issue to E1
(define_function_unit "ev5_e1" 1 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "ibr,jsr"))
  1 1)

; Multiplies also use the integer multiplier.
; ??? How to: "No instruction can be issued to pipe E0 exactly two
; cycles before an integer multiplication completes."
(define_function_unit "imul" 1 0
  (and (eq_attr "cpu" "ev5")
       (and (eq_attr "type" "imul")
            (eq_attr "opsize" "si")))
  8 4)

(define_function_unit "imul" 1 0
  (and (eq_attr "cpu" "ev5")
       (and (eq_attr "type" "imul")
            (eq_attr "opsize" "di")))
  12 8)

(define_function_unit "imul" 1 0
  (and (eq_attr "cpu" "ev5")
       (and (eq_attr "type" "imul")
            (eq_attr "opsize" "udi")))
  14 8)

;; Similarly for the FPU we have two asymmetric units.
;; But fcpys can issue
;; on either so we have to play the game again.

(define_function_unit "ev5_fbox" 2 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "fadd,fcmov,fmul,fcpys,fbr,fdiv"))
  4 1)

(define_function_unit "ev5_fm" 1 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "fmul"))
  4 1)

; Add and cmov as you would expect; fbr never produces a result;
; fdiv issues through fa to the divider,
(define_function_unit "ev5_fa" 1 0
  (and (eq_attr "cpu" "ev5")
       (eq_attr "type" "fadd,fcmov,fbr,fdiv"))
  4 1)

; ??? How to: "No instruction can be issued to pipe FA exactly five
; cycles before a floating point divide completes."
(define_function_unit "fdiv" 1 0
  (and (eq_attr "cpu" "ev5")
       (and (eq_attr "type" "fdiv")
            (eq_attr "opsize" "si")))
  15 15)                                ; 15 to 31 data dependent

(define_function_unit "fdiv" 1 0
  (and (eq_attr "cpu" "ev5")
       (and (eq_attr "type" "fdiv")
            (eq_attr "opsize" "di")))
  22 22)                                ; 22 to 60 data dependent

;; EV6 scheduling.  EV6 can issue 4 insns per clock.
;;
;; EV6 has two symmetric pairs ("clusters") of two asymmetric integer units
;; ("upper" and "lower"), yielding pipe names U0, U1, L0, L1.

;; Conditional moves decompose into two independent primitives, each
;; taking one cycle.  Since ev6 is out-of-order, we can't see anything
;; but two cycles.
(define_function_unit "ev6_ebox" 4 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "icmov"))
  2 1)

(define_function_unit "ev6_ebox" 4 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "!fbr,fcmov,fadd,fmul,fcpys,fdiv,fsqrt"))
  1 1)

;; Integer loads take at least 3 clocks, and only issue to lower units.
;; Return one from here and fix up with user-defined latencies in adjust_cost.
(define_function_unit "ev6_l" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "ild,ldsym,ist,fst"))
  1 1)

;; FP loads take at least 4 clocks.
;; Return two from here...
(define_function_unit "ev6_l" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "fld"))
  2 1)

;; Motion video insns also issue only to U0, and take three ticks.
(define_function_unit "ev6_u0" 1 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "mvi"))
  3 1)

(define_function_unit "ev6_u" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "mvi"))
  3 1)

;; Shifts issue to either upper pipe.
(define_function_unit "ev6_u" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "shift"))
  1 1)

;; Multiplies issue only to U1, and all take 7 ticks.
;; Rather than create a new function unit just for U1, reuse IMUL
(define_function_unit "imul" 1 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "imul"))
  7 1)

(define_function_unit "ev6_u" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "imul"))
  7 1)

;; Branches issue to either upper pipe
(define_function_unit "ev6_u" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "ibr"))
  3 1)

;; Calls only issue to L0.
(define_function_unit "ev6_l0" 1 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "jsr"))
  1 1)

(define_function_unit "ev6_l" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "jsr"))
  1 1)

;; Ftoi/itof only issue to lower pipes
(define_function_unit "ev6_l" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "ftoi"))
  3 1)

(define_function_unit "ev6_l" 2 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "itof"))
  4 1)

;; For the FPU we are very similar to EV5, except there's no insn that
;; can issue to fm & fa, so we get to leave that out.

(define_function_unit "ev6_fm" 1 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "fmul"))
  4 1)

(define_function_unit "ev6_fa" 1 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "fadd,fcpys,fbr,fdiv,fsqrt"))
  4 1)

(define_function_unit "ev6_fa" 1 0
  (and (eq_attr "cpu" "ev6")
       (eq_attr "type" "fcmov"))
  8 1)

(define_function_unit "fdiv" 1 0
  (and (eq_attr "cpu" "ev6")
       (and (eq_attr "type" "fdiv")
            (eq_attr "opsize" "si")))
  12 10)

(define_function_unit "fdiv" 1 0
  (and (eq_attr "cpu" "ev6")
       (and (eq_attr "type" "fdiv")
            (eq_attr "opsize" "di")))
  15 13)

(define_function_unit "fsqrt" 1 0
  (and (eq_attr "cpu" "ev6")
       (and (eq_attr "type" "fsqrt")
            (eq_attr "opsize" "si")))
  16 14)

(define_function_unit "fsqrt" 1 0
  (and (eq_attr "cpu" "ev6")
       (and (eq_attr "type" "fsqrt")
            (eq_attr "opsize" "di")))
  32 30)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -