📄 athlon.md
字号:
;; AMD Athlon Scheduling
;; Copyright (C) 2002 Free Software Foundation, Inc.
;;
;; This file is part of GNU CC.
;;
;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;; Reading guide for the unit descriptions below (per the GCC internals
;; manual, "Specifying Function Units"):
;;
;;   (define_function_unit NAME MULTIPLICITY SIMULTANEITY TEST
;;                         READY-DELAY ISSUE-DELAY [CONFLICT-LIST])
;;
;; MULTIPLICITY is the number of identical units, SIMULTANEITY 0 means
;; "no limit", READY-DELAY is the result latency and ISSUE-DELAY is the
;; number of cycles the unit stays busy.  When CONFLICT-LIST is given,
;; ISSUE-DELAY applies only to following insns matching that list.

;; Which decoder path an instruction takes: "vector" (VectorPath) for the
;; complex instruction types listed here, "direct" (DirectPath) otherwise.
(define_attr "athlon_decode" "direct,vector"
  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld")
	   (const_string "vector")
	 ;; Pushes whose operand 1 is in memory.
	 (and (eq_attr "type" "push")
	      (match_operand 1 "memory_operand" ""))
	   (const_string "vector")
	 ;; XFmode FP loads and stores.
	 (and (eq_attr "type" "fmov")
	      (and (eq_attr "memory" "load,store")
		   (eq_attr "mode" "XF")))
	   (const_string "vector")]
	(const_string "direct")))

;; The Athlon contains three pipelined FP units, three integer units and
;; three address generation units.
;;
;; The predecode logic determines the boundaries of instructions in the 64
;; byte cache line.  So the cache line straddling problem of the K6 might be
;; an issue here as well, but it is not noted in the documentation.
;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;;
;; Decoded macro instructions are then passed to the 72 entry instruction
;; control unit, which passes them to the specialized integer (18 entry)
;; and fp (36 entry) schedulers.
;;
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.

;; Decoders.  A VectorPath insn occupies the single vector decoder.
(define_function_unit "athlon_vectordec" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_decode" "vector"))
  1 1)

;; DirectPath insns share three direct decoders.
(define_function_unit "athlon_directdec" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_decode" "direct"))
  1 1)

;; A DirectPath insn also reserves the vector decoder, but (via the
;; conflict list) only delays a following VectorPath insn; this models
;; "three DirectPath or one VectorPath per cycle".
(define_function_unit "athlon_vectordec" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_decode" "direct"))
  1 1 [(eq_attr "athlon_decode" "vector")])

;; Integer execution units.
(define_function_unit "athlon_ieu" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,ishift1,rotate,rotate1,ibr,call,callv,icmov,cld,pop,setcc,push"))
  1 1)

(define_function_unit "athlon_ieu" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "str"))
  15 15)

(define_function_unit "athlon_ieu" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "imul"))
  5 0)

(define_function_unit "athlon_ieu" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "idiv"))
  42 0)

;; Integer multiply/divide unit (a single instance).
(define_function_unit "athlon_muldiv" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "imul"))
  5 0)

(define_function_unit "athlon_muldiv" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "idiv"))
  42 42)

;; Which FP execution pipe(s) an instruction may use.  The conditions are
;; tested in order, so the final plain "fmov" case only catches FP moves
;; not matched by the more specific fmov cases above it.
(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any"
  (cond [(eq_attr "type" "fop,fcmp,fistp")
	   (const_string "add")
	 (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov")
	   (const_string "mul")
	 (and (eq_attr "type" "fmov")
	      (eq_attr "memory" "store,both"))
	   (const_string "store")
	 (and (eq_attr "type" "fmov")
	      (eq_attr "memory" "load"))
	   (const_string "any")
	 (and (eq_attr "type" "fmov")
	      (ior (match_operand:SI 1 "register_operand" "")
		   (match_operand 1 "immediate_operand" "")))
	   (const_string "store")
	 (eq_attr "type" "fmov")
	   (const_string "muladd")]
	(const_string "none")))

;; We use latency 1 for the per-pipe unit definitions further below.  This
;; is OK for modeling collisions in the execution units.  The real latencies
;; are modeled in the "athlon_fp" pipeline.

;; fsin, fcos: 96-192
;; fsincos: 107-211
;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode.
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fpspc"))
  100 1)

;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode.
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fdiv"))
  24 1)

(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fop,fmul,fistp"))
  4 1)

;; XFmode loads are slow.
;; XFmode store is slow too (8 cycles), but we don't need to model it,
;; because there are no dependent instructions.
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
	    (and (eq_attr "memory" "load")
		 (eq_attr "mode" "XF"))))
  10 1)

(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fmov,fsgn"))
  2 1)

;; fcmp and ftst instructions
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
	    (eq_attr "athlon_decode" "direct")))
  3 1)

;; fcmpi instructions.
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
	    (eq_attr "athlon_decode" "vector")))
  3 1)

(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fcmov"))
  7 1)

;; Per-pipe occupancy, selected by the athlon_fpunits attribute.
(define_function_unit "athlon_fp_mul" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_fpunits" "mul"))
  1 1)

(define_function_unit "athlon_fp_add" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_fpunits" "add"))
  1 1)

(define_function_unit "athlon_fp_muladd" 2 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_fpunits" "muladd,mul,add"))
  1 1)

(define_function_unit "athlon_fp_store" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_fpunits" "store"))
  1 1)

;; We don't need to model the Address Generation Unit, since we don't model
;; the re-order buffer yet and thus we never schedule more than three
;; operations at a time.  Later we may want to experiment with MD_SCHED
;; macros modeling the decoders independently of the functional units.
;(define_function_unit "athlon_agu" 3 0
;  (and (eq_attr "cpu" "athlon")
;       (and (eq_attr "memory" "!none")
;            (eq_attr "athlon_fpunits" "none")))
;  1 1)

;; Model the load unit to avoid too long sequences of loads.  We don't need
;; to model the store queue, since it is hardly going to be a bottleneck.
(define_function_unit "athlon_load" 2 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "memory" "load,both"))
  1 1)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -