📄 init.f90
字号:
!
!
! 6.2 Analysis of vectorisation
! ------------------------------
! The following table breaks down a CG iteration into its discrete
! components of computation. Column 2 shows the frequency of execution
! of these components per CG iteration. In addition, for components
! that involve scalar arithmetic, the number of flops for a single
! execution of the component is given as a function of N, the number of
! lattice sites. For vectorised components, the vector length and the
! number of flops per instruction are given.
!
! N = total number of lattice sites (even and odd).
!
! ----------------------------------------------------------------------
! | | | operation count | |
! | operation | times |------------------------------| total |
! | | used | scalar | vector | flops |
! | | | |vec.len (flops/elem)| |
! |-----------------|---------|---------|--------------------|---------|
! | SU(2) | | | | |
! |-----------------| | | | |
! | {u}.{v} | 2 | 4 N | - - | 8 N |
! | | | | | |
! | inner loop in | | | | |
! | DVECDIR | 48 | - | N/2 ( 7 ) | 224 N |
! | | | | | |
! | {u} + const {v} | 4 | - | 2 N ( 2 ) | 16 N |
! | | | | | |
! | const*{u} or | | | | |
! | {u} + {v} | 16 | - | 2 N ( 1 ) | 32 N |
! |----------------------------------------------------------|---------|
! | total (scalar+vector) | 280 N |
! |----------------------------------------------------------|---------|
! | SU(3) | | | | |
! |-----------------| | | | |
! | {u}.{v} | 2 | 6 N | - - | 12 N |
! | | | | | |
! | inner loop in | | | | |
! | DVECDIR | 96 | - | N/2 ( 11 ) | 528 N |
! | | | | | |
! | {u} + const {v} | 4 | - | 3 N ( 2 ) | 24 N |
! | | | | | |
! | const*{u} or | | | | |
! | {u} + {v} | 16 | - | 3 N ( 1 ) | 48 N |
! |----------------------------------------------------------|---------|
! | total (scalar+vector) | 612 N |
! ----------------------------------------------------------------------
!
!
! As remarked earlier, the only part of the calculation that is not
! vectorisable in the hermitian scalar product, {u}.{v}. This accounts for
! 2.9% of the floating point arithmetic for SU(2) and 2.0% for SU(3) --
! the remaining 97% and 98% respectively are vectorised.
!
! The vector length of vectorised instructions is of the order N
! (the total no. of lattice sites).
!
!
! 6.3 Critical code
! ------------------
! The above table shows that the critical pieces of code (i.e. the
! pieces that perform most of the floating point arithmetic) are the inner
! loops of subroutine DVECDIR, which compute [gauge] * {vector} products.
!
!-----------------------------------------------------------------------------
! }}}
! {{{ PROGRAM cg2init
PROGRAM cg2init
!--------------
INCLUDE 'procs.inc'
INCLUDE 'consts.inc'
INCLUDE 'gauge.inc' ! for array 'gauge'
INCLUDE 'sites.inc' ! for arrays 'psi' & 'psidot'
! ----------------------------
! | Initialise fields : |
! ----------------------------
print *, 'point 1'
CALL init_rand ()
print *, 'point 2'
CALL init_su2 ()
print *, 'point 3'
CALL init_pd ()
print *, 'point 4'
CALL init_psi ()
print *, 'point 5'
OPEN (10, FILE="gauge.dat", FORM="UNFORMATTED", STATUS="REPLACE")
DO ipar = 0, 1
DO idir = 1, ndir
DO igauge = 1, ngauge
WRITE (10) gauge (:,:,:,:, igauge, idir, ipar)
ENDDO
ENDDO
ENDDO
CLOSE (10)
print *, 'point 6'
OPEN (10, FILE="sites.dat", FORM="UNFORMATTED", STATUS="REPLACE")
DO icmplx = 1, ncmplx
DO icolor = 1, ncolor
WRITE (10) psi (:,:,:,:, icolor, icmplx)
ENDDO
ENDDO
DO icmplx = 1, ncmplx
DO icolor = 1, ncolor
WRITE (10) psidot (:,:,:,:, icolor, icmplx)
ENDDO
ENDDO
print *, 'point 7'
CLOSE (10)
print *, 'point 8'
! STOP
END
! }}}
! {{{ SUBROUTINE init_rand ()
SUBROUTINE init_rand ()
!----------------------
INTEGER seed
REAL stored_normal
LOGICAL stored
COMMON / random / seed, stored_normal, stored
!------/ random /
seed = 172375
stored = .FALSE.
END
! }}}
! {{{ SUBROUTINE init_su2 ()
SUBROUTINE init_su2 ()
!---------------------
INCLUDE 'procs.inc'
INCLUDE 'consts.inc'
INCLUDE 'gauge.inc' ! for array 'gauge'
INTEGER parity, dir, z, y, x, t
REAL a (ngauge)
! ----------------------------------------------------------------
! | The gauge field (on the links) is initialised to randomly |
! | distributed SU(2) matrices. They are parameterised by |
! | unit 4-vectors, a(4), thus : |
! | |
! | [ a4 + i.a3 a1 + i.a2 ] |
! | gauge = SU(2) matrix = [ ] |
! | [ -a1 + i.a2 a4 - i.a3 ] |
! ----------------------------------------------------------------
DO parity = 0, 1
DO dir = 1, ndir
DO z = 1, nx
DO y = 1, nx
DO x = 1, nx
DO t = 1, nt / 2
CALL rands3 (a)
gauge (t, x, y, z, :, dir, parity) = a
ENDDO
ENDDO
ENDDO
ENDDO
ENDDO
ENDDO
END
! }}}
! {{{ SUBROUTINE init_pd ()
SUBROUTINE init_pd ()
!-------------------
! --------------------------------------------------------------------
! | Initialise the fermion vector {psidot} at t = 0 . |
! | The correct distribution is obtained by solving : |
! | |
! | [-D + m] {psidot} = {z1 + i.z2} |
! | |
! | where z1, z2 are normally distributed with std.dev. SQRT (2). |
! | [D] is the Dirac matrix, which is a function of the gauge |
! | fields (see the introduction). |
! | |
! | This is solved for {psidot} using Conjugate Gradient (CG) |
! | iteration. However, CG can only solve for hermitian matrices, |
! | so need to multiply both sides by hc.[-D + m] = [ D + m], |
! | and solve : |
! | |
! | [-D*D + m*m] {psidot} = [D + m] {z1 + i.z2} |
! | |
! | N.B. !! Need the gauge fields U to be defined already !! |
! --------------------------------------------------------------------
INCLUDE 'procs.inc'
INCLUDE 'consts.inc'
INCLUDE 'sites.inc' ! for arrays 'psi' & 'psidot'
REAL, DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx) :: random
!HPF$ DISTRIBUTE (*, BLOCK, BLOCK, BLOCK, *, *) ONTO procs :: random
! ---------------------------------------------------------------------
! |(i) |
! | {random} = [D] {z1 + i.z2} |
! | |
! | {z1 + i.z2} is put in {psidot} for temporary storage. It is |
! | defined at ODD sites, so {random} is defined at EVEN sites. |
! ---------------------------------------------------------------------
CALL randvec (psidot)
CALL D_u (random, psidot, plus, odd)
! ---------------------------------------------------------------------
! |(ii) |
! | {random} = {random} + m {z1 + i.z2} |
! | |
! | where {z1 + i.z2} is defined on EVEN sites this time and so is |
! | a different random vector. (Again it is put in {psidot} for |
! | temporary storage. |
! | Therefore, |
! | {random} = [D + m] {z1 + i.z2} |
! | |
! | defined at even sites. |
! ---------------------------------------------------------------------
CALL randvec (psidot)
random = random + mquark * psidot
! ----------------------------------------
! |(iii) |
! | Solve [F] {psidot} = {random} |
! ----------------------------------------
CALL solve (psidot, random)
END
! }}}
! {{{ SUBROUTINE init_psi ()
SUBROUTINE init_psi ()
!---------------------
! -------------------------------------------------------------------
! | Initialise {psi} at t = -(1/2): |
! | |
! | {psi} = {z1 + i.z2}/omega - half.h * {psidot} |
! | |
! | where z1, z2 are normally distributed with std.dev = SQRT(2). |
! | |
! | N.B.1 !! We assume that 'omega' = 1. !! |
! | N.B.2 !! Need {psidot} to be defined already !! |
! -------------------------------------------------------------------
INCLUDE 'procs.inc'
INCLUDE 'consts.inc'
INCLUDE 'sites.inc' ! for arrays 'psi' & 'psidot'
CALL randvec (psi)
psi = psi - halfh * psidot
END
! }}}
! {{{ REAL FUNCTION RAND (seed)
REAL FUNCTION RAND (seed)
!------------------------
INTEGER seed
!------ Random number generator (uses the INMOS algorithm)
DOUBLE PRECISION a, irange, mospos
PARAMETER (a = 1664525.0D0, &
irange = 4294967296.0D0, &
mospos = 2147483647.0D0)
DOUBLE PRECISION rseed
rseed = seed
IF (seed < 0) rseed = rseed + irange
rseed = MOD ((a * rseed) + 1.0, irange)
RAND = rseed / irange
IF (rseed > mospos) rseed = rseed - irange
seed = rseed
END
! }}}
! {{{ SUBROUTINE rands3 (x)
SUBROUTINE rands3 (x)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -