📄 cg2.f90
字号:
! ------------------------------
! The following table breaks down a CG iteration into its discrete
! components of computation. Column 2 shows the frequency of execution
! of these components per CG iteration. In addition, for components
! that involve scalar arithmetic, the number of flops for a single
! execution of the component is given as a function of N, the number of
! lattice sites. For vectorised components, the vector length and the
! number of flops per instruction are given.
!
! N = total number of lattice sites (even and odd).
!
! ----------------------------------------------------------------------
! | | | operation count | |
! | operation | times |------------------------------| total |
! | | used | scalar | vector | flops |
! | | | |vec.len (flops/elem)| |
! |-----------------|---------|---------|--------------------|---------|
! | SU(2) | | | | |
! |-----------------| | | | |
! | {u}.{v} | 2 | 4 N | - - | 8 N |
! | | | | | |
! | inner loop in | | | | |
! | DVECDIR | 48 | - | N/2 ( 7 ) | 224 N |
! | | | | | |
! | {u} + const {v} | 4 | - | 2 N ( 2 ) | 16 N |
! | | | | | |
! | const*{u} or | | | | |
! | {u} + {v} | 16 | - | 2 N ( 1 ) | 32 N |
! |----------------------------------------------------------|---------|
! | total (scalar+vector) | 280 N |
! |----------------------------------------------------------|---------|
! | SU(3) | | | | |
! |-----------------| | | | |
! | {u}.{v} | 2 | 6 N | - - | 12 N |
! | | | | | |
! | inner loop in | | | | |
! | DVECDIR | 96 | - | N/2 ( 11 ) | 528 N |
! | | | | | |
! | {u} + const {v} | 4 | - | 3 N ( 2 ) | 24 N |
! | | | | | |
! | const*{u} or | | | | |
! | {u} + {v} | 16 | - | 3 N ( 1 ) | 48 N |
! |----------------------------------------------------------|---------|
! | total (scalar+vector) | 612 N |
! ----------------------------------------------------------------------
!
!
! As remarked earlier, the only part of the calculation that is not
! vectorisable in the hermitian scalar product, {u}.{v}. This accounts for
! 2.9% of the floating point arithmetic for SU(2) and 2.0% for SU(3) --
! the remaining 97% and 98% respectively are vectorised.
!
! The vector length of vectorised instructions is of the order N
! (the total no. of lattice sites).
!
!
! 6.3 Critical code
! ------------------
! The above table shows that the critical pieces of code (i.e. the
! pieces that perform most of the floating point arithmetic) are the inner
! loops of subroutine DVECDIR, which compute [gauge] * {vector} products.
!
!-----------------------------------------------------------------------------
! }}}
! {{{ PROGRAM cg2file
PROGRAM cg2
!----------
INCLUDE 'consts.inc'
INCLUDE 'procs.inc'
INCLUDE 'sites.inc' ! for arrays 'psi' & 'psidot'
INTEGER isweep
REAL t
REAL, DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx) :: rhs
!HPF$ ALIGN (*, :, :, :, *, *) WITH tmpl :: rhs
! --------------------------------------
! | Input and initialise fields : |
! --------------------------------------
CALL timer (t) ! just to initialise timer!
CALL timer (t) ! " " " "
CALL main_heading ()
CALL init ()
CALL col_heading ()
CALL output () !-- 1st line of results is for initialising 'psidot'
DO isweep = 1, nsweep
! ---------------------------------------------------------------
! | Update the vector {psi} : |
! | |
! | {psi (1/2)} = {psi (-1/2)} + h {psidot (0)} |
! | |
! | (The numbers in brackets represent the timestep). |
! ---------------------------------------------------------------
psi = psi + h * psidot
! ---------------------------------------------------------------
! | To update the vector {psidot}, solve |
! | |
! | [F] {psidot (1)} = [F] {psidot (0)} - h * psi (1/2)} |
! | |
! | (using the CG algorithm) |
! ---------------------------------------------------------------
CALL F_u (rhs, psidot)
rhs = rhs - h * psi
CALL solve (psidot, rhs)
CALL output ()
ENDDO
STOP
END
! }}}
! {{{ SUBROUTINE main_heading ()
SUBROUTINE main_heading ()
!-------------------------
INCLUDE 'consts.inc'
INCLUDE 'procs.inc'
!--------------------------------------------------------------------------
! Heading is as follows:
! ----------------------
! ---------------------------------------------------------------
! | Benchmark for Conjugate Gradient iteration in |
! | SU(2) lattice gauge theory with Kogut-Susskind fermions |
! | |
! | HPF version |
! ---------------------------------------------------------------
!
!
! Lattice size = 8 **4
!
!( The following parameters just affect total running time, not performance :
! Quark mass = 0.2
! No. of sweeps = 10 )
!
!
!------------------------------------------------------------------------------
print '(A/A/A/A/A/A//)', &
' ---------------------------------------------------------------', &
' | Benchmark for Conjugate Gradient iteration in |', &
' | SU(2) lattice gauge theory with Kogut-Susskind fermions |', &
' | |', &
' | HPF version |', &
' ---------------------------------------------------------------'
IF (nt == nx) THEN
PRINT '(A,I2,A/)', ' Lattice size = ', nt, ' ** 4'
ELSE
PRINT '(A,I2,A,I2,A/)', ' Lattice size = ', nt, ' * ', nx, ' ** 3'
ENDIF
PRINT '(A,I2,A,I2,A,I2)', ' Processor array size = ', &
nprocs0, ' * ', nprocs1, ' * ', nprocs2
PRINT '(A/A,F3.1/A,I4,A//)', &
' ( The following parameters just affect total running time, not performance :', &
' Quark mass = ', mquark, &
' No. of sweeps = ', nsweep, ' )'
END
! }}}
! {{{ SUBROUTINE col_heading ()
SUBROUTINE col_heading ()
!------------------------
INCLUDE 'consts.inc'
INCLUDE 'procs.inc'
!--------------------------------------------------------------------------
! Heading is as follows:
! ----------------------
! kinetic potential total no.CG time/iter performance
! energy energy energy iters (secs) (Mflops/sec)
! ------- --------- -------- ----- ----------- ------------
!------------------------------------------------------------------------------
PRINT '(A/A/A)', &
' kinetic potential total no.CG time/iter performance', &
' energy energy energy iters (secs) (Mflops/sec)', &
' ------- --------- -------- ----- ----------- ------------'
END
! }}}
! {{{ SUBROUTINE output ()
SUBROUTINE output ()
!-------------------
! --------------------------------------------------------------------
! | Calculate and output the KE, PE, and total E for the current |
! | timestep. |
! | The quantities are evaluated at integer time, i.e. at the |
! | time at which {psidot} is defined. {psi} is defined at a |
! | time (h/2) earlier, and so has to be extrapolated forward. |
! --------------------------------------------------------------------
INCLUDE 'consts.inc'
INCLUDE 'procs.inc'
INCLUDE 'sites.inc' ! for arrays 'psi' & 'psidot'
INTEGER niter
REAL Mflops, time
REAL KE, PE
REAL, DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx) :: temp
!HPF$ ALIGN (*, :, :, :, *, *) WITH tmpl :: temp
COMMON / results / niter, Mflops, time
! ----------------------------------------------
! | KE = ( psidot, F, psidot ) -- exactly |
! ----------------------------------------------
CALL F_u (temp, psidot)
KE = SUM (psidot * temp) ! hermitian scalar product: (psidot+, temp)
! --------------------------------------------------------------------
! | PE = (omega * |psi|) **2 (we take omega = 1). |
! | Have to extrapolate {psi} (t=-1/2) forward by half a timestep. |
! --------------------------------------------------------------------
temp = psi + halfh * psidot
PE = SUM (temp * temp) ! hermitian scalar product, (temp+, temp)
! --------------------------------------
! | Output results for this sweep |
! --------------------------------------
PRINT '(F9.3, 4X, F9.3, 4X, F9.3, 4X, I4, 4X, F11.4, 4X, F11.5)', &
KE, PE, (KE + PE), niter, time, Mflops
END
! }}}
! {{{ SUBROUTINE init ()
SUBROUTINE init ()
!-----------------
CALL init_mask ()
CALL init_gauge () ! reads data from file "gauge.dat"
CALL init_sites () ! reads data from file "sites.dat"
END
! }}}
! {{{ SUBROUTINE init_mask ()
SUBROUTINE init_mask ()
!----------------------
! -------------------------------------------------
! | Initialise the mask array 'even_xyz' to: |
! | 1 -- (x+y+z) is even |
! | 0 -- (x+y+z) is odd |
! | This is used as a mask for CSHIFTs in 'D_u'. |
! -------------------------------------------------
INCLUDE 'consts.inc'
INCLUDE 'procs.inc'
INCLUDE 'even.inc' ! for 'even_xyz'
INTEGER x, y, z
DO z = 1, nx
DO y = 1, nx
DO x = 1, nx
even_xyz (:,x,y,z,:,:) = (MOD (x + y + z, 2) == 0)
ENDDO
ENDDO
ENDDO
END
! }}}
! {{{ SUBROUTINE init_gauge ()
SUBROUTINE init_gauge ()
!-----------------------
INCLUDE 'consts.inc'
INCLUDE 'procs.inc'
INCLUDE 'gauge.inc' ! for array 'gauge'
! ---------------------------------------------------------------
! | Read the initial values of array 'gauge' from a file |
! ---------------------------------------------------------------
OPEN (10, FILE="gauge.dat", FORM="UNFORMATTED", STATUS="OLD")
DO ipar = 0, 1
DO idir = 1, ndir
DO igauge = 1, ngauge
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -