📄 init.f90

📁 shpf 1.9一个并行编译器
💻 F90
📖 第 1 页 / 共 4 页
字号:
!   
!   
!   6.2  Analysis of vectorisation
!   ------------------------------
!        The following table breaks down a CG iteration into its discrete 
!   components of computation.   Column 2 shows the frequency of execution 
!   of these components per CG iteration.   In addition, for components
!   that involve scalar arithmetic, the number of flops for a single 
!   execution of the component is given as a function of N, the number of 
!   lattice sites.   For vectorised components, the vector length and the 
!   number of flops per instruction are given.
!   
!        N = total number of lattice sites (even and odd).
!  
!   ----------------------------------------------------------------------
!   |                 |         |        operation count       |         |
!   |    operation    |  times  |------------------------------|  total  |
!   |                 |   used  |  scalar |       vector       |  flops  |
!   |                 |         |         |vec.len (flops/elem)|         |
!   |-----------------|---------|---------|--------------------|---------|
!   |      SU(2)      |         |         |                    |         |
!   |-----------------|         |         |                    |         |
!   | {u}.{v}         |    2    |   4 N   |    -         -     |    8 N  |
!   |                 |         |         |                    |         |
!   | inner loop in   |         |         |                    |         |
!   | DVECDIR         |   48    |    -    |   N/2      ( 7 )   |  224 N  |
!   |                 |         |         |                    |         |
!   | {u} + const {v} |    4    |    -    |   2 N      ( 2 )   |   16 N  |
!   |                 |         |         |                    |         |
!   | const*{u}  or   |         |         |                    |         |
!   | {u} + {v}       |   16    |    -    |   2 N      ( 1 )   |   32 N  |
!   |----------------------------------------------------------|---------|
!   | total  (scalar+vector)                                   |  280 N  |
!   |----------------------------------------------------------|---------|
!   |      SU(3)      |         |         |                    |         |
!   |-----------------|         |         |                    |         |
!   | {u}.{v}         |    2    |   6 N   |    -         -     |   12 N  |
!   |                 |         |         |                    |         |
!   | inner loop in   |         |         |                    |         |
!   | DVECDIR         |   96    |    -    |   N/2     ( 11 )   |  528 N  |
!   |                 |         |         |                    |         |
!   | {u} + const {v} |    4    |    -    |   3 N      ( 2 )   |   24 N  |
!   |                 |         |         |                    |         |
!   | const*{u}  or   |         |         |                    |         |
!   | {u} + {v}       |   16    |    -    |   3 N      ( 1 )   |   48 N  |
!   |----------------------------------------------------------|---------|
!   | total (scalar+vector)                                    |  612 N  |
!   ----------------------------------------------------------------------
!   
!   
!        As remarked earlier, the only part of the calculation that is not 
!   vectorisable in the hermitian scalar product, {u}.{v}.  This accounts for
!   2.9%  of the floating point arithmetic for SU(2) and  2.0%  for SU(3) -- 
!   the remaining 97% and 98% respectively are vectorised.
!   
!        The vector length of vectorised instructions is of the order N 
!   (the total no. of lattice sites).
!  
!  
!   6.3  Critical code
!   ------------------
!        The above table shows that the critical pieces of code (i.e. the
!   pieces that perform most of the floating point arithmetic)  are the inner
!   loops of subroutine DVECDIR, which compute  [gauge] * {vector}  products.
!   
!-----------------------------------------------------------------------------
! }}}
! {{{  PROGRAM cg2init
PROGRAM cg2init
!--------------
  INCLUDE 'procs.inc'
  INCLUDE 'consts.inc'
  INCLUDE 'gauge.inc'             ! for array  'gauge'
  INCLUDE 'sites.inc'             ! for arrays 'psi' & 'psidot'

! ----------------------------      
! |  Initialise fields :     |
! ----------------------------      
print *, 'point 1'
  CALL init_rand ()
print *, 'point 2'
  CALL init_su2 ()
print *, 'point 3'
  CALL init_pd ()
print *, 'point 4'
  CALL init_psi ()
print *, 'point 5'

  OPEN (10, FILE="gauge.dat", FORM="UNFORMATTED", STATUS="REPLACE")
  DO ipar = 0, 1
    DO idir = 1, ndir
      DO igauge = 1, ngauge
        WRITE (10) gauge (:,:,:,:, igauge, idir, ipar)
      ENDDO
    ENDDO
  ENDDO
  CLOSE (10)
print *, 'point 6'

  OPEN (10, FILE="sites.dat", FORM="UNFORMATTED", STATUS="REPLACE")
  DO icmplx = 1, ncmplx
    DO icolor = 1, ncolor
      WRITE (10) psi (:,:,:,:, icolor, icmplx)
    ENDDO
  ENDDO
  DO icmplx = 1, ncmplx
    DO icolor = 1, ncolor
      WRITE (10) psidot (:,:,:,:, icolor, icmplx)
    ENDDO
  ENDDO
print *, 'point 7'
  CLOSE (10)
print *, 'point 8'

!  STOP
  END
! }}}
! {{{  SUBROUTINE init_rand ()  
SUBROUTINE init_rand ()  
!----------------------
  INTEGER  seed
  REAL     stored_normal
  LOGICAL  stored
  COMMON / random / seed, stored_normal, stored

!------/ random /

  seed   = 172375
  stored = .FALSE.
END
! }}}
! {{{  SUBROUTINE init_su2 ()
SUBROUTINE init_su2 ()
!---------------------
  INCLUDE 'procs.inc'
  INCLUDE 'consts.inc'
  INCLUDE 'gauge.inc'            ! for array 'gauge'

  INTEGER  parity, dir, z, y, x, t
  REAL     a (ngauge)
! ----------------------------------------------------------------
! |  The gauge field (on the links) is initialised to randomly   |
! |   distributed SU(2) matrices.  They are parameterised by     |
! |   unit 4-vectors, a(4), thus  :                              |
! |                                                              |
! |                             [  a4 + i.a3      a1 + i.a2  ]   |
! |  gauge  =  SU(2) matrix  =  [                            ]   |
! |                             [ -a1 + i.a2      a4 - i.a3  ]   |
! ----------------------------------------------------------------

  DO parity = 0, 1
    DO dir = 1, ndir
      DO z = 1, nx
        DO y = 1, nx
          DO x = 1, nx
            DO t = 1, nt / 2
      
              CALL  rands3 (a)
              gauge (t, x, y, z, :, dir, parity) = a
      
            ENDDO
          ENDDO
        ENDDO
      ENDDO
    ENDDO
  ENDDO

END
! }}}
! {{{  SUBROUTINE init_pd ()  
SUBROUTINE init_pd ()  
!-------------------
! --------------------------------------------------------------------
! |  Initialise the fermion vector  {psidot}  at t = 0 .             |
! |  The correct distribution is obtained by solving :               |
! |                                                                  |
! |               [-D + m] {psidot}  =  {z1 + i.z2}                  |
! |                                                                  |
! |  where z1, z2 are normally distributed with std.dev. SQRT (2).   |
! |  [D] is the Dirac matrix, which is a function of the gauge       |
! |  fields (see the introduction).                                  |
! |                                                                  |
! |    This is solved for {psidot} using Conjugate Gradient (CG)     |
! |  iteration.  However, CG can only solve for hermitian matrices,  |
! |  so need to multiply both sides by  hc.[-D + m] = [ D + m],      |
! |  and solve :                                                     |
! |                                                                  |
! |           [-D*D + m*m] {psidot}  =  [D + m] {z1 + i.z2}          |
! |                                                                  |
! |  N.B. !! Need the gauge fields U to be defined already !!    |
! --------------------------------------------------------------------
  INCLUDE 'procs.inc'
  INCLUDE 'consts.inc'
  INCLUDE 'sites.inc'             ! for arrays 'psi' & 'psidot'
  REAL, DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx)  ::  random
!HPF$ DISTRIBUTE (*, BLOCK, BLOCK, BLOCK, *, *) ONTO procs :: random

! ---------------------------------------------------------------------
! |(i)                                                                |
! |                 {random} = [D] {z1 + i.z2}                        |
! |                                                                   |
! |  {z1 + i.z2}  is put in  {psidot}  for temporary storage.  It is  |
! |  defined at ODD sites, so  {random}  is defined at EVEN sites.    |
! ---------------------------------------------------------------------
  CALL randvec (psidot)                   

  CALL D_u (random, psidot, plus, odd)
                         
! ---------------------------------------------------------------------
! |(ii)                                                               |
! |                 {random} = {random} + m {z1 + i.z2}               |
! |                                                                   |
! |  where {z1 + i.z2} is defined on EVEN sites this time and so is   |
! |  a different random vector.  (Again it is put in  {psidot}  for   |
! |  temporary storage.                                               |
! |    Therefore,                                                     |
! |                 {random} = [D + m] {z1 + i.z2}                    |
! |                                                                   |
! |  defined at even sites.                                           |
! ---------------------------------------------------------------------

  CALL randvec (psidot)
  random = random + mquark * psidot


! ----------------------------------------
! |(iii)                                 |
! |  Solve    [F] {psidot} = {random}    |
! ----------------------------------------

  CALL solve (psidot, random)

END
! }}}
! {{{  SUBROUTINE init_psi ()  
SUBROUTINE init_psi ()  
!---------------------

! -------------------------------------------------------------------
! |  Initialise {psi} at t = -(1/2):                                |
! |                                                                 |
! |      {psi}  =  {z1 + i.z2}/omega  -  half.h * {psidot}          |
! |                                                                 |
! |  where z1, z2 are normally distributed with std.dev = SQRT(2).  |
! |                                                                 |
! |  N.B.1   !! We assume that 'omega' = 1. !!                      |
! |  N.B.2   !! Need {psidot} to be defined already !!              |
! -------------------------------------------------------------------
  INCLUDE 'procs.inc'
  INCLUDE 'consts.inc'
  INCLUDE 'sites.inc'             ! for arrays 'psi' & 'psidot'


  CALL randvec (psi)
  psi = psi - halfh * psidot

END
! }}}

! {{{  REAL FUNCTION RAND (seed)
REAL FUNCTION RAND (seed)
!------------------------
  INTEGER seed

!------ Random number generator (uses the INMOS algorithm)

  DOUBLE PRECISION a, irange, mospos
  PARAMETER (a      = 1664525.0D0,         &
             irange = 4294967296.0D0,      &
             mospos = 2147483647.0D0)
  DOUBLE PRECISION rseed

  rseed = seed
  IF (seed < 0) rseed = rseed + irange
  rseed = MOD ((a * rseed) + 1.0, irange)
  RAND  = rseed / irange
  IF (rseed > mospos) rseed = rseed - irange
  seed  = rseed
END
! }}}
! {{{  SUBROUTINE rands3 (x)
SUBROUTINE rands3 (x)
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -