📄 cg2.f90

📁 shpf 1.9一个并行编译器
💻 F90
📖 第 1 页 / 共 4 页
字号:
!   ------------------------------
!        The following table breaks down a CG iteration into its discrete 
!   components of computation.   Column 2 shows the frequency of execution 
!   of these components per CG iteration.   In addition, for components
!   that involve scalar arithmetic, the number of flops for a single 
!   execution of the component is given as a function of N, the number of 
!   lattice sites.   For vectorised components, the vector length and the 
!   number of flops per instruction are given.
!   
!        N = total number of lattice sites (even and odd).
!  
!   ----------------------------------------------------------------------
!   |                 |         |        operation count       |         |
!   |    operation    |  times  |------------------------------|  total  |
!   |                 |   used  |  scalar |       vector       |  flops  |
!   |                 |         |         |vec.len (flops/elem)|         |
!   |-----------------|---------|---------|--------------------|---------|
!   |      SU(2)      |         |         |                    |         |
!   |-----------------|         |         |                    |         |
!   | {u}.{v}         |    2    |   4 N   |    -         -     |    8 N  |
!   |                 |         |         |                    |         |
!   | inner loop in   |         |         |                    |         |
!   | DVECDIR         |   48    |    -    |   N/2      ( 7 )   |  224 N  |
!   |                 |         |         |                    |         |
!   | {u} + const {v} |    4    |    -    |   2 N      ( 2 )   |   16 N  |
!   |                 |         |         |                    |         |
!   | const*{u}  or   |         |         |                    |         |
!   | {u} + {v}       |   16    |    -    |   2 N      ( 1 )   |   32 N  |
!   |----------------------------------------------------------|---------|
!   | total  (scalar+vector)                                   |  280 N  |
!   |----------------------------------------------------------|---------|
!   |      SU(3)      |         |         |                    |         |
!   |-----------------|         |         |                    |         |
!   | {u}.{v}         |    2    |   6 N   |    -         -     |   12 N  |
!   |                 |         |         |                    |         |
!   | inner loop in   |         |         |                    |         |
!   | DVECDIR         |   96    |    -    |   N/2     ( 11 )   |  528 N  |
!   |                 |         |         |                    |         |
!   | {u} + const {v} |    4    |    -    |   3 N      ( 2 )   |   24 N  |
!   |                 |         |         |                    |         |
!   | const*{u}  or   |         |         |                    |         |
!   | {u} + {v}       |   16    |    -    |   3 N      ( 1 )   |   48 N  |
!   |----------------------------------------------------------|---------|
!   | total (scalar+vector)                                    |  612 N  |
!   ----------------------------------------------------------------------
!   
!   
!        As remarked earlier, the only part of the calculation that is not 
!   vectorisable in the hermitian scalar product, {u}.{v}.  This accounts for
!   2.9%  of the floating point arithmetic for SU(2) and  2.0%  for SU(3) -- 
!   the remaining 97% and 98% respectively are vectorised.
!   
!        The vector length of vectorised instructions is of the order N 
!   (the total no. of lattice sites).
!  
!  
!   6.3  Critical code
!   ------------------
!        The above table shows that the critical pieces of code (i.e. the
!   pieces that perform most of the floating point arithmetic)  are the inner
!   loops of subroutine DVECDIR, which compute  [gauge] * {vector}  products.
!   
!-----------------------------------------------------------------------------
! }}}
! {{{  PROGRAM cg2file
PROGRAM cg2
!----------
  INCLUDE 'consts.inc'
  INCLUDE 'procs.inc'
  INCLUDE 'sites.inc'             ! for arrays 'psi' & 'psidot'

  INTEGER  isweep
  REAL     t
  REAL, DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx)  ::  rhs
!HPF$ ALIGN (*, :, :, :, *, *) WITH tmpl :: rhs

! --------------------------------------      
! |  Input and initialise fields :     |
! --------------------------------------      
  CALL timer (t)	! just to initialise timer!
  CALL timer (t)	!     "     "     "     "

  CALL main_heading ()
  CALL init ()
  CALL col_heading ()
  CALL output ()     !-- 1st line of results is for initialising 'psidot'

  DO isweep = 1, nsweep
!   ---------------------------------------------------------------
!   |  Update the vector {psi} :                                  |
!   |                                                             |
!   |     {psi (1/2)} = {psi (-1/2)} + h {psidot (0)}             |
!   |                                                             |
!   |  (The numbers in brackets represent the timestep).          |
!   ---------------------------------------------------------------
    psi = psi + h * psidot


!   ---------------------------------------------------------------
!   |  To update the vector {psidot}, solve                       |
!   |                                                             |
!   |     [F] {psidot (1)} = [F] {psidot (0)} - h * psi (1/2)}    |
!   |                                                             |
!   |  (using the CG algorithm)                                   |
!   ---------------------------------------------------------------
    CALL F_u   (rhs, psidot)
    rhs = rhs - h * psi
    CALL solve  (psidot, rhs)

    CALL output ()
  ENDDO

  STOP
  END
! }}}
! {{{  SUBROUTINE main_heading ()
SUBROUTINE main_heading ()
!-------------------------
  INCLUDE 'consts.inc'
  INCLUDE 'procs.inc'
!--------------------------------------------------------------------------
!  Heading is as follows:
!  ----------------------
!    ---------------------------------------------------------------
!    |        Benchmark for Conjugate Gradient iteration in        |
!    |   SU(2) lattice gauge theory with Kogut-Susskind fermions   |
!    |                                                             |
!    |                         HPF version                         |
!    ---------------------------------------------------------------
!
!
!  Lattice size  =  8 **4
!
!( The following parameters just affect total running time, not performance :
!  Quark mass    =  0.2
!  No. of sweeps =  10 )
!
!
!------------------------------------------------------------------------------

  print '(A/A/A/A/A/A//)',                                                  &
'         ---------------------------------------------------------------', &
'         |        Benchmark for Conjugate Gradient iteration in        |', &
'         |   SU(2) lattice gauge theory with Kogut-Susskind fermions   |', &
'         |                                                             |', &
'         |                         HPF version                         |', &
'         ---------------------------------------------------------------'

  IF  (nt == nx)  THEN
      PRINT '(A,I2,A/)',      '   Lattice size  = ', nt, ' ** 4'
  ELSE
      PRINT '(A,I2,A,I2,A/)', '   Lattice size  = ', nt, ' * ', nx, ' ** 3'
  ENDIF
  PRINT '(A,I2,A,I2,A,I2)',   '   Processor array size = ',      &
         nprocs0, ' * ', nprocs1, ' * ', nprocs2

  PRINT '(A/A,F3.1/A,I4,A//)',                  &
' ( The following parameters just affect total running time, not performance :', &
'   Quark mass    =  ',  mquark,                &
'   No. of sweeps = ',   nsweep,  ' )'

END
! }}}
! {{{  SUBROUTINE col_heading ()
SUBROUTINE col_heading ()
!------------------------
  INCLUDE 'consts.inc'
  INCLUDE 'procs.inc'
!--------------------------------------------------------------------------
!  Heading is as follows:
!  ----------------------
!  kinetic    potential    total       no.CG     time/iter     performance
!  energy     energy       energy      iters       (secs)      (Mflops/sec)
!  -------    ---------    --------    -----    -----------    ------------
!------------------------------------------------------------------------------

  PRINT  '(A/A/A)',          &
'  kinetic    potential    total       no.CG     time/iter     performance', &
'  energy     energy       energy      iters       (secs)      (Mflops/sec)', &
'  -------    ---------    --------    -----    -----------    ------------'
END
! }}}
! {{{  SUBROUTINE output ()
SUBROUTINE output ()
!-------------------
! --------------------------------------------------------------------
! |    Calculate and output the KE, PE, and total E for the current  |
! |  timestep.                                                       |
! |    The quantities are evaluated at integer time, i.e. at the     |
! |  time at which {psidot} is defined.   {psi} is defined at a      |
! |  time (h/2) earlier, and so has to be extrapolated forward.      |
! --------------------------------------------------------------------  
  INCLUDE 'consts.inc'
  INCLUDE 'procs.inc'
  INCLUDE 'sites.inc'             ! for arrays 'psi' & 'psidot'

  INTEGER  niter
  REAL     Mflops, time
  REAL     KE,  PE
  REAL, DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx)  ::  temp
!HPF$ ALIGN (*, :, :, :, *, *) WITH tmpl :: temp

  COMMON / results / niter, Mflops, time


! ----------------------------------------------
! |  KE  =  ( psidot, F, psidot ) -- exactly   |
! ----------------------------------------------

  CALL F_u (temp, psidot)
  KE = SUM (psidot * temp)	! hermitian scalar product: (psidot+, temp)

! --------------------------------------------------------------------
! |  PE  =  (omega * |psi|) **2     (we take omega = 1).             |
! |  Have to extrapolate {psi} (t=-1/2) forward by half a timestep.  |
! --------------------------------------------------------------------

  temp = psi + halfh * psidot
  PE   = SUM (temp * temp)	! hermitian scalar product, (temp+, temp)

! --------------------------------------
! |   Output results for this sweep    |
! --------------------------------------

  PRINT '(F9.3, 4X, F9.3, 4X, F9.3, 4X, I4, 4X, F11.4, 4X, F11.5)',  &
           KE,       PE,    (KE + PE),  niter,  time,     Mflops

END
! }}}

! {{{  SUBROUTINE init ()
SUBROUTINE init ()
!-----------------

  CALL init_mask ()
  CALL init_gauge ()	! reads data from file "gauge.dat"
  CALL init_sites ()	! reads data from file "sites.dat"
END
! }}}
! {{{  SUBROUTINE init_mask ()  
SUBROUTINE init_mask ()  
!----------------------

! -------------------------------------------------
! |  Initialise the mask array 'even_xyz' to:     |
! |     1   -- (x+y+z) is even                    |
! |     0   -- (x+y+z) is odd                     |
! | This is used as a mask for CSHIFTs in 'D_u'.  |
! -------------------------------------------------
  INCLUDE 'consts.inc'
  INCLUDE 'procs.inc'
  INCLUDE 'even.inc'             ! for 'even_xyz'
  INTEGER  x, y, z

  DO z = 1, nx
    DO y = 1, nx
      DO x = 1, nx
        even_xyz (:,x,y,z,:,:) = (MOD (x + y + z, 2) == 0)
      ENDDO
    ENDDO
  ENDDO

END
! }}}
! {{{  SUBROUTINE init_gauge ()
SUBROUTINE init_gauge ()
!-----------------------
  INCLUDE 'consts.inc'
  INCLUDE 'procs.inc'
  INCLUDE 'gauge.inc'            ! for array 'gauge'

! ---------------------------------------------------------------
! |  Read the initial values of array 'gauge' from a file	|
! ---------------------------------------------------------------

  OPEN (10, FILE="gauge.dat", FORM="UNFORMATTED", STATUS="OLD")
  DO ipar = 0, 1
    DO idir = 1, ndir
      DO igauge = 1, ngauge
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -