📄 cg2.f90

📁 shpf 1.9一个并行编译器
💻 F90
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34

END
! }}}
! {{{  SUBROUTINE D_u_dir (v, gauge, u, sense) -- [TIMED] -- no comms
SUBROUTINE D_u_dir (v, gauge, u, sense)
!--------------------------------------
  INCLUDE 'consts.inc'
  INCLUDE 'procs.inc'

  REAL, INTENT (OUT), DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx)  ::  v
  REAL, INTENT (IN),  DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx)  ::  u
!HPF$ ALIGN (*, :, :, :, *, *) WITH *tmpl :: v, u

  REAL, INTENT (IN), DIMENSION (nt/2, nx, nx, nx, ngauge)  ::  gauge
!HPF$ ALIGN (*, :, :, :, *) WITH *tmpl :: gauge

  INTEGER, INTENT (IN) ::  sense

! -----------------------------------------------------------------------
!  Calculates :                                                       |
!                                                                     |
!          v (i) = D (i,j) * u (j),                                   |
!                                                                     |
!  for all sites 'i' at which {v} is defined (i.e. all even or odd    |
!  sites), for a particular link direction and sense.                 |
!                                                                     |
!  'sense' = sense of the link (ij)                                   |
!            (positive = 'possense', negative = 'negsense'            |
!                                                                     |
!  'D' = [ g4 + i.g3    g1 + i.g2 ]                                   |
!        [-g1 + i.g2    g4 - i.g3 ]                                   |
!                                                                     |
!        This subroutine is called with the arrays {u}, {v} and       |
!  [gauge] all aligned, such that the corresponding elements,         |
!  v (i), gauge (i,j) and u (j), are stored at the same array         |
!   positions.   Thus the inner loop over sites can be VECTORISED !   |
!                                                                     |
! -----------------------------------------------------------------------

! {{{    IF  (sense == possense) THEN
  IF  (sense == possense) THEN
! ----------------------------
!----------------------------------------------------------------------
!  Positive sense link                                                |
!  -------------------                                                |
!       v (i) = D (i,j) * u (j),   where site j = (i+mu).             |
!                                                                     |
!       In this case,  D(i,j) = gauge(i)  (only that segment of       |
!  the total gauge field array corresponding to the given link        |
!  direction and sense is passed in).   Also, the {u} vector          |
!  that is input has been shifted so that the sites are aligned:      |
!  u (i) = 'real' u (j).   Thus, we calculate:                        |
!                                                                     |
!       v (i) = gauge (i) * u (i)     for all sites 'i'               |
!----------------------------------------------------------------------
    v (:,:,:,:, 1, re) =   gauge (:,:,:,:, 4) * u (:,:,:,:, 1, re)   &
                         - gauge (:,:,:,:, 3) * u (:,:,:,:, 1, im)   &
                         + gauge (:,:,:,:, 1) * u (:,:,:,:, 2, re)   &
                         - gauge (:,:,:,:, 2) * u (:,:,:,:, 2, im)

    v (:,:,:,:, 1, im) =   gauge (:,:,:,:, 4) * u (:,:,:,:, 1, im)   &
                         + gauge (:,:,:,:, 3) * u (:,:,:,:, 1, re)   &
                         + gauge (:,:,:,:, 1) * u (:,:,:,:, 2, im)   &
                         + gauge (:,:,:,:, 2) * u (:,:,:,:, 2, re)

    v (:,:,:,:, 2, re) = - gauge (:,:,:,:, 1) * u (:,:,:,:, 1, re)   &
                         - gauge (:,:,:,:, 2) * u (:,:,:,:, 1, im)   &
                         + gauge (:,:,:,:, 4) * u (:,:,:,:, 2, re)   &
                         + gauge (:,:,:,:, 3) * u (:,:,:,:, 2, im)

    v (:,:,:,:, 2, im) = - gauge (:,:,:,:, 1) * u (:,:,:,:, 1, im)   &
                         + gauge (:,:,:,:, 2) * u (:,:,:,:, 1, re)   &
                         + gauge (:,:,:,:, 4) * u (:,:,:,:, 2, im)   &
                         - gauge (:,:,:,:, 3) * u (:,:,:,:, 2, re)
! }}}
! {{{    ELSE
  ELSE
! ----
!----------------------------------------------------------------------
!  Negative sense link                                                |
!  -------------------                                                |
!       v (i) = D (i,j) * u (j),   where site j = (i-mu).             |
!                                                                     |
!       In this case,  D(i,j) = - herm.conj [ gauge(j,i) ]            |
!                             = - herm.conj [ gauge (j) ]             |
!                                                                     |
!  since only that segment of the total gauge field array             |
!  corresponding to the given link direction and sense is passed      |
!  in.   Also, the {v} vector that is input has been shifted so       |
!  that the sites are aligned:   v (j) = 'real' v (i).                | 
!       Thus, we calculate:                                           |
!                                                                     |
!  v (j)  =  - herm.conj [ gauge (j) ] * u (j)      for all sites 'j' |
!----------------------------------------------------------------------
    v (:,:,:,:, 1, re) = - gauge (:,:,:,:, 4) * u (:,:,:,:, 1, re)   &
                         - gauge (:,:,:,:, 3) * u (:,:,:,:, 1, im)   &
                         + gauge (:,:,:,:, 1) * u (:,:,:,:, 2, re)   &
                         - gauge (:,:,:,:, 2) * u (:,:,:,:, 2, im)

    v (:,:,:,:, 1, im) = - gauge (:,:,:,:, 4) * u (:,:,:,:, 1, im)   &
                         + gauge (:,:,:,:, 3) * u (:,:,:,:, 1, re)   &
                         + gauge (:,:,:,:, 1) * u (:,:,:,:, 2, im)   &
                         + gauge (:,:,:,:, 2) * u (:,:,:,:, 2, re)

    v (:,:,:,:, 2, re) = - gauge (:,:,:,:, 1) * u (:,:,:,:, 1, re)   &
                         - gauge (:,:,:,:, 2) * u (:,:,:,:, 1, im)   &
                         - gauge (:,:,:,:, 4) * u (:,:,:,:, 2, re)   &
                         + gauge (:,:,:,:, 3) * u (:,:,:,:, 2, im)

    v (:,:,:,:, 2, im) = - gauge (:,:,:,:, 1) * u (:,:,:,:, 1, im)   &
                         + gauge (:,:,:,:, 2) * u (:,:,:,:, 1, re)   &
                         - gauge (:,:,:,:, 4) * u (:,:,:,:, 2, im)   &
                         - gauge (:,:,:,:, 3) * u (:,:,:,:, 2, re)
  ENDIF
! }}}

END
! }}}

! {{{  SUBROUTINE solve (x, b) -- [CONTENTS TIMED] -- no comms in timed part
SUBROUTINE solve (x, b)
!----------------------
  INCLUDE 'consts.inc'
  INCLUDE 'procs.inc'
  REAL, INTENT (INOUT), DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx)  ::  x
  REAL, INTENT (IN),    DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx)  ::  b
!HPF$ ALIGN (*, :, :, :, *, *) WITH *tmpl :: x, b
! -------------------------------------------------------------------
! |  Uses the CG algorithm to solve :                               |
! |                        [F] {x} = {b}                            |
! |  for {x}, given {b}.                                            |
! |                                                                 |
! |  {}    denotes a fermion vector (defined on even sites)         |
! |  [F] = (- [D][D] + msq),  where [D] is the Dirac matrix.        |
! |                                                                 |
! |  At start, {x} has the initial trial solution.                  |
! -------------------------------------------------------------------
  INTEGER  niter
  REAL     Mflops, time
  REAL     rsq,  rsqtol
  REAL     t0, t1, t2
  REAL, DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx) :: p, r
!HPF$ ALIGN (*, :, :, :, *, *) WITH tmpl :: p, r
  COMMON / results / niter, Mflops, time

  INTEGER  flops
  PARAMETER (flops = 280 * nt * nx * nx * nx)
 
! ---------------------------------------------------
! |  Initialise {p}, {r}, rsq, rsqtol  and  niter   |
! |                                                 |
! |     {p}  =  {r}  =  {b} - {Fx}                  |
! |     rsq  =  {r}.{r}                             |
! ---------------------------------------------------

  CALL F_u (r, x)
  r = b - r
  p = r
  rsqtol = SUM (b * b) * (1.0E-02 ** (sigfigs))
  rsq    = SUM (r * r)		! hermitian scalar product, (r+, r)
  niter  = 0

! ------------------------------------------
! |  Do CG iterations until {x} converged  |
! ------------------------------------------

  CALL timer (t0)
  CALL timer (t1)

1 CONTINUE
    niter = niter + 1
    CALL CGiter (x, p, r, rsq)
  IF  ((rsq > rsqtol)  .AND.  (niter < maxiter)) go to 1

  CALL timer (t2)

!------ 'time'   = average time (in secs) per CG iteration.
!------ 'Mflops' = performance in Mflop/s averaged over all CG iterations.

  time = ((t2-t1) - (t1-t0)) / REAL (niter)
  IF  (time > 0.0) THEN
    Mflops = REAL (flops) / (1.0E6 * time)
  ELSE
    time   = - 1.0E20
    Mflops = - 1.0E20
  ENDIF

END
! }}}
! {{{  SUBROUTINE CGiter (x, p, r, rsq)  [TIMED] -- comms via SUM
SUBROUTINE CGiter (x, p, r, rsq)
!-------------------------------
  INCLUDE 'consts.inc'
  INCLUDE 'procs.inc'

  REAL, INTENT (INOUT),           &
             DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx)  :: x, p, r
!HPF$ ALIGN (*, :, :, :, *, *) WITH *tmpl :: x, p, r
  REAL, INTENT (INOUT) ::  rsq

  REAL, DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx)  ::  Fp
!HPF$ ALIGN (*, :, :, :, *, *) WITH tmpl :: Fp
  REAL  alpha,  newrsq

! ------------------------------------------------------------
! |  One iteration of the Conjugate Gradient (CG) algorithm  |
! |  - no preconditioning.                                   |
! ------------------------------------------------------------

! -----------------------------------
! |   alpha  =  (r, r) / (p, Fp)    |
! -----------------------------------
  CALL F_u (Fp, p)
  alpha = rsq / SUM (p * Fp)	! hermitian scalar product, (p+, Fp)


! --------------------------------------
! |   {new.x}  =  {x} + alpha * {p}    |
! --------------------------------------
  x = x + alpha * p


! --------------------------------------
! |   {new.r}  =  {r} - alpha * {Fp}   |
! --------------------------------------
  r = r - alpha * Fp


! ----------------------------------------------------------------
! |   {new.p}  =  ((new.r, new.r) / (r, r)) * {p}  +  {new.r}    |
! ----------------------------------------------------------------
  newrsq = SUM (r * r)		! hermitian scalar product, (r+, r)
  p      = r + (newrsq / rsq) * p
  rsq    = newrsq

END
! }}}

! {{{  SUBROUTINE timer (t)
SUBROUTINE timer (t)
!---------------------------------------------------------------
! Returns the elapsed time in seconds since the first call to
! 'timer ()'.
!
! N.B. The 'SYSTEM_CLOCK ()' intrinsic is dangerous in an HPF
! program, as it violates HPF semantics by potentially returning
! a  different value on each processor.  Do not use the result 't'
! in any expression that governs the program's control flow
! (e.g. an IF or WHILE condition)!
!	We plan to provide a 'safe' version of 'SYSTEM_CLOCK ()'
! (e.g. one that returns the time as measured on a particular
! processor) in a future release of 'shpf'.
!---------------------------------------------------------------
  REAL, INTENT (OUT) ::  t
  INTEGER :: cnt0, cnt, rate, max
  SAVE       cnt0          ! its initial value should be 0!

  CALL SYSTEM_CLOCK (cnt, rate, max)

  IF (cnt0 == 0) THEN
    cnt0 = cnt
    cnt  = 0
  ELSE IF (cnt >= cnt0) THEN
    cnt = cnt - cnt0
  ELSE
    cnt = max - (cnt0 - cnt) + 1
  ENDIF

  IF (rate > 0) THEN
    t = REAL (cnt) / REAL (rate)
  ELSE
    t = 0.0    !! no clock !!
  ENDIF

  RETURN
END
! }}}
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -