📄 cg2.f90
字号:
END
! }}}
! {{{ SUBROUTINE D_u_dir (v, gauge, u, sense) -- [TIMED] -- no comms
SUBROUTINE D_u_dir (v, gauge, u, sense)
!--------------------------------------
INCLUDE 'consts.inc'
INCLUDE 'procs.inc'
REAL, INTENT (OUT), DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx) :: v
REAL, INTENT (IN), DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx) :: u
!HPF$ ALIGN (*, :, :, :, *, *) WITH *tmpl :: v, u
REAL, INTENT (IN), DIMENSION (nt/2, nx, nx, nx, ngauge) :: gauge
!HPF$ ALIGN (*, :, :, :, *) WITH *tmpl :: gauge
INTEGER, INTENT (IN) :: sense
! -----------------------------------------------------------------------
! Calculates : |
! |
! v (i) = D (i,j) * u (j), |
! |
! for all sites 'i' at which {v} is defined (i.e. all even or odd |
! sites), for a particular link direction and sense. |
! |
! 'sense' = sense of the link (ij) |
! (positive = 'possense', negative = 'negsense' |
! |
! 'D' = [ g4 + i.g3 g1 + i.g2 ] |
! [-g1 + i.g2 g4 - i.g3 ] |
! |
! This subroutine is called with the arrays {u}, {v} and |
! [gauge] all aligned, such that the corresponding elements, |
! v (i), gauge (i,j) and u (j), are stored at the same array |
! positions. Thus the inner loop over sites can be VECTORISED ! |
! |
! -----------------------------------------------------------------------
! {{{ IF (sense == possense) THEN
IF (sense == possense) THEN
! ----------------------------
!----------------------------------------------------------------------
! Positive sense link |
! ------------------- |
! v (i) = D (i,j) * u (j), where site j = (i+mu). |
! |
! In this case, D(i,j) = gauge(i) (only that segment of |
! the total gauge field array corresponding to the given link |
! direction and sense is passed in). Also, the {u} vector |
! that is input has been shifted so that the sites are aligned: |
! u (i) = 'real' u (j). Thus, we calculate: |
! |
! v (i) = gauge (i) * u (i) for all sites 'i' |
!----------------------------------------------------------------------
v (:,:,:,:, 1, re) = gauge (:,:,:,:, 4) * u (:,:,:,:, 1, re) &
- gauge (:,:,:,:, 3) * u (:,:,:,:, 1, im) &
+ gauge (:,:,:,:, 1) * u (:,:,:,:, 2, re) &
- gauge (:,:,:,:, 2) * u (:,:,:,:, 2, im)
v (:,:,:,:, 1, im) = gauge (:,:,:,:, 4) * u (:,:,:,:, 1, im) &
+ gauge (:,:,:,:, 3) * u (:,:,:,:, 1, re) &
+ gauge (:,:,:,:, 1) * u (:,:,:,:, 2, im) &
+ gauge (:,:,:,:, 2) * u (:,:,:,:, 2, re)
v (:,:,:,:, 2, re) = - gauge (:,:,:,:, 1) * u (:,:,:,:, 1, re) &
- gauge (:,:,:,:, 2) * u (:,:,:,:, 1, im) &
+ gauge (:,:,:,:, 4) * u (:,:,:,:, 2, re) &
+ gauge (:,:,:,:, 3) * u (:,:,:,:, 2, im)
v (:,:,:,:, 2, im) = - gauge (:,:,:,:, 1) * u (:,:,:,:, 1, im) &
+ gauge (:,:,:,:, 2) * u (:,:,:,:, 1, re) &
+ gauge (:,:,:,:, 4) * u (:,:,:,:, 2, im) &
- gauge (:,:,:,:, 3) * u (:,:,:,:, 2, re)
! }}}
! {{{ ELSE
ELSE
! ----
!----------------------------------------------------------------------
! Negative sense link |
! ------------------- |
! v (i) = D (i,j) * u (j), where site j = (i-mu). |
! |
! In this case, D(i,j) = - herm.conj [ gauge(j,i) ] |
! = - herm.conj [ gauge (j) ] |
! |
! since only that segment of the total gauge field array |
! corresponding to the given link direction and sense is passed |
! in. Also, the {v} vector that is input has been shifted so |
! that the sites are aligned: v (j) = 'real' v (i). |
! Thus, we calculate: |
! |
! v (j) = - herm.conj [ gauge (j) ] * u (j) for all sites 'j' |
!----------------------------------------------------------------------
v (:,:,:,:, 1, re) = - gauge (:,:,:,:, 4) * u (:,:,:,:, 1, re) &
- gauge (:,:,:,:, 3) * u (:,:,:,:, 1, im) &
+ gauge (:,:,:,:, 1) * u (:,:,:,:, 2, re) &
- gauge (:,:,:,:, 2) * u (:,:,:,:, 2, im)
v (:,:,:,:, 1, im) = - gauge (:,:,:,:, 4) * u (:,:,:,:, 1, im) &
+ gauge (:,:,:,:, 3) * u (:,:,:,:, 1, re) &
+ gauge (:,:,:,:, 1) * u (:,:,:,:, 2, im) &
+ gauge (:,:,:,:, 2) * u (:,:,:,:, 2, re)
v (:,:,:,:, 2, re) = - gauge (:,:,:,:, 1) * u (:,:,:,:, 1, re) &
- gauge (:,:,:,:, 2) * u (:,:,:,:, 1, im) &
- gauge (:,:,:,:, 4) * u (:,:,:,:, 2, re) &
+ gauge (:,:,:,:, 3) * u (:,:,:,:, 2, im)
v (:,:,:,:, 2, im) = - gauge (:,:,:,:, 1) * u (:,:,:,:, 1, im) &
+ gauge (:,:,:,:, 2) * u (:,:,:,:, 1, re) &
- gauge (:,:,:,:, 4) * u (:,:,:,:, 2, im) &
- gauge (:,:,:,:, 3) * u (:,:,:,:, 2, re)
ENDIF
! }}}
END
! }}}
! {{{ SUBROUTINE solve (x, b) -- [CONTENTS TIMED] -- no comms in timed part
SUBROUTINE solve (x, b)
!----------------------
INCLUDE 'consts.inc'
INCLUDE 'procs.inc'
REAL, INTENT (INOUT), DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx) :: x
REAL, INTENT (IN), DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx) :: b
!HPF$ ALIGN (*, :, :, :, *, *) WITH *tmpl :: x, b
! -------------------------------------------------------------------
! | Uses the CG algorithm to solve : |
! | [F] {x} = {b} |
! | for {x}, given {b}. |
! | |
! | {} denotes a fermion vector (defined on even sites) |
! | [F] = (- [D][D] + msq), where [D] is the Dirac matrix. |
! | |
! | At start, {x} has the initial trial solution. |
! -------------------------------------------------------------------
INTEGER niter
REAL Mflops, time
REAL rsq, rsqtol
REAL t0, t1, t2
REAL, DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx) :: p, r
!HPF$ ALIGN (*, :, :, :, *, *) WITH tmpl :: p, r
COMMON / results / niter, Mflops, time
INTEGER flops
PARAMETER (flops = 280 * nt * nx * nx * nx)
! ---------------------------------------------------
! | Initialise {p}, {r}, rsq, rsqtol and niter |
! | |
! | {p} = {r} = {b} - {Fx} |
! | rsq = {r}.{r} |
! ---------------------------------------------------
CALL F_u (r, x)
r = b - r
p = r
rsqtol = SUM (b * b) * (1.0E-02 ** (sigfigs))
rsq = SUM (r * r) ! hermitian scalar product, (r+, r)
niter = 0
! ------------------------------------------
! | Do CG iterations until {x} converged |
! ------------------------------------------
CALL timer (t0)
CALL timer (t1)
1 CONTINUE
niter = niter + 1
CALL CGiter (x, p, r, rsq)
IF ((rsq > rsqtol) .AND. (niter < maxiter)) go to 1
CALL timer (t2)
!------ 'time' = average time (in secs) per CG iteration.
!------ 'Mflops' = performance in Mflop/s averaged over all CG iterations.
time = ((t2-t1) - (t1-t0)) / REAL (niter)
IF (time > 0.0) THEN
Mflops = REAL (flops) / (1.0E6 * time)
ELSE
time = - 1.0E20
Mflops = - 1.0E20
ENDIF
END
! }}}
! {{{ SUBROUTINE CGiter (x, p, r, rsq) [TIMED] -- comms via SUM
SUBROUTINE CGiter (x, p, r, rsq)
!-------------------------------
INCLUDE 'consts.inc'
INCLUDE 'procs.inc'
REAL, INTENT (INOUT), &
DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx) :: x, p, r
!HPF$ ALIGN (*, :, :, :, *, *) WITH *tmpl :: x, p, r
REAL, INTENT (INOUT) :: rsq
REAL, DIMENSION (nt/2, nx, nx, nx, ncolor, ncmplx) :: Fp
!HPF$ ALIGN (*, :, :, :, *, *) WITH tmpl :: Fp
REAL alpha, newrsq
! ------------------------------------------------------------
! | One iteration of the Conjugate Gradient (CG) algorithm |
! | - no preconditioning. |
! ------------------------------------------------------------
! -----------------------------------
! | alpha = (r, r) / (p, Fp) |
! -----------------------------------
CALL F_u (Fp, p)
alpha = rsq / SUM (p * Fp) ! hermitian scalar product, (p+, Fp)
! --------------------------------------
! | {new.x} = {x} + alpha * {p} |
! --------------------------------------
x = x + alpha * p
! --------------------------------------
! | {new.r} = {r} - alpha * {Fp} |
! --------------------------------------
r = r - alpha * Fp
! ----------------------------------------------------------------
! | {new.p} = ((new.r, new.r) / (r, r)) * {p} + {new.r} |
! ----------------------------------------------------------------
newrsq = SUM (r * r) ! hermitian scalar product, (r+, r)
p = r + (newrsq / rsq) * p
rsq = newrsq
END
! }}}
! {{{ SUBROUTINE timer (t)
SUBROUTINE timer (t)
!---------------------------------------------------------------
! Returns the elapsed time in seconds since the first call to
! 'timer ()'.
!
! N.B. The 'SYSTEM_CLOCK ()' intrinsic is dangerous in an HPF
! program, as it violates HPF semantics by potentially returning
! a different value on each processor. Do not use the result 't'
! in any expression that governs the program's control flow
! (e.g. an IF or WHILE condition)!
! We plan to provide a 'safe' version of 'SYSTEM_CLOCK ()'
! (e.g. one that returns the time as measured on a particular
! processor) in a future release of 'shpf'.
!---------------------------------------------------------------
REAL, INTENT (OUT) :: t
INTEGER :: cnt0, cnt, rate, max
SAVE cnt0 ! its initial value should be 0!
CALL SYSTEM_CLOCK (cnt, rate, max)
IF (cnt0 == 0) THEN
cnt0 = cnt
cnt = 0
ELSE IF (cnt >= cnt0) THEN
cnt = cnt - cnt0
ELSE
cnt = max - (cnt0 - cnt) + 1
ENDIF
IF (rate > 0) THEN
t = REAL (cnt) / REAL (rate)
ELSE
t = 0.0 !! no clock !!
ENDIF
RETURN
END
! }}}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -