⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 init.f90

📁 基于linux操作下的fortran快速付立变换的程序
💻 F90
字号:
! f90 -O5 -arch ev67 -omp -lmpi -ldxml testall.f90	Implicit real*8 (a-h,o-z)	logical parallel        integer count1,count2,count_rate,count_max	parameter(parallel=.true.,ndat=64,ntime=2)	parameter(itest=3)        include 'mpif.h'	dimension tuma(2),suma(2)! distributed Fourier space wavefunction        REAL(KIND=8), ALLOCATABLE, DIMENSION(:,:,:,:,:) :: zf! distributed real space wavefunction        REAL(KIND=8), ALLOCATABLE, DIMENSION(:,:,:,:,:) :: zr! distributed real space potential        REAL(KIND=8), ALLOCATABLE, DIMENSION(:,:,:) :: pot,rho!$      interface!$        integer ( kind=4 ) function omp_get_num_threads ( )!$        end function omp_get_num_threads!$      end interface!$      interface!$        integer ( kind=4 ) function omp_get_thread_num ( )!$        end function omp_get_thread_num!$      end interface        INTEGER, DIMENSION(7,104) :: idata         data ((idata(i,j),i=1,7),j=1,104) /                       &            3,   3, 1, 1, 1, 1, 1,       4,   4, 1, 1, 1, 1, 1,   &            5,   5, 1, 1, 1, 1, 1,       6,   6, 1, 1, 1, 1, 1,   &            8,   8, 1, 1, 1, 1, 1,       9,   3, 3, 1, 1, 1, 1,   &           10,   5, 2, 1, 1, 1, 1,                                &           12,   4, 3, 1, 1, 1, 1,      15,   5, 3, 1, 1, 1, 1,   &           16,   4, 4, 1, 1, 1, 1,      18,   6, 3, 1, 1, 1, 1,   &           20,   5, 4, 1, 1, 1, 1,      24,   8, 3, 1, 1, 1, 1,   &           25,   5, 5, 1, 1, 1, 1,      27,   3, 3, 3, 1, 1, 1,   &           30,   6, 5, 1, 1, 1, 1,      32,   8, 4, 1, 1, 1, 1,   &           36,   4, 3, 3, 1, 1, 1,      40,   8, 5, 1, 1, 1, 1,   &           45,   5, 3, 3, 1, 1, 1,      48,   4, 4, 3, 1, 1, 1,   &           50,   5, 5, 2, 1, 1, 1,                                &           54,   6, 3, 3, 1, 1, 1,      60,   5, 4, 3, 1, 1, 1,   &           64,   8, 8, 1, 1, 1, 1,      72,   8, 3, 3, 1, 1, 1,   &           75,   5, 5, 3, 1, 1, 1,      80,   5, 4, 4, 1, 1, 1,   &           81,   3, 3, 3, 3, 1, 1,      90,   6, 5, 3, 1, 1, 1,   &           96,   8, 4, 3, 1, 1, 1,     100,   5, 5, 4, 1, 1, 1,   &          108,   4, 3, 3, 3, 1, 1,     120,   8, 5, 3, 1, 1, 1,   &          125,   5, 5, 5, 1, 1, 1,     128,   8, 4, 4, 1, 1, 1,   &          135,   5, 3, 3, 3, 1, 1,     144,   6, 8, 3, 1, 1, 1,   &          150,   6, 5, 5, 1, 1, 1,     160,   8, 5, 4, 1, 1, 1,   &          162,   6, 3, 3, 3, 1, 1,     180,   5, 4, 3, 3, 1, 1,   &          192,   6, 8, 4, 1, 1, 1,     200,   8, 5, 5, 1, 1, 1,   &          216,   8, 3, 3, 3, 1, 1,     225,   5, 5, 3, 3, 1, 1,   &          240,   6, 8, 5, 1, 1, 1,     243,   3, 3, 3, 3, 3, 1,   &          250,   5, 5, 5, 2, 1, 1,                                &          256,   8, 8, 4, 1, 1, 1,     270,   6, 5, 3, 3, 1, 1,   &          288,   8, 4, 3, 3, 1, 1,     300,   5, 5, 4, 3, 1, 1,   &          320,   5, 4, 4, 4, 1, 1,     324,   4, 3, 3, 3, 3, 1,   &          360,   8, 5, 3, 3, 1, 1,     375,   5, 5, 5, 3, 1, 1,   &          384,   8, 4, 4, 3, 1, 1,     400,   5, 5, 4, 4, 1, 1,   &          405,   5, 3, 3, 3, 3, 1,     432,   4, 4, 3, 3, 3, 1,   &          450,   6, 5, 5, 3, 1, 1,     480,   8, 5, 4, 3, 1, 1,   &          486,   6, 3, 3, 3, 3, 1,     500,   5, 5, 5, 4, 1, 1,   &          512,   8, 8, 8, 1, 1, 1,     540,   5, 4, 3, 3, 3, 1,   &          576,   4, 4, 4, 3, 3, 1,     600,   8, 5, 5, 3, 1, 1,   &          625,   5, 5, 5, 5, 1, 1,     640,   8, 5, 4, 4, 1, 1,   &          648,   8, 3, 3, 3, 3, 1,     675,   5, 5, 3, 3, 3, 1,   &          720,   5, 4, 4, 3, 3, 1,     729,   3, 3, 3, 3, 3, 3,   &          750,   6, 5, 5, 5, 1, 1,     768,   4, 4, 4, 4, 3, 1,   &          800,   8, 5, 5, 4, 1, 1,     810,   6, 5, 3, 3, 3, 1,   &          864,   8, 4, 3, 3, 3, 1,     900,   5, 5, 4, 3, 3, 1,   &          960,   5, 4, 4, 4, 3, 1,     972,   4, 3, 3, 3, 3, 3,   &         1000,   8, 5, 5, 5, 1, 1,    1024,   4, 4, 4, 4, 4, 1,   &                                      1080,   8, 5, 3, 3, 3, 1,   &         1152,   8, 4, 4, 3, 3, 1,    1200,   5, 5, 4, 4, 3, 1,   &         1250,   5, 5, 5, 5, 2, 1,    1280,   5, 4, 4, 4, 4, 1,   &         1296,   4, 4, 3, 3, 3, 3,    1350,   6, 5, 5, 3, 3, 1,   &         1440,   6, 5, 4, 4, 3, 1,    1458,   6, 3, 3, 3, 3, 3,   &         1500,   5, 5, 5, 4, 3, 1,    1536,   8, 4, 4, 4, 3, 1,   &         1600,   8, 8, 5, 5, 1, 1,    1620,   5, 4, 3, 3, 3, 3,   &         1728,   8, 8, 3, 3, 3, 1,    1800,   8, 5, 5, 3, 3, 1,   &         1920,   8, 5, 4, 4, 3, 1,    1944,   8, 3, 3, 3, 3, 3,   &         2000,   5, 5, 5, 4, 4, 1,    2048,   8, 4, 4, 4, 4, 1    /! Start MPI in parallel version	if (parallel) then        call MPI_INIT(ierr)        call MPI_COMM_RANK(MPI_COMM_WORLD,iproc,ierr)        call MPI_COMM_SIZE(MPI_COMM_WORLD,nproc,ierr)!        write(6,*) 'mpi started',iproc,nproc	else	nproc=1	iproc=0	endif! Start OpenMP        npr=1!$omp parallel  default(private) shared(npr)!$      iam=omp_get_thread_num()!$      if (iam.eq.0) npr=omp_get_num_threads()!$omp end parallel        if (iproc.eq.0) then	open(unit=66,file='results',status='unknown')        write(66,*) ' nproc  ',nproc,' nthreads ',npr        write(66,*) ' ndat   ',ndat,' ntime ',ntime        endif!! Dimension m1,m2,m3 of the Fourier space cube !!           (Real space dimensions n1,n2,n3 are twice as large)!! 1) Hard coded dimensions       m1=63 ; m2=63 ; m3=63 !! 2) Read dimensions!	if (iproc.eq.0) then!	write(6,*) 'm1=? , m2=? , m3=?'!	read*, m1 , m2 , m3!	endif!	if (parallel) then!	call MPI_BCAST(m1,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)!	call MPI_BCAST(m2,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)!	call MPI_BCAST(m3,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)!	write(6,*) iproc, m1 , m2 , m3!	endif!! 3) loop over all possible dimensions!	do 9999,k1=1,104!	m1=idata(1,k1)!	do 9999,k2=1,104!	m2=idata(1,k2)!	do 9999,k3=1,104!	m3=idata(1,k3)!	if (m1*m2*m3.le.64**3  .and. max(m1,m2,m3).le.128) then!	if (iproc.eq.0) then!	write(6,*) '        '!	write(6,*) 'k1,k2,k3',k1,k2,k3!	write(6,*) 'm1,m2,m3',m1,m2,m3!	write(6,*) '        '!	endif! Fourier space grid dimensions (suitable for number of processors)        md1=m1+0;  md3=m3+0        md2=m2+0150	if (nproc*(md2/nproc).lt.m2) then	md2=md2+1	goto 150	endif! real space grid dimension (suitable for number of processors)	if (itest.eq.1) then	n1=m1; n2=m2; n3=m3	else	n1=2*m1; n2=2*m2; n3=2*m3	endif!       add buffer to avoid cache trashing        nd1=n1+0;  nd2=n2+0        nd3=n3+0250	if (nproc*(nd3/nproc).lt.n3) then	nd3=nd3+1	goto 250	endif	if (itest.eq.1) then 	nd2=md2	md3=nd3	md1=nd1	endif	if (itest.eq.1) then	  if (iproc.eq.0) write(6,*) 'testing ordinary FFT'          allocate(zr(2,nd1,nd2,nd3/nproc,ndat),zf(2,nd1,nd3,nd2/nproc,ndat))	else if (itest.eq.2) then	  if (iproc.eq.0) write(6,*) 'testing wavefunction FFT'          allocate(zr(2,nd1,nd2,nd3/nproc,ndat),zf(2,md1,md3,md2/nproc,ndat))	else if (itest.eq.3) then	  if (iproc.eq.0) write(6,*) 'testing APPLYPOT'          allocate(zf(2,md1,md3,md2/nproc,ndat),pot(nd1,nd2,nd3/nproc))	  do 68,i3=1,nd3/nproc	  do 68,i2=1,nd2	  do 68,i1=1,nd168	  pot(i1,i2,i3)=1.d0	else if (itest.eq.4) then	  if (iproc.eq.0) write(6,*) 'testing Accumulation of rho'          allocate(zf(2,md1,md3,md2/nproc,ndat),rho(nd1,nd2,nd3/nproc))	else	  if (iproc.eq.0) write(6,*) 'UNDEFINED itest'	  if (nproc.gt.1) call MPI_ABORT(MPI_COMM_WORLD,ierr)	  stop	endif        if (iproc.eq.0) then        Print*,'                    '        Print*,'                    '46      format(a,3(2x,i4))        write(6,46) 'FFT TEST for n1 ,n2 ,n3 =',n1,n2,n3        write(6,46) '          nd1 ,nd2 ,nd3 =',nd1,nd2,nd3        write(6,46) '          md1 ,md2 ,md3 =',md1,md2,md3        write(6,*) '          nproc  ',nproc,' nthreads ',npr        write(6,*) '          ndat   ',ndat,' ntime ',ntime        Print*,'                    '        endif	do idat=1,ndat    	call initf(idat,m1,m2,m3,md1,md2,md3,iproc,nproc,zf(1,1,1,1,idat))	enddo!Correctness 	if (itest.eq.1) then          icplex=2          call back(icplex,ndat,n1,n2,n3,nd1,nd2,nd3,nproc,iproc,zf,zr)!	  call slice(nproc,iproc,n1,n2,n3,n1,n2,n3,nd1,nd2,nd3,nd1,nd2,nd3,zf,zr)          call forw(icplex,ndat,n1,n2,n3,nd1,nd2,nd3,nproc,iproc,zr,zf)	else if (itest.eq.2) then          call back_wf(ndat,n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,nproc,iproc,zf,zr)!	  call slice(nproc,iproc,m1,m2,m3,n1,n2,n3,md1,md2,md3,nd1,nd2,nd3,zf,zr)          call forw_wf(ndat,n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,nproc,iproc,zr,zf)	else if (itest.eq.3) then          call applypot(ndat,n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,nproc,iproc,pot,zf)	else if (itest.eq.4) then          call accrho(ndat,n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,nproc,iproc,zf,rho)	endif	scal=1.d0/(n1*n2*n3)	if (itest.lt.4) then     do idat=1,ndat    	  call vgl(idat,scal,m1,m2,m3,md1,md2,md3,iproc,nproc,zf(1,1,1,1,idat),tum)	  if (nproc.gt.1) then            call MPI_REDUCE(tum,sum,1,MPI_double_precision,  &                        MPI_SUM,0,MPI_COMM_WORLD,ierr)	  else	    sum=tum	  endif	if (iproc.eq.0) then           write(6,*) 'error back-forward',idat,sqrt(sum)	  if (sqrt(sum).gt.1.d-10)  then	    write(6,*) 'ALGEBRAIC ERROR'	    if (nproc.gt.1) call MPI_ABORT(MPI_COMM_WORLD,ierr)	    stop	  endif	endif     enddo	else	call fnorm(ndat,m1,m2,m3,md1,md2,md3,iproc,nproc,zf,tuma(1)) 	call rnorm(n1,n2,n3,nd1,nd2,nd3,iproc,nproc,rho,tuma(2))	  if (nproc.gt.1) then            call MPI_REDUCE(tuma,suma,2,MPI_double_precision,  &                        MPI_SUM,0,MPI_COMM_WORLD,ierr)	  else	    suma(1)=tuma(1)	    suma(2)=tuma(2)	  endif	if (iproc.eq.0) then 	  tt=abs(1.d0-scal*suma(2)/suma(1))	write(6,*) 'rel DIFF. FOURIER, REAL NORM',tt	  if (tt.gt.1.d-10)  then	    write(6,*) 'ALGEBRAIC ERROR'	    if (nproc.gt.1) call MPI_ABORT(MPI_COMM_WORLD,ierr)	    stop	  endif	  endif	endif	if (nproc.gt.1) call MPI_BARRIER(MPI_COMM_WORLD,ierr)! Timings        call cpu_time(t1)        call system_clock(count1,count_rate,count_max)	if (itest.eq.1) then	do irep=1,ntime          icplex=2          call back(icplex,ndat,n1,n2,n3,nd1,nd2,nd3,nproc,iproc,zf,zr)          call forw(icplex,ndat,n1,n2,n3,nd1,nd2,nd3,nproc,iproc,zr,zf)	enddo	else if (itest.eq.2) then	do irep=1,ntime          call back_wf(ndat,n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,nproc,iproc,zf,zr)          call forw_wf(ndat,n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,nproc,iproc,zr,zf)	enddo	else if (itest.eq.3) then	do irep=1,ntime          call applypot(ndat,n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,nproc,iproc,pot,zf)	enddo	else if (itest.eq.4) then	do irep=1,ntime          call accrho(ndat,n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,nproc,iproc,zf,rho)	enddo	endif        call cpu_time(t2)        call system_clock(count2,count_rate,count_max)        time=(t2-t1)/(ntime*ndat)        tela=(count2-count1)/(float(count_rate)*ntime*ndat)	write(6,'(a,x,i4,2(x,e10.3))') 'iproc, CPU, elapsed time for forw + backw', &                                         iproc,time,tela	if (time*tela.ne.0.d0) then	if (itest.eq.1) then	flops=10.d0*(n1*n2*n3)* &        (log(1.d0*n2)/log(2.d0)+log(1.d0*n1)/log(2.d0)+log(1.d0*n3)/log(2.d0))	else if (itest.eq.2) then	flops=(10.d0/4.d0)*(n1*n2*n3)* &        (log(1.d0*n2)/log(2.d0)+2*log(1.d0*n1)/log(2.d0)+4*log(1.d0*n3)/log(2.d0))	else if (itest.eq.3) then	flops=2*n1*n2*n3 + (10.d0/4.d0)*(n1*n2*n3)* &        (log(1.d0*n2)/log(2.d0)+2*log(1.d0*n1)/log(2.d0)+4*log(1.d0*n3)/log(2.d0))	else if (itest.eq.4) then	flops=4*n1*n2*n3 + (5.d0/4.d0)*(n1*n2*n3)* &        (log(1.d0*n2)/log(2.d0)+2*log(1.d0*n1)/log(2.d0)+4*log(1.d0*n3)/log(2.d0))	endif        if (iproc.eq.0) write(6,'(a,x,i4,(x,e12.5))') &                          'iproc, Flops forw + backw',iproc,flops      	if (iproc.eq.0) write(6,'(a,2(x,f10.0))') 'speed (Mflops)', &                         1.d-6*flops/time,1.d-6*flops/tela	if (iproc.eq.0) write(66,'(i4,3(x,i4),2(x,e10.3),(x,f10.0))') &                         iproc,m1,m2,m3,time,tela,1.d-6*flops/tela	endif        if (itest.eq.1) then        deallocate(zr,zf)        else if (itest.eq.2) then        deallocate(zr,zf)        else if (itest.eq.3) then        deallocate(zf,pot)        else if (itest.eq.4) then        deallocate(zf,rho)	endif!	endif!9999	continue	if (nproc.gt.1) call MPI_FINALIZE(ierr)	end

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -