⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sphere.cpp

📁 The Spectral Toolkit is a C++ spectral transform library written by Rodney James and Chuck Panaccion
💻 CPP
字号:
//// spectral toolkit // copyright (c) 2005 university corporation for atmospheric research// licensed under the gnu general public license//#include "alloc.h"#include "sphere.h"#include "decompose.h"#include "legendre.h"#include "alf.h"using namespace spectral;/// Constructor for spherical harmonic transform.////// \param n number of latitude points between the equator and pole/// \param k number of fields to transformsphere::sphere(int n,int k){  Nodes=2;  G=new group(Nodes);  T=new thread*[Nodes];  T[0]=new thread(2*n,k,2*n,0,k/2,0,G);  T[1]=new thread(2*n,k,2*n-1,1,(k+1)/2,k/2,G);    }/// Constructor for multiple thread spherical harmonic transform.////// \param N number of latitude points between the equator and pole/// \param K number of fields to transform/// \param threads number of threads (>2) used for transformsphere::sphere(int N,int K,int threads){  Nodes=max(threads,2);  G=new group(Nodes);  T=new thread*[Nodes];  int KL,K0,ML,M0;  int M=2*N;  for(int n=0;n<Nodes;n++)    {      decompose(K,Nodes,n,KL,K0);      if(n%2)	{	  decompose(M/2,Nodes/2,n/2,ML,M0);	  T[n]=new thread(M,K,2*ML-1,M0*2+1,KL,K0,G);	}      else	{	  decompose((M+1)/2,(Nodes+1)/2,n/2,ML,M0);	  T[n]=new thread(M,K,ML*2,M0*2,KL,K0,G);	}    }}/// Destructor for spherical harmonic transform.sphere::~sphere(){  for(int i=0;i<Nodes;i++)    delete T[i];      delete [] T;  delete G;}/// Analysis method for multiple instance spherical harmonic transform. /// Computes the spectral coefficients/// \f[ \xi_n^m = \sum_{j=0}^{2N-1} \hat{\xi}_j^m \overline{P_n^m}(\mu_j) w_j \f]/// where \f$ \mu_j \f$ are the Gaussian latitude points, \f$ w_j \f$ the Gaussian/// weights, and/// \f[ \hat{\xi}_j^m = \frac{1}{4N} \sum_{k=0}^{4N-1} g_{j,k} e^{-2 \pi i k m/4N} \f]/// the Fourier transform of the initial data along a longitude.  The \f$ \xi_n^m \f$ are/// computed for \f$ m=0, \ldots, 2N-1 \f$ and \f$ n(m)=m, \ldots, 2N-1 \f$ for each \f$ m \f$./// The input array g is of size g[K][2N][4N] and the spectral coefficients are stored in the output array /// sc as \f$ \xi_n^m \f$ = sc[k][m][n].  The triangular array formed with alloct may be used to store /// sc, but a rectangular array will work as well.void sphere::analysis(real ***g,complex ***sc){  for(int i=0;i<Nodes;i++)    T[i]->analysis(g,sc);  for(int i=0;i<Nodes;i++)    T[i]->wait();}/// Synthesis method for multiple instance spherical harmonic transform./// Assembles the 3-d real array in physical space from the spectral/// coeffcients by/// \f[ g_{j,k} = \sum_{m=0}^{4N-1} \sum_{n=m}^{2N} \xi_n^m \overline{P_n^m}(j) e^{2 \pi i k m/4N} \f]/// for each instance.void sphere::synthesis(complex ***sc,real ***g){  for(int i=0;i<Nodes;i++)    T[i]->synthesis(sc,g);  for(int i=0;i<Nodes;i++)    T[i]->wait();}/// Internal thread analysis driver.void sphere::thread::analysis(real ***g_,complex ***sc_){  g=g_;  sc=sc_;  Method=ANALYSIS;  spawn();}/// Internal thread synthesis driver.void sphere::thread::synthesis(complex ***sc_,real ***g_){  g=g_;  sc=sc_;  Method=SYNTHESIS;  spawn();}/// Internal thread method started when thread is spawned.void sphere::thread::start(){  switch(Method)    {    case ANALYSIS:      Analysis();      break;    case SYNTHESIS:      Synthesis();      break;    }}/// Internal thread join method.void sphere::thread::wait(){  join();}/// Internal thread constructor. Each thread works over a specified range of M=2N/// for a specified parity, computing every other m value./// \param nlat number of latitudes/// \param ninst number of instances/// \param ml length of m-block/// \param m0 start of m-block/// \param kl length of k-block/// \param k0 start of k-block/// \param GROUP group object for syncsphere::thread::thread(int nlat,int ninst,int ml,int m0,int kl,int k0,group *GROUP){  M=nlat;  K=ninst;  N=(M+1)/2;  M0=m0;  ML=ml;  K0=k0;  KL=kl;  Group=GROUP;  w=alloc<real>(M);     RFFT = new rfft(2*M);  SC=alloc<complex>(2,K,N);  G=alloc<complex>(2,K,N);   legendre Gaussian(M);  for(int i=0;i<M;i++)    w[i]=Gaussian.weight(i)/(real)(2*M);  M0=m0;  N=(M+1)/2;  gen=alloct<generator>(M);  seed =alloc<real>(2,N,N);  P = alloc<real>(2,N,N);  Pmn = alloc<real>(2,2,N,N);  for(int n=M0;n<M;n++)    {      alf A(n,M0);      for(int i=0;i<N;i++)	seed[(M0+n)%2][n/2][i]=A(Gaussian.point(i));     }  for(int m=0;m<M;m++)    {      for(int n=m;n<M;n++)        {	  if(m<2)            {	      gen[m][n].c0 = 0.0;	      gen[m][n].c1 = 0.0;	      gen[m][n].c2 = 0.0;            }	  else            {	      real A=(real)n;	      real B=(real)m;	      gen[m][n].c0 = std::sqrt(((2.0*A+1.0)*(A+B-2.0)*(A+B-3.0))/((2.0*A-3.0)*(A+B)*(A+B-1)));	      gen[m][n].c1 = std::sqrt(((2.0*A+1.0)*(A-B)*(A-B-1.0))/((2.0*A-3.0)*(A+B)*(A+B-1.0)));	      gen[m][n].c2 =-std::sqrt(((A-B+2.0)*(A-B+1.0))/((A+B)*(A+B-1.0)));               }        }    }}/// Internal thread desctuctor.sphere::thread::~thread(){   delete RFFT;  dealloc<real>(w);  dealloc<complex>(SC);  dealloc<complex>(G);  dealloc<real>(Pmn);  dealloc<real>(P);  dealloc<real>(seed);  dealloc<generator>(gen);    }/// Internal thread analysis engine.  Does forward recursion over the Pnm's on each pass.void sphere::thread::Analysis(){  for(int k=K0;k<KL+K0;k++)    {      RFFT->analysis(g[k],M);        for(int i=0;i<N-M%2;i++)          {	  for(int j=0;j<2*M;j++)            { 	      real t1=g[k][i ][j];	      real t2=g[k][M-i-1][j];	      g[k][i ][j] = w[i]*(t1+t2);	      g[k][M-i-1][j] = w[i]*(t1-t2);            }        }      if(M%2)        {	  for(int j=0;j<2*M;j++)	    g[k][N-1][j]= w[N-1]*g[k][N-1][j];        }  	      }  sync(Group);  int mx=M0;  for(int m=M0;m<ML+M0;m+=2)    {      if(m==0)        {	  for(int k=0;k<K;k++)            {	      for(int n=0;n<N;n++)                {		  G[0][k][n].re = g[k][n ][0];		  G[0][k][n].im = 0.0;		  G[1][k][n].re = g[k][M-n-1][0];		  G[1][k][n].im = 0.0;                }            }        }      else if((M%2==0)&&(m==M-1))        {	  for(int k=0;k<K;k++)            {	      for(int n=0;n<N;n++)                {		  G[0][k][n].re = g[k][n ][m*2-1];		  G[0][k][n].im = 0.0;		  G[1][k][n].re = g[k][M-n-1][m*2-1];		  G[1][k][n].im = 0.0;                }            }        }      else        {	  for(int k=0;k<K;k++)            {	      for(int n=0;n<N;n++)                {		  G[0][k][n].re = g[k][n ][m*2-1];		  G[0][k][n].im = g[k][n ][m*2];		  G[1][k][n].re = g[k][M-n-1][m*2-1];		  G[1][k][n].im = g[k][M-n-1][m*2];                }            }        }      int p=(mx/2+mx%2)%2;      for(int n=mx;n<M;n++)        {	  int ip=(n+mx)%2;	  if(mx==M0)            {	      for(int j=0;j<N;j++)                {		  P[ip][n/2-(mx+ip)/2][j]                    = Pmn[p][ip][n/2][j]                     = seed[ip][n/2][j];                    }            }	  else            {	      for(int j=0;j<N;j++)		P[ip][n/2-(mx+ip)/2][j]		  = Pmn[p][ip][n/2][j] 		  = Pmn[1-p][ip][n/2-1][j]*gen[mx][n].c0 		  + Pmn[1-p][ip][n/2][j]*gen[mx][n].c2 		  + Pmn[p][ip][n/2-1][j]*gen[mx][n].c1;            }        }          multiply(G[0],P[0],SC[0],K,M/2-(mx  )/2,N);      multiply(G[1],P[1],SC[1],K,M/2-(mx+1)/2,N);      mx+=2;      for(int k=0;k<K;k++)	for(int n=m;n<M;n++)	  sc[k][m][n]=SC[(m+n)%2][k][(n-m)/2];    }}/// Internal thread synthesis engine.  Does backard recursion over the Pnm's on each pass.  void sphere::thread::Synthesis(){  int mx=M0;  for(int m=M0;m<ML+M0;m+=2)    {      for(int k=0;k<K;k++)	for(int n=m;n<M;n++)	  SC[(n+m)%2][k][(n-m)/2]=sc[k][m][n];              int p=(mx/2+mx%2)%2;      for(int n=mx;n<M;n++)        {	  int ip=(n+mx)%2;	  if(mx==M0)            {	      for(int j=0;j<N;j++)		P[ip][j][n/2-(mx+ip)/2] 		  = Pmn[p][ip][n/2][j] 		  = seed[ip][n/2][j];                  }	  else            {	      for(int j=0;j<N;j++)		P[ip][j][n/2-(mx+ip)/2]		  = Pmn[p][ip][n/2][j] 		  = Pmn[1-p][ip][n/2-1][j]*gen[mx][n].c0 		  + Pmn[1-p][ip][n/2][j]*gen[mx][n].c2 		  + Pmn[p][ip][n/2-1][j]*gen[mx][n].c1;            }        }          multiply(SC[0],P[0],G[0],K,N,M/2-(mx  )/2);      multiply(SC[1],P[1],G[1],K,N,M/2-(mx+1)/2);      mx+=2;      if(m==0)        {	  for(int k=0;k<K;k++)            {	      for(int n=0;n<N;n++)                {		  real t1 = G[0][k][n].re;		  real t2 = G[1][k][n].re;		  g[k][n][m] = t1+t2;		  g[k][M-n-1][m] = t1-t2;		  g[k][n][2*M-1] = 0.0;		  g[k][M-n-1][2*M-1] = 0.0;                }            }        }      else if((M%2==0)&&(m==M-1))        {	  for(int k=0;k<K;k++)            {	      for(int n=0;n<N;n++)                {		  real t1 = G[0][k][n].re;		  real t2 = G[1][k][n].re;		  g[k][n][m*2-1] = t1 + t2;		  g[k][M-n-1][m*2-1] = t1-t2;		  g[k][n][m*2] = 0.0;		  g[k][M-n-1][m*2] = 0.0;                }            }        }      else         {	  for(int k=0;k<K;k++)            {	      for(int n=0;n<N;n++)                {		  real t1 = G[0][k][n].re;		  real t2 = G[0][k][n].im;		  real t3 = G[1][k][n].re;		  real t4 = G[1][k][n].im;		  g[k][n][m*2-1] = t1 + t3;		  g[k][n][m*2] = t2 + t4;		  g[k][M-n-1][m*2-1] = t1 - t3;		  g[k][M-n-1][m*2] = t2 - t4;                }            }        }    }  sync(Group);  for(int k=K0;k<KL+K0;k++)    RFFT->synthesis(g[k],M);}/// Internal thread multiplication.  Performs matrix multiply for sum assembly.void sphere::thread::multiply(complex **A,real **B,complex **C,int M1,int N1,int K1){  complex c00,c01,c10,c11;  const int BlockM1=32;  const int BlockN1=32;      for(int mb=0;mb<M1;mb+=BlockM1)    {      int M1B=min(mb+BlockM1,M1);      for(int nb=0;nb<N1;nb+=BlockN1)        {	  int N1B=min(nb+BlockN1,N1);	  for(int m=mb;m<M1B-1;m+=2)            {	      for(int n=nb;n<N1B-1;n+=2)                {		  c00.re=c00.im=0.0;		  c01.re=c01.im=0.0;		  c10.re=c10.im=0.0;		  c11.re=c11.im=0.0;		  for(int k=0;k<K1;k++)                    {		      c00.re+=A[m][k].re*B[n][k];		      c00.im+=A[m][k].im*B[n][k];		      c01.re+=A[m][k].re*B[n+1][k];		      c01.im+=A[m][k].im*B[n+1][k];		      c10.re+=A[m+1][k].re*B[n][k];		      c10.im+=A[m+1][k].im*B[n][k];		      c11.re+=A[m+1][k].re*B[n+1][k];		      c11.im+=A[m+1][k].im*B[n+1][k];                    }		  C[m  ][n  ].re=c00.re;		  C[m  ][n  ].im=c00.im;		  C[m  ][n+1].re=c01.re;		  C[m  ][n+1].im=c01.im;		  C[m+1][n  ].re=c10.re;		  C[m+1][n  ].im=c10.im;		  C[m+1][n+1].re=c11.re;		  C[m+1][n+1].im=c11.im;                }            }	  if((M1B-mb)%2==1)            {	      for(int n=nb;n<N1B;n++)                {		  c00.re=c00.im=0.0;		  for(int k=0;k<K1;k++)                    {		      c00.re+=A[M1B-1][k].re*B[n][k];		      c00.im+=A[M1B-1][k].im*B[n][k];                    }		  C[M1B-1][n].re=c00.re;		  C[M1B-1][n].im=c00.im;                }            }	  if((N1B-nb)%2==1)            {	      for(int m=mb;m<M1B;m++)                {		  c00.re=c00.im=0.0;		  for(int k=0;k<K1;k++)                    {		      c00.re+=A[m][k].re*B[N1B-1][k];		      c00.im+=A[m][k].im*B[N1B-1][k];                    }		      		  C[m][N1B-1].re=c00.re;		  C[m][N1B-1].im=c00.im;                }            }        }    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -