
pdgstrs.c.bak

LU factorization for solving a system of matrix equations
Page 1 of 3
 */
    Glu_persist_t *Glu_persist = LUstruct->Glu_persist;
    LocalLU_t *Llu = LUstruct->Llu;
    double alpha = 1.0;
    double zero = 0.0;
    double *lsum;  /* Local running sum of the updates to B-components */
    double *x;     /* X component at step k. */
		   /* NOTE: x and lsum are of same size. */
    double *lusup, *dest;
    double *recvbuf, *tempv;
    double *rtemp; /* Result of full matrix-vector multiply. */
    int_t  **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr;
    int_t  *Urbs, *Urbs1; /* Number of row blocks in each block column of U. */
    Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */
    int_t  **Ucb_valptr;      /* Vertical linked list pointing to Unzval[] */
    int_t  iam, kcol, krow, mycol, myrow;
    int_t  i, ii, il, j, jj, k, lb, ljb, lk, lptr, luptr;
    int_t  nb, nlb, nub, nsupers;
    int_t  *xsup, *supno, *lsub, *usub;
    int_t  *ilsum;    /* Starting position of each supernode in lsum (LOCAL) */
    int_t  Pc, Pr;
    int    knsupc, nsupr;
    int    ldalsum;   /* Number of lsum entries locally owned. */
    int    maxrecvsz, p, pi;
    int_t  **Lrowind_bc_ptr;
    double **Lnzval_bc_ptr;
    MPI_Status status;
#ifdef ISEND_IRECV
    MPI_Request *send_req, recv_req;
#endif
    pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm;

    /*-- Counts used for L-solve --*/
    int_t  *fmod;         /* Modification count for L-solve --
                             Count the number of local block products to
                             be summed into lsum[lk]. */
    int_t  **fsendx_plist = Llu->fsendx_plist;
    int_t  nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */
    int_t  *frecv;        /* Count of lsum[lk] contributions to be received
                             from processes in this row.
                             It is only valid on the diagonal processes. */
    int_t  nfrecvmod = 0; /* Count of total modifications to be recv'd. */
    int_t  nleaf = 0, nroot = 0;

    /*-- Counts used for U-solve --*/
    int_t  *bmod;         /* Modification count for U-solve. */
    int_t  **bsendx_plist = Llu->bsendx_plist;
    int_t  nbrecvx = Llu->nbrecvx; /* Number of X components to be recv'd. */
    int_t  *brecv;        /* Count of modifications to be recv'd from
                             processes in this row. */
    int_t  nbrecvmod = 0; /* Count of total modifications to be recv'd. */
    double t;
#if ( DEBUGlevel>=2 )
    int_t Ublocks = 0;
#endif

    t = SuperLU_timer_();

    /* Test input parameters. */
    *info = 0;
    if ( n < 0 ) *info = -1;
    else if ( nrhs < 0 ) *info = -9;
    if ( *info ) {
	pxerbla("PDGSTRS", grid, -*info);
	return;
    }

    /*
     * Initialization.
     */
    iam = grid->iam;
    Pc = grid->npcol;
    Pr = grid->nprow;
    myrow = MYROW( iam, grid );
    mycol = MYCOL( iam, grid );
    xsup = Glu_persist->xsup;
    supno = Glu_persist->supno;
    nsupers = supno[n-1] + 1;
    Lrowind_bc_ptr = Llu->Lrowind_bc_ptr;
    Lnzval_bc_ptr = Llu->Lnzval_bc_ptr;
    nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter pdgstrs()");
#endif

    stat->ops[SOLVE] = 0.0;
    Llu->SolveMsgSent = 0;

    /* Save the count to be altered so it can be used by
       subsequent call to PDGSTRS.
    */
    if ( !(fmod = intMalloc_dist(nlb)) )
	ABORT("Calloc fails for fmod[].");
    for (i = 0; i < nlb; ++i) fmod[i] = Llu->fmod[i];
    if ( !(frecv = intMalloc_dist(nlb)) )
	ABORT("Malloc fails for frecv[].");
    Llu->frecv = frecv;

#ifdef ISEND_IRECV
    k = SUPERLU_MAX( Llu->nfsendx, Llu->nbsendx ) + nlb;
    if ( !(send_req = (MPI_Request*) SUPERLU_MALLOC(k*sizeof(MPI_Request))) )
	ABORT("Malloc fails for send_req[].");
#endif

#ifdef _CRAY
    ftcs1 = _cptofcd("L", strlen("L"));
    ftcs2 = _cptofcd("N", strlen("N"));
    ftcs3 = _cptofcd("U", strlen("U"));
#endif

    /* Obtain ilsum[] and ldalsum for process column 0. */
    ilsum = Llu->ilsum;
    ldalsum = Llu->ldalsum;

    /* Allocate working storage. */
    knsupc = sp_ienv_dist(3);
    maxrecvsz = knsupc * nrhs + SUPERLU_MAX( XK_H, LSUM_H );
    if ( !(lsum = doubleCalloc_dist(((size_t)ldalsum)*nrhs + nlb*LSUM_H)) )
	ABORT("Calloc fails for lsum[].");
    if ( !(x = doubleMalloc_dist(ldalsum * nrhs + nlb * XK_H)) )
	ABORT("Malloc fails for x[].");
    if ( !(recvbuf = doubleMalloc_dist(maxrecvsz)) )
	ABORT("Malloc fails for recvbuf[].");
    if ( !(rtemp = doubleCalloc_dist(maxrecvsz)) )
	ABORT("Malloc fails for rtemp[].");

    /*---------------------------------------------------
     * Forward solve Ly = b.
     *---------------------------------------------------*/

    /* Redistribute B into X on the diagonal processes. */
    pdReDistribute_B_to_X(B, m_loc, nrhs, ldb, fst_row, ilsum, x,
			  ScalePermstruct, Glu_persist, grid, SOLVEstruct);

    /* Set up the headers in lsum[]. */
    ii = 0;
    for (k = 0; k < nsupers; ++k) {
	knsupc = SuperSize( k );
	krow = PROW( k, grid );
	if ( myrow == krow ) {
	    lk = LBi( k, grid );   /* Local block number. */
	    il = LSUM_BLK( lk );
	    lsum[il - LSUM_H] = k; /* Block number prepended in the header. */
	}
	ii += knsupc;
    }

    /*
     * Compute frecv[] and nfrecvmod counts on the diagonal processes.
     */
    {
	superlu_scope_t *scp = &grid->rscp;

	for (k = 0; k < nsupers; ++k) {
	    krow = PROW( k, grid );
	    if ( myrow == krow ) {
		lk = LBi( k, grid );    /* Local block number. */
		kcol = PCOL( k, grid ); /* Root process in this row scope. */
		if ( mycol != kcol && fmod[lk] )
		    i = 1;  /* Contribution from non-diagonal process. */
		else i = 0;
		MPI_Reduce( &i, &frecv[lk], 1, mpi_int_t,
			   MPI_SUM, kcol, scp->comm );
		if ( mycol == kcol ) { /* Diagonal process. */
		    nfrecvmod += frecv[lk];
		    if ( !frecv[lk] && !fmod[lk] ) ++nleaf;
#if ( DEBUGlevel>=2 )
		    printf("(%2d) frecv[%4d]  %2d\n", iam, k, frecv[lk]);
		    assert( frecv[lk] < Pc );
#endif
		}
	    }
	}
    }

    /* ---------------------------------------------------------
       Solve the leaf nodes first by all the diagonal processes.
       --------------------------------------------------------- */
#if ( DEBUGlevel>=1 )
    printf("(%2d) nleaf %4d\n", iam, nleaf);
#endif
    for (k = 0; k < nsupers && nleaf; ++k) {
	krow = PROW( k, grid );
	kcol = PCOL( k, grid );
	if ( myrow == krow && mycol == kcol ) { /* Diagonal process */
	    knsupc = SuperSize( k );
	    lk = LBi( k, grid );
	    if ( frecv[lk]==0 && fmod[lk]==0 ) {
		fmod[lk] = -1;  /* Do not solve X[k] in the future. */
		ii = X_BLK( lk );
		lk = LBj( k, grid ); /* Local block number, column-wise.
		*/
		lsub = Lrowind_bc_ptr[lk];
		lusup = Lnzval_bc_ptr[lk];
		nsupr = lsub[1];
#ifdef _CRAY
		STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha,
		      lusup, &nsupr, &x[ii], &knsupc);
#else
		dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha,
		       lusup, &nsupr, &x[ii], &knsupc);
#endif
		stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs;
		--nleaf;
#if ( DEBUGlevel>=2 )
		printf("(%2d) Solve X[%2d]\n", iam, k);
#endif
		/*
		 * Send Xk to process column Pc[k].
		 */
		for (p = 0; p < Pr; ++p) {
		    if ( fsendx_plist[lk][p] != EMPTY ) {
			pi = PNUM( p, kcol, grid );
#ifdef ISEND_IRECV
			MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H,
				   MPI_DOUBLE, pi, Xk, grid->comm,
				   &send_req[Llu->SolveMsgSent++]);
#else
			MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H,
				 MPI_DOUBLE, pi, Xk, grid->comm );
#endif
#if ( DEBUGlevel>=2 )
			printf("(%2d) Sent X[%2.0f] to P %2d\n",
			       iam, x[ii-XK_H], pi);
#endif
		    }
		}

		/*
		 * Perform local block modifications: lsum[i] -= L_i,k * X[k]
		 */
		nb = lsub[0] - 1;
		lptr = BC_HEADER + LB_DESCRIPTOR + knsupc;
		luptr = knsupc; /* Skip diagonal block L(k,k). */

		dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k,
			   fmod, nb, lptr, luptr, xsup, grid, Llu,
			   send_req, stat);
	    }
	} /* if diagonal process ... */
    } /* for k ... */

    /* -----------------------------------------------------------
       Compute the internal nodes asynchronously by all processes.
       ----------------------------------------------------------- */
#if ( DEBUGlevel>=1 )
    printf("(%2d) nfrecvx %4d,  nfrecvmod %4d,  nleaf %4d\n",
	   iam, nfrecvx, nfrecvmod, nleaf);
#endif

    while ( nfrecvx || nfrecvmod ) { /* While not finished. */

	/* Receive a message. */
#ifdef ISEND_IRECV
	/* -MPI- FATAL: Remote protocol queue full */
	MPI_Irecv( recvbuf, maxrecvsz, MPI_DOUBLE,
		   MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &recv_req );
	MPI_Wait( &recv_req, &status );
#else
	MPI_Recv( recvbuf, maxrecvsz, MPI_DOUBLE,
		  MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status );
#endif
	k = *recvbuf;
#if ( DEBUGlevel>=2 )
	printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG);
#endif

	switch ( status.MPI_TAG ) {
	  case Xk:
	      --nfrecvx;
	      lk = LBj( k, grid ); /* Local block number, column-wise. */
	      lsub = Lrowind_bc_ptr[lk];
	      lusup = Lnzval_bc_ptr[lk];
	      if ( lsub ) {
		  nb   = lsub[0];
		  lptr = BC_HEADER;
		  luptr = 0;
		  knsupc = SuperSize( k );

		  /*
		   * Perform local block modifications: lsum[i] -= L_i,k * X[k]
		   */
		  dlsum_fmod(lsum, x, &recvbuf[XK_H], rtemp, nrhs, knsupc, k,
			     fmod, nb, lptr, luptr, xsup, grid, Llu,
			     send_req, stat);
	      } /* if lsub */
	      break;

	  case LSUM: /* Receiver must be a diagonal process */
	      --nfrecvmod;
	      lk = LBi( k, grid ); /* Local block number, row-wise. */
	      ii = X_BLK( lk );
	      knsupc = SuperSize( k );
	      tempv = &recvbuf[LSUM_H];
	      RHS_ITERATE(j) {
		  for (i = 0; i < knsupc; ++i)
		      x[i + ii + j*knsupc] += tempv[i + j*knsupc];
	      }
	      if ( (--frecv[lk])==0 && fmod[lk]==0 ) {
		  fmod[lk] = -1; /* Do not solve X[k] in the future. */
		  lk = LBj( k, grid ); /* Local block number, column-wise.
		  */
		  lsub = Lrowind_bc_ptr[lk];
		  lusup = Lnzval_bc_ptr[lk];
		  nsupr = lsub[1];
#ifdef _CRAY
		  STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha,
			lusup, &nsupr, &x[ii], &knsupc);
#else
		  dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha,
			 lusup, &nsupr, &x[ii], &knsupc);
#endif
		  stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs;
#if ( DEBUGlevel>=2 )
		  printf("(%2d) Solve X[%2d]\n", iam, k);
#endif
		  /*
		   * Send Xk to process column Pc[k].
		   */
		  kcol = PCOL( k, grid );
		  for (p = 0; p < Pr; ++p) {
		      if ( fsendx_plist[lk][p] != EMPTY ) {
			  pi = PNUM( p, kcol, grid );
#ifdef ISEND_IRECV
			  MPI_Isend( &x[ii-XK_H], knsupc * nrhs + XK_H,
				     MPI_DOUBLE, pi, Xk, grid->comm,
				     &send_req[Llu->SolveMsgSent++]);
#else
			  MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H,
				    MPI_DOUBLE, pi, Xk, grid->comm );
#endif
#if ( DEBUGlevel>=2 )
			  printf("(%2d) Sent X[%2.0f] to P %2d\n",
				 iam, x[ii-XK_H], pi);
#endif
		      }
		  }

		  /*
		   * Perform local block modifications.
		   */
		  nb = lsub[0] - 1;
		  lptr = BC_HEADER + LB_DESCRIPTOR + knsupc;
		  luptr = knsupc; /* Skip diagonal block L(k,k). */

		  dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k,
			     fmod, nb, lptr, luptr, xsup, grid, Llu,
			     send_req, stat);
	      } /* if */
	      break;

#if ( DEBUGlevel>=1 )
	  default:
	      printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG);
	      break;
#endif
	  } /* switch */
    } /* while not finished ... */

#if ( PRNTlevel>=2 )
    t = SuperLU_timer_() - t;
    if ( !iam ) printf(".. L-solve time\t%8.2f\n", t);
    t = SuperLU_timer_();
#endif

#if ( DEBUGlevel==2 )
    {
      printf("(%d) .. After L-solve: y =\n", iam);
      for (i = 0, k = 0; k < nsupers; ++k) {
	  krow = PROW( k, grid );
	  kcol = PCOL( k, grid );
	  if ( myrow == krow && mycol == kcol ) { /* Diagonal process */
	      knsupc = SuperSize( k );
	      lk = LBi( k, grid );
	      ii = X_BLK( lk );
	      for (j = 0; j < knsupc; ++j)
		printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+j, x[ii+j]);
	      fflush(stdout);
	  }
	  MPI_Barrier( grid->comm );
      }
    }
#endif

    SUPERLU_FREE(fmod);
    SUPERLU_FREE(frecv);
    SUPERLU_FREE(rtemp);

#ifdef ISEND_IRECV
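The listing breaks off here (page 1 of 3). The per-supernode kernel in both solve branches above is the BLAS call dtrsm_("L", "L", "N", "U", ...), which applies forward substitution with the dense diagonal block of L, treated as unit lower triangular, to the corresponding slice of x. The standalone sketch below is not part of pdgstrs.c; it only demonstrates what that call computes on a tiny 3x3 block, with a leading dimension larger than the block size to mirror how lusup is stored with lda = nsupr >= knsupc. It can be compiled against any BLAS that exports the Fortran symbol dtrsm_.

/* dtrsm_demo.c -- illustrative sketch, not from SuperLU_DIST. */
#include <stdio.h>

/* Fortran BLAS prototype: solves op(A)*X = alpha*B (side = "L"),
   overwriting B with the solution X. */
void dtrsm_(const char *side, const char *uplo, const char *transa,
            const char *diag, const int *m, const int *n,
            const double *alpha, const double *a, const int *lda,
            double *b, const int *ldb);

int main(void)
{
    int m = 3, n = 1, lda = 4, ldb = 3;  /* lda > m mimics nsupr > knsupc */
    double alpha = 1.0;
    /* Column-major storage; with diag = "U" only the strictly lower
       part of L is read, the diagonal is taken as implicit ones. */
    double L[12] = { 1.0, 2.0, 3.0, 0.0,   /* column 0 (last row is padding) */
                     0.0, 1.0, 4.0, 0.0,   /* column 1 */
                     0.0, 0.0, 1.0, 0.0 }; /* column 2 */
    double b[3]  = { 1.0, 7.0, 21.0 };     /* right-hand side, overwritten by x */

    /* Same argument pattern as the #else branch in pdgstrs:
       unit lower triangular, no transpose, solve from the left. */
    dtrsm_("L", "L", "N", "U", &m, &n, &alpha, L, &lda, b, &ldb);

    /* Forward substitution gives x = (1, 5, -2). */
    printf("x = %g %g %g\n", b[0], b[1], b[2]);
    return 0;
}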

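The internal-node phase of the L-solve is message driven: every process posts a blocking receive with MPI_ANY_SOURCE and MPI_ANY_TAG, then branches on status.MPI_TAG to decide whether the payload is an X[k] block or a partial lsum contribution. The sketch below is a generic illustration of that dispatch pattern only; the tag constants, buffer size, and payload here are made up for the example and are not the Xk/LSUM tags or message layout used by SuperLU_DIST.

/* tag_dispatch_demo.c -- illustrative MPI sketch, not from SuperLU_DIST. */
#include <mpi.h>
#include <stdio.h>

#define TAG_X    1   /* hypothetical stand-in for an X-block message  */
#define TAG_LSUM 2   /* hypothetical stand-in for an lsum-type message */

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (rank == 0) {
        /* Expect one message from every other rank; the tag, not the
           source, determines how the payload is processed. */
        int pending = size - 1;
        while (pending) {
            double buf[4];
            MPI_Status status;
            MPI_Recv(buf, 4, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG,
                     MPI_COMM_WORLD, &status);
            switch (status.MPI_TAG) {
              case TAG_X:
                  printf("rank 0: X-type message from %d, x[0] = %g\n",
                         status.MPI_SOURCE, buf[0]);
                  break;
              case TAG_LSUM:
                  printf("rank 0: LSUM-type message from %d, sum = %g\n",
                         status.MPI_SOURCE, buf[0]);
                  break;
              default:
                  printf("rank 0: unexpected tag %d\n", status.MPI_TAG);
                  break;
            }
            --pending;
        }
    } else {
        /* Odd ranks send an "X" message, even ranks an "LSUM" message. */
        double buf[4] = { (double) rank, 0.0, 0.0, 0.0 };
        int tag = (rank % 2) ? TAG_X : TAG_LSUM;
        MPI_Send(buf, 4, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}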