
pdgstrs.c.bak

Collection: Solving a system of matrix equations via LU factorization
Format: BAK (backup copy of C source)
    for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]);
    Llu->SolveMsgSent = 0;
#endif

    /*---------------------------------------------------
     * Back solve Ux = y.
     *
     * The Y components from the forward solve are already
     * on the diagonal processes.
     *---------------------------------------------------*/

    /* Save the count to be altered so it can be used by
       a subsequent call to PDGSTRS. */
    if ( !(bmod = intMalloc_dist(nlb)) )
        ABORT("Malloc fails for bmod[].");
    for (i = 0; i < nlb; ++i) bmod[i] = Llu->bmod[i];
    if ( !(brecv = intMalloc_dist(nlb)) )
        ABORT("Malloc fails for brecv[].");
    Llu->brecv = brecv;

    /*
     * Compute brecv[] and nbrecvmod counts on the diagonal processes.
     */
    {
        superlu_scope_t *scp = &grid->rscp;

        for (k = 0; k < nsupers; ++k) {
            krow = PROW( k, grid );
            if ( myrow == krow ) {
                lk = LBi( k, grid );    /* Local block number. */
                kcol = PCOL( k, grid ); /* Root process in this row scope. */
                if ( mycol != kcol && bmod[lk] )
                    i = 1;  /* Contribution from a non-diagonal process. */
                else i = 0;
                MPI_Reduce( &i, &brecv[lk], 1, mpi_int_t,
                            MPI_SUM, kcol, scp->comm );
                if ( mycol == kcol ) { /* Diagonal process. */
                    nbrecvmod += brecv[lk];
                    if ( !brecv[lk] && !bmod[lk] ) ++nroot;
#if ( DEBUGlevel>=2 )
                    printf("(%2d) brecv[%4d]  %2d\n", iam, k, brecv[lk]);
                    assert( brecv[lk] < Pc );
#endif
                }
            }
        }
    }

    /* Re-initialize lsum to zero. Each block header is already in place. */
    for (k = 0; k < nsupers; ++k) {
        krow = PROW( k, grid );
        if ( myrow == krow ) {
            knsupc = SuperSize( k );
            lk = LBi( k, grid );
            il = LSUM_BLK( lk );
            dest = &lsum[il];
            RHS_ITERATE(j) {
                for (i = 0; i < knsupc; ++i) dest[i + j*knsupc] = zero;
            }
        }
    }

    /* Set up additional pointers for the index and value arrays of U.
       nub is the number of local block columns. */
    nub = CEILING( nsupers, Pc ); /* Number of local block columns. */
    if ( !(Urbs = (int_t *) intCalloc_dist(2*nub)) )
        ABORT("Malloc fails for Urbs[]"); /* Record number of nonzero
                                             blocks in a block column. */
    Urbs1 = Urbs + nub;
    if ( !(Ucb_indptr = SUPERLU_MALLOC(nub * sizeof(Ucb_indptr_t *))) )
        ABORT("Malloc fails for Ucb_indptr[]");
    if ( !(Ucb_valptr = SUPERLU_MALLOC(nub * sizeof(int_t *))) )
        ABORT("Malloc fails for Ucb_valptr[]");

    /* Count the number of row blocks in each block column.
       One pass of the skeleton graph of U. */
    for (lk = 0; lk < nlb; ++lk) {
        usub = Ufstnz_br_ptr[lk];
        if ( usub ) { /* Not an empty block row. */
            /* usub[0] -- number of column blocks in this block row. */
#if ( DEBUGlevel>=2 )
            Ublocks += usub[0];
#endif
            i = BR_HEADER; /* Pointer in the index array. */
            for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */
                k = usub[i];            /* Global block number. */
                ++Urbs[LBj(k,grid)];
                i += UB_DESCRIPTOR + SuperSize( k );
            }
        }
    }

    /* Set up the vertical linked lists for the row blocks.
       One pass of the skeleton graph of U. */
    for (lb = 0; lb < nub; ++lb) {
        if ( Urbs[lb] ) { /* Not an empty block column. */
            if ( !(Ucb_indptr[lb]
                   = SUPERLU_MALLOC(Urbs[lb] * sizeof(Ucb_indptr_t))) )
                ABORT("Malloc fails for Ucb_indptr[lb][]");
            if ( !(Ucb_valptr[lb] = (int_t *) intMalloc_dist(Urbs[lb])) )
                ABORT("Malloc fails for Ucb_valptr[lb][]");
        }
    }
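    /* At this point Urbs[j] holds the number of nonzero row blocks in local
       block column j of U, and Ucb_indptr[j]/Ucb_valptr[j] have been sized
       accordingly. The second pass below fills them in: for the r-th row
       block of local block column j, Ucb_indptr[j][r].lbnum records which
       local block row it lives in, Ucb_indptr[j][r].indpos records where its
       descriptor starts in the index array, and Ucb_valptr[j][r] records the
       offset of its numerical values in the nzval array. Together these give
       a column-wise view of the row-wise U storage, which the U-solve
       traverses when scattering updates from X[k]. */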
    for (lk = 0; lk < nlb; ++lk) { /* For each block row. */
        usub = Ufstnz_br_ptr[lk];
        if ( usub ) { /* Not an empty block row. */
            i = BR_HEADER; /* Pointer in the index array. */
            j = 0;         /* Pointer in the nzval array. */
            for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */
                k = usub[i];          /* Global block number, column-wise. */
                ljb = LBj( k, grid ); /* Local block number, column-wise. */
                Ucb_indptr[ljb][Urbs1[ljb]].lbnum = lk;
                Ucb_indptr[ljb][Urbs1[ljb]].indpos = i;
                Ucb_valptr[ljb][Urbs1[ljb]] = j;
                ++Urbs1[ljb];
                j += usub[i+1];
                i += UB_DESCRIPTOR + SuperSize( k );
            }
        }
    }

#if ( DEBUGlevel>=2 )
    for (p = 0; p < Pr*Pc; ++p) {
        if ( iam == p ) {
            printf("(%2d) .. Ublocks %d\n", iam, Ublocks);
            for (lb = 0; lb < nub; ++lb) {
                printf("(%2d) Local col %2d: # row blocks %2d\n",
                       iam, lb, Urbs[lb]);
                if ( Urbs[lb] ) {
                    for (i = 0; i < Urbs[lb]; ++i)
                        printf("(%2d) .. row blk %2d: lbnum %d, indpos %d, valpos %d\n",
                               iam, i,
                               Ucb_indptr[lb][i].lbnum,
                               Ucb_indptr[lb][i].indpos,
                               Ucb_valptr[lb][i]);
                }
            }
        }
        MPI_Barrier( grid->comm );
    }
    for (p = 0; p < Pr*Pc; ++p) {
        if ( iam == p ) {
            printf("\n(%d) bsendx_plist[][]", iam);
            for (lb = 0; lb < nub; ++lb) {
                printf("\n(%d) .. local col %2d: ", iam, lb);
                for (i = 0; i < Pr; ++i)
                    printf("%4d", bsendx_plist[lb][i]);
            }
            printf("\n");
        }
        MPI_Barrier( grid->comm );
    }
#endif /* DEBUGlevel */

#if ( PRNTlevel>=3 )
    t = SuperLU_timer_() - t;
    if ( !iam ) printf(".. Setup U-solve time\t%8.2f\n", t);
    t = SuperLU_timer_();
#endif

    /*
     * Solve the roots first by all the diagonal processes.
     */
#if ( DEBUGlevel>=1 )
    printf("(%2d) nroot %4d\n", iam, nroot);
#endif
    for (k = nsupers-1; k >= 0 && nroot; --k) {
        krow = PROW( k, grid );
        kcol = PCOL( k, grid );
        if ( myrow == krow && mycol == kcol ) { /* Diagonal process. */
            knsupc = SuperSize( k );
            lk = LBi( k, grid ); /* Local block number, row-wise. */
            if ( brecv[lk]==0 && bmod[lk]==0 ) {
                bmod[lk] = -1;       /* Do not solve X[k] in the future. */
                ii = X_BLK( lk );
                lk = LBj( k, grid ); /* Local block number, column-wise. */
                lsub = Lrowind_bc_ptr[lk];
                lusup = Lnzval_bc_ptr[lk];
                nsupr = lsub[1];
#ifdef _CRAY
                STRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha,
                      lusup, &nsupr, &x[ii], &knsupc);
#else
                dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha,
                       lusup, &nsupr, &x[ii], &knsupc);
#endif
                stat->ops[SOLVE] += knsupc * (knsupc + 1) * nrhs;
                --nroot;
#if ( DEBUGlevel>=2 )
                printf("(%2d) Solve X[%2d]\n", iam, k);
#endif
                /*
                 * Send Xk to process column Pc[k].
                 */
                for (p = 0; p < Pr; ++p) {
                    if ( bsendx_plist[lk][p] != EMPTY ) {
                        pi = PNUM( p, kcol, grid );
#ifdef ISEND_IRECV
                        MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H,
                                   MPI_DOUBLE, pi, Xk, grid->comm,
                                   &send_req[Llu->SolveMsgSent++] );
#else
                        MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H,
                                  MPI_DOUBLE, pi, Xk, grid->comm );
#endif
#if ( DEBUGlevel>=2 )
                        printf("(%2d) Sent X[%2.0f] to P %2d\n",
                               iam, x[ii-XK_H], pi);
#endif
                    }
                }
                /*
                 * Perform local block modifications: lsum[i] -= U_i,k * X[k]
                 */
                if ( Urbs[lk] )
                    dlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs,
                               Ucb_indptr, Ucb_valptr, xsup, grid, Llu,
                               send_req, stat);
            } /* if root ... */
        } /* if diagonal process ... */
    } /* for k ... */

    /*
     * Compute the internal nodes asynchronously by all processes.
     */
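    /* Each incoming message is self-describing: the first entry of recvbuf
       is the global block number k, and the MPI tag says whether the payload
       is a solved X block (tag Xk) or a partial-sum contribution (tag LSUM).
       A diagonal process solves its block as soon as brecv[lk] and bmod[lk]
       both reach zero, i.e. all remote and local updates have arrived. */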
    while ( nbrecvx || nbrecvmod ) { /* While not finished. */

        /* Receive a message. */
        MPI_Recv( recvbuf, maxrecvsz, MPI_DOUBLE,
                  MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status );
        k = *recvbuf;
#if ( DEBUGlevel>=2 )
        printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG);
#endif
        switch ( status.MPI_TAG ) {
            case Xk:
                --nbrecvx;
                lk = LBj( k, grid ); /* Local block number, column-wise. */
                /*
                 * Perform local block modifications:
                 *         lsum[i] -= U_i,k * X[k]
                 */
                dlsum_bmod(lsum, x, &recvbuf[XK_H], nrhs, k, bmod, Urbs,
                           Ucb_indptr, Ucb_valptr, xsup, grid, Llu,
                           send_req, stat);
                break;

            case LSUM: /* Receiver must be a diagonal process. */
                --nbrecvmod;
                lk = LBi( k, grid ); /* Local block number, row-wise. */
                ii = X_BLK( lk );
                knsupc = SuperSize( k );
                tempv = &recvbuf[LSUM_H];
                RHS_ITERATE(j) {
                    for (i = 0; i < knsupc; ++i)
                        x[i + ii + j*knsupc] += tempv[i + j*knsupc];
                }
                if ( (--brecv[lk])==0 && bmod[lk]==0 ) {
                    bmod[lk] = -1; /* Do not solve X[k] in the future. */
                    lk = LBj( k, grid ); /* Local block number, column-wise. */
                    lsub = Lrowind_bc_ptr[lk];
                    lusup = Lnzval_bc_ptr[lk];
                    nsupr = lsub[1];
#ifdef _CRAY
                    STRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha,
                          lusup, &nsupr, &x[ii], &knsupc);
#else
                    dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha,
                           lusup, &nsupr, &x[ii], &knsupc);
#endif
                    stat->ops[SOLVE] += knsupc * (knsupc + 1) * nrhs;
#if ( DEBUGlevel>=2 )
                    printf("(%2d) Solve X[%2d]\n", iam, k);
#endif
                    /*
                     * Send Xk to process column Pc[k].
                     */
                    kcol = PCOL( k, grid );
                    for (p = 0; p < Pr; ++p) {
                        if ( bsendx_plist[lk][p] != EMPTY ) {
                            pi = PNUM( p, kcol, grid );
#ifdef ISEND_IRECV
                            MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H,
                                       MPI_DOUBLE, pi, Xk, grid->comm,
                                       &send_req[Llu->SolveMsgSent++] );
#else
                            MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H,
                                      MPI_DOUBLE, pi, Xk, grid->comm );
#endif
#if ( DEBUGlevel>=2 )
                            printf("(%2d) Sent X[%2.0f] to P %2d\n",
                                   iam, x[ii - XK_H], pi);
#endif
                        }
                    }
                    /*
                     * Perform local block modifications:
                     *         lsum[i] -= U_i,k * X[k]
                     */
                    if ( Urbs[lk] )
                        dlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs,
                                   Ucb_indptr, Ucb_valptr, xsup, grid, Llu,
                                   send_req, stat);
                } /* if becomes solvable */
                break;

#if ( DEBUGlevel>=1 )
            default:
                printf("(%2d) Recv'd wrong message tag %4d\n",
                       iam, status.MPI_TAG);
                break;
#endif
        } /* switch */
    } /* while not finished ... */

#if ( PRNTlevel>=3 )
    t = SuperLU_timer_() - t;
    if ( !iam ) printf(".. U-solve time\t%8.2f\n", t);
#endif

#if ( DEBUGlevel>=2 )
    {
        double *x_col;
        int diag;

        printf("\n(%d) .. After U-solve: x (ON DIAG PROCS) = \n", iam);
        ii = 0;
        for (k = 0; k < nsupers; ++k) {
            knsupc = SuperSize( k );
            krow = PROW( k, grid );
            kcol = PCOL( k, grid );
            diag = PNUM( krow, kcol, grid );
            if ( iam == diag ) { /* Diagonal process. */
                lk = LBi( k, grid );
                jj = X_BLK( lk );
                x_col = &x[jj];
                RHS_ITERATE(j) {
                    for (i = 0; i < knsupc; ++i) { /* X stored in blocks. */
                        printf("\t(%d)\t%4d\t%.10f\n",
                               iam, xsup[k]+i, x_col[i]);
                    }
                    x_col += knsupc;
                }
            }
            ii += knsupc;
        } /* for k ... */
    }
#endif

    pdReDistribute_X_to_B(n, B, m_loc, ldb, fst_row, nrhs, x, ilsum,
                          ScalePermstruct, Glu_persist, grid, SOLVEstruct);
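    /* pdReDistribute_X_to_B above copies the solution, which x holds in
       supernodal block order on the diagonal processes, back into the
       caller's row-distributed right-hand-side array B. */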
    /* Deallocate storage. */
    SUPERLU_FREE(lsum);
    SUPERLU_FREE(x);
    SUPERLU_FREE(recvbuf);
    for (i = 0; i < nub; ++i) {
        if ( Urbs[i] ) {
            SUPERLU_FREE(Ucb_indptr[i]);
            SUPERLU_FREE(Ucb_valptr[i]);
        }
    }
    SUPERLU_FREE(Ucb_indptr);
    SUPERLU_FREE(Ucb_valptr);
    SUPERLU_FREE(Urbs);
    SUPERLU_FREE(bmod);
    SUPERLU_FREE(brecv);
#ifdef ISEND_IRECV
    for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]);
    SUPERLU_FREE(send_req);
#endif

    stat->utime[SOLVE] = SuperLU_timer_() - t;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit pdgstrs()");
#endif

} /* PDGSTRS */
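The per-supernode solve above reduces to a dense triangular solve with the diagonal block, done through the BLAS dtrsm routine. The standalone sketch below is not part of pdgstrs.c; it only illustrates, under the assumption of a Fortran BLAS linked with the usual underscore naming (e.g. -lblas), what the dtrsm_("L", "U", "N", "N", ...) call computes on a small 3x3 upper triangular system.

/* Minimal sketch of the dtrsm_ call used in pdgstrs: solves U * X = B for X,
 * where U is upper triangular ("L","U","N","N" = left side, upper triangle,
 * no transpose, non-unit diagonal), overwriting B with the solution.
 * Assumes a Fortran BLAS (column-major storage, trailing-underscore symbol). */
#include <stdio.h>

extern void dtrsm_(const char *side, const char *uplo, const char *transa,
                   const char *diag, const int *m, const int *n,
                   const double *alpha, const double *a, const int *lda,
                   double *b, const int *ldb);

int main(void)
{
    /* Column-major 3x3 upper triangular U; entries below the diagonal
       are never referenced when uplo = "U". */
    double U[9] = { 2.0, 0.0, 0.0,     /* column 0 */
                    1.0, 3.0, 0.0,     /* column 1 */
                    4.0, 5.0, 6.0 };   /* column 2 */
    double b[3] = { 20.0, 33.0, 18.0 };   /* right-hand side, nrhs = 1 */
    int m = 3, nrhs = 1;
    double alpha = 1.0;

    /* Same argument pattern as in pdgstrs, with knsupc = m, nsupr = m. */
    dtrsm_("L", "U", "N", "N", &m, &nrhs, &alpha, U, &m, b, &m);

    /* Back substitution gives x = (1, 6, 3). */
    printf("x = (%g, %g, %g)\n", b[0], b[1], b[2]);
    return 0;
}

In pdgstrs itself the leading dimension passed as lda is nsupr, the row dimension of the whole supernode panel in Lnzval_bc_ptr[lk], which is why it differs from knsupc, the ldb of the right-hand-side block x[ii].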
