⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pdgstrf_x1.c

📁 LU分解求解矩阵方程组的解
💻 C
📖 第 1 页 / 共 3 页
字号:
		/*MPI_Recv( Lsub_buf, Llu->bufmax[0], mpi_int_t, kcol, 			 (4*k)%NTAGS, scp->comm, &status );*/		MPI_Wait( &recv_req[0], &status );		MPI_Get_count( &status, mpi_int_t, &msgcnt[0] );		/*probe_recv(iam, kcol, (4*k+1)%NTAGS, MPI_DOUBLE, scp->comm, 		  Llu->bufmax[1]);*/		/*MPI_Recv( Lval_buf, Llu->bufmax[1], MPI_DOUBLE, kcol, 			 (4*k+1)%NTAGS, scp->comm, &status );*/		MPI_Wait( &recv_req[1], &status );		MPI_Get_count( &status, MPI_DOUBLE, &msgcnt[1] );#if ( VAMPIR>=1 )		VT_end(2);#endif#if ( PROFlevel>=1 )		TOC(t2, t1);		stat->utime[COMM] += t2;#endif#if ( DEBUGlevel>=2 )		printf("(%d) Recv L(:,%4d): lsub %4d, lusup %4d from Pc %2d\n",		       iam, k, msgcnt[0], msgcnt[1], kcol);		fflush(stdout);#endif		lsub = Lsub_buf_2[k%2];		lusup = Lval_buf_2[k%2];#if ( PRNTlevel==3 )		++total_msg;		if ( !msgcnt[0] ) ++zero_msg;#endif	    } else msgcnt[0] = 0;	} /* if mycol = Pc(k) */	scp = &grid->cscp; /* The scope of process column. */	if ( myrow == krow ) {	    /* Parallel triangular solve across process row *krow* --	       U(k,j) = L(k,k) \ A(k,j).  */#ifdef _CRAY	    pdgstrs2(n, k, Glu_persist, grid, Llu, stat, ftcs1, ftcs2, ftcs3);#else	    pdgstrs2(n, k, Glu_persist, grid, Llu, stat);#endif	    /* Multicasts U(k,:) to process columns. */	    lk = LBi( k, grid );	    usub = Ufstnz_br_ptr[lk];	    uval = Unzval_br_ptr[lk];	    if ( usub )	{		msgcnt[2] = usub[2];		msgcnt[3] = usub[1];	    } else {		msgcnt[2] = msgcnt[3] = 0;	    }	    if ( ToSendD[lk] == YES ) {		for (pi = 0; pi < Pr; ++pi) {		    if ( pi != myrow ) {#if ( PROFlevel>=1 )			TIC(t1);#endif#if ( VAMPIR>=1 )			VT_begin(3);#endif			MPI_Send( usub, msgcnt[2], mpi_int_t, pi,				 (4*k+2)%NTAGS, scp->comm);			MPI_Send( uval, msgcnt[3], MPI_DOUBLE, pi,				 (4*k+3)%NTAGS, scp->comm);#if ( VAMPIR>=1 )			VT_end(3);#endif#if ( PROFlevel>=1 )			TOC(t2, t1);			stat->utime[COMM] += t2;			msg_cnt += 2;			msg_vol += msgcnt[2]*iword + msgcnt[3]*dword;#endif#if ( DEBUGlevel>=2 )			printf("(%d) Send U(%4d,:) to Pr %2d\n", iam, k, pi);#endif		    } /* if pi ... */		} /* for pi ... */	    } /* if ToSendD ... */	} else { /* myrow != krow */	    if ( ToRecv[k] == 2 ) { /* Recv block row U(k,:). */#if ( PROFlevel>=1 )		TIC(t1);#endif#if ( VAMPIR>=1 )		VT_begin(4);#endif		/*probe_recv(iam, krow, (4*k+2)%NTAGS, mpi_int_t, scp->comm, 		  Llu->bufmax[2]);*/		MPI_Recv( Usub_buf, Llu->bufmax[2], mpi_int_t, krow,			 (4*k+2)%NTAGS, scp->comm, &status );		MPI_Get_count( &status, mpi_int_t, &msgcnt[2] );		/*probe_recv(iam, krow, (4*k+3)%NTAGS, MPI_DOUBLE, scp->comm, 		  Llu->bufmax[3]);*/		MPI_Recv( Uval_buf, Llu->bufmax[3], MPI_DOUBLE, krow, 			 (4*k+3)%NTAGS, scp->comm, &status );		MPI_Get_count( &status, MPI_DOUBLE, &msgcnt[3] );#if ( VAMPIR>=1 )		VT_end(4);#endif#if ( PROFlevel>=1 )		TOC(t2, t1);		stat->utime[COMM] += t2;#endif		usub = Usub_buf;		uval = Uval_buf;#if ( DEBUGlevel>=2 )		printf("(%d) Recv U(%4d,:) from Pr %2d\n", iam, k, krow);#endif#if ( PRNTlevel==3 )		++total_msg;		if ( !msgcnt[2] ) ++zero_msg;#endif	    } else msgcnt[2] = 0;	} /* if myrow == Pr(k) */	  	/* 	 * Parallel rank-k update; pair up blocks L(i,k) and U(k,j).	 *  for (j = k+1; k < N; ++k) {	 *     for (i = k+1; i < N; ++i) 	 *         if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid )	 *              && L(i,k) != 0 && U(k,j) != 0 )	 *             A(i,j) = A(i,j) - L(i,k) * U(k,j);	 */	msg0 = msgcnt[0];	msg2 = msgcnt[2];	if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */	    nsupr = lsub[1]; /* LDA of lusup. */	    if ( myrow == krow ) { /* Skip diagonal block L(k,k). */		lptr0 = BC_HEADER + LB_DESCRIPTOR + lsub[BC_HEADER+1];		luptr0 = knsupc;		nlb = lsub[0] - 1;	    } else {		lptr0 = BC_HEADER;		luptr0 = 0;		nlb = lsub[0];	    }	    lptr = lptr0;	    for (lb = 0; lb < nlb; ++lb) { /* Initialize block row pointers. */		ib = lsub[lptr];		lib = LBi( ib, grid );		iuip[lib] = BR_HEADER;		ruip[lib] = 0;		lptr += LB_DESCRIPTOR + lsub[lptr+1];	    }	    nub = usub[0];    /* Number of blocks in the block row U(k,:) */	    iukp = BR_HEADER; /* Skip header; Pointer to index[] of U(k,:) */	    rukp = 0;         /* Pointer to nzval[] of U(k,:) */	    klst = FstBlockC( k+1 );	    	    /* ---------------------------------------------------	       Update the first block column A(:,k+1).	       --------------------------------------------------- */	    jb = usub[iukp];   /* Global block number of block U(k,j). */	    if ( jb == k+1 ) { /* First update (k+1)-th block. */		--nub;		lptr = lptr0;		luptr = luptr0;		ljb = LBj( jb, grid ); /* Local block number of U(k,j). */		nsupc = SuperSize( jb );		iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */		/* Prepare to call DGEMM. */		jj = iukp;		while ( usub[jj] == klst ) ++jj;		ldu = klst - usub[jj++];		ncols = 1;		full = 1;		for (; jj < iukp+nsupc; ++jj) {		    segsize = klst - usub[jj];		    if ( segsize ) {		        ++ncols;			if ( segsize != ldu ) full = 0;		        if ( segsize > ldu ) ldu = segsize;		    }		}#if ( DEBUGlevel>=3 )		++num_update;#endif		if ( full ) {		    tempu = &uval[rukp];		} else { /* Copy block U(k,j) into tempU2d. */#if ( DEBUGlevel>=3 )		  printf("(%d) full=%d,k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n",			 iam, full, k, jb, ldu, ncols, nsupc);		  ++num_copy;#endif		    tempu = tempU2d;		    for (jj = iukp; jj < iukp+nsupc; ++jj) {		        segsize = klst - usub[jj];			if ( segsize ) {			    lead_zero = ldu - segsize;			    for (i = 0; i < lead_zero; ++i) tempu[i] = 0.0;			    tempu += lead_zero;			    for (i = 0; i < segsize; ++i)				tempu[i] = uval[rukp+i];			    rukp += segsize;			    tempu += segsize;			}		    }		    tempu = tempU2d;		    rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */		} /* if full ... */		for (lb = 0; lb < nlb; ++lb) { 		    ib = lsub[lptr]; /* Row block L(i,k). */		    nbrow = lsub[lptr+1];  /* Number of full rows. */		    lptr += LB_DESCRIPTOR; /* Skip descriptor. */		    tempv = tempv2d;#ifdef _CRAY		    SGEMM(ftcs, ftcs, &nbrow, &ncols, &ldu, &alpha, 			  &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, 			  tempu, &ldu, &beta, tempv, &ldt);#else		    dgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, 			   &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, 			   tempu, &ldu, &beta, tempv, &ldt);#endif		    stat->ops[FACT] += 2 * nbrow * ldu * ncols;		    /* Now gather the result into the destination block. */		    if ( ib < jb ) { /* A(i,j) is in U. */			ilst = FstBlockC( ib+1 );			lib = LBi( ib, grid );			index = Ufstnz_br_ptr[lib];			ijb = index[iuip[lib]];			while ( ijb < jb ) { /* Search for dest block. */			    ruip[lib] += index[iuip[lib]+1];			    iuip[lib] += UB_DESCRIPTOR + SuperSize( ijb );			    ijb = index[iuip[lib]];			}			iuip[lib] += UB_DESCRIPTOR; /* Skip descriptor. */			tempv = tempv2d;			for (jj = 0; jj < nsupc; ++jj) {			    segsize = klst - usub[iukp + jj];			    fnz = index[iuip[lib]++];			    if ( segsize ) { /* Nonzero segment in U(k.j). */				ucol = &Unzval_br_ptr[lib][ruip[lib]];				for (i = 0, it = 0; i < nbrow; ++i) {				    rel = lsub[lptr + i] - fnz;				    ucol[rel] -= tempv[it++];				}				tempv += ldt;			    }			    ruip[lib] += ilst - fnz;			}		    } else { /* A(i,j) is in L. */			index = Lrowind_bc_ptr[ljb];			ldv = index[1];   /* LDA of the dest lusup. */			lptrj = BC_HEADER;			luptrj = 0;			ijb = index[lptrj];			while ( ijb != ib ) { /* Search for dest block -- 						 blocks are not ordered! */			    luptrj += index[lptrj+1];			    lptrj += LB_DESCRIPTOR + index[lptrj+1];			    ijb = index[lptrj];			}			/*			 * Build indirect table. This is needed because the			 * indices are not sorted.			 */			fnz = FstBlockC( ib );			lptrj += LB_DESCRIPTOR;			for (i = 0; i < index[lptrj-1]; ++i) {			    rel = index[lptrj + i] - fnz;			    indirect[rel] = i;			}			nzval = Lnzval_bc_ptr[ljb] + luptrj;			tempv = tempv2d;			for (jj = 0; jj < nsupc; ++jj) {			    segsize = klst - usub[iukp + jj];			    if ( segsize ) {/*#pragma _CRI cache_bypass nzval,tempv*/				for (it = 0, i = 0; i < nbrow; ++i) {				    rel = lsub[lptr + i] - fnz;				    nzval[indirect[rel]] -= tempv[it++];				}				tempv += ldt;			    }			    nzval += ldv;			}		    } /* if ib < jb ... */		    lptr += nbrow;		    luptr += nbrow;		} /* for lb ... */		rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */		iukp += nsupc;	    }  /* if jb == k+1 */	} /* if L(:,k) and U(k,:) not empty */	if ( k+1 < nsupers ) {	  kcol = PCOL( k+1, grid );	  if ( mycol == kcol ) {#if ( VAMPIR>=1 )	    VT_begin(5);#endif	    /* Factor diagonal and subdiagonal blocks and test for exact	       singularity.  */	    pdgstrf2(options, k+1, thresh, Glu_persist, grid, Llu, stat, info);#if ( VAMPIR>=1 )	    VT_end(5);#endif	    /* Process column *kcol+1* multicasts numeric values of L(:,k+1) 	       to process rows. */	    lk = LBj( k+1, grid ); /* Local block number. */	    lsub1 = Lrowind_bc_ptr[lk]; 	    if ( lsub1 ) {		msgcnt[0] = lsub1[1] + BC_HEADER + lsub1[0]*LB_DESCRIPTOR;		msgcnt[1] = lsub1[1] * SuperSize( k+1 );	    } else {		msgcnt[0] = 0;		msgcnt[1] = 0;	    }	    scp = &grid->rscp; /* The scope of process row. */	    for (pj = 0; pj < Pc; ++pj) {		if ( ToSendR[lk][pj] != EMPTY ) {		    lusup1 = Lnzval_bc_ptr[lk];#if ( PROFlevel>=1 )		    TIC(t1);#endif#if ( VAMPIR>=1 )		    VT_begin(1);#endif		    MPI_Isend( lsub1, msgcnt[0], mpi_int_t, pj,			      (4*(k+1))%NTAGS, scp->comm, &send_req[pj] );		    MPI_Isend( lusup1, msgcnt[1], MPI_DOUBLE, pj,			     (4*(k+1)+1)%NTAGS, scp->comm, &send_req[pj+Pc] );#if ( VAMPIR>=1 )		    VT_end(1);#endif#if ( PROFlevel>=1 )		    TOC(t2, t1);		    stat->utime[COMM] += t2;		    msg_cnt += 2;		    msg_vol += msgcnt[0]*iword + msgcnt[1]*dword;#endif#if ( DEBUGlevel>=2 )		    printf("(%d) Send L(:,%4d): lsub %4d, lusup %4d to Pc %2d\n",			   iam, k+1, msgcnt[0], msgcnt[1], pj);#endif		}	    } /* for pj ... */	  } else { /* Post Recv of block column L(:,k+1). */	    if ( ToRecv[k+1] >= 1 ) {		scp = &grid->rscp; /* The scope of process row. */		MPI_Irecv(Lsub_buf_2[(k+1)%2], Llu->bufmax[0], mpi_int_t, kcol,			  (4*(k+1))%NTAGS, scp->comm, &recv_req[0]);		MPI_Irecv(Lval_buf_2[(k+1)%2], Llu->bufmax[1], MPI_DOUBLE, kcol, 			  (4*(k+1)+1)%NTAGS, scp->comm, &recv_req[1]);#if ( DEBUGlevel>=2 )		printf("(%d) Post Irecv L(:,%4d)\n", iam, k+1);#endif	    }	  } /* if mycol == Pc(k+1) */        } /* if k+1 < nsupers */	if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */	    /* ---------------------------------------------------	       Update all other blocks using block row U(k,:)	       --------------------------------------------------- */	    for (j = 0; j < nub; ++j) { 		lptr = lptr0;		luptr = luptr0;		jb = usub[iukp];  /* Global block number of block U(k,j). */		ljb = LBj( jb, grid ); /* Local block number of U(k,j). */		nsupc = SuperSize( jb );		iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */		/* Prepare to call DGEMM. */		jj = iukp;		while ( usub[jj] == klst ) ++jj;		ldu = klst - usub[jj++];		ncols = 1;		full = 1;		for (; jj < iukp+nsupc; ++jj) {		    segsize = klst - usub[jj];		    if ( segsize ) {		        ++ncols;			if ( segsize != ldu ) full = 0;		        if ( segsize > ldu ) ldu = segsize;		    }		}#if ( DEBUGlevel>=3 )		printf("(%d) full=%d,k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n",		       iam, full, k, jb, ldu, ncols, nsupc);		++num_update;#endif		if ( full ) {		    tempu = &uval[rukp];		} else { /* Copy block U(k,j) into tempU2d. */#if ( DEBUGlevel>=3 )		    ++num_copy;#endif		    tempu = tempU2d;		    for (jj = iukp; jj < iukp+nsupc; ++jj) {		        segsize = klst - usub[jj];			if ( segsize ) {			    lead_zero = ldu - segsize;			    for (i = 0; i < lead_zero; ++i) tempu[i] = 0.0;			    tempu += lead_zero;			    for (i = 0; i < segsize; ++i)			        tempu[i] = uval[rukp+i];			    rukp += segsize;			    tempu += segsize;			}		    }		    tempu = tempU2d;		    rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */		} /* if full ... */		for (lb = 0; lb < nlb; ++lb) { 		    ib = lsub[lptr];       /* Row block L(i,k). */		    nbrow = lsub[lptr+1];  /* Number of full rows. */		    lptr += LB_DESCRIPTOR; /* Skip descriptor. */		    tempv = tempv2d;#ifdef _CRAY		    SGEMM(ftcs, ftcs, &nbrow, &ncols, &ldu, &alpha, 			  &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, 			  tempu, &ldu, &beta, tempv, &ldt);#else		    dgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, 			   &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, 			   tempu, &ldu, &beta, tempv, &ldt);#endif		    stat->ops[FACT] += 2 * nbrow * ldu * ncols;		    /* Now gather the result into the destination block. */		    if ( ib < jb ) { /* A(i,j) is in U. */			ilst = FstBlockC( ib+1 );			lib = LBi( ib, grid );			index = Ufstnz_br_ptr[lib];			ijb = index[iuip[lib]];			while ( ijb < jb ) { /* Search for dest block. */			    ruip[lib] += index[iuip[lib]+1];			    iuip[lib] += UB_DESCRIPTOR + SuperSize( ijb );			    ijb = index[iuip[lib]];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -