⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 util.c

📁 SuperLU 2.2版本。对大型、稀疏、非对称的线性系统的直接求解
💻 C
📖 第 1 页 / 共 2 页
字号:
	p = PNUM( PROW(gbi,grid), PCOL(gbi,grid), grid ); /* Diagonal process */	++SendCnt[p];    }      /* Set up the displacements for alltoall. */    MPI_Alltoall(SendCnt, 1, MPI_INT, RecvCnt, 1, MPI_INT, grid->comm);    sdispls[0] = rdispls[0] = 0;    for (p = 1; p < procs; ++p) {        sdispls[p] = sdispls[p-1] + SendCnt[p-1];        rdispls[p] = rdispls[p-1] + RecvCnt[p-1];    }    for (p = 0; p < procs; ++p) {        SendCnt_nrhs[p] = SendCnt[p] * nrhs;	sdispls_nrhs[p] = sdispls[p] * nrhs;        RecvCnt_nrhs[p] = RecvCnt[p] * nrhs;	rdispls_nrhs[p] = rdispls[p] * nrhs;    }    /* This is saved for repeated solves, and is freed in pxgstrs_finalize().*/    gstrs_comm->B_to_X_SendCnt = SendCnt;    /* ------------------------------------------------------------       SET UP COMMUNICATION PATTERN FOR ReDistribute_X_to_B.       ------------------------------------------------------------*/    /* This is freed in pxgstrs_finalize(). */    if ( !(itemp = SUPERLU_MALLOC(8*procs * sizeof(int))) )        ABORT("Malloc fails for X_to_B_itemp[].");    SendCnt      = itemp;    SendCnt_nrhs = itemp +   procs;    RecvCnt      = itemp + 2*procs;    RecvCnt_nrhs = itemp + 3*procs;    sdispls      = itemp + 4*procs;    sdispls_nrhs = itemp + 5*procs;    rdispls      = itemp + 6*procs;    rdispls_nrhs = itemp + 7*procs;    /* Count the number of X entries to be sent to each process.*/    for (p = 0; p < procs; ++p) SendCnt[p] = 0;    num_diag_procs = SOLVEstruct->num_diag_procs;    diag_procs = SOLVEstruct->diag_procs;    for (p = 0; p < num_diag_procs; ++p) { /* for all diagonal processes */	pkk = diag_procs[p];	if ( iam == pkk ) {	    for (k = p; k < nsupers; k += num_diag_procs) {		knsupc = SuperSize( k );		lk = LBi( k, grid ); /* Local block number */		irow = FstBlockC( k );		for (i = 0; i < knsupc; ++i) {#if 0		    q = row_to_proc[inv_perm_c[irow]];#else		    q = row_to_proc[irow];#endif		    ++SendCnt[q];		    ++irow;		}	    }	}    }    MPI_Alltoall(SendCnt, 1, MPI_INT, RecvCnt, 1, MPI_INT, grid->comm);    sdispls[0] = rdispls[0] = 0;    sdispls_nrhs[0] = rdispls_nrhs[0] = 0;    SendCnt_nrhs[0] = SendCnt[0] * nrhs;    RecvCnt_nrhs[0] = RecvCnt[0] * nrhs;    for (p = 1; p < procs; ++p) {        sdispls[p] = sdispls[p-1] + SendCnt[p-1];        rdispls[p] = rdispls[p-1] + RecvCnt[p-1];        sdispls_nrhs[p] = sdispls[p] * nrhs;        rdispls_nrhs[p] = rdispls[p] * nrhs;	SendCnt_nrhs[p] = SendCnt[p] * nrhs;	RecvCnt_nrhs[p] = RecvCnt[p] * nrhs;    }    /* This is saved for repeated solves, and is freed in pxgstrs_finalize().*/    gstrs_comm->X_to_B_SendCnt = SendCnt;    if ( !(ptr_to_ibuf = SUPERLU_MALLOC(2*procs * sizeof(int))) )        ABORT("Malloc fails for ptr_to_ibuf[].");    gstrs_comm->ptr_to_ibuf = ptr_to_ibuf;    gstrs_comm->ptr_to_dbuf = ptr_to_ibuf + procs;} /* PXGSTRS_INIT */void pxgstrs_finalize(pxgstrs_comm_t *gstrs_comm){    SUPERLU_FREE(gstrs_comm->B_to_X_SendCnt);    SUPERLU_FREE(gstrs_comm->X_to_B_SendCnt);    SUPERLU_FREE(gstrs_comm->ptr_to_ibuf);    SUPERLU_FREE(gstrs_comm);}/* * Diagnostic print of segment info after panel_dfs(). */void print_panel_seg_dist(int_t n, int_t w, int_t jcol, int_t nseg, 			  int_t *segrep, int_t *repfnz){    int_t j, k;        for (j = jcol; j < jcol+w; j++) {	printf("\tcol %d:\n", j);	for (k = 0; k < nseg; k++)	    printf("\t\tseg %d, segrep %d, repfnz %d\n", k, 			segrep[k], repfnz[(j-jcol)*n + segrep[k]]);    }}voidPStatInit(SuperLUStat_t *stat){    register int_t i;    if ( !(stat->utime = SUPERLU_MALLOC(NPHASES*sizeof(double))) )	ABORT("Malloc fails for stat->utime[]");    if ( !(stat->ops = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t))) )	ABORT("SUPERLU_MALLOC fails for stat->ops[]");    for (i = 0; i < NPHASES; ++i) {        stat->utime[i] = 0.;        stat->ops[i] = 0.;    }    stat->TinyPivots = stat->RefineSteps = 0;}voidPStatPrint(superlu_options_t *options, SuperLUStat_t *stat, gridinfo_t *grid){    double  *utime = stat->utime;    flops_t *ops = stat->ops;    int_t   iam = grid->iam;    flops_t flopcnt, factflop, solveflop;    if ( options->PrintStat == NO ) return;    if ( !iam && options->Fact != FACTORED ) {        if ( options->Equil != NO )	    printf("\tEQUIL time         %8.2f\n", utime[EQUIL]);	if ( options->RowPerm != NOROWPERM )	    printf("\tROWPERM time       %8.2f\n", utime[ROWPERM]);	if ( options->ColPerm != NATURAL )	    printf("\tCOLPERM time       %8.2f\n", utime[COLPERM]);        printf("\tSYMBFACT time      %8.2f\n", utime[SYMBFAC]);	printf("\tDISTRIBUTE time    %8.2f\n", utime[DIST]);    }    MPI_Reduce(&ops[FACT], &flopcnt, 1, MPI_FLOAT, MPI_SUM,	       0, grid->comm);    factflop = flopcnt;    if ( !iam && options->Fact != FACTORED ) {	printf("\tFACTOR time        %8.2f\n", utime[FACT]);	if ( utime[FACT] != 0.0 )	    printf("\tFactor flops\t%e\tMflops \t%8.2f\n",		   flopcnt,		   flopcnt*1e-6/utime[FACT]);    }	    MPI_Reduce(&ops[SOLVE], &flopcnt, 1, MPI_FLOAT, MPI_SUM, 	       0, grid->comm);    solveflop = flopcnt;    if ( !iam ) {	printf("\tSOLVE time         %8.2f\n", utime[SOLVE]);	if ( utime[SOLVE] != 0.0 )	    printf("\tSolve flops\t%e\tMflops \t%8.2f\n",		   flopcnt,		   flopcnt*1e-6/utime[SOLVE]);    }        if ( !iam && options->IterRefine != NOREFINE ) {	printf("\tREFINEMENT time    %8.2f\tSteps%8d\n\n",	       utime[REFINE], stat->RefineSteps);    }#if ( PROFlevel>=1 )    fflush(stdout);    MPI_Barrier( grid->comm );    {	int_t i, P = grid->nprow*grid->npcol;	flops_t b, maxflop;	if ( !iam ) printf("\n.. FACT time breakdown:\tcomm\ttotal\n");	for (i = 0; i < P; ++i) {	    if ( iam == i) {		printf("\t\t(%d)%8.2f%8.2f\n", iam, utime[COMM], utime[FACT]);		fflush(stdout);	    }	    MPI_Barrier( grid->comm );	}	if ( !iam ) printf("\n.. FACT ops distribution:\n");	for (i = 0; i < P; ++i) {	    if ( iam == i ) {		printf("\t\t(%d)\t%e\n", iam, ops[FACT]);		fflush(stdout);	    }	    MPI_Barrier( grid->comm );	}	MPI_Reduce(&ops[FACT], &maxflop, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm);	if ( !iam ) {	    b = factflop/P/maxflop;	    printf("\tFACT load balance: %.2f\n", b);	}	if ( !iam ) printf("\n.. SOLVE ops distribution:\n");	for (i = 0; i < P; ++i) {	    if ( iam == i ) {		printf("\t\t%d\t%e\n", iam, ops[SOLVE]);		fflush(stdout);	    }	    MPI_Barrier( grid->comm );	}	MPI_Reduce(&ops[SOLVE], &maxflop, 1, MPI_FLOAT, MPI_MAX, 0,grid->comm);	if ( !iam ) {	    b = solveflop/P/maxflop;	    printf("\tSOLVE load balance: %.2f\n", b);	}    }#endif/*  if ( !iam ) fflush(stdout);  CRASH THE SYSTEM pierre.  */}voidPStatFree(SuperLUStat_t *stat){    SUPERLU_FREE(stat->utime);    SUPERLU_FREE(stat->ops);}/*  * Fills an integer array with a given value. */void ifill_dist(int_t *a, int_t alen, int_t ival){    register int_t i;    for (i = 0; i < alen; i++) a[i] = ival;}voidget_diag_procs(int_t n, Glu_persist_t *Glu_persist, gridinfo_t *grid,	       int_t *num_diag_procs, int_t **diag_procs, int_t **diag_len){    int_t i, j, k, knsupc, nprow, npcol, nsupers, pkk;    int_t *xsup;    i = j = *num_diag_procs = pkk = 0;    nprow = grid->nprow;    npcol = grid->npcol;    nsupers = Glu_persist->supno[n-1] + 1;    xsup = Glu_persist->xsup;    do {	++(*num_diag_procs);	i = (++i) % nprow;	j = (++j) % npcol;	pkk = PNUM( i, j, grid );    } while ( pkk != 0 ); /* Until wrap back to process 0 */    if ( !(*diag_procs = intMalloc_dist(*num_diag_procs)) )	ABORT("Malloc fails for diag_procs[]");    if ( !(*diag_len = intCalloc_dist(*num_diag_procs)) )	ABORT("Calloc fails for diag_len[]");    for (i = j = k = 0; k < *num_diag_procs; ++k) {	pkk = PNUM( i, j, grid );	(*diag_procs)[k] = pkk;	i = (++i) % nprow;	j = (++j) % npcol;    }    for (k = 0; k < nsupers; ++k) {	knsupc = SuperSize( k );	i = k % *num_diag_procs;	(*diag_len)[i] += knsupc;    }}/*  * Get the statistics of the supernodes  */#define NBUCKS 10static 	int_t	max_sup_size;void super_stats_dist(int_t nsuper, int_t *xsup){    register int_t nsup1 = 0;    int_t          i, isize, whichb, bl, bh;    int_t          bucket[NBUCKS];    max_sup_size = 0;    for (i = 0; i <= nsuper; i++) {	isize = xsup[i+1] - xsup[i];	if ( isize == 1 ) nsup1++;	if ( max_sup_size < isize ) max_sup_size = isize;	    }    printf("    Supernode statistics:\n\tno of super = %d\n", nsuper+1);    printf("\tmax supernode size = %d\n", max_sup_size);    printf("\tno of size 1 supernodes = %d\n", nsup1);    /* Histogram of the supernode sizes */    ifill_dist (bucket, NBUCKS, 0);    for (i = 0; i <= nsuper; i++) {        isize = xsup[i+1] - xsup[i];        whichb = (float) isize / max_sup_size * NBUCKS;        if (whichb >= NBUCKS) whichb = NBUCKS - 1;        bucket[whichb]++;    }        printf("\tHistogram of supernode sizes:\n");    for (i = 0; i < NBUCKS; i++) {        bl = (float) i * max_sup_size / NBUCKS;        bh = (float) (i+1) * max_sup_size / NBUCKS;        printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]);    }}/* * Check whether repfnz[] == EMPTY after reset. */void check_repfnz_dist(int_t n, int_t w, int_t jcol, int_t *repfnz){    int_t jj, k;    for (jj = jcol; jj < jcol+w; jj++) 	for (k = 0; k < n; k++)	    if ( repfnz[(jj-jcol)*n + k] != EMPTY ) {		fprintf(stderr, "col %d, repfnz_col[%d] = %d\n", jj,			k, repfnz[(jj-jcol)*n + k]);		ABORT("check_repfnz_dist");	    }}void PrintInt10(char *name, int_t len, int_t *x){    register int_t i;        printf("%10s:", name);    for (i = 0; i < len; ++i) {	if ( i % 10 == 0 ) printf("\n\t[%2d-%2d]", i, i+9);	printf("%6d", x[i]);    }    printf("\n");}int file_PrintInt10(FILE *fp, char *name, int_t len, int_t *x){    register int_t i;        fprintf(fp, "%10s:", name);    for (i = 0; i < len; ++i) {	if ( i % 10 == 0 ) fprintf(fp, "\n\t[%2d-%2d]", i, i+9);	fprintf(fp, "%6d", x[i]);    }    fprintf(fp, "\n");}int_tCheckZeroDiagonal(int_t n, int_t *rowind, int_t *colbeg, int_t *colcnt){    register int_t i, j, zd, numzd = 0;    for (j = 0; j < n; ++j) {	zd = 0;	for (i = colbeg[j]; i < colbeg[j]+colcnt[j]; ++i) {	    /*if ( iperm[rowind[i]] == j ) zd = 1;*/	    if ( rowind[i] == j ) { zd = 1; break; }	}	if ( zd == 0 ) {#if ( PRNTlevel>=2 )	    printf(".. Diagonal of column %d is zero.\n", j);#endif	    ++numzd;	}    }    return numzd;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -