📄 blkdirect.c
字号:
used only with DIRSOL == ON to do a direct LU and count the ops - returned matrix has L below the diagonal, U above (GVL1 pg 58) - meant to be used with 2x2 block matrix factorization*/void blkLudecomp(mat, size)double *mat;int size;{ extern int fulldirops; double factor; int i, j, k, sqrdex(); for(k = 0; k < size-1; k++) { /* loop on rows */ if(mat[SQDEX(k, k, size)] == 0.0) { fprintf(stderr, "blkLudecomp: zero piovt\n"); exit(1); } fprintf(stderr,"%d ", k); for(i = k+1; i < size; i++) { /* loop on remaining rows */ factor = (mat[SQDEX(i, k, size)] /= mat[SQDEX(k, k, size)]); fulldirops++; for(j = k+1; j < size; j++) { /* loop on remaining columns */ mat[SQDEX(i, j, size)] -= (factor*mat[SQDEX(k, j, size)]); fulldirops++; } } } fprintf(stderr, "\n");}/* solves using factored matrix on disc*/void blkSolve(x, b, siz, matri, matsq)int siz;double *x, *b, *matri, *matsq; /* solution, rhs */{ int i, j, k; extern int fulldirops; extern double fullsoltime; fprintf(stdout, "blkSolve: fwd elimination..."); fflush(stdout); /* forward elimination, solve Ly = b (x becomes y) */ for(i = 0; i < siz; i++) x[i] = b[i]; /* copy rhs */ rdMat(matri, siz/2, L11, TRIMAT); /* a row in the result vector is a linear comb of the previous rows */ /* do first (lower triangular only) part */ starttimer; for(i = 1; i < siz/2; i++) { /* loop on rows */ for(k = 0; k < i; k++) { /* loop on previous rows */ x[i] -= (matri[LODEX(i, k, siz/2)]*x[k]); fulldirops++; } } stoptimer; fullsoltime += dtime; /* load L21 and LTIL */ rdMat(matri, siz/2, LTIL, TRIMAT); rdMat(matsq, siz/2, L21, SQRMAT); /* do second (square and lower triangular) part */ starttimer; for(; i < siz; i++) { /* loop on rows of entire matrix */ for(k = 0; k < siz/2; k++) { /* loop on first half of rows (L21) */ x[i] -= (matsq[SQDEX(i-siz/2, k, siz/2)]*x[k]); fulldirops++; } for(; k < i; k++) { /* loop on 2nd half of rows (LTIL) */ x[i] -= (matri[LODEX(i-siz/2, k-siz/2, siz/2)]*x[k]); fulldirops++; } } stoptimer; fullsoltime += dtime; fprintf(stdout, "back substitution..."); fflush(stdout); /* back substitute, solve Ux = y (x converted in place from y to x) */ rdMat(matri, siz/2, UTIL, TRIMAT); /* load lower right U factor */ starttimer; for(i = siz-1; i >= siz/2; i--) { /* loop on rows */ for(k = siz-1; k > i; k--) { /* loop on rows (of x) already done */ x[i] -= (matri[UPDEX(i-siz/2, k-siz/2, siz/2)]*x[k]); fulldirops++; } x[i] /= matri[UPDEX(i-siz/2, i-siz/2, siz/2)]; /* divide by u_{ii} */ fulldirops++; } stoptimer; fullsoltime += dtime; /* load U11, U12 to do triangle plus square part of back solve */ rdMat(matri, siz/2, U11, TRIMAT); rdMat(matsq, siz/2, U12, SQRMAT); /* U12 is stored columnwise */ starttimer; for(; i >= 0; i--) { /* loop on rows */ for(k = siz-1; k >= siz/2; k--) { /* loop on rows corresponding to U12 */ /* note flipped index because U12 stored as columns */ x[i] -= (matsq[SQDEX(k-siz/2, i, siz/2)]*x[k]); fulldirops++; } for(; k > i; k--) { /* loop on rows corresponding to cols of U11 */ x[i] -= (matri[UPDEX(i, k, siz/2)]*x[k]); fulldirops++; } x[i] /= matri[UPDEX(i, i, siz/2)]; fulldirops++; } stoptimer; fullsoltime += dtime; fprintf(stdout, "done.\n\n"); fflush(stdout);}/* used only in conjunction with EXPGCR == ON and DIRSOL == ON to dump full P to disc in four square chunks - N MUST BE EVEN - matrix stored in factored form on exit - used to get around memory restrictions - only 3/8 of the entire matrix is in core at any given time, rest on disc in L11, U11, U12, L21, Ltil and Util*/void blkQ2Pfull(directlist, numchgs, numchgs_wdummy, triArray, sqrArray, real_index, is_dummy)int numchgs, numchgs_wdummy, *is_dummy, **real_index;double **triArray, **sqrArray; /* LINEAR arrays: 1 triangular, 1 square mat */cube *directlist;{ int i, j, fromp, fromq, top, toq, matsize, lowdex(), sqrdex(), uppdex(); int k, l, i_real, j_real; double calcp(); cube *pq, *pp; charge **pchgs, **qchgs, *ppan, *qpan; double pos_fact, neg_fact; /* allocate room for one 1/4 size square full P matrix, one 1/4 size tri. - linear arrays are used to cut down on memory overhead */ /* allocate for read index array too */ if(numchgs % 2 == 0) { /* if numchgs is even */ matsize = numchgs*numchgs/4; CALLOC(*sqrArray, matsize, double, ON, AMSC); matsize = ((numchgs/2)*(numchgs/2 + 1))/2; CALLOC(*triArray, matsize, double, ON, AMSC); CALLOC(*real_index, numchgs, int, ON, AMSC); } else { fprintf(stderr, "blkQ2Pfull: can't handle an odd number of panels\n"); exit(1); } /* load the matrix in the style of Q2P() - no attempt to exploit symmetry */ /* calculate interaction between every direct list entry and entire dlist */ /* the block implementation MUST have all the charges in 1st dlist entry */ pp = pq = directlist; if(pp == NULL || pp->dnext != NULL || pp->upnumeles[0] != numchgs_wdummy) { fprintf(stderr, "blkQ2Pfull: bad directlist, must run with depth 0\n"); exit(1); } pchgs = qchgs = pp->chgs; /* get the real index array - indices of non-dummy panels */ j = 0; for(i = 0; i < numchgs_wdummy; i++) { /* should be that pchgs[i]->index = i + 1 */ /* note COULD BE STRANGE DUE TO INDEXING FROM 1 */ ASSERT(i == pchgs[i]->index - 1); if(!pchgs[i]->dummy) (*real_index)[j++] = i; } if(j != numchgs) { fprintf(stderr, "blkQ2Pfull: panel count and given #panels don't match\n"); exit(1); } /* dump the four matrix sections */ /* - if a charge panel is a dummy panel, skip the interaction calculation */ /* - if a potential panel is on a BOTH or DIELEC surf, include its dummy panels by converting the entry into a divided diff */ for(fromp = 0, k = 0; k < 2; k++, fromp += numchgs/2) { for(fromq = 0, l = 0; l < 2; l++, fromq += numchgs/2) { for(i = 0; i < numchgs/2; i++) { /* loop on collocation points */ i_real = (*real_index)[fromp+i]; ASSERT(!(ppan = pchgs[i_real])->dummy); for(j = 0; j < numchgs/2; j++) { /* loop on charge panels */ /* real_index should eliminate all direct refs to dummy panels */ j_real = (*real_index)[fromq+j]; ASSERT(!(qpan = qchgs[j_real])->dummy); (*sqrArray)[SQDEX(i, j, numchgs/2)] = calcp(qpan, ppan->x, ppan->y, ppan->z, NULL); /* old: qchgs[from+j], pchgs[fromp+i] */ /* older: pchgs[fromp+i], qchgs[fromq+j] */ if(ppan->surf->type == DIELEC || ppan->surf->type == BOTH) { /* add off-panel evaluation contributions to divided diffs */ /* see also dumpQ2PDiag() in mulDisplay.c */ pos_fact = ppan->surf->outer_perm/ppan->pos_dummy->area; neg_fact = ppan->surf->inner_perm/ppan->neg_dummy->area; /* effectively do one columns worth of two row ops, get div dif */ (*sqrArray)[SQDEX(i, j, numchgs/2)] = (pos_fact*calcp(qpan, ppan->pos_dummy->x, ppan->pos_dummy->y, ppan->pos_dummy->z, NULL) - (pos_fact + neg_fact)*(*sqrArray)[SQDEX(i, j, numchgs/2)] + neg_fact*calcp(qpan, ppan->neg_dummy->x, ppan->neg_dummy->y, ppan->neg_dummy->z, NULL)); } } } /* dump the 1/4 matrix to a file */ if(k == 0 && l == 0) { wrMat(*sqrArray, numchgs/2, L11, SQRMAT); /* dumpMatCor((double **)NULL, *sqrArray, numchgs/2); /* for debug */ } else if(k == 0 && l == 1) wrMat(*sqrArray, numchgs/2, U12, SQRMAT); else if(k == 1 && l == 0) wrMat(*sqrArray, numchgs/2, L21, SQRMAT); else wrMat(*sqrArray, numchgs/2, LTIL, SQRMAT); } } fprintf(stderr, "Initial dump to disk complete\n\n"); fprintf(stdout, "Initial dump to disk complete\n\n"); fflush(stdout);}/* does a block factorization into L11 0 U11 U12 L21 LTI 0 UTI using four sections of A stored on disk as A11 = L11, A12 = U12, A21 = L21, A22 = LTI*/void blkLUdecomp(sqrArray, triArray, numchgs)double *sqrArray, *triArray; /* previously allocated flattened matrices */int numchgs; /* A is numchgsxnumchgs */{ extern double lutime; /* factor the stored matrices to give an overall stored factorization */ /* load the A11 part */ rdMat(sqrArray, numchgs/2, L11, SQRMAT); /* factor it in place */ starttimer; blkLudecomp(sqrArray, numchgs/2); stoptimer; lutime += dtime; /* write out factors to different files */ matXfer(sqrArray, triArray, numchgs/2, UP2TR); /* upper part to triArr */ wrMat(triArray, numchgs/2, U11, TRIMAT); matXfer(sqrArray, triArray, numchgs/2, LO2TR); /* lower part to triArr */ wrMat(triArray, numchgs/2, L11, TRIMAT); fprintf(stderr, "A11 factorization complete\n\n"); fprintf(stdout, "\nblkLUdecomp: A11 factored..."); fflush(stdout); /* load A12 and solve in place for U12 and write (L11 in position alrdy) */ rdMat(sqrArray, numchgs/2, U12, SQRMAT); starttimer; blkMatsolve(sqrArray, triArray, numchgs/2, LOWMAT); stoptimer; lutime += dtime; wrMat(sqrArray, numchgs/2, U12, COLMAT); /* store as columns */ fprintf(stderr, "A12 factorization complete\n\n"); fprintf(stdout, "A12 factored..."); fflush(stdout); /* load A21 and U11, solve in place for L21 and write */ rdMat(triArray, numchgs/2, U11, TRIMAT); rdMat(sqrArray, numchgs/2, L21, SQRMAT); starttimer; blkMatsolve(sqrArray, triArray, numchgs/2, UPPMAT); stoptimer; lutime += dtime; wrMat(sqrArray, numchgs/2, L21, SQRMAT); /* store as rows */ fprintf(stderr, "A21 factorization complete\n\n"); fprintf(stdout, "A21 factored..."); fflush(stdout); /* load A22 and subtract off (L21)(U12) product 1/4 matrix at a time */ rdMat(sqrArray, numchgs/2, LTIL, SQRMAT); subInnerProd(sqrArray, triArray, numchgs/2, L21, U12); /* timed internally */ /* factor Atilde and write Ltilde, Utilde */ starttimer; blkLudecomp(sqrArray, numchgs/2); stoptimer; lutime += dtime; matXfer(sqrArray, triArray, numchgs/2, UP2TR); /* upper part to triArr */ wrMat(triArray, numchgs/2, UTIL, TRIMAT); matXfer(sqrArray, triArray, numchgs/2, LO2TR); /* lower part to triArr */ wrMat(triArray, numchgs/2, LTIL, TRIMAT); fprintf(stderr, "Block factorization complete\n\n"); fprintf(stdout, "done.\n"); fflush(stdout);}/* does a matrix vector multiply with the matrix stored on disk in 4 blocks: L11 U12 L21 LTI*/void blkAqprod(p, q, size, sqmat)int size; /* A is size by size */double *p, *q; /* p = Aq is calculated */double *sqmat; /* flat storage space for 1/4 of A */{ int i, j, k, l, fromp, fromq, sqrdex(); extern int fullPqops; extern double dirtime; for(fromp = 0, k = 0; k < 2; k++, fromp += size/2) { for(fromq = 0, l = 0; l < 2; l++, fromq += size/2) { /* read in the correct 1/4 matrix to a file */ if(k == 0 && l == 0) rdMat(sqmat, size/2, L11, SQRMAT); else if(k == 0 && l == 1) rdMat(sqmat, size/2, U12, SQRMAT); else if(k == 1 && l == 0) rdMat(sqmat, size/2, L21, SQRMAT); else rdMat(sqmat, size/2, LTIL, SQRMAT); /* do the product for this section of the matrix */ starttimer; for(i = 0; i < size/2; i++) { for(j = 0; j < size/2; j++) { p[fromp+i] += sqmat[SQDEX(i, j, size/2)]*q[fromq+j]; fullPqops++; } } stoptimer; dirtime += dtime; } }}/* used to reduce a vector eval_size long to one up_size long - used w/block direct routines since they work with the condensed matrix (all zero columns removed and row ops done for divided differences) - should ultimately convert multipole over to condensed form, wont need this*/void blkCompressVector(vec, num_panels, real_size, is_dummy)double *vec;int num_panels, *is_dummy, real_size;{ int i, j; /* copy the entries of the vector corresponding to the real panels into the first up_size entries - is_dummy must be indexed from zero */ for(i = j = 0; i < num_panels; i++) { if(!is_dummy[i]) vec[j++] = vec[i]; } if(j != real_size) { fprintf(stderr, "blkCompressVector: number of real panels not right, %d\n", j); exit(1); }}/* the inverse of the above function*/void blkExpandVector(vec, num_panels, real_size)int num_panels, real_size;double *vec;{ int i, j, from, to, cur_real; extern int *real_index; /* this index set relies on indexing from 0 */ /* transfer to vector */ for(i = real_size - 1; i >= 0; i--) { vec[real_index[i]] = vec[i]; } /* zero out in between valid entries */ from = 0; for(i = 0; i < real_size; i++) { for(j = from; j < real_index[i]; j++) { vec[j] = 0.0; } from = j + 1; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -