📄 blkdirect.c

📁 很经典的电磁计算（电容计算）软件 MIT90年代开发
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
  used only with DIRSOL == ON to do a direct LU and count the ops  - returned matrix has L below the diagonal, U above (GVL1 pg 58)  - meant to be used with 2x2 block matrix factorization*/void blkLudecomp(mat, size)double *mat;int size;{  extern int fulldirops;  double factor;  int i, j, k, sqrdex();  for(k = 0; k < size-1; k++) {	/* loop on rows */    if(mat[SQDEX(k, k, size)] == 0.0) {      fprintf(stderr, "blkLudecomp: zero piovt\n");      exit(1);    }    fprintf(stderr,"%d ", k);    for(i = k+1; i < size; i++) { /* loop on remaining rows */      factor = (mat[SQDEX(i, k, size)] /= mat[SQDEX(k, k, size)]);      fulldirops++;      for(j = k+1; j < size; j++) { /* loop on remaining columns */	mat[SQDEX(i, j, size)] -= (factor*mat[SQDEX(k, j, size)]);	fulldirops++;      }    }  }  fprintf(stderr, "\n");}/*  solves using factored matrix on disc*/void blkSolve(x, b, siz, matri, matsq)int siz;double *x, *b, *matri, *matsq;			/* solution, rhs */{  int i, j, k;  extern int fulldirops;  extern double fullsoltime;  fprintf(stdout, "blkSolve: fwd elimination...");  fflush(stdout);  /* forward elimination, solve Ly = b (x becomes y) */  for(i = 0; i < siz; i++) x[i] = b[i];	/* copy rhs */  rdMat(matri, siz/2, L11, TRIMAT);  /* a row in the result vector is a linear comb of the previous rows */  /* do first (lower triangular only) part */  starttimer;  for(i = 1; i < siz/2; i++) {	/* loop on rows */    for(k = 0; k < i; k++) {	/* loop on previous rows */      x[i] -= (matri[LODEX(i, k, siz/2)]*x[k]);      fulldirops++;    }  }  stoptimer;  fullsoltime += dtime;  /* load L21 and LTIL */  rdMat(matri, siz/2, LTIL, TRIMAT);  rdMat(matsq, siz/2, L21, SQRMAT);  /* do second (square and lower triangular) part */  starttimer;  for(; i < siz; i++) {		/* loop on rows of entire matrix */    for(k = 0; k < siz/2; k++) { /* loop on first half of rows (L21) */      x[i] -= (matsq[SQDEX(i-siz/2, k, siz/2)]*x[k]);      fulldirops++;    }    for(; k < i; k++) {	/* loop on 2nd half of rows (LTIL) */      x[i] -= (matri[LODEX(i-siz/2, k-siz/2, siz/2)]*x[k]);      fulldirops++;    }  }  stoptimer;  fullsoltime += dtime;  fprintf(stdout, "back substitution...");  fflush(stdout);  /* back substitute, solve Ux = y (x converted in place from y to x) */  rdMat(matri, siz/2, UTIL, TRIMAT); /* load lower right U factor */  starttimer;  for(i = siz-1; i >= siz/2; i--) {	/* loop on rows */    for(k = siz-1; k > i; k--) {	/* loop on rows (of x) already done */      x[i] -= (matri[UPDEX(i-siz/2, k-siz/2, siz/2)]*x[k]);      fulldirops++;    }    x[i] /= matri[UPDEX(i-siz/2, i-siz/2, siz/2)]; /* divide by u_{ii} */    fulldirops++;  }  stoptimer;  fullsoltime += dtime;  /* load U11, U12 to do triangle plus square part of back solve */  rdMat(matri, siz/2, U11, TRIMAT);  rdMat(matsq, siz/2, U12, SQRMAT); /* U12 is stored columnwise */  starttimer;  for(; i >= 0; i--) {		/* loop on rows */    for(k = siz-1; k >= siz/2; k--) { /* loop on rows corresponding to U12 */      /* note flipped index because U12 stored as columns */      x[i] -= (matsq[SQDEX(k-siz/2, i, siz/2)]*x[k]);      fulldirops++;    }    for(; k > i; k--) {		/* loop on rows corresponding to cols of U11 */      x[i] -= (matri[UPDEX(i, k, siz/2)]*x[k]);      fulldirops++;    }    x[i] /= matri[UPDEX(i, i, siz/2)];    fulldirops++;  }  stoptimer;  fullsoltime += dtime;  fprintf(stdout, "done.\n\n");  fflush(stdout);}/*  used only in conjunction with EXPGCR == ON  and DIRSOL == ON  to dump full P to disc in four square chunks - N MUST BE EVEN  - matrix stored in factored form on exit  - used to get around memory restrictions  - only 3/8 of the entire matrix is in core at any given time, rest on disc    in L11, U11, U12, L21, Ltil and Util*/void blkQ2Pfull(directlist,  numchgs, numchgs_wdummy, 		triArray, sqrArray, real_index, is_dummy)int numchgs, numchgs_wdummy, *is_dummy, **real_index;double **triArray, **sqrArray;	/* LINEAR arrays: 1 triangular, 1 square mat */cube *directlist;{  int i, j, fromp, fromq, top, toq, matsize, lowdex(), sqrdex(), uppdex();   int k, l, i_real, j_real;  double calcp();  cube *pq, *pp;  charge **pchgs, **qchgs, *ppan, *qpan;  double pos_fact, neg_fact;  /* allocate room for one 1/4 size square full P matrix, one 1/4 size tri.     - linear arrays are used to cut down on memory overhead */  /* allocate for read index array too */  if(numchgs % 2 == 0) {	/* if numchgs is even */    matsize = numchgs*numchgs/4;    CALLOC(*sqrArray, matsize, double, ON, AMSC);    matsize = ((numchgs/2)*(numchgs/2 + 1))/2;    CALLOC(*triArray, matsize, double, ON, AMSC);    CALLOC(*real_index, numchgs, int, ON, AMSC);  }  else {    fprintf(stderr, "blkQ2Pfull: can't handle an odd number of panels\n");    exit(1);  }  /* load the matrix in the style of Q2P() - no attempt to exploit symmetry */  /* calculate interaction between every direct list entry and entire dlist */  /* the block implementation MUST have all the charges in 1st dlist entry */  pp = pq = directlist;  if(pp == NULL || pp->dnext != NULL || pp->upnumeles[0] != numchgs_wdummy) {    fprintf(stderr, "blkQ2Pfull: bad directlist, must run with depth 0\n");    exit(1);  }  pchgs = qchgs = pp->chgs;  /* get the real index array - indices of non-dummy panels */  j = 0;  for(i = 0; i < numchgs_wdummy; i++) {    /* should be that pchgs[i]->index = i + 1 */    /*  note COULD BE STRANGE DUE TO INDEXING FROM 1 */    ASSERT(i == pchgs[i]->index - 1);    if(!pchgs[i]->dummy) (*real_index)[j++] = i;  }  if(j != numchgs) {    fprintf(stderr, "blkQ2Pfull: panel count and given #panels don't match\n");    exit(1);  }  /* dump the four matrix sections */  /* - if a charge panel is a dummy panel, skip the interaction calculation */  /* - if a potential panel is on a BOTH or DIELEC surf,        include its dummy panels by converting the entry into a divided diff */  for(fromp = 0, k = 0; k < 2; k++, fromp += numchgs/2) {    for(fromq = 0, l = 0; l < 2; l++, fromq += numchgs/2) {      for(i = 0; i < numchgs/2; i++) { /* loop on collocation points */	i_real = (*real_index)[fromp+i];	ASSERT(!(ppan = pchgs[i_real])->dummy);	for(j = 0; j < numchgs/2; j++) { /* loop on charge panels */	  /* real_index should eliminate all direct refs to dummy panels */	  j_real = (*real_index)[fromq+j];	  ASSERT(!(qpan = qchgs[j_real])->dummy);	  (*sqrArray)[SQDEX(i, j, numchgs/2)] = calcp(qpan, ppan->x, ppan->y, 						      ppan->z, NULL);	  /* old: qchgs[from+j], pchgs[fromp+i] */	  /* older: pchgs[fromp+i], qchgs[fromq+j] */	  if(ppan->surf->type == DIELEC || ppan->surf->type == BOTH) {	    /* add off-panel evaluation contributions to divided diffs */	    /* see also dumpQ2PDiag() in mulDisplay.c */	    pos_fact = ppan->surf->outer_perm/ppan->pos_dummy->area;	    neg_fact = ppan->surf->inner_perm/ppan->neg_dummy->area;	    /* effectively do one columns worth of two row ops, get div dif */	    (*sqrArray)[SQDEX(i, j, numchgs/2)]		= (pos_fact*calcp(qpan, ppan->pos_dummy->x, ppan->pos_dummy->y,				  ppan->pos_dummy->z, NULL)		   - (pos_fact + neg_fact)*(*sqrArray)[SQDEX(i, j, numchgs/2)]		   + neg_fact*calcp(qpan, ppan->neg_dummy->x, 				    ppan->neg_dummy->y, ppan->neg_dummy->z,				    NULL));	  }	}      }            /* dump the 1/4 matrix to a file */      if(k == 0 && l == 0) {	wrMat(*sqrArray, numchgs/2, L11, SQRMAT);	/* dumpMatCor((double **)NULL, *sqrArray, numchgs/2); /* for debug */      }      else if(k == 0 && l == 1) wrMat(*sqrArray, numchgs/2, U12, SQRMAT);      else if(k == 1 && l == 0) wrMat(*sqrArray, numchgs/2, L21, SQRMAT);      else wrMat(*sqrArray, numchgs/2, LTIL, SQRMAT);    }  }  fprintf(stderr, "Initial dump to disk complete\n\n");  fprintf(stdout, "Initial dump to disk complete\n\n");  fflush(stdout);}/*  does a block factorization into  L11  0    U11 U12  L21 LTI    0  UTI  using four sections of A stored on disk as   A11 = L11, A12 = U12, A21 = L21, A22 = LTI*/void blkLUdecomp(sqrArray, triArray, numchgs)double *sqrArray, *triArray;	/* previously allocated flattened matrices */int numchgs;			/* A is numchgsxnumchgs */{  extern double lutime;  /* factor the stored matrices to give an overall stored factorization */  /* load the A11 part */  rdMat(sqrArray, numchgs/2, L11, SQRMAT);  /* factor it in place */  starttimer;  blkLudecomp(sqrArray, numchgs/2);  stoptimer;  lutime += dtime;  /* write out factors to different files */  matXfer(sqrArray, triArray, numchgs/2, UP2TR); /* upper part to triArr */  wrMat(triArray, numchgs/2, U11, TRIMAT);  matXfer(sqrArray, triArray, numchgs/2, LO2TR); /* lower part to triArr */  wrMat(triArray, numchgs/2, L11, TRIMAT);  fprintf(stderr, "A11 factorization complete\n\n");  fprintf(stdout, "\nblkLUdecomp: A11 factored...");  fflush(stdout);  /* load A12 and solve in place for U12 and write (L11 in position alrdy) */  rdMat(sqrArray, numchgs/2, U12, SQRMAT);  starttimer;  blkMatsolve(sqrArray, triArray, numchgs/2, LOWMAT);  stoptimer;  lutime += dtime;  wrMat(sqrArray, numchgs/2, U12, COLMAT); /* store as columns */  fprintf(stderr, "A12 factorization complete\n\n");  fprintf(stdout, "A12 factored...");  fflush(stdout);  /* load A21 and U11, solve in place for L21 and write */  rdMat(triArray, numchgs/2, U11, TRIMAT);  rdMat(sqrArray, numchgs/2, L21, SQRMAT);  starttimer;  blkMatsolve(sqrArray, triArray, numchgs/2, UPPMAT);  stoptimer;  lutime += dtime;  wrMat(sqrArray, numchgs/2, L21, SQRMAT); /* store as rows */  fprintf(stderr, "A21 factorization complete\n\n");  fprintf(stdout, "A21 factored...");  fflush(stdout);  /* load A22 and subtract off (L21)(U12) product 1/4 matrix at a time */  rdMat(sqrArray, numchgs/2, LTIL, SQRMAT);  subInnerProd(sqrArray, triArray, numchgs/2, L21, U12); /* timed internally */  /* factor Atilde and write Ltilde, Utilde */  starttimer;  blkLudecomp(sqrArray, numchgs/2);  stoptimer;  lutime += dtime;  matXfer(sqrArray, triArray, numchgs/2, UP2TR); /* upper part to triArr */  wrMat(triArray, numchgs/2, UTIL, TRIMAT);  matXfer(sqrArray, triArray, numchgs/2, LO2TR); /* lower part to triArr */  wrMat(triArray, numchgs/2, LTIL, TRIMAT);  fprintf(stderr, "Block factorization complete\n\n");  fprintf(stdout, "done.\n");  fflush(stdout);}/*  does a matrix vector multiply with the matrix stored on disk in 4 blocks:  L11 U12  L21 LTI*/void blkAqprod(p, q, size, sqmat)int size;			/* A is size by size */double *p, *q;			/* p = Aq is calculated */double *sqmat;			/* flat storage space for 1/4 of A */{  int i, j, k, l, fromp, fromq, sqrdex();  extern int fullPqops;  extern double dirtime;  for(fromp = 0, k = 0; k < 2; k++, fromp += size/2) {    for(fromq = 0, l = 0; l < 2; l++, fromq += size/2) {            /* read in the correct 1/4 matrix to a file */      if(k == 0 && l == 0) rdMat(sqmat, size/2, L11, SQRMAT);      else if(k == 0 && l == 1) rdMat(sqmat, size/2, U12, SQRMAT);      else if(k == 1 && l == 0) rdMat(sqmat, size/2, L21, SQRMAT);      else rdMat(sqmat, size/2, LTIL, SQRMAT);      /* do the product for this section of the matrix */      starttimer;      for(i = 0; i < size/2; i++) {	for(j = 0; j < size/2; j++) {	  p[fromp+i] += sqmat[SQDEX(i, j, size/2)]*q[fromq+j];	  fullPqops++;	}      }      stoptimer;      dirtime += dtime;    }  }}/*  used to reduce a vector eval_size long to one up_size long  - used w/block direct routines since they work with the condensed matrix    (all zero columns removed and row ops done for divided differences)  - should ultimately convert multipole over to condensed form, wont need this*/void blkCompressVector(vec, num_panels, real_size, is_dummy)double *vec;int num_panels, *is_dummy, real_size;{  int i, j;  /* copy the entries of the vector corresponding to the real panels     into the first up_size entries - is_dummy must be indexed from zero */  for(i = j = 0; i < num_panels; i++) {    if(!is_dummy[i]) vec[j++] = vec[i];  }  if(j != real_size) {    fprintf(stderr, "blkCompressVector: number of real panels not right, %d\n",	    j);    exit(1);  }}/*  the inverse of the above function*/void blkExpandVector(vec, num_panels, real_size)int num_panels, real_size;double *vec;{  int i, j, from, to, cur_real;  extern int *real_index;	/* this index set relies on indexing from 0 */  /* transfer to vector */  for(i = real_size - 1; i >= 0; i--) {    vec[real_index[i]] = vec[i];  }  /* zero out in between valid entries */  from = 0;  for(i = 0; i < real_size; i++) {    for(j = from; j < real_index[i]; j++) {      vec[j] = 0.0;    }    from = j + 1;  }}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -