⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 serial.c

📁 一个用来实现偏微分方程中网格的计算库
💻 C
📖 第 1 页 / 共 3 页
字号:
  ASSERTS(idxsum(nparts, htable) == -nparts);  ikeysort(smallcount, flowto);  for (j=0; j<amin(smallcount, max_mult); j++, bigcount++) {    bestflow[bigcount].key = flowto[j].key;    bestflow[bigcount].val = current_from*nparts+flowto[j].val;  }  ikeysort(bigcount, bestflow);  ASSERTS(idxsum(nparts, map) == -nparts);  ASSERTS(idxsum(nparts, rowmap) == -nparts);  nmapped = 0;  /* now make as many assignments as possible */  for (ii=0; ii<bigcount; ii++) {    i = bestflow[ii].val;    j = i % nparts;  /* to */    k = i / nparts;  /* from */    if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(tpwgts, graph->ncon, j, k)) {      map[j] = k;      rowmap[k] = j;      nmapped++;    }    if (nmapped == nparts)      break;  }  /* remap the rest */  /* it may help try remapping to the same label first */  if (nmapped < nparts) {    for (j=0; j<nparts && nmapped<nparts; j++) {      if (map[j] == -1) {        for (ii=0; ii<nparts; ii++) {          i = (j+ii) % nparts;          if (rowmap[i] == -1 && SimilarTpwgts(tpwgts, graph->ncon, i, j)) {            map[j] = i;            rowmap[i] = j;            nmapped++;            break;          }        }      }    }  }  /* check to see if remapping fails (due to dis-similar tpwgts) */  /* if remapping fails, revert to original mapping */  if (nmapped < nparts)    for (i=0; i<nparts; i++)      map[i] = i;  for (i=0; i<nvtxs; i++)    remap[i] = map[remap[i]];  GKfree((void **)&sortvtx, (void **)&flowto, (void **)&htable, LTERM);}/**************************************************************************  This is a comparison function for Serial Remap**************************************************************************/int SSMIncKeyCmp(const void *fptr, const void *sptr){  KeyKeyValueType *first, *second;  first = (KeyKeyValueType *)(fptr);  second = (KeyKeyValueType *)(sptr);  if (first->key1 > second->key1)    return 1;  if (first->key1 < second->key1)     return -1;  if (first->key2 < second->key2)    return 1;  if (first->key2 > second->key2)     return -1;   return 0;}/************************************************************************** This function performs an edge-based FM refinement**************************************************************************/void Moc_Serial_FM_2WayRefine(GraphType *graph, float *tpwgts, int npasses){  int i, ii, j, k;  int kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum;  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;  idxtype *moved, *swaps, *qnum;  float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;  FPQueueType parts[MAXNCON][2];  int higain, oldgain, mincut, initcut, newcut, mincutorder;  float rtpwgts[MAXNCON*2];  KeyValueType *cand;int mype;MPI_Comm_rank(MPI_COMM_WORLD, &mype);  nvtxs = graph->nvtxs;  ncon = graph->ncon;  xadj = graph->xadj;  nvwgt = graph->nvwgt;  adjncy = graph->adjncy;  adjwgt = graph->adjwgt;  where = graph->where;  id = graph->sendind;  ed = graph->recvind;  npwgts = graph->gnpwgts;  bndptr = graph->sendptr;  bndind = graph->recvptr;  moved = idxmalloc(nvtxs, "moved");  swaps = idxmalloc(nvtxs, "swaps");  qnum = idxmalloc(nvtxs, "qnum");  cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");  limit = amin(amax(0.01*nvtxs, 25), 150);  /* Initialize the queues */  for (i=0; i<ncon; i++) {    FPQueueInit(&parts[i][0], nvtxs);    FPQueueInit(&parts[i][1], nvtxs);  }  for (i=0; i<nvtxs; i++)    qnum[i] = samax(ncon, nvwgt+i*ncon);  origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);  for (i=0; i<ncon; i++) {    rtpwgts[i] = origbal*tpwgts[i];    rtpwgts[ncon+i] = origbal*tpwgts[ncon+i];  }  idxset(nvtxs, -1, moved);  for (pass=0; pass<npasses; pass++) { /* Do a number of passes */    for (i=0; i<ncon; i++) {      FPQueueReset(&parts[i][0]);      FPQueueReset(&parts[i][1]);    }    mincutorder = -1;    newcut = mincut = initcut = graph->mincut;    for (i=0; i<ncon; i++)      mindiff[i] = fabs(tpwgts[i]-npwgts[i]);    minbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);    /* Insert boundary nodes in the priority queues */    nbnd = graph->gnvtxs;    for (i=0; i<nbnd; i++) {      cand[i].key = id[i]-ed[i];      cand[i].val = i;    }    ikeysort(nbnd, cand);    for (ii=0; ii<nbnd; ii++) {      i = bndind[cand[ii].val];      FPQueueInsert(&parts[qnum[i]][where[i]], i, (float)(ed[i]-id[i]));    }    for (nswaps=0; nswaps<nvtxs; nswaps++) {      Serial_SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts);      to = (from+1)%2;      if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1)        break;      saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);      saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);      newcut -= (ed[higain]-id[higain]);      newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);      if ((newcut < mincut && newbal-origbal <= .00001) ||          (newcut == mincut && (newbal < minbal ||                                (newbal == minbal && Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {        mincut = newcut;        minbal = newbal;        mincutorder = nswaps;        for (i=0; i<ncon; i++)          mindiff[i] = fabs(tpwgts[i]-npwgts[i]);      }      else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */        newcut += (ed[higain]-id[higain]);        saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);        saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);        break;      }      where[higain] = to;      moved[higain] = nswaps;      swaps[nswaps] = higain;      /**************************************************************      * Update the id[i]/ed[i] values of the affected nodes      ***************************************************************/      SWAP(id[higain], ed[higain], tmp);      if (ed[higain] == 0 && xadj[higain] < xadj[higain+1])        BNDDelete(nbnd, bndind,  bndptr, higain);      for (j=xadj[higain]; j<xadj[higain+1]; j++) {        k = adjncy[j];        oldgain = ed[k]-id[k];        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);        INC_DEC(id[k], ed[k], kwgt);        /* Update its boundary information and queue position */        if (bndptr[k] != -1) { /* If k was a boundary vertex */          if (ed[k] == 0) { /* Not a boundary vertex any more */            BNDDelete(nbnd, bndind, bndptr, k);            if (moved[k] == -1)  /* Remove it if in the queues */              FPQueueDelete(&parts[qnum[k]][where[k]], k);          }          else { /* If it has not been moved, update its position in the queue */            if (moved[k] == -1)              FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)oldgain, (float)(ed[k]-id[k]));          }        }        else {          if (ed[k] > 0) {  /* It will now become a boundary vertex */            BNDInsert(nbnd, bndind, bndptr, k);            if (moved[k] == -1)              FPQueueInsert(&parts[qnum[k]][where[k]], k, (float)(ed[k]-id[k]));          }        }      }    }    /****************************************************************    * Roll back computations    *****************************************************************/    for (i=0; i<nswaps; i++)      moved[swaps[i]] = -1;  /* reset moved array */    for (nswaps--; nswaps>mincutorder; nswaps--) {      higain = swaps[nswaps];      to = where[higain] = (where[higain]+1)%2;      SWAP(id[higain], ed[higain], tmp);      if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])        BNDDelete(nbnd, bndind,  bndptr, higain);      else if (ed[higain] > 0 && bndptr[higain] == -1)        BNDInsert(nbnd, bndind,  bndptr, higain);      saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);      saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);      for (j=xadj[higain]; j<xadj[higain+1]; j++) {        k = adjncy[j];        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);        INC_DEC(id[k], ed[k], kwgt);        if (bndptr[k] != -1 && ed[k] == 0)          BNDDelete(nbnd, bndind, bndptr, k);        if (bndptr[k] == -1 && ed[k] > 0)          BNDInsert(nbnd, bndind, bndptr, k);      }    }    graph->mincut = mincut;    graph->gnvtxs = nbnd;    if (mincutorder == -1 || mincut == initcut)      break;  }  for (i=0; i<ncon; i++) {    FPQueueFree(&parts[i][0]);    FPQueueFree(&parts[i][1]);  }  GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM);  return;}/************************************************************************** This function selects the partition number and the queue from which* we will move vertices out**************************************************************************/void Serial_SelectQueue(int ncon, float *npwgts, float *tpwgts, int *from, int *cnum,     FPQueueType queues[MAXNCON][2]){  int i, part;  float maxgain=0.0;  float max = -1.0, maxdiff=0.0;int mype;MPI_Comm_rank(MPI_COMM_WORLD, &mype);  *from = -1;  *cnum = -1;  /* First determine the side and the queue, irrespective of the presence of nodes */  for (part=0; part<2; part++) {    for (i=0; i<ncon; i++) {      if (npwgts[part*ncon+i]-tpwgts[part*ncon+i] >= maxdiff) {        maxdiff = npwgts[part*ncon+i]-tpwgts[part*ncon+i];        *from = part;        *cnum = i;      }    }  }  if (*from != -1 && FPQueueGetQSize(&queues[*cnum][*from]) == 0) {    /* The desired queue is empty, select a node from that side anyway */    for (i=0; i<ncon; i++) {      if (FPQueueGetQSize(&queues[i][*from]) > 0) {        max = npwgts[(*from)*ncon + i];        *cnum = i;        break;      }    }    for (i++; i<ncon; i++) {      if (npwgts[(*from)*ncon + i] > max && FPQueueGetQSize(&queues[i][*from]) > 0) {        max = npwgts[(*from)*ncon + i];        *cnum = i;      }    }  }  /* Check to see if you can focus on the cut */  if (maxdiff <= 0.0 || *from == -1) {    maxgain = -100000.0;    for (part=0; part<2; part++) {      for (i=0; i<ncon; i++) {        if (FPQueueGetQSize(&queues[i][part]) > 0 &&            FPQueueSeeMaxGain(&queues[i][part]) > maxgain) {          maxgain = FPQueueSeeMaxGain(&queues[i][part]);          *from = part;          *cnum = i;        }      }    }  }  return;}/************************************************************************** This function checks if the balance achieved is better than the diff* For now, it uses a 2-norm measure**************************************************************************/int Serial_BetterBalance(int ncon, float *npwgts, float *tpwgts, float *diff){  int i;  float ndiff[MAXNCON];  for (i=0; i<ncon; i++)    ndiff[i] = fabs(tpwgts[i]-npwgts[i]);  return snorm2(ncon, ndiff) < snorm2(ncon, diff);}/************************************************************************** This function computes the load imbalance over all the constrains**************************************************************************/float Serial_Compute2WayHLoadImbalance(int ncon, float *npwgts, float *tpwgts){  int i;  float max=0.0, temp;  for (i=0; i<ncon; i++) {    if (tpwgts[i] == 0.0)      temp = 0.0;    else      temp = fabs(tpwgts[i]-npwgts[i])/tpwgts[i];    max = (max < temp ? temp : max);  }  return 1.0+max;}/************************************************************************** This function performs an edge-based FM refinement**************************************************************************/void Moc_Serial_Balance2Way(GraphType *graph, float *tpwgts, float lbfactor){  int i, ii, j, k, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum;  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;  idxtype *moved, *swaps, *qnum;  float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;  FPQueueType parts[MAXNCON][2];  int higain, oldgain, mincut, newcut, mincutorder;  int qsizes[MAXNCON][2];  KeyValueType *cand;  nvtxs = graph->nvtxs;  ncon = graph->ncon;  xadj = graph->xadj;  nvwgt = graph->nvwgt;  adjncy = graph->adjncy;  adjwgt = graph->adjwgt;  where = graph->where;  id = graph->sendind;  ed = graph->recvind;  npwgts = graph->gnpwgts;  bndptr = graph->sendptr;  bndind = graph->recvptr;  moved = idxmalloc(nvtxs, "moved");  swaps = idxmalloc(nvtxs, "swaps");  qnum = idxmalloc(nvtxs, "qnum");  cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");  limit = amin(amax(0.01*nvtxs, 15), 100);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -