📄 serial.c
字号:
ASSERTS(idxsum(nparts, htable) == -nparts); ikeysort(smallcount, flowto); for (j=0; j<amin(smallcount, max_mult); j++, bigcount++) { bestflow[bigcount].key = flowto[j].key; bestflow[bigcount].val = current_from*nparts+flowto[j].val; } ikeysort(bigcount, bestflow); ASSERTS(idxsum(nparts, map) == -nparts); ASSERTS(idxsum(nparts, rowmap) == -nparts); nmapped = 0; /* now make as many assignments as possible */ for (ii=0; ii<bigcount; ii++) { i = bestflow[ii].val; j = i % nparts; /* to */ k = i / nparts; /* from */ if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(tpwgts, graph->ncon, j, k)) { map[j] = k; rowmap[k] = j; nmapped++; } if (nmapped == nparts) break; } /* remap the rest */ /* it may help try remapping to the same label first */ if (nmapped < nparts) { for (j=0; j<nparts && nmapped<nparts; j++) { if (map[j] == -1) { for (ii=0; ii<nparts; ii++) { i = (j+ii) % nparts; if (rowmap[i] == -1 && SimilarTpwgts(tpwgts, graph->ncon, i, j)) { map[j] = i; rowmap[i] = j; nmapped++; break; } } } } } /* check to see if remapping fails (due to dis-similar tpwgts) */ /* if remapping fails, revert to original mapping */ if (nmapped < nparts) for (i=0; i<nparts; i++) map[i] = i; for (i=0; i<nvtxs; i++) remap[i] = map[remap[i]]; GKfree((void **)&sortvtx, (void **)&flowto, (void **)&htable, LTERM);}/************************************************************************** This is a comparison function for Serial Remap**************************************************************************/int SSMIncKeyCmp(const void *fptr, const void *sptr){ KeyKeyValueType *first, *second; first = (KeyKeyValueType *)(fptr); second = (KeyKeyValueType *)(sptr); if (first->key1 > second->key1) return 1; if (first->key1 < second->key1) return -1; if (first->key2 < second->key2) return 1; if (first->key2 > second->key2) return -1; return 0;}/************************************************************************** This function performs an edge-based FM refinement**************************************************************************/void Moc_Serial_FM_2WayRefine(GraphType *graph, float *tpwgts, int npasses){ int i, ii, j, k; int kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *qnum; float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; FPQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, initcut, newcut, mincutorder; float rtpwgts[MAXNCON*2]; KeyValueType *cand;int mype;MPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; moved = idxmalloc(nvtxs, "moved"); swaps = idxmalloc(nvtxs, "swaps"); qnum = idxmalloc(nvtxs, "qnum"); cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); limit = amin(amax(0.01*nvtxs, 25), 150); /* Initialize the queues */ for (i=0; i<ncon; i++) { FPQueueInit(&parts[i][0], nvtxs); FPQueueInit(&parts[i][1], nvtxs); } for (i=0; i<nvtxs; i++) qnum[i] = samax(ncon, nvwgt+i*ncon); origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); for (i=0; i<ncon; i++) { rtpwgts[i] = origbal*tpwgts[i]; rtpwgts[ncon+i] = origbal*tpwgts[ncon+i]; } idxset(nvtxs, -1, moved); for (pass=0; pass<npasses; pass++) { /* Do a number of passes */ for (i=0; i<ncon; i++) { FPQueueReset(&parts[i][0]); FPQueueReset(&parts[i][1]); } mincutorder = -1; newcut = mincut = initcut = graph->mincut; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); minbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); /* Insert boundary nodes in the priority queues */ nbnd = graph->gnvtxs; for (i=0; i<nbnd; i++) { cand[i].key = id[i]-ed[i]; cand[i].val = i; } ikeysort(nbnd, cand); for (ii=0; ii<nbnd; ii++) { i = bndind[cand[ii].val]; FPQueueInsert(&parts[qnum[i]][where[i]], i, (float)(ed[i]-id[i])); } for (nswaps=0; nswaps<nvtxs; nswaps++) { Serial_SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts); to = (from+1)%2; if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1) break; saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); if ((newcut < mincut && newbal-origbal <= .00001) || (newcut == mincut && (newbal < minbal || (newbal == minbal && Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) { mincut = newcut; minbal = newbal; mincutorder = nswaps; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update its boundary information and queue position */ if (bndptr[k] != -1) { /* If k was a boundary vertex */ if (ed[k] == 0) { /* Not a boundary vertex any more */ BNDDelete(nbnd, bndind, bndptr, k); if (moved[k] == -1) /* Remove it if in the queues */ FPQueueDelete(&parts[qnum[k]][where[k]], k); } else { /* If it has not been moved, update its position in the queue */ if (moved[k] == -1) FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)oldgain, (float)(ed[k]-id[k])); } } else { if (ed[k] > 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, bndind, bndptr, k); if (moved[k] == -1) FPQueueInsert(&parts[qnum[k]][where[k]], k, (float)(ed[k]-id[k])); } } } } /**************************************************************** * Roll back computations *****************************************************************/ for (i=0; i<nswaps; i++) moved[swaps[i]] = -1; /* reset moved array */ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; if (mincutorder == -1 || mincut == initcut) break; } for (i=0; i<ncon; i++) { FPQueueFree(&parts[i][0]); FPQueueFree(&parts[i][1]); } GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM); return;}/************************************************************************** This function selects the partition number and the queue from which* we will move vertices out**************************************************************************/void Serial_SelectQueue(int ncon, float *npwgts, float *tpwgts, int *from, int *cnum, FPQueueType queues[MAXNCON][2]){ int i, part; float maxgain=0.0; float max = -1.0, maxdiff=0.0;int mype;MPI_Comm_rank(MPI_COMM_WORLD, &mype); *from = -1; *cnum = -1; /* First determine the side and the queue, irrespective of the presence of nodes */ for (part=0; part<2; part++) { for (i=0; i<ncon; i++) { if (npwgts[part*ncon+i]-tpwgts[part*ncon+i] >= maxdiff) { maxdiff = npwgts[part*ncon+i]-tpwgts[part*ncon+i]; *from = part; *cnum = i; } } } if (*from != -1 && FPQueueGetQSize(&queues[*cnum][*from]) == 0) { /* The desired queue is empty, select a node from that side anyway */ for (i=0; i<ncon; i++) { if (FPQueueGetQSize(&queues[i][*from]) > 0) { max = npwgts[(*from)*ncon + i]; *cnum = i; break; } } for (i++; i<ncon; i++) { if (npwgts[(*from)*ncon + i] > max && FPQueueGetQSize(&queues[i][*from]) > 0) { max = npwgts[(*from)*ncon + i]; *cnum = i; } } } /* Check to see if you can focus on the cut */ if (maxdiff <= 0.0 || *from == -1) { maxgain = -100000.0; for (part=0; part<2; part++) { for (i=0; i<ncon; i++) { if (FPQueueGetQSize(&queues[i][part]) > 0 && FPQueueSeeMaxGain(&queues[i][part]) > maxgain) { maxgain = FPQueueSeeMaxGain(&queues[i][part]); *from = part; *cnum = i; } } } } return;}/************************************************************************** This function checks if the balance achieved is better than the diff* For now, it uses a 2-norm measure**************************************************************************/int Serial_BetterBalance(int ncon, float *npwgts, float *tpwgts, float *diff){ int i; float ndiff[MAXNCON]; for (i=0; i<ncon; i++) ndiff[i] = fabs(tpwgts[i]-npwgts[i]); return snorm2(ncon, ndiff) < snorm2(ncon, diff);}/************************************************************************** This function computes the load imbalance over all the constrains**************************************************************************/float Serial_Compute2WayHLoadImbalance(int ncon, float *npwgts, float *tpwgts){ int i; float max=0.0, temp; for (i=0; i<ncon; i++) { if (tpwgts[i] == 0.0) temp = 0.0; else temp = fabs(tpwgts[i]-npwgts[i])/tpwgts[i]; max = (max < temp ? temp : max); } return 1.0+max;}/************************************************************************** This function performs an edge-based FM refinement**************************************************************************/void Moc_Serial_Balance2Way(GraphType *graph, float *tpwgts, float lbfactor){ int i, ii, j, k, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *qnum; float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; FPQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, newcut, mincutorder; int qsizes[MAXNCON][2]; KeyValueType *cand; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; moved = idxmalloc(nvtxs, "moved"); swaps = idxmalloc(nvtxs, "swaps"); qnum = idxmalloc(nvtxs, "qnum"); cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); limit = amin(amax(0.01*nvtxs, 15), 100);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -