📄 rf_dagutils.c
字号:
MIN(start of access, start of failed SU), (sosEndAddr) MAX(end of access, end of failed SU), (eosStartAddr) end of stripe (i.e. start of next stripe) (eosAddr) */ sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); sosEndAddr = RF_MIN(asmap->raidAddress, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,failedPDA->raidAddress)); eosStartAddr = RF_MAX(asmap->endRaidAddress, rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, failedPDA->raidAddress)); eosAddr = rf_RaidAddressOfNextStripeBoundary(layoutPtr, asmap->raidAddress); /* now generate access stripe maps for each of the above regions of the * stripe. Use a dummy (NULL) buf ptr for now */ new_asm_h[0] = (sosAddr != sosEndAddr) ? rf_MapAccess(raidPtr, sosAddr, sosEndAddr-sosAddr, NULL, RF_DONT_REMAP) : NULL; new_asm_h[1] = (eosStartAddr != eosAddr) ? rf_MapAccess(raidPtr, eosStartAddr, eosAddr-eosStartAddr, NULL, RF_DONT_REMAP) : NULL; /* walk through the PDAs and range-restrict each SU to the region of the * SU touched on the failed PDA. also compute total data buffer space * requirements in this step. Ignore the parity for now. */ numSect[0] = numSect[1] = 0; if (new_asm_h[0]) { new_asm_h[0]->next = dag_h->asmList; dag_h->asmList = new_asm_h[0]; for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) { rf_RangeRestrictPDA(raidPtr,failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); numSect[0] += pda->numSector; } } if (new_asm_h[1]) { new_asm_h[1]->next = dag_h->asmList; dag_h->asmList = new_asm_h[1]; for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) { rf_RangeRestrictPDA(raidPtr,failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); numSect[1] += pda->numSector; } } numParitySect = failedPDA->numSector; /* allocate buffer space for the data & parity we have to read to recover * from the failure */ if (numSect[0]+numSect[1]+ ((rpBufPtr) ? numParitySect : 0)) { /* don't allocate parity buf if not needed */ RF_MallocAndAdd(rdBuf, rf_RaidAddressToByte(raidPtr,numSect[0]+numSect[1]+numParitySect), (char *), allocList); bufP = rdBuf; if (rf_degDagDebug) printf("Newly allocated buffer (%d bytes) is 0x%lx\n", rf_RaidAddressToByte(raidPtr,numSect[0]+numSect[1]+numParitySect), (unsigned long) bufP); } /* now walk through the pdas one last time and assign buffer pointers * (ugh!). Again, ignore the parity. also, count nodes to find out how * many bufs need to be xored together */ (*nXorBufs) = 1; /* in read case, 1 is for parity. In write case, 1 is for failed data */ if (new_asm_h[0]) { for (pda=new_asm_h[0]->stripeMap->physInfo; pda; pda=pda->next) {pda->bufPtr = bufP; bufP += rf_RaidAddressToByte(raidPtr,pda->numSector);} *nXorBufs += new_asm_h[0]->stripeMap->numStripeUnitsAccessed; } if (new_asm_h[1]) { for (pda=new_asm_h[1]->stripeMap->physInfo; pda; pda=pda->next) {pda->bufPtr = bufP; bufP += rf_RaidAddressToByte(raidPtr,pda->numSector);} (*nXorBufs) += new_asm_h[1]->stripeMap->numStripeUnitsAccessed; } if (rpBufPtr) *rpBufPtr = bufP; /* the rest of the buffer is for parity */ /* the last step is to figure out how many more distinct buffers need to * get xor'd to produce the missing unit. there's one for each user-data * read node that overlaps the portion of the failed unit being accessed */ for (foundit=i=0,pda=asmap->physInfo; pda; i++,pda=pda->next) { if (pda == failedPDA) {i--; foundit=1; continue;} if (rf_PDAOverlap(layoutPtr, pda, failedPDA)) { overlappingPDAs[i] = 1; (*nXorBufs)++; } } if (!foundit) {RF_ERRORMSG("GenerateFailedAccessASMs: did not find failedPDA in asm list\n"); RF_ASSERT(0);} if (rf_degDagDebug) { if (new_asm_h[0]) { printf("First asm:\n"); rf_PrintFullAccessStripeMap(new_asm_h[0], 1); } if (new_asm_h[1]) { printf("Second asm:\n"); rf_PrintFullAccessStripeMap(new_asm_h[1], 1); } }}/* adjusts the offset and number of sectors in the destination pda so that * it covers at most the region of the SU covered by the source PDA. This * is exclusively a restriction: the number of sectors indicated by the * target PDA can only shrink. * * For example: s = sectors within SU indicated by source PDA * d = sectors within SU indicated by dest PDA * r = results, stored in dest PDA * * |--------------- one stripe unit ---------------------| * | sssssssssssssssssssssssssssssssss | * | ddddddddddddddddddddddddddddddddddddddddddddd | * | rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr | * * Another example: * * |--------------- one stripe unit ---------------------| * | sssssssssssssssssssssssssssssssss | * | ddddddddddddddddddddddd | * | rrrrrrrrrrrrrrrr | * */void rf_RangeRestrictPDA( RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *src, RF_PhysDiskAddr_t *dest, int dobuffer, int doraidaddr){ RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector); RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector); RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector-1); /* use -1 to be sure we stay within SU */ RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector-1); RF_SectorNum_t subAddr = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->startSector); /* stripe unit boundary */ dest->startSector = subAddr + RF_MAX(soffs,doffs); dest->numSector = subAddr + RF_MIN(send,dend) + 1 - dest->startSector; if (dobuffer) dest->bufPtr += (soffs > doffs) ? rf_RaidAddressToByte(raidPtr,soffs-doffs) : 0; if (doraidaddr) { dest->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->raidAddress) + rf_StripeUnitOffset(layoutPtr, dest->startSector); }}/* * Want the highest of these primes to be the largest one * less than the max expected number of columns (won't hurt * to be too small or too large, but won't be optimal, either) * --jimz */#define NLOWPRIMES 8static int lowprimes[NLOWPRIMES] = {2,3,5,7,11,13,17,19};/***************************************************************************** * compute the workload shift factor. (chained declustering) * * return nonzero if access should shift to secondary, otherwise, * access is to primary *****************************************************************************/int rf_compute_workload_shift( RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda){ /* * variables: * d = column of disk containing primary * f = column of failed disk * n = number of disks in array * sd = "shift distance" (number of columns that d is to the right of f) * row = row of array the access is in * v = numerator of redirection ratio * k = denominator of redirection ratio */ RF_RowCol_t d, f, sd, row, n; int k, v, ret, i; row = pda->row; n = raidPtr->numCol; /* assign column of primary copy to d */ d = pda->col; /* assign column of dead disk to f */ for(f=0;((!RF_DEAD_DISK(raidPtr->Disks[row][f].status))&&(f<n));f++); RF_ASSERT(f < n); RF_ASSERT(f != d); sd = (f > d) ? (n + d - f) : (d - f); RF_ASSERT(sd < n); /* * v of every k accesses should be redirected * * v/k := (n-1-sd)/(n-1) */ v = (n-1-sd); k = (n-1);#if 1 /* * XXX * Is this worth it? * * Now reduce the fraction, by repeatedly factoring * out primes (just like they teach in elementary school!) */ for(i=0;i<NLOWPRIMES;i++) { if (lowprimes[i] > v) break; while (((v%lowprimes[i])==0) && ((k%lowprimes[i])==0)) { v /= lowprimes[i]; k /= lowprimes[i]; } }#endif raidPtr->hist_diskreq[row][d]++; if (raidPtr->hist_diskreq[row][d] > v) { ret = 0; /* do not redirect */ } else { ret = 1; /* redirect */ }#if 0 printf("d=%d f=%d sd=%d v=%d k=%d ret=%d h=%d\n", d, f, sd, v, k, ret, raidPtr->hist_diskreq[row][d]);#endif if (raidPtr->hist_diskreq[row][d] >= k) { /* reset counter */ raidPtr->hist_diskreq[row][d] = 0; } return(ret);}/* * Disk selection routines *//* * Selects the disk with the shortest queue from a mirror pair. * Both the disk I/Os queued in RAIDframe as well as those at the physical * disk are counted as members of the "queue" */void rf_SelectMirrorDiskIdle(RF_DagNode_t *node){ RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr; RF_RowCol_t rowData, colData, rowMirror, colMirror; int dataQueueLength, mirrorQueueLength, usemirror; RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *)node->params[0].p; RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *)node->params[4].p; RF_PhysDiskAddr_t *tmp_pda; RF_RaidDisk_t **disks = raidPtr->Disks; RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue; /* return the [row col] of the disk with the shortest queue */ rowData = data_pda->row; colData = data_pda->col; rowMirror = mirror_pda->row; colMirror = mirror_pda->col; dataQueue = &(dqs[rowData][colData]); mirrorQueue = &(dqs[rowMirror][colMirror]);#ifdef RF_LOCK_QUEUES_TO_READ_LEN RF_LOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle");#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ dataQueueLength = dataQueue->queueLength + dataQueue->numOutstanding;#ifdef RF_LOCK_QUEUES_TO_READ_LEN RF_UNLOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle"); RF_LOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle");#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ mirrorQueueLength = mirrorQueue->queueLength + mirrorQueue->numOutstanding;#ifdef RF_LOCK_QUEUES_TO_READ_LEN RF_UNLOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle");#endif /* RF_LOCK_QUEUES_TO_READ_LEN */ usemirror = 0; if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) { usemirror = 0; } else if (RF_DEAD_DISK(disks[rowData][colData].status)) { usemirror = 1; } else if (dataQueueLength < mirrorQueueLength) { usemirror = 0; } else if (mirrorQueueLength < dataQueueLength) { usemirror = 1; } else { /* queues are equal length. attempt cleverness. */ if (SNUM_DIFF(dataQueue->last_deq_sector,data_pda->startSector) <= SNUM_DIFF(mirrorQueue->last_deq_sector,mirror_pda->startSector)) { usemirror = 0; } else { usemirror = 1; } } if (usemirror) { /* use mirror (parity) disk, swap params 0 & 4 */ tmp_pda = data_pda; node->params[0].p = mirror_pda; node->params[4].p = tmp_pda; } else { /* use data disk, leave param 0 unchanged */ } /* printf("dataQueueLength %d, mirrorQueueLength %d\n",dataQueueLength, mirrorQueueLength); */}/* * Do simple partitioning. This assumes that * the data and parity disks are laid out identically. */void rf_SelectMirrorDiskPartition(RF_DagNode_t *node){ RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr; RF_RowCol_t rowData, colData, rowMirror, colMirror; RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *)node->params[0].p; RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *)node->params[4].p; RF_PhysDiskAddr_t *tmp_pda; RF_RaidDisk_t **disks = raidPtr->Disks; RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue; int usemirror; /* return the [row col] of the disk with the shortest queue */ rowData = data_pda->row; colData = data_pda->col; rowMirror = mirror_pda->row; colMirror = mirror_pda->col; dataQueue = &(dqs[rowData][colData]); mirrorQueue = &(dqs[rowMirror][colMirror]); usemirror = 0; if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) { usemirror = 0; } else if (RF_DEAD_DISK(disks[rowData][colData].status)) { usemirror = 1; } else if (data_pda->startSector < (disks[rowData][colData].numBlocks / 2)) { usemirror = 0; } else { usemirror = 1; } if (usemirror) { /* use mirror (parity) disk, swap params 0 & 4 */ tmp_pda = data_pda; node->params[0].p = mirror_pda; node->params[4].p = tmp_pda; } else { /* use data disk, leave param 0 unchanged */ }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -