📄 rf_reconstruct.c
字号:
} else { ctrl->curPSID++; ctrl->ru_count = 0; /* code left over from when head-sep was based on parity stripe id */ if (ctrl->curPSID >= raidPtr->reconControl[row]->lastPSID) { CheckForNewMinHeadSep(raidPtr, row, ++(ctrl->headSepCounter)); return(1); /* finito! */ } /* find the disk offsets of the start of the parity stripe on both the current disk and the failed disk. * skip this entire parity stripe if either disk does not appear in the indicated PS */ status = ComputePSDiskOffsets(raidPtr, ctrl->curPSID, row, col, &ctrl->diskOffset, &rbuf->failedDiskSectorOffset, &rbuf->spRow, &rbuf->spCol, &rbuf->spOffset); if (status) { ctrl->ru_count = RUsPerPU-1; continue; } } rbuf->which_ru = ctrl->ru_count; /* skip this RU if it's already been reconstructed */ if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, rbuf->failedDiskSectorOffset)) { Dprintf2("Skipping psid %ld ru %d: already reconstructed\n",ctrl->curPSID,ctrl->ru_count); continue; } break; } ctrl->headSepCounter++; if (do_new_check) CheckForNewMinHeadSep(raidPtr, row, ctrl->headSepCounter); /* update min if needed */ /* at this point, we have definitely decided what to do, and we have only to see if we can actually do it now */ rbuf->parityStripeID = ctrl->curPSID; rbuf->which_ru = ctrl->ru_count; bzero((char *)&raidPtr->recon_tracerecs[col], sizeof(raidPtr->recon_tracerecs[col])); raidPtr->recon_tracerecs[col].reconacc = 1; RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer); retcode = TryToRead(raidPtr, row, col); return(retcode);}/* tries to issue the next read on the indicated disk. We may be blocked by (a) the heads being too * far apart, or (b) recon on the indicated RU being blocked due to a write by a user thread. * In this case, we issue a head-sep or blockage wait request, which will cause this same routine * to be invoked again later when the blockage has cleared. 
*/
/*
 * TryToRead -- attempt to start the next reconstruction read on disk (row, col).
 *
 *   raidPtr   the array descriptor
 *   row, col  the disk to read from
 *
 * The parity stripe and reconstruction unit to read are taken from the per-disk
 * recon control structure (curPSID / ru_count).  Possible outcomes:
 *   - CheckHeadSeparation() reports nonzero: return at once, before the PSS mutex
 *     is taken
 *   - CheckForcedOrBlockedReconstruction() reports RF_PSS_RECON_BLOCKED: nothing
 *     is issued
 *   - it reports RF_PSS_FORCED_ON_WRITE, or the RU turns out to be reconstructed
 *     already: an RF_REVENT_SKIP event is raised for this disk
 *   - otherwise a read of sectorsPerStripeUnit * SUsPerRU sectors is queued at
 *     RF_IO_RECON_PRIORITY with ReconReadDoneProc as its completion routine
 *
 * Always returns 0.
 */
static int TryToRead(raidPtr, row, col)
  RF_Raid_t    *raidPtr;
  RF_RowCol_t   row;
  RF_RowCol_t   col;
{
  RF_PerDiskReconCtrl_t *diskCtrl = &raidPtr->reconControl[row]->perDiskInfo[col];
  RF_StripeNum_t stripeId = diskCtrl->curPSID;
  RF_ReconUnitNum_t ruIndex = diskCtrl->ru_count;
  RF_SectorCount_t ruSectors = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU;
  RF_ReconParityStripeStatus_t *ruStatus;
  RF_DiskQueueData_t *readReq;
  int entryCreated = 0;
  int blockState;

  /* a disk that has run too far ahead of the others gets a head-separation wait; nothing more to do here */
  if (CheckHeadSeparation(raidPtr, diskCtrl, row, col, diskCtrl->headSepCounter, ruIndex)) {
    return(0);
  }

  RF_LOCK_PSS_MUTEX(raidPtr, row, stripeId);

  /* look up the status entry for this RU, creating it if needed; entryCreated records whether we made it */
  ruStatus = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]->pssTable, stripeId, ruIndex,
                               RF_PSS_CREATE, &entryCreated);

  /* if recon is blocked on this parity stripe, a block-wait request is issued for us.
   * this call must also mark the RU as under reconstruction when it is not blocked.
   */
  blockState = CheckForcedOrBlockedReconstruction(raidPtr, ruStatus, diskCtrl, row, col, stripeId, ruIndex);

  if (blockState == RF_PSS_RECON_BLOCKED) {
    Dprintf2("RECON: Stalling psid %ld ru %d: recon blocked\n", stripeId, ruIndex);
  } else if (blockState == RF_PSS_FORCED_ON_WRITE) {
    rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP);
  } else if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap,
                                     diskCtrl->rbuf->failedDiskSectorOffset)) {
    /* last-chance check: the RU may have been reconstructed while we were waiting on
     * something else.  In the normal case this repeats a check the caller already made;
     * it could be limited to the paths that actually blocked.  If we created the status
     * entry just above, it is bogus and has to be removed again.
     */
    Dprintf2("RECON: Skipping psid %ld ru %d: prior recon after stall\n", stripeId, ruIndex);
    if (entryCreated) {
      rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]->pssTable, ruStatus);
    }
    rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP);
  } else {
    /* there is something to read: issue the I/O */
    Dprintf5("RECON: Read for psid %ld on row %d col %d offset %ld buf %lx\n",
             stripeId, row, col, diskCtrl->diskOffset, diskCtrl->rbuf->buffer);

    /* close out the "start to fetch" interval and begin timing the fetch itself */
    RF_ETIMER_STOP(raidPtr->recon_tracerecs[col].recon_timer);
    RF_ETIMER_EVAL(raidPtr->recon_tracerecs[col].recon_timer);
    raidPtr->recon_tracerecs[col].specific.recon.recon_start_to_fetch_us =
      RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[col].recon_timer);
    RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer);

    /* a NULL proc pointer should be fine here: every buffer we use should be in kernel space */
    readReq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, diskCtrl->diskOffset, ruSectors,
                                     diskCtrl->rbuf->buffer, stripeId, ruIndex,
                                     ReconReadDoneProc, (void *) diskCtrl, NULL,
                                     &raidPtr->recon_tracerecs[col], (void *)raidPtr, 0, NULL);

    RF_ASSERT(readReq);  /* XXX -- fix this -- XXX */

    diskCtrl->rbuf->arg = (void *) readReq;
    rf_DiskIOEnqueue(&raidPtr->Queues[row][col], readReq, RF_IO_RECON_PRIORITY);
    ruStatus->issued[col] = 1;
  }

  RF_UNLOCK_PSS_MUTEX(raidPtr, row, stripeId);
  return(0);
}

/* given a parity stripe ID, we want to find out whether both the current disk and the
 * failed disk exist in that parity stripe.  If not, we want to skip this whole PS.
 * If so, we want to find the disk offset of the start of the PS on both the current
 * disk and the failed disk.
 *
 * this works by getting a list of disks comprising the indicated parity stripe, and
 * searching the list for the current and failed disks.
Once we've decided they both exist in the parity stripe, we need to decide
 * whether each is data or parity, so that we'll know which mapping function to call
 * to get the corresponding disk offsets.
 *
 * this is kind of unpleasant, but doing it this way allows the reconstruction code
 * to use parity stripe IDs rather than physical disk addresses to march through the
 * failed disk, which greatly simplifies a lot of code, as well as eliminating the
 * need for a reverse-mapping function.  I also think it will execute faster, since
 * the calls to the mapping module are kept to a minimum.
 *
 * ASSUMES THAT THE STRIPE IDENTIFIER IDENTIFIES THE DISKS COMPRISING THE STRIPE
 * IN THE CORRECT ORDER
 *
 * returns 0 once the OUT parameters have been set (directly, or by the mapping
 * routines they are handed to), or 1 if this parity stripe should be skipped.
 * On the skip path none of the OUT parameters has been written.
 */
static int ComputePSDiskOffsets(
  RF_Raid_t *raidPtr,   /* raid descriptor */
  RF_StripeNum_t psid,  /* parity stripe identifier */
  RF_RowCol_t row,   /* row and column of disk to find the offsets for */
  RF_RowCol_t col,
  RF_SectorNum_t *outDiskOffset,              /* OUT: offset of the PS on disk (row,col) */
  RF_SectorNum_t *outFailedDiskSectorOffset,  /* OUT: offset of the PS on the failed disk */
  RF_RowCol_t *spRow,  /* OUT: row,col of spare unit for failed unit */
  RF_RowCol_t *spCol,
  RF_SectorNum_t *spOffset)  /* OUT: offset into disk containing spare unit */
{
  RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
  RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol;  /* column of the failed disk */
  RF_RaidAddr_t sosRaidAddress;   /* start-of-stripe */
  RF_RowCol_t *diskids;           /* filled in by IdentifyStripe: columns making up the stripe */
  /* i, j, k: positions within diskids[] of col, fcol and the parity column respectively */
  u_int i, j, k, i_offset, j_offset;
  RF_RowCol_t prow, pcol;         /* parity location as reported by MapParity; only pcol is used */
  int testcol, testrow;           /* sanity-check outputs of the mapping calls */
  RF_RowCol_t stripe;             /* second output of IdentifyStripe; compared against row below */
  RF_SectorNum_t poffset;         /* required by MapParity, otherwise unused */
  char i_is_parity=0, j_is_parity=0;
  RF_RowCol_t stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;

  /* get a listing of the disks comprising that stripe */
  sosRaidAddress = rf_ParityStripeIDToRaidAddress(layoutPtr, psid);
  (layoutPtr->map->IdentifyStripe)(raidPtr, sosRaidAddress, &diskids, &stripe);
  RF_ASSERT(diskids);

  /* reject this entire parity stripe if it does not contain the indicated disk or it does not contain the failed disk */
  if (row != stripe) goto skipit;
  /* linear search for col; i == stripeWidth afterwards means "not found" */
  for (i=0; i<stripeWidth; i++) {
    if (col == diskids[i]) break;
  }
  if (i == stripeWidth) goto skipit;
  /* same search for the failed column */
  for (j=0; j<stripeWidth; j++) {
    if (fcol == diskids[j]) break;
  }
  if (j == stripeWidth) {
    goto skipit;
  }

  /* find out which disk the parity is on */
  (layoutPtr->map->MapParity)(raidPtr, sosRaidAddress, &prow, &pcol, &poffset, RF_DONT_REMAP);

  /* find out if either the current RU or the failed RU is parity */
  /* also, if the parity occurs in this stripe prior to the data and/or failed col, we need to
   * decrement the corresponding offset (i_offset and/or j_offset); i and j themselves are left alone
   */
  for (k=0; k<stripeWidth; k++) if (diskids[k] == pcol) break;
  RF_ASSERT(k < stripeWidth);
  i_offset = i; j_offset=j;
  if (k < i) i_offset--; else if (k==i) {i_is_parity = 1; i_offset = 0;}  /* set offsets to zero to disable multiply below */
  if (k < j) j_offset--; else if (k==j) {j_is_parity = 1; j_offset = 0;}

  /* at this point, [ij]_is_parity tells us whether the [current,failed] disk is parity at
   * the start of this RU, and, if data, "[ij]_offset" tells us how far into the stripe
   * the [current,failed] disk is.
   */

  /* call the mapping routine to get the offset into the current disk, repeat for failed disk.
   */
  if (i_is_parity)
    layoutPtr->map->MapParity(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP);
  else
    layoutPtr->map->MapSector(raidPtr, sosRaidAddress + i_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outDiskOffset, RF_DONT_REMAP);

  /* the mapping must land back on the disk we started from */
  RF_ASSERT(row == testrow && col == testcol);

  if (j_is_parity)
    layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP);
  else
    layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, &testrow, &testcol, outFailedDiskSectorOffset, RF_DONT_REMAP);
  RF_ASSERT(row == testrow && fcol == testcol);

  /* now locate the spare unit for the failed unit */
  if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
    /* distributed sparing: repeat the failed-unit mapping with RF_REMAP, writing straight into the OUT parameters */
    if (j_is_parity)
      layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP);
    else
      layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset * layoutPtr->sectorsPerStripeUnit, spRow, spCol, spOffset, RF_REMAP);
  } else {
    /* otherwise: use the spare row/col recorded in the recon control structure, at the same offset as on the failed disk */
    *spRow    = raidPtr->reconControl[row]->spareRow;
    *spCol    = raidPtr->reconControl[row]->spareCol;
    *spOffset = *outFailedDiskSectorOffset;
  }

  return(0);

skipit:
  Dprintf3("RECON: Skipping psid %ld: nothing needed from r%d c%d\n", psid, row, col);
  return(1);
}

/* this is called when a buffer has become ready to write to the replacement disk
 *
 * raidPtr: raid descriptor
 * row:     row whose reconstruction control structure supplies the full buffer
 *
 * takes a full reconstruction buffer from rf_GetFullReconBuffer(), records it as the
 * writeRbuf of its parity stripe status entry, and queues a write of
 * sectorsPerStripeUnit * SUsPerRU sectors to the buffer's spare location
 * (spRow, spCol, spOffset) at RF_IO_RECON_PRIORITY, with ReconWriteDoneProc as the
 * completion routine.  Always returns 0.
 */
static int IssueNextWriteRequest(raidPtr, row)
  RF_Raid_t *raidPtr;
  RF_RowCol_t row;
{
  RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
  RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU;
  RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol;  /* only used to pick the trace record below */
  RF_ReconBuffer_t *rbuf;
  RF_DiskQueueData_t *req;

  rbuf = rf_GetFullReconBuffer(raidPtr->reconControl[row]);
  RF_ASSERT(rbuf);  /* there must be one available, or we wouldn't have gotten the event that sent us here */
  RF_ASSERT(rbuf->pssPtr);

  /* hand the buffer to its status entry as the write buffer, then drop the buffer's back-pointer */
  rbuf->pssPtr->writeRbuf = rbuf;
  rbuf->pssPtr = NULL;

  Dprintf7("RECON: New write (r %d c %d offs %d) for psid %ld ru %d (failed disk offset %ld) buf %lx\n",
	   rbuf->spRow, rbuf->spCol, rbuf->spOffset, rbuf->parityStripeID,
	   rbuf->which_ru, rbuf->failedDiskSectorOffset, rbuf->buffer);
  /* debug dump of the first five bytes of the buffer */
  Dprintf6("RECON: new write psid %ld   %02x %02x %02x %02x %02x\n",
	   rbuf->parityStripeID, rbuf->buffer[0]&0xff, rbuf->buffer[1]&0xff,
	   rbuf->buffer[2]&0xff, rbuf->buffer[3]&0xff, rbuf->buffer[4]&0xff);

  /* should be ok to use a NULL b_proc here b/c all addrs should be in kernel space */
  req = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, rbuf->spOffset,
			    sectorsPerRU, rbuf->buffer,
			    rbuf->parityStripeID, rbuf->which_ru,
			    ReconWriteDoneProc, (void *) rbuf, NULL,
			    &raidPtr->recon_tracerecs[fcol],
			    (void *)raidPtr, 0, NULL);

  RF_ASSERT(req);  /* XXX -- fix this -- XXX */

  rbuf->arg = (void *) req;
  rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spRow][rbuf->spCol], req, RF_IO_RECON_PRIORITY);

  return(0);
}

/* this gets called upon the completion of a reconstruction read operation
 * the arg is a pointer to the per-disk reconstruction control structure
 * for the process that just finished a read.
 *
 * called at interrupt context in the kernel, so don't do anything illegal here.
 */
/*
 * arg:    the RF_PerDiskReconCtrl_t of the disk whose read finished, passed as void *
 * status: completion status of the read; any nonzero value is treated as fatal
 *
 * stores the elapsed timer value as recon_fetch_to_return_us in the disk's recon
 * trace record, calls RF_ETIMER_START on that timer again, and raises
 * RF_REVENT_READDONE for (ctrl->row, ctrl->col).  Always returns 0.
 */
static int ReconReadDoneProc(arg, status)
  void *arg;
  int status;
{
  RF_PerDiskReconCtrl_t *ctrl = (RF_PerDiskReconCtrl_t *) arg;
  RF_Raid_t *raidPtr = ctrl->reconCtrl->reconDesc->raidPtr;

  if (status) {
    /*
     * XXX: a failed reconstruction read is not recovered from; we report it and panic
     */
    printf("Recon read failed!\n");
    RF_PANIC();
  }

  /* close out the fetch interval that was started when the read was issued */
  RF_ETIMER_STOP(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
  RF_ETIMER_EVAL(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
  raidPtr->recon_tracerecs[ctrl->col].specific.recon.recon_fetch_to_return_us =
    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
  RF_ETIMER_START(raidPtr->recon_tracerecs[ctrl->col].recon_timer);

  rf_CauseReconEvent(raidPtr, ctrl->row, ctrl->col, NULL, RF_REVENT_READDONE);
  return(0);
}

/* this gets called upon the completion of a reconstruction write operation.
 * the arg is a pointer to the rbuf that was just written
 *
 * a nonzero status is reported and then panics; otherwise (and in any case after the
 * check) RF_REVENT_WRITEDONE is raised for (rbuf->row, rbuf->col) with the rbuf
 * pointer passed along as the event argument.
 *
 * called at interrupt context in the kernel, so don't do anything illegal here.
 */
static int ReconWriteDoneProc(arg, status)
  void *arg;
  int status;
{
  RF_ReconBuffer_t *rbuf = (RF_ReconBuffer_t *) arg;

  Dprintf2("Reconstruction completed on psid %ld ru %d\n",rbuf->parityStripeID, rbuf->which_ru);
  if (status) {printf("Recon write failed!\n"); /*fprintf(stderr,"Recon write failed!\n");*/ RF_PANIC();}
  rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col, arg, RF_REVENT_WRITEDONE);
  return(0);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -