📄 rf_reconstruct.c

📁 RAIDFrame是个非常好的磁盘阵列RAID仿真工具
💻 C
📖 第 1 页 / 共 5 页
字号:
      raidPtr->Disks[row][col].spareRow = srow;      raidPtr->Disks[row][col].spareCol = scol;            RF_UNLOCK_MUTEX(raidPtr->mutex);      RF_GETTIME(raidPtr->reconControl[row]->starttime);#if RF_DEMO > 0      if (rf_demoMode) {         rf_demo_update_mode(RF_DEMO_RECON);         rf_startup_recon_demo(rf_demoMeterVpos, raidPtr->numCol,           raidPtr->Layout.numDataCol+raidPtr->Layout.numParityCol, 0);      }#endif /* RF_DEMO > 0 */      /* now start up the actual reconstruction: issue a read for each surviving disk */      rf_start_cpu_monitor();      reconDesc->numDisksDone = 0;      for (i=0; i<raidPtr->numCol; i++) {        if (i != col) {          /* find and issue the next I/O on the indicated disk */	  if (IssueNextReadRequest(raidPtr, row, i)) {            Dprintf2("RECON: done issuing for r%d c%d\n", row, i);            reconDesc->numDisksDone++;          }        }      }    case 2:      Dprintf("RECON: resume requests\n");      rf_ResumeNewRequests(raidPtr);      reconDesc->state=3;    case 3:      /* process reconstruction events until all disks report that they've completed all work */      mapPtr=raidPtr->reconControl[row]->reconMap;            while (reconDesc->numDisksDone < raidPtr->numCol-1) {	event = rf_GetNextReconEvent(reconDesc, row, (void (*)())rf_ContinueReconstructFailedDisk,reconDesc);#ifdef SIMULATE	if (event==NULL) {return(0);}#else /* SIMULATE */	RF_ASSERT(event);#endif /* SIMULATE */	if (ProcessReconEvent(raidPtr, row, event)) reconDesc->numDisksDone++;	raidPtr->reconControl[row]->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs);#if RF_DEMO > 0	if (rf_prReconSched || rf_demoMode)#else /* RF_DEMO > 0 */	if (rf_prReconSched)#endif /* RF_DEMO > 0 */	{	  rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime));	}      }            reconDesc->state=4;          case 4:      mapPtr=raidPtr->reconControl[row]->reconMap;      if (rf_reconDebug) {	printf("RECON: all reads completed\n");      }                  /* at this point all the reads have completed.  We now wait for any pending writes       * to complete, and then we're done       */      while (rf_UnitsLeftToReconstruct(raidPtr->reconControl[row]->reconMap) > 0) {		event = rf_GetNextReconEvent(reconDesc, row, (void (*)())rf_ContinueReconstructFailedDisk,reconDesc);#ifdef SIMULATE	if (event==NULL) {return(0);}#else /* SIMULATE */	RF_ASSERT(event);#endif /* SIMULATE */		(void) ProcessReconEvent(raidPtr, row, event);         /* ignore return code */	raidPtr->reconControl[row]->percentComplete = 100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 / mapPtr->totalRUs);#if RF_DEMO > 0	if (rf_prReconSched || rf_demoMode)#else /* RF_DEMO > 0 */	if (rf_prReconSched)#endif /* RF_DEMO > 0 */	{	  rf_PrintReconSchedule(raidPtr->reconControl[row]->reconMap, &(raidPtr->reconControl[row]->starttime));	}      }      reconDesc->state=5;    case 5:      rf_stop_cpu_monitor();      /* Success:  mark the dead disk as reconstructed.  We quiesce the array here to assure no       * nasty interactions with pending user accesses when we free up the psstatus structure       * as part of FreeReconControl()       */      reconDesc->state=6;      retcode =  rf_SuspendNewRequestsAndWait(raidPtr);      rf_StopUserStats(raidPtr);      rf_PrintUserStats(raidPtr);               /* print out the stats on user accs accumulated during recon */#ifdef SIMULATE      if (retcode) return(0);#endif /* SIMULATE */      /* fall through to state 6 */    case 6:                  RF_LOCK_MUTEX(raidPtr->mutex);      raidPtr->numFailures--;      ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE);      raidPtr->Disks[row][col].status = (ds) ? rf_ds_dist_spared : rf_ds_spared;      raidPtr->status[row] = (ds) ? rf_rs_reconfigured : rf_rs_optimal;      RF_UNLOCK_MUTEX(raidPtr->mutex);      RF_GETTIME(etime);      RF_TIMEVAL_DIFF(&(raidPtr->reconControl[row]->starttime), &etime, &elpsd);      /* XXX -- why is state 7 different from state 6 if there is no return() here? -- XXX       *        Note that I set elpsd above & use it below, so if you put a return       *        here you'll have to fix this. (also, FreeReconControl is called below)       */          case 7:      rf_ResumeNewRequests(raidPtr);#if RF_DEMO > 0      if (rf_demoMode) {        rf_finish_recon_demo(&elpsd);      }      else {#endif /* RF_DEMO > 0 */	printf("Reconstruction of disk at row %d col %d completed and spare disk reassigned\n", row, col);	xor_s = raidPtr->accumXorTimeUs/1000000;	xor_resid_us = raidPtr->accumXorTimeUs%1000000;	printf("Recon time was %d.%06d seconds, accumulated XOR time was %ld us (%ld.%06ld)\n",		elpsd.tv_sec,elpsd.tv_usec,raidPtr->accumXorTimeUs,xor_s,xor_resid_us);	printf("  (start time %d sec %d usec, end time %d sec %d usec)\n",	       raidPtr->reconControl[row]->starttime.tv_sec,	       raidPtr->reconControl[row]->starttime.tv_usec,	       etime.tv_sec, etime.tv_usec);	rf_print_cpu_util("reconstruction");#if RF_RECON_STATS > 0	printf("Total head-sep stall count was %d\n", reconDesc->hsStallCount);#endif /* RF_RECON_STATS > 0 */#if RF_DEMO > 0      }#endif /* RF_DEMO > 0 */      rf_FreeReconControl(raidPtr, row);      RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t));      FreeReconDesc(reconDesc);          }  SignalReconDone(raidPtr);  return (0);}/***************************************************************************************** * do the right thing upon each reconstruction event. * returns nonzero if and only if there is nothing left unread on the indicated disk ****************************************************************************************/static int ProcessReconEvent(raidPtr, frow, event)  RF_Raid_t        *raidPtr;  RF_RowCol_t       frow;  RF_ReconEvent_t  *event;{  int retcode = 0, submitblocked;  RF_ReconBuffer_t *rbuf;  RF_SectorCount_t sectorsPerRU;  Dprintf1("RECON: ProcessReconEvent type %d\n", event->type);    switch(event->type) {  /* a read I/O has completed */  case RF_REVENT_READDONE:    rbuf = raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf;    Dprintf3("RECON: READDONE EVENT: row %d col %d psid %ld\n",      frow, event->col, rbuf->parityStripeID);    Dprintf7("RECON: done read  psid %ld buf %lx  %02x %02x %02x %02x %02x\n",      rbuf->parityStripeID, rbuf->buffer, rbuf->buffer[0]&0xff, rbuf->buffer[1]&0xff,      rbuf->buffer[2]&0xff, rbuf->buffer[3]&0xff, rbuf->buffer[4]&0xff);    rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);    submitblocked =  rf_SubmitReconBuffer(rbuf, 0, 0);    Dprintf1("RECON: submitblocked=%d\n", submitblocked);    if (!submitblocked) retcode = IssueNextReadRequest(raidPtr, frow, event->col);    break;  /* a write I/O has completed */  case RF_REVENT_WRITEDONE:    if (rf_floatingRbufDebug) {      rf_CheckFloatingRbufCount(raidPtr, 1);    }    sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU;    rbuf = (RF_ReconBuffer_t *) event->arg;    rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);    Dprintf3("RECON: WRITEDONE EVENT: psid %d ru %d (%d %% complete)\n", 	    rbuf->parityStripeID, rbuf->which_ru, raidPtr->reconControl[frow]->percentComplete);    rf_ReconMapUpdate(raidPtr, raidPtr->reconControl[frow]->reconMap,		   rbuf->failedDiskSectorOffset, rbuf->failedDiskSectorOffset + sectorsPerRU -1);    rf_RemoveFromActiveReconTable(raidPtr, frow, rbuf->parityStripeID, rbuf->which_ru);        if (rbuf->type == RF_RBUF_TYPE_FLOATING) {      RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex);      raidPtr->numFullReconBuffers--;      rf_ReleaseFloatingReconBuffer(raidPtr, frow, rbuf);      RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex);    } else if (rbuf->type == RF_RBUF_TYPE_FORCED) rf_FreeReconBuffer(rbuf);    else RF_ASSERT(0);    break;  case RF_REVENT_BUFCLEAR:                  /* A buffer-stall condition has been cleared */    Dprintf2("RECON: BUFCLEAR EVENT: row %d col %d\n",frow, event->col);    submitblocked = rf_SubmitReconBuffer(raidPtr->reconControl[frow]->perDiskInfo[event->col].rbuf, 0, (int) (long)event->arg);    RF_ASSERT(!submitblocked);              /* we wouldn't have gotten the BUFCLEAR event if we couldn't submit */    retcode = IssueNextReadRequest(raidPtr, frow, event->col);    break;  case RF_REVENT_BLOCKCLEAR:                /* A user-write reconstruction blockage has been cleared */    DDprintf2("RECON: BLOCKCLEAR EVENT: row %d col %d\n",frow, event->col);    retcode = TryToRead(raidPtr, frow, event->col);    break;  case RF_REVENT_HEADSEPCLEAR:              /* A max-head-separation reconstruction blockage has been cleared */    Dprintf2("RECON: HEADSEPCLEAR EVENT: row %d col %d\n",frow, event->col);    retcode = TryToRead(raidPtr, frow, event->col);    break;  /* a buffer has become ready to write */  case RF_REVENT_BUFREADY:    Dprintf2("RECON: BUFREADY EVENT: row %d col %d\n",frow, event->col);    retcode = IssueNextWriteRequest(raidPtr, frow);    if (rf_floatingRbufDebug) {      rf_CheckFloatingRbufCount(raidPtr, 1);    }    break;  /* we need to skip the current RU entirely because it got recon'd while we were waiting for something else to happen */  case RF_REVENT_SKIP:    DDprintf2("RECON: SKIP EVENT: row %d col %d\n",frow, event->col);    retcode = IssueNextReadRequest(raidPtr, frow, event->col);    break;  /* a forced-reconstruction read access has completed.  Just submit the buffer */  case RF_REVENT_FORCEDREADDONE:    rbuf = (RF_ReconBuffer_t *) event->arg;    rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);    DDprintf2("RECON: FORCEDREADDONE EVENT: row %d col %d\n",frow, event->col);    submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0);    RF_ASSERT(!submitblocked);    break;      default:    RF_PANIC();  }  rf_FreeReconEventDesc(event);  return(retcode);}/***************************************************************************************** * * find the next thing that's needed on the indicated disk, and issue a read * request for it.  We assume that the reconstruction buffer associated with this * process is free to receive the data.  If reconstruction is blocked on the * indicated RU, we issue a blockage-release request instead of a physical disk * read request.  If the current disk gets too far ahead of the others, we issue * a head-separation wait request and return. * * ctrl->{ru_count, curPSID, diskOffset} and rbuf->failedDiskSectorOffset are * maintained to point the the unit we're currently accessing.  Note that this deviates * from the standard C idiom of having counters point to the next thing to be  * accessed.  This allows us to easily retry when we're blocked by head separation * or reconstruction-blockage events. * * returns nonzero if and only if there is nothing left unread on the indicated disk ****************************************************************************************/static int IssueNextReadRequest(raidPtr, row, col)  RF_Raid_t    *raidPtr;  RF_RowCol_t   row;  RF_RowCol_t   col;{  RF_PerDiskReconCtrl_t *ctrl = &raidPtr->reconControl[row]->perDiskInfo[col];  RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;  RF_ReconBuffer_t *rbuf = ctrl->rbuf;  RF_ReconUnitCount_t RUsPerPU = layoutPtr->SUsPerPU / layoutPtr->SUsPerRU;  RF_SectorCount_t sectorsPerRU = layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU;  int do_new_check = 0, retcode = 0, status;  /* if we are currently the slowest disk, mark that we have to do a new check */  if (ctrl->headSepCounter <= raidPtr->reconControl[row]->minHeadSepCounter) do_new_check = 1;  while (1) {    ctrl->ru_count++;    if (ctrl->ru_count < RUsPerPU) {      ctrl->diskOffset             += sectorsPerRU;      rbuf->failedDiskSectorOffset += sectorsPerRU;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -