📄 scsi_error.c

📁 基于组件方式开发操作系统的OSKIT源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
   */  switch (status_byte(SCpnt->result))    {    case QUEUE_FULL:      /*       * The case of trying to send too many commands to a tagged queueing       * device.       */      return ADD_TO_MLQUEUE;    case GOOD:    case COMMAND_TERMINATED:      return SUCCESS;    case CHECK_CONDITION:      rtn = scsi_check_sense(SCpnt);      if( rtn == NEEDS_RETRY )	{	  goto maybe_retry;	}      return rtn;    case CONDITION_GOOD:    case INTERMEDIATE_GOOD:    case INTERMEDIATE_C_GOOD:      /*       * Who knows?  FIXME(eric)       */      return SUCCESS;    case BUSY:    case RESERVATION_CONFLICT:      goto maybe_retry;    default:      return FAILED;    }  return FAILED;maybe_retry:  if ((++SCpnt->retries) < SCpnt->allowed)    {      return NEEDS_RETRY;    }  else    {      return FAILED;    }}/* * Function:	scsi_eh_completed_normally * * Purpose:	Examine a command block that has come back from the low-level *		and figure out what to do next. * * Returns:	SUCCESS		- pass on to upper level. *		FAILED		- pass on to error handler thread. *		RETRY		- command should be retried. *		SOFTERR		- command succeeded, but we need to log *				  a soft error. * * Notes:	This is *ONLY* called when we are examining the status *		of commands queued during error recovery.  The main *		difference here is that we don't allow for the possibility *		of retries here, and we are a lot more restrictive about what *              we consider acceptable. */STATIC int scsi_eh_completed_normally (Scsi_Cmnd * SCpnt){  int	rtn;  /*   * First check the host byte, to see if there is anything in there   * that would indicate what we need to do.   */  if( host_byte(SCpnt->result) == DID_RESET )    {     if (SCpnt->flags & IS_RESETTING )       {	 /*	  * OK, this is normal.  We don't know whether in fact the	  * command in question really needs to be rerun or not - 	  * if this was the original data command then the answer is yes,	  * otherwise we just flag it as success.	  */	 SCpnt->flags &= ~IS_RESETTING;	 return NEEDS_RETRY;       }     /*      * Rats.  We are already in the error handler, so we now get to try      * and figure out what to do next.  If the sense is valid, we have      * a pretty good idea of what to do.  If not, we mark it as failed.      */     return scsi_check_sense (SCpnt);    }  if(host_byte(SCpnt->result) != DID_OK )  {      return FAILED;  }  /*   * Next, check the message byte.   */  if( msg_byte(SCpnt->result) != COMMAND_COMPLETE )    {      return FAILED;    }  /*   * Now, check the status byte to see if this indicates anything special.   */  switch (status_byte(SCpnt->result))    {    case GOOD:    case COMMAND_TERMINATED:      return SUCCESS;    case CHECK_CONDITION:      rtn = scsi_check_sense(SCpnt);      if( rtn == NEEDS_RETRY )	{	  return FAILED;	}      return rtn;    case CONDITION_GOOD:    case INTERMEDIATE_GOOD:    case INTERMEDIATE_C_GOOD:      /*       * Who knows?  FIXME(eric)       */      return SUCCESS;    case BUSY:    case QUEUE_FULL:    case RESERVATION_CONFLICT:    default:      return FAILED;    }  return FAILED;}/* * Function:	scsi_check_sense * * Purpose:	Examine sense information - give suggestion as to what *		we should do with it. */STATIC  int scsi_check_sense (Scsi_Cmnd * SCpnt){    if ( !scsi_sense_valid(SCpnt) )       {	return FAILED;      }    if (SCpnt->sense_buffer[2] & 0xe0)	return SUCCESS;    switch (SCpnt->sense_buffer[2] & 0xf)    {    case NO_SENSE:	return SUCCESS;    case RECOVERED_ERROR:	return /* SOFT_ERROR */ SUCCESS;    case ABORTED_COMMAND:	return NEEDS_RETRY;    case NOT_READY:    case UNIT_ATTENTION:        /*         * If we are expecting a CC/UA because of a bus reset that we         * performed, treat this just as a retry.  Otherwise this is         * information that we should pass up to the upper-level driver         * so that we can deal with it there.         */        if( SCpnt->device->expecting_cc_ua )        {            SCpnt->device->expecting_cc_ua = 0;            return NEEDS_RETRY;        }	return SUCCESS;    /* these three are not supported */    case COPY_ABORTED:    case VOLUME_OVERFLOW:    case MISCOMPARE:        return SUCCESS;    case MEDIUM_ERROR:	return NEEDS_RETRY;    case ILLEGAL_REQUEST:    case BLANK_CHECK:    case DATA_PROTECT:    case HARDWARE_ERROR:    default:	return SUCCESS;    }}/* * Function:	scsi_restart_operations * * Purpose:	Restart IO operations to the specified host. * * Arguments:	host  - host that we are restarting * * Returns:	Nothing * * Notes:	When we entered the error handler, we blocked all further *		I/O to this device.  We need to 'reverse' this process. */STATIC voidscsi_restart_operations(struct Scsi_Host * host){  Scsi_Device * SDpnt;  /*   * Next free up anything directly waiting upon the host.  This will be   * requests for character device operations, and also for ioctls to queued   * block devices.   */  SCSI_LOG_ERROR_RECOVERY(5,printk("scsi_error.c: Waking up host to restart\n"));   wake_up(&host->host_wait);   /*    * Finally, block devices need an extra kick in the pants.  This is because    * the request queueing mechanism may have queued lots of pending requests    * and there won't be a process waiting in a place where we can simply wake    * it up.  Thus we simply go through and call the request function to goose    * the various top level drivers and get things moving again.    */   for( SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next )     {       SCSI_LOG_ERROR_RECOVERY(5,printk("Calling request function to restart things...\n"));       if( SDpnt->scsi_request_fn != NULL )	 (*SDpnt->scsi_request_fn)();     }}/* * Function:	scsi_unjam_host * * Purpose:	Attempt to fix a host which has a command that failed for *		some reason. * * Arguments:	host	- host that needs unjamming. *  * Returns:	Nothing * * Notes:	When we come in here, we *know* that all commands on the *		bus have either completed, failed or timed out.  We also *		know that no further commands are being sent to the host, *		so things are relatively quiet and we have freedom to *		fiddle with things as we wish. * * Additional note:  This is only the *default* implementation.  It is possible *		for individual drivers to supply their own version of this *		function, and if the maintainer wishes to do this, it is *		strongly suggested that this function be taken as a template *		and modified.  This function was designed to correctly handle *		problems for about 95% of the different cases out there, and *		it should always provide at least a reasonable amount of error *		recovery. * * Note3:       Any command marked 'FAILED' or 'TIMEOUT' must eventually *              have scsi_finish_command() called for it.  We do all of *              the retry stuff here, so when we restart the host after we *              return it should have an empty queue. */STATIC intscsi_unjam_host(struct Scsi_Host * host){  int           devices_failed;  int           numfailed;  int           ourrtn;  int		rtn = FALSE;  int		result;  Scsi_Cmnd   * SCloop;  Scsi_Cmnd   * SCpnt;  Scsi_Device * SDpnt;  Scsi_Device * SDloop;  Scsi_Cmnd   * SCdone;  int           timed_out;  SCdone = NULL;  /*   * First, protect against any sort of race condition.  If any of the outstanding   * commands are in states that indicate that we are not yet blocked (i.e. we are   * not in a quiet state) then we got woken up in error.  If we ever end up here,   * we need to re-examine some of the assumptions.   */  for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next)  {      for(SCpnt=SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next)      {          if( SCpnt->state == SCSI_STATE_FAILED               || SCpnt->state == SCSI_STATE_TIMEOUT               || SCpnt->state == SCSI_STATE_INITIALIZING              || SCpnt->state == SCSI_STATE_UNUSED)          {              continue;          }          /*           * Rats.  Something is still floating around out there.  This could           * be the result of the fact that the upper level drivers are still frobbing           * commands that might have succeeded.  There are two outcomes.  One is that           * the command block will eventually be freed, and the other one is that           * the command will be queued and will be finished along the way.           */          SCSI_LOG_ERROR_RECOVERY(1,printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target));/* *        panic("SCSI Error handler woken too early\n"); * * This is no longer a problem, since now the code cares only about * SCSI_STATE_TIMEOUT and SCSI_STATE_FAILED. * Other states are useful only to release active commands when devices are * set offline. If (host->host_active == host->host_busy) we can safely assume * that there are no commands in state other then TIMEOUT od FAILED. (DB) * * FIXME: * It is not easy to release correctly commands according to their state when  * devices are set offline, when the state is neither TIMEOUT nor FAILED. * When a device is set offline, we can have some command with * rq_status=RQ_SCSY_BUSY, owner=SCSI_STATE_HIGHLEVEL,  * state=SCSI_STATE_INITIALIZING and the driver module cannot be released. * (DB, 17 May 1998) */      }  }  /*   * Next, see if we need to request sense information.  if so,   * then get it now, so we have a better idea of what to do.   * FIXME(eric) this has the unfortunate side effect that if a host   * adapter does not automatically request sense information, that we end   * up shutting it down before we request it.  All hosts should be doing this   * anyways, so for now all I have to say is tough noogies if you end up in here.   * On second thought, this is probably a good idea.  We *really* want to give   * authors an incentive to automatically request this.   */  SCSI_LOG_ERROR_RECOVERY(3,printk("scsi_unjam_host: Checking to see if we need to request sense\n"));  for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next)  {      for(SCpnt=SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next)      {          if( SCpnt->state != SCSI_STATE_FAILED || scsi_sense_valid(SCpnt) )          {              continue;          }          SCSI_LOG_ERROR_RECOVERY(2,printk("scsi_unjam_host: Requesting sense for %d\n",                                           SCpnt->target));          rtn = scsi_request_sense(SCpnt);          if( rtn != SUCCESS )          {              continue;          }          SCSI_LOG_ERROR_RECOVERY(3,printk("Sense requested for %p - result %x\n",                                           SCpnt, SCpnt->result));          SCSI_LOG_ERROR_RECOVERY(3,print_sense("bh",SCpnt));                            result = scsi_decide_disposition(SCpnt);          /*           * If the result was normal, then just pass it along to the           * upper level.           */          if( result == SUCCESS )          {              SCpnt->host->host_failed--;              scsi_eh_finish_command(&SCdone, SCpnt);          }          if( result != NEEDS_RETRY )          {              continue;          }          /*            * We only come in here if we want to retry a           * command.  The test to see whether the command           * should be retried should be keeping track of the           * number of tries, so we don't end up looping, of           * course.             */          SCpnt->state = NEEDS_RETRY;          rtn = scsi_eh_retry_command(SCpnt);          if( rtn != SUCCESS )          {              continue;          }          /*           * We eventually hand this one back to the top level.           */          SCpnt->host->host_failed--;          scsi_eh_finish_command(&SCdone, SCpnt);      }  }  /*   * Go through the list of commands and figure out where we stand and how bad things   * really are.   */  numfailed = 0;  timed_out = 0;  devices_failed = 0;  for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next)  {      unsigned int device_error = 0;      for(SCpnt=SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next)      {          if( SCpnt->state == SCSI_STATE_FAILED )          {              SCSI_LOG_ERROR_RECOVERY(5,printk("Command to ID %d failed\n",                                                SCpnt->target));              numfailed++;              device_error++;          }          if( SCpnt->state == SCSI_STATE_TIMEOUT )          {              SCSI_LOG_ERROR_RECOVERY(5,printk("Command to ID %d timedout\n",                                                SCpnt->target));              timed_out++;              device_error++;          }      }      if( device_error > 0 )      {          devices_failed++;      }  }  SCSI_LOG_ERROR_RECOVERY(2,printk("Total of %d+%d commands on %d devices require eh work\n",                                    numfailed, timed_out, devices_failed));  if( host->host_failed == 0 )  {      ourrtn = TRUE;      goto leave;  }  /*   * Next, try and see whether or not it makes sense to try and abort   * the running command.  This only works out to be the case if we have   * one command that has timed out.  If the command simply failed, it   * makes no sense to try and abort the command, since as far as the   * host adapter is concerned, it isn't running.   */  SCSI_LOG_ERROR_RECOVERY(3,printk("scsi_unjam_host: Checking to see if we want to try abort\n"));  for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next)  {      for(SCloop=SDpnt->device_queue; SCloop; SCloop = SCloop->next)      {          if( SCloop->state != SCSI_STATE_TIMEOUT )          {              continue;          }	  rtn = scsi_try_to_abort_command(SCloop, ABORT_TIMEOUT);	  if( rtn == SUCCESS )          {	      rtn = scsi_test_unit_ready(SCloop);              	      if( rtn == SUCCESS && scsi_unit_is_ready(SCloop) )              {		  rtn = scsi_eh_retry_command(SCloop);                  		  if( rtn == SUCCESS )                  {                      SCloop->host->host_failed--;		      scsi_eh_finish_command(&SCdone,SCloop);                  }              }          }      }  }    /*   * If we have corrected all of the problems, then we are done.   */  if( host->host_failed == 0 )  {      ourrtn = TRUE;      goto leave;  }  /*   * Either the abort wasn't appropriate, or it didn't succeed.   * Now try a bus device reset.  Still, look to see whether we have   * multiple devices that are jammed or not - if we have multiple devices,   * it makes no sense to try BUS_DEVICE_RESET - we really would need   * to try a BUS_RESET instead.   *   * Does this make sense - should we try BDR on each device individually?   * Yes, definitely.   */  SCSI_LOG_ERROR_RECOVERY(3,printk("scsi_unjam_host: Checking to see if we want to try BDR\n"));  for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next)  {      for(SCloop=SDpnt->device_queue; SCloop; SCloop = SCloop->next)      {          if( SCloop->state == SCSI_STATE_FAILED               || SCloop->state == SCSI_STATE_TIMEOUT )          {              break;          }      }
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -