📄 scsi_error.c

📁 基于组件方式开发操作系统的OSKIT源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
      if( SCloop == NULL )      {          continue;      }      /*       * OK, we have a device that is having problems.  Try and send       * a bus device reset to it.       *       * FIXME(eric) - make sure we handle the case where multiple       * commands to the same device have failed. They all must       * get properly restarted.       */      rtn = scsi_try_bus_device_reset(SCloop, RESET_TIMEOUT);            if( rtn == SUCCESS )      {	  rtn = scsi_test_unit_ready(SCloop);	  	  if( rtn == SUCCESS && scsi_unit_is_ready(SCloop) )          {	      rtn = scsi_eh_retry_command(SCloop);	      	      if( rtn == SUCCESS )              {                  SCloop->host->host_failed--;		  scsi_eh_finish_command(&SCdone,SCloop);              }          }      }        }    if( host->host_failed == 0 )  {      ourrtn = TRUE;      goto leave;  }  /*   * If we ended up here, we have serious problems.  The only thing left   * to try is a full bus reset.  If someone has grabbed the bus and isn't   * letting go, then perhaps this will help.   */  SCSI_LOG_ERROR_RECOVERY(3,printk("scsi_unjam_host: Try hard bus reset\n"));  /*    * We really want to loop over the various channels, and do this on   * a channel by channel basis.  We should also check to see if any   * of the failed commands are on soft_reset devices, and if so, skip   * the reset.     */  for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next)  {next_device:      for(SCpnt=SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next)      {          if( SCpnt->state != SCSI_STATE_FAILED               && SCpnt->state != SCSI_STATE_TIMEOUT )          {              continue;          }          /*           * We have a failed command.  Make sure there are no other failed           * commands on the same channel that are timed out and implement a           * soft reset.           */          for(SDloop=host->host_queue; SDloop; SDloop = SDloop->next)          {              for(SCloop=SDloop->device_queue; SCloop; SCloop = SCloop->next)              {                  if( SCloop->channel != SCpnt->channel )                  {                      continue;                  }                                    if( SCloop->state != SCSI_STATE_FAILED                       && SCloop->state != SCSI_STATE_TIMEOUT )                  {                      continue;                  }                                    if( SDloop->soft_reset && SCloop->state == SCSI_STATE_TIMEOUT )                  {                      /*                        * If this device uses the soft reset option, and this                       * is one of the devices acting up, then our only                       * option is to wait a bit, since the command is                       * supposedly still running.                         *                       * FIXME(eric) - right now we will just end up falling                       * through to the 'take device offline' case.                       *                       * FIXME(eric) - It is possible that the command completed                       * *after* the error recovery procedure started, and if this                       * is the case, we are worrying about nothing here.                       */                      /*                       * Due to the spinlock, we will never get out of this                       * loop without a proper wait (DB)                       */                      scsi_sleep(1 * HZ);                      goto next_device;                  }              }          }          /*           * We now know that we are able to perform a reset for the           * bus that SCpnt points to.  There are no soft-reset devices           * with outstanding timed out commands.           */          rtn = scsi_try_bus_reset(SCpnt);          if( rtn == SUCCESS )          {              for(SDloop=host->host_queue; SDloop; SDloop = SDloop->next)              {                  for(SCloop=SDloop->device_queue; SCloop; SCloop = SCloop->next)                  {                      if( SCloop->channel != SCpnt->channel )                      {                          continue;                      }                                            if( SCloop->state != SCSI_STATE_FAILED                           && SCloop->state != SCSI_STATE_TIMEOUT )                      {                          continue;                      }                                            rtn = scsi_test_unit_ready(SCloop);                                            if( rtn == SUCCESS && scsi_unit_is_ready(SCloop) )                      {                          rtn = scsi_eh_retry_command(SCloop);                                                    if( rtn == SUCCESS )                          {                              SCpnt->host->host_failed--;                              scsi_eh_finish_command(&SCdone,SCloop);                          }                      }                                            /*                       * If the bus reset worked, but we are still unable to                       * talk to the device, take it offline.                       * FIXME(eric) - is this really the correct thing to do?                       */                      if( rtn != SUCCESS )                      {                          SCloop->device->online = FALSE;                          SCloop->host->host_failed--;                          scsi_eh_finish_command(&SCdone,SCloop);                      }                  }              }          }      }  }  if( host->host_failed == 0 )  {      ourrtn = TRUE;      goto leave;  }  /*   * If we ended up here, we have serious problems.  The only thing left   * to try is a full host reset - perhaps the firmware on the device   * crashed, or something like that.   *   * It is assumed that a succesful host reset will cause *all* information   * about the command to be flushed from both the host adapter *and* the   * device.   *   * FIXME(eric) - it isn't clear that devices that implement the soft reset   * option can ever be cleared except via cycling the power.  The problem is   * that sending the host reset command will cause the host to forget   * about the pending command, but the device won't forget.  For now, we   * skip the host reset option if any of the failed devices are configured   * to use the soft reset option.   */  for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next)  {next_device2:      for(SCpnt=SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next)      {          if( SCpnt->state != SCSI_STATE_FAILED               && SCpnt->state != SCSI_STATE_TIMEOUT )          {              continue;          }          if( SDpnt->soft_reset && SCpnt->state == SCSI_STATE_TIMEOUT )          {              /*                * If this device uses the soft reset option, and this               * is one of the devices acting up, then our only               * option is to wait a bit, since the command is               * supposedly still running.                 *               * FIXME(eric) - right now we will just end up falling               * through to the 'take device offline' case.               */              SCSI_LOG_ERROR_RECOVERY(3,                        printk("scsi_unjam_host: Unable to try hard host reset\n"));               /*                * Due to the spinlock, we will never get out of this                * loop without a proper wait. (DB)                */               scsi_sleep(1 * HZ);              goto next_device2;          }          SCSI_LOG_ERROR_RECOVERY(3,printk("scsi_unjam_host: Try hard host reset\n"));          /*           * FIXME(eric) - we need to obtain a valid SCpnt to perform this call.           */          rtn = scsi_try_host_reset(SCpnt);          if( rtn == SUCCESS )          {              /*               * FIXME(eric) we assume that all commands are flushed from the               * controller.  We should get a DID_RESET for all of the commands               * that were pending.  We should ignore these so that we can               * guarantee that we are in a consistent state.               *               * I believe this to be the case right now, but this needs to be               * tested.               */            for(SDloop=host->host_queue; SDloop; SDloop = SDloop->next)              {                  for(SCloop=SDloop->device_queue; SCloop; SCloop = SCloop->next)                  {                      if( SCloop->state != SCSI_STATE_FAILED                           && SCloop->state != SCSI_STATE_TIMEOUT )                      {                          continue;                      }                                            rtn = scsi_test_unit_ready(SCloop);                                            if( rtn == SUCCESS && scsi_unit_is_ready(SCloop) )                      {                          rtn = scsi_eh_retry_command(SCloop);                                                    if( rtn == SUCCESS )                          {                              SCpnt->host->host_failed--;                              scsi_eh_finish_command(&SCdone,SCloop);                          }                      }                      if( rtn != SUCCESS )                      {                          SCloop->device->online = FALSE;                          SCloop->host->host_failed--;                          scsi_eh_finish_command(&SCdone,SCloop);                      }                  }              }          }      }  }  /*   * If we solved all of the problems, then let's rev up the engines again.   */  if( host->host_failed == 0 )  {      ourrtn = TRUE;      goto leave;  }  /*   * If the HOST RESET failed, then for now we assume that the entire host   * adapter is too hosed to be of any use.  For our purposes, however, it is   * easier to simply take the devices offline that correspond to commands   * that failed.   */  SCSI_LOG_ERROR_RECOVERY(1,printk("scsi_unjam_host: Take device offline\n"));  for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next)  {      for(SCloop=SDpnt->device_queue; SCloop; SCloop = SCloop->next)      {          if( SCloop->state == SCSI_STATE_FAILED || SCloop->state == SCSI_STATE_TIMEOUT )          {              SCloop->device->online = FALSE;                            /*               * This should pass the failure up to the top level driver, and               * it will have to try and do something intelligent with it.               */              SCloop->host->host_failed--;                            if( SCloop->state == SCSI_STATE_TIMEOUT )              {                  SCloop->result |= (DRIVER_TIMEOUT << 24);              }              SCSI_LOG_ERROR_RECOVERY(3,printk("Finishing command for device %d %x\n",                     SCloop->device->id, SCloop->result));                            scsi_eh_finish_command(&SCdone,SCloop);          }      }  }  if( host->host_failed != 0 )  {      panic("scsi_unjam_host: Miscount of number of failed commands.\n");  }  SCSI_LOG_ERROR_RECOVERY(3,printk("scsi_unjam_host: Returning\n"));  ourrtn = FALSE;leave:  /*   * We should have a list of commands that we 'finished' during the course of   * error recovery.  This should be the same as the list of commands that timed out   * or failed.  We are currently holding these things in a linked list - we didn't   * put them in the bottom half queue because we wanted to keep things quiet while   * we were working on recovery, and passing them up to the top level could easily   * cause the top level to try and queue something else again.   *   * Start by marking that the host is no longer in error recovery.   */  host->in_recovery = 0;  /*   * Take the list of commands, and stick them in the bottom half queue.   * The current implementation of scsi_done will do this for us - if need   * be we can create a special version of this function to do the   * same job for us.   */  for(SCpnt = SCdone; SCpnt != NULL; SCpnt = SCdone)  {      SCdone = SCpnt->bh_next;      SCpnt->bh_next = NULL;      scsi_done(SCpnt);  }  return (ourrtn);}#ifndef OSKIT/* * Function:	scsi_error_handler * * Purpose:	Handle errors/timeouts of scsi commands, try and clean up *		and unjam the bus, and restart things. * * Arguments:	host	- host for which we are running. * * Returns:	Never returns. * * Notes:	This is always run in the context of a kernel thread.  The *		idea is that we start this thing up when the kernel starts *		up (one per host that we detect), and it immediately goes to *		sleep and waits for some event (i.e. failure).  When this *		takes place, we have the job of trying to unjam the bus *		and restarting things. * */voidscsi_error_handler(void * data){	struct Scsi_Host     * host = (struct Scsi_Host *) data;	int	               rtn;	struct semaphore sem = MUTEX_LOCKED;        unsigned long flags;        struct fs_struct *fs;	lock_kernel();	/*	 * If we were started as result of loading a module, close all of the	 * user space pages.  We don't need them, and if we didn't close them	 * they would be locked into memory.	 */	exit_mm(current);	current->session = 1;	current->pgrp = 1;		/* Become as one with the init task */		exit_fs(current);	/* current->fs->count--; */	fs = init_task.fs;	current->fs = fs;	atomic_inc(&fs->count);	siginitsetinv(&current->blocked, SHUTDOWN_SIGS);	/*	 * Set the name of this process.	 */	sprintf(current->comm, "scsi_eh_%d", host->host_no);	host->eh_wait = &sem;	host->ehandler = current;        	unlock_kernel();        /*         * Wake up the thread that created us.         */        SCSI_LOG_ERROR_RECOVERY(3,printk("Wake up parent %d\n", host->eh_notify->count.counter));        up(host->eh_notify);	while(1)	  {	    /*	     * If we get a signal, it means we are supposed to go	     * away and die.  This typically happens if the user is	     * trying to unload a module.	     */            SCSI_LOG_ERROR_RECOVERY(1,printk("Error handler sleeping\n"));	    down_interruptible (&sem);	    if (signal_pending(current) )	      break;            SCSI_LOG_ERROR_RECOVERY(1,printk("Error handler waking up\n"));	    spin_lock_irqsave(&io_request_lock, flags);            host->eh_active = 1;	    /*	     * We have a host that is failing for some reason.  Figure out	     * what we need to do to get it up and online again (if we can).	     * If we fail, we end up taking the thing offline.	     */	    if( host->hostt->eh_strategy_handler != NULL )	      {		rtn = host->hostt->eh_strategy_handler(host);	      }	    else	      {		rtn = scsi_unjam_host(host);	      }            host->eh_active = 0;	    /*	     * Note - if the above fails completely, the action is to take	     * individual devices offline and flush the queue of any	     * outstanding requests that may have been pending.  When we	     * restart, we restart any I/O to any other devices on the bus	     * which are still online.	     */	    scsi_restart_operations(host);             /* The spinlock is really needed up to this point. (DB) */	    spin_unlock_irqrestore(&io_request_lock, flags);	  }        SCSI_LOG_ERROR_RECOVERY(1,printk("Error handler exiting\n"));	/*	 * Make sure that nobody tries to wake us up again.	 */	host->eh_wait = NULL;	/*	 * Knock this down too.  From this point on, the host is flying	 * without a pilot.  If this is because the module is being unloaded,	 * that's fine.  If the user sent a signal to this thing, we are	 * potentially in real danger.	 */	host->in_recovery = 0;        host->eh_active = 0;	host->ehandler = NULL;	/*	 * If anyone is waiting for us to exit (i.e. someone trying to unload	 * a driver), then wake up that process to let them know we are on	 * the way out the door.  This may be overkill - I *think* that we	 * could probably just unload the driver and send the signal, and when	 * the error handling thread wakes up that it would just exit without	 * needing to touch any memory associated with the driver itself.	 */	if( host->eh_notify != NULL )	  up(host->eh_notify);}#endif/* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically * adjust the settings for this buffer only.  This must remain at the end * of the file. * --------------------------------------------------------------------------- * Local variables: * c-indent-level: 4 * c-brace-imaginary-offset: 0 * c-brace-offset: -4 * c-argdecl-indent: 4 * c-label-offset: -4 * c-continued-statement-offset: 4 * c-continued-brace-offset: 0 * indent-tabs-mode: nil * tab-width: 8 * End: */
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -