scsi_error.c

来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 1,846 行 · 第 1/4 页

C
1,846
字号
/** * scsi_eh_lock_done - done function for eh door lock request * @scmd:	SCSI command block for the door lock request * * Notes: * 	We completed the asynchronous door lock request, and it has either * 	locked the door or failed.  We must free the command structures * 	associated with this request. **/static void scsi_eh_lock_done(struct scsi_cmnd *scmd){	struct scsi_request *sreq = scmd->sc_request;	scsi_release_request(sreq);}/** * scsi_eh_lock_door - Prevent medium removal for the specified device * @sdev:	SCSI device to prevent medium removal * * Locking: * 	We must be called from process context; scsi_allocate_request() * 	may sleep. * * Notes: * 	We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the * 	head of the devices request queue, and continue. * * Bugs: * 	scsi_allocate_request() may sleep waiting for existing requests to * 	be processed.  However, since we haven't kicked off any request * 	processing for this host, this may deadlock. * *	If scsi_allocate_request() fails for what ever reason, we *	completely forget to lock the door. **/static void scsi_eh_lock_door(struct scsi_device *sdev){	struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL);	if (unlikely(!sreq)) {		printk(KERN_ERR "%s: request allocate failed,"		       "prevent media removal cmd not sent\n", __FUNCTION__);		return;	}	sreq->sr_cmnd[0] = ALLOW_MEDIUM_REMOVAL;	sreq->sr_cmnd[1] = 0;	sreq->sr_cmnd[2] = 0;	sreq->sr_cmnd[3] = 0;	sreq->sr_cmnd[4] = SCSI_REMOVAL_PREVENT;	sreq->sr_cmnd[5] = 0;	sreq->sr_data_direction = DMA_NONE;	sreq->sr_bufflen = 0;	sreq->sr_buffer = NULL;	sreq->sr_allowed = 5;	sreq->sr_done = scsi_eh_lock_done;	sreq->sr_timeout_per_command = 10 * HZ;	sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);	scsi_insert_special_req(sreq, 1);}/** * scsi_restart_operations - restart io operations to the specified host. * @shost:	Host we are restarting. * * Notes: *    When we entered the error handler, we blocked all further i/o to *    this device.  we need to 'reverse' this process. **/static void scsi_restart_operations(struct Scsi_Host *shost){	struct scsi_device *sdev;	/*	 * If the door was locked, we need to insert a door lock request	 * onto the head of the SCSI request queue for the device.  There	 * is no point trying to lock the door of an off-line device.	 */	shost_for_each_device(sdev, shost) {		if (scsi_device_online(sdev) && sdev->locked)			scsi_eh_lock_door(sdev);	}	/*	 * next free up anything directly waiting upon the host.  this	 * will be requests for character device operations, and also for	 * ioctls to queued block devices.	 */	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",					  __FUNCTION__));	clear_bit(SHOST_RECOVERY, &shost->shost_state);	wake_up(&shost->host_wait);	/*	 * finally we need to re-initiate requests that may be pending.  we will	 * have had everything blocked while error handling is taking place, and	 * now that error recovery is done, we will need to ensure that these	 * requests are started.	 */	scsi_run_host_queues(shost);}/** * scsi_eh_ready_devs - check device ready state and recover if not. * @shost: 	host to be recovered. * @eh_done_q:	list_head for processed commands. * **/static void scsi_eh_ready_devs(struct Scsi_Host *shost,			       struct list_head *work_q,			       struct list_head *done_q){	if (!scsi_eh_stu(shost, work_q, done_q))		if (!scsi_eh_bus_device_reset(shost, work_q, done_q))			if (!scsi_eh_bus_reset(shost, work_q, done_q))				if (!scsi_eh_host_reset(work_q, done_q))					scsi_eh_offline_sdevs(work_q, done_q);}/** * scsi_eh_flush_done_q - finish processed commands or retry them. * @done_q:	list_head of processed commands. * **/static void scsi_eh_flush_done_q(struct list_head *done_q){	struct list_head *lh, *lh_sf;	struct scsi_cmnd *scmd;	list_for_each_safe(lh, lh_sf, done_q) {		scmd = list_entry(lh, struct scsi_cmnd, eh_entry);		list_del_init(lh);		if (scsi_device_online(scmd->device) &&		    !blk_noretry_request(scmd->request) &&		    (++scmd->retries < scmd->allowed)) {			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush"							  " retry cmd: %p\n",							  current->comm,							  scmd));				scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);		} else {			if (!scmd->result)				scmd->result |= (DRIVER_TIMEOUT << 24);			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish"							" cmd: %p\n",							current->comm, scmd));			scsi_finish_command(scmd);		}	}}/** * scsi_unjam_host - Attempt to fix a host which has a cmd that failed. * @shost:	Host to unjam. * * Notes: *    When we come in here, we *know* that all commands on the bus have *    either completed, failed or timed out.  we also know that no further *    commands are being sent to the host, so things are relatively quiet *    and we have freedom to fiddle with things as we wish. * *    This is only the *default* implementation.  it is possible for *    individual drivers to supply their own version of this function, and *    if the maintainer wishes to do this, it is strongly suggested that *    this function be taken as a template and modified.  this function *    was designed to correctly handle problems for about 95% of the *    different cases out there, and it should always provide at least a *    reasonable amount of error recovery. * *    Any command marked 'failed' or 'timeout' must eventually have *    scsi_finish_cmd() called for it.  we do all of the retry stuff *    here, so when we restart the host after we return it should have an *    empty queue. **/static void scsi_unjam_host(struct Scsi_Host *shost){	unsigned long flags;	LIST_HEAD(eh_work_q);	LIST_HEAD(eh_done_q);	spin_lock_irqsave(shost->host_lock, flags);	list_splice_init(&shost->eh_cmd_q, &eh_work_q);	spin_unlock_irqrestore(shost->host_lock, flags);	SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));	if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))		if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))			scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);	scsi_eh_flush_done_q(&eh_done_q);}/** * scsi_error_handler - Handle errors/timeouts of SCSI cmds. * @data:	Host for which we are running. * * Notes: *    This is always run in the context of a kernel thread.  The idea is *    that we start this thing up when the kernel starts up (one per host *    that we detect), and it immediately goes to sleep and waits for some *    event (i.e. failure).  When this takes place, we have the job of *    trying to unjam the bus and restarting things. **/int scsi_error_handler(void *data){	struct Scsi_Host *shost = (struct Scsi_Host *) data;	int rtn;	DECLARE_MUTEX_LOCKED(sem);	/*	 *    Flush resources	 */	daemonize("scsi_eh_%d", shost->host_no);	current->flags |= PF_NOFREEZE;	shost->eh_wait = &sem;	shost->ehandler = current;	/*	 * Wake up the thread that created us.	 */	SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of"					  " scsi_eh_%d\n",shost->host_no));	complete(shost->eh_notify);	while (1) {		/*		 * If we get a signal, it means we are supposed to go		 * away and die.  This typically happens if the user is		 * trying to unload a module.		 */		SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"						  " scsi_eh_%d"						  " sleeping\n",shost->host_no));		/*		 * Note - we always use down_interruptible with the semaphore		 * even if the module was loaded as part of the kernel.  The		 * reason is that down() will cause this thread to be counted		 * in the load average as a running process, and down		 * interruptible doesn't.  Given that we need to allow this		 * thread to die if the driver was loaded as a module, using		 * semaphores isn't unreasonable.		 */		down_interruptible(&sem);		if (shost->eh_kill)			break;		SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"						  " scsi_eh_%d waking"						  " up\n",shost->host_no));		shost->eh_active = 1;		/*		 * We have a host that is failing for some reason.  Figure out		 * what we need to do to get it up and online again (if we can).		 * If we fail, we end up taking the thing offline.		 */		if (shost->hostt->eh_strategy_handler) 			rtn = shost->hostt->eh_strategy_handler(shost);		else			scsi_unjam_host(shost);		shost->eh_active = 0;		/*		 * Note - if the above fails completely, the action is to take		 * individual devices offline and flush the queue of any		 * outstanding requests that may have been pending.  When we		 * restart, we restart any I/O to any other devices on the bus		 * which are still online.		 */		scsi_restart_operations(shost);	}	SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d"					  " exiting\n",shost->host_no));	/*	 * Make sure that nobody tries to wake us up again.	 */	shost->eh_wait = NULL;	/*	 * Knock this down too.  From this point on, the host is flying	 * without a pilot.  If this is because the module is being unloaded,	 * that's fine.  If the user sent a signal to this thing, we are	 * potentially in real danger.	 */	shost->eh_active = 0;	shost->ehandler = NULL;	/*	 * If anyone is waiting for us to exit (i.e. someone trying to unload	 * a driver), then wake up that process to let them know we are on	 * the way out the door.	 */	complete_and_exit(shost->eh_notify, 0);	return 0;}/* * Function:    scsi_report_bus_reset() * * Purpose:     Utility function used by low-level drivers to report that *		they have observed a bus reset on the bus being handled. * * Arguments:   shost       - Host in question *		channel     - channel on which reset was observed. * * Returns:     Nothing * * Lock status: Host lock must be held. * * Notes:       This only needs to be called if the reset is one which *		originates from an unknown location.  Resets originated *		by the mid-level itself don't need to call this, but there *		should be no harm. * *		The main purpose of this is to make sure that a CHECK_CONDITION *		is properly treated. */void scsi_report_bus_reset(struct Scsi_Host *shost, int channel){	struct scsi_device *sdev;	__shost_for_each_device(sdev, shost) {		if (channel == sdev->channel) {			sdev->was_reset = 1;			sdev->expecting_cc_ua = 1;		}	}}/* * Function:    scsi_report_device_reset() * * Purpose:     Utility function used by low-level drivers to report that *		they have observed a device reset on the device being handled. * * Arguments:   shost       - Host in question *		channel     - channel on which reset was observed *		target	    - target on which reset was observed * * Returns:     Nothing * * Lock status: Host lock must be held * * Notes:       This only needs to be called if the reset is one which *		originates from an unknown location.  Resets originated *		by the mid-level itself don't need to call this, but there *		should be no harm. * *		The main purpose of this is to make sure that a CHECK_CONDITION *		is properly treated. */void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target){	struct scsi_device *sdev;	__shost_for_each_device(sdev, shost) {		if (channel == sdev->channel &&		    target == sdev->id) {			sdev->was_reset = 1;			sdev->expecting_cc_ua = 1;		}	}}static voidscsi_reset_provider_done_command(struct scsi_cmnd *scmd){}/* * Function:	scsi_reset_provider * * Purpose:	Send requested reset to a bus or device at any phase. * * Arguments:	device	- device to send reset to *		flag - reset type (see scsi.h) * * Returns:	SUCCESS/FAILURE. * * Notes:	This is used by the SCSI Generic driver to provide *		Bus/Device reset capability. */intscsi_reset_provider(struct scsi_device *dev, int flag){	struct scsi_cmnd *scmd = scsi_get_command(dev, GFP_KERNEL);	struct request req;	int rtn;	scmd->request = &req;	memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout));	scmd->request->rq_status      	= RQ_SCSI_BUSY;	scmd->state                   	= SCSI_STATE_INITIALIZING;	scmd->owner	     		= SCSI_OWNER_MIDLEVEL;    	memset(&scmd->cmnd, '\0', sizeof(scmd->cmnd));    	scmd->scsi_done		= scsi_reset_provider_done_command;	scmd->done			= NULL;	scmd->buffer			= NULL;	scmd->bufflen			= 0;	scmd->request_buffer		= NULL;	scmd->request_bufflen		= 0;	scmd->internal_timeout		= NORMAL_TIMEOUT;	scmd->abort_reason		= DID_ABORT;	scmd->cmd_len			= 0;	scmd->sc_data_direction		= DMA_BIDIRECTIONAL;	scmd->sc_request		= NULL;	scmd->sc_magic			= SCSI_CMND_MAGIC;	init_timer(&scmd->eh_timeout);	/*	 * Sometimes the command can get back into the timer chain,	 * so use the pid as an identifier.	 */	scmd->pid			= 0;	switch (flag) {	case SCSI_TRY_RESET_DEVICE:		rtn = scsi_try_bus_device_reset(scmd);		if (rtn == SUCCESS)			break;		/* FALLTHROUGH */	case SCSI_TRY_RESET_BUS:		rtn = scsi_try_bus_reset(scmd);		if (rtn == SUCCESS)			break;		/* FALLTHROUGH */	case SCSI_TRY_RESET_HOST:		rtn = scsi_try_host_reset(scmd);		break;	default:		rtn = FAILED;	}	scsi_delete_timer(scmd);	scsi_next_command(scmd);	return rtn;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?