scsi_error.c

来自「linux 内核源代码」· C语言 代码 · 共 1,918 行 · 第 1/4 页

C
1,918
字号
/* *  scsi_error.c Copyright (C) 1997 Eric Youngdale * *  SCSI error/timeout handling *      Initial versions: Eric Youngdale.  Based upon conversations with *                        Leonard Zubkoff and David Miller at Linux Expo,  *                        ideas originating from all over the place. * *	Restructured scsi_unjam_host and associated functions. *	September 04, 2002 Mike Anderson (andmike@us.ibm.com) * *	Forward port of Russell King's (rmk@arm.linux.org.uk) changes and *	minor  cleanups. *	September 30, 2002 Mike Anderson (andmike@us.ibm.com) */#include <linux/module.h>#include <linux/sched.h>#include <linux/timer.h>#include <linux/string.h>#include <linux/kernel.h>#include <linux/freezer.h>#include <linux/kthread.h>#include <linux/interrupt.h>#include <linux/blkdev.h>#include <linux/delay.h>#include <scsi/scsi.h>#include <scsi/scsi_cmnd.h>#include <scsi/scsi_dbg.h>#include <scsi/scsi_device.h>#include <scsi/scsi_eh.h>#include <scsi/scsi_transport.h>#include <scsi/scsi_host.h>#include <scsi/scsi_ioctl.h>#include "scsi_priv.h"#include "scsi_logging.h"#include "scsi_transport_api.h"#define SENSE_TIMEOUT		(10*HZ)/* * These should *probably* be handled by the host itself. * Since it is allowed to sleep, it probably should. */#define BUS_RESET_SETTLE_TIME   (10)#define HOST_RESET_SETTLE_TIME  (10)/* called with shost->host_lock held */void scsi_eh_wakeup(struct Scsi_Host *shost){	if (shost->host_busy == shost->host_failed) {		wake_up_process(shost->ehandler);		SCSI_LOG_ERROR_RECOVERY(5,				printk("Waking error handler thread\n"));	}}/** * scsi_schedule_eh - schedule EH for SCSI host * @shost:	SCSI host to invoke error handling on. * * Schedule SCSI EH without scmd. **/void scsi_schedule_eh(struct Scsi_Host *shost){	unsigned long flags;	spin_lock_irqsave(shost->host_lock, flags);	if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 ||	    scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) {		shost->host_eh_scheduled++;		scsi_eh_wakeup(shost);	}	spin_unlock_irqrestore(shost->host_lock, flags);}EXPORT_SYMBOL_GPL(scsi_schedule_eh);/** * scsi_eh_scmd_add - add scsi cmd to error handling. * @scmd:	scmd to run eh on. * @eh_flag:	optional SCSI_EH flag. * * Return value: *	0 on failure. **/int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag){	struct Scsi_Host *shost = scmd->device->host;	unsigned long flags;	int ret = 0;	if (!shost->ehandler)		return 0;	spin_lock_irqsave(shost->host_lock, flags);	if (scsi_host_set_state(shost, SHOST_RECOVERY))		if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))			goto out_unlock;	ret = 1;	scmd->eh_eflags |= eh_flag;	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);	shost->host_failed++;	scsi_eh_wakeup(shost); out_unlock:	spin_unlock_irqrestore(shost->host_lock, flags);	return ret;}/** * scsi_add_timer - Start timeout timer for a single scsi command. * @scmd:	scsi command that is about to start running. * @timeout:	amount of time to allow this command to run. * @complete:	timeout function to call if timer isn't canceled. * * Notes: *    This should be turned into an inline function.  Each scsi command *    has its own timer, and as it is added to the queue, we set up the *    timer.  When the command completes, we cancel the timer. **/void scsi_add_timer(struct scsi_cmnd *scmd, int timeout,		    void (*complete)(struct scsi_cmnd *)){	/*	 * If the clock was already running for this command, then	 * first delete the timer.  The timer handling code gets rather	 * confused if we don't do this.	 */	if (scmd->eh_timeout.function)		del_timer(&scmd->eh_timeout);	scmd->eh_timeout.data = (unsigned long)scmd;	scmd->eh_timeout.expires = jiffies + timeout;	scmd->eh_timeout.function = (void (*)(unsigned long)) complete;	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"					  " %d, (%p)\n", __FUNCTION__,					  scmd, timeout, complete));	add_timer(&scmd->eh_timeout);}/** * scsi_delete_timer - Delete/cancel timer for a given function. * @scmd:	Cmd that we are canceling timer for * * Notes: *     This should be turned into an inline function. * * Return value: *     1 if we were able to detach the timer.  0 if we blew it, and the *     timer function has already started to run. **/int scsi_delete_timer(struct scsi_cmnd *scmd){	int rtn;	rtn = del_timer(&scmd->eh_timeout);	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"					 " rtn: %d\n", __FUNCTION__,					 scmd, rtn));	scmd->eh_timeout.data = (unsigned long)NULL;	scmd->eh_timeout.function = NULL;	return rtn;}/** * scsi_times_out - Timeout function for normal scsi commands. * @scmd:	Cmd that is timing out. * * Notes: *     We do not need to lock this.  There is the potential for a race *     only in that the normal completion handling might run, but if the *     normal completion function determines that the timer has already *     fired, then it mustn't do anything. **/void scsi_times_out(struct scsi_cmnd *scmd){	enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);	scsi_log_completion(scmd, TIMEOUT_ERROR);	if (scmd->device->host->transportt->eh_timed_out)		eh_timed_out = scmd->device->host->transportt->eh_timed_out;	else if (scmd->device->host->hostt->eh_timed_out)		eh_timed_out = scmd->device->host->hostt->eh_timed_out;	else		eh_timed_out = NULL;	if (eh_timed_out)		switch (eh_timed_out(scmd)) {		case EH_HANDLED:			__scsi_done(scmd);			return;		case EH_RESET_TIMER:			scsi_add_timer(scmd, scmd->timeout_per_command,				       scsi_times_out);			return;		case EH_NOT_HANDLED:			break;		}	if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {		scmd->result |= DID_TIME_OUT << 16;		__scsi_done(scmd);	}}/** * scsi_block_when_processing_errors - Prevent cmds from being queued. * @sdev:	Device on which we are performing recovery. * * Description: *     We block until the host is out of error recovery, and then check to *     see whether the host or the device is offline. * * Return value: *     0 when dev was taken offline by error recovery. 1 OK to proceed. **/int scsi_block_when_processing_errors(struct scsi_device *sdev){	int online;	wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host));	online = scsi_device_online(sdev);	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__,					  online));	return online;}EXPORT_SYMBOL(scsi_block_when_processing_errors);#ifdef CONFIG_SCSI_LOGGING/** * scsi_eh_prt_fail_stats - Log info on failures. * @shost:	scsi host being recovered. * @work_q:	Queue of scsi cmds to process. **/static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,					  struct list_head *work_q){	struct scsi_cmnd *scmd;	struct scsi_device *sdev;	int total_failures = 0;	int cmd_failed = 0;	int cmd_cancel = 0;	int devices_failed = 0;	shost_for_each_device(sdev, shost) {		list_for_each_entry(scmd, work_q, eh_entry) {			if (scmd->device == sdev) {				++total_failures;				if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD)					++cmd_cancel;				else 					++cmd_failed;			}		}		if (cmd_cancel || cmd_failed) {			SCSI_LOG_ERROR_RECOVERY(3,				sdev_printk(KERN_INFO, sdev,					    "%s: cmds failed: %d, cancel: %d\n",					    __FUNCTION__, cmd_failed,					    cmd_cancel));			cmd_cancel = 0;			cmd_failed = 0;			++devices_failed;		}	}	SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d"					  " devices require eh work\n",				  total_failures, devices_failed));}#endif/** * scsi_check_sense - Examine scsi cmd sense * @scmd:	Cmd to have sense checked. * * Return value: * 	SUCCESS or FAILED or NEEDS_RETRY * * Notes: *	When a deferred error is detected the current command has *	not been executed and needs retrying. **/static int scsi_check_sense(struct scsi_cmnd *scmd){	struct scsi_sense_hdr sshdr;	if (! scsi_command_normalize_sense(scmd, &sshdr))		return FAILED;	/* no valid sense data */	if (scsi_sense_is_deferred(&sshdr))		return NEEDS_RETRY;	/*	 * Previous logic looked for FILEMARK, EOM or ILI which are	 * mainly associated with tapes and returned SUCCESS.	 */	if (sshdr.response_code == 0x70) {		/* fixed format */		if (scmd->sense_buffer[2] & 0xe0)			return SUCCESS;	} else {		/*		 * descriptor format: look for "stream commands sense data		 * descriptor" (see SSC-3). Assume single sense data		 * descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG.		 */		if ((sshdr.additional_length > 3) &&		    (scmd->sense_buffer[8] == 0x4) &&		    (scmd->sense_buffer[11] & 0xe0))			return SUCCESS;	}	switch (sshdr.sense_key) {	case NO_SENSE:		return SUCCESS;	case RECOVERED_ERROR:		return /* soft_error */ SUCCESS;	case ABORTED_COMMAND:		return NEEDS_RETRY;	case NOT_READY:	case UNIT_ATTENTION:		/*		 * if we are expecting a cc/ua because of a bus reset that we		 * performed, treat this just as a retry.  otherwise this is		 * information that we should pass up to the upper-level driver		 * so that we can deal with it there.		 */		if (scmd->device->expecting_cc_ua) {			scmd->device->expecting_cc_ua = 0;			return NEEDS_RETRY;		}		/*		 * if the device is in the process of becoming ready, we 		 * should retry.		 */		if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01))			return NEEDS_RETRY;		/*		 * if the device is not started, we need to wake		 * the error handler to start the motor		 */		if (scmd->device->allow_restart &&		    (sshdr.asc == 0x04) && (sshdr.ascq == 0x02))			return FAILED;		return SUCCESS;		/* these three are not supported */	case COPY_ABORTED:	case VOLUME_OVERFLOW:	case MISCOMPARE:		return SUCCESS;	case MEDIUM_ERROR:		if (sshdr.asc == 0x11 || /* UNRECOVERED READ ERR */		    sshdr.asc == 0x13 || /* AMNF DATA FIELD */		    sshdr.asc == 0x14) { /* RECORD NOT FOUND */			return SUCCESS;		}		return NEEDS_RETRY;	case HARDWARE_ERROR:		if (scmd->device->retry_hwerror)			return NEEDS_RETRY;		else			return SUCCESS;	case ILLEGAL_REQUEST:	case BLANK_CHECK:	case DATA_PROTECT:	default:		return SUCCESS;	}}/** * scsi_eh_completed_normally - Disposition a eh cmd on return from LLD. * @scmd:	SCSI cmd to examine. * * Notes: *    This is *only* called when we are examining the status of commands *    queued during error recovery.  the main difference here is that we *    don't allow for the possibility of retries here, and we are a lot *    more restrictive about what we consider acceptable. **/static int scsi_eh_completed_normally(struct scsi_cmnd *scmd){	/*	 * first check the host byte, to see if there is anything in there	 * that would indicate what we need to do.	 */	if (host_byte(scmd->result) == DID_RESET) {		/*		 * rats.  we are already in the error handler, so we now		 * get to try and figure out what to do next.  if the sense		 * is valid, we have a pretty good idea of what to do.		 * if not, we mark it as FAILED.		 */		return scsi_check_sense(scmd);	}	if (host_byte(scmd->result) != DID_OK)		return FAILED;	/*	 * next, check the message byte.	 */	if (msg_byte(scmd->result) != COMMAND_COMPLETE)		return FAILED;	/*	 * now, check the status byte to see if this indicates	 * anything special.	 */	switch (status_byte(scmd->result)) {	case GOOD:	case COMMAND_TERMINATED:		return SUCCESS;	case CHECK_CONDITION:		return scsi_check_sense(scmd);	case CONDITION_GOOD:	case INTERMEDIATE_GOOD:	case INTERMEDIATE_C_GOOD:		/*		 * who knows?  FIXME(eric)		 */		return SUCCESS;	case BUSY:	case QUEUE_FULL:	case RESERVATION_CONFLICT:	default:		return FAILED;	}	return FAILED;}/** * scsi_eh_done - Completion function for error handling. * @scmd:	Cmd that is done. **/static void scsi_eh_done(struct scsi_cmnd *scmd){	struct completion     *eh_action;	SCSI_LOG_ERROR_RECOVERY(3,		printk("%s scmd: %p result: %x\n",			__FUNCTION__, scmd, scmd->result));	eh_action = scmd->device->host->eh_action;	if (eh_action)		complete(eh_action);}/** * scsi_try_host_reset - ask host adapter to reset itself * @scmd:	SCSI cmd to send hsot reset. **/static int scsi_try_host_reset(struct scsi_cmnd *scmd){	unsigned long flags;	int rtn;	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",					  __FUNCTION__));

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?