scsi_error.c
来自「linux 内核源代码」· C语言 代码 · 共 1,918 行 · 第 1/4 页
C
1,918 行
/* * scsi_error.c Copyright (C) 1997 Eric Youngdale * * SCSI error/timeout handling * Initial versions: Eric Youngdale. Based upon conversations with * Leonard Zubkoff and David Miller at Linux Expo, * ideas originating from all over the place. * * Restructured scsi_unjam_host and associated functions. * September 04, 2002 Mike Anderson (andmike@us.ibm.com) * * Forward port of Russell King's (rmk@arm.linux.org.uk) changes and * minor cleanups. * September 30, 2002 Mike Anderson (andmike@us.ibm.com) */#include <linux/module.h>#include <linux/sched.h>#include <linux/timer.h>#include <linux/string.h>#include <linux/kernel.h>#include <linux/freezer.h>#include <linux/kthread.h>#include <linux/interrupt.h>#include <linux/blkdev.h>#include <linux/delay.h>#include <scsi/scsi.h>#include <scsi/scsi_cmnd.h>#include <scsi/scsi_dbg.h>#include <scsi/scsi_device.h>#include <scsi/scsi_eh.h>#include <scsi/scsi_transport.h>#include <scsi/scsi_host.h>#include <scsi/scsi_ioctl.h>#include "scsi_priv.h"#include "scsi_logging.h"#include "scsi_transport_api.h"#define SENSE_TIMEOUT (10*HZ)/* * These should *probably* be handled by the host itself. * Since it is allowed to sleep, it probably should. */#define BUS_RESET_SETTLE_TIME (10)#define HOST_RESET_SETTLE_TIME (10)/* called with shost->host_lock held */void scsi_eh_wakeup(struct Scsi_Host *shost){ if (shost->host_busy == shost->host_failed) { wake_up_process(shost->ehandler); SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread\n")); }}/** * scsi_schedule_eh - schedule EH for SCSI host * @shost: SCSI host to invoke error handling on. * * Schedule SCSI EH without scmd. **/void scsi_schedule_eh(struct Scsi_Host *shost){ unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 || scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) { shost->host_eh_scheduled++; scsi_eh_wakeup(shost); } spin_unlock_irqrestore(shost->host_lock, flags);}EXPORT_SYMBOL_GPL(scsi_schedule_eh);/** * scsi_eh_scmd_add - add scsi cmd to error handling. * @scmd: scmd to run eh on. * @eh_flag: optional SCSI_EH flag. * * Return value: * 0 on failure. **/int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag){ struct Scsi_Host *shost = scmd->device->host; unsigned long flags; int ret = 0; if (!shost->ehandler) return 0; spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_set_state(shost, SHOST_RECOVERY)) if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) goto out_unlock; ret = 1; scmd->eh_eflags |= eh_flag; list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); shost->host_failed++; scsi_eh_wakeup(shost); out_unlock: spin_unlock_irqrestore(shost->host_lock, flags); return ret;}/** * scsi_add_timer - Start timeout timer for a single scsi command. * @scmd: scsi command that is about to start running. * @timeout: amount of time to allow this command to run. * @complete: timeout function to call if timer isn't canceled. * * Notes: * This should be turned into an inline function. Each scsi command * has its own timer, and as it is added to the queue, we set up the * timer. When the command completes, we cancel the timer. **/void scsi_add_timer(struct scsi_cmnd *scmd, int timeout, void (*complete)(struct scsi_cmnd *)){ /* * If the clock was already running for this command, then * first delete the timer. The timer handling code gets rather * confused if we don't do this. */ if (scmd->eh_timeout.function) del_timer(&scmd->eh_timeout); scmd->eh_timeout.data = (unsigned long)scmd; scmd->eh_timeout.expires = jiffies + timeout; scmd->eh_timeout.function = (void (*)(unsigned long)) complete; SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:" " %d, (%p)\n", __FUNCTION__, scmd, timeout, complete)); add_timer(&scmd->eh_timeout);}/** * scsi_delete_timer - Delete/cancel timer for a given function. * @scmd: Cmd that we are canceling timer for * * Notes: * This should be turned into an inline function. * * Return value: * 1 if we were able to detach the timer. 0 if we blew it, and the * timer function has already started to run. **/int scsi_delete_timer(struct scsi_cmnd *scmd){ int rtn; rtn = del_timer(&scmd->eh_timeout); SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p," " rtn: %d\n", __FUNCTION__, scmd, rtn)); scmd->eh_timeout.data = (unsigned long)NULL; scmd->eh_timeout.function = NULL; return rtn;}/** * scsi_times_out - Timeout function for normal scsi commands. * @scmd: Cmd that is timing out. * * Notes: * We do not need to lock this. There is the potential for a race * only in that the normal completion handling might run, but if the * normal completion function determines that the timer has already * fired, then it mustn't do anything. **/void scsi_times_out(struct scsi_cmnd *scmd){ enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); scsi_log_completion(scmd, TIMEOUT_ERROR); if (scmd->device->host->transportt->eh_timed_out) eh_timed_out = scmd->device->host->transportt->eh_timed_out; else if (scmd->device->host->hostt->eh_timed_out) eh_timed_out = scmd->device->host->hostt->eh_timed_out; else eh_timed_out = NULL; if (eh_timed_out) switch (eh_timed_out(scmd)) { case EH_HANDLED: __scsi_done(scmd); return; case EH_RESET_TIMER: scsi_add_timer(scmd, scmd->timeout_per_command, scsi_times_out); return; case EH_NOT_HANDLED: break; } if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) { scmd->result |= DID_TIME_OUT << 16; __scsi_done(scmd); }}/** * scsi_block_when_processing_errors - Prevent cmds from being queued. * @sdev: Device on which we are performing recovery. * * Description: * We block until the host is out of error recovery, and then check to * see whether the host or the device is offline. * * Return value: * 0 when dev was taken offline by error recovery. 1 OK to proceed. **/int scsi_block_when_processing_errors(struct scsi_device *sdev){ int online; wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host)); online = scsi_device_online(sdev); SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__, online)); return online;}EXPORT_SYMBOL(scsi_block_when_processing_errors);#ifdef CONFIG_SCSI_LOGGING/** * scsi_eh_prt_fail_stats - Log info on failures. * @shost: scsi host being recovered. * @work_q: Queue of scsi cmds to process. **/static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost, struct list_head *work_q){ struct scsi_cmnd *scmd; struct scsi_device *sdev; int total_failures = 0; int cmd_failed = 0; int cmd_cancel = 0; int devices_failed = 0; shost_for_each_device(sdev, shost) { list_for_each_entry(scmd, work_q, eh_entry) { if (scmd->device == sdev) { ++total_failures; if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) ++cmd_cancel; else ++cmd_failed; } } if (cmd_cancel || cmd_failed) { SCSI_LOG_ERROR_RECOVERY(3, sdev_printk(KERN_INFO, sdev, "%s: cmds failed: %d, cancel: %d\n", __FUNCTION__, cmd_failed, cmd_cancel)); cmd_cancel = 0; cmd_failed = 0; ++devices_failed; } } SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d" " devices require eh work\n", total_failures, devices_failed));}#endif/** * scsi_check_sense - Examine scsi cmd sense * @scmd: Cmd to have sense checked. * * Return value: * SUCCESS or FAILED or NEEDS_RETRY * * Notes: * When a deferred error is detected the current command has * not been executed and needs retrying. **/static int scsi_check_sense(struct scsi_cmnd *scmd){ struct scsi_sense_hdr sshdr; if (! scsi_command_normalize_sense(scmd, &sshdr)) return FAILED; /* no valid sense data */ if (scsi_sense_is_deferred(&sshdr)) return NEEDS_RETRY; /* * Previous logic looked for FILEMARK, EOM or ILI which are * mainly associated with tapes and returned SUCCESS. */ if (sshdr.response_code == 0x70) { /* fixed format */ if (scmd->sense_buffer[2] & 0xe0) return SUCCESS; } else { /* * descriptor format: look for "stream commands sense data * descriptor" (see SSC-3). Assume single sense data * descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG. */ if ((sshdr.additional_length > 3) && (scmd->sense_buffer[8] == 0x4) && (scmd->sense_buffer[11] & 0xe0)) return SUCCESS; } switch (sshdr.sense_key) { case NO_SENSE: return SUCCESS; case RECOVERED_ERROR: return /* soft_error */ SUCCESS; case ABORTED_COMMAND: return NEEDS_RETRY; case NOT_READY: case UNIT_ATTENTION: /* * if we are expecting a cc/ua because of a bus reset that we * performed, treat this just as a retry. otherwise this is * information that we should pass up to the upper-level driver * so that we can deal with it there. */ if (scmd->device->expecting_cc_ua) { scmd->device->expecting_cc_ua = 0; return NEEDS_RETRY; } /* * if the device is in the process of becoming ready, we * should retry. */ if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01)) return NEEDS_RETRY; /* * if the device is not started, we need to wake * the error handler to start the motor */ if (scmd->device->allow_restart && (sshdr.asc == 0x04) && (sshdr.ascq == 0x02)) return FAILED; return SUCCESS; /* these three are not supported */ case COPY_ABORTED: case VOLUME_OVERFLOW: case MISCOMPARE: return SUCCESS; case MEDIUM_ERROR: if (sshdr.asc == 0x11 || /* UNRECOVERED READ ERR */ sshdr.asc == 0x13 || /* AMNF DATA FIELD */ sshdr.asc == 0x14) { /* RECORD NOT FOUND */ return SUCCESS; } return NEEDS_RETRY; case HARDWARE_ERROR: if (scmd->device->retry_hwerror) return NEEDS_RETRY; else return SUCCESS; case ILLEGAL_REQUEST: case BLANK_CHECK: case DATA_PROTECT: default: return SUCCESS; }}/** * scsi_eh_completed_normally - Disposition a eh cmd on return from LLD. * @scmd: SCSI cmd to examine. * * Notes: * This is *only* called when we are examining the status of commands * queued during error recovery. the main difference here is that we * don't allow for the possibility of retries here, and we are a lot * more restrictive about what we consider acceptable. **/static int scsi_eh_completed_normally(struct scsi_cmnd *scmd){ /* * first check the host byte, to see if there is anything in there * that would indicate what we need to do. */ if (host_byte(scmd->result) == DID_RESET) { /* * rats. we are already in the error handler, so we now * get to try and figure out what to do next. if the sense * is valid, we have a pretty good idea of what to do. * if not, we mark it as FAILED. */ return scsi_check_sense(scmd); } if (host_byte(scmd->result) != DID_OK) return FAILED; /* * next, check the message byte. */ if (msg_byte(scmd->result) != COMMAND_COMPLETE) return FAILED; /* * now, check the status byte to see if this indicates * anything special. */ switch (status_byte(scmd->result)) { case GOOD: case COMMAND_TERMINATED: return SUCCESS; case CHECK_CONDITION: return scsi_check_sense(scmd); case CONDITION_GOOD: case INTERMEDIATE_GOOD: case INTERMEDIATE_C_GOOD: /* * who knows? FIXME(eric) */ return SUCCESS; case BUSY: case QUEUE_FULL: case RESERVATION_CONFLICT: default: return FAILED; } return FAILED;}/** * scsi_eh_done - Completion function for error handling. * @scmd: Cmd that is done. **/static void scsi_eh_done(struct scsi_cmnd *scmd){ struct completion *eh_action; SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n", __FUNCTION__, scmd, scmd->result)); eh_action = scmd->device->host->eh_action; if (eh_action) complete(eh_action);}/** * scsi_try_host_reset - ask host adapter to reset itself * @scmd: SCSI cmd to send hsot reset. **/static int scsi_try_host_reset(struct scsi_cmnd *scmd){ unsigned long flags; int rtn; SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n", __FUNCTION__));
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?