📄 scsi_error.c
字号:
* be mindful of the maximum number of retries specified * and not get stuck in a loop. */ case DID_SOFT_ERROR: goto maybe_retry; case DID_BUS_BUSY: case DID_PARITY: case DID_ERROR: goto maybe_retry; case DID_TIME_OUT: /* * When we scan the bus, we get timeout messages for * these commands if there is no device available. * Other hosts report DID_NO_CONNECT for the same thing. */ if ((SCpnt->cmnd[0] == TEST_UNIT_READY || SCpnt->cmnd[0] == INQUIRY)) { return SUCCESS; } else { return FAILED; } case DID_RESET: /* * In the normal case where we haven't initiated a reset, this is * a failure. */ if (SCpnt->flags & IS_RESETTING) { SCpnt->flags &= ~IS_RESETTING; goto maybe_retry; } /* * Examine the sense data to figure out how to proceed from here. * If there is no sense data, we will be forced into the error * handler thread, where we get to examine the thing in a lot more * detail. */ return scsi_check_sense(SCpnt); default: return FAILED; } /* * Next, check the message byte. */ if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) { return FAILED; } /* * Now, check the status byte to see if this indicates anything special. */ switch (status_byte(SCpnt->result)) { case QUEUE_FULL: /* * The case of trying to send too many commands to a tagged queueing * device. */ return ADD_TO_MLQUEUE; case GOOD: case COMMAND_TERMINATED: return SUCCESS; case CHECK_CONDITION: rtn = scsi_check_sense(SCpnt); if (rtn == NEEDS_RETRY) { goto maybe_retry; } return rtn; case CONDITION_GOOD: case INTERMEDIATE_GOOD: case INTERMEDIATE_C_GOOD: /* * Who knows? FIXME(eric) */ return SUCCESS; case BUSY: case RESERVATION_CONFLICT: goto maybe_retry; default: return FAILED; } return FAILED; maybe_retry: if ((++SCpnt->retries) < SCpnt->allowed) { return NEEDS_RETRY; } else { /* * No more retries - report this one back to upper level. */ return SUCCESS; }}/* * Function: scsi_eh_completed_normally * * Purpose: Examine a command block that has come back from the low-level * and figure out what to do next. * * Returns: SUCCESS - pass on to upper level. * FAILED - pass on to error handler thread. * RETRY - command should be retried. * SOFTERR - command succeeded, but we need to log * a soft error. * * Notes: This is *ONLY* called when we are examining the status * of commands queued during error recovery. The main * difference here is that we don't allow for the possibility * of retries here, and we are a lot more restrictive about what * we consider acceptable. */STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt){ int rtn; /* * First check the host byte, to see if there is anything in there * that would indicate what we need to do. */ if (host_byte(SCpnt->result) == DID_RESET) { if (SCpnt->flags & IS_RESETTING) { /* * OK, this is normal. We don't know whether in fact the * command in question really needs to be rerun or not - * if this was the original data command then the answer is yes, * otherwise we just flag it as success. */ SCpnt->flags &= ~IS_RESETTING; return NEEDS_RETRY; } /* * Rats. We are already in the error handler, so we now get to try * and figure out what to do next. If the sense is valid, we have * a pretty good idea of what to do. If not, we mark it as failed. */ return scsi_check_sense(SCpnt); } if (host_byte(SCpnt->result) != DID_OK) { return FAILED; } /* * Next, check the message byte. */ if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) { return FAILED; } /* * Now, check the status byte to see if this indicates anything special. */ switch (status_byte(SCpnt->result)) { case GOOD: case COMMAND_TERMINATED: return SUCCESS; case CHECK_CONDITION: rtn = scsi_check_sense(SCpnt); if (rtn == NEEDS_RETRY) { return FAILED; } return rtn; case CONDITION_GOOD: case INTERMEDIATE_GOOD: case INTERMEDIATE_C_GOOD: /* * Who knows? FIXME(eric) */ return SUCCESS; case BUSY: case QUEUE_FULL: case RESERVATION_CONFLICT: default: return FAILED; } return FAILED;}/* * Function: scsi_check_sense * * Purpose: Examine sense information - give suggestion as to what * we should do with it. */STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt){ if (!scsi_sense_valid(SCpnt)) { return FAILED; } if (SCpnt->sense_buffer[2] & 0xe0) return SUCCESS; switch (SCpnt->sense_buffer[2] & 0xf) { case NO_SENSE: return SUCCESS; case RECOVERED_ERROR: return /* SOFT_ERROR */ SUCCESS; case ABORTED_COMMAND: return NEEDS_RETRY; case NOT_READY: case UNIT_ATTENTION: /* * If we are expecting a CC/UA because of a bus reset that we * performed, treat this just as a retry. Otherwise this is * information that we should pass up to the upper-level driver * so that we can deal with it there. */ if (SCpnt->device->expecting_cc_ua) { SCpnt->device->expecting_cc_ua = 0; return NEEDS_RETRY; } return SUCCESS; /* these three are not supported */ case COPY_ABORTED: case VOLUME_OVERFLOW: case MISCOMPARE: return SUCCESS; case MEDIUM_ERROR: return NEEDS_RETRY; case ILLEGAL_REQUEST: case BLANK_CHECK: case DATA_PROTECT: case HARDWARE_ERROR: default: return SUCCESS; }}/* * Function: scsi_restart_operations * * Purpose: Restart IO operations to the specified host. * * Arguments: host - host that we are restarting * * Lock status: Assumed that locks are not held upon entry. * * Returns: Nothing * * Notes: When we entered the error handler, we blocked all further * I/O to this device. We need to 'reverse' this process. */STATIC void scsi_restart_operations(struct Scsi_Host *host){ Scsi_Device *SDpnt; unsigned long flags; ASSERT_LOCK(&io_request_lock, 0); /* * Next free up anything directly waiting upon the host. This will be * requests for character device operations, and also for ioctls to queued * block devices. */ SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: Waking up host to restart\n")); wake_up(&host->host_wait); /* * Finally we need to re-initiate requests that may be pending. We will * have had everything blocked while error handling is taking place, and * now that error recovery is done, we will need to ensure that these * requests are started. */ spin_lock_irqsave(&io_request_lock, flags); for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { request_queue_t *q; if ((host->can_queue > 0 && (host->host_busy >= host->can_queue)) || (host->host_blocked) || (host->host_self_blocked) || (SDpnt->device_blocked)) { break; } q = &SDpnt->request_queue; q->request_fn(q); } spin_unlock_irqrestore(&io_request_lock, flags);}/* * Function: scsi_unjam_host * * Purpose: Attempt to fix a host which has a command that failed for * some reason. * * Arguments: host - host that needs unjamming. * * Returns: Nothing * * Notes: When we come in here, we *know* that all commands on the * bus have either completed, failed or timed out. We also * know that no further commands are being sent to the host, * so things are relatively quiet and we have freedom to * fiddle with things as we wish. * * Additional note: This is only the *default* implementation. It is possible * for individual drivers to supply their own version of this * function, and if the maintainer wishes to do this, it is * strongly suggested that this function be taken as a template * and modified. This function was designed to correctly handle * problems for about 95% of the different cases out there, and * it should always provide at least a reasonable amount of error * recovery. * * Note3: Any command marked 'FAILED' or 'TIMEOUT' must eventually * have scsi_finish_command() called for it. We do all of * the retry stuff here, so when we restart the host after we * return it should have an empty queue. */STATIC int scsi_unjam_host(struct Scsi_Host *host){ int devices_failed; int numfailed; int ourrtn; int rtn = FALSE; int result; Scsi_Cmnd *SCloop; Scsi_Cmnd *SCpnt; Scsi_Device *SDpnt; Scsi_Device *SDloop; Scsi_Cmnd *SCdone; int timed_out; ASSERT_LOCK(&io_request_lock, 0); SCdone = NULL; /* * First, protect against any sort of race condition. If any of the outstanding * commands are in states that indicate that we are not yet blocked (i.e. we are * not in a quiet state) then we got woken up in error. If we ever end up here, * we need to re-examine some of the assumptions. */ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) { if (SCpnt->state == SCSI_STATE_FAILED || SCpnt->state == SCSI_STATE_TIMEOUT || SCpnt->state == SCSI_STATE_INITIALIZING || SCpnt->state == SCSI_STATE_UNUSED) { continue; } /* * Rats. Something is still floating around out there. This could * be the result of the fact that the upper level drivers are still frobbing * commands that might have succeeded. There are two outcomes. One is that * the command block will eventually be freed, and the other one is that * the command will be queued and will be finished along the way. */ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target));/* * panic("SCSI Error handler woken too early\n"); * * This is no longer a problem, since now the code cares only about * SCSI_STATE_TIMEOUT and SCSI_STATE_FAILED. * Other states are useful only to release active commands when devices are * set offline. If (host->host_active == host->host_busy) we can safely assume * that there are no commands in state other then TIMEOUT od FAILED. (DB) * * FIXME: * It is not easy to release correctly commands according to their state when * devices are set offline, when the state is neither TIMEOUT nor FAILED. * When a device is set offline, we can have some command with * rq_status=RQ_SCSY_BUSY, owner=SCSI_STATE_HIGHLEVEL, * state=SCSI_STATE_INITIALIZING and the driver module cannot be released. * (DB, 17 May 1998) */ } } /* * Next, see if we need to request sense information. if so, * then get it now, so we have a better idea of what to do. * FIXME(eric) this has the unfortunate side effect that if a host * adapter does not automatically request sense information, that we end * up shutting it down before we request it. All hosts should be doing this * anyways, so for now all I have to say is tough noogies if you end up in here. * On second thought, this is probably a good idea. We *really* want to give * authors an incentive to automatically request this. */ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we need to request sense\n")); for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) { if (SCpnt->state != SCSI_STATE_FAILED || scsi_sense_valid(SCpnt)) { continue; } SCSI_LOG_ERROR_RECOVERY(2, printk("scsi_unjam_host: Requesting sense for %d\n", SCpnt->target)); rtn = scsi_request_sense(SCpnt); if (rtn != SUCCESS) { continue; } SCSI_LOG_ERROR_RECOVERY(3, printk("Sense requested for %p - result %x\n", SCpnt, SCpnt->result)); SCSI_LOG_ERROR_RECOVERY(3, print_sense("bh", SCpnt)); result = scsi_decide_disposition(SCpnt); /* * If the result was normal, then just pass it along to the * upper level. */ if (result == SUCCESS) { SCpnt->host->host_failed--; scsi_eh_finish_command(&SCdone, SCpnt); } if (result != NEEDS_RETRY) { continue; } /* * We only come in here if we want to retry a * command. The test to see whether the command * should be retried should be keeping track of the * number of tries, so we don't end up looping, of * course. */ SCpnt->state = NEEDS_RETRY; rtn = scsi_eh_retry_command(SCpnt); if (rtn != SUCCESS) { continue; } /* * We eventually hand this one back to the top level. */ SCpnt->host->host_failed--; scsi_eh_finish_command(&SCdone, SCpnt); } } /* * Go through the list of commands and figure out where we stand and how bad things * really are. */ numfailed = 0; timed_out = 0; devices_failed = 0; for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { unsigned int device_error = 0; for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) { if (SCpnt->state == SCSI_STATE_FAILED) { SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d failed\n", SCpnt->target)); numfailed++; device_error++; } if (SCpnt->state == SCSI_STATE_TIMEOUT) { SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d timedout\n", SCpnt->target)); timed_out++; device_error++; } } if (device_error > 0) { devices_failed++; } } SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d+%d commands on %d devices require eh work\n", numfailed, timed_out, devices_failed)); if (host->host_failed == 0) { ourrtn = TRUE; goto leave; } /* * Next, try and see whether or not it makes sense to try and abort * the running command. This only works out to be the case if we have * one command that has timed out. If the command simply failed, it * makes no sense to try and abort the command, since as far as the * host adapter is concerned, it isn't running. */ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try abort\n")); for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) { if (SCloop->state != SCSI_STATE_TIMEOUT) { continue; } rtn = scsi_try_to_abort_command(SCloop, ABORT_TIMEOUT); if (rtn == SUCCESS) { rtn = scsi_test_unit_ready(SCloop); if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) { rtn = scsi_eh_retry_command(SCloop); if (rtn == SUCCESS) { SCloop->host->host_failed--; scsi_eh_finish_command(&SCdone, SCloop); } } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -