📄 scsi_error.c
字号:
*/ switch (status_byte(SCpnt->result)) { case QUEUE_FULL: /* * The case of trying to send too many commands to a tagged queueing * device. */ return ADD_TO_MLQUEUE; case GOOD: case COMMAND_TERMINATED: return SUCCESS; case CHECK_CONDITION: rtn = scsi_check_sense(SCpnt); if( rtn == NEEDS_RETRY ) { goto maybe_retry; } return rtn; case CONDITION_GOOD: case INTERMEDIATE_GOOD: case INTERMEDIATE_C_GOOD: /* * Who knows? FIXME(eric) */ return SUCCESS; case BUSY: case RESERVATION_CONFLICT: goto maybe_retry; default: return FAILED; } return FAILED;maybe_retry: if ((++SCpnt->retries) < SCpnt->allowed) { return NEEDS_RETRY; } else { return FAILED; }}/* * Function: scsi_eh_completed_normally * * Purpose: Examine a command block that has come back from the low-level * and figure out what to do next. * * Returns: SUCCESS - pass on to upper level. * FAILED - pass on to error handler thread. * RETRY - command should be retried. * SOFTERR - command succeeded, but we need to log * a soft error. * * Notes: This is *ONLY* called when we are examining the status * of commands queued during error recovery. The main * difference here is that we don't allow for the possibility * of retries here, and we are a lot more restrictive about what * we consider acceptable. */STATIC int scsi_eh_completed_normally (Scsi_Cmnd * SCpnt){ int rtn; /* * First check the host byte, to see if there is anything in there * that would indicate what we need to do. */ if( host_byte(SCpnt->result) == DID_RESET ) { if (SCpnt->flags & IS_RESETTING ) { /* * OK, this is normal. We don't know whether in fact the * command in question really needs to be rerun or not - * if this was the original data command then the answer is yes, * otherwise we just flag it as success. */ SCpnt->flags &= ~IS_RESETTING; return NEEDS_RETRY; } /* * Rats. We are already in the error handler, so we now get to try * and figure out what to do next. If the sense is valid, we have * a pretty good idea of what to do. If not, we mark it as failed. */ return scsi_check_sense (SCpnt); } if(host_byte(SCpnt->result) != DID_OK ) { return FAILED; } /* * Next, check the message byte. */ if( msg_byte(SCpnt->result) != COMMAND_COMPLETE ) { return FAILED; } /* * Now, check the status byte to see if this indicates anything special. */ switch (status_byte(SCpnt->result)) { case GOOD: case COMMAND_TERMINATED: return SUCCESS; case CHECK_CONDITION: rtn = scsi_check_sense(SCpnt); if( rtn == NEEDS_RETRY ) { return FAILED; } return rtn; case CONDITION_GOOD: case INTERMEDIATE_GOOD: case INTERMEDIATE_C_GOOD: /* * Who knows? FIXME(eric) */ return SUCCESS; case BUSY: case QUEUE_FULL: case RESERVATION_CONFLICT: default: return FAILED; } return FAILED;}/* * Function: scsi_check_sense * * Purpose: Examine sense information - give suggestion as to what * we should do with it. */STATIC int scsi_check_sense (Scsi_Cmnd * SCpnt){ if ( !scsi_sense_valid(SCpnt) ) { return FAILED; } if (SCpnt->sense_buffer[2] & 0xe0) return SUCCESS; switch (SCpnt->sense_buffer[2] & 0xf) { case NO_SENSE: return SUCCESS; case RECOVERED_ERROR: return /* SOFT_ERROR */ SUCCESS; case ABORTED_COMMAND: return NEEDS_RETRY; case NOT_READY: case UNIT_ATTENTION: /* * If we are expecting a CC/UA because of a bus reset that we * performed, treat this just as a retry. Otherwise this is * information that we should pass up to the upper-level driver * so that we can deal with it there. */ if( SCpnt->device->expecting_cc_ua ) { SCpnt->device->expecting_cc_ua = 0; return NEEDS_RETRY; } return SUCCESS; /* these three are not supported */ case COPY_ABORTED: case VOLUME_OVERFLOW: case MISCOMPARE: return SUCCESS; case MEDIUM_ERROR: return NEEDS_RETRY; case ILLEGAL_REQUEST: case BLANK_CHECK: case DATA_PROTECT: case HARDWARE_ERROR: default: return SUCCESS; }}/* * Function: scsi_restart_operations * * Purpose: Restart IO operations to the specified host. * * Arguments: host - host that we are restarting * * Returns: Nothing * * Notes: When we entered the error handler, we blocked all further * I/O to this device. We need to 'reverse' this process. */STATIC voidscsi_restart_operations(struct Scsi_Host * host){ Scsi_Device * SDpnt; /* * Next free up anything directly waiting upon the host. This will be * requests for character device operations, and also for ioctls to queued * block devices. */ SCSI_LOG_ERROR_RECOVERY(5,printk("scsi_error.c: Waking up host to restart\n")); wake_up(&host->host_wait); /* * Finally, block devices need an extra kick in the pants. This is because * the request queueing mechanism may have queued lots of pending requests * and there won't be a process waiting in a place where we can simply wake * it up. Thus we simply go through and call the request function to goose * the various top level drivers and get things moving again. */ for( SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next ) { SCSI_LOG_ERROR_RECOVERY(5,printk("Calling request function to restart things...\n")); if( SDpnt->scsi_request_fn != NULL ) (*SDpnt->scsi_request_fn)(); }}/* * Function: scsi_unjam_host * * Purpose: Attempt to fix a host which has a command that failed for * some reason. * * Arguments: host - host that needs unjamming. * * Returns: Nothing * * Notes: When we come in here, we *know* that all commands on the * bus have either completed, failed or timed out. We also * know that no further commands are being sent to the host, * so things are relatively quiet and we have freedom to * fiddle with things as we wish. * * Additional note: This is only the *default* implementation. It is possible * for individual drivers to supply their own version of this * function, and if the maintainer wishes to do this, it is * strongly suggested that this function be taken as a template * and modified. This function was designed to correctly handle * problems for about 95% of the different cases out there, and * it should always provide at least a reasonable amount of error * recovery. * * Note3: Any command marked 'FAILED' or 'TIMEOUT' must eventually * have scsi_finish_command() called for it. We do all of * the retry stuff here, so when we restart the host after we * return it should have an empty queue. */STATIC intscsi_unjam_host(struct Scsi_Host * host){ int devices_failed; int numfailed; int ourrtn; int rtn = FALSE; int result; Scsi_Cmnd * SCloop; Scsi_Cmnd * SCpnt; Scsi_Device * SDpnt; Scsi_Device * SDloop; Scsi_Cmnd * SCdone; int timed_out; SCdone = NULL; /* * First, protect against any sort of race condition. If any of the outstanding * commands are in states that indicate that we are not yet blocked (i.e. we are * not in a quiet state) then we got woken up in error. If we ever end up here, * we need to re-examine some of the assumptions. */ for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next) { for(SCpnt=SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) { if( SCpnt->state == SCSI_STATE_FAILED || SCpnt->state == SCSI_STATE_TIMEOUT || SCpnt->state == SCSI_STATE_INITIALIZING || SCpnt->state == SCSI_STATE_UNUSED) { continue; } /* * Rats. Something is still floating around out there. This could * be the result of the fact that the upper level drivers are still frobbing * commands that might have succeeded. There are two outcomes. One is that * the command block will eventually be freed, and the other one is that * the command will be queued and will be finished along the way. */ SCSI_LOG_ERROR_RECOVERY(1,printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target));/* * panic("SCSI Error handler woken too early\n"); * * This is no longer a problem, since now the code cares only about * SCSI_STATE_TIMEOUT and SCSI_STATE_FAILED. * Other states are useful only to release active commands when devices are * set offline. If (host->host_active == host->host_busy) we can safely assume * that there are no commands in state other then TIMEOUT od FAILED. (DB) * * FIXME: * It is not easy to release correctly commands according to their state when * devices are set offline, when the state is neither TIMEOUT nor FAILED. * When a device is set offline, we can have some command with * rq_status=RQ_SCSY_BUSY, owner=SCSI_STATE_HIGHLEVEL, * state=SCSI_STATE_INITIALIZING and the driver module cannot be released. * (DB, 17 May 1998) */ } } /* * Next, see if we need to request sense information. if so, * then get it now, so we have a better idea of what to do. * FIXME(eric) this has the unfortunate side effect that if a host * adapter does not automatically request sense information, that we end * up shutting it down before we request it. All hosts should be doing this * anyways, so for now all I have to say is tough noogies if you end up in here. * On second thought, this is probably a good idea. We *really* want to give * authors an incentive to automatically request this. */ SCSI_LOG_ERROR_RECOVERY(3,printk("scsi_unjam_host: Checking to see if we need to request sense\n")); for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next) { for(SCpnt=SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) { if( SCpnt->state != SCSI_STATE_FAILED || scsi_sense_valid(SCpnt) ) { continue; } SCSI_LOG_ERROR_RECOVERY(2,printk("scsi_unjam_host: Requesting sense for %d\n", SCpnt->target)); rtn = scsi_request_sense(SCpnt); if( rtn != SUCCESS ) { continue; } SCSI_LOG_ERROR_RECOVERY(3,printk("Sense requested for %p - result %x\n", SCpnt, SCpnt->result)); SCSI_LOG_ERROR_RECOVERY(3,print_sense("bh",SCpnt)); result = scsi_decide_disposition(SCpnt); /* * If the result was normal, then just pass it along to the * upper level. */ if( result == SUCCESS ) { SCpnt->host->host_failed--; scsi_eh_finish_command(&SCdone, SCpnt); } if( result != NEEDS_RETRY ) { continue; } /* * We only come in here if we want to retry a * command. The test to see whether the command * should be retried should be keeping track of the * number of tries, so we don't end up looping, of * course. */ SCpnt->state = NEEDS_RETRY; rtn = scsi_eh_retry_command(SCpnt); if( rtn != SUCCESS ) { continue; } /* * We eventually hand this one back to the top level. */ SCpnt->host->host_failed--; scsi_eh_finish_command(&SCdone, SCpnt); } } /* * Go through the list of commands and figure out where we stand and how bad things * really are. */ numfailed = 0; timed_out = 0; devices_failed = 0; for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next) { unsigned int device_error = 0; for(SCpnt=SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) { if( SCpnt->state == SCSI_STATE_FAILED ) { SCSI_LOG_ERROR_RECOVERY(5,printk("Command to ID %d failed\n", SCpnt->target)); numfailed++; device_error++; } if( SCpnt->state == SCSI_STATE_TIMEOUT ) { SCSI_LOG_ERROR_RECOVERY(5,printk("Command to ID %d timedout\n", SCpnt->target)); timed_out++; device_error++; } } if( device_error > 0 ) { devices_failed++; } } SCSI_LOG_ERROR_RECOVERY(2,printk("Total of %d+%d commands on %d devices require eh work\n", numfailed, timed_out, devices_failed)); if( host->host_failed == 0 ) { ourrtn = TRUE; goto leave; } /* * Next, try and see whether or not it makes sense to try and abort * the running command. This only works out to be the case if we have * one command that has timed out. If the command simply failed, it * makes no sense to try and abort the command, since as far as the * host adapter is concerned, it isn't running. */ SCSI_LOG_ERROR_RECOVERY(3,printk("scsi_unjam_host: Checking to see if we want to try abort\n")); for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next) { for(SCloop=SDpnt->device_queue; SCloop; SCloop = SCloop->next) { if( SCloop->state != SCSI_STATE_TIMEOUT ) { continue; } rtn = scsi_try_to_abort_command(SCloop, ABORT_TIMEOUT); if( rtn == SUCCESS ) { rtn = scsi_test_unit_ready(SCloop); if( rtn == SUCCESS && scsi_unit_is_ready(SCloop) ) { rtn = scsi_eh_retry_command(SCloop); if( rtn == SUCCESS ) { SCloop->host->host_failed--; scsi_eh_finish_command(&SCdone,SCloop); } } } } } /* * If we have corrected all of the problems, then we are done. */ if( host->host_failed == 0 ) { ourrtn = TRUE; goto leave; } /* * Either the abort wasn't appropriate, or it didn't succeed. * Now try a bus device reset. Still, look to see whether we have * multiple devices that are jammed or not - if we have multiple devices, * it makes no sense to try BUS_DEVICE_RESET - we really would need * to try a BUS_RESET instead. * * Does this make sense - should we try BDR on each device individually? * Yes, definitely. */ SCSI_LOG_ERROR_RECOVERY(3,printk("scsi_unjam_host: Checking to see if we want to try BDR\n")); for(SDpnt=host->host_queue; SDpnt; SDpnt = SDpnt->next) { for(SCloop=SDpnt->device_queue; SCloop; SCloop = SCloop->next) { if( SCloop->state == SCSI_STATE_FAILED || SCloop->state == SCSI_STATE_TIMEOUT ) { break; } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -