libata-eh.c
来自「linux 内核源代码」· C语言 代码 · 共 2,561 行 · 第 1/5 页
C
2,561 行
* Called just before performing EH actions to clear related bits * in @link->eh_info such that eh actions are not unnecessarily * repeated. * * LOCKING: * None. */void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, unsigned int action){ struct ata_port *ap = link->ap; struct ata_eh_info *ehi = &link->eh_info; struct ata_eh_context *ehc = &link->eh_context; unsigned long flags; spin_lock_irqsave(ap->lock, flags); /* Reset is represented by combination of actions and EHI * flags. Suck in all related bits before clearing eh_info to * avoid losing requested action. */ if (action & ATA_EH_RESET_MASK) { ehc->i.action |= ehi->action & ATA_EH_RESET_MASK; ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK; /* make sure all reset actions are cleared & clear EHI flags */ action |= ATA_EH_RESET_MASK; ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK; } ata_eh_clear_action(link, dev, ehi, action); if (!(ehc->i.flags & ATA_EHI_QUIET)) ap->pflags |= ATA_PFLAG_RECOVERED; spin_unlock_irqrestore(ap->lock, flags);}/** * ata_eh_done - EH action complete* @ap: target ATA port * @dev: target ATA dev for per-dev action (can be NULL) * @action: action just completed * * Called right after performing EH actions to clear related bits * in @link->eh_context. * * LOCKING: * None. */void ata_eh_done(struct ata_link *link, struct ata_device *dev, unsigned int action){ struct ata_eh_context *ehc = &link->eh_context; /* if reset is complete, clear all reset actions & reset modifier */ if (action & ATA_EH_RESET_MASK) { action |= ATA_EH_RESET_MASK; ehc->i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK; } ata_eh_clear_action(link, dev, &ehc->i, action);}/** * ata_err_string - convert err_mask to descriptive string * @err_mask: error mask to convert to string * * Convert @err_mask to descriptive string. Errors are * prioritized according to severity and only the most severe * error is reported. * * LOCKING: * None. 
*
 *	RETURNS:
 *	Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	/* ordered from most to least severe; the first match wins */
	static const struct {
		unsigned int	mask;
		const char	*desc;
	} by_severity[] = {
		{ AC_ERR_HOST_BUS,	"host bus error" },
		{ AC_ERR_ATA_BUS,	"ATA bus error" },
		{ AC_ERR_TIMEOUT,	"timeout" },
		{ AC_ERR_HSM,		"HSM violation" },
		{ AC_ERR_SYSTEM,	"internal error" },
		{ AC_ERR_MEDIA,		"media error" },
		{ AC_ERR_INVALID,	"invalid argument" },
		{ AC_ERR_DEV,		"device error" },
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(by_severity) / sizeof(by_severity[0]); idx++) {
		if (err_mask & by_severity[idx].mask)
			return by_severity[idx].desc;
	}

	return "unknown error";
}

/**
 *	ata_read_log_page - read a specific log page
 *	@dev: target device
 *	@page: page to read
 *	@buf: buffer to store read page
 *	@sectors: number of sectors to read
 *
 *	Read log page using READ_LOG_EXT command.  The command is
 *	issued as an LBA48 PIO transfer of @sectors * ATA_SECT_SIZE
 *	bytes into @buf.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask otherwise.
 */
static unsigned int ata_read_log_page(struct ata_device *dev,
				      u8 page, void *buf, unsigned int sectors)
{
	struct ata_taskfile log_tf;
	unsigned int result;

	DPRINTK("read log page - page %d\n", page);

	ata_tf_init(dev, &log_tf);

	log_tf.protocol = ATA_PROT_PIO;
	log_tf.command = ATA_CMD_READ_LOG_EXT;
	log_tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;

	/* the page number goes in LBA low, the sector count in nsect/hob_nsect */
	log_tf.lbal = page;
	log_tf.nsect = sectors;
	log_tf.hob_nsect = sectors >> 8;

	result = ata_exec_internal(dev, &log_tf, NULL, DMA_FROM_DEVICE,
				   buf, sectors * ATA_SECT_SIZE, 0);

	DPRINTK("EXIT, err_mask=%x\n", result);
	return result;
}

/**
 *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
 *	@dev: Device to read log page 10h from
 *	@tag: Resulting tag of the failed command
 *	@tf: Resulting taskfile registers of the failed command
 *
 *	Read log page 10h to obtain NCQ error details and clear error
 *	condition.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise.
*/static int ata_eh_read_log_10h(struct ata_device *dev, int *tag, struct ata_taskfile *tf){ u8 *buf = dev->link->ap->sector_buf; unsigned int err_mask; u8 csum; int i; err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1); if (err_mask) return -EIO; csum = 0; for (i = 0; i < ATA_SECT_SIZE; i++) csum += buf[i]; if (csum) ata_dev_printk(dev, KERN_WARNING, "invalid checksum 0x%x on log page 10h\n", csum); if (buf[0] & 0x80) return -ENOENT; *tag = buf[0] & 0x1f; tf->command = buf[2]; tf->feature = buf[3]; tf->lbal = buf[4]; tf->lbam = buf[5]; tf->lbah = buf[6]; tf->device = buf[7]; tf->hob_lbal = buf[8]; tf->hob_lbam = buf[9]; tf->hob_lbah = buf[10]; tf->nsect = buf[12]; tf->hob_nsect = buf[13]; return 0;}/** * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE * @dev: device to perform REQUEST_SENSE to * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) * * Perform ATAPI REQUEST_SENSE after the device reported CHECK * SENSE. This function is EH helper. * * LOCKING: * Kernel thread context (may sleep). * * RETURNS: * 0 on success, AC_ERR_* mask on failure */static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc){ struct ata_device *dev = qc->dev; unsigned char *sense_buf = qc->scsicmd->sense_buffer; struct ata_port *ap = dev->link->ap; struct ata_taskfile tf; u8 cdb[ATAPI_CDB_LEN]; DPRINTK("ATAPI request sense\n"); /* FIXME: is this needed? */ memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); /* initialize sense_buf with the error register, * for the case where they are -not- overwritten */ sense_buf[0] = 0x70; sense_buf[2] = qc->result_tf.feature >> 4; /* some devices time out if garbage left in tf */ ata_tf_init(dev, &tf); memset(cdb, 0, ATAPI_CDB_LEN); cdb[0] = REQUEST_SENSE; cdb[4] = SCSI_SENSE_BUFFERSIZE; tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.command = ATA_CMD_PACKET; /* is it pointless to prefer PIO for "safety reasons"? 
*/ if (ap->flags & ATA_FLAG_PIO_DMA) { tf.protocol = ATA_PROT_ATAPI_DMA; tf.feature |= ATAPI_PKT_DMA; } else { tf.protocol = ATA_PROT_ATAPI; tf.lbam = SCSI_SENSE_BUFFERSIZE; tf.lbah = 0; } return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, sense_buf, SCSI_SENSE_BUFFERSIZE, 0);}/** * ata_eh_analyze_serror - analyze SError for a failed port * @link: ATA link to analyze SError for * * Analyze SError if available and further determine cause of * failure. * * LOCKING: * None. */static void ata_eh_analyze_serror(struct ata_link *link){ struct ata_eh_context *ehc = &link->eh_context; u32 serror = ehc->i.serror; unsigned int err_mask = 0, action = 0; u32 hotplug_mask; if (serror & SERR_PERSISTENT) { err_mask |= AC_ERR_ATA_BUS; action |= ATA_EH_HARDRESET; } if (serror & (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) { err_mask |= AC_ERR_ATA_BUS; action |= ATA_EH_SOFTRESET; } if (serror & SERR_PROTOCOL) { err_mask |= AC_ERR_HSM; action |= ATA_EH_SOFTRESET; } if (serror & SERR_INTERNAL) { err_mask |= AC_ERR_SYSTEM; action |= ATA_EH_HARDRESET; } /* Determine whether a hotplug event has occurred. Both * SError.N/X are considered hotplug events for enabled or * host links. For disabled PMP links, only N bit is * considered as X bit is left at 1 for link plugging. */ hotplug_mask = 0; if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; else hotplug_mask = SERR_PHYRDY_CHG; if (serror & hotplug_mask) ata_ehi_hotplugged(&ehc->i); ehc->i.err_mask |= err_mask; ehc->i.action |= action;}/** * ata_eh_analyze_ncq_error - analyze NCQ error * @link: ATA link to analyze NCQ error for * * Read log page 10h, determine the offending qc and acquire * error status TF. For NCQ device errors, all LLDDs have to do * is setting AC_ERR_DEV in ehi->err_mask. This function takes * care of the rest. * * LOCKING: * Kernel thread context (may sleep). 
*/
static void ata_eh_analyze_ncq_error(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev = link->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		/* a failed qc already carries an error: nothing left to do */
		if (qc->err_mask)
			return;
	}

	/*
	 * okay, this error is ours
	 *
	 * ata_eh_read_log_10h() fills in only the register fields of
	 * @tf.  Zero it first so that the memcpy() below does not copy
	 * indeterminate stack contents into the remaining fields of
	 * qc->result_tf.
	 */
	memset(&tf, 0, sizeof(tf));
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_link_printk(link, KERN_ERR, "failed to read log page 10h "
				"(errno=%d)\n", rc);
		return;
	}

	/*
	 * tag can be as large as 31; shift an unsigned one so that bit 31
	 * is tested without overflowing a signed int.
	 */
	if (!(link->sactive & (1U << tag))) {
		ata_link_printk(link, KERN_ERR, "log page 10h reported "
				"inactive tag %d\n", tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;

	/* the device error now lives on the qc, not on the link */
	ehc->i.err_mask &= ~AC_ERR_DEV;
}

/**
 *	ata_eh_analyze_tf - analyze taskfile of a failed qc
 *	@qc: qc to analyze
 *	@tf: Taskfile registers to analyze
 *
 *	Analyze taskfile of @qc and further determine cause of
 *	failure.  This function also requests ATAPI sense data if
 *	available.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
* * RETURNS: * Determined recovery action */static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, const struct ata_taskfile *tf){ unsigned int tmp, action = 0; u8 stat = tf->command, err = tf->feature; if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { qc->err_mask |= AC_ERR_HSM; return ATA_EH_SOFTRESET; } if (stat & (ATA_ERR | ATA_DF)) qc->err_mask |= AC_ERR_DEV; else return 0; switch (qc->dev->class) { case ATA_DEV_ATA: if (err & ATA_ICRC) qc->err_mask |= AC_ERR_ATA_BUS; if (err & ATA_UNC) qc->err_mask |= AC_ERR_MEDIA; if (err & ATA_IDNF) qc->err_mask |= AC_ERR_INVALID; break; case ATA_DEV_ATAPI: if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { tmp = atapi_eh_request_sense(qc); if (!tmp) { /* ATA_QCFLAG_SENSE_VALID is used to * tell atapi_qc_complete() that sense * data is already valid. * * TODO: interpret sense data and set * appropriate err_mask. */ qc->flags |= ATA_QCFLAG_SENSE_VALID; } else qc->err_mask |= tmp; } } if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) action |= ATA_EH_SOFTRESET; return action;}static int ata_eh_categorize_error(int is_io, unsigned int err_mask){ if (err_mask & AC_ERR_ATA_BUS) return 1; if (err_mask & AC_ERR_TIMEOUT) return 2; if (is_io) { if (err_mask & AC_ERR_HSM) return 2; if ((err_mask & (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) return 3; } return 0;}struct speed_down_verdict_arg { u64 since; int nr_errors[4];};static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg){ struct speed_down_verdict_arg *arg = void_arg; int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask); if (ent->timestamp < arg->since) return -1; arg->nr_errors[cat]++; return 0;}/** * ata_eh_speed_down_verdict - Determine speed down verdict * @dev: Device of interest * * This function examines error ring of @dev and determines * whether NCQ needs to be turned off, transfer speed should be * stepped down, or falling back to PIO is necessary. * * Cat-1 is ATA_BUS error for any command. 
* * Cat-2 is TIMEOUT for any command or HSM violation for known * supported commands. * * Cat-3 is is unclassified DEV error for known supported * command. * * NCQ needs to be turned off if there have been more than 3 * Cat-2 + Cat-3 errors during last 10 minutes. * * Speed down is necessary if there have been more than 3 Cat-1 + * Cat-2 errors or 10 Cat-3 errors during last 10 minutes. * * Falling back to PIO mode is necessary if there have been more * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes. * * LOCKING: * Inherited from caller. * * RETURNS: * OR of ATA_EH_SPDN_* flags. */static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev){ const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; u64 j64 = get_jiffies_64(); struct speed_down_verdict_arg arg; unsigned int verdict = 0; /* scan past 10 mins of error history */ memset(&arg, 0, sizeof(arg)); arg.since = j64 - min(j64, j10mins); ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); if (arg.nr_errors[2] + arg.nr_errors[3] > 3) verdict |= ATA_EH_SPDN_NCQ_OFF; if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10) verdict |= ATA_EH_SPDN_SPEED_DOWN; /* scan past 3 mins of error history */ memset(&arg, 0, sizeof(arg));
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?