libata-eh.c

来自「linux 内核源代码」· C语言 代码 · 共 2,561 行 · 第 1/5 页

C
2,561
字号
 *	Called just before performing EH actions to clear related bits *	in @link->eh_info such that eh actions are not unnecessarily *	repeated. * *	LOCKING: *	None. */void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,			unsigned int action){	struct ata_port *ap = link->ap;	struct ata_eh_info *ehi = &link->eh_info;	struct ata_eh_context *ehc = &link->eh_context;	unsigned long flags;	spin_lock_irqsave(ap->lock, flags);	/* Reset is represented by combination of actions and EHI	 * flags.  Suck in all related bits before clearing eh_info to	 * avoid losing requested action.	 */	if (action & ATA_EH_RESET_MASK) {		ehc->i.action |= ehi->action & ATA_EH_RESET_MASK;		ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK;		/* make sure all reset actions are cleared & clear EHI flags */		action |= ATA_EH_RESET_MASK;		ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK;	}	ata_eh_clear_action(link, dev, ehi, action);	if (!(ehc->i.flags & ATA_EHI_QUIET))		ap->pflags |= ATA_PFLAG_RECOVERED;	spin_unlock_irqrestore(ap->lock, flags);}/** *	ata_eh_done - EH action complete*	@ap: target ATA port *	@dev: target ATA dev for per-dev action (can be NULL) *	@action: action just completed * *	Called right after performing EH actions to clear related bits *	in @link->eh_context. * *	LOCKING: *	None. */void ata_eh_done(struct ata_link *link, struct ata_device *dev,		 unsigned int action){	struct ata_eh_context *ehc = &link->eh_context;	/* if reset is complete, clear all reset actions & reset modifier */	if (action & ATA_EH_RESET_MASK) {		action |= ATA_EH_RESET_MASK;		ehc->i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK;	}	ata_eh_clear_action(link, dev, &ehc->i, action);}/** *	ata_err_string - convert err_mask to descriptive string *	@err_mask: error mask to convert to string * *	Convert @err_mask to descriptive string.  Errors are *	prioritized according to severity and only the most severe *	error is reported. * *	LOCKING: *	None. 
*
 *	RETURNS:
 *	Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	/* ordered from most to least severe; the first hit wins */
	static const struct {
		unsigned int	bit;
		const char	*desc;
	} by_severity[] = {
		{ AC_ERR_HOST_BUS,	"host bus error" },
		{ AC_ERR_ATA_BUS,	"ATA bus error" },
		{ AC_ERR_TIMEOUT,	"timeout" },
		{ AC_ERR_HSM,		"HSM violation" },
		{ AC_ERR_SYSTEM,	"internal error" },
		{ AC_ERR_MEDIA,		"media error" },
		{ AC_ERR_INVALID,	"invalid argument" },
		{ AC_ERR_DEV,		"device error" },
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(by_severity) / sizeof(by_severity[0]); idx++) {
		if (err_mask & by_severity[idx].bit)
			return by_severity[idx].desc;
	}

	return "unknown error";
}

/**
 *	ata_read_log_page - read a specific log page
 *	@dev: target device
 *	@page: page to read
 *	@buf: buffer to store read page
 *	@sectors: number of sectors to read
 *
 *	Read log page using READ_LOG_EXT command.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask otherwise.
 */
static unsigned int ata_read_log_page(struct ata_device *dev,
				      u8 page, void *buf, unsigned int sectors)
{
	struct ata_taskfile log_tf;
	unsigned int rc;

	DPRINTK("read log page - page %d\n", page);

	/* start from a freshly initialized taskfile for @dev */
	ata_tf_init(dev, &log_tf);

	log_tf.protocol = ATA_PROT_PIO;
	log_tf.command = ATA_CMD_READ_LOG_EXT;
	log_tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;

	/* page number goes in lbal; the sector count is split across
	 * nsect and hob_nsect
	 */
	log_tf.lbal = page;
	log_tf.nsect = sectors;
	log_tf.hob_nsect = sectors >> 8;

	rc = ata_exec_internal(dev, &log_tf, NULL, DMA_FROM_DEVICE,
			       buf, sectors * ATA_SECT_SIZE, 0);

	DPRINTK("EXIT, err_mask=%x\n", rc);
	return rc;
}

/**
 *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
 *	@dev: Device to read log page 10h from
 *	@tag: Resulting tag of the failed command
 *	@tf: Resulting taskfile registers of the failed command
 *
 *	Read log page 10h to obtain NCQ error details and clear error
 *	condition.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise.
*/static int ata_eh_read_log_10h(struct ata_device *dev,			       int *tag, struct ata_taskfile *tf){	u8 *buf = dev->link->ap->sector_buf;	unsigned int err_mask;	u8 csum;	int i;	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);	if (err_mask)		return -EIO;	csum = 0;	for (i = 0; i < ATA_SECT_SIZE; i++)		csum += buf[i];	if (csum)		ata_dev_printk(dev, KERN_WARNING,			       "invalid checksum 0x%x on log page 10h\n", csum);	if (buf[0] & 0x80)		return -ENOENT;	*tag = buf[0] & 0x1f;	tf->command = buf[2];	tf->feature = buf[3];	tf->lbal = buf[4];	tf->lbam = buf[5];	tf->lbah = buf[6];	tf->device = buf[7];	tf->hob_lbal = buf[8];	tf->hob_lbam = buf[9];	tf->hob_lbah = buf[10];	tf->nsect = buf[12];	tf->hob_nsect = buf[13];	return 0;}/** *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE *	@dev: device to perform REQUEST_SENSE to *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) * *	Perform ATAPI REQUEST_SENSE after the device reported CHECK *	SENSE.  This function is EH helper. * *	LOCKING: *	Kernel thread context (may sleep). * *	RETURNS: *	0 on success, AC_ERR_* mask on failure */static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc){	struct ata_device *dev = qc->dev;	unsigned char *sense_buf = qc->scsicmd->sense_buffer;	struct ata_port *ap = dev->link->ap;	struct ata_taskfile tf;	u8 cdb[ATAPI_CDB_LEN];	DPRINTK("ATAPI request sense\n");	/* FIXME: is this needed? */	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);	/* initialize sense_buf with the error register,	 * for the case where they are -not- overwritten	 */	sense_buf[0] = 0x70;	sense_buf[2] = qc->result_tf.feature >> 4;	/* some devices time out if garbage left in tf */	ata_tf_init(dev, &tf);	memset(cdb, 0, ATAPI_CDB_LEN);	cdb[0] = REQUEST_SENSE;	cdb[4] = SCSI_SENSE_BUFFERSIZE;	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;	tf.command = ATA_CMD_PACKET;	/* is it pointless to prefer PIO for "safety reasons"? 
*/	if (ap->flags & ATA_FLAG_PIO_DMA) {		tf.protocol = ATA_PROT_ATAPI_DMA;		tf.feature |= ATAPI_PKT_DMA;	} else {		tf.protocol = ATA_PROT_ATAPI;		tf.lbam = SCSI_SENSE_BUFFERSIZE;		tf.lbah = 0;	}	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);}/** *	ata_eh_analyze_serror - analyze SError for a failed port *	@link: ATA link to analyze SError for * *	Analyze SError if available and further determine cause of *	failure. * *	LOCKING: *	None. */static void ata_eh_analyze_serror(struct ata_link *link){	struct ata_eh_context *ehc = &link->eh_context;	u32 serror = ehc->i.serror;	unsigned int err_mask = 0, action = 0;	u32 hotplug_mask;	if (serror & SERR_PERSISTENT) {		err_mask |= AC_ERR_ATA_BUS;		action |= ATA_EH_HARDRESET;	}	if (serror &	    (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {		err_mask |= AC_ERR_ATA_BUS;		action |= ATA_EH_SOFTRESET;	}	if (serror & SERR_PROTOCOL) {		err_mask |= AC_ERR_HSM;		action |= ATA_EH_SOFTRESET;	}	if (serror & SERR_INTERNAL) {		err_mask |= AC_ERR_SYSTEM;		action |= ATA_EH_HARDRESET;	}	/* Determine whether a hotplug event has occurred.  Both	 * SError.N/X are considered hotplug events for enabled or	 * host links.  For disabled PMP links, only N bit is	 * considered as X bit is left at 1 for link plugging.	 */	hotplug_mask = 0;	if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))		hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;	else		hotplug_mask = SERR_PHYRDY_CHG;	if (serror & hotplug_mask)		ata_ehi_hotplugged(&ehc->i);	ehc->i.err_mask |= err_mask;	ehc->i.action |= action;}/** *	ata_eh_analyze_ncq_error - analyze NCQ error *	@link: ATA link to analyze NCQ error for * *	Read log page 10h, determine the offending qc and acquire *	error status TF.  For NCQ device errors, all LLDDs have to do *	is setting AC_ERR_DEV in ehi->err_mask.  This function takes *	care of the rest. * *	LOCKING: *	Kernel thread context (may sleep). 
*/
static void ata_eh_analyze_ncq_error(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev = link->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already?  If any failed qc already carries
	 * an err_mask there is nothing left for us to determine.
	 */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */

	/* ata_eh_read_log_10h() fills in only some of the taskfile
	 * fields, yet the whole structure is copied into
	 * qc->result_tf below.  Zero it first so that no indeterminate
	 * stack contents end up in the result taskfile.
	 */
	memset(&tf, 0, sizeof(tf));

	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_link_printk(link, KERN_ERR, "failed to read log page 10h "
				"(errno=%d)\n", rc);
		return;
	}

	/* tag may be as large as 31; shift an unsigned one so that the
	 * top bit can be tested without overflowing a signed int
	 */
	if (!(link->sactive & (1U << tag))) {
		ata_link_printk(link, KERN_ERR, "log page 10h reported "
				"inactive tag %d\n", tag);
		return;
	}

	/* we've got the perpetrator, condemn it: hand it the error
	 * taskfile and move the device error from the link-wide mask
	 * to this qc
	 */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
	ehc->i.err_mask &= ~AC_ERR_DEV;
}

/**
 *	ata_eh_analyze_tf - analyze taskfile of a failed qc
 *	@qc: qc to analyze
 *	@tf: Taskfile registers to analyze
 *
 *	Analyze taskfile of @qc and further determine cause of
 *	failure.  This function also requests ATAPI sense data if
 *	available.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
* *	RETURNS: *	Determined recovery action */static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,				      const struct ata_taskfile *tf){	unsigned int tmp, action = 0;	u8 stat = tf->command, err = tf->feature;	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {		qc->err_mask |= AC_ERR_HSM;		return ATA_EH_SOFTRESET;	}	if (stat & (ATA_ERR | ATA_DF))		qc->err_mask |= AC_ERR_DEV;	else		return 0;	switch (qc->dev->class) {	case ATA_DEV_ATA:		if (err & ATA_ICRC)			qc->err_mask |= AC_ERR_ATA_BUS;		if (err & ATA_UNC)			qc->err_mask |= AC_ERR_MEDIA;		if (err & ATA_IDNF)			qc->err_mask |= AC_ERR_INVALID;		break;	case ATA_DEV_ATAPI:		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {			tmp = atapi_eh_request_sense(qc);			if (!tmp) {				/* ATA_QCFLAG_SENSE_VALID is used to				 * tell atapi_qc_complete() that sense				 * data is already valid.				 *				 * TODO: interpret sense data and set				 * appropriate err_mask.				 */				qc->flags |= ATA_QCFLAG_SENSE_VALID;			} else				qc->err_mask |= tmp;		}	}	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))		action |= ATA_EH_SOFTRESET;	return action;}static int ata_eh_categorize_error(int is_io, unsigned int err_mask){	if (err_mask & AC_ERR_ATA_BUS)		return 1;	if (err_mask & AC_ERR_TIMEOUT)		return 2;	if (is_io) {		if (err_mask & AC_ERR_HSM)			return 2;		if ((err_mask &		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)			return 3;	}	return 0;}struct speed_down_verdict_arg {	u64 since;	int nr_errors[4];};static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg){	struct speed_down_verdict_arg *arg = void_arg;	int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask);	if (ent->timestamp < arg->since)		return -1;	arg->nr_errors[cat]++;	return 0;}/** *	ata_eh_speed_down_verdict - Determine speed down verdict *	@dev: Device of interest * *	This function examines error ring of @dev and determines *	whether NCQ needs to be turned off, transfer speed should be *	stepped down, or 
falling back to PIO is necessary. * *	Cat-1 is ATA_BUS error for any command. * *	Cat-2 is TIMEOUT for any command or HSM violation for known *	supported commands. * *	Cat-3 is is unclassified DEV error for known supported *	command. * *	NCQ needs to be turned off if there have been more than 3 *	Cat-2 + Cat-3 errors during last 10 minutes. * *	Speed down is necessary if there have been more than 3 Cat-1 + *	Cat-2 errors or 10 Cat-3 errors during last 10 minutes. * *	Falling back to PIO mode is necessary if there have been more *	than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes. * *	LOCKING: *	Inherited from caller. * *	RETURNS: *	OR of ATA_EH_SPDN_* flags. */static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev){	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;	u64 j64 = get_jiffies_64();	struct speed_down_verdict_arg arg;	unsigned int verdict = 0;	/* scan past 10 mins of error history */	memset(&arg, 0, sizeof(arg));	arg.since = j64 - min(j64, j10mins);	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);	if (arg.nr_errors[2] + arg.nr_errors[3] > 3)		verdict |= ATA_EH_SPDN_NCQ_OFF;	if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10)		verdict |= ATA_EH_SPDN_SPEED_DOWN;	/* scan past 3 mins of error history */	memset(&arg, 0, sizeof(arg));

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?