i5000_edac.c

来自「linux 内核源代码」· C语言 代码 · 共 1,506 行 · 第 1/3 页

C
1,506
字号
	/* Only 1 bit will be on */	if (allErrors & FERR_FAT_M1ERR) {		i5000_mc_printk(mci, KERN_ERR,				"Alert on non-redundant retry or fast "				"reset timeout\n");	} else if (allErrors & FERR_FAT_M2ERR) {		i5000_mc_printk(mci, KERN_ERR,				"Northbound CRC error on non-redundant "				"retry\n");	} else if (allErrors & FERR_FAT_M3ERR) {		i5000_mc_printk(mci, KERN_ERR,				">Tmid Thermal event with intelligent "				"throttling disabled\n");	}	/* Form out message */	snprintf(msg, sizeof(msg),		 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d "		 "FATAL Err=0x%x)",		 branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas,		 allErrors);	/* Call the helper to output message */	edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);}/* * i5000_process_fatal_error_info(struct mem_ctl_info *mci, * 				struct i5000_error_info *info, * 				int handle_errors); * *	handle the Intel NON-FATAL errors, if any */static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,					struct i5000_error_info *info,					int handle_errors){	char msg[EDAC_MC_LABEL_LEN + 1 + 90];	u32 allErrors;	u32 ue_errors;	u32 ce_errors;	u32 misc_errors;	int branch;	int channel;	int bank;	int rank;	int rdwr;	int ras, cas;	/* mask off the Error bits that are possible */	allErrors = (info->ferr_nf_fbd & FERR_NF_MASK);	if (!allErrors)		return;		/* if no error, return now */	/* ONLY ONE of the possible error bits will be set, as per the docs */	i5000_mc_printk(mci, KERN_WARNING,			"NON-FATAL ERRORS Found!!! 1st NON-FATAL Err "			"Reg= 0x%x\n", allErrors);	ue_errors = allErrors & FERR_NF_UNCORRECTABLE;	if (ue_errors) {		debugf0("\tUncorrected bits= 0x%x\n", ue_errors);		branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd);		channel = branch;		bank = NREC_BANK(info->nrecmema);		rank = NREC_RANK(info->nrecmema);		rdwr = NREC_RDWR(info->nrecmema);		ras = NREC_RAS(info->nrecmemb);		cas = NREC_CAS(info->nrecmemb);		debugf0			("\t\tCSROW= %d  Channels= %d,%d  (Branch= %d "			"DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",			rank, channel, channel + 1, branch >> 1, bank,			rdwr ? "Write" : "Read", ras, cas);		/* Form out message */		snprintf(msg, sizeof(msg),			 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "			 "CAS=%d, UE Err=0x%x)",			 branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas,			 ue_errors);		/* Call the helper to output message */		edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);	}	/* Check correctable errors */	ce_errors = allErrors & FERR_NF_CORRECTABLE;	if (ce_errors) {		debugf0("\tCorrected bits= 0x%x\n", ce_errors);		branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd);		channel = 0;		if (REC_ECC_LOCATOR_ODD(info->redmemb))			channel = 1;		/* Convert channel to be based from zero, instead of		 * from branch base of 0 */		channel += branch;		bank = REC_BANK(info->recmema);		rank = REC_RANK(info->recmema);		rdwr = REC_RDWR(info->recmema);		ras = REC_RAS(info->recmemb);		cas = REC_CAS(info->recmemb);		debugf0("\t\tCSROW= %d Channel= %d  (Branch %d "			"DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",			rank, channel, branch >> 1, bank,			rdwr ? "Write" : "Read", ras, cas);		/* Form out message */		snprintf(msg, sizeof(msg),			 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "			 "CAS=%d, CE Err=0x%x)", branch >> 1, bank,			 rdwr ? "Write" : "Read", ras, cas, ce_errors);		/* Call the helper to output message */		edac_mc_handle_fbd_ce(mci, rank, channel, msg);	}	/* See if any of the thermal errors have fired */	misc_errors = allErrors & FERR_NF_THERMAL;	if (misc_errors) {		i5000_printk(KERN_WARNING, "\tTHERMAL Error, bits= 0x%x\n",			misc_errors);	}	/* See if any of the thermal errors have fired */	misc_errors = allErrors & FERR_NF_NON_RETRY;	if (misc_errors) {		i5000_printk(KERN_WARNING, "\tNON-Retry  Errors, bits= 0x%x\n",			misc_errors);	}	/* See if any of the thermal errors have fired */	misc_errors = allErrors & FERR_NF_NORTH_CRC;	if (misc_errors) {		i5000_printk(KERN_WARNING,			"\tNORTHBOUND CRC  Error, bits= 0x%x\n",			misc_errors);	}	/* See if any of the thermal errors have fired */	misc_errors = allErrors & FERR_NF_SPD_PROTOCOL;	if (misc_errors) {		i5000_printk(KERN_WARNING,			"\tSPD Protocol  Error, bits= 0x%x\n",			misc_errors);	}	/* See if any of the thermal errors have fired */	misc_errors = allErrors & FERR_NF_DIMM_SPARE;	if (misc_errors) {		i5000_printk(KERN_WARNING, "\tDIMM-Spare  Error, bits= 0x%x\n",			misc_errors);	}}/* *	i5000_process_error_info	Process the error info that is *	in the 'info' structure, previously retrieved from hardware */static void i5000_process_error_info(struct mem_ctl_info *mci,				struct i5000_error_info *info,				int handle_errors){	/* First handle any fatal errors that occurred */	i5000_process_fatal_error_info(mci, info, handle_errors);	/* now handle any non-fatal errors that occurred */	i5000_process_nonfatal_error_info(mci, info, handle_errors);}/* *	i5000_clear_error	Retrieve any error from the hardware *				but do NOT process that error. *				Used for 'clearing' out of previous errors *				Called by the Core module. */static void i5000_clear_error(struct mem_ctl_info *mci){	struct i5000_error_info info;	i5000_get_error_info(mci, &info);}/* *	i5000_check_error	Retrieve and process errors reported by the *				hardware. Called by the Core module. */static void i5000_check_error(struct mem_ctl_info *mci){	struct i5000_error_info info;	debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);	i5000_get_error_info(mci, &info);	i5000_process_error_info(mci, &info, 1);}/* *	i5000_get_devices	Find and perform 'get' operation on the MCH's *			device/functions we want to reference for this driver * *			Need to 'get' device 16 func 1 and func 2 */static int i5000_get_devices(struct mem_ctl_info *mci, int dev_idx){	//const struct i5000_dev_info *i5000_dev = &i5000_devs[dev_idx];	struct i5000_pvt *pvt;	struct pci_dev *pdev;	pvt = mci->pvt_info;	/* Attempt to 'get' the MCH register we want */	pdev = NULL;	while (1) {		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,				PCI_DEVICE_ID_INTEL_I5000_DEV16, pdev);		/* End of list, leave */		if (pdev == NULL) {			i5000_printk(KERN_ERR,				"'system address,Process Bus' "				"device not found:"				"vendor 0x%x device 0x%x FUNC 1 "				"(broken BIOS?)\n",				PCI_VENDOR_ID_INTEL,				PCI_DEVICE_ID_INTEL_I5000_DEV16);			return 1;		}		/* Scan for device 16 func 1 */		if (PCI_FUNC(pdev->devfn) == 1)			break;	}	pvt->branchmap_werrors = pdev;	/* Attempt to 'get' the MCH register we want */	pdev = NULL;	while (1) {		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,				PCI_DEVICE_ID_INTEL_I5000_DEV16, pdev);		if (pdev == NULL) {			i5000_printk(KERN_ERR,				"MC: 'branchmap,control,errors' "				"device not found:"				"vendor 0x%x device 0x%x Func 2 "				"(broken BIOS?)\n",				PCI_VENDOR_ID_INTEL,				PCI_DEVICE_ID_INTEL_I5000_DEV16);			pci_dev_put(pvt->branchmap_werrors);			return 1;		}		/* Scan for device 16 func 1 */		if (PCI_FUNC(pdev->devfn) == 2)			break;	}	pvt->fsb_error_regs = pdev;	debugf1("System Address, processor bus- PCI Bus ID: %s  %x:%x\n",		pci_name(pvt->system_address),		pvt->system_address->vendor, pvt->system_address->device);	debugf1("Branchmap, control and errors - PCI Bus ID: %s  %x:%x\n",		pci_name(pvt->branchmap_werrors),		pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device);	debugf1("FSB Error Regs - PCI Bus ID: %s  %x:%x\n",		pci_name(pvt->fsb_error_regs),		pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device);	pdev = NULL;	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,			PCI_DEVICE_ID_I5000_BRANCH_0, pdev);	if (pdev == NULL) {		i5000_printk(KERN_ERR,			"MC: 'BRANCH 0' device not found:"			"vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",			PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_I5000_BRANCH_0);		pci_dev_put(pvt->branchmap_werrors);		pci_dev_put(pvt->fsb_error_regs);		return 1;	}	pvt->branch_0 = pdev;	/* If this device claims to have more than 2 channels then	 * fetch Branch 1's information	 */	if (pvt->maxch >= CHANNELS_PER_BRANCH) {		pdev = NULL;		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,				PCI_DEVICE_ID_I5000_BRANCH_1, pdev);		if (pdev == NULL) {			i5000_printk(KERN_ERR,				"MC: 'BRANCH 1' device not found:"				"vendor 0x%x device 0x%x Func 0 "				"(broken BIOS?)\n",				PCI_VENDOR_ID_INTEL,				PCI_DEVICE_ID_I5000_BRANCH_1);			pci_dev_put(pvt->branchmap_werrors);			pci_dev_put(pvt->fsb_error_regs);			pci_dev_put(pvt->branch_0);			return 1;		}		pvt->branch_1 = pdev;	}	return 0;}/* *	i5000_put_devices	'put' all the devices that we have *				reserved via 'get' */static void i5000_put_devices(struct mem_ctl_info *mci){	struct i5000_pvt *pvt;	pvt = mci->pvt_info;	pci_dev_put(pvt->branchmap_werrors);	/* FUNC 1 */	pci_dev_put(pvt->fsb_error_regs);	/* FUNC 2 */	pci_dev_put(pvt->branch_0);	/* DEV 21 */	/* Only if more than 2 channels do we release the second branch */	if (pvt->maxch >= CHANNELS_PER_BRANCH)		pci_dev_put(pvt->branch_1);	/* DEV 22 */}/* *	determine_amb_resent * *		the information is contained in NUM_MTRS different registers *		determineing which of the NUM_MTRS requires knowing *		which channel is in question * *	2 branches, each with 2 channels *		b0_ambpresent0 for channel '0' *		b0_ambpresent1 for channel '1' *		b1_ambpresent0 for channel '2' *		b1_ambpresent1 for channel '3' */static int determine_amb_present_reg(struct i5000_pvt *pvt, int channel){	int amb_present;	if (channel < CHANNELS_PER_BRANCH) {		if (channel & 0x1)			amb_present = pvt->b0_ambpresent1;		else			amb_present = pvt->b0_ambpresent0;	} else {		if (channel & 0x1)			amb_present = pvt->b1_ambpresent1;		else			amb_present = pvt->b1_ambpresent0;	}	return amb_present;}/* * determine_mtr(pvt, csrow, channel) * *	return the proper MTR register as determine by the csrow and channel desired */static int determine_mtr(struct i5000_pvt *pvt, int csrow, int channel){	int mtr;	if (channel < CHANNELS_PER_BRANCH)		mtr = pvt->b0_mtr[csrow >> 1];	else		mtr = pvt->b1_mtr[csrow >> 1];	return mtr;}/* */static void decode_mtr(int slot_row, u16 mtr){	int ans;	ans = MTR_DIMMS_PRESENT(mtr);	debugf2("\tMTR%d=0x%x:  DIMMs are %s\n", slot_row, mtr,		ans ? "Present" : "NOT Present");	if (!ans)		return;	debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));	debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));	debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANK(mtr) ? "double" : "single");	debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]);	debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);}static void handle_channel(struct i5000_pvt *pvt, int csrow, int channel,			struct i5000_dimm_info *dinfo){	int mtr;	int amb_present_reg;	int addrBits;	mtr = determine_mtr(pvt, csrow, channel);	if (MTR_DIMMS_PRESENT(mtr)) {		amb_present_reg = determine_amb_present_reg(pvt, channel);		/* Determine if there is  a  DIMM present in this DIMM slot */		if (amb_present_reg & (1 << (csrow >> 1))) {			dinfo->dual_rank = MTR_DIMM_RANK(mtr);			if (!((dinfo->dual_rank == 0) &&				((csrow & 0x1) == 0x1))) {				/* Start with the number of bits for a Bank				 * on the DRAM */				addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);				/* Add thenumber of ROW bits */				addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);				/* add the number of COLUMN bits */				addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);				addrBits += 6;	/* add 64 bits per DIMM */				addrBits -= 20;	/* divide by 2^^20 */				addrBits -= 3;	/* 8 bits per bytes */				dinfo->megabytes = 1 << addrBits;			}		}	}}/* *	calculate_dimm_size * *	also will output a DIMM matrix map, if debug is enabled, for viewing *	how the DIMMs are populated */static void calculate_dimm_size(struct i5000_pvt *pvt){	struct i5000_dimm_info *dinfo;	int csrow, max_csrows;	char *p, *mem_buffer;	int space, n;	int channel;	/* ================= Generate some debug output ================= */	space = PAGE_SIZE;	mem_buffer = p = kmalloc(space, GFP_KERNEL);	if (p == NULL) {		i5000_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n",			__FILE__, __func__);		return;	}	n = snprintf(p, space, "\n");	p += n;	space -= n;	/* Scan all the actual CSROWS (which is # of DIMMS * 2)	 * and calculate the information for each DIMM	 * Start with the highest csrow first, to display it first	 * and work toward the 0th csrow	 */	max_csrows = pvt->maxdimmperch * 2;	for (csrow = max_csrows - 1; csrow >= 0; csrow--) {		/* on an odd csrow, first output a 'boundary' marker,		 * then reset the message buffer  */		if (csrow & 0x1) {			n = snprintf(p, space, "---------------------------"				"--------------------------------");			p += n;			space -= n;			debugf2("%s\n", mem_buffer);			p = mem_buffer;			space = PAGE_SIZE;		}		n = snprintf(p, space, "csrow %2d    ", csrow);		p += n;		space -= n;		for (channel = 0; channel < pvt->maxch; channel++) {			dinfo = &pvt->dimm_info[csrow][channel];			handle_channel(pvt, csrow, channel, dinfo);			n = snprintf(p, space, "%4d MB   | ", dinfo->megabytes);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?