mce_64.c
	rip_msr = MSR_IA32_MCG_EIP;

	/* Log the machine checks left over from the previous reset.
	   This also clears all registers */
	do_machine_check(NULL, mce_bootlog ? -1 : -2);

	set_in_cr4(X86_CR4_MCE);

	if (cap & MCG_CTL_P)
		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);

	for (i = 0; i < banks; i++) {
		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
	}
}

/* Add per CPU specific workarounds here */
static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
{
	/* This should be disabled by the BIOS, but isn't always */
	if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
		/* disable GART TBL walk error reporting, which trips off
		   incorrectly with the IOMMU & 3ware & Cerberus. */
		clear_bit(10, &bank[4]);
		/* Lots of broken BIOSes around that don't clear them
		   by default and leave crap in there. Don't log. */
		mce_bootlog = 0;
	}
}

static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
{
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:
		mce_intel_feature_init(c);
		break;
	case X86_VENDOR_AMD:
		mce_amd_feature_init(c);
		break;
	default:
		break;
	}
}

/*
 * Called for each booted CPU to set up machine checks.
 * Must be called with preempt off.
 */
void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
{
	static cpumask_t mce_cpus = CPU_MASK_NONE;

	mce_cpu_quirks(c);

	if (mce_dont_init ||
	    cpu_test_and_set(smp_processor_id(), mce_cpus) ||
	    !mce_available(c))
		return;

	mce_init(NULL);
	mce_cpu_features(c);
}

/*
 * Character device to read and clear the MCE log.
 */

static DEFINE_SPINLOCK(mce_state_lock);
static int open_count;	/* #times opened */
static int open_exclu;	/* already open exclusive? */

static int mce_open(struct inode *inode, struct file *file)
{
	spin_lock(&mce_state_lock);

	if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
		spin_unlock(&mce_state_lock);
		return -EBUSY;
	}

	if (file->f_flags & O_EXCL)
		open_exclu = 1;
	open_count++;

	spin_unlock(&mce_state_lock);

	return nonseekable_open(inode, file);
}

static int mce_release(struct inode *inode, struct file *file)
{
	spin_lock(&mce_state_lock);

	open_count--;
	open_exclu = 0;

	spin_unlock(&mce_state_lock);

	return 0;
}

static void collect_tscs(void *data)
{
	unsigned long *cpu_tsc = (unsigned long *)data;
	rdtscll(cpu_tsc[smp_processor_id()]);
}

static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
			loff_t *off)
{
	unsigned long *cpu_tsc;
	static DECLARE_MUTEX(mce_read_sem);
	unsigned next;
	char __user *buf = ubuf;
	int i, err;

	cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
	if (!cpu_tsc)
		return -ENOMEM;

	down(&mce_read_sem);
	next = rcu_dereference(mcelog.next);

	/* Only supports full reads right now */
	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
		up(&mce_read_sem);
		kfree(cpu_tsc);
		return -EINVAL;
	}

	err = 0;
	for (i = 0; i < next; i++) {
		unsigned long start = jiffies;
		while (!mcelog.entry[i].finished) {
			if (time_after_eq(jiffies, start + 2)) {
				memset(mcelog.entry + i, 0, sizeof(struct mce));
				goto timeout;
			}
			cpu_relax();
		}
		smp_rmb();
		err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
		buf += sizeof(struct mce);
 timeout:
		;
	}

	memset(mcelog.entry, 0, next * sizeof(struct mce));
	mcelog.next = 0;

	synchronize_sched();

	/*
	 * Collect entries that were still getting written before the
	 * synchronize.
	 */
	on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
	for (i = next; i < MCE_LOG_LEN; i++) {
		if (mcelog.entry[i].finished &&
		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
			err |= copy_to_user(buf, mcelog.entry+i,
					    sizeof(struct mce));
			smp_rmb();
			buf += sizeof(struct mce);
			memset(&mcelog.entry[i], 0, sizeof(struct mce));
		}
	}
	up(&mce_read_sem);
	kfree(cpu_tsc);
	return err ? -EFAULT : buf - ubuf;
}
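/*
 * Usage sketch (not part of this file, kept out of the build with #if 0):
 * a minimal user-space reader for /dev/mcelog matching the contract of
 * mce_read() above.  Only full reads are supported, so the buffer must
 * cover the entire log; record and log sizes are queried through the
 * ioctls handled by mce_ioctl() below, assuming the kernel headers export
 * them via <asm/mce.h>.  Error handling is elided; this is an
 * illustration, not the canonical mcelog(8) implementation.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/mce.h>		/* struct mce, MCE_GET_* ioctls */

int main(void)
{
	/* Open read-only; O_EXCL would claim exclusive access
	   per mce_open() above. */
	int fd = open("/dev/mcelog", O_RDONLY);
	int reclen, loglen;
	char *buf;
	ssize_t n;

	ioctl(fd, MCE_GET_RECORD_LEN, &reclen);	/* sizeof(struct mce) */
	ioctl(fd, MCE_GET_LOG_LEN, &loglen);	/* MCE_LOG_LEN */

	buf = malloc(reclen * loglen);
	/* A successful read drains and clears the kernel-side log. */
	n = read(fd, buf, reclen * loglen);
	if (n >= 0)
		printf("%zd record(s) read\n", n / reclen);

	free(buf);
	close(fd);
	return 0;
}
#endif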
static unsigned int mce_poll(struct file *file, poll_table *wait)
{
	poll_wait(file, &mce_wait, wait);
	if (rcu_dereference(mcelog.next))
		return POLLIN | POLLRDNORM;
	return 0;
}

static int mce_ioctl(struct inode *i, struct file *f, unsigned int cmd,
		     unsigned long arg)
{
	int __user *p = (int __user *)arg;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(MCE_LOG_LEN, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;
		do {
			flags = mcelog.flags;
		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);
		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}

static const struct file_operations mce_chrdev_ops = {
	.open = mce_open,
	.release = mce_release,
	.read = mce_read,
	.poll = mce_poll,
	.ioctl = mce_ioctl,
};

static struct miscdevice mce_log_device = {
	MISC_MCELOG_MINOR,
	"mcelog",
	&mce_chrdev_ops,
};

static unsigned long old_cr4 __initdata;

void __init stop_mce(void)
{
	old_cr4 = read_cr4();
	clear_in_cr4(X86_CR4_MCE);
}

void __init restart_mce(void)
{
	if (old_cr4 & X86_CR4_MCE)
		set_in_cr4(X86_CR4_MCE);
}

/*
 * Old style boot options parsing. Only for compatibility.
 */
static int __init mcheck_disable(char *str)
{
	mce_dont_init = 1;
	return 1;
}

/* mce=off disables machine check. Note you can re-enable it later
   using sysfs.
   mce=TOLERANCELEVEL (number, see above)
   mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
   mce=nobootlog Don't log MCEs from before booting. */
static int __init mcheck_enable(char *str)
{
	if (!strcmp(str, "off"))
		mce_dont_init = 1;
	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
		mce_bootlog = str[0] == 'b';
	else if (isdigit(str[0]))
		get_option(&str, &tolerant);
	else
		printk(KERN_INFO "mce= argument %s ignored. Please use /sys\n",
		       str);
	return 1;
}

__setup("nomce", mcheck_disable);
__setup("mce=", mcheck_enable);

/*
 * Sysfs support
 */

/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
   Only one CPU is active at this time, the others get re-added later using
   CPU hotplug. */
static int mce_resume(struct sys_device *dev)
{
	mce_init(NULL);
	return 0;
}

/* Reinit MCEs after user configuration changes */
static void mce_restart(void)
{
	if (next_interval)
		cancel_delayed_work(&mcheck_work);
	/* Timer race is harmless here */
	on_each_cpu(mce_init, NULL, 1, 1);
	next_interval = check_interval * HZ;
	if (next_interval)
		schedule_delayed_work(&mcheck_work,
				      round_jiffies_relative(next_interval));
}

static struct sysdev_class mce_sysclass = {
	.resume = mce_resume,
	set_kset_name("machinecheck"),
};

DEFINE_PER_CPU(struct sys_device, device_mce);

/* Why are there no generic functions for this? */
#define ACCESSOR(name, var, start) \
	static ssize_t show_ ## name(struct sys_device *s, char *buf) {	\
		return sprintf(buf, "%lx\n", (unsigned long)var);	\
	}								\
	static ssize_t set_ ## name(struct sys_device *s,		\
				    const char *buf, size_t siz) {	\
		char *end;						\
		unsigned long new = simple_strtoul(buf, &end, 0);	\
		if (end == buf)						\
			return -EINVAL;					\
		var = new;						\
		start;							\
		return end-buf;						\
	}								\
	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);

/* TBD should generate these dynamically based on number of available banks */
ACCESSOR(bank0ctl,bank[0],mce_restart())
ACCESSOR(bank1ctl,bank[1],mce_restart())
ACCESSOR(bank2ctl,bank[2],mce_restart())
ACCESSOR(bank3ctl,bank[3],mce_restart())
ACCESSOR(bank4ctl,bank[4],mce_restart())
ACCESSOR(bank5ctl,bank[5],mce_restart())

static ssize_t show_trigger(struct sys_device *s, char *buf)
{
	strcpy(buf, trigger);
	strcat(buf, "\n");
	return strlen(trigger) + 1;
}

static ssize_t set_trigger(struct sys_device *s, const char *buf, size_t siz)
{
	char *p;
	int len;

	strncpy(trigger, buf, sizeof(trigger));
	trigger[sizeof(trigger)-1] = 0;
	len = strlen(trigger);
	/* Strip a trailing newline, if any. strchr() may return NULL,
	   so test the pointer, not what it points to. */
	p = strchr(trigger, '\n');
	if (p)
		*p = 0;
	return len;
}

static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
ACCESSOR(tolerant,tolerant,)
ACCESSOR(check_interval,check_interval,mce_restart())
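/*
 * Illustration (not in the original source, disabled with #if 0): a sketch
 * of what ACCESSOR(check_interval,check_interval,mce_restart()) above
 * expands to.  The macro wires a 0644 sysfs attribute directly to the
 * variable and runs the 'start' hook after every write:
 */
#if 0
static ssize_t show_check_interval(struct sys_device *s, char *buf)
{
	return sprintf(buf, "%lx\n", (unsigned long)check_interval);
}
static ssize_t set_check_interval(struct sys_device *s, const char *buf,
				  size_t siz)
{
	char *end;
	unsigned long new = simple_strtoul(buf, &end, 0);
	if (end == buf)
		return -EINVAL;
	check_interval = new;
	mce_restart();		/* the 'start' hook passed to ACCESSOR() */
	return end - buf;
}
static SYSDEV_ATTR(check_interval, 0644, show_check_interval,
		   set_check_interval);
#endif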
static struct sysdev_attribute *mce_attributes[] = {
	&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
	&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
	&attr_tolerant, &attr_check_interval, &attr_trigger,
	NULL
};

static cpumask_t mce_device_initialized = CPU_MASK_NONE;

/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
static __cpuinit int mce_create_device(unsigned int cpu)
{
	int err;
	int i;

	if (!mce_available(&boot_cpu_data))
		return -EIO;

	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
	per_cpu(device_mce,cpu).id = cpu;
	per_cpu(device_mce,cpu).cls = &mce_sysclass;

	err = sysdev_register(&per_cpu(device_mce,cpu));
	if (err)
		return err;

	for (i = 0; mce_attributes[i]; i++) {
		err = sysdev_create_file(&per_cpu(device_mce,cpu),
					 mce_attributes[i]);
		if (err)
			goto error;
	}
	cpu_set(cpu, mce_device_initialized);

	return 0;
error:
	while (i--) {
		sysdev_remove_file(&per_cpu(device_mce,cpu),
				   mce_attributes[i]);
	}
	sysdev_unregister(&per_cpu(device_mce,cpu));

	return err;
}

static void mce_remove_device(unsigned int cpu)
{
	int i;

	if (!cpu_isset(cpu, mce_device_initialized))
		return;

	for (i = 0; mce_attributes[i]; i++)
		sysdev_remove_file(&per_cpu(device_mce,cpu),
				   mce_attributes[i]);
	sysdev_unregister(&per_cpu(device_mce,cpu));
	cpu_clear(cpu, mce_device_initialized);
}

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int
mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		mce_create_device(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		mce_remove_device(cpu);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block mce_cpu_notifier = {
	.notifier_call = mce_cpu_callback,
};

static __init int mce_init_device(void)
{
	int err;
	int i = 0;

	if (!mce_available(&boot_cpu_data))
		return -EIO;

	err = sysdev_class_register(&mce_sysclass);
	if (err)
		return err;

	for_each_online_cpu(i) {
		err = mce_create_device(i);
		if (err)
			return err;
	}

	register_hotcpu_notifier(&mce_cpu_notifier);
	misc_register(&mce_log_device);
	return err;
}

device_initcall(mce_init_device);
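/*
 * Usage sketch (not part of this file, disabled with #if 0): driving the
 * sysfs controls registered above from user space.  The paths follow from
 * the "machinecheck" sysdev class and the per-CPU id set in
 * mce_create_device(), assuming sysfs is mounted at /sys and CPU 0 is
 * online.  The example values are arbitrary; error handling is elided.
 */
#if 0
#include <stdio.h>

int main(void)
{
	FILE *f;
	unsigned long tol;

	/* Read the current tolerance level of CPU 0 (printed in hex
	   by show_tolerant()). */
	f = fopen("/sys/devices/system/machinecheck/machinecheck0/tolerant",
		  "r");
	fscanf(f, "%lx", &tol);
	fclose(f);
	printf("tolerant = %lx\n", tol);

	/* Change the poll interval to 300 seconds; set_check_interval()
	   calls mce_restart(), which reschedules the poll timer. */
	f = fopen("/sys/devices/system/machinecheck/machinecheck0/check_interval",
		  "w");
	fprintf(f, "300\n");
	fclose(f);
	return 0;
}
#endif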