📄 rtasd.c
字号:
/* * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * Communication to userspace based on kernel/printk.c */#include <linux/types.h>#include <linux/errno.h>#include <linux/sched.h>#include <linux/kernel.h>#include <linux/poll.h>#include <linux/proc_fs.h>#include <linux/init.h>#include <linux/vmalloc.h>#include <asm/uaccess.h>#include <asm/io.h>#include <asm/rtas.h>#include <asm/prom.h>#if 0#define DEBUG(A...) printk(KERN_ERR A)#else#define DEBUG(A...)#endifstatic spinlock_t rtas_log_lock = SPIN_LOCK_UNLOCKED;DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait);#define LOG_NUMBER 64 /* must be a power of two */#define LOG_NUMBER_MASK (LOG_NUMBER-1)static char *rtas_log_buf;static unsigned long rtas_log_start;static unsigned long rtas_log_size;static int surveillance_requested;static unsigned int rtas_event_scan_rate;static unsigned int rtas_error_log_max;#define EVENT_SCAN_ALL_EVENTS 0xf0000000#define SURVEILLANCE_TOKEN 9000#define SURVEILLANCE_TIMEOUT 1#define SURVEILLANCE_SCANRATE 1/* * Since we use 32 bit RTAS, the physical address of this must be below * 4G or else bad things happen. Allocate this in the kernel data and * make it big enough. */#define RTAS_ERROR_LOG_MAX 1024static unsigned char logdata[RTAS_ERROR_LOG_MAX];static int rtas_log_open(struct inode * inode, struct file * file){ return 0;}static int rtas_log_release(struct inode * inode, struct file * file){ return 0;}static ssize_t rtas_log_read(struct file * file, char * buf, size_t count, loff_t *ppos){ int error; char *tmp; unsigned long offset; if (!buf || count < rtas_error_log_max) return -EINVAL; count = rtas_error_log_max; error = verify_area(VERIFY_WRITE, buf, count); if (error) return -EINVAL; tmp = kmalloc(rtas_error_log_max, GFP_KERNEL); if (!tmp) return -ENOMEM; error = wait_event_interruptible(rtas_log_wait, rtas_log_size); if (error) goto out; spin_lock(&rtas_log_lock); offset = rtas_error_log_max * (rtas_log_start & LOG_NUMBER_MASK); memcpy(tmp, &rtas_log_buf[offset], count); rtas_log_start += 1; rtas_log_size -= 1; spin_unlock(&rtas_log_lock); copy_to_user(buf, tmp, count); error = count;out: kfree(tmp); return error;}static unsigned int rtas_log_poll(struct file *file, poll_table * wait){ poll_wait(file, &rtas_log_wait, wait); if (rtas_log_size) return POLLIN | POLLRDNORM; return 0;}struct file_operations proc_rtas_log_operations = { read: rtas_log_read, poll: rtas_log_poll, open: rtas_log_open, release: rtas_log_release,};#define RTAS_ERR KERN_ERR "RTAS: "/* Extended error log header (12 bytes) */struct exthdr { unsigned int valid:1; unsigned int unrecoverable:1; unsigned int recoverable:1; unsigned int unrecoverable_bypassed:1; /* i.e. degraded performance */ unsigned int predictive:1; unsigned int newlog:1; unsigned int bigendian:1; /* always 1 */ unsigned int /* reserved */:1; unsigned int platform_specific:1; /* only in version 3+ */ unsigned int /* reserved */:3; unsigned int platform_value:4; /* valid iff platform_specific */ unsigned int power_pc:1; /* always 1 */ unsigned int /* reserved */:2; unsigned int addr_invalid:1; /* failing_address is invalid */ unsigned int format_type:4;#define EXTLOG_FMT_CPU 1#define EXTLOG_FMT_MEMORY 2#define EXTLOG_FMT_IO 3#define EXTLOG_FMT_POST 4#define EXTLOG_FMT_ENV 5#define EXTLOG_FMT_POW 6#define EXTLOG_FMT_IBMDIAG 12#define EXTLOG_FMT_IBMSP 13 /* This group is in version 3+ only */ unsigned int non_hardware:1; /* Firmware or software is suspect */ unsigned int hot_plug:1; /* Failing component may be hot plugged */ unsigned int group_failure:1; /* Group of components should be replaced */ unsigned int /* reserved */:1; unsigned int residual:1; /* Residual error from previous boot (maybe a crash) */ unsigned int boot:1; /* Error during boot */ unsigned int config_change:1; /* Configuration changed since last boot */ unsigned int post:1; /* Error during POST */ unsigned int bcdtime:32; /* Time of error in BCD HHMMSS00 */ unsigned int bcddate:32; /* Time of error in BCD YYYYMMDD */};struct cpuhdr { unsigned int internal:1; unsigned int intcache:1; unsigned int extcache_parity:1; /* or multi-bit ECC */ unsigned int extcache_ecc:1; unsigned int sysbus_timeout:1; unsigned int io_timeout:1; unsigned int sysbus_parity:1; unsigned int sysbus_protocol:1; unsigned int cpuid:8; unsigned int element:16; unsigned int failing_address_hi:32; unsigned int failing_address_lo:32; /* These are version 4+ */ unsigned int try_reboot:1; /* 1 => fault may be fixed by reboot */ unsigned int /* reserved */:7; /* 15 bytes reserved here */};struct memhdr { unsigned int uncorrectable:1; unsigned int ECC:1; unsigned int threshold_exceeded:1; unsigned int control_internal:1; unsigned int bad_address:1; unsigned int bad_data:1; unsigned int bus:1; unsigned int timeout:1; unsigned int sysbus_parity:1; unsigned int sysbus_timeout:1; unsigned int sysbus_protocol:1; unsigned int hostbridge_timeout:1; unsigned int hostbridge_parity:1; unsigned int reserved1:1; unsigned int support:1; unsigned int sysbus_internal:1; unsigned int mem_controller_detected:8; /* who detected fault? */ unsigned int mem_controller_faulted:8; /* who caused fault? */ unsigned int failing_address_hi:32; unsigned int failing_address_lo:32; unsigned int ecc_syndrome:16; unsigned int memory_card:8; unsigned int reserved2:8; unsigned int sub_elements:32; /* one bit per element */ unsigned int element:16;};struct iohdr { unsigned int bus_addr_parity:1; unsigned int bus_data_parity:1; unsigned int bus_timeout:1; unsigned int bridge_internal:1; unsigned int non_pci:1; /* i.e. secondary bus such as ISA */ unsigned int mezzanine_addr_parity:1; unsigned int mezzanine_data_parity:1; unsigned int mezzanine_timeout:1; unsigned int bridge_via_sysbus:1; unsigned int bridge_via_mezzanine:1; unsigned int bridge_via_expbus:1; unsigned int detected_by_expbus:1; unsigned int expbus_data_parity:1; unsigned int expbus_timeout:1; unsigned int expbus_connection_failure:1; unsigned int expbus_not_operating:1; /* IOA signalling the error */ unsigned int pci_sig_busno:8; unsigned int pci_sig_devfn:8; unsigned int pci_sig_deviceid:16; unsigned int pci_sig_vendorid:16; unsigned int pci_sig_revisionid:8; unsigned int pci_sig_slot:8; /* 00 => system board, ff => multiple */ /* IOA sending at time of error */ unsigned int pci_send_busno:8; unsigned int pci_send_devfn:8; unsigned int pci_send_deviceid:16; unsigned int pci_send_vendorid:16; unsigned int pci_send_revisionid:8; unsigned int pci_send_slot:8; /* 00 => system board, ff => multiple */};struct posthdr { unsigned int firmware:1; unsigned int config:1; unsigned int cpu:1; unsigned int memory:1; unsigned int io:1; unsigned int keyboard:1; unsigned int mouse:1; unsigned int display:1; unsigned int ipl_floppy:1; unsigned int ipl_controller:1; unsigned int ipl_cdrom:1; unsigned int ipl_disk:1; unsigned int ipl_net:1; unsigned int ipl_other:1; unsigned int /* reserved */:1; unsigned int firmware_selftest:1; char devname[12]; unsigned int post_code:4; unsigned int firmware_rev:2; unsigned int loc_code:8; /* currently unused */};struct epowhdr { unsigned int epow_sensor_value:32; unsigned int sensor:1; unsigned int power_fault:1; unsigned int fan:1; unsigned int temp:1; unsigned int redundancy:1; unsigned int CUoD:1; unsigned int /* reserved */:2; unsigned int general:1; unsigned int power_loss:1; unsigned int power_supply:1; unsigned int power_switch:1; unsigned int /* reserved */:4; unsigned int /* reserved */:16; unsigned int sensor_token:32; unsigned int sensor_index:32; unsigned int sensor_value:32; unsigned int sensor_status:32;};struct pm_eventhdr { unsigned int event_id:32;};struct sphdr { unsigned int ibm:32; /* "IBM\0" */ unsigned int timeout:1; unsigned int i2c_bus:1; unsigned int i2c_secondary_bus:1; unsigned int sp_memory:1; unsigned int sp_registers:1; unsigned int sp_communication:1; unsigned int sp_firmware:1; unsigned int sp_hardware:1; unsigned int vpd_eeprom:1; unsigned int op_panel:1; unsigned int power_controller:1; unsigned int fan_sensor:1; unsigned int thermal_sensor:1; unsigned int voltage_sensor:1; unsigned int reserved1:2; unsigned int serial_port:1; unsigned int nvram:1; unsigned int rtc:1; unsigned int jtag:1; unsigned int tod_battery:1; unsigned int reserved2:1; unsigned int heartbeat:1; unsigned int surveillance:1; unsigned int pcn_connection:1; /* power control network */ unsigned int pcn_node:1; unsigned int reserved3:2; unsigned int pcn_access:1; unsigned int reserved:3; unsigned int sensor_token:32; /* zero if undef */ unsigned int sensor_index:32; /* zero if undef */};static char *severity_names[] = { "NO ERROR", "EVENT", "WARNING", "ERROR_SYNC", "ERROR", "FATAL", "(6)", "(7)"};static char *rtas_disposition_names[] = { "FULLY RECOVERED", "LIMITED RECOVERY", "NOT RECOVERED", "(4)"};static char *entity_names[] = { /* for initiator & targets */ "UNKNOWN", "CPU", "PCI", "ISA", "MEMORY", "POWER MANAGEMENT", "HOT PLUG", "(7)", "(8)", "(9)", "(10)", "(11)", "(12)", "(13)", "(14)", "(15)"};static char *error_type[] = { /* Not all types covered here so need to bounds check */ "UNKNOWN", "RETRY", "TCE_ERR", "INTERN_DEV_FAIL", "TIMEOUT", "DATA_PARITY", "ADDR_PARITY", "CACHE_PARITY", "ADDR_INVALID", "ECC_UNCORR", "ECC_CORR",};static char *rtas_error_type(int type){ if (type < 11) return error_type[type]; if (type == 64) return "SENSOR"; if (type >=96 && type <= 159) return "POWER"; return error_type[0];}static void printk_cpu_failure(int version, struct exthdr *exthdr, char *data){ struct cpuhdr cpuhdr; memcpy(&cpuhdr, data, sizeof(cpuhdr)); if (cpuhdr.internal) printk(RTAS_ERR "Internal error (not cache)\n"); if (cpuhdr.intcache) printk(RTAS_ERR "Internal cache\n"); if (cpuhdr.extcache_parity) printk(RTAS_ERR "External cache parity (or multi-bit)\n"); if (cpuhdr.extcache_ecc) printk(RTAS_ERR "External cache ECC\n"); if (cpuhdr.sysbus_timeout) printk(RTAS_ERR "System bus timeout\n"); if (cpuhdr.io_timeout) printk(RTAS_ERR "I/O timeout\n"); if (cpuhdr.sysbus_parity) printk(RTAS_ERR "System bus parity\n"); if (cpuhdr.sysbus_protocol) printk(RTAS_ERR "System bus protocol/transfer\n"); printk(RTAS_ERR "CPU id: %d\n", cpuhdr.cpuid); printk(RTAS_ERR "Failing element: 0x%04x\n", cpuhdr.element); if (!exthdr->addr_invalid) printk(RTAS_ERR "Failing address: %08x%08x\n", cpuhdr.failing_address_hi, cpuhdr.failing_address_lo); if (version >= 4 && cpuhdr.try_reboot) printk(RTAS_ERR "A reboot of the system may correct the problem\n");}static void printk_mem_failure(int version, struct exthdr *exthdr, char *data){ struct memhdr memhdr; memcpy(&memhdr, data, sizeof(memhdr)); if (memhdr.uncorrectable) printk(RTAS_ERR "Uncorrectable Memory error\n"); if (memhdr.ECC) printk(RTAS_ERR "ECC Correctable error\n"); if (memhdr.threshold_exceeded) printk(RTAS_ERR "Correctable threshold exceeded\n"); if (memhdr.control_internal) printk(RTAS_ERR "Memory Controller internal error\n"); if (memhdr.bad_address) printk(RTAS_ERR "Memory Address error\n"); if (memhdr.bad_data) printk(RTAS_ERR "Memory Data error\n"); if (memhdr.bus) printk(RTAS_ERR "Memory bus/switch internal error\n"); if (memhdr.timeout) printk(RTAS_ERR "Memory timeout\n"); if (memhdr.sysbus_parity) printk(RTAS_ERR "System bus parity\n"); if (memhdr.sysbus_timeout) printk(RTAS_ERR "System bus timeout\n"); if (memhdr.sysbus_protocol) printk(RTAS_ERR "System bus protocol/transfer\n"); if (memhdr.hostbridge_timeout) printk(RTAS_ERR "I/O Host Bridge timeout\n"); if (memhdr.hostbridge_parity) printk(RTAS_ERR "I/O Host Bridge parity\n"); if (memhdr.support) printk(RTAS_ERR "System support function error\n"); if (memhdr.sysbus_internal) printk(RTAS_ERR "System bus internal hardware/switch error\n"); printk(RTAS_ERR "Memory Controller that detected failure: %d\n", memhdr.mem_controller_detected); printk(RTAS_ERR "Memory Controller that faulted: %d\n", memhdr.mem_controller_faulted); if (!exthdr->addr_invalid) printk(RTAS_ERR "Failing address: 0x%016x%016x\n", memhdr.failing_address_hi, memhdr.failing_address_lo); printk(RTAS_ERR "ECC syndrome bits: 0x%04x\n", memhdr.ecc_syndrome); printk(RTAS_ERR "Memory Card: %d\n", memhdr.memory_card); printk(RTAS_ERR "Failing element: 0x%04x\n", memhdr.element); printk(RTAS_ERR "Sub element bits: 0x%08x\n", memhdr.sub_elements);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -