📄 rtasd.c
字号:
static void printk_io_failure(int version, struct exthdr *exthdr, char *data){ struct iohdr iohdr; memcpy(&iohdr, data, sizeof(iohdr)); if (iohdr.bus_addr_parity) printk(RTAS_ERR "I/O bus address parity\n"); if (iohdr.bus_data_parity) printk(RTAS_ERR "I/O bus data parity\n"); if (iohdr.bus_timeout) printk(RTAS_ERR "I/O bus timeout, access or other\n"); if (iohdr.bridge_internal) printk(RTAS_ERR "I/O bus bridge/device internal\n"); if (iohdr.non_pci) printk(RTAS_ERR "Signaling IOA is a PCI to non-PCI bridge (e.g. ISA)\n"); if (iohdr.mezzanine_addr_parity) printk(RTAS_ERR "Mezzanine/System bus address parity\n"); if (iohdr.mezzanine_data_parity) printk(RTAS_ERR "Mezzanine/System bus data parity\n"); if (iohdr.mezzanine_timeout) printk(RTAS_ERR "Mezzanine/System bus timeout, transfer or protocol\n"); if (iohdr.bridge_via_sysbus) printk(RTAS_ERR "Bridge is connected to system bus\n"); if (iohdr.bridge_via_mezzanine) printk(RTAS_ERR "Bridge is connected to memory controller via mezzanine bus\n"); if (iohdr.bridge_via_expbus) printk(RTAS_ERR "Bridge is connected to I/O expansion bus\n"); if (iohdr.detected_by_expbus) printk(RTAS_ERR "Error on system bus detected by I/O expansion bus controller\n"); if (iohdr.expbus_data_parity) printk(RTAS_ERR "I/O expansion bus data error\n"); if (iohdr.expbus_timeout) printk(RTAS_ERR "I/O expansion bus timeout, access or other\n"); if (iohdr.expbus_connection_failure) printk(RTAS_ERR "I/O expansion bus connection failure\n"); if (iohdr.expbus_not_operating) printk(RTAS_ERR "I/O expansion unit not in an operating state (powered off, off-line)\n"); printk(RTAS_ERR "IOA Signaling the error: %d:%d.%d vendor:%04x device:%04x rev:%02x slot:%d\n", iohdr.pci_sig_busno, iohdr.pci_sig_devfn >> 3, iohdr.pci_sig_devfn & 0x7, iohdr.pci_sig_vendorid, iohdr.pci_sig_deviceid, iohdr.pci_sig_revisionid, iohdr.pci_sig_slot); printk(RTAS_ERR "IOA Sending during the error: %d:%d.%d vendor:%04x device:%04x rev:%02x slot:%d\n", iohdr.pci_send_busno, iohdr.pci_send_devfn >> 3, iohdr.pci_send_devfn & 0x7, iohdr.pci_send_vendorid, iohdr.pci_send_deviceid, iohdr.pci_send_revisionid, iohdr.pci_send_slot);}static void printk_post_failure(int version, struct exthdr *exthdr, char *data){ struct posthdr posthdr; memcpy(&posthdr, data, sizeof(posthdr)); if (posthdr.devname[0]) printk(RTAS_ERR "Failing Device: %s\n", posthdr.devname); if (posthdr.firmware) printk(RTAS_ERR "Firmware Error\n"); if (posthdr.config) printk(RTAS_ERR "Configuration Error\n"); if (posthdr.cpu) printk(RTAS_ERR "CPU POST Error\n"); if (posthdr.memory) printk(RTAS_ERR "Memory POST Error\n"); if (posthdr.io) printk(RTAS_ERR "I/O Subsystem POST Error\n"); if (posthdr.keyboard) printk(RTAS_ERR "Keyboard POST Error\n"); if (posthdr.mouse) printk(RTAS_ERR "Mouse POST Error\n"); if (posthdr.display) printk(RTAS_ERR "Display POST Error\n"); if (posthdr.ipl_floppy) printk(RTAS_ERR "Floppy IPL Error\n"); if (posthdr.ipl_controller) printk(RTAS_ERR "Drive Controller Error during IPL\n"); if (posthdr.ipl_cdrom) printk(RTAS_ERR "CDROM IPL Error\n"); if (posthdr.ipl_disk) printk(RTAS_ERR "Disk IPL Error\n"); if (posthdr.ipl_net) printk(RTAS_ERR "Network IPL Error\n"); if (posthdr.ipl_other) printk(RTAS_ERR "Other (tape,flash) IPL Error\n"); if (posthdr.firmware_selftest) printk(RTAS_ERR "Self-test error in firmware extended diagnostics\n"); printk(RTAS_ERR "POST Code: %d\n", posthdr.post_code); printk(RTAS_ERR "Firmware Revision Code: %d\n", posthdr.firmware_rev);}static void printk_epow_warning(int version, struct exthdr *exthdr, char *data){ struct epowhdr epowhdr; memcpy(&epowhdr, data, sizeof(epowhdr)); printk(RTAS_ERR "EPOW Sensor Value: 0x%08x\n", epowhdr.epow_sensor_value); if (epowhdr.sensor) { printk(RTAS_ERR "EPOW detected by a sensor\n"); printk(RTAS_ERR "Sensor Token: 0x%08x\n", epowhdr.sensor_token); printk(RTAS_ERR "Sensor Index: 0x%08x\n", epowhdr.sensor_index); printk(RTAS_ERR "Sensor Value: 0x%08x\n", epowhdr.sensor_value); printk(RTAS_ERR "Sensor Status: 0x%08x\n", epowhdr.sensor_status); } if (epowhdr.power_fault) printk(RTAS_ERR "EPOW caused by a power fault\n"); if (epowhdr.fan) printk(RTAS_ERR "EPOW caused by fan failure\n"); if (epowhdr.temp) printk(RTAS_ERR "EPOW caused by over-temperature condition\n"); if (epowhdr.redundancy) printk(RTAS_ERR "EPOW warning due to loss of redundancy\n"); if (epowhdr.CUoD) printk(RTAS_ERR "EPOW warning due to CUoD Entitlement Exceeded\n"); if (epowhdr.general) printk(RTAS_ERR "EPOW general power fault\n"); if (epowhdr.power_loss) printk(RTAS_ERR "EPOW power fault due to loss of power source\n"); if (epowhdr.power_supply) printk(RTAS_ERR "EPOW power fault due to internal power supply failure\n"); if (epowhdr.power_switch) printk(RTAS_ERR "EPOW power fault due to activation of power switch\n");}static void printk_pm_event(int version, struct exthdr *exthdr, char *data){ struct pm_eventhdr pm_eventhdr; memcpy(&pm_eventhdr, data, sizeof(pm_eventhdr)); printk(RTAS_ERR "Event id: 0x%08x\n", pm_eventhdr.event_id);}static void printk_sp_log_msg(int version, struct exthdr *exthdr, char *data){ struct sphdr sphdr; u32 eyecatcher; memcpy(&sphdr, data, sizeof(sphdr)); eyecatcher = sphdr.ibm; if (strcmp((char *)&eyecatcher, "IBM") != 0) printk(RTAS_ERR "This log entry may be corrupt (IBM signature malformed)\n"); if (sphdr.timeout) printk(RTAS_ERR "Timeout on communication response from service processor\n"); if (sphdr.i2c_bus) printk(RTAS_ERR "I2C general bus error\n"); if (sphdr.i2c_secondary_bus) printk(RTAS_ERR "I2C secondary bus error\n"); if (sphdr.sp_memory) printk(RTAS_ERR "Internal service processor memory error\n"); if (sphdr.sp_registers) printk(RTAS_ERR "Service processor error accessing special registers\n"); if (sphdr.sp_communication) printk(RTAS_ERR "Service processor reports unknown communcation error\n"); if (sphdr.sp_firmware) printk(RTAS_ERR "Internal service processor firmware error\n"); if (sphdr.sp_hardware) printk(RTAS_ERR "Other internal service processor hardware error\n"); if (sphdr.vpd_eeprom) printk(RTAS_ERR "Service processor error accessing VPD EEPROM\n"); if (sphdr.op_panel) printk(RTAS_ERR "Service processor error accessing Operator Panel\n"); if (sphdr.power_controller) printk(RTAS_ERR "Service processor error accessing Power Controller\n"); if (sphdr.fan_sensor) printk(RTAS_ERR "Service processor error accessing Fan Sensor\n"); if (sphdr.thermal_sensor) printk(RTAS_ERR "Service processor error accessing Thermal Sensor\n"); if (sphdr.voltage_sensor) printk(RTAS_ERR "Service processor error accessing Voltage Sensor\n"); if (sphdr.serial_port) printk(RTAS_ERR "Service processor error accessing serial port\n"); if (sphdr.nvram) printk(RTAS_ERR "Service processor detected NVRAM error\n"); if (sphdr.rtc) printk(RTAS_ERR "Service processor error accessing real time clock\n"); if (sphdr.jtag) printk(RTAS_ERR "Service processor error accessing JTAG/COP\n"); if (sphdr.tod_battery) printk(RTAS_ERR "Service processor or RTAS detects loss of voltage from TOD battery\n"); if (sphdr.heartbeat) printk(RTAS_ERR "Loss of heartbeat from Service processor\n"); if (sphdr.surveillance) printk(RTAS_ERR "Service processor detected a surveillance timeout\n"); if (sphdr.pcn_connection) printk(RTAS_ERR "Power Control Network general connection failure\n"); if (sphdr.pcn_node) printk(RTAS_ERR "Power Control Network node failure\n"); if (sphdr.pcn_access) printk(RTAS_ERR "Service processor error accessing Power Control Network\n"); if (sphdr.sensor_token) printk(RTAS_ERR "Sensor Token 0x%08x (%d)\n", sphdr.sensor_token, sphdr.sensor_token); if (sphdr.sensor_index) printk(RTAS_ERR "Sensor Index 0x%08x (%d)\n", sphdr.sensor_index, sphdr.sensor_index);}static void printk_ext_raw_data(char *data){ int i; printk(RTAS_ERR "raw ext data: "); for (i = 0; i < 40; i++) { printk("%02x", data[i]); } printk("\n");}static void printk_ext_log_data(int version, char *buf){ char *data = buf+12; struct exthdr exthdr; memcpy(&exthdr, buf, sizeof(exthdr)); /* copy for alignment */ if (!exthdr.valid) { if (exthdr.bigendian && exthdr.power_pc) printk(RTAS_ERR "extended log data is not valid\n"); else printk(RTAS_ERR "extended log data can not be decoded\n"); return; } /* Dump useful stuff in the exthdr */ printk(RTAS_ERR "Status:%s%s%s%s%s\n", exthdr.unrecoverable ? " unrecoverable" : "", exthdr.recoverable ? " recoverable" : "", exthdr.unrecoverable_bypassed ? " bypassed" : "", exthdr.predictive ? " predictive" : "", exthdr.newlog ? " new" : ""); printk(RTAS_ERR "Date/Time: %08x %08x\n", exthdr.bcddate, exthdr.bcdtime); switch (exthdr.format_type) { case EXTLOG_FMT_CPU: printk(RTAS_ERR "CPU Failure\n"); printk_cpu_failure(version, &exthdr, data); break; case EXTLOG_FMT_MEMORY: printk(RTAS_ERR "Memory Failure\n"); printk_mem_failure(version, &exthdr, data); break; case EXTLOG_FMT_IO: printk(RTAS_ERR "I/O Failure\n"); printk_io_failure(version, &exthdr, data); break; case EXTLOG_FMT_POST: printk(RTAS_ERR "POST Failure\n"); printk_post_failure(version, &exthdr, data); break; case EXTLOG_FMT_ENV: printk(RTAS_ERR "Environment and Power Warning\n"); printk_epow_warning(version, &exthdr, data); break; case EXTLOG_FMT_POW: printk(RTAS_ERR "Power Management Event\n"); printk_pm_event(version, &exthdr, data); break; case EXTLOG_FMT_IBMDIAG: printk(RTAS_ERR "IBM Diagnostic Log\n"); printk_ext_raw_data(data); break; case EXTLOG_FMT_IBMSP: printk(RTAS_ERR "IBM Service Processor Log\n"); printk_sp_log_msg(version, &exthdr, data); break; default: printk(RTAS_ERR "Unknown ext format type %d\n", exthdr.format_type); printk_ext_raw_data(data); break; }}/* Yeah, the output here is ugly, but we want a CE to be * able to grep RTAS /var/log/messages and see all the info * collected together with obvious begin/end. */static void printk_log_rtas(char *buf){ struct rtas_error_log *err = (struct rtas_error_log *)buf; printk(RTAS_ERR "-------- event-scan begin --------\n"); if (strcmp(buf+8+40, "IBM") == 0) { /* Location code follows */ char *loc = buf+8+40+4; if (*loc >= 'A' && *loc <= 'Z') /* Sanity check */ printk(RTAS_ERR "Location Code: %s\n", loc); } printk(RTAS_ERR "%s: (%s) type: %s\n", severity_names[err->severity], rtas_disposition_names[err->disposition], rtas_error_type(err->type)); printk(RTAS_ERR "initiator: %s target: %s\n", entity_names[err->initiator], entity_names[err->target]); if (err->extended_log_length) printk_ext_log_data(err->version, buf+8); printk(RTAS_ERR "-------- event-scan end ----------\n");}static void log_rtas(char *buf){ unsigned long offset; DEBUG("logging rtas event\n"); /* Temporary -- perhaps we can do this when nobody has the log open? */ printk_log_rtas(buf); spin_lock(&rtas_log_lock); offset = rtas_error_log_max * ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK); memcpy(&rtas_log_buf[offset], buf, rtas_error_log_max); if (rtas_log_size < LOG_NUMBER) rtas_log_size += 1; else rtas_log_start += 1; spin_unlock(&rtas_log_lock); wake_up_interruptible(&rtas_log_wait);}static int enable_surveillance(void){ int error; error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, SURVEILLANCE_TOKEN, 0, SURVEILLANCE_TIMEOUT); if (error) { printk(KERN_ERR "rtasd: could not enable surveillance\n"); return -1; } rtas_event_scan_rate = SURVEILLANCE_SCANRATE; return 0;}static int get_eventscan_parms(void){ struct device_node *node; int *ip; node = find_path_device("/rtas"); ip = (int *)get_property(node, "rtas-event-scan-rate", NULL); if (ip == NULL) { printk(KERN_ERR "rtasd: no rtas-event-scan-rate\n"); return -1; } rtas_event_scan_rate = *ip; DEBUG("rtas-event-scan-rate %d\n", rtas_event_scan_rate); ip = (int *)get_property(node, "rtas-error-log-max", NULL); if (ip == NULL) { printk(KERN_ERR "rtasd: no rtas-error-log-max\n"); return -1; } rtas_error_log_max = *ip; DEBUG("rtas-error-log-max %d\n", rtas_error_log_max); if (rtas_error_log_max > RTAS_ERROR_LOG_MAX) { printk(KERN_ERR "rtasd: truncated error log from %d to %d bytes\n", rtas_error_log_max, RTAS_ERROR_LOG_MAX); rtas_error_log_max = RTAS_ERROR_LOG_MAX; } return 0;}extern long sys_sched_get_priority_max(int policy);static int rtasd(void *unused){ int cpu = 0; int error; int first_pass = 1; int event_scan = rtas_token("event-scan"); if (event_scan == RTAS_UNKNOWN_SERVICE || get_eventscan_parms() == -1) goto error; rtas_log_buf = vmalloc(rtas_error_log_max*LOG_NUMBER); if (!rtas_log_buf) { printk(KERN_ERR "rtasd: no memory\n"); goto error; } DEBUG("will sleep for %d jiffies\n", (HZ*60/rtas_event_scan_rate) / 2); daemonize(); sigfillset(¤t->blocked); sprintf(current->comm, "rtasd"); /* Rusty unreal time task */ current->policy = SCHED_FIFO; current->nice = sys_sched_get_priority_max(SCHED_FIFO) + 1; cpu = 0; current->cpus_allowed = 1UL << cpu_logical_map(cpu); schedule(); while(1) { do { memset(logdata, 0, rtas_error_log_max); error = rtas_call(event_scan, 4, 1, NULL, EVENT_SCAN_ALL_EVENTS, 0, __pa(logdata), rtas_error_log_max); if (error == -1) { printk(KERN_ERR "event-scan failed\n"); break; } if (error == 0) log_rtas(logdata); } while(error == 0); DEBUG("watchdog scheduled on cpu %d\n", smp_processor_id()); cpu++; if (cpu >= smp_num_cpus) { if (first_pass && surveillance_requested) { DEBUG("enabling surveillance\n"); if (enable_surveillance()) goto error_vfree; DEBUG("surveillance enabled\n"); } first_pass = 0; cpu = 0; } current->cpus_allowed = 1UL << cpu_logical_map(cpu); /* Check all cpus for pending events before sleeping*/ if (first_pass) { schedule(); } else { set_current_state(TASK_INTERRUPTIBLE); schedule_timeout((HZ*60/rtas_event_scan_rate) / 2); } }error_vfree: vfree(rtas_log_buf);error: /* Should delete proc entries */ return -EINVAL;}static void __init rtas_init(void){ struct proc_dir_entry *rtas_dir, *entry; rtas_dir = proc_mkdir("rtas", 0); if (!rtas_dir) { printk(KERN_ERR "Failed to create rtas proc directory\n"); } else { entry = create_proc_entry("error_log", S_IRUSR, rtas_dir); if (entry) entry->proc_fops = &proc_rtas_log_operations; else printk(KERN_ERR "Failed to create rtas/error_log proc entry\n"); } if (kernel_thread(rtasd, 0, CLONE_FS) < 0) printk(KERN_ERR "Failed to start RTAS daemon\n"); printk(KERN_ERR "RTAS daemon started\n");}static int __init surveillance_setup(char *str){ int i; if (get_option(&str,&i)) { if (i == 1) surveillance_requested = 1; } return 1;}__initcall(rtas_init);__setup("surveillance=", surveillance_setup);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -