📄 kexec.c
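(Note: the listing below is an excerpt from kernel/kexec.c; it picks up partway through sys_kexec_load(), so the first lines shown are the tail end of that function.)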
		if (flags & KEXEC_PRESERVE_CONTEXT)
			image->preserve_context = 1;
		result = machine_kexec_prepare(image);
		if (result)
			goto out;

		for (i = 0; i < nr_segments; i++) {
			result = kimage_load_segment(image, &image->segment[i]);
			if (result)
				goto out;
		}
		kimage_terminate(image);
	}
	/* Install the new kernel, and Uninstall the old */
	image = xchg(dest_image, image);

out:
	mutex_unlock(&kexec_mutex);
	kimage_free(image);

	return result;
}

#ifdef CONFIG_COMPAT
asmlinkage long compat_sys_kexec_load(unsigned long entry,
				unsigned long nr_segments,
				struct compat_kexec_segment __user *segments,
				unsigned long flags)
{
	struct compat_kexec_segment in;
	struct kexec_segment out, __user *ksegments;
	unsigned long i, result;

	/* Don't allow clients that don't understand the native
	 * architecture to do anything.
	 */
	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
		return -EINVAL;

	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
	for (i = 0; i < nr_segments; i++) {
		result = copy_from_user(&in, &segments[i], sizeof(in));
		if (result)
			return -EFAULT;

		out.buf   = compat_ptr(in.buf);
		out.bufsz = in.bufsz;
		out.mem   = in.mem;
		out.memsz = in.memsz;

		result = copy_to_user(&ksegments[i], &out, sizeof(out));
		if (result)
			return -EFAULT;
	}

	return sys_kexec_load(entry, nr_segments, ksegments, flags);
}
#endif

void crash_kexec(struct pt_regs *regs)
{
	/* Take the kexec_mutex here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient.  But since I reuse the memory...
	 */
	if (mutex_trylock(&kexec_mutex)) {
		if (kexec_crash_image) {
			struct pt_regs fixed_regs;

			crash_setup_regs(&fixed_regs, regs);
			crash_save_vmcoreinfo();
			machine_crash_shutdown(&fixed_regs);
			machine_kexec(kexec_crash_image);
		}
		mutex_unlock(&kexec_mutex);
	}
}

static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
			    size_t data_len)
{
	struct elf_note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = data_len;
	note.n_type   = type;
	memcpy(buf, &note, sizeof(note));
	buf += (sizeof(note) + 3)/4;
	memcpy(buf, name, note.n_namesz);
	buf += (note.n_namesz + 3)/4;
	memcpy(buf, data, note.n_descsz);
	buf += (note.n_descsz + 3)/4;

	return buf;
}

static void final_note(u32 *buf)
{
	struct elf_note note;

	note.n_namesz = 0;
	note.n_descsz = 0;
	note.n_type   = 0;
	memcpy(buf, &note, sizeof(note));
}

void crash_save_cpu(struct pt_regs *regs, int cpu)
{
	struct elf_prstatus prstatus;
	u32 *buf;

	if ((cpu < 0) || (cpu >= NR_CPUS))
		return;

	/* Using ELF notes here is opportunistic.
	 * I need a well defined structure format
	 * for the data I pass, and I need tags
	 * on the data to indicate what information I have
	 * squirrelled away.  ELF notes happen to provide
	 * all of that, so there is no need to invent something new.
	 */
	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
	if (!buf)
		return;
	memset(&prstatus, 0, sizeof(prstatus));
	prstatus.pr_pid = current->pid;
	elf_core_copy_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	final_note(buf);
}
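For reference, append_elf_note() and final_note() above round the note header, the name, and the descriptor each up to a 4-byte boundary, as ELF notes require. The following stand-alone user-space sketch (not part of kexec.c) mirrors that arithmetic; pack_note() is an illustrative name, and Elf32_Nhdr from <elf.h> stands in for the kernel's struct elf_note.

/*
 * Illustrative user-space sketch: pack one ELF note followed by the empty
 * terminating note, with the same 4-byte rounding as append_elf_note().
 */
#include <elf.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t *pack_note(uint32_t *buf, const char *name, unsigned type,
			   const void *desc, size_t desc_len)
{
	Elf32_Nhdr hdr = {
		.n_namesz = strlen(name) + 1,	/* name including its NUL */
		.n_descsz = desc_len,
		.n_type   = type,
	};

	memcpy(buf, &hdr, sizeof(hdr));
	buf += (sizeof(hdr) + 3) / 4;		/* header is 12 bytes, already a multiple of 4 */
	memcpy(buf, name, hdr.n_namesz);
	buf += (hdr.n_namesz + 3) / 4;		/* pad the name to a u32 boundary */
	memcpy(buf, desc, hdr.n_descsz);
	buf += (hdr.n_descsz + 3) / 4;		/* pad the descriptor likewise */
	return buf;
}

int main(void)
{
	uint32_t buf[64] = { 0 };
	uint64_t fake_regs[4] = { 1, 2, 3, 4 };	/* placeholder payload */
	uint32_t *end;

	end = pack_note(buf, "CORE", NT_PRSTATUS, fake_regs, sizeof(fake_regs));
	memset(end, 0, sizeof(Elf32_Nhdr));	/* the empty "final" note */

	/* 12-byte header + "CORE\0" padded to 8 + 32-byte payload = 52 bytes */
	printf("note occupies %zu bytes\n",
	       (size_t)((char *)end - (char *)buf));
	return 0;
}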
static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	crash_notes = alloc_percpu(note_buf_t);
	if (!crash_notes) {
		printk("Kexec: Memory allocation for saving cpu register"
		       " states failed\n");
		return -ENOMEM;
	}
	return 0;
}
module_init(crash_notes_memory_init)

/*
 * parsing the "crashkernel" commandline
 *
 * this code is intended to be called from architecture specific code
 */

/*
 * This function parses command lines in the format
 *
 *	crashkernel=ramsize-range:size[,...][@offset]
 *
 * The function returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_mem(char *cmdline,
					unsigned long long system_ram,
					unsigned long long *crash_size,
					unsigned long long *crash_base)
{
	char *cur = cmdline, *tmp;

	/* for each entry of the comma-separated list */
	do {
		unsigned long long start, end = ULLONG_MAX, size;

		/* get the start of the range */
		start = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warning("crashkernel: Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (*cur != '-') {
			pr_warning("crashkernel: '-' expected\n");
			return -EINVAL;
		}
		cur++;

		/* if no ':' is here, then we read the end */
		if (*cur != ':') {
			end = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warning("crashkernel: Memory "
					   "value expected\n");
				return -EINVAL;
			}
			cur = tmp;
			if (end <= start) {
				pr_warning("crashkernel: end <= start\n");
				return -EINVAL;
			}
		}

		if (*cur != ':') {
			pr_warning("crashkernel: ':' expected\n");
			return -EINVAL;
		}
		cur++;

		size = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warning("Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (size >= system_ram) {
			pr_warning("crashkernel: invalid size\n");
			return -EINVAL;
		}

		/* match ? */
		if (system_ram >= start && system_ram < end) {
			*crash_size = size;
			break;
		}
	} while (*cur++ == ',');

	if (*crash_size > 0) {
		while (*cur != ' ' && *cur != '@')
			cur++;
		if (*cur == '@') {
			cur++;
			*crash_base = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warning("Memory value expected "
					   "after '@'\n");
				return -EINVAL;
			}
		}
	}

	return 0;
}

/*
 * This function parses "simple" (old) crashkernel command lines like
 *
 *	crashkernel=size[@offset]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_simple(char *cmdline,
					   unsigned long long *crash_size,
					   unsigned long long *crash_base)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warning("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	if (*cur == '@')
		*crash_base = memparse(cur+1, &cur);

	return 0;
}
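As a concrete illustration of the "simple" syntax handled above (and dispatched to by parse_crashkernel() below), the following stand-alone user-space sketch (not part of kexec.c) parses crashkernel=128M@16M. parse_size() here is a hypothetical stand-in for the kernel's memparse(), handling the K/M/G suffixes.

/* Illustrative user-space sketch of the crashkernel=size[@offset] form. */
#include <stdio.h>
#include <stdlib.h>

static unsigned long long parse_size(const char *s, char **retptr)
{
	unsigned long long val = strtoull(s, retptr, 0);

	switch (**retptr) {
	case 'G': case 'g':
		val <<= 10;
		/* fall through */
	case 'M': case 'm':
		val <<= 10;
		/* fall through */
	case 'K': case 'k':
		val <<= 10;
		(*retptr)++;
		break;
	default:
		break;
	}
	return val;
}

int main(void)
{
	const char *arg = "128M@16M";	/* as in crashkernel=128M@16M */
	char *cur;
	unsigned long long crash_size, crash_base = 0;

	crash_size = parse_size(arg, &cur);
	if (cur == arg) {
		fprintf(stderr, "memory value expected\n");
		return 1;
	}
	if (*cur == '@')
		crash_base = parse_size(cur + 1, &cur);

	printf("size=%llu bytes, base=%llu\n", crash_size, crash_base);
	return 0;
}

For the extended form, e.g. crashkernel=512M-2G:64M,2G-:128M, parse_crashkernel_mem() above picks the first range containing the system RAM size: a machine with 1 GiB of RAM falls into 512M-2G and therefore reserves 64M.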
/*
 * This function is the entry point for command line parsing and should be
 * called from the arch-specific code.
 */
int __init parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	char *p = cmdline, *ck_cmdline = NULL;
	char *first_colon, *first_space;

	BUG_ON(!crash_size || !crash_base);
	*crash_size = 0;
	*crash_base = 0;

	/* find crashkernel and use the last one if there are more */
	p = strstr(p, "crashkernel=");
	while (p) {
		ck_cmdline = p;
		p = strstr(p+1, "crashkernel=");
	}

	if (!ck_cmdline)
		return -EINVAL;

	ck_cmdline += 12; /* strlen("crashkernel=") */

	/*
	 * if the commandline contains a ':', then that's the extended
	 * syntax -- if not, it must be the classic syntax
	 */
	first_colon = strchr(ck_cmdline, ':');
	first_space = strchr(ck_cmdline, ' ');
	if (first_colon && (!first_space || first_colon < first_space))
		return parse_crashkernel_mem(ck_cmdline, system_ram,
				crash_size, crash_base);
	else
		return parse_crashkernel_simple(ck_cmdline, crash_size,
				crash_base);

	return 0;
}

void crash_save_vmcoreinfo(void)
{
	u32 *buf;

	if (!vmcoreinfo_size)
		return;

	vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());

	buf = (u32 *)vmcoreinfo_note;

	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
			      vmcoreinfo_size);

	final_note(buf);
}

void vmcoreinfo_append_str(const char *fmt, ...)
{
	va_list args;
	char buf[0x50];
	int r;

	va_start(args, fmt);
	r = vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	if (r + vmcoreinfo_size > vmcoreinfo_max_size)
		r = vmcoreinfo_max_size - vmcoreinfo_size;

	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);

	vmcoreinfo_size += r;
}

/*
 * provide an empty default implementation here -- architecture
 * code may override this
 */
void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void)
{}

unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
{
	return __pa((unsigned long)(char *)&vmcoreinfo_note);
}

static int __init crash_save_vmcoreinfo_init(void)
{
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
	VMCOREINFO_SYMBOL(swapper_pg_dir);
	VMCOREINFO_SYMBOL(_stext);

#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _count);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);

	arch_crash_save_vmcoreinfo();

	return 0;
}
module_init(crash_save_vmcoreinfo_init)
/*
 * Move into place and start executing a preloaded standalone
 * executable.  If nothing was preloaded return an error.
 */
int kernel_kexec(void)
{
	int error = 0;

	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
		goto Unlock;
	}

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		mutex_lock(&pm_mutex);
		pm_prepare_console();
		error = freeze_processes();
		if (error) {
			error = -EBUSY;
			goto Restore_console;
		}
		suspend_console();
		error = device_suspend(PMSG_FREEZE);
		if (error)
			goto Resume_console;
		error = disable_nonboot_cpus();
		if (error)
			goto Resume_devices;
		device_pm_lock();
		local_irq_disable();
		/* At this point, device_suspend() has been called,
		 * but *not* device_power_down().  We *must*
		 * device_power_down() now.  Otherwise, drivers for
		 * some devices (e.g. interrupt controllers) become
		 * desynchronized with the actual state of the
		 * hardware at resume time, and evil weirdness ensues.
		 */
		error = device_power_down(PMSG_FREEZE);
		if (error)
			goto Enable_irqs;
	} else
#endif
	{
		kernel_restart_prepare(NULL);
		printk(KERN_EMERG "Starting new kernel\n");
		machine_shutdown();
	}

	machine_kexec(kexec_image);

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		device_power_up(PMSG_RESTORE);
 Enable_irqs:
		local_irq_enable();
		device_pm_unlock();
		enable_nonboot_cpus();
 Resume_devices:
		device_resume(PMSG_RESTORE);
 Resume_console:
		resume_console();
		thaw_processes();
 Restore_console:
		pm_restore_console();
		mutex_unlock(&pm_mutex);
	}
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	return error;
}
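For context on how this code is reached from user space: sys_kexec_load() is exposed as the kexec_load(2) syscall (no glibc wrapper; it requires CAP_SYS_BOOT), and kernel_kexec() is later triggered through reboot(2) with LINUX_REBOOT_CMD_KEXEC. The stand-alone sketch below (not part of kexec.c, and assuming the C library's <sys/syscall.h> defines SYS_kexec_load) only demonstrates the call shape: passing zero segments, which unloads any currently loaded image via the xchg()/kimage_free() path shown earlier. Building a real segment list for a new kernel is a much larger job and is left out.

/* Illustrative user-space sketch: unload the currently loaded kexec image. */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/*
	 * nr_segments == 0 drops the loaded image; flags == 0 is
	 * KEXEC_ARCH_DEFAULT (the normal, non-crash image).
	 * Must be run with CAP_SYS_BOOT (typically as root).
	 */
	if (syscall(SYS_kexec_load, 0UL, 0UL, NULL, 0UL) == -1) {
		perror("kexec_load");
		return 1;
	}

	/*
	 * After a real image has been loaded, the jump into kernel_kexec()
	 * would be requested with reboot(2) using LINUX_REBOOT_CMD_KEXEC;
	 * with nothing loaded that call fails with EINVAL.
	 */
	return 0;
}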