📄 kexec.c
字号:
return result;}static void kimage_free_extra_pages(struct kimage *image){ /* Walk through and free any extra destination pages I may have */ kimage_free_page_list(&image->dest_pages); /* Walk through and free any unuseable pages I have cached */ kimage_free_page_list(&image->unuseable_pages);}static int kimage_terminate(struct kimage *image){ if (*image->entry != 0) image->entry++; *image->entry = IND_DONE; return 0;}#define for_each_kimage_entry(image, ptr, entry) \ for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ ptr = (entry & IND_INDIRECTION)? \ phys_to_virt((entry & PAGE_MASK)): ptr +1)static void kimage_free_entry(kimage_entry_t entry){ struct page *page; page = pfn_to_page(entry >> PAGE_SHIFT); kimage_free_pages(page);}static void kimage_free(struct kimage *image){ kimage_entry_t *ptr, entry; kimage_entry_t ind = 0; if (!image) return; kimage_free_extra_pages(image); for_each_kimage_entry(image, ptr, entry) { if (entry & IND_INDIRECTION) { /* Free the previous indirection page */ if (ind & IND_INDIRECTION) kimage_free_entry(ind); /* Save this indirection page until we are * done with it. */ ind = entry; } else if (entry & IND_SOURCE) kimage_free_entry(entry); } /* Free the final indirection page */ if (ind & IND_INDIRECTION) kimage_free_entry(ind); /* Handle any machine specific cleanup */ machine_kexec_cleanup(image); /* Free the kexec control pages... */ kimage_free_page_list(&image->control_pages); kfree(image);}static kimage_entry_t *kimage_dst_used(struct kimage *image, unsigned long page){ kimage_entry_t *ptr, entry; unsigned long destination = 0; for_each_kimage_entry(image, ptr, entry) { if (entry & IND_DESTINATION) destination = entry & PAGE_MASK; else if (entry & IND_SOURCE) { if (page == destination) return ptr; destination += PAGE_SIZE; } } return NULL;}static struct page *kimage_alloc_page(struct kimage *image, gfp_t gfp_mask, unsigned long destination){ /* * Here we implement safeguards to ensure that a source page * is not copied to its destination page before the data on * the destination page is no longer useful. * * To do this we maintain the invariant that a source page is * either its own destination page, or it is not a * destination page at all. * * That is slightly stronger than required, but the proof * that no problems will not occur is trivial, and the * implementation is simply to verify. * * When allocating all pages normally this algorithm will run * in O(N) time, but in the worst case it will run in O(N^2) * time. If the runtime is a problem the data structures can * be fixed. */ struct page *page; unsigned long addr; /* * Walk through the list of destination pages, and see if I * have a match. */ list_for_each_entry(page, &image->dest_pages, lru) { addr = page_to_pfn(page) << PAGE_SHIFT; if (addr == destination) { list_del(&page->lru); return page; } } page = NULL; while (1) { kimage_entry_t *old; /* Allocate a page, if we run out of memory give up */ page = kimage_alloc_pages(gfp_mask, 0); if (!page) return NULL; /* If the page cannot be used file it away */ if (page_to_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { list_add(&page->lru, &image->unuseable_pages); continue; } addr = page_to_pfn(page) << PAGE_SHIFT; /* If it is the destination page we want use it */ if (addr == destination) break; /* If the page is not a destination page use it */ if (!kimage_is_destination_range(image, addr, addr + PAGE_SIZE)) break; /* * I know that the page is someones destination page. * See if there is already a source page for this * destination page. And if so swap the source pages. */ old = kimage_dst_used(image, addr); if (old) { /* If so move it */ unsigned long old_addr; struct page *old_page; old_addr = *old & PAGE_MASK; old_page = pfn_to_page(old_addr >> PAGE_SHIFT); copy_highpage(page, old_page); *old = addr | (*old & ~PAGE_MASK); /* The old page I have found cannot be a * destination page, so return it. */ addr = old_addr; page = old_page; break; } else { /* Place the page on the destination list I * will use it later. */ list_add(&page->lru, &image->dest_pages); } } return page;}static int kimage_load_normal_segment(struct kimage *image, struct kexec_segment *segment){ unsigned long maddr; unsigned long ubytes, mbytes; int result; unsigned char __user *buf; result = 0; buf = segment->buf; ubytes = segment->bufsz; mbytes = segment->memsz; maddr = segment->mem; result = kimage_set_destination(image, maddr); if (result < 0) goto out; while (mbytes) { struct page *page; char *ptr; size_t uchunk, mchunk; page = kimage_alloc_page(image, GFP_HIGHUSER, maddr); if (page == 0) { result = -ENOMEM; goto out; } result = kimage_add_page(image, page_to_pfn(page) << PAGE_SHIFT); if (result < 0) goto out; ptr = kmap(page); /* Start with a clear page */ memset(ptr, 0, PAGE_SIZE); ptr += maddr & ~PAGE_MASK; mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); if (mchunk > mbytes) mchunk = mbytes; uchunk = mchunk; if (uchunk > ubytes) uchunk = ubytes; result = copy_from_user(ptr, buf, uchunk); kunmap(page); if (result) { result = (result < 0) ? result : -EIO; goto out; } ubytes -= uchunk; maddr += mchunk; buf += mchunk; mbytes -= mchunk; }out: return result;}static int kimage_load_crash_segment(struct kimage *image, struct kexec_segment *segment){ /* For crash dumps kernels we simply copy the data from * user space to it's destination. * We do things a page at a time for the sake of kmap. */ unsigned long maddr; unsigned long ubytes, mbytes; int result; unsigned char __user *buf; result = 0; buf = segment->buf; ubytes = segment->bufsz; mbytes = segment->memsz; maddr = segment->mem; while (mbytes) { struct page *page; char *ptr; size_t uchunk, mchunk; page = pfn_to_page(maddr >> PAGE_SHIFT); if (page == 0) { result = -ENOMEM; goto out; } ptr = kmap(page); ptr += maddr & ~PAGE_MASK; mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); if (mchunk > mbytes) mchunk = mbytes; uchunk = mchunk; if (uchunk > ubytes) { uchunk = ubytes; /* Zero the trailing part of the page */ memset(ptr + uchunk, 0, mchunk - uchunk); } result = copy_from_user(ptr, buf, uchunk); kexec_flush_icache_page(page); kunmap(page); if (result) { result = (result < 0) ? result : -EIO; goto out; } ubytes -= uchunk; maddr += mchunk; buf += mchunk; mbytes -= mchunk; }out: return result;}static int kimage_load_segment(struct kimage *image, struct kexec_segment *segment){ int result = -ENOMEM; switch (image->type) { case KEXEC_TYPE_DEFAULT: result = kimage_load_normal_segment(image, segment); break; case KEXEC_TYPE_CRASH: result = kimage_load_crash_segment(image, segment); break; } return result;}/* * Exec Kernel system call: for obvious reasons only root may call it. * * This call breaks up into three pieces. * - A generic part which loads the new kernel from the current * address space, and very carefully places the data in the * allocated pages. * * - A generic part that interacts with the kernel and tells all of * the devices to shut down. Preventing on-going dmas, and placing * the devices in a consistent state so a later kernel can * reinitialize them. * * - A machine specific part that includes the syscall number * and the copies the image to it's final destination. And * jumps into the image at entry. * * kexec does not sync, or unmount filesystems so if you need * that to happen you need to do that yourself. */struct kimage *kexec_image;struct kimage *kexec_crash_image;/* * A home grown binary mutex. * Nothing can wait so this mutex is safe to use * in interrupt context :) */static int kexec_lock;asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, struct kexec_segment __user *segments, unsigned long flags){ struct kimage **dest_image, *image; int locked; int result; /* We only trust the superuser with rebooting the system. */ if (!capable(CAP_SYS_BOOT)) return -EPERM; /* * Verify we have a legal set of flags * This leaves us room for future extensions. */ if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK)) return -EINVAL; /* Verify we are on the appropriate architecture */ if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) && ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)) return -EINVAL; /* Put an artificial cap on the number * of segments passed to kexec_load. */ if (nr_segments > KEXEC_SEGMENT_MAX) return -EINVAL; image = NULL; result = 0; /* Because we write directly to the reserved memory * region when loading crash kernels we need a mutex here to * prevent multiple crash kernels from attempting to load * simultaneously, and to prevent a crash kernel from loading * over the top of a in use crash kernel. * * KISS: always take the mutex. */ locked = xchg(&kexec_lock, 1); if (locked) return -EBUSY; dest_image = &kexec_image; if (flags & KEXEC_ON_CRASH) dest_image = &kexec_crash_image; if (nr_segments > 0) { unsigned long i; /* Loading another kernel to reboot into */ if ((flags & KEXEC_ON_CRASH) == 0) result = kimage_normal_alloc(&image, entry, nr_segments, segments); /* Loading another kernel to switch to if this one crashes */ else if (flags & KEXEC_ON_CRASH) { /* Free any current crash dump kernel before * we corrupt it. */ kimage_free(xchg(&kexec_crash_image, NULL)); result = kimage_crash_alloc(&image, entry, nr_segments, segments); } if (result) goto out; result = machine_kexec_prepare(image); if (result) goto out; for (i = 0; i < nr_segments; i++) { result = kimage_load_segment(image, &image->segment[i]); if (result) goto out; } result = kimage_terminate(image); if (result) goto out; } /* Install the new kernel, and Uninstall the old */ image = xchg(dest_image, image);out: locked = xchg(&kexec_lock, 0); /* Release the mutex */ BUG_ON(!locked); kimage_free(image); return result;}#ifdef CONFIG_COMPATasmlinkage long compat_sys_kexec_load(unsigned long entry, unsigned long nr_segments, struct compat_kexec_segment __user *segments, unsigned long flags){ struct compat_kexec_segment in; struct kexec_segment out, __user *ksegments; unsigned long i, result; /* Don't allow clients that don't understand the native * architecture to do anything. */ if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT) return -EINVAL; if (nr_segments > KEXEC_SEGMENT_MAX) return -EINVAL; ksegments = compat_alloc_user_space(nr_segments * sizeof(out)); for (i=0; i < nr_segments; i++) { result = copy_from_user(&in, &segments[i], sizeof(in)); if (result) return -EFAULT; out.buf = compat_ptr(in.buf); out.bufsz = in.bufsz; out.mem = in.mem; out.memsz = in.memsz; result = copy_to_user(&ksegments[i], &out, sizeof(out)); if (result) return -EFAULT; } return sys_kexec_load(entry, nr_segments, ksegments, flags);}#endifvoid crash_kexec(struct pt_regs *regs){ int locked; /* Take the kexec_lock here to prevent sys_kexec_load * running on one cpu from replacing the crash kernel * we are using after a panic on a different cpu. * * If the crash kernel was not located in a fixed area * of memory the xchg(&kexec_crash_image) would be * sufficient. But since I reuse the memory... */ locked = xchg(&kexec_lock, 1); if (!locked) { if (kexec_crash_image) { struct pt_regs fixed_regs; crash_setup_regs(&fixed_regs, regs); machine_crash_shutdown(&fixed_regs); machine_kexec(kexec_crash_image); } locked = xchg(&kexec_lock, 0); BUG_ON(!locked); }}static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, size_t data_len){ struct elf_note note; note.n_namesz = strlen(name) + 1; note.n_descsz = data_len; note.n_type = type; memcpy(buf, ¬e, sizeof(note)); buf += (sizeof(note) + 3)/4; memcpy(buf, name, note.n_namesz); buf += (note.n_namesz + 3)/4; memcpy(buf, data, note.n_descsz); buf += (note.n_descsz + 3)/4; return buf;}static void final_note(u32 *buf){ struct elf_note note; note.n_namesz = 0; note.n_descsz = 0; note.n_type = 0; memcpy(buf, ¬e, sizeof(note));}void crash_save_cpu(struct pt_regs *regs, int cpu){ struct elf_prstatus prstatus; u32 *buf; if ((cpu < 0) || (cpu >= NR_CPUS)) return; /* Using ELF notes here is opportunistic. * I need a well defined structure format * for the data I pass, and I need tags * on the data to indicate what information I have * squirrelled away. ELF notes happen to provide * all of that, so there is no need to invent something new. */ buf = (u32*)per_cpu_ptr(crash_notes, cpu); if (!buf) return; memset(&prstatus, 0, sizeof(prstatus)); prstatus.pr_pid = current->pid; elf_core_copy_regs(&prstatus.pr_reg, regs); buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, &prstatus, sizeof(prstatus)); final_note(buf);}static int __init crash_notes_memory_init(void){ /* Allocate memory for saving cpu registers. */ crash_notes = alloc_percpu(note_buf_t); if (!crash_notes) { printk("Kexec: Memory allocation for saving cpu register" " states failed\n"); return -ENOMEM; } return 0;}module_init(crash_notes_memory_init)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -