📄 kexec.c
字号:
/* * kexec.c - kexec system call * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> * * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */#include <linux/capability.h>#include <linux/mm.h>#include <linux/file.h>#include <linux/slab.h>#include <linux/fs.h>#include <linux/kexec.h>#include <linux/spinlock.h>#include <linux/list.h>#include <linux/highmem.h>#include <linux/syscalls.h>#include <linux/reboot.h>#include <linux/syscalls.h>#include <linux/ioport.h>#include <linux/hardirq.h>#include <linux/elf.h>#include <linux/elfcore.h>#include <asm/page.h>#include <asm/uaccess.h>#include <asm/io.h>#include <asm/system.h>#include <asm/semaphore.h>/* Per cpu memory for storing cpu states in case of system crash. */note_buf_t* crash_notes;/* Location of the reserved area for the crash kernel */struct resource crashk_res = { .name = "Crash kernel", .start = 0, .end = 0, .flags = IORESOURCE_BUSY | IORESOURCE_MEM};int kexec_should_crash(struct task_struct *p){ if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops) return 1; return 0;}/* * When kexec transitions to the new kernel there is a one-to-one * mapping between physical and virtual addresses. On processors * where you can disable the MMU this is trivial, and easy. For * others it is still a simple predictable page table to setup. * * In that environment kexec copies the new kernel to its final * resting place. This means I can only support memory whose * physical address can fit in an unsigned long. In particular * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. * If the assembly stub has more restrictive requirements * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be * defined more restrictively in <asm/kexec.h>. * * The code for the transition from the current kernel to the * the new kernel is placed in the control_code_buffer, whose size * is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single * page of memory is necessary, but some architectures require more. * Because this memory must be identity mapped in the transition from * virtual to physical addresses it must live in the range * 0 - TASK_SIZE, as only the user space mappings are arbitrarily * modifiable. * * The assembly stub in the control code buffer is passed a linked list * of descriptor pages detailing the source pages of the new kernel, * and the destination addresses of those source pages. As this data * structure is not used in the context of the current OS, it must * be self-contained. * * The code has been made to work with highmem pages and will use a * destination page in its final resting place (if it happens * to allocate it). The end product of this is that most of the * physical address space, and most of RAM can be used. * * Future directions include: * - allocating a page table with the control code buffer identity * mapped, to simplify machine_kexec and make kexec_on_panic more * reliable. *//* * KIMAGE_NO_DEST is an impossible destination address..., for * allocating pages whose destination address we do not care about. */#define KIMAGE_NO_DEST (-1UL)static int kimage_is_destination_range(struct kimage *image, unsigned long start, unsigned long end);static struct page *kimage_alloc_page(struct kimage *image, gfp_t gfp_mask, unsigned long dest);static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, unsigned long nr_segments, struct kexec_segment __user *segments){ size_t segment_bytes; struct kimage *image; unsigned long i; int result; /* Allocate a controlling structure */ result = -ENOMEM; image = kzalloc(sizeof(*image), GFP_KERNEL); if (!image) goto out; image->head = 0; image->entry = &image->head; image->last_entry = &image->head; image->control_page = ~0; /* By default this does not apply */ image->start = entry; image->type = KEXEC_TYPE_DEFAULT; /* Initialize the list of control pages */ INIT_LIST_HEAD(&image->control_pages); /* Initialize the list of destination pages */ INIT_LIST_HEAD(&image->dest_pages); /* Initialize the list of unuseable pages */ INIT_LIST_HEAD(&image->unuseable_pages); /* Read in the segments */ image->nr_segments = nr_segments; segment_bytes = nr_segments * sizeof(*segments); result = copy_from_user(image->segment, segments, segment_bytes); if (result) goto out; /* * Verify we have good destination addresses. The caller is * responsible for making certain we don't attempt to load * the new image into invalid or reserved areas of RAM. This * just verifies it is an address we can use. * * Since the kernel does everything in page size chunks ensure * the destination addreses are page aligned. Too many * special cases crop of when we don't do this. The most * insidious is getting overlapping destination addresses * simply because addresses are changed to page size * granularity. */ result = -EADDRNOTAVAIL; for (i = 0; i < nr_segments; i++) { unsigned long mstart, mend; mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz; if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) goto out; if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) goto out; } /* Verify our destination addresses do not overlap. * If we alloed overlapping destination addresses * through very weird things can happen with no * easy explanation as one segment stops on another. */ result = -EINVAL; for (i = 0; i < nr_segments; i++) { unsigned long mstart, mend; unsigned long j; mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz; for (j = 0; j < i; j++) { unsigned long pstart, pend; pstart = image->segment[j].mem; pend = pstart + image->segment[j].memsz; /* Do the segments overlap ? */ if ((mend > pstart) && (mstart < pend)) goto out; } } /* Ensure our buffer sizes are strictly less than * our memory sizes. This should always be the case, * and it is easier to check up front than to be surprised * later on. */ result = -EINVAL; for (i = 0; i < nr_segments; i++) { if (image->segment[i].bufsz > image->segment[i].memsz) goto out; } result = 0;out: if (result == 0) *rimage = image; else kfree(image); return result;}static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, unsigned long nr_segments, struct kexec_segment __user *segments){ int result; struct kimage *image; /* Allocate and initialize a controlling structure */ image = NULL; result = do_kimage_alloc(&image, entry, nr_segments, segments); if (result) goto out; *rimage = image; /* * Find a location for the control code buffer, and add it * the vector of segments so that it's pages will also be * counted as destination pages. */ result = -ENOMEM; image->control_code_page = kimage_alloc_control_pages(image, get_order(KEXEC_CONTROL_CODE_SIZE)); if (!image->control_code_page) { printk(KERN_ERR "Could not allocate control_code_buffer\n"); goto out; } result = 0; out: if (result == 0) *rimage = image; else kfree(image); return result;}static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, unsigned long nr_segments, struct kexec_segment __user *segments){ int result; struct kimage *image; unsigned long i; image = NULL; /* Verify we have a valid entry point */ if ((entry < crashk_res.start) || (entry > crashk_res.end)) { result = -EADDRNOTAVAIL; goto out; } /* Allocate and initialize a controlling structure */ result = do_kimage_alloc(&image, entry, nr_segments, segments); if (result) goto out; /* Enable the special crash kernel control page * allocation policy. */ image->control_page = crashk_res.start; image->type = KEXEC_TYPE_CRASH; /* * Verify we have good destination addresses. Normally * the caller is responsible for making certain we don't * attempt to load the new image into invalid or reserved * areas of RAM. But crash kernels are preloaded into a * reserved area of ram. We must ensure the addresses * are in the reserved area otherwise preloading the * kernel could corrupt things. */ result = -EADDRNOTAVAIL; for (i = 0; i < nr_segments; i++) { unsigned long mstart, mend; mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz - 1; /* Ensure we are within the crash kernel limits */ if ((mstart < crashk_res.start) || (mend > crashk_res.end)) goto out; } /* * Find a location for the control code buffer, and add * the vector of segments so that it's pages will also be * counted as destination pages. */ result = -ENOMEM; image->control_code_page = kimage_alloc_control_pages(image, get_order(KEXEC_CONTROL_CODE_SIZE)); if (!image->control_code_page) { printk(KERN_ERR "Could not allocate control_code_buffer\n"); goto out; } result = 0;out: if (result == 0) *rimage = image; else kfree(image); return result;}static int kimage_is_destination_range(struct kimage *image, unsigned long start, unsigned long end){ unsigned long i; for (i = 0; i < image->nr_segments; i++) { unsigned long mstart, mend; mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz; if ((end > mstart) && (start < mend)) return 1; } return 0;}static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order){ struct page *pages; pages = alloc_pages(gfp_mask, order); if (pages) { unsigned int count, i; pages->mapping = NULL; set_page_private(pages, order); count = 1 << order; for (i = 0; i < count; i++) SetPageReserved(pages + i); } return pages;}static void kimage_free_pages(struct page *page){ unsigned int order, count, i; order = page_private(page); count = 1 << order; for (i = 0; i < count; i++) ClearPageReserved(page + i); __free_pages(page, order);}static void kimage_free_page_list(struct list_head *list){ struct list_head *pos, *next; list_for_each_safe(pos, next, list) { struct page *page; page = list_entry(pos, struct page, lru); list_del(&page->lru); kimage_free_pages(page); }}static struct page *kimage_alloc_normal_control_pages(struct kimage *image, unsigned int order){ /* Control pages are special, they are the intermediaries * that are needed while we copy the rest of the pages * to their final resting place. As such they must * not conflict with either the destination addresses * or memory the kernel is already using. * * The only case where we really need more than one of * these are for architectures where we cannot disable * the MMU and must instead generate an identity mapped * page table for all of the memory. * * At worst this runs in O(N) of the image size. */ struct list_head extra_pages; struct page *pages; unsigned int count; count = 1 << order; INIT_LIST_HEAD(&extra_pages); /* Loop while I can allocate a page and the page allocated * is a destination page. */ do { unsigned long pfn, epfn, addr, eaddr; pages = kimage_alloc_pages(GFP_KERNEL, order); if (!pages) break; pfn = page_to_pfn(pages); epfn = pfn + count; addr = pfn << PAGE_SHIFT; eaddr = epfn << PAGE_SHIFT; if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || kimage_is_destination_range(image, addr, eaddr)) { list_add(&pages->lru, &extra_pages); pages = NULL; } } while (!pages); if (pages) { /* Remember the allocated page... */ list_add(&pages->lru, &image->control_pages); /* Because the page is already in it's destination * location we will never allocate another page at * that address. Therefore kimage_alloc_pages * will not return it (again) and we don't need * to give it an entry in image->segment[]. */ } /* Deal with the destination pages I have inadvertently allocated. * * Ideally I would convert multi-page allocations into single * page allocations, and add everyting to image->dest_pages. * * For now it is simpler to just free the pages. */ kimage_free_page_list(&extra_pages); return pages;}static struct page *kimage_alloc_crash_control_pages(struct kimage *image, unsigned int order){ /* Control pages are special, they are the intermediaries * that are needed while we copy the rest of the pages * to their final resting place. As such they must * not conflict with either the destination addresses * or memory the kernel is already using. * * Control pages are also the only pags we must allocate * when loading a crash kernel. All of the other pages * are specified by the segments and we just memcpy * into them directly. * * The only case where we really need more than one of * these are for architectures where we cannot disable * the MMU and must instead generate an identity mapped * page table for all of the memory. * * Given the low demand this implements a very simple * allocator that finds the first hole of the appropriate * size in the reserved memory region, and allocates all * of the memory up to and including the hole. */ unsigned long hole_start, hole_end, size; struct page *pages; pages = NULL; size = (1 << order) << PAGE_SHIFT; hole_start = (image->control_page + (size - 1)) & ~(size - 1); hole_end = hole_start + size - 1; while (hole_end <= crashk_res.end) { unsigned long i; if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT) break; if (hole_end > crashk_res.end) break; /* See if I overlap any of the segments */ for (i = 0; i < image->nr_segments; i++) { unsigned long mstart, mend; mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz - 1; if ((hole_end >= mstart) && (hole_start <= mend)) { /* Advance the hole to the end of the segment */ hole_start = (mend + (size - 1)) & ~(size - 1); hole_end = hole_start + size - 1; break; } } /* If I don't overlap any segments I have found my hole! */ if (i == image->nr_segments) { pages = pfn_to_page(hole_start >> PAGE_SHIFT); break; } } if (pages) image->control_page = hole_end; return pages;}struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order){ struct page *pages = NULL; switch (image->type) { case KEXEC_TYPE_DEFAULT: pages = kimage_alloc_normal_control_pages(image, order); break; case KEXEC_TYPE_CRASH: pages = kimage_alloc_crash_control_pages(image, order); break; } return pages;}static int kimage_add_entry(struct kimage *image, kimage_entry_t entry){ if (*image->entry != 0) image->entry++; if (image->entry == image->last_entry) { kimage_entry_t *ind_page; struct page *page; page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); if (!page) return -ENOMEM; ind_page = page_address(page); *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; image->entry = ind_page; image->last_entry = ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); } *image->entry = entry; image->entry++; *image->entry = 0; return 0;}static int kimage_set_destination(struct kimage *image, unsigned long destination){ int result; destination &= PAGE_MASK; result = kimage_add_entry(image, destination | IND_DESTINATION); if (result == 0) image->destination = destination; return result;}static int kimage_add_page(struct kimage *image, unsigned long page){ int result; page &= PAGE_MASK; result = kimage_add_entry(image, page | IND_SOURCE); if (result == 0) image->destination += PAGE_SIZE;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -