core.c
来自「linux 内核源代码」· C语言 代码 · 共 582 行 · 第 1/2 页
C
582 行
/* * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. * Copyright (C) 2007, Jes Sorensen <jes@sgi.com> SGI. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or * NON INFRINGEMENT. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */#include <linux/kernel.h>#include <linux/start_kernel.h>#include <linux/string.h>#include <linux/console.h>#include <linux/screen_info.h>#include <linux/irq.h>#include <linux/interrupt.h>#include <linux/clocksource.h>#include <linux/clockchips.h>#include <linux/cpu.h>#include <linux/lguest.h>#include <linux/lguest_launcher.h>#include <asm/paravirt.h>#include <asm/param.h>#include <asm/page.h>#include <asm/pgtable.h>#include <asm/desc.h>#include <asm/setup.h>#include <asm/lguest.h>#include <asm/uaccess.h>#include <asm/i387.h>#include "../lg.h"static int cpu_had_pge;static struct { unsigned long offset; unsigned short segment;} lguest_entry;/* Offset from where switcher.S was compiled to where we've copied it */static unsigned long switcher_offset(void){ return SWITCHER_ADDR - (unsigned long)start_switcher_text;}/* This cpu's struct lguest_pages. */static struct lguest_pages *lguest_pages(unsigned int cpu){ return &(((struct lguest_pages *) (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);}static DEFINE_PER_CPU(struct lguest *, last_guest);/*S:010 * We approach the Switcher. 
*
 * Remember that each CPU has two pages which are visible to the Guest when it
 * runs on that CPU.  This has to contain the state for that Guest: we copy the
 * state in just before we run the Guest.
 *
 * Each Guest has "changed" flags which indicate what has changed in the Guest
 * since it last ran.  We saw this set in interrupts_and_traps.c and
 * segments.c.
 */
static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
{
	/* A full copy is costly, so it is only forced when it has to be.  The
	 * cheap case is that this CPU last ran this very Guest and the Guest
	 * last ran on these very pages; in any other case we act as though
	 * everything in the Guest had changed. */
	if (!(__get_cpu_var(last_guest) == lg && lg->last_pages == pages)) {
		__get_cpu_var(last_guest) = lg;
		lg->last_pages = pages;
		lg->changed = CHANGED_ALL;
	}

	/* Everything from here to the flag checks is cheap enough to redo on
	 * every entry. */

	/* Record the Host's current top-level page directory. */
	pages->state.host_cr3 = __pa(current->mm->pgd);

	/* Make the Guest's page tables map this CPU's pages (and no other
	 * CPU's pages). */
	map_switcher_in_guest(lg, pages);

	/* The two "TSS" members which tell the CPU what stack to use for
	 * traps which go directly into the Guest (ie. traps at privilege
	 * level 1). */
	pages->state.guest_tss.esp1 = lg->esp1;
	pages->state.guest_tss.ss1 = lg->ss1;

	/* Direct-to-Guest trap entries, only if the IDT was touched. */
	if (lg->changed & CHANGED_IDT) {
		copy_traps(lg, pages->state.guest_idt, default_idt_entries);
	}

	/* Either every GDT entry the Guest may change, or, failing that, just
	 * the TLS entries when those alone have changed. */
	if (lg->changed & CHANGED_GDT) {
		copy_gdt(lg, pages->state.guest_gdt);
	} else if (lg->changed & CHANGED_GDT_TLS) {
		copy_gdt_tls(lg, pages->state.guest_gdt);
	}

	/* The Guest now counts as unchanged until something sets a flag. */
	lg->changed = 0;
}

/* Finally: the code to actually call into the Switcher to run the Guest.
*/
/* lg is the Guest to run; pages is the struct lguest_pages of the CPU we are
 * running on (the caller passes lguest_pages(raw_smp_processor_id())).  The
 * function returns when the far call into the Switcher returns. */
static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
{
	/* This is a dummy value we need for GCC's sake: both asm outputs are
	 * written into it and the value is never read. */
	unsigned int clobber;

	/* Copy the guest-specific information into this CPU's "struct
	 * lguest_pages". */
	copy_in_guest_info(lg, pages);

	/* Set the trap number to 256 (impossible value).  If we fault while
	 * switching to the Guest (bad segment registers or bug), this will
	 * cause us to abort the Guest. */
	lg->regs->trapnum = 256;

	/* Now: we push the "eflags" register on the stack, then do an "lcall".
	 * This is how we change from using the kernel code segment to using
	 * the dedicated lguest code segment, as well as jumping into the
	 * Switcher.  The target of the indirect "lcall *" is the file-scope
	 * lguest_entry { offset, segment } pair.
	 *
	 * The lcall also pushes the old code segment (KERNEL_CS) onto the
	 * stack, then the address of this call.  This stack layout happens to
	 * exactly match the stack layout created by an interrupt... */
	asm volatile("pushf; lcall *lguest_entry"
		     /* This is how we tell GCC that %eax ("a") and %ebx ("b")
		      * are changed by this routine.  The "=" means output. */
		     : "=a"(clobber), "=b"(clobber)
		     /* %eax contains the pages pointer.  ("0" refers to the
		      * 0-th argument above, ie "a"; likewise "1" refers to
		      * the next one, ie "b").  %ebx contains the
		      * physical address of the Guest's top-level page
		      * directory. */
		     : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
		     /* We tell gcc that all these registers could change,
		      * which means we don't have to save and restore them in
		      * the Switcher.  "memory" additionally tells it that
		      * memory contents may have changed across the asm. */
		     : "memory", "%edx", "%ecx", "%edi", "%esi");
}
/*:*/

/*M:002 There are hooks in the scheduler which we can register to tell when we
 * get kicked off the CPU (preempt_notifier_register()).  This would allow us
 * to lazily disable SYSENTER which would regain some performance, and should
 * also simplify copy_in_guest_info().  Note that we'd still need to restore
 * things when we exit to Launcher userspace, but that's fairly easy.
 *
 * The hooks were designed for KVM, but we can also put them to good use. :*/

/*H:040 This is the i386-specific code to setup and run the Guest.
Interrupts are disabled: we own the CPU. */
void lguest_arch_run_guest(struct lguest *lg)
{
	struct lguest_pages *this_cpu_pages;

	/* The awfully-named TS bit: when the Guest has asked for it to be
	 * set, set it now, so that FPU use traps and the trap can be passed
	 * on to the Guest. */
	if (lg->ts)
		lguest_set_ts();

	/* SYSENTER is an optimized way of doing system calls.  It cannot be
	 * allowed because it always jumps to privilege level 0.  A normal
	 * Guest won't try it because we don't advertise it in CPUID, but a
	 * malicious Guest (or malicious Guest userspace program) could, so
	 * the CPU is told to disable it before the Guest runs. */
	if (boot_cpu_has(X86_FEATURE_SEP))
		wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);

	/* Run the Guest on this CPU's pages.  Control comes back when
	 * something interesting happens, and its registers then show what it
	 * was doing. */
	this_cpu_pages = lguest_pages(raw_smp_processor_id());
	run_guest_once(lg, this_cpu_pages);

	/* Note that the "regs" pointer contains two extra entries which are
	 * not really registers: a trap number which says what interrupt or
	 * trap made the switcher code come back, and an error code which some
	 * traps set. */
	switch (lg->regs->trapnum) {
	case 14:
		/* Guest page fault: cr2 holds the bad virtual address.  It
		 * must be grabbed now, because once interrupts are back on an
		 * interrupt could fault and overwrite cr2, or we could even
		 * move off to a different CPU. */
		lg->arch.last_pagefault = read_cr2();
		break;
	case 7:
		/* The Guest used the FPU: restore the FPU it expects. */
		math_state_restore();
		break;
	default:
		break;
	}

	/* Restore SYSENTER if it's supposed to be on. */
	if (boot_cpu_has(X86_FEATURE_SEP))
		wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
}

/*H:130 Now we've examined the hypercall code; our Guest can make requests.
 * Our Guest is usually so well behaved; it never tries to do things it isn't
 * allowed to, and uses hypercalls instead.
Unfortunately, Linux's paravirtual * infrastructure isn't quite complete, because it doesn't contain replacements * for the Intel I/O instructions. As a result, the Guest sometimes fumbles * across one during the boot process as it probes for various things which are * usually attached to a PC. * * When the Guest uses one of these instructions, we get a trap (General * Protection Fault) and come here. We see if it's one of those troublesome * instructions and skip over it. We return true if we did. */static int emulate_insn(struct lguest *lg){ u8 insn; unsigned int insnlen = 0, in = 0, shift = 0; /* The eip contains the *virtual* address of the Guest's instruction: * guest_pa just subtracts the Guest's page_offset. */ unsigned long physaddr = guest_pa(lg, lg->regs->eip); /* This must be the Guest kernel trying to do something, not userspace! * The bottom two bits of the CS segment register are the privilege * level. */ if ((lg->regs->cs & 3) != GUEST_PL) return 0; /* Decoding x86 instructions is icky. */ insn = lgread(lg, physaddr, u8); /* 0x66 is an "operand prefix". It means it's using the upper 16 bits of the eax register. */ if (insn == 0x66) { shift = 16; /* The instruction is 1 byte so far, read the next byte. */ insnlen = 1; insn = lgread(lg, physaddr + insnlen, u8); } /* We can ignore the lower bit for the moment and decode the 4 opcodes * we need to emulate. */ switch (insn & 0xFE) { case 0xE4: /* in <next byte>,%al */ insnlen += 2; in = 1; break; case 0xEC: /* in (%dx),%al */ insnlen += 1; in = 1; break; case 0xE6: /* out %al,<next byte> */ insnlen += 2; break; case 0xEE: /* out %al,(%dx) */ insnlen += 1; break; default: /* OK, we don't know what this is, can't emulate. */ return 0; } /* If it was an "IN" instruction, they expect the result to be read * into %eax, so we change %eax. We always return all-ones, which * traditionally means "there's nothing there". 
*/ if (in) { /* Lower bit tells is whether it's a 16 or 32 bit access */ if (insn & 0x1) lg->regs->eax = 0xFFFFFFFF; else lg->regs->eax |= (0xFFFF << shift); } /* Finally, we've "done" the instruction, so move past it. */ lg->regs->eip += insnlen; /* Success! */ return 1;}/*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */void lguest_arch_handle_trap(struct lguest *lg){ switch (lg->regs->trapnum) { case 13: /* We've intercepted a General Protection Fault. */ /* Check if this was one of those annoying IN or OUT * instructions which we need to emulate. If so, we just go * back into the Guest after we've done it. */ if (lg->regs->errcode == 0) { if (emulate_insn(lg)) return;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?