core.c

来自「linux 内核源代码」· C语言 代码 · 共 582 行 · 第 1/2 页

C
582
字号
/* * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. * Copyright (C) 2007, Jes Sorensen <jes@sgi.com> SGI. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or * NON INFRINGEMENT.  See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */#include <linux/kernel.h>#include <linux/start_kernel.h>#include <linux/string.h>#include <linux/console.h>#include <linux/screen_info.h>#include <linux/irq.h>#include <linux/interrupt.h>#include <linux/clocksource.h>#include <linux/clockchips.h>#include <linux/cpu.h>#include <linux/lguest.h>#include <linux/lguest_launcher.h>#include <asm/paravirt.h>#include <asm/param.h>#include <asm/page.h>#include <asm/pgtable.h>#include <asm/desc.h>#include <asm/setup.h>#include <asm/lguest.h>#include <asm/uaccess.h>#include <asm/i387.h>#include "../lg.h"static int cpu_had_pge;static struct {	unsigned long offset;	unsigned short segment;} lguest_entry;/* Offset from where switcher.S was compiled to where we've copied it */static unsigned long switcher_offset(void){	return SWITCHER_ADDR - (unsigned long)start_switcher_text;}/* This cpu's struct lguest_pages. */static struct lguest_pages *lguest_pages(unsigned int cpu){	return &(((struct lguest_pages *)		  (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);}static DEFINE_PER_CPU(struct lguest *, last_guest);/*S:010 * We approach the Switcher. 
* * Remember that each CPU has two pages which are visible to the Guest when it * runs on that CPU.  This has to contain the state for that Guest: we copy the * state in just before we run the Guest. * * Each Guest has "changed" flags which indicate what has changed in the Guest * since it last ran.  We saw this set in interrupts_and_traps.c and * segments.c. */static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages){	/* Copying all this data can be quite expensive.  We usually run the	 * same Guest we ran last time (and that Guest hasn't run anywhere else	 * meanwhile).  If that's not the case, we pretend everything in the	 * Guest has changed. */	if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) {		__get_cpu_var(last_guest) = lg;		lg->last_pages = pages;		lg->changed = CHANGED_ALL;	}	/* These copies are pretty cheap, so we do them unconditionally: */	/* Save the current Host top-level page directory. */	pages->state.host_cr3 = __pa(current->mm->pgd);	/* Set up the Guest's page tables to see this CPU's pages (and no	 * other CPU's pages). */	map_switcher_in_guest(lg, pages);	/* Set up the two "TSS" members which tell the CPU what stack to use	 * for traps which do directly into the Guest (ie. traps at privilege	 * level 1). */	pages->state.guest_tss.esp1 = lg->esp1;	pages->state.guest_tss.ss1 = lg->ss1;	/* Copy direct-to-Guest trap entries. */	if (lg->changed & CHANGED_IDT)		copy_traps(lg, pages->state.guest_idt, default_idt_entries);	/* Copy all GDT entries which the Guest can change. */	if (lg->changed & CHANGED_GDT)		copy_gdt(lg, pages->state.guest_gdt);	/* If only the TLS entries have changed, copy them. */	else if (lg->changed & CHANGED_GDT_TLS)		copy_gdt_tls(lg, pages->state.guest_gdt);	/* Mark the Guest as unchanged for next time. */	lg->changed = 0;}/* Finally: the code to actually call into the Switcher to run the Guest. 
*/
/*
 * run_guest_once - copy the Guest's state in, then enter the Switcher once.
 * lg:    the Guest to run; its current page directory's physical address is
 *        handed to the Switcher in %ebx.
 * pages: the struct lguest_pages handed to the Switcher in %eax.
 *
 * Returns nothing.  lg->regs->trapnum is preset to 256 before the switch.
 */
static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
{
	/* This is a dummy value we need for GCC's sake. */
	unsigned int clobber;

	/* Copy the guest-specific information into this CPU's "struct
	 * lguest_pages". */
	copy_in_guest_info(lg, pages);

	/* Set the trap number to 256 (impossible value).  If we fault while
	 * switching to the Guest (bad segment registers or bug), this will
	 * cause us to abort the Guest. */
	lg->regs->trapnum = 256;

	/* Now: we push the "eflags" register on the stack, then do an "lcall".
	 * This is how we change from using the kernel code segment to using
	 * the dedicated lguest code segment, as well as jumping into the
	 * Switcher.
	 *
	 * The lcall also pushes the old code segment (KERNEL_CS) onto the
	 * stack, then the address of this call.  This stack layout happens to
	 * exactly match the stack layout created by an interrupt... */
	asm volatile("pushf; lcall *lguest_entry"
		     /* This is how we tell GCC that %eax ("a") and %ebx ("b")
		      * are changed by this routine.  The "=" means output. */
		     : "=a"(clobber), "=b"(clobber)
		     /* %eax contains the pages pointer.  ("0" refers to the
		      * 0-th argument above, ie "a").  %ebx contains the
		      * physical address of the Guest's top-level page
		      * directory. */
		     : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
		     /* We tell gcc that all these registers could change,
		      * which means we don't have to save and restore them in
		      * the Switcher. */
		     : "memory", "%edx", "%ecx", "%edi", "%esi");
}
/*:*/

/*M:002 There are hooks in the scheduler which we can register to tell when we
 * get kicked off the CPU (preempt_notifier_register()).  This would allow us
 * to lazily disable SYSENTER which would regain some performance, and should
 * also simplify copy_in_guest_info().  Note that we'd still need to restore
 * things when we exit to Launcher userspace, but that's fairly easy.
* * The hooks were designed for KVM, but we can also put them to good use. :*//*H:040 This is the i386-specific code to setup and run the Guest.  Interrupts * are disabled: we own the CPU. */void lguest_arch_run_guest(struct lguest *lg){	/* Remember the awfully-named TS bit?  If the Guest has asked to set it	 * we set it now, so we can trap and pass that trap to the Guest if it	 * uses the FPU. */	if (lg->ts)		lguest_set_ts();	/* SYSENTER is an optimized way of doing system calls.  We can't allow	 * it because it always jumps to privilege level 0.  A normal Guest	 * won't try it because we don't advertise it in CPUID, but a malicious	 * Guest (or malicious Guest userspace program) could, so we tell the	 * CPU to disable it before running the Guest. */	if (boot_cpu_has(X86_FEATURE_SEP))		wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);	/* Now we actually run the Guest.  It will return when something	 * interesting happens, and we can examine its registers to see what it	 * was doing. */	run_guest_once(lg, lguest_pages(raw_smp_processor_id()));	/* Note that the "regs" pointer contains two extra entries which are	 * not really registers: a trap number which says what interrupt or	 * trap made the switcher code come back, and an error code which some	 * traps set.  */	/* If the Guest page faulted, then the cr2 register will tell us the	 * bad virtual address.  We have to grab this now, because once we	 * re-enable interrupts an interrupt could fault and thus overwrite	 * cr2, or we could even move off to a different CPU. */	if (lg->regs->trapnum == 14)		lg->arch.last_pagefault = read_cr2();	/* Similarly, if we took a trap because the Guest used the FPU,	 * we have to restore the FPU it expects to see. */	else if (lg->regs->trapnum == 7)		math_state_restore();	/* Restore SYSENTER if it's supposed to be on. */	if (boot_cpu_has(X86_FEATURE_SEP))		wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);}/*H:130 Now we've examined the hypercall code; our Guest can make requests. 
* Our Guest is usually so well behaved; it never tries to do things it isn't * allowed to, and uses hypercalls instead.  Unfortunately, Linux's paravirtual * infrastructure isn't quite complete, because it doesn't contain replacements * for the Intel I/O instructions.  As a result, the Guest sometimes fumbles * across one during the boot process as it probes for various things which are * usually attached to a PC. * * When the Guest uses one of these instructions, we get a trap (General * Protection Fault) and come here.  We see if it's one of those troublesome * instructions and skip over it.  We return true if we did. */static int emulate_insn(struct lguest *lg){	u8 insn;	unsigned int insnlen = 0, in = 0, shift = 0;	/* The eip contains the *virtual* address of the Guest's instruction:	 * guest_pa just subtracts the Guest's page_offset. */	unsigned long physaddr = guest_pa(lg, lg->regs->eip);	/* This must be the Guest kernel trying to do something, not userspace!	 * The bottom two bits of the CS segment register are the privilege	 * level. */	if ((lg->regs->cs & 3) != GUEST_PL)		return 0;	/* Decoding x86 instructions is icky. */	insn = lgread(lg, physaddr, u8);	/* 0x66 is an "operand prefix".  It means it's using the upper 16 bits	   of the eax register. */	if (insn == 0x66) {		shift = 16;		/* The instruction is 1 byte so far, read the next byte. */		insnlen = 1;		insn = lgread(lg, physaddr + insnlen, u8);	}	/* We can ignore the lower bit for the moment and decode the 4 opcodes	 * we need to emulate. */	switch (insn & 0xFE) {	case 0xE4: /* in     <next byte>,%al */		insnlen += 2;		in = 1;		break;	case 0xEC: /* in     (%dx),%al */		insnlen += 1;		in = 1;		break;	case 0xE6: /* out    %al,<next byte> */		insnlen += 2;		break;	case 0xEE: /* out    %al,(%dx) */		insnlen += 1;		break;	default:		/* OK, we don't know what this is, can't emulate. */		return 0;	}	/* If it was an "IN" instruction, they expect the result to be read	 * into %eax, so we change %eax.  
We always return all-ones, which	 * traditionally means "there's nothing there". */	if (in) {		/* Lower bit tells is whether it's a 16 or 32 bit access */		if (insn & 0x1)			lg->regs->eax = 0xFFFFFFFF;		else			lg->regs->eax |= (0xFFFF << shift);	}	/* Finally, we've "done" the instruction, so move past it. */	lg->regs->eip += insnlen;	/* Success! */	return 1;}/*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */void lguest_arch_handle_trap(struct lguest *lg){	switch (lg->regs->trapnum) {	case 13: /* We've intercepted a General Protection Fault. */		/* Check if this was one of those annoying IN or OUT		 * instructions which we need to emulate.  If so, we just go		 * back into the Guest after we've done it. */		if (lg->regs->errcode == 0) {			if (emulate_insn(lg))				return;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?