core.c

来自「linux 内核源代码」· C语言 代码 · 共 582 行 · 第 1/2 页

C
582
字号
		}
		break;
	case 14: /* We've intercepted a Page Fault. */
		/* The Guest accessed a virtual address that wasn't mapped.
		 * This happens a lot: we don't actually set up most of the
		 * page tables for the Guest at all when we start: as it runs
		 * it asks for more and more, and we set them up as
		 * required. In this case, we don't even tell the Guest that
		 * the fault happened.
		 *
		 * The errcode tells whether this was a read or a write, and
		 * whether kernel or userspace code. */
		if (demand_page(lg, lg->arch.last_pagefault, lg->regs->errcode))
			return;

		/* OK, it's really not there (or not OK): the Guest needs to
		 * know.  We write out the cr2 value so it knows where the
		 * fault occurred.
		 *
		 * Note that if the Guest were really messed up, this could
		 * happen before it's done the LHCALL_LGUEST_INIT hypercall, so
		 * lg->lguest_data could be NULL */
		if (lg->lguest_data &&
		    put_user(lg->arch.last_pagefault, &lg->lguest_data->cr2))
			kill_guest(lg, "Writing cr2");
		break;
	case 7: /* We've intercepted a Device Not Available fault. */
		/* If the Guest doesn't want to know, we already restored the
		 * Floating Point Unit, so we just continue without telling
		 * it. */
		if (!lg->ts)
			return;
		break;
	case 32 ... 255:
		/* These values mean a real interrupt occurred, in which case
		 * the Host handler has already been run.  We just do a
		 * friendly check if another process should now be run, then
		 * return to run the Guest again */
		cond_resched();
		return;
	case LGUEST_TRAP_ENTRY:
		/* Our 'struct hcall_args' maps directly over our regs: we set
		 * up the pointer now to indicate a hypercall is pending. */
		lg->hcall = (struct hcall_args *)lg->regs;
		return;
	}

	/* We didn't handle the trap, so it needs to go to the Guest. */
	if (!deliver_trap(lg, lg->regs->trapnum))
		/* If the Guest doesn't have a handler (either it hasn't
		 * registered any yet, or it's one of the faults we don't let
		 * it handle), it dies with a cryptic error message.  For a
		 * page fault (trap 14) the last value printed is the faulting
		 * address; for every other trap it is the error code. */
		kill_guest(lg, "unhandled trap %li at %#lx (%#lx)",
			   lg->regs->trapnum, lg->regs->eip,
			   lg->regs->trapnum == 14 ? lg->arch.last_pagefault
			   : lg->regs->errcode);
}

/* Now we can look at each of the routines this calls, in increasing order of
 * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(),
 * deliver_trap() and demand_page().  After all those, we'll be ready to
 * examine the Switcher, and our philosophical understanding of the Host/Guest
 * duality will be complete. :*/

/* Set or clear the "Page Global Enable" bit in %cr4 on the CPU this runs on.
 *
 * @on: treated purely as a flag: non-NULL sets X86_CR4_PGE, NULL clears it.
 *	It is a pointer only because this helper is handed to on_each_cpu()
 *	by lguest_arch_host_init() and lguest_arch_host_fini(). */
static void adjust_pge(void *on)
{
	if (on)
		write_cr4(read_cr4() | X86_CR4_PGE);
	else
		write_cr4(read_cr4() & ~X86_CR4_PGE);
}

/*H:020 Now the Switcher is mapped and everything else is ready, we need to do
 * some more i386-specific initialization.
 *
 * In order, this: relocates default_idt_entries[] by switcher_offset(), fills
 * in the per-CPU "struct lguest_ro_state" for every possible CPU, sets up
 * lguest_entry for the far call into the Switcher, and finally disables PGE
 * on all CPUs (remembering in cpu_had_pge that it did so). */
void __init lguest_arch_host_init(void)
{
	int i;

	/* Most of the i386/switcher.S doesn't care that it's been moved; on
	 * Intel, jumps are relative, and it doesn't access any references to
	 * external code or data.
	 *
	 * The only exception is the interrupt handlers in switcher.S: their
	 * addresses are placed in a table (default_idt_entries), so we need to
	 * update the table with the new addresses.  switcher_offset() is a
	 * convenience function which returns the distance between the builtin
	 * switcher code and the high-mapped copy we just made. */
	for (i = 0; i < IDT_ENTRIES; i++)
		default_idt_entries[i] += switcher_offset();

	/*
	 * Set up the Switcher's per-cpu areas.
	 *
	 * Each CPU gets two pages of its own within the high-mapped region
	 * (aka. "struct lguest_pages").  Much of this can be initialized now,
	 * but some depends on what Guest we are running (which is set up in
	 * copy_in_guest_info()).
	 */
	for_each_possible_cpu(i) {
		/* lguest_pages() returns this CPU's two pages. */
		struct lguest_pages *pages = lguest_pages(i);
		/* This is a convenience pointer to make the code fit one
		 * statement to a line. */
		struct lguest_ro_state *state = &pages->state;

		/* The Global Descriptor Table: the Host has a different one
		 * for each CPU.  We keep a descriptor for the GDT which says
		 * where it is and how big it is (the size field actually
		 * holds the offset of the last byte, not the byte count,
		 * hence the "-1"). */
		state->host_gdt_desc.size = GDT_SIZE-1;
		state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);

		/* All CPUs on the Host use the same Interrupt Descriptor
		 * Table, so we just use store_idt(), which gets this CPU's IDT
		 * descriptor. */
		store_idt(&state->host_idt_desc);

		/* The descriptors for the Guest's GDT and IDT can be filled
		 * out now, too.  We copy the GDT & IDT into ->guest_gdt and
		 * ->guest_idt before actually running the Guest. */
		state->guest_idt_desc.size = sizeof(state->guest_idt)-1;
		state->guest_idt_desc.address = (long)&state->guest_idt;
		state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1;
		state->guest_gdt_desc.address = (long)&state->guest_gdt;

		/* We know where we want the stack to be when the Guest enters
		 * the switcher: in pages->regs.  The x86 stack grows
		 * downwards (towards lower addresses), so we start it at the
		 * end of that structure: one element past &pages->regs. */
		state->guest_tss.esp0 = (long)(&pages->regs + 1);
		/* And this is the GDT entry to use for the stack: we keep a
		 * couple of special LGUEST entries. */
		state->guest_tss.ss0 = LGUEST_DS;

		/* x86 can have a fine-grained bitmap which indicates what I/O
		 * ports the process can use.  We set it to the end of our
		 * structure, meaning "none". */
		state->guest_tss.io_bitmap_base = sizeof(state->guest_tss);

		/* Some GDT entries are the same across all Guests, so we can
		 * set them up now. */
		setup_default_gdt_entries(state);
		/* Most IDT entries are the same for all Guests, too. */
		setup_default_idt_entries(state, default_idt_entries);

		/* The Host needs to be able to use the LGUEST segments on this
		 * CPU, too, so put them in the Host GDT. */
		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
	}

	/* In the Switcher, we want the %cs segment register to use the
	 * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so
	 * it will be undisturbed when we switch.  To change %cs and jump we
	 * need this structure to feed to Intel's "lcall" instruction.  The
	 * offset is the address of switch_to_guest in the high-mapped copy,
	 * hence the switcher_offset() adjustment. */
	lguest_entry.offset = (long)switch_to_guest + switcher_offset();
	lguest_entry.segment = LGUEST_CS;

	/* Finally, we need to turn off "Page Global Enable".  PGE is an
	 * optimization where page table entries are specially marked to show
	 * they never change.  The Host kernel marks all the kernel pages this
	 * way because it's always present, even when userspace is running.
	 *
	 * Lguest breaks this: unbeknownst to the rest of the Host kernel, we
	 * switch to the Guest kernel.  If you don't disable this on all CPUs,
	 * you'll get really weird bugs that you'll chase for two days.
	 *
	 * I used to turn PGE off every time we switched to the Guest and back
	 * on when we return, but that slowed the Switcher down noticeably. */

	/* We don't need the complexity of CPUs coming and going while we're
	 * doing this. */
	lock_cpu_hotplug();
	if (cpu_has_pge) { /* We have a broader idea of "global". */
		/* Remember that this was originally set (for cleanup in
		 * lguest_arch_host_fini()). */
		cpu_had_pge = 1;
		/* adjust_pge is a helper function which sets or unsets the PGE
		 * bit on its CPU, depending on the argument (0 == unset). */
		on_each_cpu(adjust_pge, (void *)0, 0, 1);
		/* Turn off the feature in the global feature set. */
		clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
	}
	unlock_cpu_hotplug();
};
/*:*/

/* Undo the PGE change made by lguest_arch_host_init(): if cpu_had_pge was
 * set there, put the feature bit back into boot_cpu_data and set the PGE bit
 * in %cr4 again on every CPU.  Nothing else from host_init is reverted
 * here. */
void __exit lguest_arch_host_fini(void)
{
	/* If we had PGE before we started, turn it back on now.  As in
	 * lguest_arch_host_init(), this is done with CPU hotplug locked. */
	lock_cpu_hotplug();
	if (cpu_had_pge) {
		set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
		/* adjust_pge's argument "1" means set PGE. */
		on_each_cpu(adjust_pge, (void *)1, 0, 1);
	}
	unlock_cpu_hotplug();
}

/*H:122 The i386-specific hypercalls simply farm out to the right functions.
 *
 * args->arg0 selects the hypercall; the remaining args are passed through to
 * the handler unchanged.  Returns 0 when the hypercall number was one of the
 * three handled here, or -EIO when it was not. */
int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args)
{
	switch (args->arg0) {
	case LHCALL_LOAD_GDT:
		load_guest_gdt(lg, args->arg1, args->arg2);
		break;
	case LHCALL_LOAD_IDT_ENTRY:
		load_guest_idt_entry(lg, args->arg1, args->arg2, args->arg3);
		break;
	case LHCALL_LOAD_TLS:
		guest_load_tls(lg, args->arg1);
		break;
	default:
		/* Bad Guest.  Bad! */
		return -EIO;
	}
	return 0;
}

/*H:126 i386-specific hypercall initialization:
 *
 * Validates the Guest-supplied address of its "struct lguest_data", records
 * it in lg->lguest_data, and publishes the TSC speed to the Guest.  Returns
 * -EFAULT if the address is rejected or the write of tsc_khz fails, and 0
 * otherwise. */
int lguest_arch_init_hypercalls(struct lguest *lg)
{
	u32 tsc_speed;

	/* The pointer to the Guest's "struct lguest_data" is the only
	 * argument.  We check that address now. */
	if (!lguest_address_ok(lg, lg->hcall->arg1, sizeof(*lg->lguest_data)))
		return -EFAULT;

	/* Having checked it, we simply set lg->lguest_data to point straight
	 * into the Launcher's memory at the right place and then use
	 * copy_to_user/from_user from now on, instead of lgread/write.  I put
	 * this in to show that I'm not immune to writing stupid
	 * optimizations. */
	lg->lguest_data = lg->mem_base + lg->hcall->arg1;

	/* We insist that the Time Stamp Counter exist and doesn't change with
	 * cpu frequency.  Some devious chip manufacturers decided that TSC
	 * changes could be handled in software.  I decided that time going
	 * backwards might be good for benchmarks, but it's bad for users.
	 *
	 * We also insist that the TSC be stable: the kernel detects unreliable
	 * TSCs for its own purposes, and we use that here.
	 *
	 * If either condition fails, the Guest is told a speed of 0. */
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
		tsc_speed = tsc_khz;
	else
		tsc_speed = 0;
	if (put_user(tsc_speed, &lg->lguest_data->tsc_khz))
		return -EFAULT;

	/* The interrupt code might not like the system call vector.  Note
	 * that after kill_guest() this function still returns 0. */
	if (!check_syscall_vector(lg))
		kill_guest(lg, "bad syscall vector");

	return 0;
}

/*L:030 lguest_arch_setup_regs()
 *
 * Most of the Guest's registers are left alone: we used get_zeroed_page() to
 * allocate the structure, so they will be 0.
 *
 * @lg: the Guest whose lg->regs are being initialized.
 * @start: the address the Guest begins executing at (stored in eip). */
void lguest_arch_setup_regs(struct lguest *lg, unsigned long start)
{
	struct lguest_regs *regs = lg->regs;

	/* There are four "segment" registers which the Guest needs to boot:
	 * The "code segment" register (cs) refers to the kernel code segment
	 * __KERNEL_CS, and the "data", "extra" and "stack" segment registers
	 * refer to the kernel data segment __KERNEL_DS.
	 *
	 * The privilege level is packed into the lower bits.  The Guest runs
	 * at privilege level 1 (GUEST_PL). */
	regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL;
	regs->cs = __KERNEL_CS|GUEST_PL;

	/* The "eflags" register contains miscellaneous flags.  Bit 1 (0x002)
	 * is supposed to always be "1".  Bit 9 (0x200) controls whether
	 * interrupts are enabled.  We always leave interrupts enabled while
	 * running the Guest. */
	regs->eflags = X86_EFLAGS_IF | 0x2;

	/* The "Extended Instruction Pointer" register says where the Guest is
	 * running. */
	regs->eip = start;

	/* %esi points to our boot information, at physical address 0, so don't
	 * touch it. */

	/* There are a couple of GDT entries the Guest expects when first
	 * booting. */
	setup_guest_gdt(lg);
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?