⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 boot.c

📁 linux 内核源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
{	/* Set up the timer interrupt (0) to go to our simple timer routine */	set_irq_handler(0, lguest_time_irq);	/* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can	 * use the TSC, otherwise it's a dumb nanosecond-resolution clock.	 * Either way, the "rating" is set so high that it's always chosen over	 * any other clocksource. */	if (lguest_data.tsc_khz)		lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,							 lguest_clock.shift);	clock_base = lguest_clock_read();	clocksource_register(&lguest_clock);	/* Now we've set up our clock, we can use it as the scheduler clock */	pv_time_ops.sched_clock = lguest_sched_clock;	/* We can't set cpumask in the initializer: damn C limitations!  Set it	 * here and register our timer device. */	lguest_clockevent.cpumask = cpumask_of_cpu(0);	clockevents_register_device(&lguest_clockevent);	/* Finally, we unblock the timer interrupt. */	enable_lguest_irq(0);}/* * Miscellaneous bits and pieces. * * Here is an oddball collection of functions which the Guest needs for things * to work.  They're pretty simple. *//* The Guest needs to tell the Host what stack it expects traps to use.  For * native hardware, this is part of the Task State Segment mentioned above in * lguest_load_tr_desc(), but to help hypervisors there's this special call. * * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data * segment), the privilege level (we're privilege level 1, the Host is 0 and * will not tolerate us trying to use that), the stack pointer, and the number * of pages in the stack. */static void lguest_load_esp0(struct tss_struct *tss,				     struct thread_struct *thread){	lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->esp0,		   THREAD_SIZE/PAGE_SIZE);}/* Let's just say, I wouldn't do debugging under a Guest. */static void lguest_set_debugreg(int regno, unsigned long value){	/* FIXME: Implement */}/* There are times when the kernel wants to make sure that no memory writes are * caught in the cache (that they've all reached real hardware devices).  This * doesn't matter for the Guest which has virtual hardware. * * On the Pentium 4 and above, cpuid() indicates that the Cache Line Flush * (clflush) instruction is available and the kernel uses that.  Otherwise, it * uses the older "Write Back and Invalidate Cache" (wbinvd) instruction. * Unlike clflush, wbinvd can only be run at privilege level 0.  So we can * ignore clflush, but replace wbinvd. */static void lguest_wbinvd(void){}/* If the Guest expects to have an Advanced Programmable Interrupt Controller, * we play dumb by ignoring writes and returning 0 for reads.  So it's no * longer Programmable nor Controlling anything, and I don't think 8 lines of * code qualifies for Advanced.  It will also never interrupt anything.  It * does, however, allow us to get through the Linux boot code. */#ifdef CONFIG_X86_LOCAL_APICstatic void lguest_apic_write(unsigned long reg, unsigned long v){}static unsigned long lguest_apic_read(unsigned long reg){	return 0;}#endif/* STOP!  Until an interrupt comes in. */static void lguest_safe_halt(void){	hcall(LHCALL_HALT, 0, 0, 0);}/* Perhaps CRASH isn't the best name for this hypercall, but we use it to get a * message out when we're crashing as well as elegant termination like powering * off. * * Note that the Host always prefers that the Guest speak in physical addresses * rather than virtual addresses, so we use __pa() here. */static void lguest_power_off(void){	hcall(LHCALL_CRASH, __pa("Power down"), 0, 0);}/* * Panicing. * * Don't.  But if you did, this is what happens. */static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p){	hcall(LHCALL_CRASH, __pa(p), 0, 0);	/* The hcall won't return, but to keep gcc happy, we're "done". */	return NOTIFY_DONE;}static struct notifier_block paniced = {	.notifier_call = lguest_panic};/* Setting up memory is fairly easy. */static __init char *lguest_memory_setup(void){	/* We do this here and not earlier because lockcheck barfs if we do it	 * before start_kernel() */	atomic_notifier_chain_register(&panic_notifier_list, &paniced);	/* The Linux bootloader header contains an "e820" memory map: the	 * Launcher populated the first entry with our memory limit. */	add_memory_region(boot_params.e820_map[0].addr,			  boot_params.e820_map[0].size,			  boot_params.e820_map[0].type);	/* This string is for the boot messages. */	return "LGUEST";}/* We will eventually use the virtio console device to produce console output, * but before that is set up we use LHCALL_NOTIFY on normal memory to produce * console output. */static __init int early_put_chars(u32 vtermno, const char *buf, int count){	char scratch[17];	unsigned int len = count;	/* We use a nul-terminated string, so we have to make a copy.  Icky,	 * huh? */	if (len > sizeof(scratch) - 1)		len = sizeof(scratch) - 1;	scratch[len] = '\0';	memcpy(scratch, buf, len);	hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0);	/* This routine returns the number of bytes actually written. */	return len;}/*G:050 * Patching (Powerfully Placating Performance Pedants) * * We have already seen that pv_ops structures let us replace simple * native instructions with calls to the appropriate back end all throughout * the kernel.  This allows the same kernel to run as a Guest and as a native * kernel, but it's slow because of all the indirect branches. * * Remember that David Wheeler quote about "Any problem in computer science can * be solved with another layer of indirection"?  The rest of that quote is * "... But that usually will create another problem."  This is the first of * those problems. * * Our current solution is to allow the paravirt back end to optionally patch * over the indirect calls to replace them with something more efficient.  We * patch the four most commonly called functions: disable interrupts, enable * interrupts, restore interrupts and save interrupts.  We usually have 6 or 10 * bytes to patch into: the Guest versions of these operations are small enough * that we can fit comfortably. * * First we need assembly templates of each of the patchable Guest operations, * and these are in lguest_asm.S. *//*G:060 We construct a table from the assembler templates: */static const struct lguest_insns{	const char *start, *end;} lguest_insns[] = {	[PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },	[PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },	[PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },	[PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },};/* Now our patch routine is fairly simple (based on the native one in * paravirt.c).  If we have a replacement, we copy it in and return how much of * the available space we used. */static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,			     unsigned long addr, unsigned len){	unsigned int insn_len;	/* Don't do anything special if we don't have a replacement */	if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start)		return paravirt_patch_default(type, clobber, ibuf, addr, len);	insn_len = lguest_insns[type].end - lguest_insns[type].start;	/* Similarly if we can't fit replacement (shouldn't happen, but let's	 * be thorough). */	if (len < insn_len)		return paravirt_patch_default(type, clobber, ibuf, addr, len);	/* Copy in our instructions. */	memcpy(ibuf, lguest_insns[type].start, insn_len);	return insn_len;}/*G:030 Once we get to lguest_init(), we know we're a Guest.  The pv_ops * structures in the kernel provide points for (almost) every routine we have * to override to avoid privileged instructions. */__init void lguest_init(void){	/* We're under lguest, paravirt is enabled, and we're running at	 * privilege level 1, not 0 as normal. */	pv_info.name = "lguest";	pv_info.paravirt_enabled = 1;	pv_info.kernel_rpl = 1;	/* We set up all the lguest overrides for sensitive operations.  These	 * are detailed with the operations themselves. */	/* interrupt-related operations */	pv_irq_ops.init_IRQ = lguest_init_IRQ;	pv_irq_ops.save_fl = save_fl;	pv_irq_ops.restore_fl = restore_fl;	pv_irq_ops.irq_disable = irq_disable;	pv_irq_ops.irq_enable = irq_enable;	pv_irq_ops.safe_halt = lguest_safe_halt;	/* init-time operations */	pv_init_ops.memory_setup = lguest_memory_setup;	pv_init_ops.patch = lguest_patch;	/* Intercepts of various cpu instructions */	pv_cpu_ops.load_gdt = lguest_load_gdt;	pv_cpu_ops.cpuid = lguest_cpuid;	pv_cpu_ops.load_idt = lguest_load_idt;	pv_cpu_ops.iret = lguest_iret;	pv_cpu_ops.load_esp0 = lguest_load_esp0;	pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;	pv_cpu_ops.set_ldt = lguest_set_ldt;	pv_cpu_ops.load_tls = lguest_load_tls;	pv_cpu_ops.set_debugreg = lguest_set_debugreg;	pv_cpu_ops.clts = lguest_clts;	pv_cpu_ops.read_cr0 = lguest_read_cr0;	pv_cpu_ops.write_cr0 = lguest_write_cr0;	pv_cpu_ops.read_cr4 = lguest_read_cr4;	pv_cpu_ops.write_cr4 = lguest_write_cr4;	pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;	pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;	pv_cpu_ops.wbinvd = lguest_wbinvd;	pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;	pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;	/* pagetable management */	pv_mmu_ops.write_cr3 = lguest_write_cr3;	pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user;	pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single;	pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel;	pv_mmu_ops.set_pte = lguest_set_pte;	pv_mmu_ops.set_pte_at = lguest_set_pte_at;	pv_mmu_ops.set_pmd = lguest_set_pmd;	pv_mmu_ops.read_cr2 = lguest_read_cr2;	pv_mmu_ops.read_cr3 = lguest_read_cr3;	pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;	pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;#ifdef CONFIG_X86_LOCAL_APIC	/* apic read/write intercepts */	pv_apic_ops.apic_write = lguest_apic_write;	pv_apic_ops.apic_write_atomic = lguest_apic_write;	pv_apic_ops.apic_read = lguest_apic_read;#endif	/* time operations */	pv_time_ops.get_wallclock = lguest_get_wallclock;	pv_time_ops.time_init = lguest_time_init;	/* Now is a good time to look at the implementations of these functions	 * before returning to the rest of lguest_init(). */	/*G:070 Now we've seen all the paravirt_ops, we return to	 * lguest_init() where the rest of the fairly chaotic boot setup	 * occurs. */	/* The native boot code sets up initial page tables immediately after	 * the kernel itself, and sets init_pg_tables_end so they're not	 * clobbered.  The Launcher places our initial pagetables somewhere at	 * the top of our physical memory, so we don't need extra space: set	 * init_pg_tables_end to the end of the kernel. */	init_pg_tables_end = __pa(pg0);	/* Load the %fs segment register (the per-cpu segment register) with	 * the normal data segment to get through booting. */	asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");	/* The Host uses the top of the Guest's virtual address space for the	 * Host<->Guest Switcher, and it tells us how big that is in	 * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */	reserve_top_address(lguest_data.reserve_mem);	/* If we don't initialize the lock dependency checker now, it crashes	 * paravirt_disable_iospace. */	lockdep_init();	/* The IDE code spends about 3 seconds probing for disks: if we reserve	 * all the I/O ports up front it can't get them and so doesn't probe.	 * Other device drivers are similar (but less severe).  This cuts the	 * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. */	paravirt_disable_iospace();	/* This is messy CPU setup stuff which the native boot code does before	 * start_kernel, so we have to do, too: */	cpu_detect(&new_cpu_data);	/* head.S usually sets up the first capability word, so do it here. */	new_cpu_data.x86_capability[0] = cpuid_edx(1);	/* Math is always hard! */	new_cpu_data.hard_math = 1;#ifdef CONFIG_X86_MCE	mce_disabled = 1;#endif#ifdef CONFIG_ACPI	acpi_disabled = 1;	acpi_ht = 0;#endif	/* We set the perferred console to "hvc".  This is the "hypervisor	 * virtual console" driver written by the PowerPC people, which we also	 * adapted for lguest's use. */	add_preferred_console("hvc", 0, NULL);	/* Register our very early console. */	virtio_cons_early_init(early_put_chars);	/* Last of all, we set the power management poweroff hook to point to	 * the Guest routine to power off. */	pm_power_off = lguest_power_off;	/* Now we're set up, call start_kernel() in init/main.c and we proceed	 * to boot as normal.  It never returns. */	start_kernel();}/* * This marks the end of stage II of our journey, The Guest. * * It is now time for us to explore the layer of virtual drivers and complete * our understanding of the Guest in "make Drivers". */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -