core.c
来自「linux 内核源代码」· C语言 代码 · 共 582 行 · 第 1/2 页
C
582 行
}
		break;
	case 14: /* We've intercepted a Page Fault. */
		/* The Guest accessed a virtual address that wasn't mapped.
		 * This happens a lot: we don't actually set up most of the
		 * page tables for the Guest at all when we start: as it runs
		 * it asks for more and more, and we set them up as
		 * required.  In this case, we don't even tell the Guest that
		 * the fault happened.
		 *
		 * The errcode tells whether this was a read or a write, and
		 * whether kernel or userspace code. */
		if (demand_page(lg, lg->arch.last_pagefault, lg->regs->errcode))
			return;

		/* OK, it's really not there (or not OK): the Guest needs to
		 * know.  We write out the cr2 value so it knows where the
		 * fault occurred.
		 *
		 * Note that if the Guest were really messed up, this could
		 * happen before it's done the LHCALL_LGUEST_INIT hypercall, so
		 * lg->lguest_data could be NULL; hence the NULL check before
		 * the put_user(). */
		if (lg->lguest_data &&
		    put_user(lg->arch.last_pagefault, &lg->lguest_data->cr2))
			kill_guest(lg, "Writing cr2");
		break;
	case 7: /* We've intercepted a Device Not Available fault. */
		/* If the Guest doesn't want to know, we already restored the
		 * Floating Point Unit, so we just continue without telling
		 * it. */
		if (!lg->ts)
			return;
		break;
	case 32 ... 255:
		/* These values mean a real interrupt occurred, in which case
		 * the Host handler has already been run.  We just do a
		 * friendly check if another process should now be run, then
		 * return to run the Guest again. */
		cond_resched();
		return;
	case LGUEST_TRAP_ENTRY:
		/* Our 'struct hcall_args' maps directly over our regs: we set
		 * up the pointer now to indicate a hypercall is pending. */
		lg->hcall = (struct hcall_args *)lg->regs;
		return;
	}

	/* We didn't handle the trap, so it needs to go to the Guest. */
	if (!deliver_trap(lg, lg->regs->trapnum))
		/* If the Guest doesn't have a handler (either it hasn't
		 * registered any yet, or it's one of the faults we don't let
		 * it handle), it dies with a cryptic error message.  For a
		 * page fault (trap 14) the last value reported is the
		 * faulting address; for anything else it is the error code. */
		kill_guest(lg, "unhandled trap %li at %#lx (%#lx)",
			   lg->regs->trapnum, lg->regs->eip,
			   lg->regs->trapnum == 14 ? lg->arch.last_pagefault
			   : lg->regs->errcode);
}

/* Now we can look at each of the routines this calls, in increasing order of
 * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(),
 * deliver_trap() and demand_page().  After all those, we'll be ready to
 * examine the Switcher, and our philosophical understanding of the Host/Guest
 * duality will be complete. :*/

/* adjust_pge() sets or clears the X86_CR4_PGE bit in cr4.
 *
 * @on: used purely as a boolean: a non-NULL pointer sets the bit, NULL clears
 *	it.  It is a "void *" because the function is handed to on_each_cpu()
 *	below, which passes the argument through as a pointer. */
static void adjust_pge(void *on)
{
	if (on)
		write_cr4(read_cr4() | X86_CR4_PGE);
	else
		write_cr4(read_cr4() & ~X86_CR4_PGE);
}

/*H:020 Now the Switcher is mapped and everything else is ready, we need to do
 * some more i386-specific initialization.
 *
 * In order, this function: relocates default_idt_entries[], fills in the
 * read-only Switcher state for every possible CPU, sets up lguest_entry, and
 * finally turns off Page Global Enable on all CPUs (remembering in cpu_had_pge
 * that it did so). */
void __init lguest_arch_host_init(void)
{
	int i;

	/* Most of the i386/switcher.S doesn't care that it's been moved; on
	 * Intel, jumps are relative, and it doesn't access any references to
	 * external code or data.
	 *
	 * The only exception is the interrupt handlers in switcher.S: their
	 * addresses are placed in a table (default_idt_entries), so we need to
	 * update the table with the new addresses.  switcher_offset() is a
	 * convenience function which returns the distance between the builtin
	 * switcher code and the high-mapped copy we just made. */
	for (i = 0; i < IDT_ENTRIES; i++)
		default_idt_entries[i] += switcher_offset();

	/*
	 * Set up the Switcher's per-cpu areas.
	 *
	 * Each CPU gets two pages of its own within the high-mapped region
	 * (aka. "struct lguest_pages").  Much of this can be initialized now,
	 * but some depends on what Guest we are running (which is set up in
	 * copy_in_guest_info()).
	 */
	for_each_possible_cpu(i) {
		/* lguest_pages() returns this CPU's two pages. */
		struct lguest_pages *pages = lguest_pages(i);
		/* This is a convenience pointer to make the code fit one
		 * statement to a line. */
		struct lguest_ro_state *state = &pages->state;

		/* The Global Descriptor Table: the Host has a different one
		 * for each CPU.  We keep a descriptor for the GDT which says
		 * where it is and how big it is (the "size" field actually
		 * holds the offset of the last byte rather than the byte
		 * count, hence the "-1"). */
		state->host_gdt_desc.size = GDT_SIZE-1;
		state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);

		/* All CPUs on the Host use the same Interrupt Descriptor
		 * Table, so we just use store_idt(), which gets this CPU's IDT
		 * descriptor. */
		store_idt(&state->host_idt_desc);

		/* The descriptors for the Guest's GDT and IDT can be filled
		 * out now, too.  We copy the GDT & IDT into ->guest_gdt and
		 * ->guest_idt before actually running the Guest. */
		state->guest_idt_desc.size = sizeof(state->guest_idt)-1;
		state->guest_idt_desc.address = (long)&state->guest_idt;
		state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1;
		state->guest_gdt_desc.address = (long)&state->guest_gdt;

		/* We know where we want the stack to be when the Guest enters
		 * the switcher: in pages->regs.  The stack grows towards lower
		 * addresses, so we start it at the end of that structure
		 * ("&pages->regs + 1" is the first byte past it). */
		state->guest_tss.esp0 = (long)(&pages->regs + 1);
		/* And this is the GDT entry to use for the stack: we keep a
		 * couple of special LGUEST entries. */
		state->guest_tss.ss0 = LGUEST_DS;

		/* x86 can have a finegrained bitmap which indicates what I/O
		 * ports the process can use.  We set it to the end of our
		 * structure, meaning "none". */
		state->guest_tss.io_bitmap_base = sizeof(state->guest_tss);

		/* Some GDT entries are the same across all Guests, so we can
		 * set them up now. */
		setup_default_gdt_entries(state);
		/* Most IDT entries are the same for all Guests, too. */
		setup_default_idt_entries(state, default_idt_entries);

		/* The Host needs to be able to use the LGUEST segments on this
		 * CPU, too, so put them in the Host GDT. */
		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
	}

	/* In the Switcher, we want the %cs segment register to use the
	 * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so
	 * it will be undisturbed when we switch.  To change %cs and jump we
	 * need this structure to feed to Intel's "lcall" instruction. */
	lguest_entry.offset = (long)switch_to_guest + switcher_offset();
	lguest_entry.segment = LGUEST_CS;

	/* Finally, we need to turn off "Page Global Enable".  PGE is an
	 * optimization where page table entries are specially marked to show
	 * they never change.  The Host kernel marks all the kernel pages this
	 * way because it's always present, even when userspace is running.
	 *
	 * Lguest breaks this: unbeknownst to the rest of the Host kernel, we
	 * switch to the Guest kernel.  If you don't disable this on all CPUs,
	 * you'll get really weird bugs that you'll chase for two days.
	 *
	 * I used to turn PGE off every time we switched to the Guest and back
	 * on when we return, but that slowed the Switcher down noticeably. */

	/* We don't need the complexity of CPUs coming and going while we're
	 * doing this. */
	lock_cpu_hotplug();
	if (cpu_has_pge) { /* We have a broader idea of "global". */
		/* Remember that this was originally set (for cleanup). */
		cpu_had_pge = 1;
		/* adjust_pge is a helper function which sets or unsets the PGE
		 * bit on its CPU, depending on the argument (0 == unset). */
		on_each_cpu(adjust_pge, (void *)0, 0, 1);
		/* Turn off the feature in the global feature set. */
		clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
	}
	unlock_cpu_hotplug();
};
/* NOTE(review): the ';' after the closing brace above is a stray empty
 * declaration at file scope; it looks unintentional and could be dropped. */
/*:*/

/* lguest_arch_host_fini() undoes the PGE change made at init time: if
 * cpu_had_pge was set, the feature bit is put back in the global feature set
 * and PGE is re-enabled in cr4 on each CPU, all under the CPU hotplug lock. */
void __exit lguest_arch_host_fini(void)
{
	/* If we had PGE before we started, turn it back on now. */
	lock_cpu_hotplug();
	if (cpu_had_pge) {
		set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
		/* adjust_pge's argument "1" means set PGE. */
		on_each_cpu(adjust_pge, (void *)1, 0, 1);
	}
	unlock_cpu_hotplug();
}

/*H:122 The i386-specific hypercalls simply farm out to the right functions.
 *
 * args->arg0 selects the hypercall; the remaining args are passed through to
 * the handler.  Returns 0 when the hypercall was one of the three handled
 * here, and -EIO for any other value of args->arg0. */
int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args)
{
	switch (args->arg0) {
	case LHCALL_LOAD_GDT:
		load_guest_gdt(lg, args->arg1, args->arg2);
		break;
	case LHCALL_LOAD_IDT_ENTRY:
		load_guest_idt_entry(lg, args->arg1, args->arg2, args->arg3);
		break;
	case LHCALL_LOAD_TLS:
		guest_load_tls(lg, args->arg1);
		break;
	default:
		/* Bad Guest.  Bad! */
		return -EIO;
	}
	return 0;
}

/*H:126 i386-specific hypercall initialization:
 *
 * Returns -EFAULT if the Guest-supplied lguest_data address fails the
 * lguest_address_ok() check or if writing tsc_khz back to it fails, and 0
 * otherwise.  Note that a bad syscall vector kills the Guest via kill_guest()
 * but this function still returns 0 in that case. */
int lguest_arch_init_hypercalls(struct lguest *lg)
{
	u32 tsc_speed;

	/* The pointer to the Guest's "struct lguest_data" is the only
	 * argument.  We check that address now. */
	if (!lguest_address_ok(lg, lg->hcall->arg1, sizeof(*lg->lguest_data)))
		return -EFAULT;

	/* Having checked it, we simply set lg->lguest_data to point straight
	 * into the Launcher's memory at the right place and then use
	 * copy_to_user/from_user from now on, instead of lgread/write.  I put
	 * this in to show that I'm not immune to writing stupid
	 * optimizations. */
	lg->lguest_data = lg->mem_base + lg->hcall->arg1;

	/* We insist that the Time Stamp Counter exist and doesn't change with
	 * cpu frequency.  Some devious chip manufacturers decided that TSC
	 * changes could be handled in software.  I decided that time going
	 * backwards might be good for benchmarks, but it's bad for users.
	 *
	 * We also insist that the TSC be stable: the kernel detects unreliable
	 * TSCs for its own purposes, and we use that here.
	 *
	 * If either condition fails we report a speed of 0 to the Guest. */
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
		tsc_speed = tsc_khz;
	else
		tsc_speed = 0;
	if (put_user(tsc_speed, &lg->lguest_data->tsc_khz))
		return -EFAULT;

	/* The interrupt code might not like the system call vector. */
	if (!check_syscall_vector(lg))
		kill_guest(lg, "bad syscall vector");

	return 0;
}

/*L:030 lguest_arch_setup_regs()
 *
 * Most of the Guest's registers are left alone: we used get_zeroed_page() to
 * allocate the structure, so they will be 0.
 *
 * @lg:    the Guest whose lg->regs are initialized.
 * @start: the value stored as the Guest's initial instruction pointer. */
void lguest_arch_setup_regs(struct lguest *lg, unsigned long start)
{
	struct lguest_regs *regs = lg->regs;

	/* There are four "segment" registers which the Guest needs to boot:
	 * The "code segment" register (cs) refers to the kernel code segment
	 * __KERNEL_CS, and the "data", "extra" and "stack" segment registers
	 * refer to the kernel data segment __KERNEL_DS.
	 *
	 * The privilege level is packed into the lower bits.  The Guest runs
	 * at privilege level 1 (GUEST_PL). */
	regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL;
	regs->cs = __KERNEL_CS|GUEST_PL;

	/* The "eflags" register contains miscellaneous flags.  Bit 1 (0x002)
	 * is supposed to always be "1".  Bit 9 (0x200) controls whether
	 * interrupts are enabled.  We always leave interrupts enabled while
	 * running the Guest. */
	regs->eflags = X86_EFLAGS_IF | 0x2;

	/* The "Extended Instruction Pointer" register says where the Guest is
	 * running. */
	regs->eip = start;

	/* %esi points to our boot information, at physical address 0, so don't
	 * touch it. */

	/* There are a couple of GDT entries the Guest expects when first
	 * booting. */
	setup_guest_gdt(lg);
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?