📄 domain.c
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  Copyright (C) 2005 Intel Co
 *  Kun Tian (Kevin Tian) <kevin.tian@intel.com>
 *
 *  05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add VTI domain support
 *
 *  Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                     VA Linux Systems Japan K.K.
 *                     dom0 vp model support
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/mm.h>
#include <xen/iocap.h>
#include <asm/asm-xsi-offsets.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <xen/event.h>
#include <xen/console.h>
#include <xen/version.h>
#include <public/libelf.h>
#include <asm/pgalloc.h>
#include <asm/offsets.h>  /* for IA64_THREAD_INFO_SIZE */
#include <asm/vcpu.h>     /* for function declarations */
#include <public/xen.h>
#include <xen/domain.h>
#include <asm/vmx.h>
#include <asm/vmx_vcpu.h>
#include <asm/vmx_vpd.h>
#include <asm/vmx_phy_mode.h>
#include <asm/vmx_vcpu_save.h>
#include <asm/vhpt.h>
#include <asm/vcpu.h>
#include <asm/tlbflush.h>
#include <asm/regionreg.h>
#include <asm/dom_fw.h>
#include <asm/shadow.h>
#include <xen/guest_access.h>
#include <asm/tlb_track.h>
#include <asm/perfmon.h>
#include <asm/sal.h>
#include <public/vcpu.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <asm/debugger.h>

/* dom0_size: default memory allocation for dom0 (~4GB) */
static unsigned long __initdata dom0_size = 4096UL*1024UL*1024UL;

/* dom0_max_vcpus: maximum number of VCPUs to create for dom0. */
static unsigned int __initdata dom0_max_vcpus = 4;
integer_param("dom0_max_vcpus", dom0_max_vcpus);

extern char dom0_command_line[];

/* forward declaration */
static void init_switch_stack(struct vcpu *v);

/* Address of vpsr.i (in fact evtchn_upcall_mask) of current vcpu.
   This is a Xen virtual address.  */
DEFINE_PER_CPU(uint8_t *, current_psr_i_addr);
DEFINE_PER_CPU(int *, current_psr_ic_addr);

DEFINE_PER_CPU(struct vcpu *, fp_owner);

#include <xen/sched-if.h>

static void
flush_vtlb_for_context_switch(struct vcpu *prev, struct vcpu *next)
{
	int cpu = smp_processor_id();
	int last_vcpu_id, last_processor;

	if (!is_idle_domain(prev->domain))
		tlbflush_update_time
			(&prev->domain->arch.last_vcpu[cpu].tlbflush_timestamp,
			 tlbflush_current_time());

	if (is_idle_domain(next->domain))
		return;

	last_vcpu_id = next->domain->arch.last_vcpu[cpu].vcpu_id;
	last_processor = next->arch.last_processor;
	next->domain->arch.last_vcpu[cpu].vcpu_id = next->vcpu_id;
	next->arch.last_processor = cpu;

	if ((last_vcpu_id != next->vcpu_id &&
	     last_vcpu_id != INVALID_VCPU_ID) ||
	    (last_vcpu_id == next->vcpu_id &&
	     last_processor != cpu &&
	     last_processor != INVALID_PROCESSOR)) {
#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
		u32 last_tlbflush_timestamp =
			next->domain->arch.last_vcpu[cpu].tlbflush_timestamp;
#endif
		int vhpt_is_flushed = 0;

		// If the vTLB implementation is changed, the following must
		// be updated as well.
		if (VMX_DOMAIN(next)) {
			// Currently the vTLB for a VT-i domain is per vcpu,
			// so no flushing is needed.
		} else if (HAS_PERVCPU_VHPT(next->domain)) {
			// nothing to do
		} else {
			if (NEED_FLUSH(__get_cpu_var(vhpt_tlbflush_timestamp),
				       last_tlbflush_timestamp)) {
				local_vhpt_flush();
				vhpt_is_flushed = 1;
			}
		}
		if (vhpt_is_flushed ||
		    NEED_FLUSH(__get_cpu_var(tlbflush_time),
			       last_tlbflush_timestamp)) {
			local_flush_tlb_all();
			perfc_incr(tlbflush_clock_cswitch_purge);
		} else {
			perfc_incr(tlbflush_clock_cswitch_skip);
		}
		perfc_incr(flush_vtlb_for_context_switch);
	}
}

static void flush_cache_for_context_switch(struct vcpu *next)
{
	extern cpumask_t cpu_cache_coherent_map;
	int cpu = smp_processor_id();

	if (is_idle_vcpu(next) ||
	    __test_and_clear_bit(cpu, &next->arch.cache_coherent_map)) {
		if (cpu_test_and_clear(cpu, cpu_cache_coherent_map)) {
			unsigned long flags;
			u64 progress = 0;
			s64 status;

			local_irq_save(flags);
			status = ia64_pal_cache_flush(4, 0, &progress, NULL);
			local_irq_restore(flags);
			if (status != 0)
				panic_domain(NULL, "PAL_CACHE_FLUSH ERROR, "
					     "cache_type=4 status %lx", status);
		}
	}
}

static void set_current_psr_i_addr(struct vcpu *v)
{
	__ia64_per_cpu_var(current_psr_i_addr) =
		(uint8_t *)(v->domain->arch.shared_info_va +
			    INT_ENABLE_OFFSET(v));
	__ia64_per_cpu_var(current_psr_ic_addr) =
		(int *)(v->domain->arch.shared_info_va + XSI_PSR_IC_OFS);
}

static void clear_current_psr_i_addr(void)
{
	__ia64_per_cpu_var(current_psr_i_addr) = NULL;
	__ia64_per_cpu_var(current_psr_ic_addr) = NULL;
}

static void lazy_fp_switch(struct vcpu *prev, struct vcpu *next)
{
	/*
	 * Implement eager save, lazy restore.
	 */
	if (!is_idle_vcpu(prev)) {
		if (VMX_DOMAIN(prev)) {
			if (FP_PSR(prev) & IA64_PSR_MFH) {
				__ia64_save_fpu(prev->arch._thread.fph);
				__ia64_per_cpu_var(fp_owner) = prev;
			}
		} else {
			if (PSCB(prev, hpsr_mfh)) {
				__ia64_save_fpu(prev->arch._thread.fph);
				__ia64_per_cpu_var(fp_owner) = prev;
			}
		}
	}

	if (!is_idle_vcpu(next)) {
		if (VMX_DOMAIN(next)) {
			FP_PSR(next) = IA64_PSR_DFH;
			vcpu_regs(next)->cr_ipsr |= IA64_PSR_DFH;
		} else {
			PSCB(next, hpsr_dfh) = 1;
			PSCB(next, hpsr_mfh) = 0;
			vcpu_regs(next)->cr_ipsr |= IA64_PSR_DFH;
		}
	}
}

static void load_state(struct vcpu *v)
{
	load_region_regs(v);
	ia64_set_pta(vcpu_pta(v));
	vcpu_load_kernel_regs(v);
	if (vcpu_pkr_in_use(v))
		vcpu_pkr_load_regs(v);
	set_current_psr_i_addr(v);
}

void schedule_tail(struct vcpu *prev)
{
	extern char ia64_ivt;

	context_saved(prev);

	if (VMX_DOMAIN(current))
		vmx_do_resume(current);
	else {
		if (VMX_DOMAIN(prev))
			ia64_set_iva(&ia64_ivt);
		load_state(current);
		migrate_timer(&current->arch.hlt_timer, current->processor);
	}
	flush_vtlb_for_context_switch(prev, current);
}

void context_switch(struct vcpu *prev, struct vcpu *next)
{
	uint64_t spsr;

	local_irq_save(spsr);

	if (VMX_DOMAIN(prev)) {
		vmx_save_state(prev);
		if (!VMX_DOMAIN(next)) {
			/* VMX domains can change the physical cr.dcr.
			 * Restore the default to prevent leakage. */
			uint64_t dcr = ia64_getreg(_IA64_REG_CR_DCR);
			/* xenoprof:
			 * Don't change psr.pp; it is manipulated by xenoprof.
			 */
			dcr = (IA64_DEFAULT_DCR_BITS & ~IA64_DCR_PP) |
			      (dcr & IA64_DCR_PP);
			ia64_setreg(_IA64_REG_CR_DCR, dcr);
		}
	}

	lazy_fp_switch(prev, current);

	if (prev->arch.dbg_used || next->arch.dbg_used) {
		/*
		 * Load the debug registers either because they are valid or
		 * to clear the previous ones.
		 */
		ia64_load_debug_regs(next->arch.dbr);
	}

	/*
	 * Disable the VHPT walker.
	 * ia64_switch_to() might cause a VHPT fault because it flushes
	 * dtr[IA64_TR_VHPT] and reinserts the mapping with dtr[IA64_TR_STACK].
	 * (VHPT_SIZE_LOG2 << 2) is just to avoid a Reserved Register/Field
	 * fault.
	 */
	ia64_set_pta(VHPT_SIZE_LOG2 << 2);

	prev = ia64_switch_to(next);

	/* Note: ia64_switch_to does not return here at vcpu initialization. */

	if (VMX_DOMAIN(current)) {
		vmx_load_state(current);
	} else {
		extern char ia64_ivt;

		if (VMX_DOMAIN(prev))
			ia64_set_iva(&ia64_ivt);

		if (!is_idle_vcpu(current)) {
			load_state(current);
			vcpu_set_next_timer(current);
			if (vcpu_timer_expired(current))
				vcpu_pend_timer(current);
			/* steal time accounting */
			if (!guest_handle_is_null(runstate_guest(current)))
				__copy_to_guest(runstate_guest(current),
						&current->runstate, 1);
		} else {
			/* When switching to the idle domain we only need to
			 * disable the VHPT walker.  All accesses that happen
			 * within the idle context are then handled by TR
			 * mappings and the identity mapping.
			 */
			clear_current_psr_i_addr();
		}
	}
	local_irq_restore(spsr);

	/* Lazy fp: if this vcpu is still recorded as the fp owner on the
	 * processor it last ran on, clear that record. */
	if (current->processor != current->arch.last_processor) {
		unsigned long *addr;
		addr = (unsigned long *)
			per_cpu_addr(fp_owner, current->arch.last_processor);
		ia64_cmpxchg(acq, addr, current, 0, 8);
	}

	flush_vtlb_for_context_switch(prev, current);
	flush_cache_for_context_switch(current);
	context_saved(prev);
}

void continue_running(struct vcpu *same)
{
	/* nothing to do */
}

#ifdef CONFIG_PERFMON
static int pal_halt = 1;
static int can_do_pal_halt = 1;

static int __init nohalt_setup(char *str)
{
	pal_halt = can_do_pal_halt = 0;
	return 1;
}
__setup("nohalt", nohalt_setup);

void
update_pal_halt_status(int status)
{
	can_do_pal_halt = pal_halt && status;
}
#else
#define can_do_pal_halt	(1)
#endif

static void default_idle(void)
{
	local_irq_disable();
	if (!softirq_pending(smp_processor_id())) {
		if (can_do_pal_halt)
			safe_halt();
		else
			cpu_relax();
	}
	local_irq_enable();
}

extern void play_dead(void);

static void continue_cpu_idle_loop(void)
{
	int cpu = smp_processor_id();

	for ( ; ; ) {
#ifdef IA64
//		__IRQ_STAT(cpu, idle_timestamp) = jiffies
#else
		irq_stat[cpu].idle_timestamp = jiffies;
#endif
		page_scrub_schedule_work();
		while (!softirq_pending(cpu))
			default_idle();
		raise_softirq(SCHEDULE_SOFTIRQ);
		do_softirq();
		if (!cpu_online(cpu))
			play_dead();
	}
}

void startup_cpu_idle_loop(void)
{
	/* Just some sanity to ensure that the scheduler is set up okay. */
	ASSERT(current->domain->domain_id == IDLE_DOMAIN_ID);
	raise_softirq(SCHEDULE_SOFTIRQ);

	continue_cpu_idle_loop();
}

/* Compile-time test for get_order(sizeof(mapped_regs_t)) !=
 * get_order_from_shift(XMAPPEDREGS_SHIFT). */
#if !(((1 << (XMAPPEDREGS_SHIFT - 1)) < MAPPED_REGS_T_SIZE) && \
      (MAPPED_REGS_T_SIZE < (1 << (XMAPPEDREGS_SHIFT + 1))))
# error "XMAPPEDREGS_SHIFT doesn't match sizeof(mapped_regs_t)."
#endif

void hlt_timer_fn(void *data)
{
	struct vcpu *v = data;
	vcpu_unblock(v);
}

void relinquish_vcpu_resources(struct vcpu *v)
{
	if (HAS_PERVCPU_VHPT(v->domain))
		pervcpu_vhpt_free(v);
	if (v->arch.privregs != NULL) {
		free_xenheap_pages(v->arch.privregs,
				   get_order_from_shift(XMAPPEDREGS_SHIFT));
		v->arch.privregs = NULL;
	}
	kill_timer(&v->arch.hlt_timer);
}

struct vcpu *alloc_vcpu_struct(void)
{
	struct page_info *page;
	struct vcpu *v;
	struct thread_info *ti;
	static int first_allocation = 1;

	if (first_allocation) {
		first_allocation = 0;
		/* Keep idle vcpu0 statically allocated at compile time,
		 * because some code inherited from Linux still requires it
		 * during the early boot phase. */
		return idle_vcpu[0];
	}

	page = alloc_domheap_pages(NULL, KERNEL_STACK_SIZE_ORDER, 0);
	if (page == NULL)
		return NULL;
	v = page_to_virt(page);
	memset(v, 0, sizeof(*v));

	ti = alloc_thread_info(v);
	/* Clear thread_info to clear some important fields, like
	 * preempt_count. */
	memset(ti, 0, sizeof(struct thread_info));
	init_switch_stack(v);

	return v;
}

void free_vcpu_struct(struct vcpu *v)
{
	free_domheap_pages(virt_to_page(v), KERNEL_STACK_SIZE_ORDER);
}

int vcpu_initialise(struct vcpu *v)
{
	struct domain *d = v->domain;

	if (!is_idle_domain(d)) {
		v->arch.metaphysical_rid_dt = d->arch.metaphysical_rid_dt;
		v->arch.metaphysical_rid_d = d->arch.metaphysical_rid_d;
		/* Set default values to saved_rr. */
		v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rid_dt;
		v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rid_dt;

		/* Is this correct?  It depends on the domain's rid usage.

		   A domain may share rids among its processors (e.g. when it
		   has a global VHPT).  In that case we should also share rids
		   among vcpus and the rid range should be the same.

		   However, a domain may use per-cpu rid allocation.  In that
		   case we don't want to share rids among vcpus, though we may
		   do so if two vcpus run on the same cpu... */
		v->arch.starting_rid = d->arch.starting_rid;
		v->arch.ending_rid = d->arch.ending_rid;