/******************************************************************************
 * hvm/emulate.c
 *
 * HVM instruction emulation. Used for MMIO and VMX real mode.
 *
 * Copyright (c) 2008, Citrix Systems, Inc.
 *
 * Authors:
 *    Keir Fraser <keir.fraser@citrix.com>
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/paging.h>
#include <asm/event.h>
#include <asm/hvm/emulate.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>

static int hvmemul_do_io(
    int is_mmio, paddr_t addr, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    paddr_t value = ram_gpa;
    int value_is_ptr = (p_data == NULL);
    struct vcpu *curr = current;
    vcpu_iodata_t *vio = get_ioreq(curr);
    ioreq_t *p = &vio->vp_ioreq;
    int rc;

    /*
     * Weird-sized accesses have undefined behaviour: we discard writes
     * and read all-ones.
     */
    if ( unlikely((size > sizeof(long)) || (size & (size - 1))) )
    {
        gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size);
        ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
        if ( dir == IOREQ_READ )
            memset(p_data, ~0, size);
        return X86EMUL_UNHANDLEABLE;
    }

    if ( (p_data != NULL) && (dir == IOREQ_WRITE) )
    {
        memcpy(&value, p_data, size);
        p_data = NULL;
    }

    if ( is_mmio && !value_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa;
            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
                return X86EMUL_OKAY;
        }
        else
        {
            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa;
            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
            {
                memcpy(p_data, &curr->arch.hvm_vcpu.mmio_large_read[addr - pa],
                       size);
                return X86EMUL_OKAY;
            }
        }
    }

    switch ( curr->arch.hvm_vcpu.io_state )
    {
    case HVMIO_none:
        break;
    case HVMIO_completed:
        curr->arch.hvm_vcpu.io_state = HVMIO_none;
        if ( p_data == NULL )
            return X86EMUL_UNHANDLEABLE;
        goto finish_access;
    case HVMIO_dispatched:
        /* May have to wait for previous cycle of a multi-write to complete. */
        if ( is_mmio && !value_is_ptr && (dir == IOREQ_WRITE) &&
             (addr == (curr->arch.hvm_vcpu.mmio_large_write_pa +
                       curr->arch.hvm_vcpu.mmio_large_write_bytes)) )
            return X86EMUL_RETRY;
    default:
        return X86EMUL_UNHANDLEABLE;
    }

    if ( p->state != STATE_IOREQ_NONE )
    {
        gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n",
                 p->state);
        return X86EMUL_UNHANDLEABLE;
    }

    curr->arch.hvm_vcpu.io_state =
        (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;

    p->dir = dir;
    p->data_is_ptr = value_is_ptr;
    p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO;
    p->size = size;
    p->addr = addr;
    p->count = *reps;
    p->df = df;
    p->data = value;
    p->io_count++;

    if ( is_mmio )
    {
        rc = hvm_mmio_intercept(p);
        if ( rc == X86EMUL_UNHANDLEABLE )
            rc = hvm_buffered_io_intercept(p);
    }
    else
    {
        rc = hvm_portio_intercept(p);
    }

    switch ( rc )
    {
    case X86EMUL_OKAY:
    case X86EMUL_RETRY:
        *reps = p->count;
        p->state = STATE_IORESP_READY;
        hvm_io_assist();
        curr->arch.hvm_vcpu.io_state = HVMIO_none;
        break;
    case X86EMUL_UNHANDLEABLE:
        hvm_send_assist_req(curr);
        rc = (p_data != NULL) ? X86EMUL_RETRY : X86EMUL_OKAY;
        break;
    default:
        BUG();
    }

    if ( rc != X86EMUL_OKAY )
        return rc;

 finish_access:
    if ( p_data != NULL )
        memcpy(p_data, &curr->arch.hvm_vcpu.io_data, size);

    if ( is_mmio && !value_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa;
            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes;
            if ( bytes == 0 )
                pa = curr->arch.hvm_vcpu.mmio_large_write_pa = addr;
            if ( addr == (pa + bytes) )
                curr->arch.hvm_vcpu.mmio_large_write_bytes += size;
        }
        else
        {
            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa;
            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes;
            if ( bytes == 0 )
                pa = curr->arch.hvm_vcpu.mmio_large_read_pa = addr;
            if ( (addr == (pa + bytes)) &&
                 ((bytes + size) <
                  sizeof(curr->arch.hvm_vcpu.mmio_large_read)) )
            {
                memcpy(&curr->arch.hvm_vcpu.mmio_large_read[addr - pa],
                       p_data, size);
                curr->arch.hvm_vcpu.mmio_large_read_bytes += size;
            }
        }
    }

    return X86EMUL_OKAY;
}

static int hvmemul_do_pio(
    unsigned long port, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    return hvmemul_do_io(0, port, reps, size, ram_gpa, dir, df, p_data);
}

static int hvmemul_do_mmio(
    paddr_t gpa, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    return hvmemul_do_io(1, gpa, reps, size, ram_gpa, dir, df, p_data);
}

/*
 * Convert addr from linear to physical form, valid over the range
 * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to
 * the valid computed range. It is always >0 when X86EMUL_OKAY is returned.
 * @pfec indicates the access checks to be performed during page-table walks.
 */
static int hvmemul_linear_to_phys(
    unsigned long addr,
    paddr_t *paddr,
    unsigned int bytes_per_rep,
    unsigned long *reps,
    uint32_t pfec,
    struct hvm_emulate_ctxt *hvmemul_ctxt)
{
    struct vcpu *curr = current;
    unsigned long pfn, npfn, done, todo, i, offset = addr & ~PAGE_MASK;
    int reverse;

    /*
     * Clip repetitions to a sensible maximum. This avoids extensive looping in
     * this function while still amortising the cost of I/O trap-and-emulate.
     */
    *reps = min_t(unsigned long, *reps, 4096);

    /* With no paging it's easy: linear == physical. */
    if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
    {
        *paddr = addr;
        return X86EMUL_OKAY;
    }

    /* Reverse mode if this is a backwards multi-iteration string operation. */
    reverse = (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1);

    if ( reverse && ((PAGE_SIZE - offset) < bytes_per_rep) )
    {
        /* Do page-straddling first iteration forwards via recursion. */
        paddr_t _paddr;
        unsigned long one_rep = 1;
        int rc = hvmemul_linear_to_phys(
            addr, &_paddr, bytes_per_rep, &one_rep, pfec, hvmemul_ctxt);
        if ( rc != X86EMUL_OKAY )
            return rc;
        pfn = _paddr >> PAGE_SHIFT;
    }
    else if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
    {
        hvm_inject_exception(TRAP_page_fault, pfec, addr);
        return X86EMUL_EXCEPTION;
    }

    done = reverse ? bytes_per_rep + offset : PAGE_SIZE - offset;
    todo = *reps * bytes_per_rep;
    for ( i = 1; done < todo; i++ )
    {
        /* Get the next PFN in the range. */
        addr += reverse ? -PAGE_SIZE : PAGE_SIZE;
        npfn = paging_gva_to_gfn(curr, addr, &pfec);

        /* Is it contiguous with the preceding PFNs? If not then we're done. */
        if ( (npfn == INVALID_GFN) ||
             (npfn != (pfn + (reverse ? -i : i))) )
        {
            done /= bytes_per_rep;
            if ( done == 0 )
            {
                ASSERT(!reverse);
                if ( npfn != INVALID_GFN )
                    return X86EMUL_UNHANDLEABLE;
                hvm_inject_exception(TRAP_page_fault, pfec, addr & PAGE_MASK);
                return X86EMUL_EXCEPTION;
            }
            *reps = done;
            break;
        }

        done += PAGE_SIZE;
    }

    *paddr = ((paddr_t)pfn << PAGE_SHIFT) | offset;
    return X86EMUL_OKAY;
}

static int hvmemul_virtual_to_linear(
    enum x86_segment seg,
    unsigned long offset,
    unsigned int bytes_per_rep,
    unsigned long *reps,
    enum hvm_access_type access_type,
    struct hvm_emulate_ctxt *hvmemul_ctxt,
    unsigned long *paddr)
{
    struct segment_register *reg;
    int okay;

    if ( seg == x86_seg_none )
    {
        *paddr = offset;
        return X86EMUL_OKAY;
    }

    /*
     * Clip repetitions to avoid overflow when multiplying by @bytes_per_rep.
     * The chosen maximum is very conservative but it's what we use in
     * hvmemul_linear_to_phys() so there is no point in using a larger value.
     */
    *reps = min_t(unsigned long, *reps, 4096);

    reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);

    if ( (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1) )
    {
        /*
         * x86_emulate() clips the repetition count to ensure we don't wrap
         * the effective-address index register. Hence this assertion holds.
         */
        ASSERT(offset >= ((*reps - 1) * bytes_per_rep));
        okay = hvm_virtual_to_linear_addr(
            seg, reg, offset - (*reps - 1) * bytes_per_rep,
            *reps * bytes_per_rep, access_type,
            hvmemul_ctxt->ctxt.addr_size, paddr);
        *paddr += (*reps - 1) * bytes_per_rep;
        if ( hvmemul_ctxt->ctxt.addr_size != 64 )
            *paddr = (uint32_t)*paddr;
    }
    else
    {
        okay = hvm_virtual_to_linear_addr(
            seg, reg, offset, *reps * bytes_per_rep, access_type,
            hvmemul_ctxt->ctxt.addr_size, paddr);
    }

    if ( okay )
        return X86EMUL_OKAY;

    /* If this is a string operation, emulate each iteration separately. */
    if ( *reps != 1 )
        return X86EMUL_UNHANDLEABLE;

    /* This is a singleton operation: fail it with an exception. */
    hvmemul_ctxt->exn_pending = 1;
    hvmemul_ctxt->exn_vector = TRAP_gp_fault;
    hvmemul_ctxt->exn_error_code = 0;
    hvmemul_ctxt->exn_insn_len = 0;
    return X86EMUL_EXCEPTION;
}

static int __hvmemul_read(
    enum x86_segment seg,
    unsigned long offset,
    void *p_data,
    unsigned int bytes,
    enum hvm_access_type access_type,
    struct hvm_emulate_ctxt *hvmemul_ctxt)
{
    struct vcpu *curr = current;
    unsigned long addr, reps = 1;
    uint32_t pfec = PFEC_page_present;
    paddr_t gpa;
    int rc;

    rc = hvmemul_virtual_to_linear(
        seg, offset, bytes, &reps, access_type, hvmemul_ctxt, &addr);
    if ( rc != X86EMUL_OKAY )
        return rc;

    if ( unlikely(curr->arch.hvm_vcpu.mmio_gva == (addr & PAGE_MASK)) &&
         curr->arch.hvm_vcpu.mmio_gva )
    {
        unsigned int off = addr & (PAGE_SIZE - 1);
        if ( access_type == hvm_access_insn_fetch )
            return X86EMUL_UNHANDLEABLE;
        gpa = (((paddr_t)curr->arch.hvm_vcpu.mmio_gpfn << PAGE_SHIFT) | off);
        if ( (off + bytes) <= PAGE_SIZE )
            return hvmemul_do_mmio(gpa, &reps, bytes, 0,
                                   IOREQ_READ, 0, p_data);
    }

    if ( (seg != x86_seg_none) &&
         (hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3) )
        pfec |= PFEC_user_mode;

    rc = ((access_type == hvm_access_insn_fetch) ?
          hvm_fetch_from_guest_virt(p_data, addr, bytes, pfec) :
          hvm_copy_from_guest_virt(p_data, addr, bytes, pfec));

    if ( rc == HVMCOPY_bad_gva_to_gfn )
        return X86EMUL_EXCEPTION;

    if ( rc == HVMCOPY_bad_gfn_to_mfn )
    {
        if ( access_type == hvm_access_insn_fetch )
            return X86EMUL_UNHANDLEABLE;
        rc = hvmemul_linear_to_phys(
            addr, &gpa, bytes, &reps, pfec, hvmemul_ctxt);
        if ( rc != X86EMUL_OKAY )
            return rc;
        return hvmemul_do_mmio(gpa, &reps, bytes, 0, IOREQ_READ, 0, p_data);
    }

    return X86EMUL_OKAY;
}

static int hvmemul_read(
    enum x86_segment seg,
    unsigned long offset,
    void *p_data,
    unsigned int bytes,
    struct x86_emulate_ctxt *ctxt)
{
    return __hvmemul_read(
        seg, offset, p_data, bytes, hvm_access_read,
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt));
}

static int hvmemul_insn_fetch(
    enum x86_segment seg,
    unsigned long offset,
    void *p_data,
    unsigned int bytes,
    struct x86_emulate_ctxt *ctxt)
{
    struct hvm_emulate_ctxt *hvmemul_ctxt =
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
    unsigned int insn_off = offset - hvmemul_ctxt->insn_buf_eip;

    /* Fall back if requested bytes are not in the prefetch cache. */
    if ( unlikely((insn_off + bytes) > hvmemul_ctxt->insn_buf_bytes) )
        return __hvmemul_read(
            seg, offset, p_data, bytes,
            hvm_access_insn_fetch, hvmemul_ctxt);

    /* Hit the cache. Simple memcpy. */
    memcpy(p_data, &hvmemul_ctxt->insn_buf[insn_off], bytes);
    return X86EMUL_OKAY;
}

static int hvmemul_write(
    enum x86_segment seg,
    unsigned long offset,
    void *p_data,
    unsigned int bytes,
    struct x86_emulate_ctxt *ctxt)
{
    struct hvm_emulate_ctxt *hvmemul_ctxt =
        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
    struct vcpu *curr = current;
    unsigned long addr, reps = 1;
    uint32_t pfec = PFEC_page_present | PFEC_write_access;
    paddr_t gpa;
    int rc;

    rc = hvmemul_virtual_to_linear(
        seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr);
    if ( rc != X86EMUL_OKAY )
        return rc;

    if ( unlikely(curr->arch.hvm_vcpu.mmio_gva == (addr & PAGE_MASK)) &&
         curr->arch.hvm_vcpu.mmio_gva )
    {
        unsigned int off = addr & (PAGE_SIZE - 1);
        gpa = (((paddr_t)curr->arch.hvm_vcpu.mmio_gpfn << PAGE_SHIFT) | off);
        if ( (off + bytes) <= PAGE_SIZE )
            return hvmemul_do_mmio(gpa, &reps, bytes, 0,
                                   IOREQ_WRITE, 0, p_data);
    }

    if ( (seg != x86_seg_none) &&
         (hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3) )
        pfec |= PFEC_user_mode;

    rc = hvm_copy_to_guest_virt(addr, p_data, bytes, pfec);
    if ( rc == HVMCOPY_bad_gva_to_gfn )
        return X86EMUL_EXCEPTION;

    if ( rc == HVMCOPY_bad_gfn_to_mfn )
    {
        rc = hvmemul_linear_to_phys(
            addr, &gpa, bytes, &reps, pfec, hvmemul_ctxt);
        if ( rc != X86EMUL_OKAY )
            return rc;
        return hvmemul_do_mmio(gpa, &reps, bytes, 0, IOREQ_WRITE, 0, p_data);
    }

    return X86EMUL_OKAY;
}

static int hvmemul_cmpxchg(
    enum x86_segment seg,
    unsigned long offset,
    void *p_old,
    void *p_new,
    unsigned int bytes,
    struct x86_emulate_ctxt *ctxt)
{
    /* Fix this in case the guest is really relying on r-m-w atomicity. */
    return hvmemul_write(seg, offset, p_new, bytes, ctxt);
}
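
/*
 * Sketch only: the handlers above all take a struct x86_emulate_ctxt and are
 * consumed by Xen's generic x86 emulator through a table of x86_emulate_ops
 * callbacks.  The minimal wiring under that assumption is shown below; the
 * variable name and the restriction to just these four callbacks are
 * illustrative (the complete file registers further handlers, e.g. for REP
 * string I/O and control-register accesses).
 */
static struct x86_emulate_ops hvm_emulate_ops_sketch = {
    .read       = hvmemul_read,       /* data reads, incl. MMIO reads      */
    .insn_fetch = hvmemul_insn_fetch, /* instruction bytes, prefetch-cached */
    .write      = hvmemul_write,      /* data writes, incl. MMIO writes    */
    .cmpxchg    = hvmemul_cmpxchg,    /* emulated (non-atomic) cmpxchg     */
};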