iommu.c
        u64 addr, unsigned int pages, int non_present_entry_flush)
{
    unsigned int align;
    struct iommu_flush *flush = iommu_get_flush(iommu);

    ASSERT(!(addr & (~PAGE_MASK_4K)));
    ASSERT(pages > 0);

    /* Fall back to domain-selective flush if there is no PSI support. */
    if ( !cap_pgsel_inv(iommu->cap) )
        return iommu_flush_iotlb_dsi(iommu, did, non_present_entry_flush);

    /*
     * PSI requires the page size to be 2 ^ x, and the base address to be
     * naturally aligned to that size.
     */
    align = get_alignment(addr >> PAGE_SHIFT_4K, pages);

    /* Fall back to domain-selective flush if the size is too big. */
    if ( align > cap_max_amask_val(iommu->cap) )
        return iommu_flush_iotlb_dsi(iommu, did, non_present_entry_flush);

    addr >>= PAGE_SHIFT_4K + align;
    addr <<= PAGE_SHIFT_4K + align;

    return flush->iotlb(iommu, did, addr, align, DMA_TLB_PSI_FLUSH,
                        non_present_entry_flush);
}

void iommu_flush_all(void)
{
    struct acpi_drhd_unit *drhd;
    struct iommu *iommu;

    wbinvd();
    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;
        iommu_flush_context_global(iommu, 0);
        iommu_flush_iotlb_global(iommu, 0);
    }
}

/* Clear one page's page table entry. */
static void dma_pte_clear_one(struct domain *domain, u64 addr)
{
    struct hvm_iommu *hd = domain_hvm_iommu(domain);
    struct acpi_drhd_unit *drhd;
    struct iommu *iommu;
    struct dma_pte *page = NULL, *pte = NULL;
    u64 pg_maddr;

    /* Get the last-level pte. */
    pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
    if ( pg_maddr == 0 )
        return;

    page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
    pte = page + address_level_offset(addr, 1);

    if ( !dma_pte_present(*pte) )
    {
        unmap_vtd_domain_page(page);
        return;
    }

    dma_clear_pte(*pte);
    iommu_flush_cache_entry(pte);

    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;
        if ( test_bit(iommu->index, &hd->iommu_bitmap) )
            if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
                                       addr, 1, 0) )
                iommu_flush_write_buffer(iommu);
    }

    unmap_vtd_domain_page(page);
}

/* Clear last-level ptes; a TLB flush should follow. */
static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
{
    struct hvm_iommu *hd = domain_hvm_iommu(domain);
    int addr_width = agaw_to_width(hd->agaw);

    start &= (((u64)1) << addr_width) - 1;
    end &= (((u64)1) << addr_width) - 1;

    /* In case it's a partial page. */
    start = PAGE_ALIGN_4K(start);
    end &= PAGE_MASK_4K;

    /* No lock needed here: nobody else touches this iova range. */
    while ( start < end )
    {
        dma_pte_clear_one(domain, start);
        start += PAGE_SIZE_4K;
    }
}

static void iommu_free_pagetable(u64 pt_maddr, int level)
{
    int i;
    struct dma_pte *pt_vaddr, *pte;
    int next_level = level - 1;

    if ( pt_maddr == 0 )
        return;

    pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);

    for ( i = 0; i < PTE_NUM; i++ )
    {
        pte = &pt_vaddr[i];
        if ( !dma_pte_present(*pte) )
            continue;

        if ( next_level >= 1 )
            iommu_free_pagetable(dma_pte_addr(*pte), next_level);

        dma_clear_pte(*pte);
        iommu_flush_cache_entry(pte);
    }

    unmap_vtd_domain_page(pt_vaddr);
    free_pgtable_maddr(pt_maddr);
}

static int iommu_set_root_entry(struct iommu *iommu)
{
    u32 cmd, sts;
    unsigned long flags;
    s_time_t start_time;

    if ( iommu->root_maddr != 0 )
    {
        free_pgtable_maddr(iommu->root_maddr);
        iommu->root_maddr = 0;
    }

    spin_lock_irqsave(&iommu->register_lock, flags);

    iommu->root_maddr = alloc_pgtable_maddr();
    if ( iommu->root_maddr == 0 )
    {
        spin_unlock_irqrestore(&iommu->register_lock, flags);
        return -ENOMEM;
    }

    dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
    cmd = iommu->gcmd | DMA_GCMD_SRTP;
    dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);

    /* Make sure the hardware has completed it. */
    start_time = NOW();
    for ( ; ; )
    {
        sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
        if ( sts & DMA_GSTS_RTPS )
            break;
        if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
            panic("%s: DMAR hardware is malfunctional,"
                  " please disable IOMMU\n", __func__);
        cpu_relax();
    }

    spin_unlock_irqrestore(&iommu->register_lock, flags);

    return 0;
}

static int iommu_enable_translation(struct iommu *iommu)
{
    u32 sts;
    unsigned long flags;
    s_time_t start_time;

    dprintk(XENLOG_INFO VTDPREFIX,
            "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
    spin_lock_irqsave(&iommu->register_lock, flags);
    iommu->gcmd |= DMA_GCMD_TE;
    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);

    /* Make sure the hardware has completed it. */
    start_time = NOW();
    for ( ; ; )
    {
        sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
        if ( sts & DMA_GSTS_TES )
            break;
        if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
            panic("%s: DMAR hardware is malfunctional,"
                  " please disable IOMMU\n", __func__);
        cpu_relax();
    }

    /* Disable PMRs when the VT-d engine takes effect, per spec definition. */
    disable_pmr(iommu);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    return 0;
}

int iommu_disable_translation(struct iommu *iommu)
{
    u32 sts;
    unsigned long flags;
    s_time_t start_time;

    spin_lock_irqsave(&iommu->register_lock, flags);
    iommu->gcmd &= ~DMA_GCMD_TE;
    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);

    /* Make sure the hardware has completed it. */
    start_time = NOW();
    for ( ; ; )
    {
        sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
        if ( !(sts & DMA_GSTS_TES) )
            break;
        if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
            panic("%s: DMAR hardware is malfunctional,"
                  " please disable IOMMU\n", __func__);
        cpu_relax();
    }

    spin_unlock_irqrestore(&iommu->register_lock, flags);

    return 0;
}

static struct iommu *vector_to_iommu[NR_VECTORS];

static int iommu_page_fault_do_one(struct iommu *iommu, int type,
                                   u8 fault_reason, u16 source_id, u64 addr)
{
    dprintk(XENLOG_WARNING VTDPREFIX,
            "iommu_fault:%s: %x:%x.%x addr %"PRIx64" REASON %x "
            "iommu->reg = %p\n",
            (type ? "DMA Read" : "DMA Write"),
            (source_id >> 8), PCI_SLOT(source_id & 0xFF),
            PCI_FUNC(source_id & 0xFF), addr, fault_reason, iommu->reg);

#ifndef __i386__ /* map_domain_page() cannot be used in this context */
    if ( fault_reason < 0x20 )
        print_vtd_entries(iommu, (source_id >> 8), (source_id & 0xff),
                          (addr >> PAGE_SHIFT));
#endif

    return 0;
}

static void iommu_fault_status(u32 fault_status)
{
    if ( fault_status & DMA_FSTS_PFO )
        dprintk(XENLOG_ERR VTDPREFIX,
                "iommu_fault_status: Fault Overflow\n");
    else if ( fault_status & DMA_FSTS_PPF )
        dprintk(XENLOG_ERR VTDPREFIX,
                "iommu_fault_status: Primary Pending Fault\n");
    else if ( fault_status & DMA_FSTS_AFO )
        dprintk(XENLOG_ERR VTDPREFIX,
                "iommu_fault_status: Advanced Fault Overflow\n");
    else if ( fault_status & DMA_FSTS_APF )
        dprintk(XENLOG_ERR VTDPREFIX,
                "iommu_fault_status: Advanced Pending Fault\n");
    else if ( fault_status & DMA_FSTS_IQE )
        dprintk(XENLOG_ERR VTDPREFIX,
                "iommu_fault_status: Invalidation Queue Error\n");
    else if ( fault_status & DMA_FSTS_ICE )
        dprintk(XENLOG_ERR VTDPREFIX,
                "iommu_fault_status: Invalidation Completion Error\n");
    else if ( fault_status & DMA_FSTS_ITE )
        dprintk(XENLOG_ERR VTDPREFIX,
                "iommu_fault_status: Invalidation Time-out Error\n");
}

#define PRIMARY_FAULT_REG_LEN (16)
static void iommu_page_fault(int vector, void *dev_id,
                             struct cpu_user_regs *regs)
{
    struct iommu *iommu = dev_id;
    int reg, fault_index;
    u32 fault_status;
    unsigned long flags;

    dprintk(XENLOG_WARNING VTDPREFIX,
            "iommu_page_fault: iommu->reg = %p\n", iommu->reg);

    spin_lock_irqsave(&iommu->register_lock, flags);
    fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    iommu_fault_status(fault_status);

    /* FIXME: ignore advanced fault log */
    if ( !(fault_status & DMA_FSTS_PPF) )
        return;

    fault_index = dma_fsts_fault_record_index(fault_status);
    reg = cap_fault_reg_offset(iommu->cap);
    for ( ; ; )
    {
        u8 fault_reason;
        u16 source_id;
        u32 data;
        u64 guest_addr;
        int type;

        /* Highest 32 bits of the fault record. */
        spin_lock_irqsave(&iommu->register_lock, flags);
        data = dmar_readl(iommu->reg, reg +
                          fault_index * PRIMARY_FAULT_REG_LEN + 12);
        if ( !(data & DMA_FRCD_F) )
        {
            spin_unlock_irqrestore(&iommu->register_lock, flags);
            break;
        }

        fault_reason = dma_frcd_fault_reason(data);
        type = dma_frcd_type(data);

        data = dmar_readl(iommu->reg, reg +
                          fault_index * PRIMARY_FAULT_REG_LEN + 8);
        source_id = dma_frcd_source_id(data);

        guest_addr = dmar_readq(iommu->reg, reg +
                                fault_index * PRIMARY_FAULT_REG_LEN);
        guest_addr = dma_frcd_page_addr(guest_addr);

        /* Clear the fault. */
        dmar_writel(iommu->reg, reg +
                    fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
        spin_unlock_irqrestore(&iommu->register_lock, flags);

        iommu_page_fault_do_one(iommu, type, fault_reason,
                                source_id, guest_addr);

        fault_index++;
        if ( fault_index > cap_num_fault_regs(iommu->cap) )
            fault_index = 0;
    }

    /* Clear primary fault overflow. */
    if ( fault_status & DMA_FSTS_PFO )
    {
        spin_lock_irqsave(&iommu->register_lock, flags);
        dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
        spin_unlock_irqrestore(&iommu->register_lock, flags);
    }
}

static void dma_msi_unmask(unsigned int vector)
{
    struct iommu *iommu = vector_to_iommu[vector];
    unsigned long flags;

    /* Unmask it. */
    spin_lock_irqsave(&iommu->register_lock, flags);
    dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
    spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void dma_msi_mask(unsigned int vector)
{
    unsigned long flags;
    struct iommu *iommu = vector_to_iommu[vector];

    /* Mask it. */
    spin_lock_irqsave(&iommu->register_lock, flags);
    dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
    spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static unsigned int dma_msi_startup(unsigned int vector)
{
    dma_msi_unmask(vector);
    return 0;
}

static void dma_msi_end(unsigned int vector)
{
    dma_msi_unmask(vector);
    ack_APIC_irq();
}

static void dma_msi_data_init(struct iommu *iommu, int vector)
{
    u32 msi_data = 0;
    unsigned long flags;

    /* Fixed, edge, assert mode. Follow MSI setting. */
    msi_data |= vector & 0xff;
    msi_data |= 1 << 14;

    spin_lock_irqsave(&iommu->register_lock, flags);
    dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
    spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
{
    u64 msi_address;
    unsigned long flags;

    /* Physical, dedicated cpu. Follow MSI setting. */
    msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
    msi_address |= MSI_PHYSICAL_MODE << 2;
    msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
    msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;

    spin_lock_irqsave(&iommu->register_lock, flags);
    dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
    dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
    spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
{
    struct iommu *iommu = vector_to_iommu[vector];
    dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
}

static struct hw_interrupt_type dma_msi_type = {
    .typename = "DMA_MSI",
    .startup = dma_msi_startup,
    .shutdown = dma_msi_mask,
    .enable = dma_msi_unmask,
    .disable = dma_msi_mask,
    .ack = dma_msi_mask,
    .end = dma_msi_end,
    .set_affinity = dma_msi_set_affinity,
};

int iommu_set_interrupt(struct iommu *iommu)
{
    int vector, ret;

    vector = assign_irq_vector(AUTO_ASSIGN);
    vector_to_iommu[vector] = iommu;

    /* The VT-d fault is an MSI, so make irq == vector. */
    irq_vector[vector] = vector;
    vector_irq[vector] = vector;

    if ( !vector )
    {
        gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
        return -EINVAL;
    }

    irq_desc[vector].handler = &dma_msi_type;
    ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
    if ( ret )
        gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");

    return vector;
}

static int iommu_alloc(struct acpi_drhd_unit *drhd)
{
    struct iommu *iommu;
    unsigned long sagaw;
    int agaw;

    if ( nr_iommus > MAX_IOMMUS )
    {
        gdprintk(XENLOG_ERR VTDPREFIX,
                 "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
        return -ENOMEM;
    }

    iommu = xmalloc(struct iommu);
    if ( iommu == NULL )
        return -ENOMEM;
    memset(iommu, 0, sizeof(struct iommu));

    iommu->intel = alloc_intel_iommu();
    if ( iommu->intel == NULL )
    {
        xfree(iommu);
        return -ENOMEM;
    }

    set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
    iommu->reg = (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
    iommu->index = nr_iommus++;

    iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
    iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);

    /* Calculate the number of pagetable levels: between 2 and 4. */
    sagaw = cap_sagaw(iommu->cap);
    for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
        if ( test_bit(agaw, &sagaw) )
            break;
    if ( agaw < 0 )
    {