intel-iommu.c
    return 0;
}

static void dma_msi_end(unsigned int vector)
{
    dma_msi_unmask(vector);
    ack_APIC_irq();
}

static void dma_msi_data_init(struct iommu *iommu, int vector)
{
    u32 msi_data = 0;
    unsigned long flags;

    /* Fixed, edge, assert mode. Follow MSI setting */
    msi_data |= vector & 0xff;
    msi_data |= 1 << 14;

    spin_lock_irqsave(&iommu->register_lock, flags);
    dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
    spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
{
    u64 msi_address;
    unsigned long flags;

    /* Physical, dedicated cpu. Follow MSI setting */
    msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
    msi_address |= MSI_PHYSICAL_MODE << 2;
    msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
    msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;

    spin_lock_irqsave(&iommu->register_lock, flags);
    dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
    dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
    spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
{
    struct iommu *iommu = vector_to_iommu[vector];
    dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
}

static struct hw_interrupt_type dma_msi_type = {
    .typename = "DMA_MSI",
    .startup = dma_msi_startup,
    .shutdown = dma_msi_mask,
    .enable = dma_msi_unmask,
    .disable = dma_msi_mask,
    .ack = dma_msi_mask,
    .end = dma_msi_end,
    .set_affinity = dma_msi_set_affinity,
};

int iommu_set_interrupt(struct iommu *iommu)
{
    int vector, ret;

    vector = assign_irq_vector(AUTO_ASSIGN);
    vector_to_iommu[vector] = iommu;

    /* VT-d fault is a MSI, make irq == vector */
    irq_vector[vector] = vector;
    vector_irq[vector] = vector;

    if ( !vector )
    {
        gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
        return -EINVAL;
    }

    irq_desc[vector].handler = &dma_msi_type;
    ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
    if ( ret )
        gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
    return vector;
}

struct iommu *iommu_alloc(void *hw_data)
{
    struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
    struct iommu *iommu;
    unsigned long sagaw;
    int agaw;

    if ( nr_iommus > MAX_IOMMUS )
    {
        gdprintk(XENLOG_ERR VTDPREFIX,
                 "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
        return NULL;
    }

    iommu = xmalloc(struct iommu);
    if ( !iommu )
        return NULL;
    memset(iommu, 0, sizeof(struct iommu));

    set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
    iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
    dprintk(XENLOG_INFO VTDPREFIX,
            "iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
            iommu->reg, drhd->address);
    iommu->index = nr_iommus++;

    if ( !iommu->reg )
    {
        printk(KERN_ERR VTDPREFIX "IOMMU: can't map the region\n");
        goto error;
    }

    iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
    iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);

    /* Calculate number of pagetable levels: between 2 and 4. */
    sagaw = cap_sagaw(iommu->cap);
    for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
        if ( test_bit(agaw, &sagaw) )
            break;
    if ( agaw < 0 )
    {
        gdprintk(XENLOG_ERR VTDPREFIX,
                 "IOMMU: unsupported sagaw %lx\n", sagaw);
        xfree(iommu);
        return NULL;
    }
    iommu->nr_pt_levels = agaw_to_level(agaw);

    if ( !ecap_coherent(iommu->ecap) )
        iommus_incoherent = 1;

    spin_lock_init(&iommu->lock);
    spin_lock_init(&iommu->register_lock);

    drhd->iommu = iommu;
    return iommu;
 error:
    xfree(iommu);
    return NULL;
}

static void free_iommu(struct iommu *iommu)
{
    if ( !iommu )
        return;
    if ( iommu->root_entry )
        free_xenheap_page((void *)iommu->root_entry);
    if ( iommu->reg )
        iounmap(iommu->reg);
    free_irq(iommu->vector);
    xfree(iommu);
}

#define guestwidth_to_adjustwidth(gaw) ({       \
    int agaw, r = (gaw - 12) % 9;               \
    agaw = (r == 0) ? gaw : (gaw + 9 - r);      \
    if ( agaw > 64 )                            \
        agaw = 64;                              \
    agaw; })

int iommu_domain_init(struct domain *domain)
{
    struct hvm_iommu *hd = domain_hvm_iommu(domain);
    struct iommu *iommu = NULL;
    struct acpi_drhd_unit *drhd;

    spin_lock_init(&hd->mapping_lock);
    spin_lock_init(&hd->iommu_list_lock);
    INIT_LIST_HEAD(&hd->pdev_list);
    INIT_LIST_HEAD(&hd->g2m_ioport_list);

    if ( !vtd_enabled || list_empty(&acpi_drhd_units) )
        return 0;

    for_each_drhd_unit ( drhd )
        iommu = drhd->iommu ? : iommu_alloc(drhd);

    hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
    return 0;
}

static int domain_context_mapping_one(
    struct domain *domain,
    struct iommu *iommu,
    u8 bus, u8 devfn)
{
    struct hvm_iommu *hd = domain_hvm_iommu(domain);
    struct context_entry *context;
    unsigned long flags;
    int ret = 0;
    u64 pgd_maddr;
    int agaw = -1;

    context = device_to_context_entry(iommu, bus, devfn);
    if ( !context )
    {
        gdprintk(XENLOG_ERR VTDPREFIX,
                 "domain_context_mapping_one:context == NULL:"
                 "bdf = %x:%x:%x\n",
                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        return -ENOMEM;
    }

    if ( context_present(*context) )
    {
        gdprintk(XENLOG_WARNING VTDPREFIX,
                 "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        return 0;
    }

    spin_lock_irqsave(&iommu->lock, flags);

    if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
    else
    {
        /* Ensure we have pagetables allocated down to leaf PTE. */
        if ( !hd->pgd )
        {
            addr_to_dma_page(domain, 0);
            if ( !hd->pgd )
            {
            nomem:
                spin_unlock_irqrestore(&hd->mapping_lock, flags);
                return -ENOMEM;
            }
        }

        /* Skip top levels of page tables for 2- and 3-level DRHDs. */
        pgd_maddr = virt_to_maddr(hd->pgd);
        for ( agaw = level_to_agaw(4);
              agaw != level_to_agaw(iommu->nr_pt_levels);
              agaw-- )
        {
            if ( agaw == level_to_agaw(4) )
                pgd_maddr = dma_pte_addr(*hd->pgd);
            else
            {
                struct dma_pte *p = map_domain_page(pgd_maddr);
                pgd_maddr = dma_pte_addr(*p);
                unmap_domain_page(p);
                if ( pgd_maddr == 0 )
                    goto nomem;
            }
        }

        context_set_address_root(*context, pgd_maddr);
        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
    }

    /*
     * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
     * be 1-based as required by Intel's IOMMU hw.
     */
    BUG_ON(agaw == -1);
    context_set_domain_id(context, domain);
    context_set_address_width(*context, agaw);
    context_set_fault_enable(*context);
    context_set_present(*context);
    iommu_flush_cache_entry(context);

    gdprintk(XENLOG_INFO VTDPREFIX,
             "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
             " hd->pgd=%p\n",
             bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
             context->hi, context->lo, hd->pgd);

    if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
                                    (((u16)bus) << 8) | devfn,
                                    DMA_CCMD_MASK_NOBIT, 1) )
        iommu_flush_write_buffer(iommu);
    else
        iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);

    set_bit(iommu->index, &hd->iommu_bitmap);
    spin_unlock_irqrestore(&iommu->lock, flags);
    return ret;
}

static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap)
{
    u8 id;
    int ttl = 48;

    while ( ttl-- )
    {
        pos = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos);
        if ( pos < 0x40 )
            break;

        pos &= ~3;
        id = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                                  pos + PCI_CAP_LIST_ID);

        if ( id == 0xff )
            break;
        if ( id == cap )
            return pos;

        pos += PCI_CAP_LIST_NEXT;
    }

    return 0;
}

#define PCI_BASE_CLASS_BRIDGE    0x06
#define PCI_CLASS_BRIDGE_PCI     0x0604

#define DEV_TYPE_PCIe_ENDPOINT   1
#define DEV_TYPE_PCI_BRIDGE      2
#define DEV_TYPE_PCI             3

int pdev_type(struct pci_dev *dev)
{
    u16 class_device;
    u16 status;

    class_device = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
                                      PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
    if ( class_device == PCI_CLASS_BRIDGE_PCI )
        return DEV_TYPE_PCI_BRIDGE;

    status = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
                                PCI_FUNC(dev->devfn), PCI_STATUS);

    if ( !(status & PCI_STATUS_CAP_LIST) )
        return DEV_TYPE_PCI;

    if ( __pci_find_next_cap(dev->bus, dev->devfn,
                             PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP) )
        return DEV_TYPE_PCIe_ENDPOINT;

    return DEV_TYPE_PCI;
}

#define MAX_BUSES 256
struct pci_dev bus2bridge[MAX_BUSES];

static int domain_context_mapping(
    struct domain *domain,
    struct iommu *iommu,
    struct pci_dev *pdev)
{
    int ret = 0;
    int dev, func, sec_bus, sub_bus;
    u32 type;

    type = pdev_type(pdev);
    switch ( type )
    {
    case DEV_TYPE_PCI_BRIDGE:
        sec_bus = read_pci_config_byte(
            pdev->bus, PCI_SLOT(pdev->devfn),
            PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);

        if ( bus2bridge[sec_bus].bus == 0 )
        {
            bus2bridge[sec_bus].bus   = pdev->bus;
            bus2bridge[sec_bus].devfn = pdev->devfn;
        }

        sub_bus = read_pci_config_byte(
            pdev->bus, PCI_SLOT(pdev->devfn),
            PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);

        if ( sec_bus != sub_bus )
            gdprintk(XENLOG_WARNING VTDPREFIX,
                     "domain_context_mapping: nested PCI bridge not "
                     "supported: bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n",
                     pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                     sec_bus, sub_bus);
        break;
    case DEV_TYPE_PCIe_ENDPOINT:
        gdprintk(XENLOG_INFO VTDPREFIX,
                 "domain_context_mapping:PCIe : bdf = %x:%x:%x\n",
                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
        ret = domain_context_mapping_one(domain, iommu,
                                         (u8)(pdev->bus), (u8)(pdev->devfn));
        break;
    case DEV_TYPE_PCI:
        gdprintk(XENLOG_INFO VTDPREFIX,
                 "domain_context_mapping:PCI: bdf = %x:%x:%x\n",
                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));

        if ( pdev->bus == 0 )
            ret = domain_context_mapping_one(
                domain, iommu, (u8)(pdev->bus), (u8)(pdev->devfn));
        else
        {
            if ( bus2bridge[pdev->bus].bus != 0 )
                gdprintk(XENLOG_WARNING VTDPREFIX,
                         "domain_context_mapping:bus2bridge"
                         "[%d].bus != 0\n", pdev->bus);

            ret = domain_context_mapping_one(
                domain, iommu,
                (u8)(bus2bridge[pdev->bus].bus),
                (u8)(bus2bridge[pdev->bus].devfn));

            /* now map everything behind the PCI bridge */
            for ( dev = 0; dev < 32; dev++ )
            {
                for ( func = 0; func < 8; func++ )
                {
                    ret = domain_context_mapping_one(
                        domain, iommu,
                        pdev->bus, (u8)PCI_DEVFN(dev, func));
                    if ( ret )
                        return ret;
                }
            }
        }
        break;
    default:
        gdprintk(XENLOG_ERR VTDPREFIX,
                 "domain_context_mapping:unknown type : bdf = %x:%x:%x\n",
                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
        ret = -EINVAL;
        break;
    }

    return ret;
}

static int domain_context_unmap_one(
    struct domain *domain,
    struct iommu *iommu,
    u8 bus, u8 devfn)
{
    struct context_entry *context;
    unsigned long flags;

    context = device_to_context_entry(iommu, bus, devfn);
    if ( !context )
    {
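/*
 * Illustrative sketch, not part of intel-iommu.c: the
 * guestwidth_to_adjustwidth() macro above rounds a guest address width
 * up so that the bits above the 12-bit page offset split into whole
 * 9-bit page-table levels, capping the result at 64.  The standalone
 * helper below (adjust_width() is a hypothetical name, assuming a
 * hosted C environment) just replays that arithmetic on a few widths.
 */
#include <stdio.h>

static int adjust_width(int gaw)
{
    int r = (gaw - 12) % 9;                     /* leftover bits in the top level */
    int agaw = (r == 0) ? gaw : (gaw + 9 - r);  /* round up to a full 9-bit level */
    return (agaw > 64) ? 64 : agaw;             /* VT-d widths never exceed 64 */
}

int main(void)
{
    /* expected: 36 -> 39, 39 -> 39, 48 -> 48, 62 -> 64 (capped) */
    int widths[] = { 36, 39, 48, 62 };
    for ( unsigned int i = 0; i < sizeof(widths) / sizeof(widths[0]); i++ )
        printf("gaw %d -> agaw %d\n", widths[i], adjust_width(widths[i]));
    return 0;
}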