📄 pci-gart.c
/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows the use of PCI devices that only support 32-bit addresses on
 * systems with more than 4GB.
 *
 * See Documentation/DMA-mapping.txt for the interface specification.
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/interrupt.h>
#include <asm/atomic.h>
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
#include <asm/cacheflush.h>
#include <asm/kdebug.h>

#ifdef CONFIG_PREEMPT
#define preempt_atomic() in_atomic()
#else
#define preempt_atomic() 1
#endif

dma_addr_t bad_dma_address;

unsigned long iommu_bus_base;		/* GART remapping area (physical) */
static unsigned long iommu_size;	/* size of remapping area in bytes */
static unsigned long iommu_pages;	/* .. and in pages */

u32 *iommu_gatt_base;			/* Remapping table */

int no_iommu;
static int no_agp;
#ifdef CONFIG_IOMMU_DEBUG
int panic_on_overflow = 1;
int force_iommu = 1;
#else
int panic_on_overflow = 0;
int force_iommu = 0;
#endif
int iommu_merge = 0;
int iommu_sac_force = 0;

/* If this is disabled the IOMMU will use an optimized flushing strategy
   of only flushing when a mapping is reused. With it true the GART is
   flushed for every mapping. Problem is that doing the lazy flush seems
   to trigger bugs with some popular PCI cards, in particular 3ware (but
   it has also been seen with Qlogic at least). */
int iommu_fullflush = 1;

#define MAX_NB 8

/* Allocation bitmap for the remapping area */
static spinlock_t iommu_bitmap_lock = SPIN_LOCK_UNLOCKED;
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */

static u32 gart_unmapped_entry;

#define GPTE_VALID    1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x) \
	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))

#define to_pages(addr,size) \
	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)

#define for_all_nb(dev) \
	dev = NULL;	\
	while ((dev = pci_find_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)\
		if (dev->bus->number == 0 &&				\
		    (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))

static struct pci_dev *northbridges[MAX_NB];
static u32 northbridge_flush_word[MAX_NB];

#define EMERGENCY_PAGES 32 /* = 128KB */

#ifdef CONFIG_AGP
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif

/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;

static unsigned long next_bit;	/* protected by iommu_bitmap_lock */
static int need_flush;		/* global flush state. set for each gart wrap */
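/*
 * Illustrative sketch only (not part of the original driver): a worked
 * example of the GPTE encoding above.  GPTE_ENCODE keeps bits 12-31 of a
 * physical address in place, stores bits 32-39 in PTE bits 4-11, and sets
 * the valid/coherent flags; GPTE_DECODE reverses this, so only addresses
 * up to 40 bits survive the round trip.  The sample address is made up.
 */
#if 0	/* example only, never compiled */
static void gpte_encode_example(void)
{
	u64 phys = 0xab45678000ULL;	/* hypothetical 40-bit address */
	u32 pte  = GPTE_ENCODE(phys);	/* 0x45678000 | 0xab0 | 3 */
	u64 back = GPTE_DECODE(pte);	/* 0xab45678000 again */
	BUG_ON(back != (phys & PAGE_MASK));
}
#endif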
static dma_addr_t pci_map_area(struct pci_dev *dev, unsigned long phys_mem,
			       size_t size, int dir);

static unsigned long alloc_iommu(int size)
{
	unsigned long offset, flags;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
	if (offset == -1) {
		need_flush = 1;
		offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size);
	}
	if (offset != -1) {
		set_bit_string(iommu_gart_bitmap, offset, size);
		next_bit = offset+size;
		if (next_bit >= iommu_pages) {
			next_bit = 0;
			need_flush = 1;
		}
	}
	if (iommu_fullflush)
		need_flush = 1;
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
	return offset;
}

static void free_iommu(unsigned long offset, int size)
{
	unsigned long flags;
	if (size == 1) {
		clear_bit(offset, iommu_gart_bitmap);
		return;
	}
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	__clear_bit_string(iommu_gart_bitmap, offset, size);
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}

/*
 * Use global flush state to avoid races with multiple flushers.
 */
static void flush_gart(struct pci_dev *dev)
{
	unsigned long flags;
	int flushed = 0;
	int i;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	if (need_flush) {
		for (i = 0; i < MAX_NB; i++) {
			u32 w;
			if (!northbridges[i])
				continue;
			pci_write_config_dword(northbridges[i], 0x9c,
					       northbridge_flush_word[i] | 1);
			/* Make sure the hardware actually executed the flush. */
			do {
				pci_read_config_dword(northbridges[i], 0x9c, &w);
			} while (w & 1);
			flushed++;
		}
		if (!flushed)
			printk("nothing to flush?\n");
		need_flush = 0;
	}
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}

/*
 * Allocate memory for a consistent mapping.
 */
void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
			   dma_addr_t *dma_handle)
{
	void *memory;
	int gfp = preempt_atomic() ? GFP_ATOMIC : GFP_KERNEL;
	unsigned long dma_mask = 0;
	u64 bus;

	if (hwdev)
		dma_mask = hwdev->dev.coherent_dma_mask;
	if (dma_mask == 0)
		dma_mask = 0xffffffff;

	/* Kludge to make it bug-to-bug compatible with i386. i386
	   uses the normal dma_mask for alloc_consistent. */
	if (hwdev)
		dma_mask &= hwdev->dma_mask;

 again:
	memory = (void *)__get_free_pages(gfp, get_order(size));
	if (memory == NULL)
		return NULL;

	{
		int high, mmu;
		bus = virt_to_bus(memory);
		high = (bus + size) >= dma_mask;
		mmu = high;
		if (force_iommu && !(gfp & GFP_DMA))
			mmu = 1;
		if (no_iommu || dma_mask < 0xffffffffUL) {
			if (high) {
				if (!(gfp & GFP_DMA)) {
					gfp |= GFP_DMA;
					goto again;
				}
				goto free;
			}
			mmu = 0;
		}
		memset(memory, 0, size);
		if (!mmu) {
			*dma_handle = virt_to_bus(memory);
			return memory;
		}
	}

	*dma_handle = pci_map_area(hwdev, bus, size, PCI_DMA_BIDIRECTIONAL);
	if (*dma_handle == bad_dma_address)
		goto error;
	flush_gart(hwdev);
	return memory;

 error:
	if (panic_on_overflow)
		panic("pci_alloc_consistent: overflow %lu bytes\n", size);
 free:
	free_pages((unsigned long)memory, get_order(size));
	return NULL;
}
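/*
 * Illustrative sketch only (not part of the original driver): how a driver
 * would typically use the consistent-mapping interface implemented above,
 * as described in Documentation/DMA-mapping.txt.  The device pointer,
 * buffer size and the ring-buffer use case here are hypothetical.
 */
#if 0	/* example only, never compiled */
static int example_alloc_ring(struct pci_dev *pdev)
{
	dma_addr_t ring_dma;
	void *ring = pci_alloc_consistent(pdev, PAGE_SIZE, &ring_dma);
	if (!ring)
		return -ENOMEM;
	/* ... hand ring_dma to the hardware, use ring from the CPU ... */
	pci_free_consistent(pdev, PAGE_SIZE, ring, ring_dma);
	return 0;
}
#endif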
/*
 * Unmap consistent memory.
 * The caller must ensure that the device has finished accessing the mapping.
 */
void pci_free_consistent(struct pci_dev *hwdev, size_t size,
			 void *vaddr, dma_addr_t bus)
{
	pci_unmap_single(hwdev, bus, size, 0);
	free_pages((unsigned long)vaddr, get_order(size));
}

#ifdef CONFIG_IOMMU_LEAK
#define SET_LEAK(x) if (iommu_leak_tab) \
	iommu_leak_tab[x] = __builtin_return_address(0);
#define CLEAR_LEAK(x) if (iommu_leak_tab) \
	iommu_leak_tab[x] = NULL;

/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
int iommu_leak_pages = 20;

void dump_leak(void)
{
	int i;
	static int dump;
	if (dump || !iommu_leak_tab) return;
	dump = 1;
	show_stack(NULL,NULL);
	/* Very crude. dump some from the end of the table too */
	printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages);
	for (i = 0; i < iommu_leak_pages; i+=2) {
		printk("%lu: ", iommu_pages-i);
		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]);
		printk("%c", (i+1)%2 == 0 ? '\n' : ' ');
	}
	printk("\n");
}
#else
#define SET_LEAK(x)
#define CLEAR_LEAK(x)
#endif

static void iommu_full(struct pci_dev *dev, size_t size, int dir)
{
	/*
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly.
	 * Return some non mapped prereserved space in the aperture and
	 * let the Northbridge deal with it. This will result in garbage
	 * in the IO operation. When the size exceeds the prereserved space
	 * memory corruption will occur or random memory will be DMAed
	 * out. Hopefully no network devices use single mappings that big.
	 */

	printk(KERN_ERR
	       "PCI-DMA: Out of IOMMU space for %lu bytes at device %s[%s]\n",
	       size, dev ? pci_pretty_name(dev) : "", dev ? dev->slot_name : "?");

	if (size > PAGE_SIZE*EMERGENCY_PAGES) {
		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Memory will be corrupted\n");
		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Random memory will be DMAed\n");
	}

#ifdef CONFIG_IOMMU_LEAK
	dump_leak();
#endif
}

static inline int need_iommu(struct pci_dev *dev, unsigned long addr, size_t size)
{
	u64 mask = dev ? dev->dma_mask : 0xffffffff;
	int high = addr + size >= mask;
	int mmu = high;
	if (force_iommu)
		mmu = 1;
	if (no_iommu) {
		if (high)
			panic("PCI-DMA: high address but no IOMMU.\n");
		mmu = 0;
	}
	return mmu;
}

static inline int nonforced_iommu(struct pci_dev *dev, unsigned long addr, size_t size)
{
	u64 mask = dev ? dev->dma_mask : 0xffffffff;
	int high = addr + size >= mask;
	int mmu = high;
	if (no_iommu) {
		if (high)
			panic("PCI-DMA: high address but no IOMMU.\n");
		mmu = 0;
	}
	return mmu;
}
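/*
 * Illustrative sketch only (not part of the original driver): the dma_mask
 * consulted by need_iommu()/nonforced_iommu() above is whatever the driver
 * declared via pci_set_dma_mask(); a 32-bit-only device on a machine with
 * more than 4GB is exactly the case the GART remapping handles.  The device
 * pointer is hypothetical.
 */
#if 0	/* example only, never compiled */
static int example_set_masks(struct pci_dev *pdev)
{
	if (pci_set_dma_mask(pdev, 0xffffffffULL) ||
	    pci_set_consistent_dma_mask(pdev, 0xffffffffULL))
		return -EIO;	/* device cannot even do 32-bit DMA */
	return 0;
}
#endif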
/* Map a single contiguous physical area into the IOMMU.
 * Caller needs to check if the iommu is needed and flush.
 */
static dma_addr_t pci_map_area(struct pci_dev *dev, unsigned long phys_mem,
			       size_t size, int dir)
{
	unsigned long npages = to_pages(phys_mem, size);
	unsigned long iommu_page = alloc_iommu(npages);
	int i;
	if (iommu_page == -1) {
		if (!nonforced_iommu(dev, phys_mem, size))
			return phys_mem;
		if (panic_on_overflow)
			panic("pci_map_area overflow %lu bytes\n", size);
		iommu_full(dev, size, dir);
		return bad_dma_address;
	}

	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
		SET_LEAK(iommu_page + i);
		phys_mem += PAGE_SIZE;
	}
	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}

/* Map a single area into the IOMMU */
dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size, int dir)
{
	unsigned long phys_mem, bus;

	BUG_ON(dir == PCI_DMA_NONE);

#ifdef CONFIG_SWIOTLB
	if (swiotlb)
		return swiotlb_map_single(&dev->dev,addr,size,dir);
#endif

	phys_mem = virt_to_phys(addr);
	if (!need_iommu(dev, phys_mem, size))
		return phys_mem;

	bus = pci_map_area(dev, phys_mem, size, dir);
	flush_gart(dev);
	return bus;
}

/* Fallback for pci_map_sg in case of overflow */
static int pci_map_sg_nonforce(struct pci_dev *dev, struct scatterlist *sg,
			       int nents, int dir)
{
	int i;

#ifdef CONFIG_IOMMU_DEBUG
	printk(KERN_DEBUG "pci_map_sg overflow\n");
#endif

	for (i = 0; i < nents; i++ ) {
		struct scatterlist *s = &sg[i];
		unsigned long addr = page_to_phys(s->page) + s->offset;
		if (nonforced_iommu(dev, addr, s->length)) {
			addr = pci_map_area(dev, addr, s->length, dir);
			if (addr == bad_dma_address) {
				if (i > 0)
					pci_unmap_sg(dev, sg, i, dir);
				nents = 0;
				sg[0].dma_length = 0;
				break;
			}
		}
		s->dma_address = addr;
		s->dma_length = s->length;
	}
	flush_gart(dev);
	return nents;
}

/* Map multiple scatterlist entries contiguously into the first. */
static int __pci_map_cont(struct scatterlist *sg, int start, int stopat,
			  struct scatterlist *sout, unsigned long pages)
{
	unsigned long iommu_start = alloc_iommu(pages);
	unsigned long iommu_page = iommu_start;
	int i;

	if (iommu_start == -1)
		return -1;

	for (i = start; i < stopat; i++) {
		struct scatterlist *s = &sg[i];
		unsigned long pages, addr;
		unsigned long phys_addr = s->dma_address;

		BUG_ON(i > start && s->offset);
		if (i == start) {
			*sout = *s;
			sout->dma_address = iommu_bus_base;
			sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
			sout->dma_length = s->length;
		} else {
			sout->dma_length += s->length;
		}

		addr = phys_addr;
		pages = to_pages(s->offset, s->length);
		while (pages--) {
			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
			SET_LEAK(iommu_page);
			addr += PAGE_SIZE;
			iommu_page++;
		}
	}
	BUG_ON(iommu_page - iommu_start != pages);
	return 0;
}

static inline int pci_map_cont(struct scatterlist *sg, int start, int stopat,
			       struct scatterlist *sout,
			       unsigned long pages, int need)
{
	if (!need) {
		BUG_ON(stopat - start != 1);
		*sout = sg[start];
		sout->dma_length = sg[start].length;
		return 0;
	}
	return __pci_map_cont(sg, start, stopat, sout, pages);
}
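/*
 * Illustrative sketch only (not part of the original driver): a streaming
 * mapping through pci_map_single() as implemented above.  Under the GART
 * IOMMU the returned bus address points into the remapping aperture when
 * the buffer is not directly reachable by the device.  The buffer, length
 * and transmit use case are hypothetical.
 */
#if 0	/* example only, never compiled */
static void example_tx(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t bus = pci_map_single(pdev, buf, len, PCI_DMA_TODEVICE);
	/* ... give bus to the device and wait for the DMA to finish ... */
	pci_unmap_single(pdev, bus, len, PCI_DMA_TODEVICE);
}
#endif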