swiotlb.c
/*
 * Dynamic DMA mapping support.
 *
 * This implementation is for IA-64 platforms that do not support
 * I/O TLBs (aka DMA address translation hardware).
 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
 * Copyright (C) 2000, 2003 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 03/05/07 davidm	Switch from PCI-DMA to generic device DMA API.
 * 00/12/13 davidm	Rename to swiotlb.c and add mark_clean() to avoid
 *			unnecessary i-cache flushing.
 * 04/07/.. ak		Better overflow handling. Assorted fixes.
 */

#include <linux/cache.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ctype.h>

#include <asm/io.h>
#include <asm/pci.h>
#include <asm/dma.h>

#include <linux/init.h>
#include <linux/bootmem.h>

#define OFFSET(val,align) ((unsigned long)	\
	( (val) & ( (align) - 1)))

#define SG_ENT_VIRT_ADDRESS(sg)	(page_address((sg)->page) + (sg)->offset)
#define SG_ENT_PHYS_ADDRESS(SG)	virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))

/*
 * Maximum allowable number of contiguous slabs to map,
 * must be a power of 2.  What is the appropriate value ?
 * The complexity of {map,unmap}_single is linearly dependent on this value.
 */
#define IO_TLB_SEGSIZE	128

/*
 * log of the size of each IO TLB slab.  The number of slabs is command line
 * controllable.
 */
#define IO_TLB_SHIFT 11
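/*
 * For reference: with IO_TLB_SHIFT == 11 each slab is 1 << 11 = 2048 bytes,
 * so IO_TLB_SEGSIZE == 128 caps a single bounce-buffer mapping at
 * 128 * 2 KB = 256 KB.
 */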
int swiotlb_force;

/*
 * Used to do a quick range check in swiotlb_unmap_single and
 * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
 * API.
 */
static char *io_tlb_start, *io_tlb_end;

/*
 * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
 * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
 */
static unsigned long io_tlb_nslabs;

/*
 * When the IOMMU overflows we return a fallback buffer. This sets the size.
 */
static unsigned long io_tlb_overflow = 32*1024;

void *io_tlb_overflow_buffer;

/*
 * This is a free list describing the number of free entries available from
 * each index
 */
static unsigned int *io_tlb_list;
static unsigned int io_tlb_index;

/*
 * We need to save away the original address corresponding to a mapped entry
 * for the sync operations.
 */
static unsigned char **io_tlb_orig_addr;

/*
 * Protect the above data structures in the map and unmap calls
 */
static DEFINE_SPINLOCK(io_tlb_lock);

static int __init
setup_io_tlb_npages(char *str)
{
        if (isdigit(*str)) {
                io_tlb_nslabs = simple_strtoul(str, &str, 0) <<
                        (PAGE_SHIFT - IO_TLB_SHIFT);
                /* avoid tail segment of size < IO_TLB_SEGSIZE */
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }
        if (*str == ',')
                ++str;
        if (!strcmp(str, "force"))
                swiotlb_force = 1;
        return 1;
}
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */

/*
 * Statically reserve bounce buffer space and initialize bounce buffer data
 * structures for the software IO TLB used to implement the PCI DMA API.
 */
void
swiotlb_init_with_default_size (size_t default_size)
{
        unsigned long i;

        if (!io_tlb_nslabs) {
                io_tlb_nslabs = (default_size >> PAGE_SHIFT);
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }

        /*
         * Get IO TLB memory from the low pages
         */
        io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs *
                                               (1 << IO_TLB_SHIFT));
        if (!io_tlb_start)
                panic("Cannot allocate SWIOTLB buffer");
        io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);

        /*
         * Allocate and initialize the free list array.  This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
         * between io_tlb_start and io_tlb_end.
         */
        io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
        for (i = 0; i < io_tlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;
        io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));

        /*
         * Get the overflow emergency buffer
         */
        io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
        printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
               virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
}

void
swiotlb_init (void)
{
        swiotlb_init_with_default_size(64 * (1<<20));   /* default to 64MB */
}

static inline int
address_needs_mapping(struct device *hwdev, dma_addr_t addr)
{
        dma_addr_t mask = 0xffffffff;

        /* If the device has a mask, use it, otherwise default to 32 bits */
        if (hwdev && hwdev->dma_mask)
                mask = *hwdev->dma_mask;

        return (addr & ~mask) != 0;
}

/*
 * Allocates bounce buffer and returns its kernel virtual address.
 */
static void *
map_single(struct device *hwdev, char *buffer, size_t size, int dir)
{
        unsigned long flags;
        char *dma_addr;
        unsigned int nslots, stride, index, wrap;
        int i;

        /*
         * For mappings greater than a page, we limit the stride (and
         * hence alignment) to a page size.
         */
        nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        if (size > PAGE_SIZE)
                stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
        else
                stride = 1;

        if (!nslots)
                BUG();

        /*
         * Find a suitable number of IO TLB entries that will fit this
         * request and allocate a buffer from that IO TLB pool.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
        {
                wrap = index = ALIGN(io_tlb_index, stride);

                if (index >= io_tlb_nslabs)
                        wrap = index = 0;

                do {
                        /*
                         * If we find a slot that indicates we have 'nslots'
                         * number of contiguous buffers, we allocate the
                         * buffers from that slot and mark the entries as '0'
                         * indicating unavailable.
                         */
                        if (io_tlb_list[index] >= nslots) {
                                int count = 0;

                                for (i = index; i < (int) (index + nslots); i++)
                                        io_tlb_list[i] = 0;
                                for (i = index - 1;
                                     (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) &&
                                     io_tlb_list[i];
                                     i--)
                                        io_tlb_list[i] = ++count;
                                dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);

                                /*
                                 * Update the indices to avoid searching in
                                 * the next round.
                                 */
                                io_tlb_index = ((index + nslots) < io_tlb_nslabs
                                                ? (index + nslots) : 0);

                                goto found;
                        }
                        index += stride;
                        if (index >= io_tlb_nslabs)
                                index = 0;
                } while (index != wrap);

                spin_unlock_irqrestore(&io_tlb_lock, flags);
                return NULL;
        }
 found:
        spin_unlock_irqrestore(&io_tlb_lock, flags);

        /*
         * Save away the mapping from the original address to the DMA address.
         * This is needed when we sync the memory.  Then we sync the buffer if
         * needed.
         */
        io_tlb_orig_addr[index] = buffer;
        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
                memcpy(dma_addr, buffer, size);

        return dma_addr;
}
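/*
 * How the free list works: after initialization io_tlb_list holds, within
 * each 128-slot segment, the values 128, 127, ..., 2, 1, i.e. the number of
 * contiguous free slots starting at that index.  map_single() above zeroes
 * the run it allocates and adjusts the counts of the free slots just before
 * it; unmap_single() below rebuilds the counts when the run is released.
 */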
/*
 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
 */
static void
unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
{
        unsigned long flags;
        int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
        char *buffer = io_tlb_orig_addr[index];

        /*
         * First, sync the memory before unmapping the entry
         */
        if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
                /*
                 * bounce... copy the data back into the original buffer
                 * and delete the bounce buffer.
                 */
                memcpy(buffer, dma_addr, size);

        /*
         * Return the buffer to the free list by setting the corresponding
         * entries to indicate the number of contiguous entries available.
         * While returning the entries to the free list, we merge the entries
         * with slots below and above the pool being returned.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
        {
                count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
                         io_tlb_list[index + nslots] : 0);
                /*
                 * Step 1: return the slots to the free list, merging the
                 * slots with succeeding slots
                 */
                for (i = index + nslots - 1; i >= index; i--)
                        io_tlb_list[i] = ++count;
                /*
                 * Step 2: merge the returned slots with the preceding slots,
                 * if available (non zero)
                 */
                for (i = index - 1;
                     (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) &&
                     io_tlb_list[i];
                     i--)
                        io_tlb_list[i] = ++count;
        }
        spin_unlock_irqrestore(&io_tlb_lock, flags);
}

static void
sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
{
        int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
        char *buffer = io_tlb_orig_addr[index];

        /*
         * bounce... copy the data back into/from the original buffer
         * XXX How do you handle DMA_BIDIRECTIONAL here ?
         */
        if (dir == DMA_FROM_DEVICE)
                memcpy(buffer, dma_addr, size);
        else if (dir == DMA_TO_DEVICE)
                memcpy(dma_addr, buffer, size);
        else
                BUG();
}

void *
swiotlb_alloc_coherent(struct device *hwdev, size_t size,
                       dma_addr_t *dma_handle, int flags)
{
        unsigned long dev_addr;
        void *ret;
        int order = get_order(size);

        /*
         * XXX fix me: the DMA API should pass us an explicit DMA mask
         * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
         * bit range instead of a 16MB one).
         */
        flags |= GFP_DMA;
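As the changelog at the top of the file notes, this implementation sits behind the generic device DMA API rather than being called by drivers directly. The fragment below is an illustrative sketch only, not part of swiotlb.c: the device pointer my_dev, the buffer buf, and the function name are hypothetical, and it simply shows the calls through which a driver of this era would reach map_single() and unmap_single() above when its device cannot address the buffer directly.

#include <linux/dma-mapping.h>

/* Illustrative sketch; my_dev and buf are hypothetical. */
static void example_bounce_roundtrip(struct device *my_dev, void *buf, size_t len)
{
        dma_addr_t handle;

        /*
         * For a buffer the device cannot reach, this ends up in map_single(),
         * which copies buf into a bounce slot (DMA_TO_DEVICE direction).
         */
        handle = dma_map_single(my_dev, buf, len, DMA_TO_DEVICE);

        /* ... program the device with 'handle' and wait for completion ... */

        /*
         * unmap_single() returns the slots to the free list; for a
         * DMA_FROM_DEVICE mapping it would also copy the device's data back.
         */
        dma_unmap_single(my_dev, handle, len, DMA_TO_DEVICE);
}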