/*
**  IA64 System Bus Adapter (SBA) I/O MMU manager
**
**	(c) Copyright 2002 Alex Williamson
**	(c) Copyright 2002 Hewlett-Packard Company
**
**	Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
**	Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
**
**	This program is free software; you can redistribute it and/or modify
**	it under the terms of the GNU General Public License as published by
**	the Free Software Foundation; either version 2 of the License, or
**	(at your option) any later version.
**
**
** This module initializes the IOC (I/O Controller) found on HP
** McKinley machines and their successors.
**
*/

#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/acpi.h>
#include <linux/efi.h>

#include <asm/delay.h>		/* ia64_get_itc() */
#include <asm/io.h>
#include <asm/page.h>		/* PAGE_OFFSET */

#define PFX "IOC: "

/*
** This option allows cards capable of 64bit DMA to bypass the IOMMU.  If
** not defined, all DMA will be 32bit and go through the TLB.
*/
#define ALLOW_IOV_BYPASS

/*
** If a device prefetches beyond the end of a valid pdir entry, it will cause
** a hard failure, i.e. an MCA.  Version 3.0 and later of the zx1 LBA should
** disconnect on 4k boundaries and prevent such issues.  If the device is
** particularly aggressive, this option will keep the entire pdir valid such
** that prefetching will hit a valid address.  This could severely impact
** error containment, and is therefore off by default.  The page that is
** used for spill-over is poisoned, so that should help debugging somewhat.
*/
#undef FULL_VALID_PDIR

#define ENABLE_MARK_CLEAN

/*
** The number of debug flags is a clue - this code is fragile.
*/
#undef DEBUG_SBA_INIT
#undef DEBUG_SBA_RUN
#undef DEBUG_SBA_RUN_SG
#undef DEBUG_SBA_RESOURCE
#undef ASSERT_PDIR_SANITY
#undef DEBUG_LARGE_SG_ENTRIES
#undef DEBUG_BYPASS

#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY)
#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive
#endif

#define SBA_INLINE	__inline__
/* #define SBA_INLINE */

#ifdef DEBUG_SBA_INIT
#define DBG_INIT(x...)	printk(x)
#else
#define DBG_INIT(x...)
#endif

#ifdef DEBUG_SBA_RUN
#define DBG_RUN(x...)	printk(x)
#else
#define DBG_RUN(x...)
#endif

#ifdef DEBUG_SBA_RUN_SG
#define DBG_RUN_SG(x...)	printk(x)
#else
#define DBG_RUN_SG(x...)
#endif

#ifdef DEBUG_SBA_RESOURCE
#define DBG_RES(x...)	printk(x)
#else
#define DBG_RES(x...)
#endif

#ifdef DEBUG_BYPASS
#define DBG_BYPASS(x...)	printk(x)
#else
#define DBG_BYPASS(x...)
#endif

#ifdef ASSERT_PDIR_SANITY
#define ASSERT(expr) \
	if(!(expr)) { \
		printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \
		panic(#expr); \
	}
#else
#define ASSERT(expr)
#endif

/*
** The number of pdir entries to "free" before issuing
** a read to the PCOM register to flush out PCOM writes.
** Interacts with allocation granularity (i.e. 4 or 8 entries
** allocated and freed/purged at a time might make this
** less interesting).
*/
#define DELAYED_RESOURCE_CNT	16
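
/*
** A minimal sketch (not part of the original file) of how the unmap path
** uses this: freed mappings are queued in ioc->saved[] (see struct ioc
** below) and only purged in batches of DELAYED_RESOURCE_CNT, so a single
** PCOM read-back amortizes over many frees.  The exact flow below is an
** assumption; sba_mark_invalid() and sba_free_range() are assumed helpers:
**
**	struct sba_dma_pair *d = &(ioc->saved[ioc->saved_cnt]);
**	d->iova = iova;
**	d->size = size;
**	if (++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT) {
**		int cnt = ioc->saved_cnt;
**		while (cnt--) {
**			sba_mark_invalid(ioc, d->iova, d->size);
**			sba_free_range(ioc, d->iova, d->size);
**			d--;
**		}
**		ioc->saved_cnt = 0;
**		READ_REG(ioc->ioc_hpa + IOC_PCOM);	- flush the purges
**	}
*/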

#define DEFAULT_DMA_HINT_REG	0

#define ZX1_IOC_ID	((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP)
#define REO_IOC_ID	((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP)

#define ZX1_IOC_OFFSET	0x1000	/* ACPI reports SBA, we want IOC */

#define IOC_FUNC_ID	0x000
#define IOC_FCLASS	0x008	/* function class, bist, header, rev... */
#define IOC_IBASE	0x300	/* IO TLB */
#define IOC_IMASK	0x308
#define IOC_PCOM	0x310
#define IOC_TCNFG	0x318
#define IOC_PDIR_BASE	0x320

/* AGP GART driver looks for this */
#define ZX1_SBA_IOMMU_COOKIE	0x0000badbadc0ffeeUL

/*
** IOC supports 4/8/16/64KB page sizes (see TCNFG register)
** It's safer (avoids memory corruption) to keep DMA page mappings
** equivalently sized to VM PAGE_SIZE.
**
** We really can't avoid generating a new mapping for each
** page since the Virtual Coherence Index has to be generated
** and updated for each page.
**
** IOVP_SIZE could only be greater than PAGE_SIZE if we are
** confident the drivers really only touch the next physical
** page iff that driver instance owns it.
*/
#define IOVP_SIZE	PAGE_SIZE
#define IOVP_SHIFT	PAGE_SHIFT
#define IOVP_MASK	PAGE_MASK

struct ioc {
	void		*ioc_hpa;	/* I/O MMU base address */
	char		*res_map;	/* resource map, bit == pdir entry */
	u64		*pdir_base;	/* physical base address */
	unsigned long	ibase;		/* pdir IOV Space base */
	unsigned long	imask;		/* pdir IOV Space mask */

	unsigned long	*res_hint;	/* next avail IOVP - circular search */
	spinlock_t	res_lock;
	unsigned long	hint_mask_pdir;	/* bits used for DMA hints */
	unsigned int	res_bitshift;	/* from the RIGHT! */
	unsigned int	res_size;	/* size of resource map in bytes */
	unsigned int	hint_shift_pdir;
	unsigned long	dma_mask;
#if DELAYED_RESOURCE_CNT > 0
	int		saved_cnt;
	struct sba_dma_pair {
		dma_addr_t	iova;
		size_t		size;
	} saved[DELAYED_RESOURCE_CNT];
#endif

#ifdef CONFIG_PROC_FS
#define SBA_SEARCH_SAMPLE	0x100
	unsigned long avg_search[SBA_SEARCH_SAMPLE];
	unsigned long avg_idx;	/* current index into avg_search */
	unsigned long used_pages;
	unsigned long msingle_calls;
	unsigned long msingle_pages;
	unsigned long msg_calls;
	unsigned long msg_pages;
	unsigned long usingle_calls;
	unsigned long usingle_pages;
	unsigned long usg_calls;
	unsigned long usg_pages;
#ifdef ALLOW_IOV_BYPASS
	unsigned long msingle_bypass;
	unsigned long usingle_bypass;
	unsigned long msg_bypass;
#endif
#endif

	/* Stuff we don't need in performance path */
	struct ioc	*next;		/* list of IOC's in system */
	acpi_handle	handle;		/* for multiple IOC's */
	const char	*name;
	unsigned int	func_id;
	unsigned int	rev;		/* HW revision of chip */
	u32		iov_size;
	unsigned int	pdir_size;	/* in bytes, determined by IOV Space size */
	struct pci_dev	*sac_only_dev;
};

static struct ioc *ioc_list;
static int reserve_sba_gart = 1;

#define sba_sg_address(sg) (sg->address ? sg->address : \
	page_address((sg)->page) + (sg)->offset)

#ifdef FULL_VALID_PDIR
static void *prefetch_spill_page;
#endif

#define GET_IOC(dev)	((struct ioc *) PCI_CONTROLLER(dev)->iommu)

/*
** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
** (or rather not merge) DMAs into manageable chunks.
** On parisc, this is more of a software/tuning constraint
** than a HW one.  I/O MMU allocation algorithms can be
** faster with smaller sizes (to some degree).
*/
#define DMA_CHUNK_SIZE	(BITS_PER_LONG*PAGE_SIZE)

#define ROUNDUP(x,y)	((x + ((y)-1)) & ~((y)-1))

/************************************
** SBA register read and write support
**
** BE WARNED: register writes are posted.
**  (i.e. follow writes which must reach HW with a read)
**
*/
#define READ_REG(addr)       __raw_readq(addr)
#define WRITE_REG(val, addr) __raw_writeq(val, addr)
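
/*
** A minimal sketch (not part of the original file) of what "posted" means
** in practice: a write that must have reached the IOC before we proceed
** (e.g. a TLB purge via PCOM) is followed by a read from the same register
** space, which cannot complete until the write has landed:
**
**	WRITE_REG(iovp | log2_bytes, ioc->ioc_hpa + IOC_PCOM);
**	READ_REG(ioc->ioc_hpa + IOC_PCOM);	- forces the write out
**
** The iovp|log2_bytes encoding for PCOM is an assumption here; the
** read-back idiom itself is the point.
*/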

#ifdef DEBUG_SBA_INIT
/**
 * sba_dump_tlb - debugging only - print IOMMU operating parameters
 * @hpa: base address of the IOMMU
 *
 * Print the size/location of the IO MMU PDIR.
 */
static void
sba_dump_tlb(char *hpa)
{
	DBG_INIT("IO TLB at 0x%p\n", (void *)hpa);
	DBG_INIT("IOC_IBASE    : %016lx\n", READ_REG(hpa+IOC_IBASE));
	DBG_INIT("IOC_IMASK    : %016lx\n", READ_REG(hpa+IOC_IMASK));
	DBG_INIT("IOC_TCNFG    : %016lx\n", READ_REG(hpa+IOC_TCNFG));
	DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE));
	DBG_INIT("\n");
}
#endif


#ifdef ASSERT_PDIR_SANITY

/**
 * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @msg: text to print on the output line.
 * @pide: pdir index.
 *
 * Print one entry of the IO MMU PDIR in human readable form.
 */
static void
sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide)
{
	/* start printing from lowest pde in rval */
	u64 *ptr = &(ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)]);
	unsigned long *rptr = (unsigned long *) &(ioc->res_map[(pide >>3) & ~(sizeof(unsigned long) - 1)]);
	uint rcnt;

	/* printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n", */
	printk("SBA: %s rp %p bit %d rval 0x%lx\n",
		msg, rptr, pide & (BITS_PER_LONG - 1), *rptr);

	rcnt = 0;
	while (rcnt < BITS_PER_LONG) {
		printk("%s %2d %p %016Lx\n",
			(rcnt == (pide & (BITS_PER_LONG - 1)))
				? "    -->" : "       ",
			rcnt, ptr, *ptr );
		rcnt++;
		ptr++;
	}
	printk("%s", msg);
}


/**
 * sba_check_pdir - debugging only - consistency checker
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @msg: text to print on the output line.
 *
 * Verify the resource map and pdir state is consistent
 */
static int
sba_check_pdir(struct ioc *ioc, char *msg)
{
	u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]);
	u64 *rptr = (u64 *) ioc->res_map;	/* resource map ptr */
	u64 *pptr = ioc->pdir_base;	/* pdir ptr */
	uint pide = 0;

	while (rptr < rptr_end) {
		u64 rval;
		int rcnt; /* number of bits we might check */

		rval = *rptr;
		rcnt = 64;

		while (rcnt) {
			/* Get last byte and highest bit from that */
			u32 pde = ((u32)((*pptr >> (63)) & 0x1));
			if ((rval & 0x1) ^ pde) {
				/*
				** BUMMER!  -- res_map != pdir --
				** Dump rval and matching pdir entries
				*/
				sba_dump_pdir_entry(ioc, msg, pide);
				return(1);
			}
			rcnt--;
			rval >>= 1;	/* try the next bit */
			pptr++;
			pide++;
		}
		rptr++;	/* look at next word of res_map */
	}
	/* It'd be nice if we always got here :^) */
	return 0;
}


/**
 * sba_dump_sg - debugging only - print Scatter-Gather list
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @startsg: head of the SG list
 * @nents: number of entries in SG list
 *
 * print the SG list so we can verify it's correct by hand.
 */
static void
sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
{
	while (nents-- > 0) {
		printk(" %d : %08lx/%05x %p\n",
			nents,
			startsg->dma_address, startsg->dma_length,
			sba_sg_address(startsg));
		startsg++;
	}
}

static void
sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
{
	struct scatterlist *the_sg = startsg;
	int the_nents = nents;

	while (the_nents-- > 0) {
		if (sba_sg_address(the_sg) == 0x0UL)
			sba_dump_sg(NULL, startsg, nents);
		the_sg++;
	}
}

#endif /* ASSERT_PDIR_SANITY */



/**************************************************************
**
**   I/O Pdir Resource Management
**
**   Bits set in the resource map are in use.
**   Each bit can represent a number of pages.
**   LSbs represent lower addresses (IOVAs).
**
***************************************************************/
#define PAGES_PER_RANGE 1	/* could increase this to 4 or 8 if needed */
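
/*
** Illustrative helper, not part of the original driver: because the
** resource map is a plain bitmap over pdir entries, checking whether a
** given pdir entry is currently allocated is one word index plus one
** shift.  The helper name is hypothetical.
*/
static inline int
sba_pide_is_busy(struct ioc *ioc, uint pide)	/* hypothetical, for illustration */
{
	unsigned long *rp = (unsigned long *) ioc->res_map;

	/* word pide/BITS_PER_LONG, bit pide%BITS_PER_LONG counted from the LSB */
	return (int) ((rp[pide / BITS_PER_LONG] >> (pide % BITS_PER_LONG)) & 1UL);
}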

/* Convert from IOVP to IOVA and vice versa. */
#define SBA_IOVA(ioc,iovp,offset,hint_reg) ((ioc->ibase) | (iovp) | (offset) | ((hint_reg)<<(ioc->hint_shift_pdir)))
#define SBA_IOVP(ioc,iova) (((iova) & ioc->hint_mask_pdir) & ~(ioc->ibase))

/* FIXME : review these macros to verify correctness and usage */
#define PDIR_INDEX(iovp)   ((iovp)>>IOVP_SHIFT)

#define RESMAP_MASK(n)    ~(~0UL << (n))
#define RESMAP_IDX_MASK   (sizeof(unsigned long) - 1)


/**
 * sba_search_bitmap - find free space in IO PDIR resource bitmap
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @bits_wanted: number of entries we need.
 *
 * Find consecutive free bits in resource bitmap.
 * Each bit represents one entry in the IO Pdir.
 * Cool perf optimization: search for log2(size) bits at a time.
 */
static SBA_INLINE unsigned long
sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted)
{
	unsigned long *res_ptr = ioc->res_hint;
	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
	unsigned long pide = ~0UL;

	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
	ASSERT(res_ptr < res_end);
	if (bits_wanted > (BITS_PER_LONG/2)) {
		/* Search word at a time - no mask needed */
		for(; res_ptr < res_end; ++res_ptr) {
			if (*res_ptr == 0) {
				*res_ptr = RESMAP_MASK(bits_wanted);
				pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
				pide <<= 3;	/* convert to bit address */
				break;
			}
		}
		/* point to the next word on next pass */
		res_ptr++;
		ioc->res_bitshift = 0;
	} else {
		/*
		** Search the resource bit map on well-aligned values.
		** "o" is the alignment.
		** We need the alignment to invalidate I/O TLB using
		** SBA HW features in the unmap path.
		*/
		unsigned long o = 1 << get_order(bits_wanted << PAGE_SHIFT);
		uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o);
		unsigned long mask;

		if (bitshiftcnt >= BITS_PER_LONG) {
			bitshiftcnt = 0;
			res_ptr++;
		}
		mask = RESMAP_MASK(bits_wanted) << bitshiftcnt;

		DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr);
		while(res_ptr < res_end) {
			DBG_RES("    %p %lx %lx\n", res_ptr, mask, *res_ptr);
			ASSERT(0 != mask);
			if(0 == ((*res_ptr) & mask)) {
				*res_ptr |= mask;     /* mark resources busy! */
				pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
				pide <<= 3;	/* convert to bit address */
				pide += bitshiftcnt;
				break;
			}
			mask <<= o;
			bitshiftcnt += o;
			if (0 == mask) {
				mask = RESMAP_MASK(bits_wanted);
				bitshiftcnt=0;
				res_ptr++;
			}
		}
		/* look in the same word on the next pass */
		ioc->res_bitshift = bitshiftcnt + bits_wanted;
	}

	/* wrapped ? */
	if (res_end <= res_ptr) {
		ioc->res_hint = (unsigned long *) ioc->res_map;
		ioc->res_bitshift = 0;
	} else {
		ioc->res_hint = res_ptr;
	}
	return (pide);
}
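
/*
** A minimal sketch (not part of the original file) of how a successful
** search turns into a DMA address on the map path, assuming the default
** hint register and omitting the locking and pdir fill:
**
**	pide = sba_search_bitmap(ioc, pages_needed);
**	iovp = (dma_addr_t) pide << IOVP_SHIFT;		- inverse of PDIR_INDEX()
**	iova = SBA_IOVA(ioc, iovp, addr & ~IOVP_MASK, DEFAULT_DMA_HINT_REG);
*/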