
📄 sba_iommu.c

📁 linux-2.6.15.6
💻 C
📖 Page 1 of 4
/*
**  IA64 System Bus Adapter (SBA) I/O MMU manager
**
**	(c) Copyright 2002-2005 Alex Williamson
**	(c) Copyright 2002-2003 Grant Grundler
**	(c) Copyright 2002-2005 Hewlett-Packard Company
**
**	Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
**	Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
**
**	This program is free software; you can redistribute it and/or modify
**	it under the terms of the GNU General Public License as published by
**	the Free Software Foundation; either version 2 of the License, or
**	(at your option) any later version.
**
**
** This module initializes the IOC (I/O Controller) found on HP
** McKinley machines and their successors.
**
*/

#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/nodemask.h>
#include <linux/bitops.h>	/* hweight64() */

#include <asm/delay.h>		/* ia64_get_itc() */
#include <asm/io.h>
#include <asm/page.h>		/* PAGE_OFFSET */
#include <asm/dma.h>
#include <asm/system.h>		/* wmb() */

#include <asm/acpi-ext.h>

#define PFX "IOC: "

/*
** Enabling timing search of the pdir resource map.  Output in /proc.
** Disabled by default to optimize performance.
*/
#undef PDIR_SEARCH_TIMING

/*
** This option allows cards capable of 64bit DMA to bypass the IOMMU.  If
** not defined, all DMA will be 32bit and go through the TLB.
** There's potentially a conflict in the bio merge code with us
** advertising an iommu, but then bypassing it.  Since I/O MMU bypassing
** appears to give more performance than bio-level virtual merging, we'll
** do the former for now.  NOTE: BYPASS_SG also needs to be undef'd to
** completely restrict DMA to the IOMMU.
*/
#define ALLOW_IOV_BYPASS

/*
** This option specifically allows/disallows bypassing scatterlists with
** multiple entries.  Coalescing these entries can allow better DMA streaming
** and in some cases shows better performance than entirely bypassing the
** IOMMU.  Performance increase on the order of 1-2% sequential output/input
** using bonnie++ on a RAID0 MD device (sym2 & mpt).
*/
#undef ALLOW_IOV_BYPASS_SG

/*
** If a device prefetches beyond the end of a valid pdir entry, it will cause
** a hard failure, ie. MCA.  Version 3.0 and later of the zx1 LBA should
** disconnect on 4k boundaries and prevent such issues.  If the device is
** particularly aggressive, this option will keep the entire pdir valid such
** that prefetching will hit a valid address.  This could severely impact
** error containment, and is therefore off by default.  The page that is
** used for spill-over is poisoned, so that should help debugging somewhat.
*/
#undef FULL_VALID_PDIR

#define ENABLE_MARK_CLEAN

/*
** The number of debug flags is a clue - this code is fragile.  NOTE: since
** tightening the use of res_lock the resource bitmap and actual pdir are no
** longer guaranteed to stay in sync.  The sanity checking code isn't going to
** like that.
*/
#undef DEBUG_SBA_INIT
#undef DEBUG_SBA_RUN
#undef DEBUG_SBA_RUN_SG
#undef DEBUG_SBA_RESOURCE
#undef ASSERT_PDIR_SANITY
#undef DEBUG_LARGE_SG_ENTRIES
#undef DEBUG_BYPASS

#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY)
#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive
#endif

#define SBA_INLINE	__inline__
/* #define SBA_INLINE */

#ifdef DEBUG_SBA_INIT
#define DBG_INIT(x...)	printk(x)
#else
#define DBG_INIT(x...)
#endif
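/*
** Illustrative note (not from the original file): the DBG_* macros
** below follow the same pattern as DBG_INIT above -- each expands to
** printk() only while its DEBUG_* flag is defined, and to nothing
** otherwise, so debug statements cost zero code in production builds.
** A hypothetical call such as
**
**	DBG_RUN("mapping iova 0x%lx\n", iova);
**
** is compiled out entirely unless DEBUG_SBA_RUN is defined.
*/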
#ifdef DEBUG_SBA_RUN
#define DBG_RUN(x...)	printk(x)
#else
#define DBG_RUN(x...)
#endif

#ifdef DEBUG_SBA_RUN_SG
#define DBG_RUN_SG(x...)	printk(x)
#else
#define DBG_RUN_SG(x...)
#endif

#ifdef DEBUG_SBA_RESOURCE
#define DBG_RES(x...)	printk(x)
#else
#define DBG_RES(x...)
#endif

#ifdef DEBUG_BYPASS
#define DBG_BYPASS(x...)	printk(x)
#else
#define DBG_BYPASS(x...)
#endif

#ifdef ASSERT_PDIR_SANITY
#define ASSERT(expr) \
        if(!(expr)) { \
                printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \
                panic(#expr); \
        }
#else
#define ASSERT(expr)
#endif

/*
** The number of pdir entries to "free" before issuing
** a read to PCOM register to flush out PCOM writes.
** Interacts with allocation granularity (ie 4 or 8 entries
** allocated and free'd/purged at a time might make this
** less interesting).
*/
#define DELAYED_RESOURCE_CNT	64

#define PCI_DEVICE_ID_HP_SX2000_IOC	0x12ec

#define ZX1_IOC_ID	((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP)
#define ZX2_IOC_ID	((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP)
#define REO_IOC_ID	((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP)
#define SX1000_IOC_ID	((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP)
#define SX2000_IOC_ID	((PCI_DEVICE_ID_HP_SX2000_IOC << 16) | PCI_VENDOR_ID_HP)

#define ZX1_IOC_OFFSET	0x1000	/* ACPI reports SBA, we want IOC */

#define IOC_FUNC_ID	0x000
#define IOC_FCLASS	0x008	/* function class, bist, header, rev... */
#define IOC_IBASE	0x300	/* IO TLB */
#define IOC_IMASK	0x308
#define IOC_PCOM	0x310
#define IOC_TCNFG	0x318
#define IOC_PDIR_BASE	0x320

#define IOC_ROPE0_CFG	0x500
#define   IOC_ROPE_AO	  0x10	/* Allow "Relaxed Ordering" */

/* AGP GART driver looks for this */
#define ZX1_SBA_IOMMU_COOKIE	0x0000badbadc0ffeeUL

/*
** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register)
**
** Some IOCs (sx1000) can run at the above page sizes, but are
** really only supported using the IOC at a 4k page size.
**
** iovp_size could only be greater than PAGE_SIZE if we are
** confident the drivers really only touch the next physical
** page iff that driver instance owns it.
*/
static unsigned long iovp_size;
static unsigned long iovp_shift;
static unsigned long iovp_mask;
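/*
** Illustrative note (not from the original file): the init code keeps
** these three globals mutually consistent.  Assuming the default 4k
** IOMMU page size, the values are effectively
**
**	iovp_shift = 12;
**	iovp_size  = 1UL << iovp_shift;		(4096)
**	iovp_mask  = ~(iovp_size - 1);		(0xfffffffffffff000)
**
** so an IO virtual page number is iova >> iovp_shift and the offset
** within that page is iova & ~iovp_mask.
*/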
struct ioc {
	void __iomem	*ioc_hpa;	/* I/O MMU base address */
	char		*res_map;	/* resource map, bit == pdir entry */
	u64		*pdir_base;	/* physical base address */
	unsigned long	ibase;		/* pdir IOV Space base */
	unsigned long	imask;		/* pdir IOV Space mask */

	unsigned long	*res_hint;	/* next avail IOVP - circular search */
	unsigned long	dma_mask;
	spinlock_t	res_lock;	/* protects the resource bitmap, but must be held when */
					/* clearing pdir to prevent races with allocations. */
	unsigned int	res_bitshift;	/* from the RIGHT! */
	unsigned int	res_size;	/* size of resource map in bytes */
#ifdef CONFIG_NUMA
	unsigned int	node;		/* node where this IOC lives */
#endif
#if DELAYED_RESOURCE_CNT > 0
	spinlock_t	saved_lock;	/* may want to try to get this on a separate cacheline */
					/* than res_lock for bigger systems. */
	int		saved_cnt;
	struct sba_dma_pair {
		dma_addr_t	iova;
		size_t		size;
	} saved[DELAYED_RESOURCE_CNT];
#endif

#ifdef PDIR_SEARCH_TIMING
#define SBA_SEARCH_SAMPLE	0x100
	unsigned long avg_search[SBA_SEARCH_SAMPLE];
	unsigned long avg_idx;	/* current index into avg_search */
#endif

	/* Stuff we don't need in performance path */
	struct ioc	*next;		/* list of IOC's in system */
	acpi_handle	handle;		/* for multiple IOC's */
	const char 	*name;
	unsigned int	func_id;
	unsigned int	rev;		/* HW revision of chip */
	u32		iov_size;
	unsigned int	pdir_size;	/* in bytes, determined by IOV Space size */
	struct pci_dev	*sac_only_dev;
};

static struct ioc *ioc_list;
static int reserve_sba_gart = 1;

static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t);
static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t);

#define sba_sg_address(sg)	(page_address((sg)->page) + (sg)->offset)

#ifdef FULL_VALID_PDIR
static u64 prefetch_spill_page;
#endif

#ifdef CONFIG_PCI
# define GET_IOC(dev)	(((dev)->bus == &pci_bus_type)						\
			 ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL)
#else
# define GET_IOC(dev)	NULL
#endif

/*
** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
** (or rather not merge) DMA's into manageable chunks.
** On parisc, this is more of a software/tuning constraint
** than a HW one.  I/O MMU allocation algorithms can be
** faster with smaller sizes (to some degree).
*/
#define DMA_CHUNK_SIZE  (BITS_PER_LONG*iovp_size)

#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1))

/************************************
** SBA register read and write support
**
** BE WARNED: register writes are posted.
**  (ie follow writes which must reach HW with a read)
**
*/
#define READ_REG(addr)       __raw_readq(addr)
#define WRITE_REG(val, addr) __raw_writeq(val, addr)

#ifdef DEBUG_SBA_INIT

/**
 * sba_dump_tlb - debugging only - print IOMMU operating parameters
 * @hpa: base address of the IOMMU
 *
 * Print the size/location of the IO MMU PDIR.
 */
static void
sba_dump_tlb(char *hpa)
{
	DBG_INIT("IO TLB at 0x%p\n", (void *)hpa);
	DBG_INIT("IOC_IBASE    : %016lx\n", READ_REG(hpa+IOC_IBASE));
	DBG_INIT("IOC_IMASK    : %016lx\n", READ_REG(hpa+IOC_IMASK));
	DBG_INIT("IOC_TCNFG    : %016lx\n", READ_REG(hpa+IOC_TCNFG));
	DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE));
	DBG_INIT("\n");
}
#endif


#ifdef ASSERT_PDIR_SANITY

/**
 * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @msg: text to print on the output line.
 * @pide: pdir index.
 *
 * Print one entry of the IO MMU PDIR in human readable form.
 */
static void
sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide)
{
	/* start printing from lowest pde in rval */
	u64 *ptr = &ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)];
	unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >> 3) & -sizeof(unsigned long)];
	uint rcnt;

	printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n",
		 msg, rptr, pide & (BITS_PER_LONG - 1), *rptr);

	rcnt = 0;
	while (rcnt < BITS_PER_LONG) {
		printk(KERN_DEBUG "%s %2d %p %016Lx\n",
		       (rcnt == (pide & (BITS_PER_LONG - 1)))
		       ? "    -->" : "       ",
		       rcnt, ptr, (unsigned long long) *ptr);
		rcnt++;
		ptr++;
	}
	printk(KERN_DEBUG "%s", msg);
}
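/*
** Illustrative note (not from the original file): the index arithmetic
** above relies on one res_map bit per 64-bit pdir entry.  For a
** hypothetical pide of 70 on a 64-bit machine:
**
**	pide >> 3                   ==  8	(byte offset into res_map)
**	8 & -sizeof(unsigned long)  ==  8	(aligned down to a word)
**	pide & (BITS_PER_LONG - 1)  ==  6	(bit within that word)
**
** so rptr points at the second res_map word, whose bit 6 tracks pdir
** entry 70, and ptr starts at pdir entry 64 so the whole word can be
** dumped.
*/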
/**
 * sba_check_pdir - debugging only - consistency checker
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @msg: text to print on the output line.
 *
 * Verify the resource map and pdir state is consistent
 */
static int
sba_check_pdir(struct ioc *ioc, char *msg)
{
	u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]);
	u64 *rptr = (u64 *) ioc->res_map;	/* resource map ptr */
	u64 *pptr = ioc->pdir_base;	/* pdir ptr */
	uint pide = 0;

	while (rptr < rptr_end) {
		u64 rval;
		int rcnt; /* number of bits we might check */

		rval = *rptr;
		rcnt = 64;

		while (rcnt) {
			/* Get last byte and highest bit from that */
			u32 pde = ((u32)((*pptr >> (63)) & 0x1));
			if ((rval & 0x1) ^ pde)
			{
				/*
				** BUMMER!  -- res_map != pdir --
				** Dump rval and matching pdir entries
				*/
				sba_dump_pdir_entry(ioc, msg, pide);
				return(1);
			}
			rcnt--;
			rval >>= 1;	/* try the next bit */
			pptr++;
			pide++;
		}
		rptr++;	/* look at next word of res_map */
	}
	/* It'd be nice if we always got here :^) */
	return 0;
}


/**
 * sba_dump_sg - debugging only - print Scatter-Gather list
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @startsg: head of the SG list
 * @nents: number of entries in SG list
 *
 * print the SG list so we can verify it's correct by hand.
 */
static void
sba_dump_sg(struct ioc *ioc, struct scatterlist *startsg, int nents)
{
	while (nents-- > 0) {
		printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents,
		       startsg->dma_address, startsg->dma_length,
		       sba_sg_address(startsg));
		startsg++;
	}
}

static void
sba_check_sg(struct ioc *ioc, struct scatterlist *startsg, int nents)
{
	struct scatterlist *the_sg = startsg;
	int the_nents = nents;

	while (the_nents-- > 0) {
		if (sba_sg_address(the_sg) == 0x0UL)
			sba_dump_sg(NULL, startsg, nents);
		the_sg++;
	}
}

#endif /* ASSERT_PDIR_SANITY */


/**************************************************************
*
*   I/O Pdir Resource Management
*
*   Bits set in the resource map are in use.
*   Each bit can represent a number of pages.
*   LSbs represent lower addresses (IOVA's).
*
***************************************************************/
#define PAGES_PER_RANGE 1	/* could increase this to 4 or 8 if needed */

/* Convert from IOVP to IOVA and vice versa. */
#define SBA_IOVA(ioc,iovp,offset) ((ioc->ibase) | (iovp) | (offset))
#define SBA_IOVP(ioc,iova) ((iova) & ~(ioc->ibase))

#define PDIR_ENTRY_SIZE	sizeof(u64)
#define PDIR_INDEX(iovp)   ((iovp)>>iovp_shift)

#define RESMAP_MASK(n)    ~(~0UL << (n))
#define RESMAP_IDX_MASK   (sizeof(unsigned long) - 1)

/**
 * For most cases the normal get_order is sufficient, however it limits us
 * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity.
 * It only incurs about 1 clock cycle to use this one with the static variable
 * and makes the code more intuitive.
 */
static SBA_INLINE int
get_iovp_order (unsigned long size)
{
	long double d = size - 1;
	long order;

	order = ia64_getf_exp(d);
	order = order - iovp_shift - 0xffff + 1;
	if (order < 0)
		order = 0;
	return order;
}
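/*
** Illustrative note (not from the original file): ia64_getf_exp()
** returns the *biased* binary exponent of the long double, which is
** why the IA-64 register-format bias of 0xffff is subtracted above.
** A worked example, assuming iovp_shift == 12 (4k IOMMU pages):
** size == 12288 (three pages) gives d == 12287.0, whose unbiased
** exponent is 13, so order == 13 - 12 + 1 == 2 and the request is
** rounded up to 1 << 2 == 4 pages -- the smallest power of two that
** covers it.
*/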
/**
 * sba_search_bitmap - find free space in IO PDIR resource bitmap
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @bits_wanted: number of entries we need.
 * @use_hint: use res_hint to indicate where to start looking
 *
 * Find consecutive free bits in resource bitmap.
 * Each bit represents one entry in the IO Pdir.
 * Cool perf optimization: search for log2(size) bits at a time.
 */
static SBA_INLINE unsigned long
sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
{
	unsigned long *res_ptr;
	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
	unsigned long flags, pide = ~0UL;

	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);

	spin_lock_irqsave(&ioc->res_lock, flags);

	/* Allow caller to force a search through the entire resource space */
	if (likely(use_hint)) {
		res_ptr = ioc->res_hint;
	} else {
		res_ptr = (ulong *)ioc->res_map;
		ioc->res_bitshift = 0;
	}
	/* res_ptr is initialized now; sanity-check it against the map end */
	ASSERT(res_ptr < res_end);

	/*
	 * N.B.  REO/Grande defect AR2305 can cause TLB fetch timeouts
	 * if a TLB entry is purged while in use.  sba_mark_invalid()
	 * purges IOTLB entries in power-of-two sizes, so we also
	 * allocate IOVA space in power-of-two sizes.
	 */
	bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift);

	if (likely(bits_wanted == 1)) {
		unsigned int bitshiftcnt;
		for(; res_ptr < res_end ; res_ptr++) {
			if (likely(*res_ptr != ~0UL)) {
				bitshiftcnt = ffz(*res_ptr);
				*res_ptr |= (1UL << bitshiftcnt);
				pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
				pide <<= 3;	/* convert to bit address */
				pide += bitshiftcnt;
				ioc->res_bitshift = bitshiftcnt + bits_wanted;
				goto found_it;
			}
		}
		goto not_found;
	}

	if (likely(bits_wanted <= BITS_PER_LONG/2)) {
		/*
		** Search the resource bit map on well-aligned values.
		** "o" is the alignment.
		** We need the alignment to invalidate I/O TLB using
		** SBA HW features in the unmap path.
		*/
		unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift);
		uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o);
		unsigned long mask, base_mask;

		base_mask = RESMAP_MASK(bits_wanted);
		mask = base_mask << bitshiftcnt;

		DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr);
		for(; res_ptr < res_end ; res_ptr++)
		{
			DBG_RES("    %p %lx %lx\n", res_ptr, mask, *res_ptr);
			ASSERT(0 != mask);
