ccio-dma.c

来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 1,588 行 · 第 1/4 页

C
1,588
字号
/*
** ccio-dma.c:
**	DMA management routines for first generation cache-coherent machines.
**	Program U2/Uturn in "Virtual Mode" and use the I/O MMU.
**
**	(c) Copyright 2000 Grant Grundler
**	(c) Copyright 2000 Ryan Bradetich
**	(c) Copyright 2000 Hewlett-Packard Company
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
**
**  "Real Mode" operation refers to U2/Uturn chip operation.
**  U2/Uturn were designed to perform coherency checks w/o using
**  the I/O MMU - basically what x86 does.
**
**  Philipp Rumpf has a "Real Mode" driver for PCX-W machines at:
**      CVSROOT=:pserver:anonymous@198.186.203.37:/cvsroot/linux-parisc
**      cvs -z3 co linux/arch/parisc/kernel/dma-rm.c
**
**  I've rewritten his code to work under TPG's tree. See ccio-rm-dma.c.
**
**  Drawbacks of using Real Mode are:
**	o outbound DMA is slower - U2 won't prefetch data (GSC+ XQL signal).
**	o Inbound DMA less efficient - U2 can't use DMA_FAST attribute.
**	o Ability to do scatter/gather in HW is lost.
**	o Doesn't work under PCX-U/U+ machines since they didn't follow
**        the coherency design originally worked out. 
Only PCX-W does.*/#include <linux/config.h>#include <linux/types.h>#include <linux/init.h>#include <linux/mm.h>#include <linux/spinlock.h>#include <linux/slab.h>#include <linux/string.h>#include <linux/pci.h>#include <linux/reboot.h>#include <asm/byteorder.h>#include <asm/cache.h>		/* for L1_CACHE_BYTES */#include <asm/uaccess.h>#include <asm/page.h>#include <asm/dma.h>#include <asm/io.h>#include <asm/hardware.h>       /* for register_module() */#include <asm/parisc-device.h>/* ** Choose "ccio" since that's what HP-UX calls it.** Make it easier for folks to migrate from one to the other :^)*/#define MODULE_NAME "ccio"#undef DEBUG_CCIO_RES#undef DEBUG_CCIO_RUN#undef DEBUG_CCIO_INIT#undef DEBUG_CCIO_RUN_SG#ifdef CONFIG_PROC_FS/* * CCIO_SEARCH_TIME can help measure how fast the bitmap search is. * impacts performance though - ditch it if you don't use it. */#define CCIO_SEARCH_TIME#undef CCIO_MAP_STATS#else#undef CCIO_SEARCH_TIME#undef CCIO_MAP_STATS#endif#include <linux/proc_fs.h>#include <asm/runway.h>		/* for proc_runway_root */#ifdef DEBUG_CCIO_INIT#define DBG_INIT(x...)  printk(x)#else#define DBG_INIT(x...)#endif#ifdef DEBUG_CCIO_RUN#define DBG_RUN(x...)   printk(x)#else#define DBG_RUN(x...)#endif#ifdef DEBUG_CCIO_RES#define DBG_RES(x...)   printk(x)#else#define DBG_RES(x...)#endif#ifdef DEBUG_CCIO_RUN_SG#define DBG_RUN_SG(x...) 
printk(x)#else#define DBG_RUN_SG(x...)#endif#define CCIO_INLINE	/* inline */#define WRITE_U32(value, addr) gsc_writel(value, (u32 *)(addr))#define READ_U32(addr) gsc_readl((u32 *)(addr))#define U2_IOA_RUNWAY 0x580#define U2_BC_GSC     0x501#define UTURN_IOA_RUNWAY 0x581#define UTURN_BC_GSC     0x502#define IOA_NORMAL_MODE      0x00020080 /* IO_CONTROL to turn on CCIO        */#define CMD_TLB_DIRECT_WRITE 35         /* IO_COMMAND for I/O TLB Writes     */#define CMD_TLB_PURGE        33         /* IO_COMMAND to Purge I/O TLB entry */struct ioa_registers {        /* Runway Supervisory Set */        volatile int32_t    unused1[12];        volatile uint32_t   io_command;             /* Offset 12 */        volatile uint32_t   io_status;              /* Offset 13 */        volatile uint32_t   io_control;             /* Offset 14 */        volatile int32_t    unused2[1];        /* Runway Auxiliary Register Set */        volatile uint32_t   io_err_resp;            /* Offset  0 */        volatile uint32_t   io_err_info;            /* Offset  1 */        volatile uint32_t   io_err_req;             /* Offset  2 */        volatile uint32_t   io_err_resp_hi;         /* Offset  3 */        volatile uint32_t   io_tlb_entry_m;         /* Offset  4 */        volatile uint32_t   io_tlb_entry_l;         /* Offset  5 */        volatile uint32_t   unused3[1];        volatile uint32_t   io_pdir_base;           /* Offset  7 */        volatile uint32_t   io_io_low_hv;           /* Offset  8 */        volatile uint32_t   io_io_high_hv;          /* Offset  9 */        volatile uint32_t   unused4[1];        volatile uint32_t   io_chain_id_mask;       /* Offset 11 */        volatile uint32_t   unused5[2];        volatile uint32_t   io_io_low;              /* Offset 14 */        volatile uint32_t   io_io_high;             /* Offset 15 */};/*** IOA Registers** -------------**** Runway IO_CONTROL Register (+0x38)** ** The Runway IO_CONTROL register controls the forwarding of transactions.**** | 0 
 ...  13  |  14 15 | 16 ... 21 | 22 | 23 24 |  25 ... 31 |
** |    HV       |   TLB  |  reserved | HV | mode  |  reserved  |
**
** o mode field indicates the address translation of transactions
**   forwarded from Runway to GSC+:
**       Mode Name     Value        Definition
**       Off (default)   0          Opaque to matching addresses.
**       Include         1          Transparent for matching addresses.
**       Peek            3          Map matching addresses.
**
**       + "Off" mode: Runway transactions which match the I/O range
**         specified by the IO_IO_LOW/IO_IO_HIGH registers will be ignored.
**       + "Include" mode: all addresses within the I/O range specified
**         by the IO_IO_LOW and IO_IO_HIGH registers are transparently
**         forwarded. This is the I/O Adapter's normal operating mode.
**       + "Peek" mode: used during system configuration to initialize the
**         GSC+ bus. Runway Write_Shorts in the address range specified by
**         IO_IO_LOW and IO_IO_HIGH are forwarded through the I/O Adapter
**         *AND* the GSC+ address is remapped to the Broadcast Physical
**         Address space by setting the 14 high order address bits of the
**         32 bit GSC+ address to ones.
**
** o TLB field affects transactions which are forwarded from GSC+ to Runway.
**   "Real" mode is the power-on default.
** 
**   TLB Mode  Value  Description
**   Real        0    No TLB translation. Address is directly mapped and the
**                    virtual address is composed of selected physical bits.
**   Error       1    Software fills the TLB manually.
**   Normal      2    IOA fetches IO TLB misses from IO PDIR (in host memory).
**
**
** IO_IO_LOW_HV	  +0x60 (HV dependent)
** IO_IO_HIGH_HV  +0x64 (HV dependent)
** IO_IO_LOW      +0x78	(Architected register)
** IO_IO_HIGH     +0x7c	(Architected register)
**
** IO_IO_LOW and IO_IO_HIGH set the lower and upper bounds of the
** I/O Adapter address space, respectively.
**
** 0  ... 7 | 8 ... 15 |  16   ...   
31 |
** 11111111 | 11111111 |      address   |
**
** Each LOW/HIGH pair describes a disjoint address space region.
** (2 per GSC+ port). Each incoming Runway transaction address is compared
** with both sets of LOW/HIGH registers. If the address is in the range
** greater than or equal to IO_IO_LOW and less than IO_IO_HIGH the transaction
** is forwarded to the respective GSC+ bus.
** Specify IO_IO_LOW equal to or greater than IO_IO_HIGH to avoid specifying
** an address space region.
**
** In order for a Runway address to reside within GSC+ extended address space:
**	Runway Address [0:7]    must identically compare to 8'b11111111
**	Runway Address [8:11]   must be equal to IO_IO_LOW(_HV)[16:19]
**	Runway Address [12:23]  must be greater than or equal to
**	           IO_IO_LOW(_HV)[20:31] and less than IO_IO_HIGH(_HV)[20:31].
**	Runway Address [24:39]  is not used in the comparison.
**
** When the Runway transaction is forwarded to GSC+, the GSC+ address is
** as follows:
**	GSC+ Address[0:3]	4'b1111
**	GSC+ Address[4:29]	Runway Address[12:37]
**	GSC+ Address[30:31]	2'b00
**
** All 4 Low/High registers must be initialized (by PDC) once the lower bus
** is interrogated and address space is defined. The operating system will
** modify the architectural IO_IO_LOW and IO_IO_HIGH registers following
** the PDC initialization.  However, the hardware version dependent IO_IO_LOW
** and IO_IO_HIGH registers should not be subsequently altered by the OS.
** 
** Writes to both sets of registers will take effect immediately, bypassing
** the queues, which ensures that subsequent Runway transactions are checked
** against the updated bounds values. However reads are queued, introducing
** the possibility of a read being bypassed by a subsequent write to the same
** register. 
This sequence can be avoided by having software wait for read** returns before issuing subsequent writes.*/struct ioc {	struct ioa_registers *ioc_hpa;  /* I/O MMU base address */	u8  *res_map;	                /* resource map, bit == pdir entry */	u64 *pdir_base;	                /* physical base address */	u32 pdir_size; 			/* bytes, function of IOV Space size */	u32 res_hint;	                /* next available IOVP - 					   circular search */	u32 res_size;		    	/* size of resource map in bytes */	spinlock_t res_lock;#ifdef CCIO_SEARCH_TIME#define CCIO_SEARCH_SAMPLE 0x100	unsigned long avg_search[CCIO_SEARCH_SAMPLE];	unsigned long avg_idx;		  /* current index into avg_search */#endif#ifdef CCIO_MAP_STATS	unsigned long used_pages;	unsigned long msingle_calls;	unsigned long msingle_pages;	unsigned long msg_calls;	unsigned long msg_pages;	unsigned long usingle_calls;	unsigned long usingle_pages;	unsigned long usg_calls;	unsigned long usg_pages;#endif	unsigned short cujo20_bug;	/* STUFF We don't need in performance path */	u32 chainid_shift; 		/* specify bit location of chain_id */	struct ioc *next;		/* Linked list of discovered iocs */	const char *name;		/* device name from firmware */	unsigned int hw_path;           /* the hardware path this ioc is associatd with */	struct pci_dev *fake_pci_dev;   /* the fake pci_dev for non-pci devs */	struct resource mmio_region[2]; /* The "routed" MMIO regions */};/* Ratio of Host MEM to IOV Space size */static unsigned long ccio_mem_ratio = 4;static struct ioc *ioc_list;static int ioc_count;/****************************************************************   I/O Pdir Resource Management**   Bits set in the resource map are in use.*   Each bit can represent a number of pages.*   LSbs represent lower addresses (IOVA's).**   This was was copied from sba_iommu.c. Don't try to unify*   the two resource managers unless a way to have different*   allocation policies is also adjusted. 
We'd like to avoid*   I/O TLB thrashing by having resource allocation policy*   match the I/O TLB replacement policy.****************************************************************/#define IOVP_SIZE PAGE_SIZE#define IOVP_SHIFT PAGE_SHIFT#define IOVP_MASK PAGE_MASK/* Convert from IOVP to IOVA and vice versa. */#define CCIO_IOVA(iovp,offset) ((iovp) | (offset))#define CCIO_IOVP(iova) ((iova) & IOVP_MASK)#define PDIR_INDEX(iovp)    ((iovp)>>IOVP_SHIFT)#define MKIOVP(pdir_idx)    ((long)(pdir_idx) << IOVP_SHIFT)#define MKIOVA(iovp,offset) (dma_addr_t)((long)iovp | (long)offset)#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1))/*** Don't worry about the 150% average search length on a miss.** If the search wraps around, and passes the res_hint, it will** cause the kernel to panic anyhow.*/#define CCIO_SEARCH_LOOP(ioc, res_idx, mask, size)  \       for(; res_ptr < res_end; ++res_ptr) { \               if(0 == (*res_ptr & mask)) { \                       *res_ptr |= mask; \                       res_idx = (unsigned int)((unsigned long)res_ptr - (unsigned long)ioc->res_map); \                       ioc->res_hint = res_idx + (size >> 3); \                       goto resource_found; \               } \       }#define CCIO_FIND_FREE_MAPPING(ioa, res_idx, mask, size) \       u##size *res_ptr = (u##size *)&((ioc)->res_map[ioa->res_hint & ~((size >> 3) - 1)]); \       u##size *res_end = (u##size *)&(ioc)->res_map[ioa->res_size]; \       CCIO_SEARCH_LOOP(ioc, res_idx, mask, size); \       res_ptr = (u##size *)&(ioc)->res_map[0]; \       CCIO_SEARCH_LOOP(ioa, res_idx, mask, size);/*** Find available bit in this ioa's resource map.** Use a "circular" search:**   o Most IOVA's are "temporary" - avg search time should be small.** o keep a history of what happened for debugging** o KISS.**** Perf optimizations:** o search for log2(size) bits at a time.** o search for available resource bits using byte/word/whatever.** o use different search for "large" (eg > 4 pages) or "very large"**   
(eg > 16 pages) mappings.*//** * ccio_alloc_range - Allocate pages in the ioc's resource map. * @ioc: The I/O Controller. * @pages_needed: The requested number of pages to be mapped into the * I/O Pdir... * * This function searches the resource map of the ioc to locate a range * of available pages for the requested size. */static intccio_alloc_range(struct ioc *ioc, size_t size){	unsigned int pages_needed = size >> IOVP_SHIFT;	unsigned int res_idx;#ifdef CCIO_SEARCH_TIME	unsigned long cr_start = mfctl(16);#endif		BUG_ON(pages_needed == 0);	BUG_ON((pages_needed * IOVP_SIZE) > DMA_CHUNK_SIZE);     	DBG_RES("%s() size: %d pages_needed %d\n", 		__FUNCTION__, size, pages_needed);	/*	** "seek and ye shall find"...praying never hurts either...	** ggg sacrifices another 710 to the computer gods.	*/	if (pages_needed <= 8) {		/*		 * LAN traffic will not thrash the TLB IFF the same NIC		 * uses 8 adjacent pages to map seperate payload data.		 * ie the same byte in the resource bit map.		 */#if 0		/* FIXME: bit search should shift it's way through		 * an unsigned long - not byte at a time. As it is now,		 * we effectively allocate this byte to this mapping.		 */		unsigned long mask = ~(~0UL >> pages_needed);		CCIO_FIND_FREE_MAPPING(ioc, res_idx, mask, 8);#else		CCIO_FIND_FREE_MAPPING(ioc, res_idx, 0xff, 8);#endif	} else if (pages_needed <= 16) {		CCIO_FIND_FREE_MAPPING(ioc, res_idx, 0xffff, 16);	} else if (pages_needed <= 32) {		CCIO_FIND_FREE_MAPPING(ioc, res_idx, ~(unsigned int)0, 32);#ifdef __LP64__	} else if (pages_needed <= 64) {		CCIO_FIND_FREE_MAPPING(ioc, res_idx, ~0UL, 64);#endif	} else {		panic("%s: %s() Too many pages to map. pages_needed: %u\n",		       __FILE__,  __FUNCTION__, pages_needed);	}	panic("%s: %s() I/O MMU is out of mapping resources.\n", __FILE__,	      __FUNCTION__);	resource_found:		DBG_RES("%s() res_idx %d res_hint: %d\n",

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?