📄 ccio-dma.c
字号:
/*** ccio-dma.c:** DMA management routines for first generation cache-coherent machines.** Program U2/Uturn in "Virtual Mode" and use the I/O MMU.**** (c) Copyright 2000 Grant Grundler** (c) Copyright 2000 Ryan Bradetich** (c) Copyright 2000 Hewlett-Packard Company**** This program is free software; you can redistribute it and/or modify** it under the terms of the GNU General Public License as published by** the Free Software Foundation; either version 2 of the License, or** (at your option) any later version.****** "Real Mode" operation refers to U2/Uturn chip operation.** U2/Uturn were designed to perform coherency checks w/o using** the I/O MMU - basically what x86 does.**** Philipp Rumpf has a "Real Mode" driver for PCX-W machines at:** CVSROOT=:pserver:anonymous@198.186.203.37:/cvsroot/linux-parisc** cvs -z3 co linux/arch/parisc/kernel/dma-rm.c**** I've rewritten his code to work under TPG's tree. See ccio-rm-dma.c.**** Drawbacks of using Real Mode are:** o outbound DMA is slower - U2 won't prefetch data (GSC+ XQL signal).** o Inbound DMA less efficient - U2 can't use DMA_FAST attribute.** o Ability to do scatter/gather in HW is lost.** o Doesn't work under PCX-U/U+ machines since they didn't follow** the coherency design originally worked out. Only PCX-W does.*/#include <linux/config.h>#include <linux/types.h>#include <linux/init.h>#include <linux/mm.h>#include <linux/spinlock.h>#include <linux/slab.h>#include <linux/string.h>#include <linux/pci.h>#include <asm/byteorder.h>#include <asm/cache.h> /* for L1_CACHE_BYTES */#include <asm/uaccess.h>#include <asm/pgalloc.h>#include <asm/page.h>#include <asm/io.h>#include <asm/gsc.h> /* for gsc_writeN()... 
*/

/*
** Choose "ccio" since that's what HP-UX calls it.
** Make it easier for folks to migrate from one to the other :^)
*/
#define MODULE_NAME "ccio"

/*
** Debug switches. Move any of these out of the comment to enable the
** matching DBG_*() output (or the resource map dump) below.
**
#define DEBUG_CCIO_RES
#define DEBUG_CCIO_RUN
#define DEBUG_CCIO_INIT
#define DUMP_RESMAP
*/

#include <linux/proc_fs.h>
#include <asm/runway.h>		/* for proc_runway_root */

/*
** Each DBG_*() macro forwards its arguments to printk() when the
** corresponding DEBUG_CCIO_* switch is defined and expands to nothing
** otherwise.
*/
#ifdef DEBUG_CCIO_INIT
#define DBG_INIT(x...)	printk(x)
#else
#define DBG_INIT(x...)
#endif

#ifdef DEBUG_CCIO_RUN
#define DBG_RUN(x...)	printk(x)
#else
#define DBG_RUN(x...)
#endif

#ifdef DEBUG_CCIO_RES
#define DBG_RES(x...)	printk(x)
#else
#define DBG_RES(x...)
#endif

/* Expands to nothing: the "inline" keyword is currently commented out. */
#define CCIO_INLINE	/* inline */

/* Write a 32-bit value to addr via gsc_writel(). */
#define WRITE_U32(value, addr) gsc_writel(value, (u32 *) (addr))

/* Hardware version numbers matched by the driver table and IS_U2/IS_UTURN. */
#define U2_IOA_RUNWAY 0x580
#define U2_BC_GSC     0x501
#define UTURN_IOA_RUNWAY 0x581
#define UTURN_BC_GSC     0x502
/* We *can't* support JAVA (T600). Venture there at your own risk. */

static void dump_resmap(void);

static int ccio_driver_callback(struct hp_device *, struct pa_iodc_driver *);

/*
** Device match table. Every entry names MODULE_NAME and routes to
** ccio_driver_callback(); the table is terminated by an all-zero entry.
** NOTE(review): the meaning of the individual positional fields is
** defined by struct pa_iodc_driver, which is declared elsewhere.
*/
static struct pa_iodc_driver ccio_drivers_for[] = {

   {HPHW_IOA, U2_IOA_RUNWAY, 0x0, 0xb, 0, 0x10,
		DRIVER_CHECK_HVERSION +
		DRIVER_CHECK_SVERSION + DRIVER_CHECK_HWTYPE,
		MODULE_NAME, "U2 I/O MMU", (void *) ccio_driver_callback},

   {HPHW_IOA, UTURN_IOA_RUNWAY, 0x0, 0xb, 0, 0x10,
		DRIVER_CHECK_HVERSION +
		DRIVER_CHECK_SVERSION + DRIVER_CHECK_HWTYPE,
		MODULE_NAME, "Uturn I/O MMU", (void *) ccio_driver_callback},

/*
** FIXME: The following claims the GSC bus port, not the IOA.
**        And there are two busses below a single I/O TLB.
**
** These should go away once we have a real PA bus walk.
** Firmware wants to tell the PA bus walk code about the GSC ports
** since they are not "architected" PA I/O devices. Ie a PA bus walk
** wouldn't discover them. But the PA bus walk code could check
** the "fixed module table" to add such devices to an I/O Tree
** and proceed with the recursive, depth first bus walk.
*/
   {HPHW_BCPORT, U2_BC_GSC, 0x0, 0xc, 0, 0x10,
		DRIVER_CHECK_HVERSION +
		DRIVER_CHECK_SVERSION + DRIVER_CHECK_HWTYPE,
		MODULE_NAME, "U2 GSC+ BC", (void *) ccio_driver_callback},

   {HPHW_BCPORT, UTURN_BC_GSC, 0x0, 0xc, 0, 0x10,
		DRIVER_CHECK_HVERSION +
		DRIVER_CHECK_SVERSION + DRIVER_CHECK_HWTYPE,
		MODULE_NAME, "Uturn GSC+ BC", (void *) ccio_driver_callback},

   {0,0,0,0,0,0,
		0,
		(char *) NULL, (char *) NULL, (void *) NULL }
};

/*
** True when id (a pointer with hw_type and hversion members) describes
** a U2 chip, seen either as the Runway IOA or as its GSC bus port.
*/
#define IS_U2(id) ( \
    (((id)->hw_type == HPHW_IOA) && ((id)->hversion == U2_IOA_RUNWAY)) || \
    (((id)->hw_type == HPHW_BCPORT) && ((id)->hversion == U2_BC_GSC)) \
)

/* Same test as IS_U2(), for the Uturn hardware version numbers. */
#define IS_UTURN(id) ( \
    (((id)->hw_type == HPHW_IOA) && ((id)->hversion == UTURN_IOA_RUNWAY)) || \
    (((id)->hw_type == HPHW_BCPORT) && ((id)->hversion == UTURN_BC_GSC)) \
)

#define IOA_NORMAL_MODE      0x00020080 /* IO_CONTROL to turn on CCIO        */
#define CMD_TLB_DIRECT_WRITE 35         /* IO_COMMAND for I/O TLB Writes     */
#define CMD_TLB_PURGE        33         /* IO_COMMAND to Purge I/O TLB entry */

/*
** Register layout of the I/O adapter. The "Offset N" comments count
** 32-bit words from the start of each register set; the unusedN arrays
** are padding that keeps the named registers at those offsets, so the
** member order and sizes must not be changed.
*/
struct ioa_registers {
	/* Runway Supervisory Set */
	volatile int32_t    unused1[12];
	volatile uint32_t   io_command;             /* Offset 12 */
	volatile uint32_t   io_status;              /* Offset 13 */
	volatile uint32_t   io_control;             /* Offset 14 */
	volatile int32_t    unused2[1];

	/* Runway Auxiliary Register Set */
	volatile uint32_t   io_err_resp;            /* Offset  0 */
	volatile uint32_t   io_err_info;            /* Offset  1 */
	volatile uint32_t   io_err_req;             /* Offset  2 */
	volatile uint32_t   io_err_resp_hi;         /* Offset  3 */
	volatile uint32_t   io_tlb_entry_m;         /* Offset  4 */
	volatile uint32_t   io_tlb_entry_l;         /* Offset  5 */
	volatile uint32_t   unused3[1];
	volatile uint32_t   io_pdir_base;           /* Offset  7 */
	volatile uint32_t   io_io_low_hv;           /* Offset  8 */
	volatile uint32_t   io_io_high_hv;          /* Offset  9 */
	volatile uint32_t   unused4[1];
	volatile uint32_t   io_chain_id_mask;       /* Offset 11 */
	volatile uint32_t   unused5[2];
	volatile uint32_t   io_io_low;              /* Offset 14 */
	volatile uint32_t   io_io_high;             /* Offset 15 */
};

/*
** Per-adapter driver state. ccio_lock is the lock taken by the resource
** map allocator and free routines in this file.
*/
struct ccio_device {
	struct ccio_device    *next;  /* list of LBA's in system (sic; links ccio_device entries) */
	struct hp_device      *iodc;  /* data about dev from firmware */
	spinlock_t            ccio_lock;

	struct ioa_registers   *ccio_hpa; /* base address */
	u64   *pdir_base;   /* physical base address */
	char  *res_map;     /* resource map, bit == pdir entry */

	int   res_hint;	/* next available IOVP - circular search */
	int   res_size;	/* size of resource map in bytes */
	int   chainid_shift; /* specify bit location of chain_id */
	int   flags;         /* state/functionality enabled */
#ifdef DELAYED_RESOURCE_CNT
	dma_addr_t res_delay[DELAYED_RESOURCE_CNT];
#endif

	/* STUFF We don't need in performance path */
	int  pdir_size;     /* in bytes, determined by IOV Space size */
	int  hw_rev;        /* HW revision of chip */
};


/* Ratio of Host MEM to IOV Space size */
static unsigned long ccio_mem_ratio = 4;
/* Head of the list of ccio_device structures (linked through ->next). */
static struct ccio_device *ccio_list = NULL;

static int ccio_proc_info(char *buffer, char **start, off_t offset, int length);
/* Usage counters updated by the resource map allocate/free routines. */
static unsigned long ccio_used_bytes = 0;
static unsigned long ccio_used_pages = 0;
static int ccio_cujo_bug = 0;

static unsigned long ccio_alloc_size = 0;
static unsigned long ccio_free_size = 0;

/**************************************************************
*
*   I/O Pdir Resource Management
*
*   Bits set in the resource map are in use.
*   Each bit can represent a number of pages.
*   LSbs represent lower addresses (IOVA's).
*
*   This was copied from sba_iommu.c. Don't try to unify
*   the two resource managers unless a way to have different
*   allocation policies is also adjusted. We'd like to avoid
*   I/O TLB thrashing by having resource allocation policy
*   match the I/O TLB replacement policy.
*
***************************************************************/
#define PAGES_PER_RANGE 1	/* could increase this to 4 or 8 if needed */
/* An I/O virtual page has the same size, shift and mask as a host page. */
#define IOVP_SIZE PAGE_SIZE
#define IOVP_SHIFT PAGE_SHIFT
#define IOVP_MASK PAGE_MASK

/* Convert from IOVP to IOVA and vice versa.
*/#define CCIO_IOVA(iovp,offset) ((iovp) | (offset))#define CCIO_IOVP(iova) ((iova) & ~(IOVP_SIZE-1) )#define PDIR_INDEX(iovp) ((iovp)>>IOVP_SHIFT)#define MKIOVP(pdir_idx) ((long)(pdir_idx) << IOVP_SHIFT)#define MKIOVA(iovp,offset) (dma_addr_t)((long)iovp | (long)offset)/* CUJO20 KLUDGE start */#define CUJO_20_BITMASK 0x0ffff000 /* upper nibble is a don't care */#define CUJO_20_STEP 0x10000000 /* inc upper nibble */#define CUJO_20_BADPAGE1 0x01003000 /* pages that hpmc on raven U+ */#define CUJO_20_BADPAGE2 0x01607000 /* pages that hpmc on firehawk U+ */#define CUJO_20_BADHVERS 0x6821 /* low nibble 1 is cujo rev 2.0 */#define CUJO_RAVEN_LOC 0xf1000000UL /* cujo location on raven U+ */#define CUJO_FIREHAWK_LOC 0xf1604000UL /* cujo location on firehawk U+ *//* CUJO20 KLUDGE end *//*** Don't worry about the 150% average search length on a miss.** If the search wraps around, and passes the res_hint, it will** cause the kernel to panic anyhow.*//* ioa->res_hint = idx + (size >> 3); \ */#define CCIO_SEARCH_LOOP(ioa, idx, mask, size) \ for(; res_ptr < res_end; ++res_ptr) \ { \ if(0 == ((*res_ptr) & mask)) { \ *res_ptr |= mask; \ idx = (int)((unsigned long)res_ptr - (unsigned long)ioa->res_map); \ ioa->res_hint = 0;\ goto resource_found; \ } \ }#define CCIO_FIND_FREE_MAPPING(ioa, idx, mask, size) { \ u##size *res_ptr = (u##size *)&((ioa)->res_map[ioa->res_hint & ~((size >> 3) - 1)]); \ u##size *res_end = (u##size *)&(ioa)->res_map[ioa->res_size]; \ CCIO_SEARCH_LOOP(ioa, idx, mask, size); \ res_ptr = (u##size *)&(ioa)->res_map[0]; \ CCIO_SEARCH_LOOP(ioa, idx, mask, size); \}/*** Find available bit in this ioa's resource map.** Use a "circular" search:** o Most IOVA's are "temporary" - avg search time should be small.** o keep a history of what happened for debugging** o KISS.**** Perf optimizations:** o search for log2(size) bits at a time.** o search for available resource bits using byte/word/whatever.** o use different search for "large" (eg > 4 pages) or "very large"** 
(eg > 16 pages) mappings.*/static intccio_alloc_range(struct ccio_device *ioa, size_t size){ int res_idx; unsigned long mask, flags; unsigned int pages_needed = size >> PAGE_SHIFT; ASSERT(pages_needed); ASSERT((pages_needed * IOVP_SIZE) < DMA_CHUNK_SIZE); ASSERT(pages_needed < (BITS_PER_LONG - IOVP_SHIFT)); mask = (unsigned long) -1L; mask >>= BITS_PER_LONG - pages_needed; DBG_RES(__FUNCTION__ " size: %d pages_needed %d pages_mask 0x%08lx\n", size, pages_needed, mask); spin_lock_irqsave(&ioa->ccio_lock, flags); /* ** "seek and ye shall find"...praying never hurts either... ** ggg sacrafices another 710 to the computer gods. */ if(pages_needed <= 8) { CCIO_FIND_FREE_MAPPING(ioa, res_idx, mask, 8); } else if(pages_needed <= 16) { CCIO_FIND_FREE_MAPPING(ioa, res_idx, mask, 16); } else if(pages_needed <= 32) { CCIO_FIND_FREE_MAPPING(ioa, res_idx, mask, 32);#ifdef __LP64__ } else if(pages_needed <= 64) { CCIO_FIND_FREE_MAPPING(ioa, res_idx, mask, 64)#endif } else { panic(__FILE__ ":" __FUNCTION__ "() Too many pages to map.\n"); }#ifdef DUMP_RESMAP dump_resmap();#endif panic(__FILE__ ":" __FUNCTION__ "() I/O MMU is out of mapping resources\n"); resource_found: DBG_RES(__FUNCTION__ " res_idx %d mask 0x%08lx res_hint: %d\n", res_idx, mask, ioa->res_hint); ccio_used_pages += pages_needed; ccio_used_bytes += ((pages_needed >> 3) ? (pages_needed >> 3) : 1); spin_unlock_irqrestore(&ioa->ccio_lock, flags);#ifdef DUMP_RESMAP dump_resmap();#endif /* ** return the bit address (convert from byte to bit). 
*/ return (res_idx << 3);}#define CCIO_FREE_MAPPINGS(ioa, idx, mask, size) \ u##size *res_ptr = (u##size *)&((ioa)->res_map[idx + (((size >> 3) - 1) & ~((size >> 3) - 1))]); \ ASSERT((*res_ptr & mask) == mask); \ *res_ptr &= ~mask;/*** clear bits in the ioa's resource map*/static voidccio_free_range(struct ccio_device *ioa, dma_addr_t iova, size_t size){ unsigned long mask, flags; unsigned long iovp = CCIO_IOVP(iova); unsigned int res_idx = PDIR_INDEX(iovp)>>3; unsigned int pages_mapped = (size >> IOVP_SHIFT) + !!(size & ~IOVP_MASK); ASSERT(pages_needed); ASSERT((pages_needed * IOVP_SIZE) < DMA_CHUNK_SIZE); ASSERT(pages_needed < (BITS_PER_LONG - IOVP_SHIFT)); mask = (unsigned long) -1L; mask >>= BITS_PER_LONG - pages_mapped; DBG_RES(__FUNCTION__ " res_idx: %d size: %d pages_mapped %d mask 0x%08lx\n", res_idx, size, pages_mapped, mask); spin_lock_irqsave(&ioa->ccio_lock, flags); if(pages_mapped <= 8) { CCIO_FREE_MAPPINGS(ioa, res_idx, mask, 8); } else if(pages_mapped <= 16) { CCIO_FREE_MAPPINGS(ioa, res_idx, mask, 16); } else if(pages_mapped <= 32) { CCIO_FREE_MAPPINGS(ioa, res_idx, mask, 32);#ifdef __LP64__ } else if(pages_mapped <= 64) { CCIO_FREE_MAPPINGS(ioa, res_idx, mask, 64);#endif } else { panic(__FILE__ ":" __FUNCTION__ "() Too many pages to unmap.\n"); } ccio_used_pages -= (pages_mapped ? pages_mapped : 1); ccio_used_bytes -= ((pages_mapped >> 3) ? (pages_mapped >> 3) : 1); spin_unlock_irqrestore(&ioa->ccio_lock, flags);#ifdef DUMP_RESMAP
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -