ipath_file_ops.c
来自「LINUX 2.6.17.4的源码」· C语言 代码 · 共 1,915 行 · 第 1/4 页
C
1,915 行
/*
 * NOTE(review): this span begins mid-function — it is the continuation of
 * ipath_set_part_key(); the function header, locals (i, any, pidx, lkey,
 * key, ret, pd, dd) and the lock context are outside this chunk.
 */
	/* * Set the full membership bit, because it has to be * set in the
	 * register or the packet, and it seems * cleaner to set in the
	 * register than to force all * callers to set it. (see bug 4331) */
	key |= 0x8000;
	/*
	 * Pass 1 over this port's pkey table: find a free slot (pidx) and
	 * reject a duplicate of the exact same key for this port.
	 */
	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
		if (!pd->port_pkeys[i] && pidx == -1)
			pidx = i;
		if (pd->port_pkeys[i] == key) {
			ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
				   "(%x) more than once\n",
				   pd->port_port, key);
			ret = -EEXIST;
			goto bail;
		}
	}
	if (pidx == -1) {
		ipath_dbg("All pkeys for port %u already in use, "
			  "can't set %x\n", pd->port_port, key);
		ret = -EBUSY;
		goto bail;
	}
	/*
	 * Pass 1 over the device-wide pkey table: share an existing entry
	 * (bump its refcount) if this key is already programmed, and count
	 * free slots in 'any' for the allocation pass below.
	 */
	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
		if (!dd->ipath_pkeys[i]) {
			any++;
			continue;
		}
		if (dd->ipath_pkeys[i] == key) {
			atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];

			if (atomic_inc_return(pkrefs) > 1) {
				pd->port_pkeys[pidx] = key;
				ipath_cdbg(VERBOSE, "p%u set key %x "
					   "matches #%d, count now %d\n",
					   pd->port_port, key, i,
					   atomic_read(pkrefs));
				ret = 0;
				goto bail;
			} else {
				/*
				 * lost race, decrement count, catch below
				 */
				atomic_dec(pkrefs);
				ipath_cdbg(VERBOSE, "Lost race, count was "
					   "0, after dec, it's %d\n",
					   atomic_read(pkrefs));
				any++;
			}
		}
		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
			/*
			 * It makes no sense to have both the limited and
			 * full membership PKEY set at the same time since
			 * the unlimited one will disable the limited one.
			 */
			ret = -EEXIST;
			goto bail;
		}
	}
	if (!any) {
		ipath_dbg("port %u, all pkeys already in use, "
			  "can't set %x\n", pd->port_port, key);
		ret = -EBUSY;
		goto bail;
	}
	/*
	 * Pass 2: claim a free device slot.  atomic_inc_return()==1 is the
	 * claim — a racing thread that lost sees >1 or a non-zero entry.
	 * On success, rewrite the 4x16-bit partition-key register image.
	 */
	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
		if (!dd->ipath_pkeys[i] &&
		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
			u64 pkey;

			/* for ipathstats, etc. */
			ipath_stats.sps_pkeys[i] = lkey;
			pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
			pkey = (u64) dd->ipath_pkeys[0] |
				((u64) dd->ipath_pkeys[1] << 16) |
				((u64) dd->ipath_pkeys[2] << 32) |
				((u64) dd->ipath_pkeys[3] << 48);
			ipath_cdbg(PROC, "p%u set key %x in #%d, "
				   "portidx %d, new pkey reg %llx\n",
				   pd->port_port, key, i, pidx,
				   (unsigned long long) pkey);
			ipath_write_kreg(
				dd, dd->ipath_kregs->kr_partitionkey, pkey);

			ret = 0;
			goto bail;
		}
	}
	ipath_dbg("port %u, all pkeys already in use 2nd pass, "
		  "can't set %x\n", pd->port_port, key);
	ret = -EBUSY;

bail:
	return ret;
}

/**
 * ipath_manage_rcvq - manage a port's receive queue
 * @pd: the port
 * @start_stop: action to carry out
 *
 * start_stop == 0 disables receive on the port, for use in queue
 * overflow conditions.  start_stop==1 re-enables, to be used to
 * re-init the software copy of the head register
 */
static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
{
	struct ipath_devdata *dd = pd->port_dd;
	u64 tval;

	ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n",
		   start_stop ? "en" : "dis", dd->ipath_unit,
		   pd->port_port);
	/* atomically clear receive enable port. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call. The chip
		 * always resets it's tail register back to 0 on a
		 * transition from disabled to enabled. This could cause a
		 * problem if software was broken, and did the enable w/o
		 * the disable, but eventually the in-memory copy will be
		 * updated and correct itself, even in the face of software
		 * bugs.
		 */
		*pd->port_rcvhdrtail_kvaddr = 0;
		set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
			&dd->ipath_rcvctrl);
	} else
		clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
			  &dd->ipath_rcvctrl);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);
	/* now be sure chip saw it before we return */
	tval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	if (start_stop) {
		/*
		 * And try to be sure that tail reg update has happened too.
		 * This should in theory interlock with the RXE changes to
		 * the tail register.  Don't assign it to the tail register
		 * in memory copy, since we could overwrite an update by the
		 * chip if we did.
		 */
		tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
	}
	/* always; new head should be equal to new tail; see above */
	/*
	 * NOTE(review): tval is written only to force the register reads
	 * above (flush/interlock) and is otherwise unused by design.
	 */
	return 0;
}

/*
 * ipath_clean_part_key - release this port's partition keys
 * @pd: the port whose pkeys are being released
 * @dd: the device
 *
 * Drops the device-wide refcount for each pkey the port holds; when a
 * refcount hits zero the device table entry is cleared, and if anything
 * changed the hardware partition-key register is rewritten.  Inverse of
 * the ipath_set_part_key() logic above.
 */
static void ipath_clean_part_key(struct ipath_portdata *pd,
				 struct ipath_devdata *dd)
{
	int i, j, pchanged = 0;
	u64 oldpkey;

	/* for debugging only */
	oldpkey = (u64) dd->ipath_pkeys[0] |
		((u64) dd->ipath_pkeys[1] << 16) |
		((u64) dd->ipath_pkeys[2] << 32) |
		((u64) dd->ipath_pkeys[3] << 48);

	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
		if (!pd->port_pkeys[i])
			continue;
		ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
			   pd->port_pkeys[i]);
		for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
			/* check for match independent of the global bit */
			if ((dd->ipath_pkeys[j] & 0x7fff) !=
			    (pd->port_pkeys[i] & 0x7fff))
				continue;
			if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
				ipath_cdbg(VERBOSE, "p%u clear key "
					   "%x matches #%d\n",
					   pd->port_port,
					   pd->port_pkeys[i], j);
				ipath_stats.sps_pkeys[j] =
					dd->ipath_pkeys[j] = 0;
				pchanged++;
			}
			else ipath_cdbg(
				VERBOSE, "p%u key %x matches #%d, "
				"but ref still %d\n", pd->port_port,
				pd->port_pkeys[i], j,
				atomic_read(&dd->ipath_pkeyrefs[j]));
			break;
		}
		pd->port_pkeys[i] = 0;
	}
	if (pchanged) {
		u64 pkey = (u64) dd->ipath_pkeys[0] |
			((u64) dd->ipath_pkeys[1] << 16) |
			((u64) dd->ipath_pkeys[2] << 32) |
			((u64) dd->ipath_pkeys[3] << 48);
		ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
			   "new pkey reg %llx\n", pd->port_port,
			   (unsigned long long) oldpkey,
			   (unsigned long long) pkey);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
				 pkey);
	}
}

/**
 * ipath_create_user_egr - allocate eager TID buffers
 * @pd: the port to allocate TID buffers for
 *
 * This routine is now quite different for user and kernel, because
 * the kernel uses skb's, for the accelerated network performance
 * This is the user port version
 *
 * Allocate the eager TID buffers and program them into infinipath
 * They are no longer completely contiguous, we do multiple allocation
 * calls.
 */
static int ipath_create_user_egr(struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff;
	size_t size;
	int ret;

	egrcnt = dd->ipath_rcvegrcnt;
	/* TID number offset for this port */
	egroff = pd->port_port * egrcnt;
	egrsize = dd->ipath_rcvegrbufsize;
	ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
		   "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);

	/*
	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
	 * physically contiguous memory, advance through it until used up
	 * and then allocate more.  Of course, we need memory to store those
	 * extra pointers, now.  Started out with 256KB, but under heavy
	 * memory pressure (creating large files and then copying them over
	 * NFS while doing lots of MPI jobs), we hit some allocation
	 * failures, even though we can sleep...  (2.6.10) Still get
	 * failures at 64K.  32K is the lowest we can go without waiting
	 * more memory again.  It seems likely that the coalescing in
	 * free_pages, etc. still has issues (as it has had previously
	 * during 2.6.x development).
	 */
	size = 0x8000;
	/* NOTE(review): 'alloced' is computed but not used below — TODO confirm */
	alloced = ALIGN(egrsize * egrcnt, size);
	egrperchunk = size / egrsize;
	chunk = (egrcnt + egrperchunk - 1) / egrperchunk;
	pd->port_rcvegrbuf_chunks = chunk;
	pd->port_rcvegrbufs_perchunk = egrperchunk;
	pd->port_rcvegrbuf_size = size;
	pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]));
	if (!pd->port_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail;
	}
	pd->port_rcvegrbuf_phys =
		vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]));
	if (!pd->port_rcvegrbuf_phys) {
		ret = -ENOMEM;
		goto bail_rcvegrbuf;
	}
	for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
		/*
		 * GFP_USER, but without GFP_FS, so buffer cache can be
		 * coalesced (we hope); otherwise, even at order 4,
		 * heavy filesystem activity makes these fail
		 */
		gfp_t gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;

		pd->port_rcvegrbuf[e] = dma_alloc_coherent(
			&dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
			gfp_flags);

		if (!pd->port_rcvegrbuf[e]) {
			ret = -ENOMEM;
			goto bail_rcvegrbuf_phys;
		}
	}

	pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];

	/*
	 * Program each eager buffer's bus address into consecutive TID
	 * slots of the chip's eager-receive array, chunk by chunk.
	 */
	for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
		dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
		unsigned i;

		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
			dd->ipath_f_put_tid(dd, e + egroff +
					    (u64 __iomem *)
					    ((char __iomem *)
					     dd->ipath_kregbase +
					     dd->ipath_rcvegrbase), 0, pa);
			pa += egrsize;
		}
		cond_resched();	/* don't hog the cpu */
	}

	ret = 0;
	goto bail;

bail_rcvegrbuf_phys:
	/* unwind: free the coherent chunks already allocated, then tables */
	for (e = 0; e < pd->port_rcvegrbuf_chunks &&
		pd->port_rcvegrbuf[e]; e++)
		dma_free_coherent(&dd->pcidev->dev, size,
				  pd->port_rcvegrbuf[e],
				  pd->port_rcvegrbuf_phys[e]);
	vfree(pd->port_rcvegrbuf_phys);
	pd->port_rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
	vfree(pd->port_rcvegrbuf);
	pd->port_rcvegrbuf = NULL;
bail:
	return ret;
}

/*
 * ipath_do_user_init - per-open initialization of a user port
 * @pd: the port being initialized
 * @uinfo: version and address info supplied by the user process
 *
 * Validates the user/driver version match, pins the user page used for
 * the rcvhdr tail writeback, programs the tail address into the chip
 * (with read-back verification), allocates the rcvhdr queue and eager
 * buffers, then enables receive on the port.  Returns 0 or -errno.
 */
static int ipath_do_user_init(struct ipath_portdata *pd,
			      const struct ipath_user_info *uinfo)
{
	int ret = 0;
	struct ipath_devdata *dd = pd->port_dd;
	u64 physaddr, uaddr, off, atmp;
	struct page *pagep;
	u32 head32;
	u64 head;

	/* for now, if major version is different, bail */
	if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
		dev_info(&dd->pcidev->dev,
			 "User major version %d not same as driver "
			 "major %d\n", uinfo->spu_userversion >> 16,
			 IPATH_USER_SWMAJOR);
		ret = -ENODEV;
		goto done;
	}

	/* minor mismatch is tolerated, just logged */
	if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
		ipath_dbg("User minor version %d not same as driver "
			  "minor %d\n", uinfo->spu_userversion & 0xffff,
			  IPATH_USER_SWMINOR);

	if (uinfo->spu_rcvhdrsize) {
		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
		if (ret)
			goto done;
	}

	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */

	/* set up for the rcvhdr Q tail register writeback to user memory */
	if (!uinfo->spu_rcvhdraddr ||
	    !access_ok(VERIFY_WRITE, (u64 __user *) (unsigned long)
		       uinfo->spu_rcvhdraddr, sizeof(u64))) {
		ipath_dbg("Port %d rcvhdrtail addr %llx not valid\n",
			  pd->port_port,
			  (unsigned long long) uinfo->spu_rcvhdraddr);
		ret = -EINVAL;
		goto done;
	}

	off = offset_in_page(uinfo->spu_rcvhdraddr);
	uaddr = PAGE_MASK & (unsigned long) uinfo->spu_rcvhdraddr;
	/* pin the user page so the chip can DMA the tail value into it */
	ret = ipath_get_user_pages_nocopy(uaddr, &pagep);
	if (ret) {
		dev_info(&dd->pcidev->dev, "Failed to lookup and lock "
			 "address %llx for rcvhdrtail: errno %d\n",
			 (unsigned long long) uinfo->spu_rcvhdraddr, -ret);
		goto done;
	}
	ipath_stats.sps_pagelocks++;
	pd->port_rcvhdrtail_uaddr = uaddr;
	pd->port_rcvhdrtail_pagep = pagep;
	pd->port_rcvhdrtail_kvaddr =
		page_address(pagep);
	pd->port_rcvhdrtail_kvaddr += off;
	physaddr = page_to_phys(pagep) + off;
	ipath_cdbg(VERBOSE, "port %d user addr %llx hdrtailaddr, %llx "
		   "physical (off=%llx)\n",
		   pd->port_port,
		   (unsigned long long) uinfo->spu_rcvhdraddr,
		   (unsigned long long) physaddr, (unsigned long long) off);
	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
			      pd->port_port, physaddr);
	/* read back to verify the chip accepted the address */
	atmp = ipath_read_kreg64_port(dd,
				      dd->ipath_kregs->kr_rcvhdrtailaddr,
				      pd->port_port);
	if (physaddr != atmp) {
		ipath_dev_err(dd,
			      "Catastrophic software error, "
			      "RcvHdrTailAddr%u written as %llx, "
			      "read back as %llx\n", pd->port_port,
			      (unsigned long long) physaddr,
			      (unsigned long long) atmp);
		ret = -EINVAL;
		goto done;
	}

	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
	pd->port_piobufs = dd->ipath_piobufbase +
		dd->ipath_pbufsport * (pd->port_port -
				       1) * dd->ipath_palign;
	ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
		   pd->port_port, pd->port_piobufs);

	/*
	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
	 * array for time being.  If pd->port_port > chip-supported,
	 * we need to do extra stuff here to handle by handling overflow
	 * through port 0, someday
	 */
	ret = ipath_create_rcvhdrq(dd, pd);
	if (!ret)
		ret = ipath_create_user_egr(pd);
	if (ret)
		goto done;
	/* enable receives now */
	/* atomically set enable bit for this port */
	set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
		&dd->ipath_rcvctrl);

	/*
	 * set the head registers for this port to the current values
	 * of the tail pointers, since we don't know if they were
	 * updated on last use of the port.
	 */
	head32 = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
	head = (u64) head32;
	ipath_write_ureg(dd, ur_rcvhdrhead, head, pd->port_port);
	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
	dd->ipath_lastegrheads[pd->port_port] = -1;
	dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
	ipath_cdbg(VERBOSE, "Wrote port%d head %llx, egrhead %x from "
		   "tail regs\n", pd->port_port,
		   (unsigned long long) head, head32);
	pd->port_tidcursor = 0;	/* start at beginning after open */
	/*
	 * now enable the port; the tail registers will be written to memory
	 * by the chip as soon as it sees the write to
	 * dd->ipath_kregs->kr_rcvctrl.  The update only happens on
	 * transition from 0 to 1, so clear it first, then set it as part of
	 * enabling the port.  This will (very briefly) affect any other
	 * open ports, but it shouldn't be long enough to be an issue.
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);

done:
	return ret;
}

/*
 * mmap_ureg - map the port's hardware user registers into a process
 * @vma: the user mapping request (must be at most one page)
 * @dd: the device
 * @ureg: offset of this port's user registers from the device base
 *
 * NOTE(review): this function is truncated at the end of this chunk;
 * the trailing "return ret;" and closing brace are on the next page.
 */
static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
		     u64 ureg)
{
	unsigned long phys;
	int ret;

	/* it's the real hardware, so io_remap works */
	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
		dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
			 "%lx > PAGE\n", vma->vm_end - vma->vm_start);
		ret = -EFAULT;
	} else {
		phys = dd->ipath_physaddr + ureg;
		/* device registers: must not be cached by the CPU */
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 phys >> PAGE_SHIFT,
					 vma->vm_end - vma->vm_start,
					 vma->vm_page_prot);
	}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?