ipath_file_ops.c

From the Linux 2.6.17.4 source tree · C code · 1,915 lines total · page 1/4

C
1,915
字号
	/*
	 * Set the full membership bit, because it has to be
	 * set in the register or the packet, and it seems
	 * cleaner to set in the register than to force all
	 * callers to set it. (see bug 4331)
	 */
	key |= 0x8000;

	/*
	 * First pass over this port's pkey table: find a free slot (pidx)
	 * and reject a key the port has already set.
	 */
	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
		if (!pd->port_pkeys[i] && pidx == -1)
			pidx = i;
		if (pd->port_pkeys[i] == key) {
			ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
				   "(%x) more than once\n",
				   pd->port_port, key);
			ret = -EEXIST;
			goto bail;
		}
	}
	if (pidx == -1) {
		ipath_dbg("All pkeys for port %u already in use, "
			  "can't set %x\n", pd->port_port, key);
		ret = -EBUSY;
		goto bail;
	}
	/*
	 * Scan the device-wide pkey table: share an existing matching
	 * entry by taking a reference, or note that a free slot exists
	 * ('any') for the allocation pass below.
	 */
	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
		if (!dd->ipath_pkeys[i]) {
			any++;
			continue;
		}
		if (dd->ipath_pkeys[i] == key) {
			atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];

			/*
			 * inc_return > 1 means the entry was live when we
			 * took our reference, so we can share it.
			 */
			if (atomic_inc_return(pkrefs) > 1) {
				pd->port_pkeys[pidx] = key;
				ipath_cdbg(VERBOSE, "p%u set key %x "
					   "matches #%d, count now %d\n",
					   pd->port_port, key, i,
					   atomic_read(pkrefs));
				ret = 0;
				goto bail;
			} else {
				/*
				 * lost race, decrement count, catch below
				 */
				atomic_dec(pkrefs);
				ipath_cdbg(VERBOSE, "Lost race, count was "
					   "0, after dec, it's %d\n",
					   atomic_read(pkrefs));
				any++;
			}
		}
		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
			/*
			 * It makes no sense to have both the limited and
			 * full membership PKEY set at the same time since
			 * the unlimited one will disable the limited one.
			 */
			ret = -EEXIST;
			goto bail;
		}
	}
	if (!any) {
		ipath_dbg("port %u, all pkeys already in use, "
			  "can't set %x\n", pd->port_port, key);
		ret = -EBUSY;
		goto bail;
	}
	/*
	 * Second pass: claim a free device slot.  The atomic
	 * inc_return == 1 check makes the claim race-free against other
	 * ports doing the same thing.
	 */
	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
		if (!dd->ipath_pkeys[i] &&
		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
			u64 pkey;

			/* for ipathstats, etc. */
			ipath_stats.sps_pkeys[i] = lkey;
			pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
			/*
			 * Pack all four 16-bit pkeys into the single
			 * 64-bit partition-key register image.
			 */
			pkey =
				(u64) dd->ipath_pkeys[0] |
				((u64) dd->ipath_pkeys[1] << 16) |
				((u64) dd->ipath_pkeys[2] << 32) |
				((u64) dd->ipath_pkeys[3] << 48);
			ipath_cdbg(PROC, "p%u set key %x in #%d, "
				   "portidx %d, new pkey reg %llx\n",
				   pd->port_port, key, i, pidx,
				   (unsigned long long) pkey);
			ipath_write_kreg(
				dd, dd->ipath_kregs->kr_partitionkey, pkey);

			ret = 0;
			goto bail;
		}
	}
	ipath_dbg("port %u, all pkeys already in use 2nd pass, "
		  "can't set %x\n", pd->port_port, key);
	ret = -EBUSY;

bail:
	return ret;
}

/**
 * ipath_manage_rcvq - manage a port's receive queue
 * @pd: the port
 * @start_stop: action to carry out
 *
 * start_stop == 0 disables receive on the port, for use in queue
 * overflow conditions.  start_stop==1 re-enables, to be used to
 * re-init the software copy of the head register
 */
static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
{
	struct ipath_devdata *dd = pd->port_dd;
	u64 tval;

	ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n",
		   start_stop ? "en" : "dis", dd->ipath_unit,
		   pd->port_port);
	/* atomically clear receive enable port. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call. The chip
		 * always resets it's tail register back to 0 on a
		 * transition from disabled to enabled.  This could cause a
		 * problem if software was broken, and did the enable w/o
		 * the disable, but eventually the in-memory copy will be
		 * updated and correct itself, even in the face of software
		 * bugs.
		 */
		*pd->port_rcvhdrtail_kvaddr = 0;
		set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
			&dd->ipath_rcvctrl);
	} else
		clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
			  &dd->ipath_rcvctrl);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);
	/* now be sure chip saw it before we return */
	tval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	if (start_stop) {
		/*
		 * And try to be sure that tail reg update has happened too.
		 * This should in theory interlock with the RXE changes to
		 * the tail register.  Don't assign it to the tail register
		 * in memory copy, since we could overwrite an update by the
		 * chip if we did.
		 */
		/* NOTE(review): tval is read only for the flush side effect
		 * of the register read; the value itself is unused. */
		tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
	}
	/* always; new head should be equal to new tail; see above */
	return 0;
}

/*
 * ipath_clean_part_key - drop all of @pd's partition keys from the
 * device-wide table, decrementing refcounts; if any device entry's
 * refcount hits zero the entry is cleared and the hardware
 * partition-key register is rewritten.
 */
static void ipath_clean_part_key(struct ipath_portdata *pd,
				 struct ipath_devdata *dd)
{
	int i, j, pchanged = 0;
	u64 oldpkey;

	/* for debugging only */
	oldpkey = (u64) dd->ipath_pkeys[0] |
		((u64) dd->ipath_pkeys[1] << 16) |
		((u64) dd->ipath_pkeys[2] << 32) |
		((u64) dd->ipath_pkeys[3] << 48);

	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
		if (!pd->port_pkeys[i])
			continue;
		ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
			   pd->port_pkeys[i]);
		for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
			/* check for match independent of the global bit */
			if ((dd->ipath_pkeys[j] & 0x7fff) !=
			    (pd->port_pkeys[i] & 0x7fff))
				continue;
			if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
				ipath_cdbg(VERBOSE, "p%u clear key "
					   "%x matches #%d\n",
					   pd->port_port,
					   pd->port_pkeys[i], j);
				ipath_stats.sps_pkeys[j] =
					dd->ipath_pkeys[j] = 0;
				pchanged++;
			}
			else ipath_cdbg(
				VERBOSE, "p%u key %x matches #%d, "
				"but ref still %d\n", pd->port_port,
				pd->port_pkeys[i], j,
				atomic_read(&dd->ipath_pkeyrefs[j]));
			break;
		}
		pd->port_pkeys[i] = 0;
	}
	if (pchanged) {
		u64 pkey = (u64) dd->ipath_pkeys[0] |
			((u64) dd->ipath_pkeys[1] << 16) |
			((u64) dd->ipath_pkeys[2] << 32) |
			((u64) dd->ipath_pkeys[3] << 48);
		ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
			   "new pkey reg %llx\n", pd->port_port,
			   (unsigned long long) oldpkey,
			   (unsigned long long) pkey);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
				 pkey);
	}
}

/**
 * ipath_create_user_egr - allocate eager TID buffers
 * @pd: the port to allocate TID buffers for
 *
 * This routine is now quite different for user and kernel, because
 * the kernel uses skb's, for the accelerated network performance
 * This is the user port version
 *
 * Allocate the eager TID buffers and program them into infinipath
 * They are no longer completely contiguous, we do multiple allocation
 * calls.
 */
static int ipath_create_user_egr(struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff;
	size_t size;
	int ret;

	egrcnt = dd->ipath_rcvegrcnt;
	/* TID number offset for this port */
	egroff = pd->port_port * egrcnt;
	egrsize = dd->ipath_rcvegrbufsize;
	ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
		   "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);

	/*
	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
	 * physically contiguous memory, advance through it until used up
	 * and then allocate more.  Of course, we need memory to store those
	 * extra pointers, now.  Started out with 256KB, but under heavy
	 * memory pressure (creating large files and then copying them over
	 * NFS while doing lots of MPI jobs), we hit some allocation
	 * failures, even though we can sleep...  (2.6.10) Still get
	 * failures at 64K.  32K is the lowest we can go without waiting
	 * more memory again.  It seems likely that the coalescing in
	 * free_pages, etc. still has issues (as it has had previously
	 * during 2.6.x development).
	 */
	size = 0x8000;
	/* NOTE(review): 'alloced' is computed but not used within this
	 * function as visible here -- confirm against the full file. */
	alloced = ALIGN(egrsize * egrcnt, size);
	egrperchunk = size / egrsize;
	chunk = (egrcnt + egrperchunk - 1) / egrperchunk;
	pd->port_rcvegrbuf_chunks = chunk;
	pd->port_rcvegrbufs_perchunk = egrperchunk;
	pd->port_rcvegrbuf_size = size;
	pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]));
	if (!pd->port_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail;
	}
	pd->port_rcvegrbuf_phys =
		vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]));
	if (!pd->port_rcvegrbuf_phys) {
		ret = -ENOMEM;
		goto bail_rcvegrbuf;
	}
	for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
		/*
		 * GFP_USER, but without GFP_FS, so buffer cache can be
		 * coalesced (we hope); otherwise, even at order 4,
		 * heavy filesystem activity makes these fail
		 */
		gfp_t gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;

		pd->port_rcvegrbuf[e] = dma_alloc_coherent(
			&dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
			gfp_flags);

		if (!pd->port_rcvegrbuf[e]) {
			ret = -ENOMEM;
			goto bail_rcvegrbuf_phys;
		}
	}

	pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];

	/*
	 * Walk every eager TID, programming the chip's TID array with the
	 * DMA address of each egrsize-sized slice of each chunk.
	 */
	for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
		dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
		unsigned i;

		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
			dd->ipath_f_put_tid(dd, e + egroff +
					    (u64 __iomem *)
					    ((char __iomem *)
					     dd->ipath_kregbase +
					     dd->ipath_rcvegrbase), 0, pa);
			pa += egrsize;
		}
		cond_resched();	/* don't hog the cpu */
	}

	ret = 0;
	goto bail;

bail_rcvegrbuf_phys:
	/* free only the chunks that were successfully allocated */
	for (e = 0; e < pd->port_rcvegrbuf_chunks &&
		     pd->port_rcvegrbuf[e]; e++)
		dma_free_coherent(&dd->pcidev->dev, size,
				  pd->port_rcvegrbuf[e],
				  pd->port_rcvegrbuf_phys[e]);
	vfree(pd->port_rcvegrbuf_phys);
	pd->port_rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
	vfree(pd->port_rcvegrbuf);
	pd->port_rcvegrbuf = NULL;
bail:
	return ret;
}

/*
 * ipath_do_user_init - per-open port initialization for a user process:
 * validates the user's software version, pins the page for rcvhdrq tail
 * writeback, programs the tail-address register (with read-back check),
 * computes the port's PIO buffer base, allocates the rcvhdr queue and
 * eager buffers, then enables receive for the port.
 */
static int ipath_do_user_init(struct ipath_portdata *pd,
			      const struct ipath_user_info *uinfo)
{
	int ret = 0;
	struct ipath_devdata *dd = pd->port_dd;
	u64 physaddr, uaddr, off, atmp;
	struct page *pagep;
	u32 head32;
	u64 head;

	/* for now, if major version is different, bail */
	if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
		dev_info(&dd->pcidev->dev,
			 "User major version %d not same as driver "
			 "major %d\n", uinfo->spu_userversion >> 16,
			 IPATH_USER_SWMAJOR);
		ret = -ENODEV;
		goto done;
	}

	/* minor version mismatch is only worth a debug message */
	if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
		ipath_dbg("User minor version %d not same as driver "
			  "minor %d\n", uinfo->spu_userversion & 0xffff,
			  IPATH_USER_SWMINOR);

	if (uinfo->spu_rcvhdrsize) {
		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
		if (ret)
			goto done;
	}

	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */

	/* set up for the rcvhdr Q tail register writeback to user memory */
	if (!uinfo->spu_rcvhdraddr ||
	    !access_ok(VERIFY_WRITE, (u64 __user *) (unsigned long)
		       uinfo->spu_rcvhdraddr, sizeof(u64))) {
		ipath_dbg("Port %d rcvhdrtail addr %llx not valid\n",
			  pd->port_port,
			  (unsigned long long) uinfo->spu_rcvhdraddr);
		ret = -EINVAL;
		goto done;
	}

	off = offset_in_page(uinfo->spu_rcvhdraddr);
	uaddr = PAGE_MASK & (unsigned long) uinfo->spu_rcvhdraddr;
	/* pin the user page so the chip can DMA the tail value into it */
	ret = ipath_get_user_pages_nocopy(uaddr, &pagep);
	if (ret) {
		dev_info(&dd->pcidev->dev, "Failed to lookup and lock "
			 "address %llx for rcvhdrtail: errno %d\n",
			 (unsigned long long) uinfo->spu_rcvhdraddr, -ret);
		goto done;
	}
	ipath_stats.sps_pagelocks++;
	pd->port_rcvhdrtail_uaddr = uaddr;
	pd->port_rcvhdrtail_pagep = pagep;
	pd->port_rcvhdrtail_kvaddr =
		page_address(pagep);
	pd->port_rcvhdrtail_kvaddr += off;
	physaddr = page_to_phys(pagep) + off;
	ipath_cdbg(VERBOSE, "port %d user addr %llx hdrtailaddr, %llx "
		   "physical (off=%llx)\n",
		   pd->port_port,
		   (unsigned long long) uinfo->spu_rcvhdraddr,
		   (unsigned long long) physaddr, (unsigned long long) off);
	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
			      pd->port_port, physaddr);
	/* read back to verify the register write actually took */
	atmp = ipath_read_kreg64_port(dd,
				      dd->ipath_kregs->kr_rcvhdrtailaddr,
				      pd->port_port);
	if (physaddr != atmp) {
		ipath_dev_err(dd,
			      "Catastrophic software error, "
			      "RcvHdrTailAddr%u written as %llx, "
			      "read back as %llx\n", pd->port_port,
			      (unsigned long long) physaddr,
			      (unsigned long long) atmp);
		ret = -EINVAL;
		goto done;
	}

	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
	pd->port_piobufs = dd->ipath_piobufbase +
		dd->ipath_pbufsport * (pd->port_port -
				       1) * dd->ipath_palign;
	ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
		   pd->port_port, pd->port_piobufs);

	/*
	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
	 * array for time being.  If pd->port_port > chip-supported,
	 * we need to do extra stuff here to handle by handling overflow
	 * through port 0, someday
	 */
	ret = ipath_create_rcvhdrq(dd, pd);
	if (!ret)
		ret = ipath_create_user_egr(pd);
	if (ret)
		goto done;
	/* enable receives now */
	/* atomically set enable bit for this port */
	set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
		&dd->ipath_rcvctrl);

	/*
	 * set the head registers for this port to the current values
	 * of the tail pointers, since we don't know if they were
	 * updated on last use of the port.
	 */
	head32 = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
	head = (u64) head32;
	ipath_write_ureg(dd, ur_rcvhdrhead, head, pd->port_port);
	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
	dd->ipath_lastegrheads[pd->port_port] = -1;
	dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
	ipath_cdbg(VERBOSE, "Wrote port%d head %llx, egrhead %x from "
		   "tail regs\n", pd->port_port,
		   (unsigned long long) head, head32);
	pd->port_tidcursor = 0;	/* start at beginning after open */
	/*
	 * now enable the port; the tail registers will be written to memory
	 * by the chip as soon as it sees the write to
	 * dd->ipath_kregs->kr_rcvctrl.  The update only happens on
	 * transition from 0 to 1, so clear it first, then set it as part of
	 * enabling the port.  This will (very briefly) affect any other
	 * open ports, but it shouldn't be long enough to be an issue.
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);

done:
	return ret;
}

/*
 * mmap_ureg - map the hardware register region at dd->ipath_physaddr +
 * ureg into the caller's vma, uncached; rejects requests larger than
 * one page.  (Function continues past this chunk of the file.)
 */
static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
		     u64 ureg)
{
	unsigned long phys;
	int ret;

	/* it's the real hardware, so io_remap works */
	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
		dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
			 "%lx > PAGE\n", vma->vm_end - vma->vm_start);
		ret = -EFAULT;
	} else {
		phys = dd->ipath_physaddr + ureg;
		/* mark the mapping uncached: these are device registers */
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 phys >> PAGE_SHIFT,
					 vma->vm_end - vma->vm_start,
					 vma->vm_page_prot);
	}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?