tehuti.c

来自「linux 内核源代码」· C语言 代码 · 共 2,340 行 · 第 1/5 页

C
2,340
字号
{	struct rxdb *db;	int i;	db = vmalloc(sizeof(struct rxdb)		     + (nelem * sizeof(int))		     + (nelem * sizeof(struct rx_map)));	if (likely(db != NULL)) {		db->stack = (int *)(db + 1);		db->elems = (void *)(db->stack + nelem);		db->nelem = nelem;		db->top = nelem;		for (i = 0; i < nelem; i++)			db->stack[i] = nelem - i - 1;	/* to make first allocs							   close to db struct*/	}	return db;}static inline int bdx_rxdb_alloc_elem(struct rxdb *db){	BDX_ASSERT(db->top <= 0);	return db->stack[--(db->top)];}static inline void *bdx_rxdb_addr_elem(struct rxdb *db, int n){	BDX_ASSERT((n < 0) || (n >= db->nelem));	return db->elems + n;}static inline int bdx_rxdb_available(struct rxdb *db){	return db->top;}static inline void bdx_rxdb_free_elem(struct rxdb *db, int n){	BDX_ASSERT((n >= db->nelem) || (n < 0));	db->stack[(db->top)++] = n;}/************************************************************************* *     Rx Init                                                           * *************************************************************************//* bdx_rx_init - initialize RX all related HW and SW resources * @priv - NIC private structure * * Returns 0 on success, negative value on failure * * It creates rxf and rxd fifos, update relevant HW registers, preallocate * skb for rx. It assumes that Rx is desabled in HW * funcs are grouped for better cache usage * * RxD fifo is smaller then RxF fifo by design. Upon high load, RxD will be * filled and packets will be dropped by nic without getting into host or * cousing interrupt. 
Anyway, in that condition, the host has no chance to process
 * all packets, but dropping in the nic is cheaper, since it takes 0 cpu
 * cycles
 */

/* TBD: ensure proper packet size */

static int bdx_rx_init(struct bdx_priv *priv)
{
	ENTER;

	/* RxD fifo first, then RxF; both share the generic fifo helper,
	 * parameterized only by size and the HW register set.
	 */
	if (bdx_fifo_init(priv, &priv->rxd_fifo0.m, priv->rxd_size,
			  regRXD_CFG0_0, regRXD_CFG1_0,
			  regRXD_RPTR_0, regRXD_WPTR_0))
		goto err_mem;
	if (bdx_fifo_init(priv, &priv->rxf_fifo0.m, priv->rxf_size,
			  regRXF_CFG0_0, regRXF_CFG1_0,
			  regRXF_RPTR_0, regRXF_WPTR_0))
		goto err_mem;
	/* skb db holds one element per rxf descriptor the fifo can carry */
	if (!(priv->rxdb = bdx_rxdb_create(priv->rxf_fifo0.m.memsz /
					   sizeof(struct rxf_desc))))
		goto err_mem;

	/* each rx buffer must fit an MTU-sized frame plus VLAN eth header */
	priv->rxf_fifo0.m.pktsz = priv->ndev->mtu + VLAN_ETH_HLEN;
	return 0;

err_mem:
	ERR("%s: %s: Rx init failed\n", BDX_DRV_NAME, priv->ndev->name);
	return -ENOMEM;
}

/* bdx_rx_free_skbs - frees and unmaps all skbs allocated for the fifo
 * @priv - NIC private structure
 * @f - RXF fifo
 *
 * First pops every FREE element off the db stack, zeroing its dma handle as
 * an "unused" marker; any element whose dma is still non-zero afterwards is
 * in flight and owns a mapped skb, which is unmapped and freed.
 */
static void bdx_rx_free_skbs(struct bdx_priv *priv, struct rxf_fifo *f)
{
	struct rx_map *dm;
	struct rxdb *db = priv->rxdb;
	u16 i;

	ENTER;
	DBG("total=%d free=%d busy=%d\n", db->nelem, bdx_rxdb_available(db),
	    db->nelem - bdx_rxdb_available(db));
	/* mark all free elements so the loop below can skip them */
	while (bdx_rxdb_available(db) > 0) {
		i = bdx_rxdb_alloc_elem(db);
		dm = bdx_rxdb_addr_elem(db, i);
		dm->dma = 0;
	}
	/* release everything that still holds a mapped skb */
	for (i = 0; i < db->nelem; i++) {
		dm = bdx_rxdb_addr_elem(db, i);
		if (dm->dma) {
			pci_unmap_single(priv->pdev,
					 dm->dma, f->m.pktsz,
					 PCI_DMA_FROMDEVICE);
			dev_kfree_skb(dm->skb);
		}
	}
}

/* bdx_rx_free - release all Rx resources
 * @priv - NIC private structure
 * It assumes that Rx is disabled in HW
 */
static void bdx_rx_free(struct bdx_priv *priv)
{
	ENTER;
	if (priv->rxdb) {
		bdx_rx_free_skbs(priv, &priv->rxf_fifo0);
		bdx_rxdb_destroy(priv->rxdb);
		priv->rxdb = NULL;
	}
	bdx_fifo_free(priv, &priv->rxf_fifo0.m);
	bdx_fifo_free(priv, &priv->rxd_fifo0.m);

	RET();
}

/*************************************************************************
 *     Rx Engine                                                         *
 *************************************************************************/

/* bdx_rx_alloc_skbs - fill rxf fifo with new skbs
 * @priv - nic's private structure
 * @f - RXF fifo that needs skbs
 *
 * It allocates skbs, builds rxf descs and pushes them (rxf descr) into rxf
 * fifo. skb's virtual and physical addresses are stored in the skb db.
 * To calculate free space, func uses cached values of RPTR and WPTR.
 * When needed, it also updates RPTR and WPTR.
 */

/* TBD: do not update WPTR if no desc were written */

static void bdx_rx_alloc_skbs(struct bdx_priv *priv, struct rxf_fifo *f)
{
	struct sk_buff *skb;
	struct rxf_desc *rxfd;
	struct rx_map *dm;
	int dno, delta, idx;
	struct rxdb *db = priv->rxdb;

	ENTER;
	/* one element is deliberately left unused so the fifo never looks
	 * completely full
	 */
	dno = bdx_rxdb_available(db) - 1;
	while (dno > 0) {
		if (!(skb = dev_alloc_skb(f->m.pktsz + NET_IP_ALIGN))) {
			ERR("NO MEM: dev_alloc_skb failed\n");
			break;
		}
		skb->dev = priv->ndev;
		skb_reserve(skb, NET_IP_ALIGN);	/* align IP header */

		/* remember skb + dma mapping in the db; the db index travels
		 * to HW through the descriptor's va_lo field and comes back
		 * in the matching rxd completion
		 */
		idx = bdx_rxdb_alloc_elem(db);
		dm = bdx_rxdb_addr_elem(db, idx);
		dm->dma = pci_map_single(priv->pdev,
					 skb->data, f->m.pktsz,
					 PCI_DMA_FROMDEVICE);
		dm->skb = skb;

		/* build the rxf descriptor in place at the write pointer */
		rxfd = (struct rxf_desc *)(f->m.va + f->m.wptr);
		rxfd->info = CPU_CHIP_SWAP32(0x10003);	/* INFO=1 BC=3 */
		rxfd->va_lo = idx;
		rxfd->pa_lo = CPU_CHIP_SWAP32(L32_64(dm->dma));
		rxfd->pa_hi = CPU_CHIP_SWAP32(H32_64(dm->dma));
		rxfd->len = CPU_CHIP_SWAP32(f->m.pktsz);
		print_rxfd(rxfd);

		f->m.wptr += sizeof(struct rxf_desc);
		delta = f->m.wptr - f->m.memsz;
		if (unlikely(delta >= 0)) {
			/* wptr ran past the end: wrap it, and copy the part
			 * written into the fifo's extra tail space back to
			 * the beginning
			 */
			f->m.wptr = delta;
			if (delta > 0) {
				memcpy(f->m.va, f->m.va + f->m.memsz, delta);
				DBG("wrapped descriptor\n");
			}
		}
		dno--;
	}
	/*TBD: to do - delayed rxf wptr like in txd */
	WRITE_REG(priv, f->m.reg_WPTR, f->m.wptr & TXF_WPTR_WR_PTR);
	RET();
}

/* NETIF_RX_MUX - hand a received skb to the stack, taking the VLAN
 * acceleration path when the descriptor carries a vtag and a vlan group is
 * configured, plain netif_receive_skb otherwise.
 */
static inline void
NETIF_RX_MUX(struct bdx_priv *priv, u32 rxd_val1, u16 rxd_vlan,
	     struct sk_buff *skb)
{
	ENTER;
	DBG("rxdd->flags.bits.vtag=%d vlgrp=%p\n", GET_RXD_VTAG(rxd_val1),
	    priv->vlgrp);
	if (priv->vlgrp && GET_RXD_VTAG(rxd_val1)) {
		DBG("%s: vlan rcv vlan '%x' vtag '%x', device name '%s'\n",
		    priv->ndev->name,
		    GET_RXD_VLAN_ID(rxd_vlan),
		    GET_RXD_VTAG(rxd_val1),
		    vlan_group_get_device(priv->vlgrp,
					  GET_RXD_VLAN_ID(rxd_vlan))->name);
		/* NAPI variant of receive functions */
		vlan_hwaccel_receive_skb(skb, priv->vlgrp,
					 GET_RXD_VLAN_ID(rxd_vlan));
	} else {
		netif_receive_skb(skb);
	}
}

/* bdx_recycle_skb - re-post an skb's rxf descriptor without freeing the skb.
 * @priv - nic's private structure
 * @rxdd - rxd completion whose va_lo names the db element to recycle
 *
 * Used when the rxd completion was consumed but the skb was not handed to
 * the stack (error path, or copybreak copied the payload out); rebuilds the
 * rxf descriptor from the still-valid dma mapping so HW can reuse the buffer.
 */
static void bdx_recycle_skb(struct bdx_priv *priv, struct rxd_desc *rxdd)
{
	struct rxf_desc *rxfd;
	struct rx_map *dm;
	struct rxf_fifo *f;
	struct rxdb *db;
	struct sk_buff *skb;
	int delta;

	ENTER;
	DBG("priv=%p rxdd=%p\n", priv, rxdd);
	f = &priv->rxf_fifo0;
	db = priv->rxdb;
	DBG("db=%p f=%p\n", db, f);
	dm = bdx_rxdb_addr_elem(db, rxdd->va_lo);
	DBG("dm=%p\n", dm);
	skb = dm->skb;

	/* rebuild the descriptor exactly as bdx_rx_alloc_skbs does */
	rxfd = (struct rxf_desc *)(f->m.va + f->m.wptr);
	rxfd->info = CPU_CHIP_SWAP32(0x10003);	/* INFO=1 BC=3 */
	rxfd->va_lo = rxdd->va_lo;
	rxfd->pa_lo = CPU_CHIP_SWAP32(L32_64(dm->dma));
	rxfd->pa_hi = CPU_CHIP_SWAP32(H32_64(dm->dma));
	rxfd->len = CPU_CHIP_SWAP32(f->m.pktsz);
	print_rxfd(rxfd);

	f->m.wptr += sizeof(struct rxf_desc);
	delta = f->m.wptr - f->m.memsz;
	if (unlikely(delta >= 0)) {
		/* same wrap handling as in bdx_rx_alloc_skbs */
		f->m.wptr = delta;
		if (delta > 0) {
			memcpy(f->m.va, f->m.va + f->m.memsz, delta);
			DBG("wrapped descriptor\n");
		}
	}
	RET();
}

/* bdx_rx_receive - receives full packets from RXD fifo and passes them to OS
 * NOTE: a special treatment is given to non-continuous descriptors
 * that start near the end, wrap around and continue at the beginning. a second
 * part is copied right after the first, and then the descriptor is interpreted
 * as normal.
 * The fifo has extra space to allow such operations.
 * @priv - nic's private structure
 * @f - RXD fifo to pull completions from (not RXF)
 * @budget - NAPI budget: maximum number of packets to process
 *
 * Returns the number of packets processed.
 */

/* TBD: replace memcpy func call by explicit inline asm */

static int bdx_rx_receive(struct bdx_priv *priv, struct rxd_fifo *f, int budget)
{
	struct sk_buff *skb, *skb2;
	struct rxd_desc *rxdd;
	struct rx_map *dm;
	struct rxf_fifo *rxf_fifo;
	int tmp_len, size;
	int done = 0;
	int max_done = BDX_MAX_RX_DONE;
	struct rxdb *db = NULL;
	/* Unmarshalled descriptor - copy of descriptor in host order */
	u32 rxd_val1;
	u16 len;
	u16 rxd_vlan;

	ENTER;
	max_done = budget;	/* NAPI budget overrides the default cap */

	priv->ndev->last_rx = jiffies;

	/* refresh cached HW write pointer; everything between rptr and wptr
	 * is ready to be consumed
	 */
	f->m.wptr = READ_REG(priv, f->m.reg_WPTR) & TXF_WPTR_WR_PTR;

	size = f->m.wptr - f->m.rptr;
	if (size < 0)
		size = f->m.memsz + size;	/* size is negative :-) */

	while (size > 0) {
		rxdd = (struct rxd_desc *)(f->m.va + f->m.rptr);
		rxd_val1 = CPU_CHIP_SWAP32(rxdd->rxd_val1);

		len = CPU_CHIP_SWAP16(rxdd->len);
		rxd_vlan = CPU_CHIP_SWAP16(rxdd->rxd_vlan);

		print_rxdd(rxdd, rxd_val1, len, rxd_vlan);

		/* descriptor length in bytes = buffer count * 8 */
		tmp_len = GET_RXD_BC(rxd_val1) << 3;
		BDX_ASSERT(tmp_len <= 0);
		size -= tmp_len;
		if (size < 0)	/* test for partially arrived descriptor */
			break;

		f->m.rptr += tmp_len;

		tmp_len = f->m.rptr - f->m.memsz;
		if (unlikely(tmp_len >= 0)) {
			/* rptr wrapped: copy the head of the fifo into the
			 * extra tail space so a straddling descriptor can be
			 * read contiguously
			 */
			f->m.rptr = tmp_len;
			if (tmp_len > 0) {
				DBG("wrapped desc rptr=%d tmp_len=%d\n",
				    f->m.rptr, tmp_len);
				memcpy(f->m.va + f->m.memsz, f->m.va, tmp_len);
			}
		}

		if (unlikely(GET_RXD_ERR(rxd_val1))) {
			/* bad packet: count it and give the buffer back */
			DBG("rxd_err = 0x%x\n", GET_RXD_ERR(rxd_val1));
			priv->net_stats.rx_errors++;
			bdx_recycle_skb(priv, rxdd);
			continue;
		}

		rxf_fifo = &priv->rxf_fifo0;
		db = priv->rxdb;
		/* va_lo carries the db index we planted in the rxf desc */
		dm = bdx_rxdb_addr_elem(db, rxdd->va_lo);
		skb = dm->skb;

		if (len < BDX_COPYBREAK &&
		    (skb2 = dev_alloc_skb(len + NET_IP_ALIGN))) {
			/* small packet: copy it out and recycle the big
			 * buffer instead of unmapping it
			 */
			skb_reserve(skb2, NET_IP_ALIGN);
			/*skb_put(skb2, len); */
			pci_dma_sync_single_for_cpu(priv->pdev,
						    dm->dma, rxf_fifo->m.pktsz,
						    PCI_DMA_FROMDEVICE);
			memcpy(skb2->data, skb->data, len);
			bdx_recycle_skb(priv, rxdd);
			skb = skb2;
		} else {
			/* hand the original buffer to the stack */
			pci_unmap_single(priv->pdev,
					 dm->dma, rxf_fifo->m.pktsz,
					 PCI_DMA_FROMDEVICE);
			bdx_rxdb_free_elem(db, rxdd->va_lo);
		}

		priv->net_stats.rx_bytes += len;

		skb_put(skb, len);
		skb->dev = priv->ndev;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		skb->protocol = eth_type_trans(skb, priv->ndev);

		/* Non-IP packets aren't checksum-offloaded */
		if (GET_RXD_PKT_ID(rxd_val1) == 0)
			skb->ip_summed = CHECKSUM_NONE;

		NETIF_RX_MUX(priv, rxd_val1, rxd_vlan, skb);

		if (++done >= max_done)
			break;
	}
	priv->net_stats.rx_packets += done;

	/* FIXME: do smth to minimize pci accesses    */
	WRITE_REG(priv, f->m.reg_RPTR, f->m.rptr & TXF_WPTR_WR_PTR);

	/* refill the rxf fifo with fresh buffers for what we consumed */
	bdx_rx_alloc_skbs(priv, &priv->rxf_fifo0);

	RET(done);
}

/*************************************************************************
 * Debug / Temporary Code                                                *
 *************************************************************************/

/* print_rxdd - debug dump of an rxd completion descriptor (host order) */
static void print_rxdd(struct rxd_desc *rxdd, u32 rxd_val1, u16 len,
		       u16 rxd_vlan)
{
	DBG("ERROR: rxdd bc %d rxfq %d to %d type %d err %d rxp %d "
	    "pkt_id %d vtag %d len %d vlan_id %d cfi %d prio %d "
	    "va_lo %d va_hi %d\n",
	    GET_RXD_BC(rxd_val1), GET_RXD_RXFQ(rxd_val1), GET_RXD_TO(rxd_val1),
	    GET_RXD_TYPE(rxd_val1), GET_RXD_ERR(rxd_val1),
	    GET_RXD_RXP(rxd_val1), GET_RXD_PKT_ID(rxd_val1),
	    GET_RXD_VTAG(rxd_val1), len, GET_RXD_VLAN_ID(rxd_vlan),
	    GET_RXD_CFI(rxd_vlan), GET_RXD_PRIO(rxd_vlan), rxdd->va_lo,
	    rxdd->va_hi);
}

/* print_rxfd - debug dump of an rxf fill descriptor (chip order) */
static void print_rxfd(struct rxf_desc *rxfd)
{
	DBG("=== RxF desc CHIP ORDER/ENDIANESS =============\n"
	    "info 0x%x va_lo %u pa_lo 0x%x pa_hi 0x%x len 0x%x\n",
	    rxfd->info, rxfd->va_lo, rxfd->pa_lo, rxfd->pa_hi, rxfd->len);
}

/*
 * TX HW/SW interaction overview
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * There are 2 types of TX communication channels between the driver and the
 * NIC.
 * 1) TX Free Fifo - TXF - holds ack descriptors for sent packets
 * 2) TX Data Fifo - TXD - holds descriptors of full buffers.
 *
 * Currently the NIC supports TSO, checksumming and gather DMA
 * UFO and IP fragmentation are on the way
 *
 * TX SW Data Structures
 * ~~~~~~~~~~~~~~~~~~~~~
 * txdb - used to keep track of all skbs owned by SW and their dma addresses.
 * For the TX case, ownership lasts from getting the packet via hard_xmit
 * until HW acknowledges the send via TXF descriptors.
 * Implemented as a cyclic buffer.
 * fifo - keeps info about a fifo's size and location, relevant HW registers,
 * usage and skb db. Each RXD and RXF fifo has its own fifo structure.
 * Implemented as a simple struct.
 *
 * TX SW Execution Flow
 * ~~~~~~~~~~~~~~~~~~~~
 * The OS calls the driver's hard_xmit method with a packet to send.
 * The driver creates DMA mappings, builds TXD descriptors and kicks HW
 * by updating TXD WPTR.
 * When a packet is sent, HW writes us a TXF descriptor and SW frees the
 * original skb.
 * To prevent TXD fifo overflow without reading HW registers every time,
 * SW deploys the "tx level" technique.
 * Upon start up, tx level is initialized to TXD fifo length.
 * For every sent packet, SW gets its TXD descriptor size
 * (from a precalculated array) and subtracts it from tx level.
 * The size is also stored in txdb. When a TXF ack arrives, SW fetches the
 * size of the original TXD descriptor from txdb and adds it back to tx level.
 * When tx level drops below some predefined threshold, the driver
 * stops the TX queue. When tx level rises above that threshold,
 * the tx queue is enabled again.
 *
 * This technique avoids excessive reading of RPTR and WPTR registers.
 * As our benchmarks show, it adds 1.5 Gbit/sec to the NIC's throughput.
 */

/*************************************************************************
 *     Tx DB                                                             *
 *************************************************************************/

/* bdx_tx_db_size - return the number of free slots in the tx db.
 * @db - tx data base
 *
 * The cyclic buffer holds (size + 1) slots, one of which is always kept
 * unused to distinguish full from empty.
 */
static inline int bdx_tx_db_size(struct txdb *db)
{
	int taken = db->wptr - db->rptr;
	if (taken < 0)
		taken = db->size + 1 + taken;	/* (size + 1) equals memsz */

	return db->size - taken;
}

/* __bdx_tx_db_ptr_next - helper function, increment read/write pointer + wrap
 * @db   - tx data base
 * @pptr - read or write pointer
 */
static inline void __bdx_tx_db_ptr_next(struct txdb *db, struct tx_map **pptr)
{
	BDX_ASSERT(db == NULL || pptr == NULL);	/* sanity */

	BDX_ASSERT(*pptr != db->rptr &&	/* expect either read */
		   *pptr != db->wptr);	/* or write pointer */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?