📄 myri10ge.c
		}
	}
	*hdr_flags = LRO_IPV4;
	iph = (struct iphdr *)(va + ll_hlen);
	*ip_hdr = iph;
	if (iph->protocol != IPPROTO_TCP)
		return -1;
	*hdr_flags |= LRO_TCP;
	*tcpudp_hdr = (u8 *) (*ip_hdr) + (iph->ihl << 2);

	/* verify the IP checksum */
	if (unlikely(ip_fast_csum((u8 *) iph, iph->ihl)))
		return -1;

	/* verify the checksum */
	if (unlikely(csum_tcpudp_magic(iph->saddr, iph->daddr,
				       ntohs(iph->tot_len) - (iph->ihl << 2),
				       IPPROTO_TCP, csum)))
		return -1;
	return 0;
}

static int myri10ge_open(struct net_device *dev)
{
	struct myri10ge_priv *mgp;
	struct myri10ge_cmd cmd;
	struct net_lro_mgr *lro_mgr;
	int status, big_pow2;

	mgp = netdev_priv(dev);

	if (mgp->running != MYRI10GE_ETH_STOPPED)
		return -EBUSY;

	mgp->running = MYRI10GE_ETH_STARTING;
	status = myri10ge_reset(mgp);
	if (status != 0) {
		printk(KERN_ERR "myri10ge: %s: failed reset\n", dev->name);
		goto abort_with_nothing;
	}

	status = myri10ge_request_irq(mgp);
	if (status != 0)
		goto abort_with_nothing;

	/* decide what small buffer size to use.  For good TCP rx
	 * performance, it is important to not receive 1514 byte
	 * frames into jumbo buffers, as it confuses the socket buffer
	 * accounting code, leading to drops and erratic performance.
	 */

	if (dev->mtu <= ETH_DATA_LEN)
		/* enough for a TCP header */
		mgp->small_bytes = (128 > SMP_CACHE_BYTES)
		    ? (128 - MXGEFW_PAD)
		    : (SMP_CACHE_BYTES - MXGEFW_PAD);
	else
		/* enough for a vlan encapsulated ETH_DATA_LEN frame */
		mgp->small_bytes = VLAN_ETH_FRAME_LEN;

	/* Override the small buffer size? */
	if (myri10ge_small_bytes > 0)
		mgp->small_bytes = myri10ge_small_bytes;

	/* get the lanai pointers to the send and receive rings */

	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd, 0);
	mgp->tx.lanai =
	    (struct mcp_kreq_ether_send __iomem *)(mgp->sram + cmd.data0);

	status |=
	    myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd, 0);
	mgp->rx_small.lanai =
	    (struct mcp_kreq_ether_recv __iomem *)(mgp->sram + cmd.data0);

	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd, 0);
	mgp->rx_big.lanai =
	    (struct mcp_kreq_ether_recv __iomem *)(mgp->sram + cmd.data0);

	if (status != 0) {
		printk(KERN_ERR
		       "myri10ge: %s: failed to get ring sizes or locations\n",
		       dev->name);
		mgp->running = MYRI10GE_ETH_STOPPED;
		goto abort_with_irq;
	}

	if (myri10ge_wcfifo && mgp->wc_enabled) {
		mgp->tx.wc_fifo = (u8 __iomem *) mgp->sram + MXGEFW_ETH_SEND_4;
		mgp->rx_small.wc_fifo =
		    (u8 __iomem *) mgp->sram + MXGEFW_ETH_RECV_SMALL;
		mgp->rx_big.wc_fifo =
		    (u8 __iomem *) mgp->sram + MXGEFW_ETH_RECV_BIG;
	} else {
		mgp->tx.wc_fifo = NULL;
		mgp->rx_small.wc_fifo = NULL;
		mgp->rx_big.wc_fifo = NULL;
	}

	/* Firmware needs the big buff size as a power of 2.  Lie and
	 * tell him the buffer is larger, because we only use 1
	 * buffer/pkt, and the mtu will prevent overruns.
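	 *
	 * Worked example (illustrative only, assuming MXGEFW_PAD is 2):
	 * for a hypothetical 1500-byte MTU, big_pow2 starts out at
	 * 1500 + ETH_HLEN (14) + VLAN_HLEN (4) + MXGEFW_PAD (2) = 1520,
	 * and the loop below rounds it up to the next power of 2, 2048,
	 * while mgp->big_bytes keeps the real 1520-byte receive size.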
	 */
	big_pow2 = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD;
	if (big_pow2 < MYRI10GE_ALLOC_SIZE / 2) {
		while (!is_power_of_2(big_pow2))
			big_pow2++;
		mgp->big_bytes = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD;
	} else {
		big_pow2 = MYRI10GE_ALLOC_SIZE;
		mgp->big_bytes = big_pow2;
	}

	status = myri10ge_allocate_rings(dev);
	if (status != 0)
		goto abort_with_irq;

	/* now give firmware buffers sizes, and MTU */
	cmd.data0 = dev->mtu + ETH_HLEN + VLAN_HLEN;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd, 0);
	cmd.data0 = mgp->small_bytes;
	status |=
	    myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd, 0);
	cmd.data0 = big_pow2;
	status |=
	    myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd, 0);
	if (status) {
		printk(KERN_ERR "myri10ge: %s: Couldn't set buffer sizes\n",
		       dev->name);
		goto abort_with_rings;
	}

	cmd.data0 = MYRI10GE_LOWPART_TO_U32(mgp->fw_stats_bus);
	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(mgp->fw_stats_bus);
	cmd.data2 = sizeof(struct mcp_irq_data);
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0);
	if (status == -ENOSYS) {
		dma_addr_t bus = mgp->fw_stats_bus;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MYRI10GE_LOWPART_TO_U32(bus);
		cmd.data1 = MYRI10GE_HIGHPART_TO_U32(bus);
		status = myri10ge_send_cmd(mgp,
					   MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
					   &cmd, 0);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		mgp->fw_multicast_support = 0;
	} else {
		mgp->fw_multicast_support = 1;
	}
	if (status) {
		printk(KERN_ERR "myri10ge: %s: Couldn't set stats DMA\n",
		       dev->name);
		goto abort_with_rings;
	}

	mgp->link_state = htonl(~0U);
	mgp->rdma_tags_available = 15;

	lro_mgr = &mgp->rx_done.lro_mgr;
	lro_mgr->dev = dev;
	lro_mgr->features = LRO_F_NAPI;
	lro_mgr->ip_summed = CHECKSUM_COMPLETE;
	lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY;
	lro_mgr->max_desc = MYRI10GE_MAX_LRO_DESCRIPTORS;
	lro_mgr->lro_arr = mgp->rx_done.lro_desc;
	lro_mgr->get_frag_header = myri10ge_get_frag_header;
	lro_mgr->max_aggr = myri10ge_lro_max_pkts;
	lro_mgr->frag_align_pad = 2;
	if (lro_mgr->max_aggr > MAX_SKB_FRAGS)
		lro_mgr->max_aggr = MAX_SKB_FRAGS;

	napi_enable(&mgp->napi);	/* must happen prior to any irq */

	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd, 0);
	if (status) {
		printk(KERN_ERR "myri10ge: %s: Couldn't bring up link\n",
		       dev->name);
		goto abort_with_rings;
	}

	mgp->wake_queue = 0;
	mgp->stop_queue = 0;
	mgp->running = MYRI10GE_ETH_RUNNING;
	mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ;
	add_timer(&mgp->watchdog_timer);
	netif_wake_queue(dev);
	return 0;

abort_with_rings:
	myri10ge_free_rings(dev);

abort_with_irq:
	myri10ge_free_irq(mgp);

abort_with_nothing:
	mgp->running = MYRI10GE_ETH_STOPPED;
	return -ENOMEM;
}

static int myri10ge_close(struct net_device *dev)
{
	struct myri10ge_priv *mgp;
	struct myri10ge_cmd cmd;
	int status, old_down_cnt;

	mgp = netdev_priv(dev);

	if (mgp->running != MYRI10GE_ETH_RUNNING)
		return 0;

	if (mgp->tx.req_bytes == NULL)
		return 0;

	del_timer_sync(&mgp->watchdog_timer);
	mgp->running = MYRI10GE_ETH_STOPPING;
	napi_disable(&mgp->napi);
	netif_carrier_off(dev);
	netif_stop_queue(dev);
	old_down_cnt = mgp->down_cnt;
	mb();
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0);
	if (status)
		printk(KERN_ERR "myri10ge: %s: Couldn't bring down link\n",
		       dev->name);

	wait_event_timeout(mgp->down_wq, old_down_cnt != mgp->down_cnt, HZ);
	if (old_down_cnt == mgp->down_cnt)
		printk(KERN_ERR "myri10ge: %s never got down irq\n",
		       dev->name);

	netif_tx_disable(dev);
	myri10ge_free_irq(mgp);
	myri10ge_free_rings(dev);
	mgp->running =
	    MYRI10GE_ETH_STOPPED;
	return 0;
}

/* copy an array of struct mcp_kreq_ether_send's to the mcp.  Copy
 * backwards one at a time and handle ring wraps */
static inline void
myri10ge_submit_req_backwards(struct myri10ge_tx_buf *tx,
			      struct mcp_kreq_ether_send *src, int cnt)
{
	int idx, starting_slot;

	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		myri10ge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src));
		mb();
	}
}

/*
 * copy an array of struct mcp_kreq_ether_send's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain.
 */
static inline void
myri10ge_submit_req(struct myri10ge_tx_buf *tx,
		    struct mcp_kreq_ether_send *src, int cnt)
{
	int idx, i;
	struct mcp_kreq_ether_send __iomem *dstp, *dst;
	struct mcp_kreq_ether_send *srcp;
	u8 last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	mb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			myri10ge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			mb();	/* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		 * that it is submitted below */
		myri10ge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		myri10ge_pio_copy(dstp, srcp, sizeof(*src));
		mb();	/* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	put_be32(*((__be32 *) src + 3), (__be32 __iomem *) dst + 3);
	tx->req += cnt;
	mb();
}

static inline void
myri10ge_submit_req_wc(struct myri10ge_tx_buf *tx,
		       struct mcp_kreq_ether_send *src, int cnt)
{
	tx->req += cnt;
	mb();
	while (cnt >= 4) {
		myri10ge_pio_copy(tx->wc_fifo, src, 64);
		mb();
		src += 4;
		cnt -= 4;
	}
	if (cnt > 0) {
		/* pad it to 64 bytes.  The src is 64 bytes bigger than it
		 * needs to be so that we don't overrun it */
		myri10ge_pio_copy(tx->wc_fifo + MXGEFW_ETH_SEND_OFFSET(cnt),
				  src, 64);
		mb();
	}
}

/*
 * Transmit a packet.  We need to split the packet so that a single
 * segment does not cross myri10ge->tx.boundary, so this makes segment
 * counting tricky.  So rather than try to count segments up front, we
 * just give up if there are too few segments to hold a reasonably
 * fragmented packet currently available.  If we run
 * out of segments while preparing a packet for DMA, we just linearize
 * it and try again.
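 *
 * In practice this means the code below simply requires max_segments
 * free descriptors up front (MXGEFW_MAX_SEND_DESC for an ordinary
 * frame, or MYRI10GE_MAX_SEND_DESC_TSO when the skb is GSO) and stops
 * the queue when fewer than that are available.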
 */
static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct myri10ge_priv *mgp = netdev_priv(dev);
	struct mcp_kreq_ether_send *req;
	struct myri10ge_tx_buf *tx = &mgp->tx;
	struct skb_frag_struct *frag;
	dma_addr_t bus;
	u32 low;
	__be32 high_swapped;
	unsigned int len;
	int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
	u16 pseudo_hdr_offset, cksum_offset;
	int cum_len, seglen, boundary, rdma_count;
	u8 flags, odd_flag;

again:
	req = tx->req_list;
	avail = tx->mask - 1 - (tx->req - tx->done);

	mss = 0;
	max_segments = MXGEFW_MAX_SEND_DESC;

	if (skb_is_gso(skb)) {
		mss = skb_shinfo(skb)->gso_size;
		max_segments = MYRI10GE_MAX_SEND_DESC_TSO;
	}

	if ((unlikely(avail < max_segments))) {
		/* we are out of transmit resources */
		mgp->stop_queue++;
		netif_stop_queue(dev);
		return 1;
	}

	/* Setup checksum offloading, if needed */
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	odd_flag = 0;
	flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		cksum_offset = skb_transport_offset(skb);
		pseudo_hdr_offset = cksum_offset + skb->csum_offset;
		/* If the headers are excessively large, then we must
		 * fall back to a software checksum */
		if (unlikely(!mss && (cksum_offset > 255 ||
				      pseudo_hdr_offset > 127))) {
			if (skb_checksum_help(skb))
				goto drop;
			cksum_offset = 0;
			pseudo_hdr_offset = 0;
		} else {
			odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
			flags |= MXGEFW_FLAGS_CKSUM;
		}
	}

	cum_len = 0;

	if (mss) {		/* TSO */
		/* this removes any CKSUM flag from before */
		flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);

		/* negative cum_len signifies to the
		 * send loop that we are still in the
		 * header portion of the TSO packet.
		 * TSO header can be at most 1KB long */
		cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb));

		/* for IPv6 TSO, the checksum offset stores the
		 * TCP header length, to save the firmware from
		 * the need to parse the headers */
		if (skb_is_gso_v6(skb)) {
			cksum_offset = tcp_hdrlen(skb);
			/* Can only handle headers <= max_tso6 long */
			if (unlikely(-cum_len > mgp->max_tso6))
				return myri10ge_sw_tso(skb, dev);
		}

		/* for TSO, pseudo_hdr_offset holds mss.
		 * The firmware figures out where to put
		 * the checksum by parsing the header. */
		pseudo_hdr_offset = mss;
	} else
		/* Mark small packets, and pad out tiny packets */
	if (skb->len <= MXGEFW_SEND_SMALL_SIZE) {
		flags |= MXGEFW_FLAGS_SMALL;

		/* pad frames to at least ETH_ZLEN bytes */
		if (unlikely(skb->len < ETH_ZLEN)) {
			if (skb_padto(skb, ETH_ZLEN)) {
				/* The packet is gone, so we must
				 * return 0 */
				mgp->stats.tx_dropped += 1;
				return 0;
			}
			/* adjust the len to account for the zero pad
			 * so that the nic can know how long it is */
			skb->len = ETH_ZLEN;
		}
	}

	/* map the skb for DMA */
	len = skb->len - skb->data_len;
	idx = tx->req & tx->mask;
	tx->info[idx].skb = skb;
	bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
	pci_unmap_addr_set(&tx->info[idx], bus, bus);
	pci_unmap_len_set(&tx->info[idx], len, len);

	frag_cnt = skb_shinfo(skb)->nr_frags;
	frag_idx = 0;
	count = 0;
	rdma_count = 0;

	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to