netpoll.c
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32
#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)

static struct sk_buff_head skb_pool;

static atomic_t trapped;

#define USEC_PER_POLL           50
#define NETPOLL_RX_ENABLED      1
#define NETPOLL_RX_DROP         2

#define MAX_SKB_SIZE \
                (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
                 sizeof(struct iphdr) + sizeof(struct ethhdr))

static void zap_completion_queue(void);
static void arp_reply(struct sk_buff *skb);

static void queue_process(struct work_struct *work)
{
        struct netpoll_info *npinfo =
                container_of(work, struct netpoll_info, tx_work.work);
        struct sk_buff *skb;
        unsigned long flags;

        while ((skb = skb_dequeue(&npinfo->txq))) {
                struct net_device *dev = skb->dev;

                if (!netif_device_present(dev) || !netif_running(dev)) {
                        __kfree_skb(skb);
                        continue;
                }

                local_irq_save(flags);
                netif_tx_lock(dev);
                if ((netif_queue_stopped(dev) ||
                     netif_subqueue_stopped(dev, skb)) ||
                     dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
                        skb_queue_head(&npinfo->txq, skb);
                        netif_tx_unlock(dev);
                        local_irq_restore(flags);

                        schedule_delayed_work(&npinfo->tx_work, HZ/10);
                        return;
                }
                netif_tx_unlock(dev);
                local_irq_restore(flags);
        }
}

static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
                            unsigned short ulen, __be32 saddr, __be32 daddr)
{
        __wsum psum;

        if (uh->check == 0 || skb_csum_unnecessary(skb))
                return 0;

        psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

        if (skb->ip_summed == CHECKSUM_COMPLETE &&
            !csum_fold(csum_add(psum, skb->csum)))
                return 0;

        skb->csum = psum;

        return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * In cases where there is bi-directional communications, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
static int poll_one_napi(struct netpoll_info *npinfo,
                         struct napi_struct *napi, int budget)
{
        int work;

        /* net_rx_action's ->poll() invocations and our's are
         * synchronized by this test which is only made while
         * holding the napi->poll_lock.
         */
        if (!test_bit(NAPI_STATE_SCHED, &napi->state))
                return budget;

        npinfo->rx_flags |= NETPOLL_RX_DROP;
        atomic_inc(&trapped);

        work = napi->poll(napi, budget);

        atomic_dec(&trapped);
        npinfo->rx_flags &= ~NETPOLL_RX_DROP;

        return budget - work;
}

static void poll_napi(struct netpoll *np)
{
        struct netpoll_info *npinfo = np->dev->npinfo;
        struct napi_struct *napi;
        int budget = 16;

        list_for_each_entry(napi, &np->dev->napi_list, dev_list) {
                if (napi->poll_owner != smp_processor_id() &&
                    spin_trylock(&napi->poll_lock)) {
                        budget = poll_one_napi(npinfo, napi, budget);
                        spin_unlock(&napi->poll_lock);

                        if (!budget)
                                break;
                }
        }
}

static void service_arp_queue(struct netpoll_info *npi)
{
        struct sk_buff *skb;

        if (unlikely(!npi))
                return;

        skb = skb_dequeue(&npi->arp_tx);

        while (skb != NULL) {
                arp_reply(skb);
                skb = skb_dequeue(&npi->arp_tx);
        }
}

void netpoll_poll(struct netpoll *np)
{
        if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
                return;

        /* Process pending work on NIC */
        np->dev->poll_controller(np->dev);
        if (!list_empty(&np->dev->napi_list))
                poll_napi(np);

        service_arp_queue(np->dev->npinfo);

        zap_completion_queue();
}

static void refill_skbs(void)
{
        struct sk_buff *skb;
        unsigned long flags;

        spin_lock_irqsave(&skb_pool.lock, flags);
        while (skb_pool.qlen < MAX_SKBS) {
                skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
                if (!skb)
                        break;

                __skb_queue_tail(&skb_pool, skb);
        }
        spin_unlock_irqrestore(&skb_pool.lock, flags);
}

static void zap_completion_queue(void)
{
        unsigned long flags;
        struct softnet_data *sd = &get_cpu_var(softnet_data);

        if (sd->completion_queue) {
                struct sk_buff *clist;

                local_irq_save(flags);
                clist = sd->completion_queue;
                sd->completion_queue = NULL;
                local_irq_restore(flags);

                while (clist != NULL) {
                        struct sk_buff *skb = clist;
                        clist = clist->next;
                        if (skb->destructor)
                                dev_kfree_skb_any(skb); /* put this one back */
                        else
                                __kfree_skb(skb);
                }
        }

        put_cpu_var(softnet_data);
}

static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
{
        int count = 0;
        struct sk_buff *skb;

        zap_completion_queue();
        refill_skbs();
repeat:

        skb = alloc_skb(len, GFP_ATOMIC);
        if (!skb)
                skb = skb_dequeue(&skb_pool);

        if (!skb) {
                if (++count < 10) {
                        netpoll_poll(np);
                        goto repeat;
                }
                return NULL;
        }

        atomic_set(&skb->users, 1);
        skb_reserve(skb, reserve);
        return skb;
}

static int netpoll_owner_active(struct net_device *dev)
{
        struct napi_struct *napi;

        list_for_each_entry(napi, &dev->napi_list, dev_list) {
                if (napi->poll_owner == smp_processor_id())
                        return 1;
        }
        return 0;
}

static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
        int status = NETDEV_TX_BUSY;
        unsigned long tries;
        struct net_device *dev = np->dev;
        struct netpoll_info *npinfo = np->dev->npinfo;

        if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
                __kfree_skb(skb);
                return;
        }

        /* don't get messages out of order, and no recursion */
        if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
                unsigned long flags;

                local_irq_save(flags);
                /* try until next clock tick */
                for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
                     tries > 0; --tries) {
                        if (netif_tx_trylock(dev)) {
                                if (!netif_queue_stopped(dev) &&
                                    !netif_subqueue_stopped(dev, skb))
                                        status = dev->hard_start_xmit(skb, dev);
                                netif_tx_unlock(dev);

                                if (status == NETDEV_TX_OK)
                                        break;
                        }

                        /* tickle device maybe there is some cleanup */
                        netpoll_poll(np);

                        udelay(USEC_PER_POLL);
                }
                local_irq_restore(flags);
        }

        if (status != NETDEV_TX_OK) {
                skb_queue_tail(&npinfo->txq, skb);
                schedule_delayed_work(&npinfo->tx_work, 0);
        }
}

void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
        int total_len, eth_len, ip_len, udp_len;
        struct sk_buff *skb;
        struct udphdr *udph;
        struct iphdr *iph;
        struct ethhdr *eth;

        udp_len = len + sizeof(*udph);
        ip_len = eth_len = udp_len + sizeof(*iph);
        total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;

        skb = find_skb(np, total_len, total_len - len);
        if (!skb)
                return;

        skb_copy_to_linear_data(skb, msg, len);
        skb->len += len;

        skb_push(skb, sizeof(*udph));
        skb_reset_transport_header(skb);
        udph = udp_hdr(skb);
        udph->source = htons(np->local_port);
        udph->dest = htons(np->remote_port);
        udph->len = htons(udp_len);
        udph->check = 0;
        udph->check = csum_tcpudp_magic(htonl(np->local_ip),
                                        htonl(np->remote_ip),
                                        udp_len, IPPROTO_UDP,
                                        csum_partial((unsigned char *)udph, udp_len, 0));
        if (udph->check == 0)
                udph->check = CSUM_MANGLED_0;

        skb_push(skb, sizeof(*iph));
        skb_reset_network_header(skb);
        iph = ip_hdr(skb);

        /* iph->version = 4; iph->ihl = 5; */
        put_unaligned(0x45, (unsigned char *)iph);
        iph->tos      = 0;
        put_unaligned(htons(ip_len), &(iph->tot_len));
        iph->id       = 0;
        iph->frag_off = 0;
        iph->ttl      = 64;
        iph->protocol = IPPROTO_UDP;
        iph->check    = 0;
        put_unaligned(htonl(np->local_ip), &(iph->saddr));
        put_unaligned(htonl(np->remote_ip), &(iph->daddr));
        iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

        eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
        skb_reset_mac_header(skb);
        skb->protocol = eth->h_proto = htons(ETH_P_IP);
        memcpy(eth->h_source, np->local_mac, 6);
        memcpy(eth->h_dest, np->remote_mac, 6);

        skb->dev = np->dev;

        netpoll_send_skb(np, skb);
}

static void arp_reply(struct sk_buff *skb)
{
        struct netpoll_info *npinfo = skb->dev->npinfo;
        struct arphdr *arp;
        unsigned char *arp_ptr;
        int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
        __be32 sip, tip;
        unsigned char *sha;
        struct sk_buff *send_skb;
        struct netpoll *np = NULL;

        if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
                np = npinfo->rx_np;
        if (!np)
                return;

        /* No arp on this interface */
        if (skb->dev->flags & IFF_NOARP)
                return;

        if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
                                 (2 * skb->dev->addr_len) +
                                 (2 * sizeof(u32)))))
                return;

        skb_reset_network_header(skb);
        skb_reset_transport_header(skb);
        arp = arp_hdr(skb);

        if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
             arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
            arp->ar_pro != htons(ETH_P_IP) ||
            arp->ar_op != htons(ARPOP_REQUEST))
                return;

        arp_ptr = (unsigned char *)(arp+1);
        /* save the location of the src hw addr */
        sha = arp_ptr;
        arp_ptr += skb->dev->addr_len;
        memcpy(&sip, arp_ptr, 4);
        arp_ptr += 4;
        /* if we actually cared about dst hw addr, it would get copied here */
        arp_ptr += skb->dev->addr_len;
        memcpy(&tip, arp_ptr, 4);

        /* Should we ignore arp? */
        if (tip != htonl(np->local_ip) ||
            LOOPBACK(tip) || MULTICAST(tip))
                return;

        size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
        send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
                            LL_RESERVED_SPACE(np->dev));

        if (!send_skb)
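The listing above cuts off inside arp_reply(); the transmit path it defines (netpoll_send_udp(), driven through netpoll_setup()/netpoll_cleanup() from <linux/netpoll.h>) is what a client such as netconsole builds on. Below is a minimal, hypothetical consumer sketch modelled on that usage, not part of this file: the interface name, addresses, ports, and MAC are placeholder values, and it assumes the struct netpoll layout of the same kernel generation as the code above, where local_ip/remote_ip are kept in host byte order because netpoll_send_udp() applies htonl() itself.

/*
 * Hypothetical netpoll client, modelled on netconsole. All field values
 * are placeholders; local_ip/remote_ip are host byte order because
 * netpoll_send_udp() converts them with htonl() (see above).
 */
#include <linux/module.h>
#include <linux/netpoll.h>

static struct netpoll example_np = {
        .name        = "example",
        .dev_name    = "eth0",                  /* placeholder NIC name */
        .local_port  = 6665,
        .remote_port = 6666,
        .local_ip    = 0xc0a80001,              /* 192.168.0.1, host order */
        .remote_ip   = 0xc0a80002,              /* 192.168.0.2, host order */
        .remote_mac  = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
};

static int __init example_init(void)
{
        int err;

        /* Bind the netpoll instance to the device named in dev_name. */
        err = netpoll_setup(&example_np);
        if (err)
                return err;

        /* Emit one UDP datagram through the polling transmit path above. */
        netpoll_send_udp(&example_np, "hello from netpoll\n", 19);
        return 0;
}

static void __exit example_exit(void)
{
        netpoll_cleanup(&example_np);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

Because netpoll_send_udp() builds the Ethernet, IP, and UDP headers by hand and pushes the frame with netpoll_send_skb(), such a client can emit messages even when interrupts are off and the normal socket stack is unusable, which is the whole point of this framework.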