📄 dev.c
 */
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

	net_timestamp(&skb->stamp);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			/* skb->nh should be correctly
			   set by sender, so that the second statement is
			   just protection against buggy protocols.
			 */
			skb2->mac.raw = skb2->data;

			if (skb2->nh.raw < skb2->data ||
			    skb2->nh.raw > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       skb2->protocol, dev->name);
				skb2->nh.raw = skb2->data;
			}

			skb2->h.raw = skb2->nh.raw;
			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype);
		}
	}
	rcu_read_unlock();
}

/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff **pskb, int inward)
{
	unsigned int csum;
	int ret = 0, offset = (*pskb)->h.raw - (*pskb)->data;

	if (inward) {
		(*pskb)->ip_summed = CHECKSUM_NONE;
		goto out;
	}

	if (skb_cloned(*pskb)) {
		ret = pskb_expand_head(*pskb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	if (offset > (int)(*pskb)->len)
		BUG();
	csum = skb_checksum(*pskb, offset, (*pskb)->len - offset, 0);

	offset = (*pskb)->tail - (*pskb)->h.raw;
	if (offset <= 0)
		BUG();
	if ((*pskb)->csum + 2 > offset)
		BUG();

	*(u16 *)((*pskb)->h.raw + (*pskb)->csum) = csum_fold(csum);
	(*pskb)->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}

#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
	int i;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
			return 1;

	return 0;
}
#else
#define illegal_highdma(dev, skb)	(0)
#endif

extern void skb_release_data(struct sk_buff *);

/* Keep head the same: replace data */
int __skb_linearize(struct sk_buff *skb, int gfp_mask)
{
	unsigned int size;
	u8 *data;
	long offset;
	struct skb_shared_info *ninfo;
	int headerlen = skb->data - skb->head;
	int expand = (skb->tail + skb->data_len) - skb->end;

	if (skb_shared(skb))
		BUG();

	if (expand <= 0)
		expand = 0;

	size = skb->end - skb->head + expand;
	size = SKB_DATA_ALIGN(size);
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (!data)
		return -ENOMEM;

	/* Copy entire thing */
	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
		BUG();

	/* Set up shinfo */
	ninfo = (struct skb_shared_info *)(data + size);
	atomic_set(&ninfo->dataref, 1);
	ninfo->tso_size = skb_shinfo(skb)->tso_size;
	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
	ninfo->nr_frags = 0;
	ninfo->frag_list = NULL;

	/* Offset between the two in bytes */
	offset = data - skb->head;

	/* Free old data. */
	skb_release_data(skb);

	skb->head = data;
	skb->end  = data + size;

	/* Set up new pointers */
	skb->h.raw   += offset;
	skb->nh.raw  += offset;
	skb->mac.raw += offset;
	skb->tail    += offset;
	skb->data    += offset;

	/* We are no longer a clone, even if we were. */
	skb->cloned = 0;

	skb->tail += skb->data_len;
	skb->data_len = 0;
	return 0;
}

#define HARD_TX_LOCK(dev, cpu) {			\
	if ((dev->features & NETIF_F_LLTX) == 0) {	\
		spin_lock(&dev->xmit_lock);		\
		dev->xmit_lock_owner = cpu;		\
	}						\
}

#define HARD_TX_UNLOCK(dev) {				\
	if ((dev->features & NETIF_F_LLTX) == 0) {	\
		dev->xmit_lock_owner = -1;		\
		spin_unlock(&dev->xmit_lock);		\
	}						\
}

static inline void qdisc_run(struct net_device *dev)
{
	while (!netif_queue_stopped(dev) && qdisc_restart(dev) < 0)
		/* NOTHING */;
}

/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 */

int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	int rc = -ENOMEM;

	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb, GFP_ATOMIC))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb, GFP_ATOMIC))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_HW &&
	    (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
	     (!(dev->features & NETIF_F_IP_CSUM) ||
	      skb->protocol != htons(ETH_P_IP))))
		if (skb_checksum_help(&skb, 0))
			goto out_kfree_skb;

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	local_bh_disable();

	/* Updates of qdisc are serialized by queue_lock.
	 * The struct Qdisc which is pointed to by qdisc is now a
	 * rcu structure - it may be accessed without acquiring
	 * a lock (but the structure may be stale.) The freeing of the
	 * qdisc will be deferred until it's known that there are no
	 * more references to it.
	 *
	 * If the qdisc has an enqueue function, we still need to
	 * hold the queue_lock before calling it, since queue_lock
	 * also serializes access to the device queue.
	 */
	q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Grab device queue */
		spin_lock(&dev->queue_lock);

		rc = q->enqueue(skb, q);

		qdisc_run(dev);

		spin_unlock(&dev->queue_lock);
		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
		goto out;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that xmit_lock protection is necessary here.
	   (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Check this and shot the lock. It is not prone to deadlocks.
	   Either shot noqueue qdisc, it is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		if (dev->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, cpu);

			if (!netif_queue_stopped(dev)) {
				if (netdev_nit)
					dev_queue_xmit_nit(skb, dev);

				rc = 0;
				if (!dev->hard_start_xmit(skb, dev)) {
					HARD_TX_UNLOCK(dev);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
			goto out_enetdown;
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}
out_enetdown:
	rc = -ENETDOWN;
out_kfree_skb:
	kfree_skb(skb);
out:
	local_bh_enable();
	return rc;
}

/*=======================================================================
			Receiver routines
  =======================================================================*/

int netdev_max_backlog = 300;
int weight_p = 64;            /* old backlog weight */
/* These numbers are selected based on intuition and some
 * experimentation; if you have a more scientific way of doing this
 * please go ahead and fix things.
 */
int no_cong_thresh = 10;
int no_cong = 20;
int lo_cong = 100;
int mod_cong = 290;

DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };

#ifdef CONFIG_NET_HW_FLOWCONTROL
atomic_t netdev_dropping = ATOMIC_INIT(0);
static unsigned long netdev_fc_mask = 1;
unsigned long netdev_fc_xoff;
spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;

static struct
{
	void (*stimul)(struct net_device *);
	struct net_device *dev;
} netdev_fc_slots[BITS_PER_LONG];

int netdev_register_fc(struct net_device *dev,
		       void (*stimul)(struct net_device *dev))
{
	int bit = 0;
	unsigned long flags;

	spin_lock_irqsave(&netdev_fc_lock, flags);
	if (netdev_fc_mask != ~0UL) {
		bit = ffz(netdev_fc_mask);
		netdev_fc_slots[bit].stimul = stimul;
		netdev_fc_slots[bit].dev = dev;
		set_bit(bit, &netdev_fc_mask);
		clear_bit(bit, &netdev_fc_xoff);
	}
	spin_unlock_irqrestore(&netdev_fc_lock, flags);
	return bit;
}

void netdev_unregister_fc(int bit)
{
	unsigned long flags;

	spin_lock_irqsave(&netdev_fc_lock, flags);
	if (bit > 0) {
		netdev_fc_slots[bit].stimul = NULL;
		netdev_fc_slots[bit].dev = NULL;
		clear_bit(bit, &netdev_fc_mask);
		clear_bit(bit, &netdev_fc_xoff);
	}
	spin_unlock_irqrestore(&netdev_fc_lock, flags);
}

static void netdev_wakeup(void)
{
	unsigned long xoff;

	spin_lock(&netdev_fc_lock);
	xoff = netdev_fc_xoff;
	netdev_fc_xoff = 0;
	while (xoff) {
		int i = ffz(~xoff);
		xoff &= ~(1 << i);
		netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
	}
	spin_unlock(&netdev_fc_lock);
}
#endif

static void get_sample_stats(int cpu)
{
#ifdef RAND_LIE
	unsigned long rd;
	int rq;
#endif
	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
	int blog = sd->input_pkt_queue.qlen;
	int avg_blog = sd->avg_blog;

	avg_blog = (avg_blog >> 1) + (blog >> 1);

	if (avg_blog > mod_cong) {
		/* Above moderate congestion levels. */
		sd->cng_level = NET_RX_CN_HIGH;
#ifdef RAND_LIE
		rd = net_random();
		rq = rd % netdev_max_backlog;
		if (rq < avg_blog) /* unlucky bastard */
			sd->cng_level = NET_RX_DROP;
#endif
	} else if (avg_blog > lo_cong) {
		sd->cng_level = NET_RX_CN_MOD;
#ifdef RAND_LIE
		rd = net_random();
		rq = rd % netdev_max_backlog;
		if (rq < avg_blog) /* unlucky bastard */
			sd->cng_level = NET_RX_CN_HIGH;
#endif
	} else if (avg_blog > no_cong)
		sd->cng_level = NET_RX_CN_LOW;
	else  /* no congestion */
		sd->cng_level = NET_RX_SUCCESS;

	sd->avg_blog = avg_blog;
}

#ifdef OFFLINE_SAMPLE
static void sample_queue(unsigned long dummy)
{
/* 10 ms or 1 ms -- I don't care -- JHS */
	int next_tick = 1;
	int cpu = smp_processor_id();

	get_sample_stats(cpu);
	next_tick += jiffies;
	mod_timer(&samp_timer, next_tick);
}
#endif

/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process. It always succeeds. The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_CN_LOW	(low congestion)
 *	NET_RX_CN_MOD	(moderate congestion)
 *	NET_RX_CN_HIGH	(high congestion)
 *	NET_RX_DROP	(packet was dropped)
 *
 */

int netif_rx(struct sk_buff *skb)
{
	int this_cpu;
	struct softnet_data *queue;
	unsigned long flags;

#ifdef CONFIG_NETPOLL
	if (skb->dev->netpoll_rx && netpoll_rx(skb)) {
		kfree_skb(skb);
		return NET_RX_DROP;
	}
#endif

	if (!skb->stamp.tv_sec)
		net_timestamp(&skb->stamp);

	/*
	 * The code is rearranged so that the path is shortest
	 * when the CPU is congested, but is still operating.
	 */
	local_irq_save(flags);
	this_cpu = smp_processor_id();
	queue = &__get_cpu_var(softnet_data);

	__get_cpu_var(netdev_rx_stat).total++;
	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		if (queue->input_pkt_queue.qlen) {
			if (queue->throttle)
				goto drop;

enqueue:
			dev_hold(skb->dev);
			__skb_queue_tail(&queue->input_pkt_queue, skb);
#ifndef OFFLINE_SAMPLE
			get_sample_stats(this_cpu);
#endif
			local_irq_restore(flags);
			return queue->cng_level;
		}

		if (queue->throttle) {
			queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
			if (atomic_dec_and_test(&netdev_dropping))
				netdev_wakeup();
#endif
		}

		netif_rx_schedule(&queue->backlog_dev);
		goto enqueue;
	}

	if (!queue->throttle) {
		queue->throttle = 1;
		__get_cpu_var(netdev_rx_stat).throttled++;
#ifdef CONFIG_NET_HW_FLOWCONTROL
		atomic_inc(&netdev_dropping);
#endif
	}

drop:
	__get_cpu_var(netdev_rx_stat).dropped++;
	local_irq_restore(flags);

	kfree_skb(skb);
	return NET_RX_DROP;
}

static __inline__ void skb_bond(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
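
/*
 * Illustrative sketch only, not part of dev.c: roughly how a NIC driver
 * of this kernel generation hands a received frame to netif_rx() above.
 * The function name my_rx_handler() and the buf/len parameters are
 * hypothetical; error handling is kept to the minimum. Only the standard
 * 2.6-era APIs (dev_alloc_skb, skb_reserve, skb_put, eth_type_trans,
 * netif_rx) are used.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/etherdevice.h>
#include <linux/string.h>

static void my_rx_handler(struct net_device *dev, void *buf, unsigned int len)
{
	struct sk_buff *skb;

	/* dev_alloc_skb() allocates with GFP_ATOMIC, so this is safe
	 * from the driver's receive interrupt. */
	skb = dev_alloc_skb(len + 2);
	if (!skb)
		return;				/* drop on allocation failure */

	skb_reserve(skb, 2);			/* align the IP header */
	memcpy(skb_put(skb, len), buf, len);

	skb->dev = dev;
	skb->protocol = eth_type_trans(skb, dev);
	skb->ip_summed = CHECKSUM_NONE;		/* no hardware checksum assumed */

	/* Queue the frame for the protocol layers; the return value is
	 * one of the NET_RX_* congestion codes documented at netif_rx(). */
	netif_rx(skb);
}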