📄 nta.5
字号:
+}
+
+static unsigned int zc_poll(struct file *file, struct poll_table_struct *wait)
+{
+	struct zc_control *ctl = &zc_sniffer;
+	unsigned int poll_flags = 0;
+
+	poll_wait(file, &ctl->zc_wait, wait);
+
+	if (ctl->zc_used)
+		poll_flags = POLLIN | POLLRDNORM;
+
+	return poll_flags;
+}
+
+static int zc_ctl_alloc(struct zc_alloc_ctl *ctl, void __user *arg)
+{
+	void *ptr;
+	unsigned int size = SKB_DATA_ALIGN(ctl->zc.size) + sizeof(struct skb_shared_info);
+
+	ptr = avl_alloc(size, GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	avl_fill_zc(&ctl->zc, ptr, ctl->zc.size);
+
+	memset(ptr, 0, size);
+
+	if (copy_to_user(arg, ctl, sizeof(struct zc_alloc_ctl))) {
+		avl_free(ptr, size);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int netchannel_ip_route_output_flow(struct rtable **rp, struct flowi *flp, int flags)
+{
+	int err;
+
+	err = __ip_route_output_key(rp, flp);
+	if (err)
+		return err;
+
+	if (flp->proto) {
+		if (!flp->fl4_src)
+			flp->fl4_src = (*rp)->rt_src;
+		if (!flp->fl4_dst)
+			flp->fl4_dst = (*rp)->rt_dst;
+	}
+
+	return 0;
+}
+
+struct dst_entry *netchannel_route_get_raw(u32 faddr, u16 fport,
+		u32 laddr, u16 lport, u8 proto)
+{
+	struct rtable *rt;
+	struct flowi fl = { .oif = 0,
+			    .nl_u = { .ip4_u =
+				      { .daddr = faddr,
+					.saddr = laddr,
+					.tos = 0 } },
+			    .proto = proto,
+			    .uli_u = { .ports =
+				       { .sport = lport,
+					 .dport = fport } } };
+
+	if (netchannel_ip_route_output_flow(&rt, &fl, 0))
+		goto no_route;
+	return dst_clone(&rt->u.dst);
+
+no_route:
+	return NULL;
+}
+
+static int zc_ctl_commit(struct zc_alloc_ctl *ctl)
+{
+	struct iphdr *iph;
+	void *data;
+	struct sk_buff *skb;
+	unsigned int data_len;
+	struct skb_shared_info *shinfo;
+	u16 *thdr;
+
+	printk("%s: ptr: %p, size: %u, reserved: %u, type: %x.\n",
+			__func__, ctl->zc.data.ptr, ctl->zc.size, ctl->res_len, ctl->type);
+
+	if (ctl->type != 0)
+		return -ENOTSUPP;
+
+	data = ctl->zc.data.ptr;
+	iph = (struct iphdr *)(data + ctl->res_len);
+	data_len = ntohs(iph->tot_len);
+	thdr = (u16 *)(((u8 *)iph) + (iph->ihl<<2));
+
+	skb = alloc_skb_empty(ctl->zc.size, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end = data + ctl->zc.size;
+
+	shinfo = skb_shinfo(skb);
+	atomic_set(&shinfo->dataref, 1);
+	shinfo->nr_frags = 0;
+	shinfo->gso_size = 0;
+	shinfo->gso_segs = 0;
+	shinfo->gso_type = 0;
+	shinfo->ip6_frag_id = 0;
+	shinfo->frag_list = NULL;
+
+	skb->csum = 0;
+	skb_reserve(skb, ctl->res_len);
+	skb_put(skb, data_len-ctl->res_len);
+
+	printk("%u.%u.%u.%u:%u -> %u.%u.%u.%u:%u, proto: %u, len: %u, skb_len: %u.\n",
+			NIPQUAD(iph->saddr), ntohs(thdr[0]),
+			NIPQUAD(iph->daddr), ntohs(thdr[1]),
+			iph->protocol, data_len, skb->len);
+
+	skb->dst = netchannel_route_get_raw(
+			iph->daddr, thdr[1],
+			iph->saddr, thdr[0],
+			iph->protocol);
+	if (!skb->dst) {
+		printk("%s: failed to get route.\n", __func__);
+		goto err_out_free;
+	}
+
+	skb->h.th = (void *)thdr;
+	skb->nh.iph = iph;
+
+	printk("%u.%u.%u.%u:%u -> %u.%u.%u.%u:%u, proto: %u, dev: %s, skb: %p, data: %p.\n",
+			NIPQUAD(iph->saddr), ntohs(thdr[0]),
+			NIPQUAD(iph->daddr), ntohs(thdr[1]),
+			iph->protocol, skb->dst->dev ? skb->dst->dev->name : "<NULL>",
+			skb, skb->data);
+
+	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
+
+err_out_free:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+struct zc_status *zc_get_status(int cpu, unsigned int start)
+{
+	unsigned long flags;
+	struct avl_node_entry *e;
+	struct avl_allocator_data *alloc = &avl_allocator[cpu];
+	struct zc_status *st;
+	struct zc_entry_status *es;
+	unsigned int num = 0;
+
+	st = kzalloc(sizeof(struct zc_status), GFP_KERNEL); /* zeroed: zc_ioctl() copies the whole struct to user space */
+	if (!st)
+		return NULL;
+
+	spin_lock_irqsave(&alloc->avl_node_lock, flags);
+	list_for_each_entry(e, &alloc->avl_node_list, node_entry) {
+		if (e->avl_entry_num >= start && num < ZC_MAX_ENTRY_NUM) {
+			es = &st->entry[num];
+
+			es->node_order = e->avl_node_order;
+			es->node_num = e->avl_node_num;
+			num++;
+		}
+	}
+	spin_unlock_irqrestore(&alloc->avl_node_lock, flags);
+
+	st->entry_num = num;
+
+	return st;
+}
+
+static int zc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct zc_alloc_ctl ctl;
+	struct zc_private *priv = file->private_data;
+	int cpu, ret = -EINVAL;
+	unsigned int start;
+	struct zc_status *st;
+
+	mutex_lock(&priv->lock);
+
+	switch (cmd) {
+	case ZC_ALLOC:
+	case ZC_COMMIT:
+		if (copy_from_user(&ctl, (void __user *)arg, sizeof(struct zc_alloc_ctl))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		if (cmd == ZC_ALLOC)
+			ret = zc_ctl_alloc(&ctl, (void __user *)arg);
+		else
+			ret = zc_ctl_commit(&ctl);
+		break;
+	case ZC_SET_CPU:
+		if (copy_from_user(&cpu, (void __user *)arg, sizeof(int))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (cpu < NR_CPUS && cpu >= 0) {
+			priv->cpu = cpu;
+			ret = 0;
+		}
+		break;
+	case ZC_STATUS:
+		if (copy_from_user(&start, (void __user *)arg, sizeof(unsigned int))) {
+			printk("%s: failed to read initial entry number.\n", __func__);
+			ret = -EFAULT;
+			break;
+		}
+
+		st = zc_get_status(priv->cpu, start);
+		if (!st) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		ret = 0;
+		if (copy_to_user((void __user *)arg, st, sizeof(struct zc_status))) {
+			printk("%s: failed to write CPU%d status.\n", __func__, priv->cpu);
+			ret = -EFAULT;
+		}
+		kfree(st);
+		break;
+	}
+
+	mutex_unlock(&priv->lock);
+
+	return ret;
+}
+
+static struct file_operations zc_ops = {
+	.poll = &zc_poll,
+	.ioctl = &zc_ioctl,
+	.open = &zc_open,
+	.release = &zc_release,
+	.read = &zc_read,
+	.write = &zc_write,
+	.mmap = &zc_mmap,
+	.owner = THIS_MODULE,
+};
+
+int avl_init_zc(void)
+{
+	struct zc_control *ctl = &zc_sniffer;
+
+	ctl->zc_num = 1024;
+	init_waitqueue_head(&ctl->zc_wait);
+	spin_lock_init(&ctl->zc_lock);
+	ctl->zcb = kmalloc(ctl->zc_num * sizeof(struct zc_data), GFP_KERNEL);
+	if (!ctl->zcb)
+		return -ENOMEM;
+
+	zc_major = register_chrdev(0, zc_name, &zc_ops);
+	if (zc_major < 0) {
+		printk(KERN_ERR "Failed to register %s char device: err=%d. Zero-copy is disabled.\n",
+				zc_name, zc_major);
+		return -EINVAL;
+	}
+
+	printk(KERN_INFO "Network zero-copy sniffer has been enabled with %d major number.\n", zc_major);
+
+	return 0;
+}
+
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 022d889..27f2b9b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -125,6 +125,104 @@ EXPORT_SYMBOL(skb_truesize_bug);
  *
  */
 
+
+/**
+ * __alloc_skb_empty - allocate an empty network buffer
+ * @size: size to allocate
+ * @gfp_mask: allocation mask
+ */
+
+struct sk_buff *__alloc_skb_emtpy(unsigned int size, gfp_t gfp_mask)
+{
+	struct sk_buff *skb;
+
+	/* Get the HEAD */
+	skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+	if (!skb)
+		goto out;
+
+	memset(skb, 0, offsetof(struct sk_buff, truesize));
+
+	size = SKB_DATA_ALIGN(size);
+	skb->truesize = size + sizeof(struct sk_buff);
+	atomic_set(&skb->users, 1);
+
+out:
+	return skb;
+}
+
+/**
+ * __alloc_skb_nta - allocate a network buffer
+ * @size: size to allocate
+ * @gfp_mask: allocation mask
+ * @fclone: allocate from fclone cache instead of head cache
+ * and allocate a cloned (child) skb
+ *
+ * Allocate a new &sk_buff. The returned buffer has no headroom and a
+ * tail room of size bytes. The object has a reference count of one.
+ * The return is the buffer. On a failure the return is %NULL.
+ *
+ * Buffers may only be allocated from interrupts using a @gfp_mask of
+ * %GFP_ATOMIC.
+ *
+ * This function uses special network allocator.
+ */
+struct sk_buff *__alloc_skb_nta(unsigned int size, gfp_t gfp_mask,
+		int fclone)
+{
+	kmem_cache_t *cache;
+	struct skb_shared_info *shinfo;
+	struct sk_buff *skb;
+	u8 *data;
+
+	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+
+	/* Get the HEAD */
+	skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA);
+	if (!skb)
+		goto out;
+
+	/* Get the DATA. Size must match skb_add_mtu(). */
+	size = SKB_DATA_ALIGN(size);
+	data = avl_alloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (!data)
+		goto nodata;
+
+	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	skb->truesize = size + sizeof(struct sk_buff);
+	skb->nta = 1;
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end = data + size;
+	/* make sure we initialize shinfo sequentially */
+	shinfo = skb_shinfo(skb);
+	atomic_set(&shinfo->dataref, 1);
+	shinfo->nr_frags = 0;
+	shinfo->gso_size = 0;
+	shinfo->gso_segs = 0;
+	shinfo->gso_type = 0;
+	shinfo->ip6_frag_id = 0;
+	shinfo->frag_list = NULL;
+
+	if (fclone) {
+		struct sk_buff *child = skb + 1;
+		atomic_t *fclone_ref = (atomic_t *) (child + 1);
+
+		skb->fclone = SKB_FCLONE_ORIG;
+		atomic_set(fclone_ref, 1);
+
+		child->fclone = SKB_FCLONE_UNAVAILABLE;
+	}
+out:
+	return skb;
+nodata:
+	kmem_cache_free(cache, skb);
+	skb = NULL;
+	goto out;
+}
+
 /**
  * __alloc_skb - allocate a network buffer
  * @size: size to allocate
@@ -267,7 +365,7 @@ struct sk_buff *__netdev_alloc_skb(struc
 {
 	struct sk_buff *skb;
 
-	skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
+	skb = __alloc_skb_nta(length + NET_SKB_PAD, gfp_mask, 0);
 	if (likely(skb))
 		skb_reserve(skb, NET_SKB_PAD);
 	return skb;
@@ -313,7 +411,10 @@ static void skb_release_data(struct sk_b
 		if (skb_shinfo(skb)->frag_list)
 			skb_drop_fraglist(skb);
 
-		kfree(skb->head);
+		if (skb->nta)
+			avl_free(skb->head, skb->end - skb->head + sizeof(struct skb_shared_info));
+		else
+			kfree(skb->head);
 	}
 }
 
@@ -494,6 +595,7 @@ #ifdef CONFIG_NET_CLS_ACT
 #endif
 	skb_copy_secmark(n, skb);
 #endif
+	C(nta);
 	C(truesize);
 	atomic_set(&n->users, 1);
 	C(head);
@@ -678,7 +780,7 @@ out:
 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 		gfp_t gfp_mask)
 {
-	int i;
+	int i, nta = skb->nta;
 	u8 *data;
 	int size = nhead + (skb->end - skb->head) + ntail;
 	long off;
@@ -687,8 +789,10 @@ int pskb_expand_head(struct sk_buff *skb
 		BUG();
 
 	size = SKB_DATA_ALIGN(size);
-
-	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (nta)
+		data = avl_alloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	else
+		data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
 	if (!data)
 		goto nodata;
 
@@ -714,6 +818,7 @@ int pskb_expand_head(struct sk_buff *skb
 	skb->mac.raw += off;
 	skb->h.raw += off;
 	skb->nh.raw += off;
+	skb->nta = nta;
 	skb->cloned = 0;
 	skb->nohdr = 0;
 	atomic_set(&skb_shinfo(skb)->dataref, 1);
@@ -2057,6 +2162,9 @@ void __init skb_init(void)
 			NULL, NULL);
 	if (!skbuff_fclone_cache)
 		panic("cannot create skbuff cache");
+
+	if (avl_init())
+		panic("Failed to initialize network tree allocator.\n");
 }
 
 EXPORT_SYMBOL(___pskb_trim);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -