📄 nta.4
字号:
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/ioctl.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/ip.h>
+#include <net/flow.h>
+#include <net/dst.h>
+#include <net/route.h>
+#include <asm/uaccess.h>
+
+#include "avl.h"
+
+/*
+ * Per-open-file state of the zero-copy sniffer char device.
+ * @zcb is a shadow copy of the global zc_data ring, allocated
+ * immediately after this structure in the same kzalloc() block.
+ */
+struct zc_private
+{
+	struct zc_data *zcb;
+	struct mutex lock;	/* serializes ioctl() on this descriptor */
+	int cpu;		/* CPU whose avl_allocator pool is mmap'ed/queried */
+};
+
+static char zc_name[] = "zc";
+static int zc_major;
+struct zc_control zc_sniffer;
+
+/* Release per-file state allocated in zc_open(). */
+static int zc_release(struct inode *inode, struct file *file)
+{
+	struct zc_private *priv = file->private_data;
+
+	kfree(priv);
+	return 0;
+}
+
+/*
+ * Allocate zc_private plus a shadow ring of ctl->zc_num zc_data
+ * entries in one block; the ring starts right after the struct.
+ */
+static int zc_open(struct inode *inode, struct file *file)
+{
+	struct zc_private *priv;
+	struct zc_control *ctl = &zc_sniffer;
+
+	priv = kzalloc(sizeof(struct zc_private) + ctl->zc_num * sizeof(struct zc_data), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	priv->zcb = (struct zc_data *)(priv+1);
+	priv->cpu = 0; /* Use CPU0 by default */
+	mutex_init(&priv->lock);
+
+	file->private_data = priv;
+
+	return 0;
+}
+
+/*
+ * Map the pages backing the per-CPU allocator's node entries into
+ * userspace.  vm_pgoff selects the starting page across the chained
+ * node entries; pages are inserted one by one via vm_insert_page().
+ */
+static int zc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct zc_private *priv = file->private_data;
+	struct avl_allocator_data *alloc = &avl_allocator[priv->cpu];
+	struct avl_node_entry *e;
+	unsigned long flags, start = vma->vm_start;
+	int err = 0, idx, off;
+	unsigned int i, j, st, num, total_num;
+
+	st = vma->vm_pgoff;
+	total_num = (vma->vm_end - vma->vm_start)/PAGE_SIZE;
+
+	printk("%s: start: %lx, end: %lx, total_num: %u, st: %u.\n", __func__, start, vma->vm_end, total_num, st);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_file = file;
+
+	spin_lock_irqsave(&alloc->avl_node_lock, flags);
+	list_for_each_entry(e, &alloc->avl_node_list, node_entry) {
+		/* Skip whole entries that lie before the requested offset. */
+		if (st >= e->avl_node_num*(1U<<e->avl_node_order)) {
+#if 0
+			printk("%s: continue on cpu: %d, e: %p, total_num: %u, node_num: %u, node_order: %u, pages_in_node: %u, st: %u.\n",
+					__func__, priv->cpu, e, total_num, e->avl_node_num, e->avl_node_order,
+					e->avl_node_num*(1U<<e->avl_node_order), st);
+#endif
+			st -= e->avl_node_num*(1U<<e->avl_node_order);
+			continue;
+		}
+		num = min_t(unsigned int, total_num, e->avl_node_num*(1<<e->avl_node_order));
+
+		printk("%s: cpu: %d, e: %p, total_num: %u, node_num: %u, node_order: %u, st: %u, num: %u.\n",
+				__func__, priv->cpu, e, total_num, e->avl_node_num, e->avl_node_order, st, num);
+
+		idx = 0;
+		off = st;
+		for (i=st; i<num; ++i) {
+			struct avl_node *node = &e->avl_node_array[idx][off];
+
+			if (++off >= AVL_NODES_ON_PAGE) {
+				idx++;
+				off = 0;
+			}
+
+			/* Each node covers 1<<order pages starting at node->value. */
+			for (j=0; (j<(1<<e->avl_node_order)) && (i<num); ++j, ++i) {
+				unsigned long virt = node->value + (j<<PAGE_SHIFT);
+				err = vm_insert_page(vma, start, virt_to_page(virt));
+				if (err) {
+					printk("\n%s: Failed to insert page for addr %lx into %lx, err: %d.\n",
+							__func__, virt, start, err);
+					break;
+				}
+				start += PAGE_SIZE;
+			}
+		}
+		if (err)
+			break;
+		total_num -= num;
+
+		if (total_num == 0)
+			break;
+	}
+	spin_unlock_irqrestore(&alloc->avl_node_lock, flags);
+
+	return err;
+}
+
+/*
+ * Userspace returns consumed zc_data entries: each one is handed
+ * back to the allocator via avl_free_no_zc() and zc_used shrinks.
+ */
+static ssize_t zc_write(struct file *file, const char __user *buf, size_t size, loff_t *off)
+{
+	ssize_t sz = 0;
+	struct zc_private *priv = file->private_data;
+	unsigned long flags;
+	unsigned int req_num = size/sizeof(struct zc_data), cnum, csize, i;
+	struct zc_control *ctl = &zc_sniffer;
+
+	while (size) {
+		cnum = min_t(unsigned int, req_num, ctl->zc_num);
+		/*
+		 * Fix: a trailing partial entry (size < sizeof(struct zc_data))
+		 * yields cnum == 0, which previously never decremented @size
+		 * and spun forever.  Stop once no whole entry remains.
+		 */
+		if (cnum == 0)
+			break;
+		csize = cnum*sizeof(struct zc_data);
+
+		if (copy_from_user(priv->zcb, buf, csize)) {
+			printk("%s: copy_from_user() failed.\n", __func__);
+			break;
+		}
+
+		spin_lock_irqsave(&ctl->zc_lock, flags);
+		for (i=0; i<cnum; ++i)
+			avl_free_no_zc(priv->zcb[i].data.ptr, priv->zcb[i].size);
+		ctl->zc_used -= cnum;
+		spin_unlock_irqrestore(&ctl->zc_lock, flags);
+
+		sz += csize;
+		size -= csize;
+		buf += csize;
+	}
+
+	return sz;
+}
+
+/*
+ * Deliver pending zc_data descriptors to userspace.  The global ring
+ * (ctl->zcb, zc_num entries) holds zc_used pending entries ending at
+ * index zc_pos; the wrap-around case copies both halves.
+ */
+static ssize_t zc_read(struct file *file, char __user *buf, size_t size, loff_t *off)
+{
+	ssize_t sz = 0;
+	struct zc_private *priv = file->private_data;
+	unsigned long flags;
+	unsigned int pos, req_num = size/sizeof(struct zc_data), cnum, csize;
+	struct zc_control *ctl = &zc_sniffer;
+
+	/*
+	 * Fix: the return value of wait_event_interruptible() was ignored,
+	 * so a signal fell through into the copy path.  Restart the syscall
+	 * instead.
+	 */
+	if (wait_event_interruptible(ctl->zc_wait, ctl->zc_used > 0))
+		return -ERESTARTSYS;
+
+	spin_lock_irqsave(&ctl->zc_lock, flags);
+	cnum = min_t(unsigned int, req_num, ctl->zc_used);
+	csize = cnum*sizeof(struct zc_data);
+	if (ctl->zc_used) {
+		if (ctl->zc_pos >= ctl->zc_used) {
+			/* Pending entries are contiguous, ending at zc_pos. */
+			pos = ctl->zc_pos - ctl->zc_used;
+			memcpy(priv->zcb, &ctl->zcb[pos], csize);
+		} else {
+			/*
+			 * NOTE(review): wrap-around path looks suspect - it copies
+			 * csize entries from zcb[0] (only zc_pos are valid there)
+			 * and ignores cnum for the tail chunk, so entries may be
+			 * duplicated or delivered out of order.  Preserved as in
+			 * the original; verify against the userspace consumer.
+			 */
+			memcpy(priv->zcb, &ctl->zcb[0], csize);
+			pos = ctl->zc_num - (ctl->zc_used - ctl->zc_pos);
+			memcpy(&priv->zcb[ctl->zc_pos], &ctl->zcb[pos],
+					(ctl->zc_used - ctl->zc_pos)*sizeof(struct zc_data));
+		}
+	}
+	spin_unlock_irqrestore(&ctl->zc_lock, flags);
+
+	sz = csize;
+
+	if (copy_to_user(buf, priv->zcb, cnum*sizeof(struct zc_data)))
+		sz = -EFAULT;
+
+	return sz;
+}
+
+/* Readable whenever at least one zc_data entry is pending. */
+static unsigned int zc_poll(struct file *file, struct poll_table_struct *wait)
+{
+	struct zc_control *ctl = &zc_sniffer;
+	unsigned int poll_flags = 0;
+
+	poll_wait(file, &ctl->zc_wait, wait);
+
+	if (ctl->zc_used)
+		poll_flags = POLLIN | POLLRDNORM;
+
+	return poll_flags;
+}
+
+/*
+ * ZC_ALLOC ioctl backend: allocate an skb-sized data block from the
+ * avl allocator, describe it in ctl->zc and copy the result to @arg.
+ */
+static int zc_ctl_alloc(struct zc_alloc_ctl *ctl, void __user *arg)
+{
+	void *ptr;
+	unsigned int size = SKB_DATA_ALIGN(ctl->zc.size) + sizeof(struct skb_shared_info);
+
+	ptr = avl_alloc(size, GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	avl_fill_zc(&ctl->zc, ptr, ctl->zc.size);
+
+	memset(ptr, 0, size);
+
+	if (copy_to_user(arg, ctl, sizeof(struct zc_alloc_ctl))) {
+		avl_free(ptr, size);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/* Route lookup helper: fill in missing saddr/daddr from the result. */
+static int netchannel_ip_route_output_flow(struct rtable **rp, struct flowi *flp, int flags)
+{
+	int err;
+
+	err = __ip_route_output_key(rp, flp);
+	if (err)
+		return err;
+
+	if (flp->proto) {
+		if (!flp->fl4_src)
+			flp->fl4_src = (*rp)->rt_src;
+		if (!flp->fl4_dst)
+			flp->fl4_dst = (*rp)->rt_dst;
+	}
+
+	return 0;
+}
+
+/*
+ * Build a flow from the given 4-tuple/protocol and return a cloned
+ * dst entry, or NULL when no route exists.
+ */
+struct dst_entry *netchannel_route_get_raw(u32 faddr, u16 fport,
+		u32 laddr, u16 lport, u8 proto)
+{
+	struct rtable *rt;
+	struct flowi fl = { .oif = 0,
+		.nl_u = { .ip4_u =
+			{ .daddr = faddr,
+			  .saddr = laddr,
+			  .tos = 0 } },
+		.proto = proto,
+		.uli_u = { .ports =
+			{ .sport = lport,
+			  .dport = fport } } };
+
+	if (netchannel_ip_route_output_flow(&rt, &fl, 0))
+		goto no_route;
+	return dst_clone(&rt->u.dst);
+
+no_route:
+	return NULL;
+}
+
+/*
+ * ZC_COMMIT ioctl backend: wrap a user-prepared packet (IP header at
+ * res_len into the zero-copy block) in an empty skb and push it out
+ * through NF_IP_LOCAL_OUT / dst_output().
+ */
+static int zc_ctl_commit(struct zc_alloc_ctl *ctl)
+{
+	struct iphdr *iph;
+	void *data;
+	struct sk_buff *skb;
+	unsigned int data_len;
+	struct skb_shared_info *shinfo;
+	u16 *thdr;
+
+	printk("%s: ptr: %p, size: %u, reserved: %u, type: %x.\n",
+			__func__, ctl->zc.data.ptr, ctl->zc.size, ctl->res_len, ctl->type);
+
+	if (ctl->type != 0)
+		return -ENOTSUPP;
+
+	data = ctl->zc.data.ptr;
+	iph = (struct iphdr *)(data + ctl->res_len);
+	data_len = ntohs(iph->tot_len);
+	/* thdr points at the transport header: thdr[0]=sport, thdr[1]=dport. */
+	thdr = (u16 *)(((u8 *)iph) + (iph->ihl<<2));
+
+	skb = alloc_skb_empty(ctl->zc.size, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	/* Attach the zero-copy block as the skb data area by hand. */
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end = data + ctl->zc.size;
+
+	shinfo = skb_shinfo(skb);
+	atomic_set(&shinfo->dataref, 1);
+	shinfo->nr_frags = 0;
+	shinfo->gso_size = 0;
+	shinfo->gso_segs = 0;
+	shinfo->gso_type = 0;
+	shinfo->ip6_frag_id = 0;
+	shinfo->frag_list = NULL;
+
+	skb->csum = 0;
+	skb_reserve(skb, ctl->res_len);
+	skb_put(skb, data_len-ctl->res_len);
+
+	printk("%u.%u.%u.%u:%u -> %u.%u.%u.%u:%u, proto: %u, len: %u, skb_len: %u.\n",
+			NIPQUAD(iph->saddr), ntohs(thdr[0]),
+			NIPQUAD(iph->daddr), ntohs(thdr[1]),
+			iph->protocol, data_len, skb->len);
+
+	skb->dst = netchannel_route_get_raw(
+			iph->daddr, thdr[1],
+			iph->saddr, thdr[0],
+			iph->protocol);
+	if (!skb->dst) {
+		printk("%s: failed to get route.\n", __func__);
+		goto err_out_free;
+	}
+
+	skb->h.th = (void *)thdr;
+	skb->nh.iph = iph;
+
+	printk("%u.%u.%u.%u:%u -> %u.%u.%u.%u:%u, proto: %u, dev: %s, skb: %p, data: %p.\n",
+			NIPQUAD(iph->saddr), ntohs(thdr[0]),
+			NIPQUAD(iph->daddr), ntohs(thdr[1]),
+			iph->protocol, skb->dst->dev ? skb->dst->dev->name : "<NULL>",
+			skb, skb->data);
+
+	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
+
+err_out_free:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+/*
+ * Snapshot node-entry geometry (order/count) of @cpu's allocator,
+ * starting from entry number @start, capped at ZC_MAX_ENTRY_NUM.
+ * Caller frees the returned structure.
+ */
+struct zc_status *zc_get_status(int cpu, unsigned int start)
+{
+	unsigned long flags;
+	struct avl_node_entry *e;
+	struct avl_allocator_data *alloc = &avl_allocator[cpu];
+	struct zc_status *st;
+	struct zc_entry_status *es;
+	unsigned int num = 0;
+
+	st = kmalloc(sizeof(struct zc_status), GFP_KERNEL);
+	if (!st)
+		return NULL;
+
+	spin_lock_irqsave(&alloc->avl_node_lock, flags);
+	list_for_each_entry(e, &alloc->avl_node_list, node_entry) {
+		if (e->avl_entry_num >= start && num < ZC_MAX_ENTRY_NUM) {
+			es = &st->entry[num];
+
+			es->node_order = e->avl_node_order;
+			es->node_num = e->avl_node_num;
+			num++;
+		}
+	}
+	spin_unlock_irqrestore(&alloc->avl_node_lock, flags);
+
+	st->entry_num = num;
+
+	return st;
+}
+
+/* Dispatch ZC_ALLOC / ZC_COMMIT / ZC_SET_CPU / ZC_STATUS commands. */
+static int zc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct zc_alloc_ctl ctl;
+	struct zc_private *priv = file->private_data;
+	int cpu, ret = -EINVAL;
+	unsigned int start;
+	struct zc_status *st;
+
+	mutex_lock(&priv->lock);
+
+	switch (cmd) {
+	case ZC_ALLOC:
+	case ZC_COMMIT:
+		if (copy_from_user(&ctl, (void __user *)arg, sizeof(struct zc_alloc_ctl))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		if (cmd == ZC_ALLOC)
+			ret = zc_ctl_alloc(&ctl, (void __user *)arg);
+		else
+			ret = zc_ctl_commit(&ctl);
+		break;
+	case ZC_SET_CPU:
+		if (copy_from_user(&cpu, (void __user *)arg, sizeof(int))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (cpu < NR_CPUS && cpu >= 0) {
+			priv->cpu = cpu;
+			ret = 0;
+		}
+		break;
+	case ZC_STATUS:
+		if (copy_from_user(&start, (void __user *)arg, sizeof(unsigned int))) {
+			printk("%s: failed to read initial entry number.\n", __func__);
+			ret = -EFAULT;
+			break;
+		}
+
+		st = zc_get_status(priv->cpu, start);
+		if (!st) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		ret = 0;
+		if (copy_to_user((void __user *)arg, st, sizeof(struct zc_status))) {
+			printk("%s: failed to write CPU%d status.\n", __func__, priv->cpu);
+			ret = -EFAULT;
+		}
+		kfree(st);
+		break;
+	}
+
+	mutex_unlock(&priv->lock);
+
+	return ret;
+}
+
+static struct file_operations zc_ops = {
+	.poll		= &zc_poll,
+	.ioctl		= &zc_ioctl,
+	.open		= &zc_open,
+	.release	= &zc_release,
+	.read		= &zc_read,
+	.write		= &zc_write,
+	.mmap		= &zc_mmap,
+	.owner		= THIS_MODULE,
+};
+
+/*
+ * Initialize the global sniffer control block and register the "zc"
+ * char device.  Returns 0 on success, negative error otherwise.
+ */
+int avl_init_zc(void)
+{
+	struct zc_control *ctl = &zc_sniffer;
+
+	ctl->zc_num = 1024;
+	init_waitqueue_head(&ctl->zc_wait);
+	spin_lock_init(&ctl->zc_lock);
+	ctl->zcb = kmalloc(ctl->zc_num * sizeof(struct zc_data), GFP_KERNEL);
+	if (!ctl->zcb)
+		return -ENOMEM;
+
+	zc_major = register_chrdev(0, zc_name, &zc_ops);
+	if (zc_major < 0) {
+		printk(KERN_ERR "Failed to register %s char device: err=%d. Zero-copy is disabled.\n",
+				zc_name, zc_major);
+		/* Fix: don't leak the descriptor ring on registration failure. */
+		kfree(ctl->zcb);
+		ctl->zcb = NULL;
+		return -EINVAL;
+	}
+
+	printk(KERN_INFO "Network zero-copy sniffer has been enabled with %d major number.\n", zc_major);
+
+	return 0;
+}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 022d889..7eec140 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -125,6 +125,33 @@ EXPORT_SYMBOL(skb_truesize_bug);
  *
  */
 
+/**
+ *	__alloc_skb_empty	-	allocate an empty network buffer
+ *	@size: size to allocate
+ *	@gfp_mask: allocation mask
+ */
+
+/* Fix: function was misspelled "__alloc_skb_emtpy"; renamed to match
+ * the kerneldoc above and the alloc_skb_empty() wrapper callers. */
+struct sk_buff *__alloc_skb_empty(unsigned int size, gfp_t gfp_mask)
+{
+	struct skb_shared_info *shinfo;
+	struct sk_buff *skb;
+
+	/* Get the HEAD */
+	skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+	if (!skb)
+		goto out;
+
+	memset(skb, 0, offsetof(struct sk_buff, truesize));
+
+	size = SKB_DATA_ALIGN(size);
+	skb->truesize = size + sizeof(struct sk_buff);
+	atomic_set(&skb->users, 1);
+
+out:
+	return skb;
+}
+
 /**
  *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
@@ -156,7 +183,7 @@ struct sk_buff *__alloc_skb(unsigned int
 	/* Get the DATA. Size must match skb_add_mtu(). */
 	size = SKB_DATA_ALIGN(size);
-	data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	data = avl_alloc(size + sizeof(struct skb_shared_info), gfp_mask);
 	if (!data)
 		goto nodata;
 
@@ -223,7 +250,7 @@ struct sk_buff *alloc_skb_from_cache(kme
 	/* Get the DATA. */
 	size = SKB_DATA_ALIGN(size);
-	data = kmem_cache_alloc(cp, gfp_mask);
+	data = avl_alloc(size, gfp_mask);
 	if (!data)
 		goto nodata;
 
@@ -313,7 +340,7 @@ static void skb_release_data(struct sk_b
 		if (skb_shinfo(skb)->frag_list)
 			skb_drop_fraglist(skb);
 
-		kfree(skb->head);
+		avl_free(skb->head, skb->end - skb->head + sizeof(struct skb_shared_info));
 	}
 }
 
@@ -688,7 +715,7 @@ int pskb_expand_head(struct sk_buff *skb
 	size = SKB_DATA_ALIGN(size);
 
-	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	data = avl_alloc(size + sizeof(struct skb_shared_info), gfp_mask);
 	if (!data)
 		goto nodata;
 
@@ -2057,6 +2084,9 @@ void __init skb_init(void)
 						NULL, NULL);
 	if (!skbuff_fclone_cache)
 		panic("cannot create skbuff cache");
+
+	if (avl_init())
+		panic("Failed to initialize network tree allocator.\n");
 }
 
 EXPORT_SYMBOL(___pskb_trim);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -