⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zero-copy.5

📁 实现linux平台下零拷贝技术的软件包。
💻 5
📖 第 1 页 / 共 2 页
字号:
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c--- a/drivers/net/8139too.c+++ b/drivers/net/8139too.c@@ -108,6 +108,10 @@ #include <linux/mii.h> #include <linux/completion.h> #include <linux/crc32.h>+#include <linux/if_ether.h>+#include <linux/ip.h>+#include <linux/tcp.h>+#include <linux/workqueue.h> #include <asm/io.h> #include <asm/uaccess.h> #include <asm/irq.h>@@ -1895,16 +1899,23 @@ static void rtl8139_rx_err (u32 rx_statu }  #if RX_BUF_IDX == 3-static __inline__ void wrap_copy(struct sk_buff *skb, const unsigned char *ring,+static __inline__ void __wrap_copy(void *data, const unsigned char *ring, 				 u32 offset, unsigned int size) { 	u32 left = RX_BUF_LEN - offset;  	if (size > left) {-		memcpy(skb->data, ring + offset, left);-		memcpy(skb->data+left, ring, size - left);+		memcpy(data, ring + offset, left);+		memcpy(data+left, ring, size - left); 	} else-		memcpy(skb->data, ring + offset, size);+		memcpy(data, ring + offset, size);++}++static __inline__ void wrap_copy(struct sk_buff *skb, const unsigned char *ring,+				 u32 offset, unsigned int size)+{+	__wrap_copy(skb->data, ring, offset, size); } #endif @@ -1926,6 +1937,46 @@ static void rtl8139_isr_ack(struct rtl81 	} } +static int rtl8139too_move_data(struct zc_buf *zb, unsigned int sz)+{+	struct rtl8139_private *tp = zb->priv_data;+	unsigned char *rx_ring = tp->rx_ring;+	unsigned int cur_rx = tp->cur_rx;+	u32 ring_offset = cur_rx % RX_BUF_LEN;+	int hsize = sizeof(struct ethhdr) + sizeof(struct iphdr) + sizeof(struct tcphdr) + 2;+	struct sk_buff *skb = zb->skb;+	skb_frag_t *frag;+	void *dest;+	+	if (skb_shinfo(skb)->nr_frags == 0 || skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS) {+		printk(KERN_ERR "%s: broken skb: zb=%p, nr_frags=%u.\n", __func__, zb, skb_shinfo(skb)->nr_frags);+		return -EINVAL;+	}+	+	frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags-1];+	dest = page_address(frag->page) + frag->page_offset;++	printk("%s: zb=%p, page=%p, offset=%4u, size=%4u, dest=%p, sz=%4u.\n", +			__func__, zb, frag->page, frag->page_offset, frag->size, dest, sz);++#if RX_BUF_IDX == 3+	wrap_copy(dest, rx_ring, ring_offset + 4 + hsize, sz);+#else+	memcpy(dest, &rx_ring[ring_offset + 4 + hsize], sz);+#endif++	return sz;+}++static void rtl8139_work_func(void *data)+{+	struct sk_buff *skb = data;+					+	netif_receive_skb(skb);+}++static DECLARE_WORK(rtl8139_work, &rtl8139_work_func, NULL);+ static int rtl8139_rx(struct net_device *dev, struct rtl8139_private *tp, 		      int budget) {@@ -1956,8 +2007,7 @@ static int rtl8139_rx(struct net_device   		if (netif_msg_rx_status(tp)) 			printk(KERN_DEBUG "%s:  rtl8139_rx() status %4.4x, size %4.4x,"-				" cur %4.4x.\n", dev->name, rx_status,-			 rx_size, cur_rx);+				" cur %4.4x.\n", dev->name, rx_status, rx_size, cur_rx); #if RTL8139_DEBUG > 2 		{ 			int i;@@ -2005,34 +2055,74 @@ no_early_rx: 			goto out; 		} -		/* Malloc up new buffer, compatible with net-2e. */-		/* Omit the four octet CRC from the length. */+		{+			u8 zc_data[sizeof(struct ethhdr) + sizeof(struct iphdr) + sizeof(struct tcphdr) + 2 + sizeof(struct zc_buf)];+			int hsize = sizeof(struct ethhdr) + sizeof(struct iphdr) + sizeof(struct tcphdr) + 2;+			struct zc_buf *zb;++			memset(&zc_data, 0, sizeof(zc_data));+			zb = (struct zc_buf *)zc_data;++			zb->header = (void *)(zb + 1);+			zb->header_size = hsize - 2;+			zb->size = pkt_size + 2 - hsize;+			zb->priv_data = tp;+			zb->move_data = &rtl8139too_move_data; -		skb = dev_alloc_skb (pkt_size + 2);-		if (likely(skb)) {-			skb->dev = dev;-			skb_reserve (skb, 2);	/* 16 byte align the IP fields. */ #if RX_BUF_IDX == 3-			wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);+			__wrap_copy(zb->header, rx_ring, ring_offset+4, hsize); #else-			eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);+			memcpy(zb->header, &rx_ring[ring_offset + 4], hsize); #endif-			skb_put (skb, pkt_size);--			skb->protocol = eth_type_trans (skb, dev);+			skb = alloc_skb_zerocopy(zb, GFP_ATOMIC);+			if (skb) {+				skb->dev = dev;+				skb->protocol = eth_type_trans(skb, dev);+				+				dev->last_rx = jiffies;+				tp->stats.rx_bytes += pkt_size;+				tp->stats.rx_packets++;+				netif_receive_skb(skb);+				//rtl8139_work.data = skb;+				//schedule_work(&rtl8139_work);+			} else if (zb->status == -1) {+				/* Malloc up new buffer, compatible with net-2e. */+				/* Omit the four octet CRC from the length. */+				+				skb = dev_alloc_skb (pkt_size + 2);+				if (likely(skb)) {+					skb->dev = dev;+					skb_reserve (skb, 2);	/* 16 byte align the IP fields. */+#if RX_BUF_IDX == 3+					wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);+#else+					eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);+#endif+					skb_put (skb, pkt_size); -			dev->last_rx = jiffies;-			tp->stats.rx_bytes += pkt_size;-			tp->stats.rx_packets++;+					skb->protocol = eth_type_trans (skb, dev); -			netif_receive_skb (skb);-		} else {-			if (net_ratelimit()) -				printk (KERN_WARNING-					"%s: Memory squeeze, dropping packet.\n",-					dev->name);-			tp->stats.rx_dropped++;+					dev->last_rx = jiffies;+					tp->stats.rx_bytes += pkt_size;+					tp->stats.rx_packets++;++					netif_receive_skb(skb);+				} else {+					if (net_ratelimit()) +						printk (KERN_WARNING+							"%s: Memory squeeze, dropping packet.\n",+							dev->name);+					tp->stats.rx_dropped++;+				}+			} else {+				if (net_ratelimit()) +					printk (KERN_WARNING+						"%s: Zero-copy failed, dropping packet.\n",+						dev->name);+				tp->stats.rx_dropped++;+			} 		}+ 		received++;  		cur_rx = (cur_rx + rx_size + 4 + 3) & ~3;diff --git a/fs/read_write.c b/fs/read_write.c--- a/fs/read_write.c+++ b/fs/read_write.c@@ -15,6 +15,8 @@ #include <linux/module.h> #include <linux/syscalls.h> +#include <net/sock.h>+ #include <asm/uaccess.h> #include <asm/unistd.h> @@ -670,8 +672,15 @@ static ssize_t do_sendfile(int out_fd, i 	if (!(out_file->f_mode & FMODE_WRITE)) 		goto fput_out; 	retval = -EINVAL;-	if (!out_file->f_op || !out_file->f_op->sendpage)+	if (!out_file->f_op)+		goto fput_out;+	       	+	if (!SOCKET_I(in_file->f_dentry->d_inode) && !out_file->f_op->sendpage) {+		printk("%s: sock=%p, sendpage=%p.\n", __func__, +				SOCKET_I(in_file->f_dentry->d_inode), out_file->f_op->sendpage); 		goto fput_out;+	}+	 	out_inode = out_file->f_dentry->d_inode; 	retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 	if (retval)@@ -688,7 +697,7 @@ static ssize_t do_sendfile(int out_fd, i 	retval = -EINVAL; 	if (unlikely(pos < 0)) 		goto fput_out;-	if (unlikely(pos + count > max)) {+	if (unlikely((unsigned long long)(pos + count) > (unsigned long long)max)) { 		retval = -EOVERFLOW; 		if (pos >= max) 			goto fput_out;diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h--- a/include/linux/skbuff.h+++ b/include/linux/skbuff.h@@ -34,6 +34,8 @@ #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */ #define SLAB_SKB 		/* Slabified skbuffs 	   */ +#define ZEROCOPY_HEADER_CACHE_SIZE	256	/* Maximum receiving zero-copy header size */+ #define CHECKSUM_NONE 0 #define CHECKSUM_HW 1 #define CHECKSUM_UNNECESSARY 2@@ -261,7 +263,8 @@ struct sk_buff { 				nohdr:1, 				nfctinfo:3; 	__u8			pkt_type:3,-				fclone:2;+				fclone:2,+				zerocopy:1; 	__be16			protocol;  	void			(*destructor)(struct sk_buff *skb);@@ -1045,6 +1048,36 @@ static inline struct sk_buff *dev_alloc_ 	return __dev_alloc_skb(length, GFP_ATOMIC); } +struct zc_buf;++struct zc_handler+{+	struct list_head	zc_entry;+	int			(* alloc_data)(struct zc_buf *zb);+	int			(* commit_data)(struct zc_buf *zb);+};++struct zc_buf+{+	struct zc_handler 	*zh;+	void			*header;+	unsigned int		header_size;+	unsigned int		size;+	void			*priv;+	int			status;+	struct sk_buff		*skb;+	int			(* move_data)(struct zc_buf *zb, unsigned int sz);+	void			*priv_data;+};+++extern struct sk_buff *__alloc_skb_zerocopy(struct zc_buf *zb, gfp_t gfp_mask);++static inline struct sk_buff *alloc_skb_zerocopy(struct zc_buf *zb, gfp_t gfp_mask)+{+	return __alloc_skb_zerocopy(zb, gfp_mask);+}+ /**  *	skb_cow - copy header of skb when it is required  *	@skb: buffer to cowdiff --git a/include/net/sock.h b/include/net/sock.h--- a/include/net/sock.h+++ b/include/net/sock.h@@ -117,6 +117,20 @@ struct sock_common { 	struct proto		*skc_prot; }; +enum zc_flags {+	ZC_PAGE_READY = 0,+};++struct zc_page+{+	struct page		*page;+	struct page		*cached_page;+	unsigned int		page_offset;+	unsigned int		size;+	unsigned int		used;+	long			flags;+};+ /**   *	struct sock - network layer representation of sockets   *	@__sk_common: shared layout with inet_timewait_sock@@ -251,6 +265,13 @@ struct sock {   	int			(*sk_backlog_rcv)(struct sock *sk, 						  struct sk_buff *skb);   	void                    (*sk_destruct)(struct sock *sk);++	int			(* zc_alloc_data)(struct zc_buf *zb);+	int			(* zc_commit_data)(struct zc_buf *zb);+	wait_queue_head_t	zc_data_ready;+	spinlock_t		zc_lock;+	struct zc_page		*zc_pages;+	unsigned int		zc_page_num, zc_page_index; };  /*diff --git a/mm/filemap.c b/mm/filemap.c--- a/mm/filemap.c+++ b/mm/filemap.c@@ -1663,7 +1663,7 @@ EXPORT_SYMBOL(read_cache_page);  * caller's lru-buffering pagevec.  This function is specifically for  * generic_file_write().  */-static inline struct page *+struct page * __grab_cache_page(struct address_space *mapping, unsigned long index, 			struct page **cached_page, struct pagevec *lru_pvec) {@@ -1692,6 +1692,8 @@ repeat: 	return page; } +EXPORT_SYMBOL_GPL(__grab_cache_page);+ /*  * The logic we want is  *diff --git a/net/core/Makefile b/net/core/Makefile--- a/net/core/Makefile+++ b/net/core/Makefile@@ -3,7 +3,7 @@ #  obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \-	 gen_stats.o gen_estimator.o+	 gen_stats.o gen_estimator.o zerocopy.o  obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/datagram.c b/net/core/datagram.c--- a/net/core/datagram.c+++ b/net/core/datagram.c@@ -213,6 +213,10 @@ int skb_copy_datagram_iovec(const struct { 	int i, err, fraglen, end = 0; 	struct sk_buff *next = skb_shinfo(skb)->frag_list;++	if (skb->zerocopy)+		return 0;+	 next_skb: 	fraglen = skb_headlen(skb); 	i = -1;diff --git a/net/core/skbuff.c b/net/core/skbuff.c--- a/net/core/skbuff.c+++ b/net/core/skbuff.c@@ -70,6 +70,7 @@  static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly;+static kmem_cache_t *skbuff_head_cache_zerocopy __read_mostly;  /*  *	Keep out-of-line to prevent kernel bloat.@@ -182,6 +183,66 @@ nodata: 	goto out; } +int zc_alloc_data(struct zc_buf *zb);+int zc_commit_data(struct zc_buf *zb);++struct sk_buff *__alloc_skb_zerocopy(struct zc_buf *zb, gfp_t gfp_mask)+{+	struct sk_buff *skb = NULL;+	void *data;+	int err;+	unsigned int size = SKB_DATA_ALIGN(zb->header_size);++	if (size > ZEROCOPY_HEADER_CACHE_SIZE)+		goto out;+	+	zb->status = -1;++	skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);+	if (!skb)+		goto out;+	+	data = kmem_cache_alloc(skbuff_head_cache_zerocopy, gfp_mask & ~__GFP_DMA);+	if (!data)+		goto err_out_free_skb;++	memset(skb, 0, offsetof(struct sk_buff, truesize));+	skb->truesize = size + sizeof(struct sk_buff);+	atomic_set(&skb->users, 1);+	skb->head = data;+	skb->data = data;+	skb->tail = data;+	skb->end  = data + size;+	+	atomic_set(&(skb_shinfo(skb)->dataref), 1);+	skb_shinfo(skb)->nr_frags  = 0;+	skb_shinfo(skb)->tso_size = 0;+	skb_shinfo(skb)->tso_segs = 0;+	skb_shinfo(skb)->frag_list = NULL;++	skb->zerocopy = 1;+	/* It could be zerocopied too, but let's use it as is for now. --zbr 2005_10_27 */+	memcpy(skb->data, zb->header, zb->header_size);+	skb_put(skb, zb->header_size);++	zb->skb = skb;++	err = zc_alloc_data(zb);+	if (err)+		goto err_out_free_skb_data;++	printk("%s: skb=%p, skb->len=%u, skb->data_len=%u.\n", __func__, skb, skb->len, skb->data_len);+	+out:+	return skb;+err_out_free_skb_data:+	kmem_cache_free(skbuff_head_cache_zerocopy, data);+err_out_free_skb:+	kmem_cache_free(skbuff_head_cache, skb);+	skb = NULL;+	goto out;+}+ /**  *	alloc_skb_from_cache	-	allocate a network buffer  *	@cp: kmem_cache from which to allocate the data area@@ -284,7 +345,10 @@ void kfree_skbmem(struct sk_buff *skb) 	struct sk_buff *other; 	atomic_t *fclone_ref; -	skb_release_data(skb);+	if (skb->zerocopy)+		kmem_cache_free(skbuff_head_cache_zerocopy, skb->head);+	else+		skb_release_data(skb); 	switch (skb->fclone) { 	case SKB_FCLONE_UNAVAILABLE: 		kmem_cache_free(skbuff_head_cache, skb);@@ -1706,6 +1770,14 @@ void __init skb_init(void) 						NULL, NULL); 	if (!skbuff_fclone_cache) 		panic("cannot create skbuff cache");+	+	skbuff_head_cache_zerocopy = kmem_cache_create("skbuff_head_cache_zerocopy",+					      ZEROCOPY_HEADER_CACHE_SIZE + sizeof(struct skb_shared_info),+					      0,+					      SLAB_HWCACHE_ALIGN,+					      NULL, NULL);+	if (!skbuff_head_cache_zerocopy)+		panic("cannot create zerocopy skbuff cache"); }  EXPORT_SYMBOL(___pskb_trim);@@ -1739,3 +1811,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read); EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text);+EXPORT_SYMBOL(__alloc_skb_zerocopy);diff --git a/net/core/sock.c b/net/core/sock.c--- a/net/core/sock.c+++ b/net/core/sock.c@@ -704,6 +704,18 @@ void sk_free(struct sock *sk) 	module_put(owner); } +static void zc_sk_init(struct sock *sk)+{+	spin_lock_init(&sk->zc_lock);+	init_waitqueue_head(&sk->zc_data_ready);+	sk->zc_pages		= NULL;+	sk->zc_page_num		= 0;+	sk->zc_page_index	= 0;+	sk->zc_alloc_data	= NULL;+	sk->zc_commit_data	= NULL;+}++ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { 	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);@@ -737,6 +749,8 @@ struct sock *sk_clone(const struct sock  		sock_reset_flag(newsk, SOCK_DONE); 		skb_queue_head_init(&newsk->sk_error_queue); +		zc_sk_init(newsk);+ 		filter = newsk->sk_filter; 		if (filter != NULL) 			sk_filter_charge(newsk, filter);@@ -1320,6 +1334,8 @@ void sock_init_data(struct socket *sock, 	sk->sk_stamp.tv_usec    = -1L;  	atomic_set(&sk->sk_refcnt, 1);++	zc_sk_init(sk); }  void fastcall lock_sock(struct sock *sk)diff --git a/net/core/zerocopy.c b/net/core/zerocopy.cnew file mode 100644--- /dev/null+++ b/net/core/zerocopy.c@@ -0,0 +1,165 @@+/*+ * 	zerocopy.c+ * + * 2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>+ * All rights reserved.+ * + * This program is free software; you can redistribute it and/or modify+ * it under the terms of the GNU General Public License as published by+ * the Free Software Foundation; either version 2 of the License, or+ * (at your option) any later version.+ *+ * This program is distributed in the hope that it will be useful,+ * but WITHOUT ANY WARRANTY; without even the implied warranty of+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the+ * GNU General Public License for more details.+ *

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -