
zero-copy.7

A software package implementing zero-copy networking on the Linux platform (a kernel patch touching include/net/sock.h, mm/filemap.c, net/core and net/socket.c).
Page 1 of 3
 	int			(*sk_backlog_rcv)(struct sock *sk,
						  struct sk_buff *skb);
 	void                    (*sk_destruct)(struct sock *sk);
+
+	int			(* zc_alloc_data)(struct zc_buf *zb);
+	int			(* zc_commit_data)(struct zc_buf *zb);
+	wait_queue_head_t	zc_data_ready;
+	spinlock_t		zc_lock;
+	struct zc_page		*zc_pages;
+	unsigned int		zc_page_num, zc_page_index;
+	struct pagevec 		zc_lru_pvec;
+	loff_t 			zc_pos;
+	struct page		*zc_cached_page;
+	struct file		*zc_file;
 };
 
+void sk_zc_fini(struct sock *sk);
+void sk_zc_init(struct sock *sk);
+
 /*
  * Hashed lists helper routines
  */
diff --git a/mm/filemap.c b/mm/filemap.c
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1663,7 +1663,7 @@ EXPORT_SYMBOL(read_cache_page);
  * caller's lru-buffering pagevec.  This function is specifically for
  * generic_file_write().
  */
-static inline struct page *
+struct page *
 __grab_cache_page(struct address_space *mapping, unsigned long index,
 			struct page **cached_page, struct pagevec *lru_pvec)
 {
@@ -1692,6 +1692,8 @@ repeat:
 	return page;
 }
 
+EXPORT_SYMBOL_GPL(__grab_cache_page);
+
 /*
  * The logic we want is
  *
diff --git a/net/core/Makefile b/net/core/Makefile
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
-	 gen_stats.o gen_estimator.o
+	 gen_stats.o gen_estimator.o zerocopy.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
diff --git a/net/core/datagram.c b/net/core/datagram.c
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -213,6 +213,10 @@ int skb_copy_datagram_iovec(const struct
 {
 	int i, err, fraglen, end = 0;
 	struct sk_buff *next = skb_shinfo(skb)->frag_list;
+
+	if (skb->zerocopy)
+		return 0;
+
 next_skb:
 	fraglen = skb_headlen(skb);
 	i = -1;
@@ -364,6 +368,9 @@ int skb_copy_and_csum_datagram_iovec(con
 {
 	unsigned int csum;
 	int chunk = skb->len - hlen;
+
+	if (skb->zerocopy)
+		return 0;
 
 	/* Skip filled elements.
 	 * Pretty silly, look at memcpy_toiovec, though 8)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -70,6 +70,7 @@
 
 static kmem_cache_t *skbuff_head_cache __read_mostly;
 static kmem_cache_t *skbuff_fclone_cache __read_mostly;
+static kmem_cache_t *skbuff_head_cache_zerocopy __read_mostly;
 
 /*
  *	Keep out-of-line to prevent kernel bloat.
@@ -182,6 +183,64 @@ nodata:
 	goto out;
 }
 
+int zc_alloc_data(struct zc_buf *zb);
+int zc_commit_data(struct zc_buf *zb);
+
+struct sk_buff *__alloc_skb_zerocopy(struct zc_buf *zb, gfp_t gfp_mask)
+{
+	struct sk_buff *skb = NULL;
+	void *data;
+	int err;
+	unsigned int size = SKB_DATA_ALIGN(zb->header_size);
+
+	zb->status = -1;
+
+	if (size > ZEROCOPY_HEADER_CACHE_SIZE)
+		goto out;
+
+	skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+	if (!skb)
+		goto out;
+
+	data = kmem_cache_alloc(skbuff_head_cache_zerocopy, gfp_mask & ~__GFP_DMA);
+	if (!data)
+		goto err_out_free_skb;
+
+	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	skb->truesize = size + sizeof(struct sk_buff);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end  = data + size;
+
+	atomic_set(&(skb_shinfo(skb)->dataref), 1);
+	skb_shinfo(skb)->nr_frags  = 0;
+	skb_shinfo(skb)->tso_size = 0;
+	skb_shinfo(skb)->tso_segs = 0;
+	skb_shinfo(skb)->frag_list = NULL;
+
+	skb->zerocopy = 1;
+	/* It could be zerocopied too, but let's use it as is for now. --zbr 2005_10_27 */
+	memcpy(skb->data, zb->header, zb->header_size);
+	skb_put(skb, zb->header_size);
+
+	zb->skb = skb;
+
+	err = zc_alloc_data(zb);
+	if (err)
+		goto err_out_free_skb_data;
+
+out:
+	return skb;
+err_out_free_skb_data:
+	kmem_cache_free(skbuff_head_cache_zerocopy, data);
+err_out_free_skb:
+	kmem_cache_free(skbuff_head_cache, skb);
+	skb = NULL;
+	goto out;
+}
+
 /**
  *	alloc_skb_from_cache	-	allocate a network buffer
  *	@cp: kmem_cache from which to allocate the data area
@@ -284,7 +343,10 @@ void kfree_skbmem(struct sk_buff *skb)
 	struct sk_buff *other;
 	atomic_t *fclone_ref;
 
-	skb_release_data(skb);
+	if (skb->zerocopy)
+		kmem_cache_free(skbuff_head_cache_zerocopy, skb->head);
+	else
+		skb_release_data(skb);
 	switch (skb->fclone) {
 	case SKB_FCLONE_UNAVAILABLE:
 		kmem_cache_free(skbuff_head_cache, skb);
@@ -1706,6 +1768,14 @@ void __init skb_init(void)
 						NULL, NULL);
 	if (!skbuff_fclone_cache)
 		panic("cannot create skbuff cache");
+
+	skbuff_head_cache_zerocopy = kmem_cache_create("skbuff_head_cache_zerocopy",
+					      ZEROCOPY_HEADER_CACHE_SIZE + sizeof(struct skb_shared_info),
+					      0,
+					      SLAB_HWCACHE_ALIGN,
+					      NULL, NULL);
+	if (!skbuff_head_cache_zerocopy)
+		panic("cannot create zerocopy skbuff cache");
 }
 
 EXPORT_SYMBOL(___pskb_trim);
@@ -1739,3 +1809,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
 EXPORT_SYMBOL(skb_seq_read);
 EXPORT_SYMBOL(skb_abort_seq_read);
 EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(__alloc_skb_zerocopy);
diff --git a/net/core/sock.c b/net/core/sock.c
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -455,6 +455,9 @@ set_rcvbuf:
 			spin_unlock_bh(&sk->sk_lock.slock);
 			ret = -ENONET;
 			break;
+		case SO_ZEROCOPY:
+			ret = sock_zc_init(sock, val);
+			break;
 
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
@@ -660,6 +663,8 @@ struct sock *sk_alloc(int family, gfp_t
 			sock_lock_init(sk);
 		}
 
+		sk_zc_init(sk);
+
 		if (security_sk_alloc(sk, family, priority))
 			goto out_free;
 
@@ -680,6 +685,7 @@ void sk_free(struct sock *sk)
 {
 	struct sk_filter *filter;
 	struct module *owner = sk->sk_prot_creator->owner;
+	unsigned long flags;
 
 	if (sk->sk_destruct)
 		sk->sk_destruct(sk);
@@ -692,6 +698,8 @@ void sk_free(struct sock *sk)
 
 	sock_disable_timestamp(sk);
 
+	sk_zc_fini(sk);
+
 	if (atomic_read(&sk->sk_omem_alloc))
 		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
 		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
@@ -1320,6 +1328,8 @@ void sock_init_data(struct socket *sock,
 	sk->sk_stamp.tv_usec    = -1L;
 
 	atomic_set(&sk->sk_refcnt, 1);
+
+	sk_zc_init(sk);
 }
 
 void fastcall lock_sock(struct sock *sk)
diff --git a/net/core/zerocopy.c b/net/core/zerocopy.c
new file mode 100644
--- /dev/null
+++ b/net/core/zerocopy.c
@@ -0,0 +1,195 @@
+/*
+ * 	zerocopy.c
+ *
+ * 2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+
+#include <net/inet_hashtables.h>
+
+static int tcp_sendfile_alloc_data(struct zc_buf *zb);
+static int tcp_sendfile_commit_data(struct zc_buf *zb);
+
+static struct zc_handler zc_tcp_sendfile_handler = {
+	.alloc_data		= &tcp_sendfile_alloc_data,
+	.commit_data		= &tcp_sendfile_commit_data,
+};
+
+static DEFINE_SPINLOCK(zc_lock);
+static LIST_HEAD(zc_list);
+
+int zc_alloc_data(struct zc_buf *zb)
+{
+	struct zc_handler *zh;
+	int err = -ENODEV;
+
+	if (unlikely(zb->size > PAGE_SIZE))
+		return err;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(zh, &zc_list, zc_entry) {
+		err = zh->alloc_data(zb);
+		if (!err) {
+			zb->zh = zh;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return err;
+}
+
+int zc_commit_data(struct zc_buf *zb)
+{
+	int err = -EINVAL;
+
+	if (zb->zh)
+		err = zb->zh->commit_data(zb);
+
+	return err;
+}
+
+int zc_add_handler(struct zc_handler *h)
+{
+	if (!h->alloc_data || !h->commit_data)
+		return -EINVAL;
+
+	spin_lock(&zc_lock);
+	list_add_rcu(&h->zc_entry, &zc_list);
+	spin_unlock(&zc_lock);
+
+	return 0;
+}
+
+void zc_del_handler(struct zc_handler *h)
+{
+	spin_lock(&zc_lock);
+	list_del_rcu(&h->zc_entry);
+	spin_unlock(&zc_lock);
+
+	synchronize_rcu();
+}
+
+extern struct inet_hashinfo __cacheline_aligned tcp_hashinfo;
+
+static int tcp_sendfile_alloc_data(struct zc_buf *zb)
+{
+	struct ethhdr *eth;
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+	struct sock *sk;
+	int dif, err = -EINVAL;
+	u32 saddr, daddr;
+	u16 sport, dport;
+
+	if (zb->header_size < sizeof(struct ethhdr) + sizeof(struct iphdr) + sizeof(struct tcphdr))
+		goto err_out_exit;
+
+	eth = zb->header;
+
+	if (eth->h_proto != htons(ETH_P_IP))
+		goto err_out_exit;
+
+	iph = (struct iphdr *)(eth + 1);
+
+	if (iph->protocol != IPPROTO_TCP)
+		goto err_out_exit;
+
+	tcph = (struct tcphdr *)(((u8 *)iph) + iph->ihl*4);
+
+	dif = 0;
+
+	saddr = iph->saddr;
+	sport = tcph->source;
+	daddr = iph->daddr;
+	dport = tcph->dest;
+
+	/*
+	 * I suspect it is not enough to disable BHs,
+	 * since it can be [and is] called from hard IRQ context.
+	 * Must do something with bound devices.
+	 */
+	local_irq_disable();
+	rcu_read_lock();
+	sk = __inet_lookup(&tcp_hashinfo, saddr, sport, daddr, ntohs(dport), dif);
+
+	if (sk) {
+		bh_lock_sock(sk);
+
+		printk("%s: sk=%p, sk->zc_alloc_data=%p, refcnt=%d.\n", __func__, sk, sk->zc_alloc_data, atomic_read(&sk->sk_refcnt));
+#if 1
+		printk("%s: sk=%p, %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u, seq=%u, ack=%u, check=%04x.\n",
+				__func__, sk,
+				NIPQUAD(saddr), ntohs(sport),
+				NIPQUAD(daddr), ntohs(dport),
+				ntohl(tcph->seq), ntohl(tcph->ack_seq),
+				ntohs(tcph->check));
+#endif
+
+		spin_lock(&sk->zc_lock);
+		if (sk->zc_alloc_data && sk->zc_pages) {
+			zb->priv = sk;
+			err = sk->zc_alloc_data(zb);
+			zb->status = (err)?1:0;
+			wake_up(&sk->zc_data_ready);
+		}
+		spin_unlock(&sk->zc_lock);
+#if 1
+		printk("%s: sk=%p, %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u, seq=%u, ack=%u, check=%04x err=%d, DONE.\n",
+				__func__, sk,
+				NIPQUAD(saddr), ntohs(sport),
+				NIPQUAD(daddr), ntohs(dport),
+				ntohl(tcph->seq), ntohl(tcph->ack_seq),
+				ntohs(tcph->check), err);
+#endif
+		bh_unlock_sock(sk);
+		sock_put(sk);
+	}
+	rcu_read_unlock();
+	local_irq_enable();
+
+err_out_exit:
+	return err;
+}
+
+static int tcp_sendfile_commit_data(struct zc_buf *zb)
+{
+	struct sock *sk = zb->priv;
+	int err;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sk->zc_lock, flags);
+	err = sk->zc_commit_data(zb);
+	spin_unlock_irqrestore(&sk->zc_lock, flags);
+
+	wake_up(&sk->zc_data_ready);
+
+	printk("%s: commiting data, sk=%p, size=%4u, err=%d.\n", __func__, sk, zb->size, err);
+
+	return err;
+}
+
+int __init zc_add_tcp(void)
+{
+	return zc_add_handler(&zc_tcp_sendfile_handler);
+}
+
+late_initcall(zc_add_tcp);
diff --git a/net/socket.c b/net/socket.c
--- a/net/socket.c
+++ b/net/socket.c
@@ -44,6 +44,7 @@
  *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
  *		Tigran Aivazian	:	Made listen(2) backlog sanity checks
  *					protocol-independent
+ *		Evgeniy Polyakov:	Receiving zero-copy.
  *
  *
  *		This program is free software; you can redistribute it and/or
@@ -63,6 +64,7 @@
 #include <linux/smp_lock.h>
 #include <linux/socket.h>
 #include <linux/file.h>
+#include <linux/fs.h>
 #include <linux/net.h>
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
@@ -84,6 +86,9 @@
 #include <linux/compat.h>
 #include <linux/kmod.h>
 #include <linux/audit.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/writeback.h>
 
 #ifdef CONFIG_NET_RADIO
 #include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
@@ -116,6 +121,7 @@ static ssize_t sock_writev(struct file *
 			  unsigned long count, loff_t *ppos);
 static ssize_t sock_sendpage(struct file *file, struct page *page,
 			     int offset, size_t size, loff_t *ppos, int more);
+ssize_t sock_sendfile(struct file *file, loff_t *ppos, size_t count, read_actor_t actor, void *target);
 
 
 /*
@@ -136,7 +142,8 @@ static struct file_operations socket_fil
 	.fasync =	sock_fasync,
 	.readv =	sock_readv,
 	.writev =	sock_writev,
-	.sendpage =	sock_sendpage
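The net/core/sock.c hunk above adds a SO_ZEROCOPY case to the SOL_SOCKET option switch (the set_rcvbuf: hunk) and hands the option value to sock_zc_init(sock, val). As a rough userspace sketch, not part of the patch, the following assumes the option is set at the SOL_SOCKET level and that the value is a size hint; the numeric constant is a placeholder, since its real definition is on a page of the listing not shown here.

/* Hypothetical userspace use of the SO_ZEROCOPY option added by the patch.
 * The constant's value and the meaning of val are assumptions, not taken
 * from this listing. */
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60		/* placeholder; the patched kernel headers define the real value */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int val = 4096;		/* assumed: a size hint forwarded to sock_zc_init() */

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* SOL_SOCKET level is an assumption based on the sock.c hunk. */
	if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)) < 0)
		perror("setsockopt(SO_ZEROCOPY)");

	/* ... connect() and transfer data as usual; the kernel side of the
	 * patch steers received payload into the socket's zc_pages. */

	close(fd);
	return 0;
}

zerocopy.c also keeps an RCU-protected list of handlers: the TCP sendfile handler registers itself through zc_add_handler() at late_initcall time, and zc_alloc_data() walks the list until one handler claims the buffer. A hypothetical out-of-tree handler would register the same way. The sketch below assumes only the struct zc_handler members the patch itself uses (alloc_data, commit_data and the zc_entry list hook), that the structure is declared by the patched headers pulled in via linux/skbuff.h, and that the registration functions are exported to modules; none of this is confirmed by this first page, and every identifier prefixed with my_ is invented for illustration.

/* Hypothetical handler module for the zc_add_handler()/zc_del_handler() API. */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/skbuff.h>	/* assumed to declare struct zc_buf / struct zc_handler */

static int my_alloc_data(struct zc_buf *zb)
{
	/* Claim the buffer only if it belongs to this handler; a non-zero
	 * return lets zc_alloc_data() try the next handler on the list. */
	return -ENODEV;
}

static int my_commit_data(struct zc_buf *zb)
{
	/* Called through zc_commit_data() for a buffer this handler claimed. */
	return 0;
}

static struct zc_handler my_zc_handler = {
	.alloc_data	= &my_alloc_data,
	.commit_data	= &my_commit_data,
};

static int __init my_zc_init(void)
{
	return zc_add_handler(&my_zc_handler);
}

static void __exit my_zc_exit(void)
{
	zc_del_handler(&my_zc_handler);
}

module_init(my_zc_init);
module_exit(my_zc_exit);
MODULE_LICENSE("GPL");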
