⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zero-copy.11

📁 实现linux平台下零拷贝技术的软件包。
💻 11
📖 第 1 页 / 共 4 页
字号:
++#define ZC_POOL_SIZE		1024++extern mempool_t *idx_pool;++#define ZC_MAX_IDX		4++enum zc_state {+	ZC_OK = 0,+	ZC_GROW_UP,+	ZC_GROW_DOWN,+	ZC_GROW_BOTH,+	ZC_NEXT,+};++struct zc_index+{+	u16			off;+	u16			size;+};++struct zc_index_list_entry+{+	struct list_head	entry;+	struct zc_index		idx;+};++struct zc_page+{+	struct page		*page;+	unsigned int		page_offset;+	unsigned int		size;+	unsigned int		used;+	u32			seq;+	long			flags;+	spinlock_t		lock;++	unsigned int		idx_num;+	struct zc_index		idx[ZC_MAX_IDX];+	struct list_head	idx_list;+};++struct zsock+{+	struct list_head	zc_entry;+	struct zc_handler	*handler;+	atomic_t		refcnt;+	struct sock		*sk;+	int			(* zc_alloc_data)(struct zc_buf *zb);+	int			(* zc_commit_data)(struct zc_buf *zb);+	wait_queue_head_t	zc_data_ready;+	rwlock_t		zc_lock;+	struct zc_page		*zc_pages;+	long			zc_flags;+	unsigned int		zc_page_num, zc_page_index;+	struct pagevec 		zc_lru_pvec;+	loff_t 			zc_pos;+	struct page		*zc_cached_page;+	struct file		*zc_file;+	u32			zc_seq_first;+	void			*priv;+	unsigned int		priv_size;+};++int sock_zc_setup_seq(struct zsock *zsk, u32 seq);+void sk_zc_fini(struct zsock *zsk);++int zc_setup(struct socket *sk, void *data, unsigned int size);+void zc_cleanup(struct zsock *zsk);++int zc_sock_alloc_data(struct zc_buf *zb);+int zc_sock_commit_data(struct zc_buf *zb);++int zc_alloc_data(struct zc_buf *zb);+int zc_commit_data(struct zc_buf *zb);++struct zsock *zsk_alloc(struct zc_handler *handler, void *priv, unsigned int priv_size, int (* insert)(struct zsock *zsk), gfp_t gfp_mask);+void zsk_free(struct zsock *zsk);++static inline void zc_handler_get(struct zc_handler *zc)+{+	atomic_inc(&zc->refcnt);+}++static inline void zc_handler_put(struct zc_handler *zc)+{+	if (atomic_dec_and_test(&zc->refcnt))+		printk(KERN_DEBUG "Releasing zc=%p.\n", zc);+}++static inline void *zsk_priv(struct zsock *zsk)+{+	return zsk->priv;+}++static inline void zsk_get(struct zsock *zsk)+{+	atomic_inc(&zsk->refcnt);+}++static inline void zsk_put(struct zsock *zsk)+{+	if (atomic_dec_and_test(&zsk->refcnt))+		zsk_free(zsk);+}++int tcp_udp_v4_zc_sock_insert(struct zsock *zsk);+int tcp_udp_v4_sock_zc_init(struct socket *sock, struct tcp_udp_v4_priv *priv);+extern struct zc_handler tcp_udp_v4_zc_handler;++int commit_page(struct zc_page *zp, struct file *file, struct address_space *mapping);+int prepare_page(struct zc_page *zp, struct zsock *zsk, struct file *file, struct address_space *mapping, +		loff_t *ppos, loff_t count, struct pagevec *lru_pvec);+#endif /* __KERNEL__ */+#endif /* __ZEROCOPY_H */diff --git a/mm/filemap.c b/mm/filemap.c--- a/mm/filemap.c+++ b/mm/filemap.c@@ -1663,7 +1663,7 @@ EXPORT_SYMBOL(read_cache_page);  * caller's lru-buffering pagevec.  This function is specifically for  * generic_file_write().  */-static inline struct page *+struct page * __grab_cache_page(struct address_space *mapping, unsigned long index, 			struct page **cached_page, struct pagevec *lru_pvec) {@@ -1692,6 +1692,8 @@ repeat: 	return page; } +EXPORT_SYMBOL_GPL(__grab_cache_page);+ /*  * The logic we want is  *diff --git a/net/core/Makefile b/net/core/Makefile--- a/net/core/Makefile+++ b/net/core/Makefile@@ -3,7 +3,7 @@ #  obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \-	 gen_stats.o gen_estimator.o+	 gen_stats.o gen_estimator.o zerocopy.o  obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/datagram.c b/net/core/datagram.c--- a/net/core/datagram.c+++ b/net/core/datagram.c@@ -214,6 +214,9 @@ int skb_copy_datagram_iovec(const struct 	int i, err, fraglen, end = 0; 	struct sk_buff *next = skb_shinfo(skb)->frag_list; +	if (skb->zerocopy)+		return 0;+	 	if (!len) 		return 0; @@ -382,6 +385,9 @@ int skb_copy_and_csum_datagram_iovec(str { 	unsigned int csum; 	int chunk = skb->len - hlen;+	+	if (skb->zerocopy)+		return 0;  	/* Skip filled elements. 	 * Pretty silly, look at memcpy_toiovec, though 8)diff --git a/net/core/skbuff.c b/net/core/skbuff.c--- a/net/core/skbuff.c+++ b/net/core/skbuff.c@@ -70,6 +70,7 @@  static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly;+static kmem_cache_t *skbuff_head_cache_zerocopy __read_mostly;  /*  *	Keep out-of-line to prevent kernel bloat.@@ -186,6 +187,62 @@ nodata: 	goto out; } +struct sk_buff *__alloc_skb_zerocopy(struct zc_buf *zb, gfp_t gfp_mask)+{+	struct sk_buff *skb = NULL;+	void *data;+	int err;+	unsigned int size = SKB_DATA_ALIGN(zb->header_size);+	+	zb->status = -1;++	if (size > ZEROCOPY_HEADER_CACHE_SIZE)+		goto out;++	skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);+	if (!skb)+		goto out;+	+	data = kmem_cache_alloc(skbuff_head_cache_zerocopy, gfp_mask & ~__GFP_DMA);+	if (!data)+		goto err_out_free_skb;++	memset(skb, 0, offsetof(struct sk_buff, truesize));+	skb->truesize = size + sizeof(struct sk_buff);+	atomic_set(&skb->users, 1);+	skb->head = data;+	skb->data = data;+	skb->tail = data;+	skb->end  = data + size;+	+	atomic_set(&(skb_shinfo(skb)->dataref), 1);+	skb_shinfo(skb)->nr_frags  = 0;+	skb_shinfo(skb)->tso_size = 0;+	skb_shinfo(skb)->tso_segs = 0;+	skb_shinfo(skb)->frag_list = NULL;+	+	skb->ip_summed = CHECKSUM_UNNECESSARY;+	skb->zerocopy = 1;+	/* It could be zerocopied too, but let's use it as is for now. --zbr 2005_10_27 */+	memcpy(skb->data, zb->header, zb->header_size);+	skb_put(skb, zb->header_size);++	zb->skb = skb;++	err = zc_alloc_data(zb);+	if (err)+		goto err_out_free_skb_data;+	+out:+	return skb;+err_out_free_skb_data:+	kmem_cache_free(skbuff_head_cache_zerocopy, data);+err_out_free_skb:+	kmem_cache_free(skbuff_head_cache, skb);+	skb = NULL;+	goto out;+}+ /**  *	alloc_skb_from_cache	-	allocate a network buffer  *	@cp: kmem_cache from which to allocate the data area@@ -288,7 +345,10 @@ void kfree_skbmem(struct sk_buff *skb) 	struct sk_buff *other; 	atomic_t *fclone_ref; -	skb_release_data(skb);+	if (skb->zerocopy)+		kmem_cache_free(skbuff_head_cache_zerocopy, skb->head);+	else+		skb_release_data(skb); 	switch (skb->fclone) { 	case SKB_FCLONE_UNAVAILABLE: 		kmem_cache_free(skbuff_head_cache, skb);@@ -412,6 +472,7 @@ struct sk_buff *skb_clone(struct sk_buff 	C(priority); 	C(protocol); 	n->destructor = NULL;+	n->zerocopy = 0; #ifdef CONFIG_NETFILTER 	C(nfmark); 	C(nfct);@@ -477,6 +538,7 @@ static void copy_skb_header(struct sk_bu 	memcpy(new->cb, old->cb, sizeof(old->cb)); 	new->local_df	= old->local_df; 	new->fclone	= SKB_FCLONE_UNAVAILABLE;+	new->zerocopy	= 0; 	new->pkt_type	= old->pkt_type; 	new->tstamp	= old->tstamp; 	new->destructor = NULL;@@ -1803,6 +1865,14 @@ void __init skb_init(void) 						NULL, NULL); 	if (!skbuff_fclone_cache) 		panic("cannot create skbuff cache");+	+	skbuff_head_cache_zerocopy = kmem_cache_create("skbuff_head_cache_zerocopy",+					      ZEROCOPY_HEADER_CACHE_SIZE + sizeof(struct skb_shared_info),+					      0,+					      SLAB_HWCACHE_ALIGN,+					      NULL, NULL);+	if (!skbuff_head_cache_zerocopy)+		panic("cannot create zerocopy skbuff cache"); }  EXPORT_SYMBOL(___pskb_trim);@@ -1837,3 +1907,4 @@ EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); EXPORT_SYMBOL(skb_append_datato_frags);+EXPORT_SYMBOL(__alloc_skb_zerocopy);diff --git a/net/core/sock.c b/net/core/sock.c--- a/net/core/sock.c+++ b/net/core/sock.c@@ -129,6 +129,8 @@ #include <net/tcp.h> #endif +#include <net/zerocopy.h>+  /* Take into consideration the size of the struct sk_buff overhead in the  * determination of these values, since that is non-constant across  * platforms.  This makes socket queueing behavior and performance@@ -455,6 +457,18 @@ set_rcvbuf: 			spin_unlock_bh(&sk->sk_lock.slock); 			ret = -ENONET; 			break;+		case SO_ZEROCOPY:+			{+				u8 zcdata[256];++				ret = -EINVAL;+				if (optlen > sizeof(zcdata))+					break;+				if (copy_from_user(zcdata, optval, optlen))+					break;+				ret = zc_setup(sock, zcdata, optlen);+			}+			break;  		/* We implement the SO_SNDLOWAT etc to 		   not be settable (1003.1g 5.3) */@@ -684,6 +698,9 @@ void sk_free(struct sock *sk) 	if (sk->sk_destruct) 		sk->sk_destruct(sk); +	sk_zc_fini(sk->zsk);+	zc_cleanup(sk->zsk);+ 	filter = sk->sk_filter; 	if (filter) { 		sk_filter_release(sk, filter);diff --git a/net/core/zerocopy.c b/net/core/zerocopy.cnew file mode 100644--- /dev/null+++ b/net/core/zerocopy.c@@ -0,0 +1,601 @@+/*+ * 	zerocopy.c+ * + * 2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>+ * All rights reserved.+ * + * This program is free software; you can redistribute it and/or modify+ * it under the terms of the GNU General Public License as published by+ * the Free Software Foundation; either version 2 of the License, or+ * (at your option) any later version.+ *+ * This program is distributed in the hope that it will be useful,+ * but WITHOUT ANY WARRANTY; without even the implied warranty of+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the+ * GNU General Public License for more details.+ *+ * You should have received a copy of the GNU General Public License+ * along with this program; if not, write to the Free Software+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA+ */++#include <linux/config.h>+#include <linux/mm.h>+#include <linux/spinlock.h>+#include <linux/list.h>+#include <linux/skbuff.h>+#include <linux/pagemap.h>+#include <linux/swap.h>+#include <linux/writeback.h>+#include <linux/ip.h>+#include <linux/tcp.h>+#include <linux/tcp.h>+#include <linux/udp.h>+#include <linux/fs.h>+#include <linux/file.h>++#include <asm/semaphore.h>++#include <net/inet_hashtables.h>+#include <net/zerocopy.h>++static int tcp_udp_v4_sendfile_alloc_data(struct zc_handler *zh, struct zc_buf *zb);+static int tcp_udp_v4_sendfile_commit_data(struct zc_handler *zh, struct zc_buf *zb);+static int tcp_udp_v4_sendfile_setup(struct zc_handler *zh, struct socket *sock, struct sock_zc_setup_data *p);+static int tcp_udp_v4_sendfile_cleanup(struct zsock *);++#define ZC_HASH_MASK	0xf+static struct zc_sock_bucket tcp_udp_v4_sock_bucket[ZC_HASH_MASK];++struct zc_handler tcp_udp_v4_zc_handler = {+	.alloc_data		= &tcp_udp_v4_sendfile_alloc_data,+	.commit_data		= &tcp_udp_v4_sendfile_commit_data,+	.setup			= &tcp_udp_v4_sendfile_setup,+	.cleanup		= &tcp_udp_v4_sendfile_cleanup,+	.sock_bucket		= tcp_udp_v4_sock_bucket,+	.sock_bucket_number	= ZC_HASH_MASK,+};++static DECLARE_MUTEX(zc_handler_lock);+static LIST_HEAD(zc_handler_list);++static kmem_cache_t *idx_cache;+mempool_t *idx_pool;++static int zc_init(void)+{+	idx_cache = kmem_cache_create("zc_index", sizeof(struct zc_index_list_entry), 0, SLAB_HWCACHE_ALIGN, NULL, NULL);+	if (!idx_cache)+		return -ENOMEM;++	idx_pool = mempool_create(ZC_POOL_SIZE, mempool_alloc_slab, mempool_free_slab, idx_cache);+	if (!idx_pool) {+		kmem_cache_destroy(idx_cache);+		return -ENOMEM;+	}++	return 0;+}++int zc_alloc_data(struct zc_buf *zb)+{+	struct zc_handler *zh;+	int err = -ENODEV;++	if (unlikely(zb->size > PAGE_SIZE))+		return err;++	rcu_read_lock();+	list_for_each_entry_rcu(zh, &zc_handler_list, zc_entry) {+		err = zh->alloc_data(zh, zb);+		if (!err) {+			zb->zh = zh;+			break;+		}+	}+	rcu_read_unlock();++	return err;+}++int zc_commit_data(struct zc_buf *zb)+{+	int err = -EINVAL;+	+	if (zb->zh)+		err = zb->zh->commit_data(zb->zh, zb);+	+	return err;+}++void zc_cleanup(struct zsock *zsk)+{+	if (!zsk)+		return;++	zsk_put(zsk);+}++int zc_setup(struct socket *sock, void *data, unsigned int size)+{+	struct sock_zc_setup_data *p = data;+	int found = 0;+	struct zc_handler *zh;++	if (size <= sizeof(struct sock_zc_setup_data) || +			size != htonl(p->size) + sizeof(struct sock_zc_setup_data)) {+		goto err_out_exit;+	}++	down(&zc_handler_lock);+	list_for_each_entry(zh, &zc_handler_list, zc_entry) {+		if (!zh->setup(zh, sock, p)) {+			found = 1;+			break;+		}+	}+	up(&zc_handler_lock);++err_out_exit:+	return (found)?0:-ENODEV;+}++int zc_add_handler(struct zc_handler *h)+{+	if (!h->alloc_data || !h->commit_data || !h->sock_bucket || !h->sock_bucket_number || +			!h->setup || !h->cleanup)+		return -EINVAL;+	+	synchronize_rcu();++	down(&zc_handler_lock);+	list_add_rcu(&h->zc_entry, &zc_handler_list);+	up(&zc_handler_lock);++	return 0;+}++void zc_del_handler(struct zc_handler *h)+{+	synchronize_rcu();+	+	down(&zc_handler_lock);+	list_del_rcu(&h->zc_entry);+	up(&zc_handler_lock);+}++static inline void zc_clean_page(struct zc_page *zp)+{+	if (likely(zp->idx_num <= ZC_MAX_IDX)) {+		memset(&zp->idx, 0, sizeof(zp->idx));+	} else {+		struct zc_index_list_entry *e, *n;++		list_for_each_entry_safe(e, n, &zp->idx_list, entry) {+			list_del(&e->entry);+			mempool_free(e, idx_pool);+		}+	}++	INIT_LIST_HEAD(&zp->idx_list);+	zp->idx_num = 0;+}++extern struct page * __grab_cache_page(struct address_space *mapping, unsigned long index,+			struct page **cached_page, struct pagevec *lru_pvec);++int commit_page(struct zc_page *zp, struct file *file, struct address_space *mapping)+{+	int err;+	struct address_space_operations *a_ops = mapping->a_ops;++	if (down_interruptible(&mapping->host->i_sem)) {+		err = -EBUSY;+		goto err_out;+	}+	ClearPageReserved(zp->page);+	flush_dcache_page(zp->page);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -