⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zero-copy.9

📁 实现Linux平台下零拷贝技术的软件包。
💻 9
📖 第 1 页 / 共 4 页
字号:
+	down(&zc_handler_lock);
+	list_for_each_entry(zh, &zc_handler_list, zc_entry) {
+		if (!zh->setup(zh, sock, p)) {
+			found = 1;
+			break;
+		}
+	}
+	up(&zc_handler_lock);
+
+err_out_exit:
+	return (found)?0:-ENODEV;
+}
+
+/*
+ * Register a zero-copy handler on the global handler list.
+ *
+ * Returns -EINVAL when one of the mandatory callbacks or the socket
+ * bucket table is missing, 0 once the handler has been linked in.
+ *
+ * NOTE(review): the synchronize_rcu() before the insertion is kept from
+ * the original code; nothing visible here requires it - confirm.
+ */
+int zc_add_handler(struct zc_handler *h)
+{
+	if (!h->alloc_data || !h->commit_data || !h->sock_bucket || !h->sock_bucket_number ||
+			!h->setup || !h->cleanup)
+		return -EINVAL;
+
+	synchronize_rcu();
+
+	down(&zc_handler_lock);
+	list_add_rcu(&h->zc_entry, &zc_handler_list);
+	up(&zc_handler_lock);
+
+	return 0;
+}
+
+/*
+ * Unlink a handler from the global handler list.
+ *
+ * The grace period is waited for AFTER list_del_rcu(): only then is it
+ * guaranteed that no RCU reader which found the entry before the unlink
+ * is still using it when this function returns.
+ */
+void zc_del_handler(struct zc_handler *h)
+{
+	down(&zc_handler_lock);
+	list_del_rcu(&h->zc_entry);
+	up(&zc_handler_lock);
+
+	synchronize_rcu();
+}
+
+extern struct page * __grab_cache_page(struct address_space *mapping, unsigned long index,
+			struct page **cached_page, struct pagevec *lru_pvec);
+
+/*
+ * Finish a page previously set up by prepare_page().
+ *
+ * Calls the mapping's commit_write() for the range
+ * [page_offset, page_offset + used), then unlocks the page, marks it
+ * accessed and drops the page reference. Dirty page balancing is only
+ * run when commit_write() did not fail.
+ *
+ * Returns the value returned by commit_write().
+ */
+int commit_page(struct zc_page *zp, struct file *file, struct address_space *mapping)
+{
+	int err;
+	struct address_space_operations *a_ops = mapping->a_ops;
+
+	flush_dcache_page(zp->page);
+	err = a_ops->commit_write(file, zp->page, zp->page_offset, zp->page_offset+zp->used);
+	unlock_page(zp->page);
+	mark_page_accessed(zp->page);
+	page_cache_release(zp->page);
+
+	if (err < 0)
+		goto err_out_exit;
+
+	balance_dirty_pages_ratelimited(mapping);
+
+err_out_exit:
+	return err;
+}
+
+/*
+ * Set up the page cache page backing file position *ppos for receiving.
+ *
+ * At most @count bytes are covered and the range never crosses the end
+ * of the page. On success @zp describes the range (offset, size,
+ * sequence number, no bytes used, ZC_PAGE_READY cleared) and *ppos is
+ * advanced by the number of bytes covered.
+ *
+ * Returns 0 on success, -ENOMEM when no page could be obtained, or the
+ * error returned by prepare_write(); in the latter case the page is
+ * unlocked and released again.
+ */
+int prepare_page(struct zc_page *zp, struct zsock *zsk, struct file *file, struct address_space *mapping,
+		loff_t *ppos, loff_t count, struct pagevec *lru_pvec)
+{
+	unsigned long index;
+	unsigned long page_offset;
+	unsigned long bytes;
+	struct address_space_operations *a_ops = mapping->a_ops;
+	loff_t pos_allocated = *ppos;
+	int err = 0;
+
+	page_offset = (pos_allocated & (PAGE_CACHE_SIZE -1));
+	index = pos_allocated >> PAGE_CACHE_SHIFT;
+	bytes = PAGE_CACHE_SIZE - page_offset;
+	if (bytes > count)
+		bytes = count;
+
+	zp->page = __grab_cache_page(mapping, index, &zsk->zc_cached_page, lru_pvec);
+	if (!zp->page) {
+		err = -ENOMEM;
+		goto err_out_exit;
+	}
+
+	err = a_ops->prepare_write(file, zp->page, page_offset, page_offset+bytes);
+	if (unlikely(err)) {
+		unlock_page(zp->page);
+		page_cache_release(zp->page);
+		goto err_out_exit;
+	}
+
+	zp->page_offset = page_offset;
+	zp->size = bytes;
+	zp->used = 0;
+	zp->seq = zsk->zc_seq_first + pos_allocated;
+	clear_bit(ZC_PAGE_READY, &zp->flags);
+
+	pos_allocated += bytes;
+
+	*ppos = pos_allocated;
+
+err_out_exit:
+	return err;
+}
+
+
+/*
+ * Tear down the zero-copy state of @zsk (a NULL @zsk is ignored).
+ *
+ * The page array and callbacks are detached under the write lock, so
+ * that paths taking zc_lock see them as NULL afterwards. After an RCU
+ * grace period, and only if pages were set up, the receive queue is
+ * purged, the handler is cleaned up and released, every page is
+ * committed and the file reference is dropped.
+ */
+void sk_zc_fini(struct zsock *zsk)
+{
+	if (zsk) {
+		unsigned int zc_page_num;
+		struct zc_page *zc_pages;
+		unsigned long flags;
+		struct sock *sk = NULL;
+
+		write_lock_irqsave(&zsk->zc_lock, flags);
+		zc_page_num 		= zsk->zc_page_num;
+		zc_pages 		= zsk->zc_pages;
+
+		zsk->zc_pages 		= NULL;
+		zsk->zc_page_num 	= 0;
+		zsk->zc_page_index 	= 0;
+		zsk->zc_alloc_data	= NULL;
+		zsk->zc_commit_data	= NULL;
+		if (zsk->sk) {
+			sk 		= zsk->sk;
+			zsk->sk->zsk	= NULL;
+			zsk->sk		= NULL;
+		}
+		write_unlock_irqrestore(&zsk->zc_lock, flags);
+
+		synchronize_rcu();
+
+		if (zc_page_num) {
+			struct address_space *mapping = zsk->zc_file->f_mapping;
+			/* Unsigned to match zc_page_num in the loop bound below. */
+			unsigned int i;
+
+			if (sk)
+				skb_queue_purge(&sk->sk_receive_queue);
+
+			zsk->handler->cleanup(zsk);
+			zc_handler_put(zsk->handler);
+
+			/*
+			 * No new skbs can contribute data into VFS cache after this
+			 * condition, so we only must care about those which are
+			 * in socket queue already or will be inserted there after
+			 * allocation, but allocation itself will always fail
+			 * due to above locked changes.
+			 */
+
+			if (zsk->zc_cached_page) {
+				page_cache_release(zsk->zc_cached_page);
+				zsk->zc_cached_page = NULL;
+			}
+
+			for (i=0; i<zc_page_num; ++i)
+				commit_page(&zc_pages[i], zsk->zc_file, mapping);
+
+			zsk->zc_file->f_mode &= ~FMODE_ZEROCOPY;
+			fput(zsk->zc_file);
+			zsk->zc_file = NULL;
+
+			kfree(zc_pages);
+		}
+	}
+}
+
+/*
+ * Put the zero-copy members of a freshly allocated zsock into their
+ * empty state: lock and wait queue initialised, no pages, no callbacks,
+ * no file and no cached page.
+ */
+static void sk_zc_init(struct zsock *zsk)
+{
+	rwlock_init(&zsk->zc_lock);
+	init_waitqueue_head(&zsk->zc_data_ready);
+	zsk->zc_pages		= NULL;
+	zsk->zc_page_num	= 0;
+	zsk->zc_page_index	= 0;
+	zsk->zc_alloc_data	= NULL;
+	zsk->zc_commit_data	= NULL;
+	zsk->zc_file		= NULL;
+	zsk->zc_cached_page	= NULL;
+}
+
+/*
+ * Allocate a zsock bound to @handler.
+ *
+ * @priv_size bytes are copied from @priv into storage that directly
+ * follows the structure. A reference on @handler is taken. When
+ * @insert is given it is called with the new zsock; if it fails the
+ * handler reference is dropped and the zsock is freed.
+ *
+ * Returns the new zsock, or NULL on allocation or insertion failure.
+ */
+struct zsock *zsk_alloc(struct zc_handler *handler, void *priv, unsigned int priv_size, int (* insert)(struct zsock *zsk), gfp_t gfp_mask)
+{
+	struct zsock *zsk;
+
+	zsk = kzalloc(sizeof(struct zsock) + priv_size, gfp_mask);
+	if (!zsk)
+		return NULL;
+
+	/* 1 for generic socket usage, i.e. it could be removed from sock_close(). */
+	atomic_set(&zsk->refcnt, 1);
+	zsk->handler 	= handler;
+	zsk->priv_size 	= priv_size;
+	if (priv_size) {
+		zsk->priv = zsk+1;
+		memcpy(zsk->priv, priv, priv_size);
+	} else
+		zsk->priv = NULL;
+
+	zc_handler_get(handler);
+
+	sk_zc_init(zsk);
+
+	if (insert) {
+		int err;
+
+		err = insert(zsk);
+		if (err) {
+			zc_handler_put(handler);
+			zsk_free(zsk);
+			return NULL;
+		}
+	}
+
+	return zsk;
+}
+
+/* Release the memory of a zsock obtained from zsk_alloc(). */
+void zsk_free(struct zsock *zsk)
+{
+	kfree(zsk);
+}
+
+/*
+ * Map an address/port 4-tuple to a bucket index.
+ *
+ * The hash is masked with (bucket_number - 1).
+ * NOTE(review): this presumably expects bucket_number to be a power of
+ * two - confirm against the handler definition.
+ */
+static inline u32 tcp_udp_v4_hash(unsigned int bucket_number, const u32 src, const u16 sport, const u32 dst, const u16 dport)
+{
+	return inet_ehashfn(src, sport, dst, dport) & (bucket_number - 1);
+}
+
+/*
+ * Add @zsk to the handler's bucket selected by its private 4-tuple.
+ *
+ * Returns 0 on success, -ENODEV when the zsock has no private data.
+ */
+int tcp_udp_v4_zc_sock_insert(struct zsock *zsk)
+{
+	u32 hash;
+	unsigned long flags;
+	struct tcp_udp_v4_priv *priv = zsk_priv(zsk);
+	struct zc_sock_bucket *b;
+
+	if (!priv)
+		return -ENODEV;
+
+	hash = tcp_udp_v4_hash(zsk->handler->sock_bucket_number, priv->src, priv->sport, priv->dst, priv->dport);
+
+	b = &zsk->handler->sock_bucket[hash];
+
+	write_lock_irqsave(&b->lock, flags);
+	list_add_rcu(&zsk->zc_entry, &b->list);
+	write_unlock_irqrestore(&b->lock, flags);
+
+	return 0;
+}
+
+/*
+ * Remove @zsk from the handler's bucket selected by its private 4-tuple.
+ *
+ * Returns 0 on success, -ENODEV when the zsock has no private data.
+ */
+int tcp_udp_v4_zc_sock_remove(struct zsock *zsk)
+{
+	u32 hash;
+	unsigned long flags;
+	struct tcp_udp_v4_priv *priv = zsk_priv(zsk);
+	struct zc_sock_bucket *b;
+
+	if (!priv)
+		return -ENODEV;
+
+	hash = tcp_udp_v4_hash(zsk->handler->sock_bucket_number, priv->src, priv->sport, priv->dst, priv->dport);
+
+	b = &zsk->handler->sock_bucket[hash];
+
+	write_lock_irqsave(&b->lock, flags);
+	list_del_rcu(&zsk->zc_entry);
+	write_unlock_irqrestore(&b->lock, flags);
+
+	return 0;
+}
+
+/*
+ * Must be called under RCU cover and with interrupts disabled.
+ *
+ * Returns the zsock whose private 4-tuple equals the given one, with a
+ * reference taken via zsk_get(), or NULL when there is no match.
+ */
+static struct zsock *tcp_udp_v4_zc_sock_lookup(const struct zc_sock_bucket *bucket, const unsigned int bucket_number,
+		const u32 src, const u16 sport, const u32 dst, const u16 dport)
+{
+	u32 hash = tcp_udp_v4_hash(bucket_number, src, sport, dst, dport);
+	struct zsock *zsk;
+	struct tcp_udp_v4_priv *priv;
+
+	list_for_each_entry_rcu(zsk, &bucket[hash].list, zc_entry) {
+		priv = zsk_priv(zsk);
+
+		if (priv->sport == sport && priv->dport == dport && priv->src == src && priv->dst == dst) {
+			zsk_get(zsk);
+			return zsk;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * alloc_data callback: parse the Ethernet/IPv4/TCP-or-UDP headers in
+ * @zb, find the matching zsock and let it allocate space for the data.
+ *
+ * The packet's destination address/port is matched against the zsock's
+ * source, and the packet's source against the zsock's destination.
+ *
+ * Returns -EINVAL when the headers are too short, not IPv4 TCP/UDP, or
+ * no zsock with an active zc_alloc_data callback matches; otherwise the
+ * value returned by that callback.
+ */
+static int tcp_udp_v4_sendfile_alloc_data(struct zc_handler *zh, struct zc_buf *zb)
+{
+	struct ethhdr *eth;
+	struct iphdr *iph;
+	struct zsock *zsk;
+	int err = -EINVAL;
+	u16 sport, dport;
+	unsigned long flags;
+	unsigned int ip_hlen;
+	u32 seq, ack;
+
+	if (zb->header_size < sizeof(struct ethhdr) + sizeof(struct iphdr))
+		goto err_out_exit;
+
+	eth = zb->header;
+
+	if (eth->h_proto != htons(ETH_P_IP))
+		goto err_out_exit;
+
+	iph = (struct iphdr *)(eth + 1);
+
+	if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
+		goto err_out_exit;
+
+	/*
+	 * The transport header starts ihl 32-bit words after the IP header
+	 * start, which is more than sizeof(struct iphdr) when IP options
+	 * are present. Validate against the real length so the transport
+	 * header is never read beyond header_size.
+	 */
+	ip_hlen = iph->ihl * 4;
+	if (ip_hlen < sizeof(struct iphdr))
+		goto err_out_exit;
+
+	if (iph->protocol == IPPROTO_TCP) {
+		struct tcphdr *tcph = (struct tcphdr *)(((u8 *)iph) + ip_hlen);
+		if (zb->header_size < sizeof(struct ethhdr) + ip_hlen + sizeof(struct tcphdr))
+			goto err_out_exit;
+		sport = tcph->source;
+		dport = tcph->dest;
+		seq = ntohl(tcph->seq);
+		ack = ntohl(tcph->ack_seq);
+	} else {
+		struct udphdr *udph = (struct udphdr *)(((u8 *)iph) + ip_hlen);
+		if (zb->header_size < sizeof(struct ethhdr) + ip_hlen + sizeof(struct udphdr))
+			goto err_out_exit;
+		sport = udph->source;
+		dport = udph->dest;
+		seq = ack = 0;
+	}
+
+	local_irq_save(flags);
+	rcu_read_lock();
+	zsk = tcp_udp_v4_zc_sock_lookup(zh->sock_bucket, zh->sock_bucket_number, iph->daddr, dport, iph->saddr, sport);
+	if (zsk) {
+#if 1
+		printk("%s: %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u, seq=%u, ack=%u.\n",
+			__func__, NIPQUAD(iph->saddr), ntohs(sport), NIPQUAD(iph->daddr), ntohs(dport), seq, ack);
+#endif
+
+		read_lock(&zsk->zc_lock);
+		if (zsk->zc_alloc_data && zsk->zc_pages) {
+			zb->priv = zsk;
+			err = zsk->zc_alloc_data(zb);
+			zb->status = (err)?1:0;
+			wake_up(&zsk->zc_data_ready);
+		}
+		read_unlock(&zsk->zc_lock);
+		zsk_put(zsk);
+	}
+	rcu_read_unlock();
+	local_irq_restore(flags);
+
+err_out_exit:
+	return err;
+}
+
+/*
+ * commit_data callback: hand @zb to the commit callback of the zsock
+ * stored in zb->priv and wake up waiters on zc_data_ready.
+ *
+ * sk_zc_fini() resets zc_commit_data to NULL under the write lock, so
+ * the pointer is checked under the read lock before it is called.
+ *
+ * Returns the callback's result, or -EINVAL when the zsock has no
+ * commit callback any more.
+ */
+static int tcp_udp_v4_sendfile_commit_data(struct zc_handler *zh, struct zc_buf *zb)
+{
+	struct zsock *zsk = zb->priv;
+	int err = -EINVAL;
+	unsigned long flags;
+
+	read_lock_irqsave(&zsk->zc_lock, flags);
+	if (zsk->zc_commit_data)
+		err = zsk->zc_commit_data(zb);
+	read_unlock_irqrestore(&zsk->zc_lock, flags);
+
+	wake_up(&zsk->zc_data_ready);
+
+	return err;
+}
+
+/*
+ * Validate user supplied setup data: the type must be IPPROTO_TCP or
+ * IPPROTO_UDP and the size must equal sizeof(struct tcp_udp_v4_priv).
+ * Both fields are converted with ntohl() before they are compared.
+ *
+ * Returns 0 when the data is acceptable, -EINVAL otherwise.
+ */
+static int tcp_udp_v4_sendfile_check(struct zc_handler *zh, struct socket *sock, struct sock_zc_setup_data *p)
+{
+	u32 type = ntohl(p->type);
+	u32 size = ntohl(p->size);
+
+	if (type != IPPROTO_TCP && type != IPPROTO_UDP)
+		return -EINVAL;
+
+	if (size != sizeof(struct tcp_udp_v4_priv))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * setup callback: validate @p and, if it is acceptable, initialise
+ * zero-copy on @sock with the private data carried in p->data.
+ *
+ * Returns the validation error, or the result of
+ * tcp_udp_v4_sock_zc_init().
+ */
+static int tcp_udp_v4_sendfile_setup(struct zc_handler *zh, struct socket *sock, struct sock_zc_setup_data *p)
+{
+	struct tcp_udp_v4_priv *priv = (struct tcp_udp_v4_priv *)p->data;
+	int err;
+
+	err = tcp_udp_v4_sendfile_check(zh, sock, p);
+	if (err)
+		return err;
+
+	return tcp_udp_v4_sock_zc_init(sock, priv);
+}
+
+/*
+ * cleanup callback: remove @zsk from its socket bucket. The result of
+ * the removal is ignored; 0 is always returned.
+ */
+static int tcp_udp_v4_sendfile_cleanup(struct zsock *zsk)
+{
+	tcp_udp_v4_zc_sock_remove(zsk);
+	return 0;
+}
+
+/*
+ * Initialise every socket bucket of the TCP/UDP v4 handler, set its
+ * reference counter to 1 and register it. Run as a late initcall.
+ *
+ * Returns the result of zc_add_handler().
+ */
+static int zc_add_tcp(void)
+{
+	int i;
+
+	for (i=0; i<tcp_udp_v4_zc_handler.sock_bucket_number; ++i) {
+		INIT_LIST_HEAD(&tcp_udp_v4_zc_handler.sock_bucket[i].list);
+		rwlock_init(&tcp_udp_v4_zc_handler.sock_bucket[i].lock);
+	}
+
+	atomic_set(&tcp_udp_v4_zc_handler.refcnt, 1);
+
+	return zc_add_handler(&tcp_udp_v4_zc_handler);
+}
+
+late_initcall(zc_add_tcp);
diff --git a/net/socket.c b/net/socket.c
--- a/net/socket.c
+++ b/net/socket.c
@@ -44,6 +44,7 @@
  *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
  *		Tigran Aivazian	:	Made listen(2) backlog sanity checks 
  *					protocol-independent
+ *		Evgeniy Polyakov:	Receiving zero-copy.
  *
  *
  *		This program is free software; you can redistribute it and/or
@@ -63,6 +64,7 @@
 #include <linux/smp_lock.h>
 #include <linux/socket.h>
 #include <linux/file.h>
+#include <linux/fs.h>
 #include <linux/net.h>
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
@@ -84,6 +86,11 @@
 #include <linux/compat.h>
 #include <linux/kmod.h>
 #include <linux/audit.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/writeback.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
 
 #ifdef CONFIG_NET_RADIO
 #include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
@@ -116,6 +123,7 @@ static ssize_t sock_writev(struct file * 
 			  unsigned long count, loff_t *ppos);
 static ssize_t sock_sendpage(struct file *file, struct page *page,
 			     int offset, size_t size, loff_t *ppos, int more);
+static ssize_t sock_sendfile(struct file *file, loff_t *ppos, size_t count, read_actor_t actor, void *target);
 
 
 /*
@@ -136,7 +144,8 @@ static struct file_operations socket_fil
 	.fasync =	sock_fasync,
 	.readv =	sock_readv,
 	.writev =	sock_writev,
-	.sendpage =	sock_sendpage
+	.sendpage =	sock_sendpage,
+	.sendfile =	sock_sendfile,
 };
 
 /*
@@ -726,6 +735,467 @@ static ssize_t sock_aio_write(struct kio
 	return __sock_sendmsg(iocb, sock, &x->async_msg, size);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -