⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zero-copy.8

📁 实现linux平台下零拷贝技术的软件包。
💻 8
📖 第 1 页 / 共 4 页
字号:
+}
+
+static int tcp_udp_v4_sendfile_cleanup(struct zc_handler *zh, struct socket *sock, struct sock_zc_setup_data *p)
+{
+	int err;
+
+	err = tcp_udp_v4_sendfile_check(zh, sock, p);
+	if (err)
+		return err;
+	
+	sk_zc_fini(sock->sk->zsk);
+	return 0;
+}
+
+int __init zc_add_tcp(void)
+{
+	int i;
+
+	for (i=0; i<tcp_udp_v4_zc_handler.sock_bucket_number; ++i) {
+		INIT_LIST_HEAD(&tcp_udp_v4_zc_handler.sock_bucket[i].list);
+		rwlock_init(&tcp_udp_v4_zc_handler.sock_bucket[i].lock);
+	}
+
+	atomic_set(&tcp_udp_v4_zc_handler.refcnt, 1);
+
+	return zc_add_handler(&tcp_udp_v4_zc_handler);
+}
+
+late_initcall(zc_add_tcp);
diff --git a/net/socket.c b/net/socket.c
--- a/net/socket.c
+++ b/net/socket.c
@@ -44,6 +44,7 @@
  *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
  *		Tigran Aivazian	:	Made listen(2) backlog sanity checks 
  *					protocol-independent
+ *		Evgeniy Polyakov:	Receiving zero-copy.
  *
  *
  *		This program is free software; you can redistribute it and/or
@@ -63,6 +64,7 @@
 #include <linux/smp_lock.h>
 #include <linux/socket.h>
 #include <linux/file.h>
+#include <linux/fs.h>
 #include <linux/net.h>
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
@@ -84,6 +86,11 @@
 #include <linux/compat.h>
 #include <linux/kmod.h>
 #include <linux/audit.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/writeback.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
 
 #ifdef CONFIG_NET_RADIO
 #include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
@@ -116,6 +123,7 @@ static ssize_t sock_writev(struct file *
 			  unsigned long count, loff_t *ppos);
 static ssize_t sock_sendpage(struct file *file, struct page *page,
 			     int offset, size_t size, loff_t *ppos, int more);
+static ssize_t sock_sendfile(struct file *file, loff_t *ppos, size_t count, read_actor_t actor, void *target);
 
 
 /*
@@ -136,7 +144,8 @@ static struct file_operations socket_fil
 	.fasync =	sock_fasync,
 	.readv =	sock_readv,
 	.writev =	sock_writev,
-	.sendpage =	sock_sendpage
+	.sendpage =	sock_sendpage,
+	.sendfile =	sock_sendfile,
 };
 
 /*
@@ -726,6 +735,430 @@ static ssize_t sock_aio_write(struct kio
 	return __sock_sendmsg(iocb, sock, &x->async_msg, size);
 }
 
+/*
+ * Attach writable room from the socket's zero-copy page ring to zb->skb
+ * as a page fragment and, when zb->move_data is set, let it fill that room.
+ *
+ * The ring page is picked from the TCP sequence number or, for UDP, from
+ * zsk->zc_page_index; any other IP protocol is rejected.
+ *
+ * Returns 0 when the loop finishes after a successful placement, -ENODEV
+ * when nothing was placed (no pages set up, unsupported protocol, page
+ * full, ready or too small), -ENOMEM when the skb has no free fragment
+ * slot, or the non-positive result of zb->move_data().
+ */
+int zc_sock_alloc_data(struct zc_buf *zb)
+{
+	struct zsock *zsk = zb->priv;
+	struct zc_page *zp;
+	int err = -ENODEV;
+	unsigned int towrite = zb->size;
+	struct sk_buff *skb = zb->skb;
+	struct ethhdr *eth;
+	struct iphdr *ip;
+	int index;
+
+	if (!zsk->zc_pages)
+		goto out;
+
+	eth = (struct ethhdr *)zb->header;
+	ip = (struct iphdr *)(eth+1);
+
+	if (ip->protocol == IPPROTO_TCP) {
+		struct tcphdr *th = (struct tcphdr *)(((u8 *)ip) + ip->ihl*4);
+		/* NOTE(review): divides by the page count, not a size - confirm. */
+		index = (ntohl(th->seq) - zsk->zc_seq_first) / zsk->zc_page_num;
+	} else if (ip->protocol == IPPROTO_UDP) {
+		index = zsk->zc_page_index;
+	} else
+		goto out;
+
+	if (index >= zsk->zc_page_num) {
+		zsk->zc_seq_first = zsk->zc_pages[0].seq;
+		index = 0;
+	}
+
+	while (towrite) {
+		zp = &zsk->zc_pages[index];
+		if (zp->size == zp->used || test_bit(ZC_PAGE_READY, &zp->flags))
+			break;
+		if (zp->size - zp->used < towrite && !zb->move_data)
+			break;
+
+		if (skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS) {
+			err = -ENOMEM;
+			break;
+		}
+		/*
+		 * Setup fragment with offset to point to the area where
+		 * we actually can write without overwriting old data.
+		 * Setup fragment size to be equal not to the real data size,
+		 * but size of the area where we actually can write data into.
+		 */
+		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, zp->page, zp->page_offset+zp->used, zp->size-zp->used);
+
+		printk("%s: [%1d.%1d] data=%p, size=%4u, used=%4u, towrite=%4u.\n", 
+				__func__, zsk->zc_page_index, skb_shinfo(skb)->nr_frags-1,
+				zp->page, zp->size, zp->used, towrite);
+
+		if (zb->move_data) {
+			unsigned int sz = min(zp->size - zp->used, towrite);
+
+			err = zb->move_data(zb, sz);
+			if (err <= 0)
+				break;
+
+			if (zp->used + err == zp->size) {
+				set_bit(ZC_PAGE_READY, &zp->flags);
+				
+				if (++zsk->zc_page_index == zsk->zc_page_num)
+					zsk->zc_page_index = 0;
+			}
+		} else 
+			err = zb->size;
+
+		skb->len	+= err;
+		skb->data_len	+= err;
+		skb->truesize	+= err;
+
+		towrite 	-= err;
+		zp->used 	+= err;
+
+		err = 0;
+	}
+
+out:
+	return err;
+}
+
+/*
+ * Flag the current ring page as ready once it is completely used and
+ * advance zsk->zc_page_index, wrapping at zsk->zc_page_num.
+ *
+ * Returns -1 when no pages are set up, 1 when zb->size differs from the
+ * size of the current page, 0 otherwise.
+ */
+int zc_sock_commit_data(struct zc_buf *zb)
+{
+	struct zsock *zsk = zb->priv;
+	struct zc_page *zp;
+
+	if (!zsk->zc_pages)
+		return -1;
+
+	zp = &zsk->zc_pages[zsk->zc_page_index];
+
+	if (unlikely(zb->size != zp->size))
+		return 1;
+
+	if (zp->used == zp->size) {
+		set_bit(ZC_PAGE_READY, &zp->flags);
+		if (++zsk->zc_page_index == zsk->zc_page_num)
+			zsk->zc_page_index = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * This should process all socket's related stuff,
+ * for example emit TCP ACKs...
+ * Since zero-copy skb can only have valid header,
+ * this should process that header at skb->data.
+ * skb_copy_datagram_iovec() is changed to not even touch
+ * zero-copied skb.
+ */
+
+static u8 message_buf[PAGE_SIZE];
+
+/*
+ * Non-blocking read of at most min(ack_size, PAGE_SIZE) bytes from sock
+ * into the shared scratch buffer, whose contents are never looked at.
+ * Returns the result of kernel_recvmsg().
+ */
+static int receive_message(struct socket *sock, unsigned int ack_size)
+{
+	struct msghdr msg;
+	struct kvec iov;
+	int err;
+
+	sock->sk->sk_allocation |= GFP_NOIO;
+	iov.iov_base = message_buf;
+	iov.iov_len = min(ack_size, (unsigned int)sizeof(message_buf));
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT;
+
+	err = kernel_recvmsg(sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags);
+
+	printk("%s: kernel_recvmsg returned %d, ack_size=%u.\n", __func__, err, ack_size);
+
+	return err;
+}
+
+/*
+ * Bind the file referenced by priv->fd to the socket as a zero-copy
+ * receive target: validate the file, allocate the zsock and a ring of
+ * 16 prepared pages, publish them in the zsock and attach it to sock->sk.
+ *
+ * Returns 0 on success or a negative errno; on failure the file
+ * reference, the zsock reference and the prepared pages are released.
+ */
+int tcp_udp_v4_sock_zc_init(struct socket *sock, struct tcp_udp_v4_priv *priv)
+{
+	struct file *file;
+	struct zsock *zsk;
+	struct zc_page *zc_pages, *zp;
+	int pnum_max, err, i;
+	unsigned long flags;
+	struct address_space *mapping;
+	size_t count;
+
+	/*
+	 * Sane setup.
+	 */
+	count = INT_MAX;
+	pnum_max = 16;
+
+	if (!sock->sk)
+		return -EINVAL;
+
+	err = -EBADF;
+	file = fget(priv->fd);
+	if (!file)
+		goto err_out_exit;
+	if (!(file->f_mode & FMODE_WRITE))
+		goto err_out_fput;
+	err = -ETXTBSY;
+	if (file->f_mode & FMODE_ZEROCOPY)
+		goto err_out_fput;
+	err = -EINVAL;
+	if (!file->f_op)
+		goto err_out_fput;
+
+	/* Only a negative result is a failure; success may be a byte count. */
+	err = rw_verify_area(WRITE, file, &file->f_pos, count);
+	if (err < 0)
+		goto err_out_fput;
+
+	err = security_file_permission(file, MAY_WRITE);
+	if (err)
+		goto err_out_fput;
+
+	err = -ENOMEM;
+	zsk = zsk_alloc(&tcp_udp_v4_zc_handler, priv, sizeof(*priv), NULL, GFP_KERNEL);
+	if (!zsk)
+		goto err_out_fput;
+
+	mapping = file->f_mapping;
+
+	zc_pages = kzalloc(sizeof(struct zc_page) * pnum_max, GFP_KERNEL);
+	if (!zc_pages) {
+		err = -ENOMEM;
+		goto err_out_zsk_put;
+	}
+
+	pagevec_init(&zsk->zc_lru_pvec, 0);
+
+	err = 0;
+	for (i=0; i<pnum_max; ++i) {
+		zp = &zc_pages[i];
+
+		err = prepare_page(zp, zsk, file, mapping, &zsk->zc_pos, count, &zsk->zc_lru_pvec);
+		if (unlikely(err))
+			goto err_out_commit_pages;
+	}
+
+	file->f_mode |= FMODE_ZEROCOPY;
+
+	spin_lock_irqsave(&zsk->zc_lock, flags);
+	zsk->zc_file		= file;
+	zsk->zc_pages 		= zc_pages;
+	zsk->zc_page_num 	= pnum_max;
+	zsk->zc_page_index 	= 0;
+	zsk->zc_alloc_data	= &zc_sock_alloc_data;
+	zsk->zc_commit_data	= &zc_sock_commit_data;
+	spin_unlock_irqrestore(&zsk->zc_lock, flags);
+
+	err = tcp_udp_v4_zc_sock_insert(zsk);
+	if (err) {
+		/* We set the flag above, so it is ours to clear again. */
+		file->f_mode &= ~FMODE_ZEROCOPY;
+		i = pnum_max;
+		goto err_out_commit_pages;
+	}
+	
+	sock->sk->zsk = zsk;
+
+	printk("%s: sk=%p, zsk=%p, %d pages have been set up.\n", __func__, sock->sk, zsk, pnum_max);
+
+	return 0;
+
+err_out_commit_pages:
+	for (--i; i>=0; --i)
+		commit_page(&zc_pages[i], file, mapping);
+
+	kfree(zc_pages);
+err_out_zsk_put:
+	zsk_put(zsk);
+err_out_fput:
+	/*
+	 * FMODE_ZEROCOPY is deliberately left alone here: on the -ETXTBSY
+	 * path it belongs to whoever set it before us.
+	 */
+	fput(file);
+err_out_exit:
+	return err;
+}
+
+/*
+ * Record seq as the first expected sequence number and assign each ring
+ * page its starting sequence number (seq plus the sizes of all pages
+ * before it).  Always returns 0.
+ */
+int sock_zc_setup_seq(struct zsock *zsk, u32 seq)
+{
+	unsigned long flags;
+	u32 off = 0;
+
+	spin_lock_irqsave(&zsk->zc_lock, flags);
+	zsk->zc_seq_first = seq;
+	
+	if (zsk->zc_pages) {
+		int i;
+		struct zc_page *zp;
+
+		for (i=0; i<zsk->zc_page_num; ++i) {
+			zp = &zsk->zc_pages[i];
+
+			zp->seq = zsk->zc_seq_first + off;
+			off += zp->size;
+		}
+	}
+	spin_unlock_irqrestore(&zsk->zc_lock, flags);
+	
+	printk("%s: zc_seq_first=%u, off=%u.\n", __func__, zsk->zc_seq_first, off);
+
+	return 0;
+}
+
+
+/*
+ * ->sendfile() for sockets with a zero-copy setup: wait (5 s at a time)
+ * for ring pages to become ready, commit each one to the target file,
+ * prepare it again, and read the matching number of bytes off the socket.
+ *
+ * in_file is the socket file, target is the destination struct file;
+ * actor is unused.  Loops until count bytes were committed, a page commit
+ * fails or a signal is pending, then calls sk_zc_fini() and detaches the
+ * zsock from the socket.
+ *
+ * Returns the number of bytes committed (also stored in *ppos) or a
+ * negative errno.
+ */
+static ssize_t sock_sendfile(struct file *in_file, loff_t *ppos, size_t count, read_actor_t actor, void *target)
+{
+	struct socket *sock;
+	struct sock *sk;
+	int err = 0;
+	size_t written = 0;
+	struct file *file = target;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	unsigned long flags;
+	int i;
+	unsigned int ack_size, zc_page_index = 0;
+	struct zsock *zsk;
+
+	if (!count)
+		return 0;
+
+	sock = SOCKET_I(in_file->f_dentry->d_inode);
+
+	if (!sock || !sock->sk || !sock->sk->zsk) {
+		err = -ENODEV;
+		goto err_out_exit;
+	}
+	sk = sock->sk;
+	zsk = sk->zsk;
+
+	err = generic_write_checks(file, &zsk->zc_pos, &count, S_ISBLK(inode->i_mode));
+	if (err)
+		goto err_out_exit;
+
+	spin_lock_irqsave(&zsk->zc_lock, flags);
+	if (!zsk->zc_pages) {
+		spin_unlock_irqrestore(&zsk->zc_lock, flags);
+		err = -EINVAL;
+		goto err_out_exit;
+	}
+	spin_unlock_irqrestore(&zsk->zc_lock, flags);
+
+	while (count) {
+		struct zc_page *zp;
+
+		wait_event_interruptible_timeout(zsk->zc_data_ready, zsk->zc_page_index != zc_page_index, 5*HZ);
+		zc_page_index = zsk->zc_page_index;
+
+		printk("%s: wakeup: zsk->zc_page_index=%d.\n", __func__, zsk->zc_page_index);
+
+		ack_size = 0;
+		for (i=0; i<zsk->zc_page_num; ++i) {
+			zp = &zsk->zc_pages[i];
+
+			if (test_bit(ZC_PAGE_READY, &zp->flags)) {
+				printk("%s: checking page %p [%d]: page=%p, flags=%08lx, page_offset=%08x, size=%08x, used=%08x, written=%zx.\n", 
+						__func__, zp, i, zp->page, zp->flags, zp->page_offset, zp->size, zp->used, written);
+
+				err = commit_page(zp, file, mapping);
+				if (err)
+					goto err_out_release_all_pages;
+
+				/*
+				 * A ready page may hold more than the caller still
+				 * wants; clamp so that count cannot wrap around.
+				 */
+				count -= min_t(size_t, count, zp->used);
+				written += zp->used;
+				ack_size += zp->used;
+
+				/* NOTE(review): this result is never checked. */
+				err = prepare_page(zp, zsk, file, mapping, &zsk->zc_pos, count, &zsk->zc_lru_pvec);
+			}
+		}
+
+		printk("%s: Going to ack %u bytes.\n", __func__, ack_size);
+
+		while (ack_size > 0) {
+			err = receive_message(sock, ack_size);
+			if (err > 0) {
+				ack_size -= err;
+			} else
+				break;
+		}
+
+		if (signal_pending(current))
+			break;
+	}
+
+	pagevec_lru_add(&zsk->zc_lru_pvec);
+
+	*ppos = written;
+	err = written;
+
+err_out_release_all_pages:
+	printk("%s: Releasing zero-copy socket zsk=%p, sk=%p.\n", __func__, zsk, sk);
+
+	synchronize_rcu();
+	sk_zc_fini(zsk);
+	sk->zsk = NULL;
+
+err_out_exit:
+
+	return err;
+}
+
 static ssize_t sock_sendpage(struct file *file, struct page *page,
 			     int offset, size_t size, loff_t *ppos, int more)
 {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -