📄 zero-copy.3
字号:
NOTE: this patch text was recovered from a copy whose line breaks had been
collapsed. One diff line per physical line has been restored; blank context
lines were re-inserted so that every hunk matches the old/new line counts in
its header. Tab indentation is reconstructed and should be checked against
the target tree before applying.

The final hunk (net/core/zerocopy.c, declared as 169 added lines) is
truncated after the first declaration inside zc_alloc_data().

Review notes (assumptions to confirm against the full tree):
 * fs/read_write.c: the new guard tests SOCKET_I(inode) for NULL. If
   SOCKET_I() is a container_of-style accessor it can never be NULL, which
   would make the sendpage check dead code - verify.
 * drivers/net/8139too.c: hsize bytes are copied into header[] and
   "pkt_size + 2 - hsize" is used as a length with no visible comparison of
   pkt_size against hsize - verify short frames cannot reach this path.
 * net/core/skbuff.c: __alloc_skb_zerocopy() writes skb_shinfo() fields
   after setting skb->end = data + size; whether zc_alloc_data() reserves
   room for that area is not visible here - verify.

diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -108,6 +108,9 @@
 #include <linux/mii.h>
 #include <linux/completion.h>
 #include <linux/crc32.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/irq.h>
@@ -1895,16 +1898,23 @@ static void rtl8139_rx_err (u32 rx_statu
 }
 
 #if RX_BUF_IDX == 3
-static __inline__ void wrap_copy(struct sk_buff *skb, const unsigned char *ring,
+static __inline__ void __wrap_copy(void *data, const unsigned char *ring,
 				 u32 offset, unsigned int size)
 {
 	u32 left = RX_BUF_LEN - offset;
 
 	if (size > left) {
-		memcpy(skb->data, ring + offset, left);
-		memcpy(skb->data+left, ring, size - left);
+		memcpy(data, ring + offset, left);
+		memcpy(data+left, ring, size - left);
 	} else
-		memcpy(skb->data, ring + offset, size);
+		memcpy(data, ring + offset, size);
+
+}
+
+static __inline__ void wrap_copy(struct sk_buff *skb, const unsigned char *ring,
+				 u32 offset, unsigned int size)
+{
+	__wrap_copy(skb->data, ring, offset, size);
 }
 #endif
 
@@ -2005,34 +2015,71 @@ no_early_rx:
 			goto out;
 		}
 
-		/* Malloc up new buffer, compatible with net-2e. */
-		/* Omit the four octet CRC from the length. */
+		{
+			u8 header[128];
+			int hsize = sizeof(struct ethhdr) + sizeof(struct iphdr) + sizeof(struct tcphdr) + 2;
+			int status;
+			void *priv;
+			struct zc_handler *zh;
 
-		skb = dev_alloc_skb (pkt_size + 2);
-		if (likely(skb)) {
-			skb->dev = dev;
-			skb_reserve (skb, 2);	/* 16 byte align the IP fields. */
 #if RX_BUF_IDX == 3
-			wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);
+			__wrap_copy(header, rx_ring, ring_offset+4, hsize);
 #else
-			eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
+			memcpy(header, &rx_ring[ring_offset + 4], hsize);
 #endif
-			skb_put (skb, pkt_size);
+			
+			skb = alloc_skb_zerocopy(pkt_size +2 - hsize, GFP_ATOMIC, 
+					header, sizeof(header), &priv, &zh, &status);
+			if (skb) {
+				dev->last_rx = jiffies;
+				tp->stats.rx_bytes += pkt_size;
+				tp->stats.rx_packets++;
 
-			skb->protocol = eth_type_trans (skb, dev);
+#if RX_BUF_IDX == 3
+				wrap_copy(skb, rx_ring, ring_offset+4 + hsize, pkt_size+2-hsize);
+#else
+				memcpy(skb->data, &rx_ring[ring_offset + 4 + hsize], pkt_size+2-hsize);
+#endif
+				kfree_skb_zerocopy(skb, header, hsize, priv, zh);
+				
+			} else if (status == -1) {
+				/* Malloc up new buffer, compatible with net-2e. */
+				/* Omit the four octet CRC from the length. */
+				
+				skb = dev_alloc_skb (pkt_size + 2);
+				if (likely(skb)) {
+					skb->dev = dev;
+					skb_reserve (skb, 2);	/* 16 byte align the IP fields. */
+#if RX_BUF_IDX == 3
+					wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);
+#else
+					eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
+#endif
+					skb_put (skb, pkt_size);
 
-			dev->last_rx = jiffies;
-			tp->stats.rx_bytes += pkt_size;
-			tp->stats.rx_packets++;
+					skb->protocol = eth_type_trans (skb, dev);
 
-			netif_receive_skb (skb);
-		} else {
-			if (net_ratelimit()) 
-				printk (KERN_WARNING
-					"%s: Memory squeeze, dropping packet.\n",
-					dev->name);
-			tp->stats.rx_dropped++;
+					dev->last_rx = jiffies;
+					tp->stats.rx_bytes += pkt_size;
+					tp->stats.rx_packets++;
+
+					netif_receive_skb (skb);
+				} else {
+					if (net_ratelimit()) 
+						printk (KERN_WARNING
+							"%s: Memory squeeze, dropping packet.\n",
+							dev->name);
+					tp->stats.rx_dropped++;
+				}
+			} else {
+				if (net_ratelimit()) 
+					printk (KERN_WARNING
+						"%s: Zero-copy failed, dropping packet.\n",
+						dev->name);
+				tp->stats.rx_dropped++;
+			}
 		}
+
 		received++;
 
 		cur_rx = (cur_rx + rx_size + 4 + 3) & ~3;
diff --git a/fs/read_write.c b/fs/read_write.c
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,8 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 
+#include <net/sock.h>
+
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
@@ -670,8 +672,15 @@ static ssize_t do_sendfile(int out_fd, i
 	if (!(out_file->f_mode & FMODE_WRITE))
 		goto fput_out;
 	retval = -EINVAL;
-	if (!out_file->f_op || !out_file->f_op->sendpage)
+	if (!out_file->f_op)
+		goto fput_out;
+	
+	if (!SOCKET_I(in_file->f_dentry->d_inode) && !out_file->f_op->sendpage) {
+		printk("%s: sock=%p, sendpage=%p.\n", __func__, 
+			SOCKET_I(in_file->f_dentry->d_inode), out_file->f_op->sendpage);
 		goto fput_out;
+	}
+
 	out_inode = out_file->f_dentry->d_inode;
 	retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
 	if (retval)
@@ -688,7 +697,7 @@ static ssize_t do_sendfile(int out_fd, i
 	retval = -EINVAL;
 	if (unlikely(pos < 0))
 		goto fput_out;
-	if (unlikely(pos + count > max)) {
+	if (unlikely((unsigned long long)(pos + count) > (unsigned long long)max)) {
 		retval = -EOVERFLOW;
 		if (pos >= max)
 			goto fput_out;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1045,6 +1045,30 @@ static inline struct sk_buff *dev_alloc_
 	return __dev_alloc_skb(length, GFP_ATOMIC);
 }
 
+struct zc_handler
+{
+	struct list_head	zc_entry;
+	void *(* alloc_data)(struct zc_handler *zh, void *header, unsigned int header_size, unsigned int size, void **priv, int *status);
+	void (* commit_data)(struct zc_handler *zh, void *header, unsigned int header_size, unsigned int size, void *priv);
+};
+
+extern void __kfree_skb_zerocopy(struct sk_buff *skb, void *header, unsigned int header_size, 
+		void *priv, struct zc_handler *zh);
+extern struct sk_buff *__alloc_skb_zerocopy(unsigned int size, gfp_t gfp_mask,
+		void *header, unsigned int header_size, void **priv, struct zc_handler **zh, int *status);
+
+static inline struct sk_buff *alloc_skb_zerocopy(unsigned int size, gfp_t gfp_mask,
+		void *header, unsigned int header_size, void **priv, struct zc_handler **zh, int *status)
+{
+	return __alloc_skb_zerocopy(size, gfp_mask, header, header_size, priv, zh, status);
+}
+
+static inline void kfree_skb_zerocopy(struct sk_buff *skb,
+		void *header, unsigned int header_size, void *priv, struct zc_handler *zh)
+{
+	__kfree_skb_zerocopy(skb, header, header_size, priv, zh);
+}
+
 /**
  *	skb_cow - copy header of skb when it is required
  *	@skb: buffer to cow
diff --git a/include/net/sock.h b/include/net/sock.h
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -117,6 +117,20 @@ struct sock_common {
 	struct proto		*skc_prot;
 };
 
+enum zc_flags {
+	ZC_PAGE_READY = 0,
+};
+
+struct zc_page
+{
+	struct page		*page;
+	struct page		*cached_page;
+	unsigned int		page_offset;
+	unsigned int		size;
+	unsigned int		used;
+	long			flags;
+};
+
 /**
   *	struct sock - network layer representation of sockets
   *	@__sk_common: shared layout with inet_timewait_sock
@@ -251,6 +265,13 @@ struct sock {
 	int			(*sk_backlog_rcv)(struct sock *sk,
 						  struct sk_buff *skb);
 	void			(*sk_destruct)(struct sock *sk);
+
+	void			*(* zc_alloc_data)(unsigned int size, void *priv);
+	int			(* zc_commit_data)(unsigned int size, void *priv);
+	wait_queue_head_t	zc_data_ready;
+	spinlock_t		zc_lock;
+	struct zc_page		*zc_pages;
+	unsigned int		zc_page_num, zc_page_index;
 };
 
 /*
diff --git a/mm/filemap.c b/mm/filemap.c
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1663,7 +1663,7 @@ EXPORT_SYMBOL(read_cache_page);
  * caller's lru-buffering pagevec. This function is specifically for
  * generic_file_write().
  */
-static inline struct page *
+struct page *
 __grab_cache_page(struct address_space *mapping, unsigned long index,
 			struct page **cached_page, struct pagevec *lru_pvec)
 {
@@ -1692,6 +1692,8 @@ repeat:
 	return page;
 }
 
+EXPORT_SYMBOL_GPL(__grab_cache_page);
+
 /*
  * The logic we want is
  *
diff --git a/net/core/Makefile b/net/core/Makefile
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
-	gen_stats.o gen_estimator.o
+	gen_stats.o gen_estimator.o zerocopy.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -182,6 +182,56 @@ nodata:
 	goto out;
 }
 
+void *zc_alloc_data(void *header, unsigned int header_size, unsigned int size, 
+		void **priv, struct zc_handler **__zh, int *status);
+void zc_commit_data(void *header, unsigned int header_size, unsigned int size, 
+		void *priv, struct zc_handler *zh);
+
+struct sk_buff *__alloc_skb_zerocopy(unsigned int size, gfp_t gfp_mask,
+		void *header, unsigned int header_size, 
+		void **priv, struct zc_handler **zh, int *status)
+{
+	struct sk_buff *skb;
+	u8 *data;
+
+	*status = -1;
+
+	skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+	if (!skb)
+		goto out;
+
+	data = zc_alloc_data(header, header_size, size, priv, zh, status);
+	if (!data)
+		goto err_out_free_skb;
+
+	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	skb->truesize = size + sizeof(struct sk_buff);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end = data + size;
+	
+	atomic_set(&(skb_shinfo(skb)->dataref), 1);
+	skb_shinfo(skb)->nr_frags = 0;
+	skb_shinfo(skb)->tso_size = 0;
+	skb_shinfo(skb)->tso_segs = 0;
+	skb_shinfo(skb)->frag_list = NULL;
+out:
+	return skb;
+err_out_free_skb:
+	kmem_cache_free(skbuff_head_cache, skb);
+	skb = NULL;
+	goto out;
+}
+
+void __kfree_skb_zerocopy(struct sk_buff *skb, 
+		void *header, unsigned int header_size, void *priv, struct zc_handler *zh)
+{
+	zc_commit_data(header, header_size, skb->truesize - sizeof(struct sk_buff), priv, zh);
+	kmem_cache_free(skbuff_head_cache, skb);
+}
+
 /**
  *	alloc_skb_from_cache - allocate a network buffer
  *	@cp: kmem_cache from which to allocate the data area
@@ -1739,3 +1789,5 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
 EXPORT_SYMBOL(skb_seq_read);
 EXPORT_SYMBOL(skb_abort_seq_read);
 EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(__alloc_skb_zerocopy);
+EXPORT_SYMBOL(__kfree_skb_zerocopy);
diff --git a/net/core/sock.c b/net/core/sock.c
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -704,6 +704,18 @@ void sk_free(struct sock *sk)
 	module_put(owner);
 }
 
+static void zc_sk_init(struct sock *sk)
+{
+	spin_lock_init(&sk->zc_lock);
+	init_waitqueue_head(&sk->zc_data_ready);
+	sk->zc_pages = NULL;
+	sk->zc_page_num = 0;
+	sk->zc_page_index = 0;
+	sk->zc_alloc_data = NULL;
+	sk->zc_commit_data = NULL;
+}
+
+
 struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 {
 	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
@@ -737,6 +749,8 @@ struct sock *sk_clone(const struct sock
 		sock_reset_flag(newsk, SOCK_DONE);
 		skb_queue_head_init(&newsk->sk_error_queue);
 
+		zc_sk_init(newsk);
+
 		filter = newsk->sk_filter;
 		if (filter != NULL)
 			sk_filter_charge(newsk, filter);
@@ -1320,6 +1334,8 @@ void sock_init_data(struct socket *sock,
 	sk->sk_stamp.tv_usec = -1L;
 
 	atomic_set(&sk->sk_refcnt, 1);
+
+	zc_sk_init(sk);
 }
 
 void fastcall lock_sock(struct sock *sk)
diff --git a/net/core/zerocopy.c b/net/core/zerocopy.c
new file mode 100644
--- /dev/null
+++ b/net/core/zerocopy.c
@@ -0,0 +1,169 @@
+/*
+ * 	zerocopy.c
+ * 
+ * 2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * All rights reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+
+#include <net/inet_hashtables.h>
+
+static void *tcp_sendfile_alloc_data(struct zc_handler *zh, void *header, unsigned int header_size, unsigned int size, void **priv, int *status);
+static void tcp_sendfile_commit_data(struct zc_handler *zh, void *header, unsigned int header_size, unsigned int size, void *priv);
+
+static struct zc_handler zc_tcp_sendfile_handler = {
+	.alloc_data = &tcp_sendfile_alloc_data,
+	.commit_data = &tcp_sendfile_commit_data,
+};
+
+static DEFINE_SPINLOCK(zc_lock);
+static LIST_HEAD(zc_list);
+
+void *zc_alloc_data(void *header, unsigned int header_size, unsigned int size, void **priv, struct zc_handler **__zh, int *status)
+{
+	struct zc_handler *zh;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -