📄 zero-copy.9
字号:
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c--- a/drivers/net/8139too.c+++ b/drivers/net/8139too.c@@ -82,6 +82,8 @@ Robert Kuebel - Save kernel thread from dying on any signal. + Evgeniy Polyakov - Added receiving zero-copy support.+ Submitting bug reports: "rtl8139-diag -mmmaaavvveefN" output@@ -91,7 +93,7 @@ #define DRV_NAME "8139too" #define DRV_VERSION "0.9.27"-+#define DRV_EXT "-zc" #include <linux/config.h> #include <linux/module.h>@@ -108,11 +110,16 @@ #include <linux/mii.h> #include <linux/completion.h> #include <linux/crc32.h>+#include <linux/if_ether.h>+#include <linux/ip.h>+#include <linux/tcp.h>+#include <linux/udp.h>+#include <linux/workqueue.h> #include <asm/io.h> #include <asm/uaccess.h> #include <asm/irq.h> -#define RTL8139_DRIVER_NAME DRV_NAME " Fast Ethernet driver " DRV_VERSION+#define RTL8139_DRIVER_NAME DRV_NAME " Fast Ethernet driver " DRV_VERSION DRV_EXT #define PFX DRV_NAME ": " /* Default Message level */@@ -791,7 +798,7 @@ static int __devinit rtl8139_init_board /* set this immediately, we need to know before * we talk to the chip directly */- DPRINTK("PIO region size == 0x%02X\n", pio_len);+ DPRINTK("PIO region size == 0x%02lX\n", pio_len); DPRINTK("MMIO region size == 0x%02lX\n", mmio_len); #ifdef USE_IO_OPS@@ -1895,16 +1902,23 @@ static void rtl8139_rx_err (u32 rx_statu } #if RX_BUF_IDX == 3-static __inline__ void wrap_copy(struct sk_buff *skb, const unsigned char *ring,+static __inline__ void __wrap_copy(void *data, const unsigned char *ring, u32 offset, unsigned int size) { u32 left = RX_BUF_LEN - offset; if (size > left) {- memcpy(skb->data, ring + offset, left);- memcpy(skb->data+left, ring, size - left);+ memcpy(data, ring + offset, left);+ memcpy(data+left, ring, size - left); } else- memcpy(skb->data, ring + offset, size);+ memcpy(data, ring + offset, size);++}++static __inline__ void wrap_copy(struct sk_buff *skb, const unsigned char *ring,+ u32 offset, unsigned int size)+{+ __wrap_copy(skb->data, ring, offset, size); } #endif @@ -1926,6 +1940,103 @@ static void rtl8139_isr_ack(struct rtl81 } } +static void rtl8139_copy(void *dst, unsigned char *rx_ring, u32 ring_offset, int size)+{+ if (!size)+ return;+#if RX_BUF_IDX == 3+ __wrap_copy(dst, rx_ring, ring_offset, size);+#else+ memcpy(dst, &rx_ring[ring_offset], size);+#endif+}++static int rtl8139_move_data(struct zc_buf *zb, unsigned int sz)+{+ struct rtl8139_private *tp = zb->priv_data;+ unsigned char *rx_ring = tp->rx_ring;+ unsigned int cur_rx = tp->cur_rx;+ u32 ring_offset = cur_rx % RX_BUF_LEN;+ struct sk_buff *skb = zb->skb;+ skb_frag_t *frag;+ void *dest;+ + if (unlikely(skb_shinfo(skb)->nr_frags == 0) || unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS))+ return -EINVAL;+ + frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags-1];+ dest = kmap_atomic(frag->page, KM_USER0);+ if (!dest)+ return -ENOMEM;++ rtl8139_copy(dest + frag->page_offset, rx_ring, ring_offset + 4 + zb->header_size, sz);++ kunmap_atomic(dest, KM_USER0);++ return sz;+}++static int rtl8139_copy_header(struct zc_buf *zb)+{+ struct rtl8139_private *tp = zb->priv_data;+ unsigned char *rx_ring = tp->rx_ring;+ unsigned int cur_rx = tp->cur_rx;+ u32 ring_offset = cur_rx % RX_BUF_LEN;+ u8 *orig_ptr, *ptr = zb->header;+ int tocopy, hsize = 0;+ struct iphdr *iph;+ struct tcphdr *tcph;+ struct ethhdr *eth;++ orig_ptr = ptr;++ ring_offset += 4;++ tocopy = sizeof(struct ethhdr);+ rtl8139_copy(ptr, rx_ring, ring_offset, tocopy);+ ptr += tocopy;+ ring_offset += tocopy;++ eth = (struct ethhdr *)(ptr - sizeof(struct ethhdr));+ if (eth->h_proto != htons(ETH_P_IP))+ return -1;+ + tocopy = sizeof(struct iphdr);+ rtl8139_copy(ptr, rx_ring, ring_offset, tocopy);+ ptr += tocopy;+ ring_offset += tocopy;++ iph = (struct iphdr *)(ptr - sizeof(struct iphdr));+ if (iph->protocol == IPPROTO_TCP) {+ hsize = sizeof(struct tcphdr);+ } else if (iph->protocol == IPPROTO_UDP) {+ hsize = sizeof(struct udphdr);+ } else+ return -1;++ tocopy = iph->ihl*4 - sizeof(struct iphdr) + hsize;+ if (tocopy + ptr - orig_ptr > zb->header_size)+ return -1;+ rtl8139_copy(ptr, rx_ring, ring_offset, tocopy);+ ptr += tocopy;+ ring_offset += tocopy;+ + if (iph->protocol == IPPROTO_TCP) {+ tcph = (struct tcphdr *)(((u8 *)(iph)) + iph->ihl*4);+ tocopy = tcph->doff*4;+ if (tocopy + ptr - orig_ptr > zb->header_size)+ return -1;+ rtl8139_copy(ptr, rx_ring, ring_offset, tocopy);+ ptr += tocopy;+ ring_offset += tocopy;+ }+ + zb->header_size = ptr - orig_ptr;+ zb->size -= zb->header_size;++ return 0;+}+ static int rtl8139_rx(struct net_device *dev, struct rtl8139_private *tp, int budget) {@@ -1956,8 +2067,7 @@ static int rtl8139_rx(struct net_device if (netif_msg_rx_status(tp)) printk(KERN_DEBUG "%s: rtl8139_rx() status %4.4x, size %4.4x,"- " cur %4.4x.\n", dev->name, rx_status,- rx_size, cur_rx);+ " cur %4.4x.\n", dev->name, rx_status, rx_size, cur_rx); #if RTL8139_DEBUG > 2 { int i;@@ -2005,37 +2115,77 @@ no_early_rx: goto out; } - /* Malloc up new buffer, compatible with net-2e. */- /* Omit the four octet CRC from the length. */+ {+ u8 zc_data[256];+ struct zc_buf *zb;++ memset(&zc_data, 0, sizeof(zc_data));+ zb = (struct zc_buf *)zc_data; - skb = dev_alloc_skb (pkt_size + 2);- if (likely(skb)) {- skb->dev = dev;- skb_reserve (skb, 2); /* 16 byte align the IP fields. */+ zb->header = (void *)(zb + 1);+ zb->header_size = sizeof(zc_data) - sizeof(struct zc_buf);+ zb->size = pkt_size;+ zb->priv_data = tp;+ zb->move_data = &rtl8139_move_data;++ if (!rtl8139_copy_header(zb)) {+ skb = alloc_skb_zerocopy(zb, GFP_ATOMIC);+ if (skb) {+ int err;+ + skb->dev = dev;+ skb->protocol = eth_type_trans(skb, dev);+ + dev->last_rx = jiffies;+ tp->stats.rx_bytes += pkt_size;+ tp->stats.rx_packets++;+ err = netif_receive_skb(skb);+ printk("%s: netif_receive_skb()=%d.\n", __func__, err);+ }+ } else {+ skb = NULL;+ zb->status = -1;+ }+ + if (!skb) { + if (zb->status == -1) {+ /* Malloc up new buffer, compatible with net-2e. */+ /* Omit the four octet CRC from the length. */+ + skb = dev_alloc_skb (pkt_size + 2);+ if (likely(skb)) {+ skb->dev = dev;+ skb_reserve (skb, 2); /* 16 byte align the IP fields. */ #if RX_BUF_IDX == 3- wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);+ wrap_copy(skb, rx_ring, ring_offset+4, pkt_size); #else- eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);+ eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0); #endif- skb_put (skb, pkt_size);-- skb->protocol = eth_type_trans (skb, dev);+ skb_put (skb, pkt_size); - dev->last_rx = jiffies;- tp->stats.rx_bytes += pkt_size;- tp->stats.rx_packets++;+ skb->protocol = eth_type_trans (skb, dev); - netif_receive_skb (skb);- } else {- if (net_ratelimit()) - printk (KERN_WARNING- "%s: Memory squeeze, dropping packet.\n",- dev->name);- tp->stats.rx_dropped++;+ dev->last_rx = jiffies;+ tp->stats.rx_bytes += pkt_size;+ tp->stats.rx_packets++;++ netif_receive_skb(skb);+ } else {+ if (net_ratelimit()) + printk (KERN_WARNING+ "%s: Memory squeeze, dropping packet.\n",+ dev->name);+ tp->stats.rx_dropped++;+ }+ } else {+ tp->stats.rx_dropped++;+ }+ } }+ received++; - cur_rx = (cur_rx + rx_size + 4 + 3) & ~3;+ tp->cur_rx = cur_rx = (cur_rx + rx_size + 4 + 3) & ~3; RTL_W16 (RxBufPtr, (u16) (cur_rx - 16)); rtl8139_isr_ack(tp);diff --git a/fs/read_write.c b/fs/read_write.c--- a/fs/read_write.c+++ b/fs/read_write.c@@ -15,6 +15,8 @@ #include <linux/module.h> #include <linux/syscalls.h> +#include <net/sock.h>+ #include <asm/uaccess.h> #include <asm/unistd.h> @@ -670,8 +672,15 @@ static ssize_t do_sendfile(int out_fd, i if (!(out_file->f_mode & FMODE_WRITE)) goto fput_out; retval = -EINVAL;- if (!out_file->f_op || !out_file->f_op->sendpage)+ if (!out_file->f_op)+ goto fput_out;+ + if (!SOCKET_I(in_file->f_dentry->d_inode) && !out_file->f_op->sendpage) {+ printk("%s: sock=%p, sendpage=%p.\n", __func__, + SOCKET_I(in_file->f_dentry->d_inode), out_file->f_op->sendpage); goto fput_out;+ }+ out_inode = out_file->f_dentry->d_inode; retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); if (retval)@@ -688,7 +697,7 @@ static ssize_t do_sendfile(int out_fd, i retval = -EINVAL; if (unlikely(pos < 0)) goto fput_out;- if (unlikely(pos + count > max)) {+ if (unlikely((unsigned long long)(pos + count) > (unsigned long long)max)) { retval = -EOVERFLOW; if (pos >= max) goto fput_out;diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h--- a/include/asm-i386/socket.h+++ b/include/asm-i386/socket.h@@ -48,5 +48,6 @@ #define SO_ACCEPTCONN 30 #define SO_PEERSEC 31+#define SO_ZEROCOPY 34 #endif /* _ASM_SOCKET_H */diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h--- a/include/asm-x86_64/socket.h+++ b/include/asm-x86_64/socket.h@@ -49,4 +49,6 @@ #define SO_PEERSEC 31 +#define SO_ZEROCOPY 34+ #endif /* _ASM_SOCKET_H */diff --git a/include/linux/fs.h b/include/linux/fs.h--- a/include/linux/fs.h+++ b/include/linux/fs.h@@ -64,6 +64,7 @@ extern int dir_notify_enable; #define FMODE_LSEEK 4 #define FMODE_PREAD 8 #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */+#define FMODE_ZEROCOPY 16 #define RW_MASK 1 #define RWA_MASK 2diff --git a/include/linux/net.h b/include/linux/net.h--- a/include/linux/net.h+++ b/include/linux/net.h@@ -173,6 +173,7 @@ struct net_proto_family { struct iovec; struct kvec; +extern int sock_zc_init(struct socket *sock, int fd); extern int sock_wake_async(struct socket *sk, int how, int band); extern int sock_register(struct net_proto_family *fam); extern int sock_unregister(int family);diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h--- a/include/linux/skbuff.h+++ b/include/linux/skbuff.h@@ -29,11 +29,14 @@ #include <linux/net.h> #include <linux/textsearch.h> #include <net/checksum.h>+#include <net/zerocopy.h> #define HAVE_ALLOC_SKB /* For the drivers to know */ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ #define SLAB_SKB /* Slabified skbuffs */ +#define ZEROCOPY_HEADER_CACHE_SIZE 256 /* Maximum receiving zero-copy header size */+ #define CHECKSUM_NONE 0 #define CHECKSUM_HW 1 #define CHECKSUM_UNNECESSARY 2@@ -261,7 +264,8 @@ struct sk_buff { nohdr:1, nfctinfo:3; __u8 pkt_type:3,- fclone:2;+ fclone:2,+ zerocopy:1; __be16 protocol; void (*destructor)(struct sk_buff *skb);@@ -1045,6 +1049,11 @@ static inline struct sk_buff *dev_alloc_ return __dev_alloc_skb(length, GFP_ATOMIC); } +static inline struct sk_buff *alloc_skb_zerocopy(struct zc_buf *zb, gfp_t gfp_mask)+{+ return __alloc_skb_zerocopy(zb, gfp_mask);+}+ /** * skb_cow - copy header of skb when it is required * @skb: buffer to cowdiff --git a/include/net/sock.h b/include/net/sock.h--- a/include/net/sock.h+++ b/include/net/sock.h@@ -54,6 +54,7 @@ #include <asm/atomic.h> #include <net/dst.h> #include <net/checksum.h>+#include <net/zerocopy.h> /* * This structure really needs to be cleaned up.@@ -61,6 +62,8 @@ * the other protocols. */ +//#define SOCK_REFCNT_DEBUG+ /* Define this to get the SOCK_DBG debugging facility. */ #define SOCK_DEBUGGING #ifdef SOCK_DEBUGGING@@ -212,6 +215,9 @@ struct sock { int sk_route_caps; unsigned long sk_flags; unsigned long sk_lingertime;++ struct zsock *zsk;+ /* * The backlog queue is special, it is always used with * the per-socket spinlock held and requires low latencydiff --git a/include/net/zerocopy.h b/include/net/zerocopy.hnew file mode 100644--- /dev/null+++ b/include/net/zerocopy.h@@ -0,0 +1,175 @@+/*+ * zerocopy.h+ * + * 2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>+ * All rights reserved.+ * + * This program is free software; you can redistribute it and/or modify+ * it under the terms of the GNU General Public License as published by+ * the Free Software Foundation; either version 2 of the License, or+ * (at your option) any later version.+ *+ * This program is distributed in the hope that it will be useful,+ * but WITHOUT ANY WARRANTY; without even the implied warranty of+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the+ * GNU General Public License for more details.+ *+ * You should have received a copy of the GNU General Public License+ * along with this program; if not, write to the Free Software+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA+ */++#ifndef __ZEROCOPY_H+#define __ZEROCOPY_H++struct sock_zc_setup_data
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -