📄 zero-copy.8
字号:
++int zc_commit_data(struct zc_buf *zb)+{+ int err = -EINVAL;+ + if (zb->zh)+ err = zb->zh->commit_data(zb->zh, zb);+ + return err;+}++void zc_cleanup(struct socket *sock, void *data, unsigned int size)+{+ struct sock_zc_setup_data *p = data;+ int found = 0;+ struct zc_handler *zh;++ if (size <= sizeof(struct sock_zc_setup_data) || + size != htonl(p->size) + sizeof(struct sock_zc_setup_data)) {+ goto err_out_exit;+ }++ down(&zc_handler_lock);+ list_for_each_entry(zh, &zc_handler_list, zc_entry) {+ if (!zh->cleanup(zh, sock, p)) {+ found = 1;+ zc_handler_put(zh);+ break;+ }+ }+ up(&zc_handler_lock);++err_out_exit:+ return;++}++int zc_setup(struct socket *sock, void *data, unsigned int size)+{+ struct sock_zc_setup_data *p = data;+ int found = 0;+ struct zc_handler *zh;++ if (size <= sizeof(struct sock_zc_setup_data) || + size != htonl(p->size) + sizeof(struct sock_zc_setup_data)) {+ goto err_out_exit;+ }++ down(&zc_handler_lock);+ list_for_each_entry(zh, &zc_handler_list, zc_entry) {+ if (!zh->setup(zh, sock, p)) {+ found = 1;+ break;+ }+ }+ up(&zc_handler_lock);++err_out_exit:+ return (found)?0:-ENODEV;+}++int zc_add_handler(struct zc_handler *h)+{+ if (!h->alloc_data || !h->commit_data || !h->sock_bucket || !h->sock_bucket_number || + !h->setup || !h->cleanup)+ return -EINVAL;+ + synchronize_rcu();++ down(&zc_handler_lock);+ list_add_rcu(&h->zc_entry, &zc_handler_list);+ up(&zc_handler_lock);++ return 0;+}++void zc_del_handler(struct zc_handler *h)+{+ synchronize_rcu();+ + down(&zc_handler_lock);+ list_del_rcu(&h->zc_entry);+ up(&zc_handler_lock);+}++extern struct page * __grab_cache_page(struct address_space *mapping, unsigned long index,+ struct page **cached_page, struct pagevec *lru_pvec);++int commit_page(struct zc_page *zp, struct file *file, struct address_space *mapping)+{+ int err;+ struct address_space_operations *a_ops = mapping->a_ops;++ flush_dcache_page(zp->page);+ err = a_ops->commit_write(file, zp->page, zp->page_offset, zp->page_offset+zp->used);+ unlock_page(zp->page);+ mark_page_accessed(zp->page);+ page_cache_release(zp->page);++ printk("%s: zp=%p, page=%p, page_offset=%u, used=%u, size=%u has been committed: err=%d.\n", + __func__, zp, zp->page, zp->page_offset, zp->used, zp->size, err);++ if (err < 0)+ goto err_out_exit;++ balance_dirty_pages_ratelimited(mapping);++err_out_exit:+ return err;+}++int prepare_page(struct zc_page *zp, struct zsock *zsk, struct file *file, struct address_space *mapping, + loff_t *ppos, loff_t count, struct pagevec *lru_pvec)+{+ unsigned long index;+ unsigned long page_offset;+ unsigned long bytes;+ struct address_space_operations *a_ops = mapping->a_ops;+ loff_t pos_allocated = *ppos;+ int err = 0;++ page_offset = (pos_allocated & (PAGE_CACHE_SIZE -1));+ index = pos_allocated >> PAGE_CACHE_SHIFT;+ bytes = PAGE_CACHE_SIZE - page_offset;+ if (bytes > count)+ bytes = count;++ zp->page = __grab_cache_page(mapping, index, &zsk->zc_cached_page, lru_pvec);+ if (!zp->page) {+ err = -ENOMEM;+ goto err_out_exit;+ }++ err = a_ops->prepare_write(file, zp->page, page_offset, page_offset+bytes);+ if (unlikely(err)) {+ unlock_page(zp->page);+ page_cache_release(zp->page);+ goto err_out_exit;+ }++ zp->page_offset = page_offset;+ zp->size = bytes;+ zp->used = 0;+ zp->seq = zsk->zc_seq_first + pos_allocated;+ clear_bit(ZC_PAGE_READY, &zp->flags);++ printk("%s: zp=%p, seq=%u, page=%p, page_offset=%u, used=%u, size=%u has been prepared: err=%d.\n", + __func__, zp, zp->seq, zp->page, zp->page_offset, zp->used, zp->size, err);++ pos_allocated += bytes;++ *ppos = pos_allocated;++err_out_exit:+ return err;+}+++void sk_zc_fini(struct zsock *zsk)+{+ if (zsk) {+ unsigned int zc_page_num;+ struct zc_page *zc_pages;+ unsigned long flags;+ + spin_lock_irqsave(&zsk->zc_lock, flags);+ zc_page_num = zsk->zc_page_num;+ zc_pages = zsk->zc_pages;+ + zsk->zc_pages = NULL;+ zsk->zc_page_num = 0;+ zsk->zc_page_index = 0;+ zsk->zc_alloc_data = NULL;+ zsk->zc_commit_data = NULL;+ spin_unlock_irqrestore(&zsk->zc_lock, flags);++ if (zc_page_num) {+ struct address_space *mapping = zsk->zc_file->f_mapping;+ int i;++ printk("%s: zsk=%p, zc_page_num=%u, zc_pages=%p, refcnt=%d.\n", + __func__, zsk, zc_page_num, zc_pages, atomic_read(&zsk->refcnt));+ + /*+ * No new skbs can contribute data into VFS cache after this + * condition, so we only must care about those which are + * in socket queue already or will be inserted there after+ * allocation, but allocation itself will always fail+ * due to above locked changes.+ */++ if (zsk->zc_cached_page) {+ page_cache_release(zsk->zc_cached_page);+ zsk->zc_cached_page = NULL;+ }++ for (i=0; i<zc_page_num; ++i)+ commit_page(&zc_pages[i], zsk->zc_file, mapping);++ zsk->zc_file->f_mode &= ~FMODE_ZEROCOPY;+ fput(zsk->zc_file);+ zsk->zc_file = NULL;+ + kfree(zc_pages);+ printk("%s: sk=%p has been released.\n", __func__, zsk);++ zsk_put(zsk);+ }+ }+}++void sk_zc_init(struct zsock *zsk)+{+ spin_lock_init(&zsk->zc_lock);+ init_waitqueue_head(&zsk->zc_data_ready);+ zsk->zc_pages = NULL;+ zsk->zc_page_num = 0;+ zsk->zc_page_index = 0;+ zsk->zc_alloc_data = NULL;+ zsk->zc_commit_data = NULL;+ zsk->zc_file = NULL;+ zsk->zc_cached_page = NULL;+}++struct zsock *zsk_alloc(struct zc_handler *handler, void *priv, unsigned int priv_size, int (* insert)(struct zsock *zsk), gfp_t gfp_mask)+{+ struct zsock *zsk;++ zsk = kzalloc(sizeof(struct zsock) + priv_size, gfp_mask);+ if (!zsk)+ return NULL;++ atomic_set(&zsk->refcnt, 1);+ zsk->handler = handler;+ zsk->priv_size = priv_size;+ if (priv_size) {+ zsk->priv = zsk+1;+ memcpy(zsk->priv, priv, priv_size);+ } else+ zsk->priv = NULL;++ zc_handler_get(handler);+ + sk_zc_init(zsk);++ if (insert) {+ int err;++ err = insert(zsk);+ if (err) {+ zc_handler_put(handler);+ zsk_free(zsk);+ return NULL;+ }+ }++ return zsk;+}++void zsk_free(struct zsock *zsk)+{+ zc_handler_put(zsk->handler);+ kfree(zsk);+}++static inline u32 tcp_udp_v4_hash(unsigned int bucket_number, const u32 src, const u16 sport, const u32 dst, const u16 dport)+{+ return inet_ehashfn(src, sport, dst, dport) & (bucket_number - 1);+}++int tcp_udp_v4_zc_sock_insert(struct zsock *zsk)+{+ u32 hash;+ unsigned long flags;+ struct tcp_udp_v4_priv *priv = zsk_priv(zsk);+ struct zc_sock_bucket *b;++ if (!priv)+ return -ENODEV;+ + hash = tcp_udp_v4_hash(zsk->handler->sock_bucket_number, priv->src, priv->sport, priv->dst, priv->dport);++ b = &zsk->handler->sock_bucket[hash];++ printk("%s: hash=%x, b=%p.\n", __func__, hash, b);++ write_lock_irqsave(&b->lock, flags);+ list_add_rcu(&zsk->zc_entry, &b->list);+ write_unlock_irqrestore(&b->lock, flags);++ printk("%s: done.\n", __func__);++ return 0;+}++int tcp_udp_v4_zc_sock_remove(struct zsock *zsk)+{+ u32 hash;+ unsigned long flags;+ struct tcp_udp_v4_priv *priv = zsk_priv(zsk);+ struct zc_sock_bucket *b;++ if (!priv)+ return -ENODEV;+ + hash = tcp_udp_v4_hash(zsk->handler->sock_bucket_number, priv->src, priv->sport, priv->dst, priv->dport);++ b = &zsk->handler->sock_bucket[hash];++ write_lock_irqsave(&b->lock, flags);+ list_del_rcu(&zsk->zc_entry);+ write_unlock_irqrestore(&b->lock, flags);++ return 0;+}++/*+ * Must be called under RCU cover and with interrupts disabled. + */+static struct zsock *tcp_udp_v4_zc_sock_lookup(const struct zc_sock_bucket *bucket, const unsigned int bucket_number, + const u32 src, const u16 sport, const u32 dst, const u16 dport)+{+ u32 hash = tcp_udp_v4_hash(bucket_number, src, sport, dst, dport);+ struct zsock *zsk;+ struct tcp_udp_v4_priv *priv;++ printk("%s: hash=%08x: %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u.\n", + __func__, hash, NIPQUAD(src), htons(sport), NIPQUAD(dst), htons(dport));+ + list_for_each_entry_rcu(zsk, &bucket[hash].list, zc_entry) {+ priv = zsk_priv(zsk);++ /* May not happen for this kind of zc sockets, actually... */+ if (unlikely(!priv))+ continue;++ printk("%s: zsk=%p: %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u.\n", + __func__, zsk, NIPQUAD(priv->src), htons(priv->sport), NIPQUAD(priv->dst), htons(priv->dport));++ if (priv->sport == sport && priv->dport == dport && priv->src == src && priv->dst == dst) {+ zsk_get(zsk);+ return zsk;+ }+ }++ return NULL;+}++static int tcp_udp_v4_sendfile_alloc_data(struct zc_handler *zh, struct zc_buf *zb)+{+ struct ethhdr *eth;+ struct iphdr *iph;+ struct zsock *zsk;+ int err = -EINVAL;+ u16 sport, dport;++ if (zb->header_size < sizeof(struct ethhdr) + sizeof(struct iphdr))+ goto err_out_exit;++ eth = zb->header;++ if (eth->h_proto != htons(ETH_P_IP))+ goto err_out_exit;++ iph = (struct iphdr *)(eth + 1);+ + //if (iph->protocol != IPPROTO_TCP || iph->protocol != IPPROTO_UDP)+ if (iph->protocol != IPPROTO_TCP)+ goto err_out_exit;++ if (iph->protocol == IPPROTO_TCP) {+ struct tcphdr *tcph = (struct tcphdr *)(((u8 *)iph) + iph->ihl*4);+ sport = tcph->source;+ dport = tcph->dest;+ } else {+ struct udphdr *udph = (struct udphdr *)(((u8 *)iph) + iph->ihl*4);+ sport = udph->source;+ dport = udph->dest;+ }++ local_irq_disable();+ rcu_read_lock();+ zsk = tcp_udp_v4_zc_sock_lookup(zh->sock_bucket, zh->sock_bucket_number, iph->daddr, dport, iph->saddr, sport);+ if (zsk) {+ printk("%s: zsk=%p, zc_alloc_data=%p, refcnt=%d.\n", __func__, zsk, zsk->zc_alloc_data, atomic_read(&zsk->refcnt));+ spin_lock(&zsk->zc_lock);+ if (zsk->zc_alloc_data && zsk->zc_pages) {+ zb->priv = zsk;+ err = zsk->zc_alloc_data(zb);+ zb->status = (err)?1:0;+ wake_up(&zsk->zc_data_ready);+ }+ spin_unlock(&zsk->zc_lock);+ zsk_put(zsk);+ }+ rcu_read_unlock();+ local_irq_enable();++err_out_exit:+ return err;+}++static int tcp_udp_v4_sendfile_commit_data(struct zc_handler *zh, struct zc_buf *zb)+{+ struct zsock *zsk = zb->priv;+ int err;+ unsigned long flags;++ spin_lock_irqsave(&zsk->zc_lock, flags);+ err = zsk->zc_commit_data(zb);+ spin_unlock_irqrestore(&zsk->zc_lock, flags);++ wake_up(&zsk->zc_data_ready);++ printk("%s: commiting data, zsk=%p, size=%4u, err=%d.\n", __func__, zsk, zb->size, err);++ return err;+}++static int tcp_udp_v4_sendfile_check(struct zc_handler *zh, struct socket *sock, struct sock_zc_setup_data *p)+{+ struct tcp_udp_v4_priv *priv;+ u32 type = ntohl(p->type);+ u32 size = ntohl(p->size);++ if (type != IPPROTO_TCP && type != IPPROTO_UDP)+ return -EINVAL;++ if (size != sizeof(struct tcp_udp_v4_priv))+ return -EINVAL;++ priv = (struct tcp_udp_v4_priv *)p->data;++ printk("%s: %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u.\n", + __func__, NIPQUAD(priv->src), htons(priv->sport),+ NIPQUAD(priv->dst), htons(priv->dport));++ return 0;+}++static int tcp_udp_v4_sendfile_setup(struct zc_handler *zh, struct socket *sock, struct sock_zc_setup_data *p)+{+ struct tcp_udp_v4_priv *priv = (struct tcp_udp_v4_priv *)p->data;+ int err;++ err = tcp_udp_v4_sendfile_check(zh, sock, p);+ if (err)+ return err;++ printk("%s: %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u.\n", + __func__, NIPQUAD(priv->src), htons(priv->sport),+ NIPQUAD(priv->dst), htons(priv->dport));++ return tcp_udp_v4_sock_zc_init(sock, priv);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -