svcsock.c
/*
 * linux/net/sunrpc/svcsock.c
 *
 * These are the RPC server socket internals.
 *
 * The server scheduling algorithm does not always distribute the load
 * evenly when servicing a single client. May need to modify the
 * svc_sock_enqueue procedure...
 *
 * TCP support is largely untested and may be a little slow. The problem
 * is that we currently do two separate recvfrom's, one for the 4-byte
 * record length, and the second for the actual record. This could possibly
 * be improved by always reading a minimum size of around 100 bytes and
 * tucking any superfluous bytes away in a temporary store. Still, that
 * leaves write requests out in the rain. An alternative may be to peek at
 * the first skb in the queue, and if it matches the next TCP sequence
 * number, to extract the record marker. Yuck.
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/file.h>
#include <linux/freezer.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/tcp_states.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>

#include <linux/sunrpc/types.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/stats.h>

/* SMP locking strategy:
 *
 *	svc_pool->sp_lock protects most of the fields of that pool.
 *	svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
 *	when both need to be taken (rare), svc_serv->sv_lock is first.
 *	BKL protects svc_serv->sv_nrthread.
 *	svc_sock->sk_lock protects the svc_sock->sk_deferred list
 *	and the ->sk_info_authunix cache.
 *	svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply.
 *
 *	Some flags can be set to certain values at any time
 *	providing that certain rules are followed:
 *
 *	SK_CONN, SK_DATA can be set or cleared at any time.
 *		after a set, svc_sock_enqueue must be called.
 *		after a clear, the socket must be read/accepted;
 *		if this succeeds, it must be set again.
 *	SK_CLOSE can be set at any time. It is never cleared.
 *	sk_inuse contains a bias of '1' until SK_DEAD is set,
 *		so when sk_inuse hits zero, we know the socket is dead
 *		and no-one is using it.
 *	SK_DEAD can only be set while SK_BUSY is held, which ensures
 *		no other thread will be using the socket or will try to
 *		set SK_DEAD.
 */
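/* The flag rules above translate into a fixed pattern for socket
 * callbacks: set the relevant bit first, then call svc_sock_enqueue().
 * Below is a minimal sketch of that pattern for a datagram data-ready
 * callback. It is an illustration of the protocol only, not a copy of
 * this file's real svc_udp_data_ready() (forward-declared below but
 * not included in this excerpt); the function name is made up.
 */
#if 0	/* illustration only, not compiled */
static void example_data_ready(struct sock *sk, int count)
{
	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;

	if (svsk) {
		/* Rule: after a set, svc_sock_enqueue must be called. */
		set_bit(SK_DATA, &svsk->sk_flags);
		svc_sock_enqueue(svsk);
	}
}
#endif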
#define RPCDBG_FACILITY	RPCDBG_SVCSOCK


static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
					 int *errp, int flags);
static void		svc_delete_socket(struct svc_sock *svsk);
static void		svc_udp_data_ready(struct sock *, int);
static int		svc_udp_recvfrom(struct svc_rqst *);
static int		svc_udp_sendto(struct svc_rqst *);
static void		svc_close_socket(struct svc_sock *svsk);

static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);

/* apparently the "standard" is that clients close
 * idle connections after 5 minutes, servers after
 * 6 minutes
 *   http://www.connectathon.org/talks96/nfstcp.pdf
 */
static int svc_conn_age_period = 6*60;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key svc_key[2];
static struct lock_class_key svc_slock_key[2];

static inline void svc_reclassify_socket(struct socket *sock)
{
	struct sock *sk = sock->sk;

	BUG_ON(sock_owned_by_user(sk));
	switch (sk->sk_family) {
	case AF_INET:
		sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
		    &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]);
		break;

	case AF_INET6:
		sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
		    &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]);
		break;

	default:
		BUG();
	}
}
#else
static inline void svc_reclassify_socket(struct socket *sock)
{
}
#endif

static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len)
{
	switch (addr->sa_family) {
	case AF_INET:
		snprintf(buf, len, "%u.%u.%u.%u, port=%u",
			NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
			ntohs(((struct sockaddr_in *) addr)->sin_port));
		break;

	case AF_INET6:
		snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
			NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
			ntohs(((struct sockaddr_in6 *) addr)->sin6_port));
		break;

	default:
		snprintf(buf, len, "unknown address type: %d", addr->sa_family);
		break;
	}
	return buf;
}

/**
 * svc_print_addr - Format rq_addr field for printing
 * @rqstp: svc_rqst struct containing address to print
 * @buf: target buffer for formatted address
 * @len: length of target buffer
 */
char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
{
	return __svc_print_addr(svc_addr(rqstp), buf, len);
}
EXPORT_SYMBOL_GPL(svc_print_addr);

/*
 * Queue up an idle server thread.  Must have pool->sp_lock held.
 * Note: this is really a stack rather than a queue, so that we only
 * use as many different threads as we need, and the rest don't pollute
 * the cache.
 */
static inline void
svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
{
	list_add(&rqstp->rq_list, &pool->sp_threads);
}

/*
 * Dequeue an nfsd thread.  Must have pool->sp_lock held.
 */
static inline void
svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
{
	list_del(&rqstp->rq_list);
}

/*
 * Release an skbuff after use
 */
static inline void
svc_release_skb(struct svc_rqst *rqstp)
{
	struct sk_buff *skb = rqstp->rq_skbuff;
	struct svc_deferred_req *dr = rqstp->rq_deferred;

	if (skb) {
		rqstp->rq_skbuff = NULL;

		dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
		skb_free_datagram(rqstp->rq_sock->sk_sk, skb);
	}
	if (dr) {
		rqstp->rq_deferred = NULL;
		kfree(dr);
	}
}

/*
 * Any space to write?
 */
static inline unsigned long
svc_sock_wspace(struct svc_sock *svsk)
{
	int wspace;

	if (svsk->sk_sock->type == SOCK_STREAM)
		wspace = sk_stream_wspace(svsk->sk_sk);
	else
		wspace = sock_wspace(svsk->sk_sk);

	return wspace;
}
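/* svc_sock_wspace() feeds the back-pressure test in svc_sock_enqueue()
 * below: a socket is not handed to a thread unless roughly two maximum
 * replies still fit in the send buffer. A hedged restatement of that
 * test as a standalone predicate, with the function name made up for
 * illustration. Worked example with invented numbers: if sv_max_mesg
 * is 32768, sk_reserved is 0 and wspace is 65536, then
 * (0 + 32768) * 2 > 65536 is false and the socket is enqueued; a single
 * outstanding 32768-byte reservation already tips it into "no space".
 */
#if 0	/* illustration only, not compiled */
static int example_would_defer(struct svc_sock *svsk, struct svc_serv *serv)
{
	return (atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg) * 2
		> svc_sock_wspace(svsk);
}
#endif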
/*
 * Queue up a socket with data pending. If there are idle nfsd
 * processes, wake 'em up.
 */
static void
svc_sock_enqueue(struct svc_sock *svsk)
{
	struct svc_serv	*serv = svsk->sk_server;
	struct svc_pool *pool;
	struct svc_rqst	*rqstp;
	int cpu;

	if (!(svsk->sk_flags &
	      ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
		return;
	if (test_bit(SK_DEAD, &svsk->sk_flags))
		return;

	cpu = get_cpu();
	pool = svc_pool_for_cpu(svsk->sk_server, cpu);
	put_cpu();

	spin_lock_bh(&pool->sp_lock);

	if (!list_empty(&pool->sp_threads) &&
	    !list_empty(&pool->sp_sockets))
		printk(KERN_ERR
			"svc_sock_enqueue: threads and sockets both waiting??\n");

	if (test_bit(SK_DEAD, &svsk->sk_flags)) {
		/* Don't enqueue dead sockets */
		dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk);
		goto out_unlock;
	}

	/* Mark socket as busy. It will remain in this state until the
	 * server has processed all pending data and put the socket back
	 * on the idle list.  We update SK_BUSY atomically because
	 * it also guards against trying to enqueue the svc_sock twice.
	 */
	if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) {
		/* Don't enqueue socket while already enqueued */
		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
		goto out_unlock;
	}
	BUG_ON(svsk->sk_pool != NULL);
	svsk->sk_pool = pool;

	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
	     > svc_sock_wspace(svsk))
	    && !test_bit(SK_CLOSE, &svsk->sk_flags)
	    && !test_bit(SK_CONN, &svsk->sk_flags)) {
		/* Don't enqueue while not enough space for reply */
		dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
			svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
			svc_sock_wspace(svsk));
		svsk->sk_pool = NULL;
		clear_bit(SK_BUSY, &svsk->sk_flags);
		goto out_unlock;
	}
	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);


	if (!list_empty(&pool->sp_threads)) {
		rqstp = list_entry(pool->sp_threads.next,
				   struct svc_rqst,
				   rq_list);
		dprintk("svc: socket %p served by daemon %p\n",
			svsk->sk_sk, rqstp);
		svc_thread_dequeue(pool, rqstp);
		if (rqstp->rq_sock)
			printk(KERN_ERR
				"svc_sock_enqueue: server %p, rq_sock=%p!\n",
				rqstp, rqstp->rq_sock);
		rqstp->rq_sock = svsk;
		atomic_inc(&svsk->sk_inuse);
		rqstp->rq_reserved = serv->sv_max_mesg;
		atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
		BUG_ON(svsk->sk_pool != pool);
		wake_up(&rqstp->rq_wait);
	} else {
		dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
		list_add_tail(&svsk->sk_ready, &pool->sp_sockets);
		BUG_ON(svsk->sk_pool != pool);
	}

out_unlock:
	spin_unlock_bh(&pool->sp_lock);
}

/*
 * Dequeue the first socket.  Must be called with the pool->sp_lock held.
 */
static inline struct svc_sock *
svc_sock_dequeue(struct svc_pool *pool)
{
	struct svc_sock	*svsk;

	if (list_empty(&pool->sp_sockets))
		return NULL;

	svsk = list_entry(pool->sp_sockets.next,
			  struct svc_sock, sk_ready);
	list_del_init(&svsk->sk_ready);

	dprintk("svc: socket %p dequeued, inuse=%d\n",
		svsk->sk_sk, atomic_read(&svsk->sk_inuse));

	return svsk;
}

/*
 * Having read something from a socket, check whether it
 * needs to be re-enqueued.
 * Note: SK_DATA only gets cleared when a read-attempt finds
 * no (or insufficient) data.
 */
static inline void
svc_sock_received(struct svc_sock *svsk)
{
	svsk->sk_pool = NULL;
	clear_bit(SK_BUSY, &svsk->sk_flags);
	svc_sock_enqueue(svsk);
}
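/* svc_sock_received() pairs with the flag rules at the top of the file:
 * a receive routine clears SK_DATA before reading, sets it again if the
 * read produced data, and finishes with svc_sock_received() so the
 * socket can be re-enqueued. A minimal sketch of that discipline follows;
 * the function name is made up and the real svc_udp_recvfrom() (which
 * also handles checksums and hands the skb to the request) is not part
 * of this excerpt.
 */
#if 0	/* illustration only, not compiled */
static int example_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_sock *svsk = rqstp->rq_sock;
	struct sk_buff *skb;
	int err;

	/* Rule: after a clear, the socket must be read ... */
	clear_bit(SK_DATA, &svsk->sk_flags);
	skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err);
	if (skb != NULL) {
		/* ... and if that succeeds, the bit must be set again. */
		set_bit(SK_DATA, &svsk->sk_flags);
		rqstp->rq_skbuff = skb;	/* released via svc_release_skb() */
	}

	svc_sock_received(svsk);	/* drop SK_BUSY, maybe re-enqueue */
	return skb ? skb->len : -EAGAIN;
}
#endif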
/**
 * svc_reserve - change the space reserved for the reply to a request.
 * @rqstp: The request in question
 * @space: new max space to reserve
 *
 * Each request reserves some space on the output queue of the socket
 * to make sure the reply fits.  This function reduces that reserved
 * space to be the amount of space used already, plus @space.
 */
void svc_reserve(struct svc_rqst *rqstp, int space)
{
	space += rqstp->rq_res.head[0].iov_len;

	if (space < rqstp->rq_reserved) {
		struct svc_sock *svsk = rqstp->rq_sock;
		atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved);
		rqstp->rq_reserved = space;

		svc_sock_enqueue(svsk);
	}
}

/*
 * Release a socket after use.
 */
static inline void
svc_sock_put(struct svc_sock *svsk)
{
	if (atomic_dec_and_test(&svsk->sk_inuse)) {
		BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags));

		dprintk("svc: releasing dead socket\n");
		if (svsk->sk_sock->file)
			sockfd_put(svsk->sk_sock);
		else
			sock_release(svsk->sk_sock);
		if (svsk->sk_info_authunix != NULL)
			svcauth_unix_info_release(svsk->sk_info_authunix);
		kfree(svsk);
	}
}

static void
svc_sock_release(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk = rqstp->rq_sock;

	svc_release_skb(rqstp);

	svc_free_res_pages(rqstp);
	rqstp->rq_res.page_len = 0;
	rqstp->rq_res.page_base = 0;

	/* Reset response buffer and release
	 * the reservation.
	 * But first, check that enough space was reserved
	 * for the reply, otherwise we have a bug!
	 */
	if ((rqstp->rq_res.len) > rqstp->rq_reserved)
		printk(KERN_ERR "RPC request reserved %d but used %d\n",
		       rqstp->rq_reserved,
		       rqstp->rq_res.len);

	rqstp->rq_res.head[0].iov_len = 0;
	svc_reserve(rqstp, 0);
	rqstp->rq_sock = NULL;

	svc_sock_put(svsk);
}

/*
 * External function to wake up a server waiting for data
 * This really only makes sense for services like lockd
 * which have exactly one thread anyway.
 */
void
svc_wake_up(struct svc_serv *serv)
{
	struct svc_rqst	*rqstp;
	unsigned int i;
	struct svc_pool *pool;

	for (i = 0; i < serv->sv_nrpools; i++) {
		pool = &serv->sv_pools[i];

		spin_lock_bh(&pool->sp_lock);
		if (!list_empty(&pool->sp_threads)) {
			rqstp = list_entry(pool->sp_threads.next,
					   struct svc_rqst,
					   rq_list);
			dprintk("svc: daemon %p woken up.\n", rqstp);
			/*
			svc_thread_dequeue(pool, rqstp);
			rqstp->rq_sock = NULL;
			 */
			wake_up(&rqstp->rq_wait);
		}
		spin_unlock_bh(&pool->sp_lock);
	}
}

union svc_pktinfo_u {
	struct in_pktinfo pkti;
	struct in6_pktinfo pkti6;
};
#define SVC_PKTINFO_SPACE \
	CMSG_SPACE(sizeof(union svc_pktinfo_u))

static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
{
	switch (rqstp->rq_sock->sk_sk->sk_family) {
	case AF_INET: {
			struct in_pktinfo *pki = CMSG_DATA(cmh);

			cmh->cmsg_level = SOL_IP;
			cmh->cmsg_type = IP_PKTINFO;
			pki->ipi_ifindex = 0;
			pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr;
			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
		}
		break;

	case AF_INET6: {
			struct in6_pktinfo *pki = CMSG_DATA(cmh);

			cmh->cmsg_level = SOL_IPV6;
			cmh->cmsg_type = IPV6_PKTINFO;
			pki->ipi6_ifindex = 0;
			ipv6_addr_copy(&pki->ipi6_addr,
					&rqstp->rq_daddr.addr6);
			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
		}
		break;
	}
	return;
}

/*
 * Generic sendto routine
 */
static int
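/* The excerpt is cut off at the opening of the generic sendto routine
 * above, so the caller of svc_set_cmsg_data() is not shown. Below is a
 * hedged sketch of how a sender could use SVC_PKTINFO_SPACE and
 * svc_set_cmsg_data() to attach the reply's source address as ancillary
 * data; the function name and exact layout are illustrative only, not
 * this file's actual implementation.
 */
#if 0	/* illustration only, not compiled */
static int example_send_head(struct svc_rqst *rqstp, struct kvec *vec)
{
	union {
		struct cmsghdr hdr;
		long all[SVC_PKTINFO_SPACE / sizeof(long)];
	} buffer;			/* aligned room for one pktinfo cmsg */
	struct cmsghdr *cmh = &buffer.hdr;
	struct msghdr msg = {
		.msg_name	= &rqstp->rq_addr,
		.msg_namelen	= rqstp->rq_addrlen,
		.msg_control	= cmh,
		.msg_controllen	= sizeof(buffer),
	};

	svc_set_cmsg_data(rqstp, cmh);	/* fill in IP_PKTINFO/IPV6_PKTINFO */
	return kernel_sendmsg(rqstp->rq_sock->sk_sock, &msg, vec, 1,
			      vec->iov_len);
}
#endif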