📄 xprtsock.c
字号:
/*
 * linux/net/sunrpc/xprtsock.c
 *
 * Client-side transport implementation for sockets.
 *
 * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com>
 * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com>
 * TCP NFS related read + write fixes
 *  (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
 *
 * Rewrite of large parts of the code in order to stabilize TCP stuff.
 * Fix behaviour when socket buffer is full.
 *  (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
 *
 * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com>
 *
 * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005.
 *   <gilles.quillard@bull.net>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/capability.h>
#include <linux/pagemap.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/xprtsock.h>
#include <linux/file.h>

#include <net/sock.h>
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>

/*
 * xprtsock tunables
 *
 * These are non-static globals; each one is also exposed through the
 * sysctl table below when RPC_DEBUG is defined.
 */
unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;

unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;

/*
 * We can register our own files under /proc/sys/sunrpc by
 * calling register_sysctl_table() again.  The files in that
 * directory become the union of all files registered there.
 *
 * We simply need to make sure that we don't collide with
 * someone else's file names!
 */

#ifdef RPC_DEBUG

/*
 * Lower/upper bounds referenced by the .extra1/.extra2 members of the
 * table entries below, all of which use proc_dointvec_minmax as handler.
 */
static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;

static struct ctl_table_header *sunrpc_table_header;

/*
 * FIXME: changing the UDP slot table size should also resize the UDP
 *        socket buffers for existing UDP transports
 */
static ctl_table xs_tunables_table[] = {
	{
		.ctl_name	= CTL_SLOTTABLE_UDP,
		.procname	= "udp_slot_table_entries",
		.data		= &xprt_udp_slot_table_entries,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &min_slot_table_size,
		.extra2		= &max_slot_table_size
	},
	{
		.ctl_name	= CTL_SLOTTABLE_TCP,
		.procname	= "tcp_slot_table_entries",
		.data		= &xprt_tcp_slot_table_entries,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &min_slot_table_size,
		.extra2		= &max_slot_table_size
	},
	{
		.ctl_name	= CTL_MIN_RESVPORT,
		.procname	= "min_resvport",
		.data		= &xprt_min_resvport,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &xprt_min_resvport_limit,
		.extra2		= &xprt_max_resvport_limit
	},
	{
		.ctl_name	= CTL_MAX_RESVPORT,
		.procname	= "max_resvport",
		.data		= &xprt_max_resvport,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &xprt_min_resvport_limit,
		.extra2		= &xprt_max_resvport_limit
	},
	{
		/* terminating entry */
		.ctl_name = 0,
	},
};

/*
 * Parent entry: a read/search-only (0555) "sunrpc" node whose child is
 * xs_tunables_table.
 */
static ctl_table sunrpc_table[] = {
	{
		.ctl_name	= CTL_SUNRPC,
		.procname	= "sunrpc",
		.mode		= 0555,
		.child		= xs_tunables_table
	},
	{
		/* terminating entry */
		.ctl_name = 0,
	},
};

#endif

/*
 * How many times to try sending a request on a socket before waiting
 * for the socket buffer to clear.
 */
#define XS_SENDMSG_RETRY	(10U)

/*
 * Time out for an RPC UDP socket connect.  UDP socket connects are
 * synchronous, but we set a timeout anyway in case of resource
 * exhaustion on the local host.
 */
#define XS_UDP_CONN_TO		(5U * HZ)

/*
 * Wait duration for an RPC TCP connection to be established.  Solaris
 * NFS over TCP uses 60 seconds, for example, which is in line with how
 * long a server takes to reboot.
 */
#define XS_TCP_CONN_TO		(60U * HZ)

/*
 * Wait duration for a reply from the RPC portmapper.
 */
#define XS_BIND_TO		(60U * HZ)

/*
 * Delay if a UDP socket connect error occurs.  This is most likely some
 * kind of resource problem on the local host.
 */
#define XS_UDP_REEST_TO		(2U * HZ)

/*
 * The reestablish timeout allows clients to delay for a bit before attempting
 * to reconnect to a server that just dropped our connection.
 *
 * We implement an exponential backoff when trying to reestablish a TCP
 * transport connection with the server.  Some servers like to drop a TCP
 * connection when they are overworked, so we start with a short timeout and
 * increase over time if the server is down or not responding.
 */
#define XS_TCP_INIT_REEST_TO	(3U * HZ)
#define XS_TCP_MAX_REEST_TO	(5U * 60 * HZ)

/*
 * TCP idle timeout; client drops the transport socket if it is idle
 * for this long.  Note that we also timeout UDP sockets to prevent
 * holding port numbers when there is no RPC traffic.
*/#define XS_IDLE_DISC_TO (5U * 60 * HZ)#ifdef RPC_DEBUG# undef RPC_DEBUG_DATA# define RPCDBG_FACILITY RPCDBG_TRANS#endif#ifdef RPC_DEBUG_DATAstatic void xs_pktdump(char *msg, u32 *packet, unsigned int count){ u8 *buf = (u8 *) packet; int j; dprintk("RPC: %s\n", msg); for (j = 0; j < count && j < 128; j += 4) { if (!(j & 31)) { if (j) dprintk("\n"); dprintk("0x%04x ", j); } dprintk("%02x%02x%02x%02x ", buf[j], buf[j+1], buf[j+2], buf[j+3]); } dprintk("\n");}#elsestatic inline void xs_pktdump(char *msg, u32 *packet, unsigned int count){ /* NOP */}#endifstruct sock_xprt { struct rpc_xprt xprt; /* * Network layer */ struct socket * sock; struct sock * inet; /* * State of TCP reply receive */ __be32 tcp_fraghdr, tcp_xid; u32 tcp_offset, tcp_reclen; unsigned long tcp_copied, tcp_flags; /* * Connection of transports */ struct delayed_work connect_worker; struct sockaddr_storage addr; unsigned short port; /* * UDP socket buffer size parameters */ size_t rcvsize, sndsize; /* * Saved socket callback addresses */ void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); void (*old_write_space)(struct sock *);};/* * TCP receive state flags */#define TCP_RCV_LAST_FRAG (1UL << 0)#define TCP_RCV_COPY_FRAGHDR (1UL << 1)#define TCP_RCV_COPY_XID (1UL << 2)#define TCP_RCV_COPY_DATA (1UL << 3)static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt){ return (struct sockaddr *) &xprt->addr;}static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt){ return (struct sockaddr_in *) &xprt->addr;}static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt){ return (struct sockaddr_in6 *) &xprt->addr;}static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt){ struct sockaddr_in *addr = xs_addr_in(xprt); char *buf; buf = kzalloc(20, GFP_KERNEL); if (buf) { snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); } xprt->address_strings[RPC_DISPLAY_ADDR] = buf; buf = kzalloc(8, GFP_KERNEL); if (buf) { snprintf(buf, 8, "%u", 
ntohs(addr->sin_port)); } xprt->address_strings[RPC_DISPLAY_PORT] = buf; buf = kzalloc(8, GFP_KERNEL); if (buf) { if (xprt->prot == IPPROTO_UDP) snprintf(buf, 8, "udp"); else snprintf(buf, 8, "tcp"); } xprt->address_strings[RPC_DISPLAY_PROTO] = buf; buf = kzalloc(48, GFP_KERNEL); if (buf) { snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", NIPQUAD(addr->sin_addr.s_addr), ntohs(addr->sin_port), xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; buf = kzalloc(10, GFP_KERNEL); if (buf) { snprintf(buf, 10, "%02x%02x%02x%02x", NIPQUAD(addr->sin_addr.s_addr)); } xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; buf = kzalloc(8, GFP_KERNEL); if (buf) { snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); } xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; buf = kzalloc(30, GFP_KERNEL); if (buf) { snprintf(buf, 30, NIPQUAD_FMT".%u.%u", NIPQUAD(addr->sin_addr.s_addr), ntohs(addr->sin_port) >> 8, ntohs(addr->sin_port) & 0xff); } xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; xprt->address_strings[RPC_DISPLAY_NETID] = kstrdup(xprt->prot == IPPROTO_UDP ? RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL);}static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt){ struct sockaddr_in6 *addr = xs_addr_in6(xprt); char *buf; buf = kzalloc(40, GFP_KERNEL); if (buf) { snprintf(buf, 40, NIP6_FMT, NIP6(addr->sin6_addr)); } xprt->address_strings[RPC_DISPLAY_ADDR] = buf; buf = kzalloc(8, GFP_KERNEL); if (buf) { snprintf(buf, 8, "%u", ntohs(addr->sin6_port)); } xprt->address_strings[RPC_DISPLAY_PORT] = buf; buf = kzalloc(8, GFP_KERNEL); if (buf) { if (xprt->prot == IPPROTO_UDP) snprintf(buf, 8, "udp"); else snprintf(buf, 8, "tcp"); } xprt->address_strings[RPC_DISPLAY_PROTO] = buf; buf = kzalloc(64, GFP_KERNEL); if (buf) { snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s", NIP6(addr->sin6_addr), ntohs(addr->sin6_port), xprt->prot == IPPROTO_UDP ? 
"udp" : "tcp"); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; buf = kzalloc(36, GFP_KERNEL); if (buf) { snprintf(buf, 36, NIP6_SEQFMT, NIP6(addr->sin6_addr)); } xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; buf = kzalloc(8, GFP_KERNEL); if (buf) { snprintf(buf, 8, "%4hx", ntohs(addr->sin6_port)); } xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; buf = kzalloc(50, GFP_KERNEL); if (buf) { snprintf(buf, 50, NIP6_FMT".%u.%u", NIP6(addr->sin6_addr), ntohs(addr->sin6_port) >> 8, ntohs(addr->sin6_port) & 0xff); } xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; xprt->address_strings[RPC_DISPLAY_NETID] = kstrdup(xprt->prot == IPPROTO_UDP ? RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL);}static void xs_free_peer_addresses(struct rpc_xprt *xprt){ int i; for (i = 0; i < RPC_DISPLAY_MAX; i++) kfree(xprt->address_strings[i]);}#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more){ struct msghdr msg = { .msg_name = addr, .msg_namelen = addrlen, .msg_flags = XS_SENDMSG_FLAGS | (more ? 
MSG_MORE : 0), }; struct kvec iov = { .iov_base = vec->iov_base + base, .iov_len = vec->iov_len - base, }; if (iov.iov_len != 0) return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); return kernel_sendmsg(sock, &msg, NULL, 0, 0);}static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more){ struct page **ppage; unsigned int remainder; int err, sent = 0; remainder = xdr->page_len - base; base += xdr->page_base; ppage = xdr->pages + (base >> PAGE_SHIFT); base &= ~PAGE_MASK; for(;;) { unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder); int flags = XS_SENDMSG_FLAGS; remainder -= len; if (remainder != 0 || more) flags |= MSG_MORE; err = sock->ops->sendpage(sock, *ppage, base, len, flags); if (remainder == 0 || err != len) break; sent += err; ppage++; base = 0; } if (sent == 0) return err; if (err > 0) sent += err; return sent;}/** * xs_sendpages - write pages directly to a socket * @sock: socket to send on * @addr: UDP only -- address of destination * @addrlen: UDP only -- length of destination address * @xdr: buffer containing this request * @base: starting position in the buffer * */static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base){ unsigned int remainder = xdr->len - base; int err, sent = 0; if (unlikely(!sock)) return -ENOTCONN; clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); if (base != 0) { addr = NULL; addrlen = 0; } if (base < xdr->head[0].iov_len || addr != NULL) { unsigned int len = xdr->head[0].iov_len - base; remainder -= len; err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0); if (remainder == 0 || err != len) goto out; sent += err; base = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -