📄 nfs_socket.c
字号:
/* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)nfs_socket.c 8.3 (Berkeley) 1/12/94 *//* * Socket operations for use by nfs */#include <sys/param.h>#include <sys/systm.h>#include <sys/proc.h>#include <sys/mount.h>#include <sys/kernel.h>#include <sys/mbuf.h>#include <sys/vnode.h>#include <sys/domain.h>#include <sys/protosw.h>#include <sys/socket.h>#include <sys/socketvar.h>#include <sys/syslog.h>#include <sys/tprintf.h>#include <netinet/in.h>#include <netinet/tcp.h>#include <nfs/rpcv2.h>#include <nfs/nfsv2.h>#include <nfs/nfs.h>#include <nfs/xdr_subs.h>#include <nfs/nfsm_subs.h>#include <nfs/nfsmount.h>#include <nfs/nfsnode.h>#include <nfs/nfsrtt.h>#include <nfs/nqnfs.h>#define TRUE 1#define FALSE 0/* * Estimate rto for an nfs rpc sent via. an unreliable datagram. * Use the mean and mean deviation of rtt for the appropriate type of rpc * for the frequent rpcs and a default for the others. * The justification for doing "other" this way is that these rpcs * happen so infrequently that timer est. would probably be stale. * Also, since many of these rpcs are * non-idempotent, a conservative timeout is desired. * getattr, lookup - A+2D * read, write - A+4D * other - nm_timeo */#define NFS_RTO(n, t) \ ((t) == 0 ? (n)->nm_timeo : \ ((t) < 3 ? \ (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]/* * External data, mostly RPC constants in XDR form */extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred, rpc_auth_kerb;extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers;extern time_t nqnfsstarttime;extern int nonidempotent[NFS_NPROCS];/* * Maps errno values to nfs error numbers. * Use NFSERR_IO as the catch all for ones not specifically defined in * RFC 1094. 
*/
/*
 * The table has one slot per position 1..ELAST; only the positions listed
 * here carry a specific code, every other slot is NFSERR_IO:
 *	 1 PERM      2 NOENT     6 NXIO     13 ACCES    17 EXIST
 *	19 NODEV    20 NOTDIR   21 ISDIR    27 FBIG     28 NOSPC
 *	30 ROFS     63 NAMETOL  66 NOTEMPTY 69 DQUOT    70 STALE
 * NOTE(review): presumably indexed by (errno - 1); the lookup site is not
 * in this part of the file -- confirm against the caller.
 */
static int nfsrv_errmap[ELAST] = {
  NFSERR_PERM,	NFSERR_NOENT,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_NXIO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_ACCES,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_EXIST,	NFSERR_IO,	NFSERR_NODEV,	NFSERR_NOTDIR,
  NFSERR_ISDIR,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_FBIG,	NFSERR_NOSPC,	NFSERR_IO,	NFSERR_ROFS,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_NAMETOL,	NFSERR_IO,	NFSERR_IO,
  NFSERR_NOTEMPTY, NFSERR_IO,	NFSERR_IO,	NFSERR_DQUOT,	NFSERR_STALE,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,
};

/*
 * Defines which timer to use for the procnum.
 *	0 - default
 *	1 - getattr
 *	2 - lookup
 *	3 - read
 *	4 - write
 *
 * The table is indexed by procedure number (see NFS_SRTT/NFS_SDRTT, which
 * use proct[r_procnum] - 1 as the index into nm_srtt[]/nm_sdrtt[]).
 * Non-zero slots: 1 -> 1, 4 -> 2, 5/6/16/18 -> 3, 8 -> 4.
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
};

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
*/#define NFS_CWNDSCALE 256#define NFS_MAXCWND (NFS_CWNDSCALE * 32)static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };int nfs_sbwait();void nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();void nfs_rcvunlock(), nqnfs_serverd(), nqnfs_clientlease();struct mbuf *nfsm_rpchead();int nfsrtton = 0;struct nfsrtt nfsrtt;struct nfsd nfsd_head;int nfsrv_null(), nfsrv_getattr(), nfsrv_setattr(), nfsrv_lookup(), nfsrv_readlink(), nfsrv_read(), nfsrv_write(), nfsrv_create(), nfsrv_remove(), nfsrv_rename(), nfsrv_link(), nfsrv_symlink(), nfsrv_mkdir(), nfsrv_rmdir(), nfsrv_readdir(), nfsrv_statfs(), nfsrv_noop(), nqnfsrv_readdirlook(), nqnfsrv_getlease(), nqnfsrv_vacated(), nqnfsrv_access();int (*nfsrv_procs[NFS_NPROCS])() = { nfsrv_null, nfsrv_getattr, nfsrv_setattr, nfsrv_noop, nfsrv_lookup, nfsrv_readlink, nfsrv_read, nfsrv_noop, nfsrv_write, nfsrv_create, nfsrv_remove, nfsrv_rename, nfsrv_link, nfsrv_symlink, nfsrv_mkdir, nfsrv_rmdir, nfsrv_readdir, nfsrv_statfs, nqnfsrv_readdirlook, nqnfsrv_getlease, nqnfsrv_vacated, nfsrv_noop, nqnfsrv_access,};struct nfsreq nfsreqh;/* * Initialize sockets and congestion for a new NFS connection. * We do not free the sockaddr if error. */nfs_connect(nmp, rep) register struct nfsmount *nmp; struct nfsreq *rep;{ register struct socket *so; int s, error, rcvreserve, sndreserve; struct sockaddr *saddr; struct sockaddr_in *sin; struct mbuf *m; u_short tport; nmp->nm_so = (struct socket *)0; saddr = mtod(nmp->nm_nam, struct sockaddr *); if (error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) goto bad; so = nmp->nm_so; nmp->nm_soflags = so->so_proto->pr_flags; /* * Some servers require that the client port be a reserved port number. 
*/ if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) { MGET(m, M_WAIT, MT_SONAME); sin = mtod(m, struct sockaddr_in *); sin->sin_len = m->m_len = sizeof (struct sockaddr_in); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; tport = IPPORT_RESERVED - 1; sin->sin_port = htons(tport); while ((error = sobind(so, m)) == EADDRINUSE && --tport > IPPORT_RESERVED / 2) sin->sin_port = htons(tport); m_freem(m); if (error) goto bad; } /* * Protocols that do not require connections may be optionally left * unconnected for servers that reply from a port other than NFS_PORT. */ if (nmp->nm_flag & NFSMNT_NOCONN) { if (nmp->nm_soflags & PR_CONNREQUIRED) { error = ENOTCONN; goto bad; } } else { if (error = soconnect(so, nmp->nm_nam)) goto bad; /* * Wait for the connection to complete. Cribbed from the * connect system call but with the wait timing out so * that interruptible mounts don't hang here for a long time. */ s = splnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 2 * hz); if ((so->so_state & SS_ISCONNECTING) && so->so_error == 0 && rep && (error = nfs_sigintr(nmp, rep, rep->r_procp))) { so->so_state &= ~SS_ISCONNECTING; splx(s); goto bad; } } if (so->so_error) { error = so->so_error; so->so_error = 0; splx(s); goto bad; } splx(s); } if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { so->so_rcv.sb_timeo = (5 * hz); so->so_snd.sb_timeo = (5 * hz); } else { so->so_rcv.sb_timeo = 0; so->so_snd.sb_timeo = 0; } if (nmp->nm_sotype == SOCK_DGRAM) { sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR; rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR; } else if (nmp->nm_sotype == SOCK_SEQPACKET) { sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2; } else { if (nmp->nm_sotype != SOCK_STREAM) panic("nfscon sotype"); if (so->so_proto->pr_flags & PR_CONNREQUIRED) { MGET(m, M_WAIT, MT_SOOPTS); *mtod(m, int *) = 1; m->m_len = sizeof(int); sosetopt(so, 
SOL_SOCKET, SO_KEEPALIVE, m); } if (so->so_proto->pr_protocol == IPPROTO_TCP) { MGET(m, M_WAIT, MT_SOOPTS); *mtod(m, int *) = 1; m->m_len = sizeof(int); sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); } sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) * 2; rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) * 2; } if (error = soreserve(so, sndreserve, rcvreserve)) goto bad; so->so_rcv.sb_flags |= SB_NOINTR; so->so_snd.sb_flags |= SB_NOINTR; /* Initialize other non-zero congestion variables */ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = nmp->nm_srtt[4] = (NFS_TIMEO << 3); nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0; nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ nmp->nm_sent = 0; nmp->nm_timeouts = 0; return (0);bad: nfs_disconnect(nmp); return (error);}/* * Reconnect routine: * Called when a connection is broken on a reliable protocol. * - clean up the old socket * - nfs_connect() again * - set R_MUSTRESEND for all outstanding requests on mount point * If this fails the mount point is DEAD! * nb: Must be called with the nfs_sndlock() set on the mount point. */nfs_reconnect(rep) register struct nfsreq *rep;{ register struct nfsreq *rp; register struct nfsmount *nmp = rep->r_nmp; int error; nfs_disconnect(nmp); while (error = nfs_connect(nmp, rep)) { if (error == EINTR || error == ERESTART) return (EINTR); (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); } /* * Loop through outstanding request list and fix up all requests * on old socket. */ rp = nfsreqh.r_next; while (rp != &nfsreqh) { if (rp->r_nmp == nmp) rp->r_flags |= R_MUSTRESEND; rp = rp->r_next; } return (0);}/* * NFS disconnect. Clean up and unlink. */voidnfs_disconnect(nmp) register struct nfsmount *nmp;{ register struct socket *so; if (nmp->nm_so) { so = nmp->nm_so; nmp->nm_so = (struct socket *)0; soshutdown(so, 2); soclose(so); }}/* * This is the nfs send routine. 
For connection based socket types, it * must be called with an nfs_sndlock() on the socket. * "rep == NULL" indicates that it has been called from a server. * For the client side: * - return EINTR if the RPC is terminated, 0 otherwise * - set R_MUSTRESEND if the send fails for any reason * - do any cleanup required by recoverable socket errors (???) * For the server side: * - return EINTR or ERESTART if interrupted by a signal * - return EPIPE if a connection is lost for connection based sockets (TCP...) * - do any cleanup required by recoverable socket errors (???) */nfs_send(so, nam, top, rep) register struct socket *so; struct mbuf *nam; register struct mbuf *top; struct nfsreq *rep;{ struct mbuf *sendnam; int error, soflags, flags; if (rep) { if (rep->r_flags & R_SOFTTERM) { m_freem(top); return (EINTR); } if ((so = rep->r_nmp->nm_so) == NULL) { rep->r_flags |= R_MUSTRESEND; m_freem(top); return (0); } rep->r_flags &= ~R_MUSTRESEND; soflags = rep->r_nmp->nm_soflags; } else soflags = so->so_proto->pr_flags; if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) sendnam = (struct mbuf *)0; else sendnam = nam; if (so->so_type == SOCK_SEQPACKET) flags = MSG_EOR; else flags = 0; error = sosend(so, sendnam, (struct uio *)0, top, (struct mbuf *)0, flags); if (error) { if (rep) { log(LOG_INFO, "nfs send error %d for server %s\n",error, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); /* * Deal with errors for the client side. */ if (rep->r_flags & R_SOFTTERM) error = EINTR; else rep->r_flags |= R_MUSTRESEND; } else log(LOG_INFO, "nfsd send error %d\n", error); /* * Handle any recoverable (soft) socket errors here. (???) */ if (error != EINTR && error != ERESTART && error != EWOULDBLOCK && error != EPIPE) error = 0; } return (error);}/* * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all * done by soreceive(), but for SOCK_STREAM we must deal with the Record * Mark and consolidate the data into a new mbuf list. 
* nb: Sometimes TCP passes the data up to soreceive() in long lists of * small mbufs. * For SOCK_STREAM we must be very careful to read an entire record once * we have read any of it, even if the system call has been interrupted. */nfs_receive(rep, aname, mp) register struct nfsreq *rep; struct mbuf **aname; struct mbuf **mp;{ register struct socket *so; struct uio auio; struct iovec aio; register struct mbuf *m; struct mbuf *control; u_long len; struct mbuf **getnam; int error, sotype, rcvflg; struct proc *p = curproc; /* XXX */ /* * Set up arguments for soreceive() */ *mp = (struct mbuf *)0; *aname = (struct mbuf *)0; sotype = rep->r_nmp->nm_sotype;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -