📄 uipc_socket.c
字号:
/* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 * $Id: uipc_socket.c,v 1.7 2003/01/03 18:09:25 joel Exp $ */#include <sys/param.h>#include <sys/queue.h>#include <sys/systm.h>#include <sys/proc.h>#include <sys/file.h>#include <sys/malloc.h>#include <sys/mbuf.h>#include <sys/domain.h>#include <sys/kernel.h>#include <sys/protosw.h>#include <sys/socket.h>#include <sys/socketvar.h>#include <sys/resourcevar.h>#include <sys/signalvar.h>#include <sys/sysctl.h>#include <limits.h>static int somaxconn = SOMAXCONN;SYSCTL_INT(_kern, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, "");/* * Socket operation routines. * These routines are called by the routines in * sys_socket.c or from a system process, and * implement the semantics of socket operations by * switching out to the protocol specific routines. *//*ARGSUSED*/intsocreate(dom, aso, type, proto, p) int dom; struct socket **aso; register int type; int proto; struct proc *p;{ register struct protosw *prp; register struct socket *so; register int error; if (proto) prp = pffindproto(dom, proto, type); else prp = pffindtype(dom, type); if (prp == 0 || prp->pr_usrreqs == 0) return (EPROTONOSUPPORT); if (prp->pr_type != type) return (EPROTOTYPE); MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT); bzero((caddr_t)so, sizeof(*so)); TAILQ_INIT(&so->so_incomp); TAILQ_INIT(&so->so_comp); so->so_type = type; so->so_state = SS_PRIV; so->so_uid = 0; so->so_proto = prp; error = (*prp->pr_usrreqs->pru_attach)(so, proto); if (error) { so->so_state |= SS_NOFDREF; sofree(so); return (error); } *aso = so; return (0);}intsobind(so, nam) struct socket *so; struct mbuf *nam;{ int s = splnet(); int error; error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam); splx(s); return (error);}intsolisten(so, backlog) register struct socket *so; int backlog;{ int s = splnet(), error; error = (*so->so_proto->pr_usrreqs->pru_listen)(so); if (error) { splx(s); return (error); } if (so->so_comp.tqh_first == NULL) so->so_options |= SO_ACCEPTCONN; if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; so->so_qlimit = backlog; splx(s); return (0);}voidsofree(so) register struct socket *so;{ struct socket *head = so->so_head; if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) return; if (head != NULL) { if (so->so_state & SS_INCOMP) { TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; } else if (so->so_state & SS_COMP) { TAILQ_REMOVE(&head->so_comp, so, so_list); } else { panic("sofree: not queued"); } head->so_qlen--; so->so_state &= ~(SS_INCOMP|SS_COMP); so->so_head = NULL; } sbrelease(&so->so_snd); sorflush(so); FREE(so, M_SOCKET);}/* * Close a socket on last file table reference removal. * Initiate disconnect if connected. * Free socket when disconnect complete. */intsoclose(so) register struct socket *so;{ int s = splnet(); /* conservative */ int error = 0; if (so->so_options & SO_ACCEPTCONN) { struct socket *sp, *sonext; for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) { sonext = sp->so_list.tqe_next; (void) soabort(sp); } for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) { sonext = sp->so_list.tqe_next; (void) soabort(sp); } } if (so->so_pcb == 0) goto discard; if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { error = sodisconnect(so); if (error) goto drop; } if (so->so_options & SO_LINGER) { if ((so->so_state & SS_ISDISCONNECTING) && (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { soconnsleep (so); } } }drop: if (so->so_pcb) { int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); if (error == 0) error = error2; }discard: if (so->so_state & SS_NOFDREF) panic("soclose: NOFDREF"); so->so_state |= SS_NOFDREF; sofree(so); splx(s); return (error);}/* * Must be called at splnet... */intsoabort(so) struct socket *so;{ return (*so->so_proto->pr_usrreqs->pru_abort)(so);}intsoaccept(so, nam) register struct socket *so; struct mbuf *nam;{ int s = splnet(); int error; if ((so->so_state & SS_NOFDREF) == 0) panic("soaccept: !NOFDREF"); so->so_state &= ~SS_NOFDREF; error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); splx(s); return (error);}intsoconnect(so, nam) register struct socket *so; struct mbuf *nam;{ int s; int error; if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); s = splnet(); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. * This allows user to disconnect by connecting to, e.g., * a null address. */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || (error = sodisconnect(so)))) error = EISCONN; else error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam); splx(s); return (error);}intsoconnect2(so1, so2) register struct socket *so1; struct socket *so2;{ int s = splnet(); int error; error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); splx(s); return (error);}intsodisconnect(so) register struct socket *so;{ int s = splnet(); int error; if ((so->so_state & SS_ISCONNECTED) == 0) { error = ENOTCONN; goto bad; } if (so->so_state & SS_ISDISCONNECTING) { error = EALREADY; goto bad; } error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);bad: splx(s); return (error);}#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)/* * Send on a socket. * If send must go all at once and message is larger than * send buffering, then hard error. * Lock against other senders. * If must go all at once and not enough room now, then * inform user that this would block and do nothing. * Otherwise, if nonblocking, send as much as possible. * The data to be sent is described by "uio" if nonzero, * otherwise by the mbuf chain "top" (which must be null * if uio is not). Data provided in mbuf chain must be small * enough to send all at once. * * Returns nonzero on error, timeout or signal; callers * must check for short counts if EINTR/ERESTART are returned. * Data and control buffers are freed on return. */intsosend(so, addr, uio, top, control, flags) register struct socket *so; struct mbuf *addr; struct uio *uio; struct mbuf *top; struct mbuf *control; int flags;{ struct mbuf **mp; register struct mbuf *m; register long space, len, resid; int clen = 0, error, s, dontroute, mlen; int atomic = sosendallatonce(so) || top; if (uio) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. * However, space must be signed, as it might be less than 0 * if we over-committed, and we must use a signed comparison * of space and resid. On the other hand, a negative resid * causes us to loop sending 0-length segments to the protocol. * * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. */ if ((resid < 0) || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); if (control) clen = control->m_len;#define snderr(errno) { error = errno; splx(s); goto release; }restart: error = sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out; do { s = splnet(); if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); if (so->so_error) { error = so->so_error; so->so_error = 0; splx(s); goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection- * based socket if it supports implied connect. * Return ENOTCONN if not connected and no address is * supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) snderr(ENOTCONN); } else if (addr == 0) snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? ENOTCONN : EDESTADDRREQ); } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) snderr(EMSGSIZE); if (space < resid + clen && uio && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if (so->so_state & SS_NBIO) snderr(EWOULDBLOCK); sbunlock(&so->so_snd); error = sbwait(&so->so_snd); splx(s); if (error) goto out; goto restart; } splx(s); mp = ⊤ space -= clen; do { if (uio == NULL) { /* * Data is prepackaged in "top". */ resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else do { if (top == 0) { MGETHDR(m, M_WAIT, MT_DATA); mlen = MHLEN; m->m_pkthdr.len = 0; m->m_pkthdr.rcvif = (struct ifnet *)0; } else { MGET(m, M_WAIT, MT_DATA); mlen = MLEN; } if (resid >= MINCLSIZE) { MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) goto nopages; mlen = MCLBYTES; len = min(min(mlen, resid), space); } else {nopages: len = min(min(mlen, resid), space); /* * For datagram protocols, leave room * for protocol headers in first mbuf. */ if (atomic && top == 0 && len < mlen) MH_ALIGN(m, len); } space -= len; error = uiomove(mtod(m, caddr_t), (int)len, uio); resid = uio->uio_resid; m->m_len = len; *mp = m; top->m_pkthdr.len += len; if (error) goto release; mp = &m->m_next; if (resid <= 0) { if (flags & MSG_EOR) top->m_flags |= M_EOR; break; } } while (space > 0 && atomic); if (dontroute) so->so_options |= SO_DONTROUTE; s = splnet(); /* XXX */ error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol * understands this flag and nothing left to * send then use PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : 0, top, addr, control); splx(s); if (dontroute) so->so_options &= ~SO_DONTROUTE; clen = 0; control = 0; top = 0; mp = ⊤ if (error) goto release; } while (resid && space > 0); } while (resid);release: sbunlock(&so->so_snd);out: if (top) m_freem(top); if (control) m_freem(control); return (error);}/* * Implement receive operations on a socket. * We depend on the way that records are added to the sockbuf * by sbappend*. In particular, each record (mbufs linked through m_next) * must begin with an address if the protocol so specifies, * followed by an optional mbuf or mbufs containing ancillary data, * and then zero or more mbufs of data. * In order to avoid blocking network interrupts for the entire time here, * we splx() while doing the actual copy to user space. * Although the sockbuf is locked, new data may still be appended, * and thus we must maintain consistency of the sockbuf during that time. * * The caller may receive the data as a single mbuf chain by supplying * an mbuf **mp0 for use in returning the chain. The uio is then used * only for the count in uio_resid. */intsoreceive(so, paddr, uio, mp0, controlp, flagsp) register struct socket *so; struct mbuf **paddr; struct uio *uio; struct mbuf **mp0; struct mbuf **controlp; int *flagsp;{ register struct mbuf *m, **mp; register int flags, len, error, s, offset; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; int orig_resid = uio->uio_resid; mp = mp0; if (paddr) *paddr = 0; if (controlp) *controlp = 0; if (flagsp) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); if (error) goto bad; do { error = uiomove(mtod(m, caddr_t), (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m);bad: if (m) m_freem(m); return (error); } if (mp) *mp = (struct mbuf *)0; if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) (*pr->pr_usrreqs->pru_rcvd)(so, 0);restart: error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) return (error); s = splnet(); m = so->so_rcv.sb_mb; /* * If we have less data than requested, block awaiting more * (subject to any timeout) if: * 1. the current count is less than the low water mark, or * 2. MSG_WAITALL is set, and it is possible to do the entire
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -