📄 socket.c
字号:
/*
 * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC")
 * Copyright (C) 1998-2003 Internet Software Consortium.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

/* $Id: socket.c,v 1.207.2.19.2.13 2004/07/01 04:51:15 marka Exp $ */

#include <config.h>

#include <sys/param.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/uio.h>

#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <isc/buffer.h>
#include <isc/bufferlist.h>
#include <isc/condition.h>
#include <isc/formatcheck.h>
#include <isc/list.h>
#include <isc/log.h>
#include <isc/mem.h>
#include <isc/msgs.h>
#include <isc/mutex.h>
#include <isc/net.h>
#include <isc/platform.h>
#include <isc/print.h>
#include <isc/region.h>
#include <isc/socket.h>
#include <isc/strerror.h>
#include <isc/task.h>
#include <isc/thread.h>
#include <isc/util.h>

#include "errno2result.h"

#ifndef ISC_PLATFORM_USETHREADS
#include "socket_p.h"
#endif /* ISC_PLATFORM_USETHREADS */

/*
 * Some systems define the socket length argument as an int, some as size_t,
 * some as socklen_t. This is here so it can be easily changed if needed.
 */
#ifndef ISC_SOCKADDR_LEN_T
#ifdef _BSD_SOCKLEN_T_
#define ISC_SOCKADDR_LEN_T _BSD_SOCKLEN_T_
#else
#define ISC_SOCKADDR_LEN_T unsigned int
#endif
#endif

/*
 * Define what the possible "soft" errors can be. These are non-fatal returns
 * of various network related functions, like recv() and so on.
 *
 * For some reason, BSDI (and perhaps others) will sometimes return <0
 * from recv() but will have errno==0. This is broken, but we have to
 * work around it here.
 */
#define SOFT_ERROR(e)	((e) == EAGAIN || \
			 (e) == EWOULDBLOCK || \
			 (e) == EINTR || \
			 (e) == 0)

/*
 * Expands to the three leading logging arguments (category, module, level)
 * for a debug message at level x.
 */
#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)

/*
 * DLVL(90) -- Function entry/exit and other tracing.
 * DLVL(70) -- Socket "correctness" -- including returning of events, etc.
 * DLVL(60) -- Socket data send/receive
 * DLVL(50) -- Event tracing, including receiving/sending completion events.
 * DLVL(20) -- Socket creation/destruction.
 */
#define TRACE_LEVEL		90
#define CORRECTNESS_LEVEL	70
#define IOEVENT_LEVEL		60
#define EVENT_LEVEL		50
#define CREATION_LEVEL		20

#define TRACE		DLVL(TRACE_LEVEL)
#define CORRECTNESS	DLVL(CORRECTNESS_LEVEL)
#define IOEVENT		DLVL(IOEVENT_LEVEL)
#define EVENT		DLVL(EVENT_LEVEL)
#define CREATION	DLVL(CREATION_LEVEL)

typedef isc_event_t intev_t;

#define SOCKET_MAGIC		ISC_MAGIC('I', 'O', 'i', 'o')
#define VALID_SOCKET(t)		ISC_MAGIC_VALID(t, SOCKET_MAGIC)

/*
 * IPv6 control information. If the socket is an IPv6 socket we want
 * to collect the destination address and interface so the client can
 * set them on outgoing packets.
 */
#ifdef ISC_PLATFORM_HAVEIPV6
#ifndef USE_CMSG
#define USE_CMSG	1
#endif
#endif

/*
 * NetBSD and FreeBSD can timestamp packets. XXXMLG Should we have
 * a setsockopt() like interface to request timestamps, and if the OS
 * doesn't do it for us, call gettimeofday() on every UDP receive?
 */
#ifdef SO_TIMESTAMP
#ifndef USE_CMSG
#define USE_CMSG	1
#endif
#endif

/*
 * The number of times a send operation is repeated if the result is EINTR.
*/#define NRETRIES 10struct isc_socket { /* Not locked. */ unsigned int magic; isc_socketmgr_t *manager; isc_mutex_t lock; isc_sockettype_t type; /* Locked by socket lock. */ ISC_LINK(isc_socket_t) link; unsigned int references; int fd; int pf; ISC_LIST(isc_socketevent_t) send_list; ISC_LIST(isc_socketevent_t) recv_list; ISC_LIST(isc_socket_newconnev_t) accept_list; isc_socket_connev_t *connect_ev; /* * Internal events. Posted when a descriptor is readable or * writable. These are statically allocated and never freed. * They will be set to non-purgable before use. */ intev_t readable_ev; intev_t writable_ev; isc_sockaddr_t address; /* remote address */ unsigned int pending_recv : 1, pending_send : 1, pending_accept : 1, listener : 1, /* listener socket */ connected : 1, connecting : 1, /* connect pending */ bound : 1; /* bound to local addr */#ifdef ISC_NET_RECVOVERFLOW unsigned char overflow; /* used for MSG_TRUNC fake */#endif char *recvcmsgbuf; ISC_SOCKADDR_LEN_T recvcmsgbuflen; char *sendcmsgbuf; ISC_SOCKADDR_LEN_T sendcmsgbuflen;};#define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g')#define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)struct isc_socketmgr { /* Not locked. */ unsigned int magic; isc_mem_t *mctx; isc_mutex_t lock; /* Locked by manager lock. 
*/ ISC_LIST(isc_socket_t) socklist; fd_set read_fds; fd_set write_fds; isc_socket_t *fds[FD_SETSIZE]; int fdstate[FD_SETSIZE]; int maxfd;#ifdef ISC_PLATFORM_USETHREADS isc_thread_t watcher; isc_condition_t shutdown_ok; int pipe_fds[2];#else /* ISC_PLATFORM_USETHREADS */ unsigned int refs;#endif /* ISC_PLATFORM_USETHREADS */};#ifndef ISC_PLATFORM_USETHREADSstatic isc_socketmgr_t *socketmgr = NULL;#endif /* ISC_PLATFORM_USETHREADS */#define CLOSED 0 /* this one must be zero */#define MANAGED 1#define CLOSE_PENDING 2/* * send() and recv() iovec counts */#define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER)#ifdef ISC_NET_RECVOVERFLOW# define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER + 1)#else# define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)#endifstatic void send_recvdone_event(isc_socket_t *, isc_socketevent_t **);static void send_senddone_event(isc_socket_t *, isc_socketevent_t **);static void free_socket(isc_socket_t **);static isc_result_t allocate_socket(isc_socketmgr_t *, isc_sockettype_t, isc_socket_t **);static void destroy(isc_socket_t **);static void internal_accept(isc_task_t *, isc_event_t *);static void internal_connect(isc_task_t *, isc_event_t *);static void internal_recv(isc_task_t *, isc_event_t *);static void internal_send(isc_task_t *, isc_event_t *);static void process_cmsg(isc_socket_t *, struct msghdr *, isc_socketevent_t *);static void build_msghdr_send(isc_socket_t *, isc_socketevent_t *, struct msghdr *, struct iovec *, size_t *);static void build_msghdr_recv(isc_socket_t *, isc_socketevent_t *, struct msghdr *, struct iovec *, size_t *);#define SELECT_POKE_SHUTDOWN (-1)#define SELECT_POKE_NOTHING (-2)#define SELECT_POKE_READ (-3)#define SELECT_POKE_ACCEPT (-3) /* Same as _READ */#define SELECT_POKE_WRITE (-4)#define SELECT_POKE_CONNECT (-4) /* Same as _WRITE */#define SELECT_POKE_CLOSE (-5)#define SOCK_DEAD(s) ((s)->references == 0)static voidmanager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category, 
isc_logmodule_t *module, int level, const char *fmt, ...) ISC_FORMAT_PRINTF(5, 6);static voidmanager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category, isc_logmodule_t *module, int level, const char *fmt, ...){ char msgbuf[2048]; va_list ap; if (! isc_log_wouldlog(isc_lctx, level)) return; va_start(ap, fmt); vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); va_end(ap); isc_log_write(isc_lctx, category, module, level, "sockmgr %p: %s", sockmgr, msgbuf);}static voidsocket_log(isc_socket_t *sock, isc_sockaddr_t *address, isc_logcategory_t *category, isc_logmodule_t *module, int level, isc_msgcat_t *msgcat, int msgset, int message, const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10);static voidsocket_log(isc_socket_t *sock, isc_sockaddr_t *address, isc_logcategory_t *category, isc_logmodule_t *module, int level, isc_msgcat_t *msgcat, int msgset, int message, const char *fmt, ...){ char msgbuf[2048]; char peerbuf[256]; va_list ap; if (! isc_log_wouldlog(isc_lctx, level)) return; va_start(ap, fmt); vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); va_end(ap); if (address == NULL) { isc_log_iwrite(isc_lctx, category, module, level, msgcat, msgset, message, "socket %p: %s", sock, msgbuf); } else { isc_sockaddr_format(address, peerbuf, sizeof(peerbuf)); isc_log_iwrite(isc_lctx, category, module, level, msgcat, msgset, message, "socket %p %s: %s", sock, peerbuf, msgbuf); }}static voidwakeup_socket(isc_socketmgr_t *manager, int fd, int msg) { isc_socket_t *sock; /* * This is a wakeup on a socket. If the socket is not in the * process of being closed, start watching it for either reads * or writes. */ INSIST(fd >= 0 && fd < (int)FD_SETSIZE); if (manager->fdstate[fd] == CLOSE_PENDING) { manager->fdstate[fd] = CLOSED; FD_CLR(fd, &manager->read_fds); FD_CLR(fd, &manager->write_fds); (void)close(fd); return; } if (manager->fdstate[fd] != MANAGED) return; sock = manager->fds[fd]; /* * Set requested bit. 
*/ if (msg == SELECT_POKE_READ) FD_SET(sock->fd, &manager->read_fds); if (msg == SELECT_POKE_WRITE) FD_SET(sock->fd, &manager->write_fds);}#ifdef ISC_PLATFORM_USETHREADS/* * Poke the select loop when there is something for us to do. * The write is required (by POSIX) to complete. That is, we * will not get partial writes. */static voidselect_poke(isc_socketmgr_t *mgr, int fd, int msg) { int cc; int buf[2]; char strbuf[ISC_STRERRORSIZE]; buf[0] = fd; buf[1] = msg; do { cc = write(mgr->pipe_fds[1], buf, sizeof(buf));#ifdef ENOSR /* * Treat ENOSR as EAGAIN but loop slowly as it is * unlikely to clear fast. */ if (cc < 0 && errno == ENOSR) { sleep(1); errno = EAGAIN; }#endif } while (cc < 0 && SOFT_ERROR(errno)); if (cc < 0) { isc__strerror(errno, strbuf, sizeof(strbuf)); FATAL_ERROR(__FILE__, __LINE__, isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_WRITEFAILED, "write() failed " "during watcher poke: %s"), strbuf); } INSIST(cc == sizeof(buf));}/* * Read a message on the internal fd. */static voidselect_readmsg(isc_socketmgr_t *mgr, int *fd, int *msg) { int buf[2]; int cc; char strbuf[ISC_STRERRORSIZE]; cc = read(mgr->pipe_fds[0], buf, sizeof(buf)); if (cc < 0) { *msg = SELECT_POKE_NOTHING; if (SOFT_ERROR(errno)) return; isc__strerror(errno, strbuf, sizeof(strbuf)); FATAL_ERROR(__FILE__, __LINE__, isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_READFAILED, "read() failed " "during watcher poke: %s"), strbuf); return; } INSIST(cc == sizeof(buf)); *fd = buf[0]; *msg = buf[1];}#else /* ISC_PLATFORM_USETHREADS *//* * Update the state of the socketmgr when something changes. */static voidselect_poke(isc_socketmgr_t *manager, int fd, int msg) { if (msg == SELECT_POKE_SHUTDOWN) return; else if (fd >= 0) wakeup_socket(manager, fd, msg); return;}#endif /* ISC_PLATFORM_USETHREADS *//* * Make a fd non-blocking. 
*/static isc_result_tmake_nonblock(int fd) { int ret; int flags; char strbuf[ISC_STRERRORSIZE]; flags = fcntl(fd, F_GETFL, 0); flags |= O_NONBLOCK; ret = fcntl(fd, F_SETFL, flags); if (ret == -1) { isc__strerror(errno, strbuf, sizeof(strbuf)); UNEXPECTED_ERROR(__FILE__, __LINE__, "fcntl(%d, F_SETFL, %d): %s", fd, flags, strbuf); return (ISC_R_UNEXPECTED); } return (ISC_R_SUCCESS);}#ifdef USE_CMSG/* * Not all OSes support advanced CMSG macros: CMSG_LEN and CMSG_SPACE. * In order to ensure as much portability as possible, we provide wrapper * functions of these macros. * Note that cmsg_space() could run slow on OSes that do not have * CMSG_SPACE. */static inline ISC_SOCKADDR_LEN_Tcmsg_len(ISC_SOCKADDR_LEN_T len) {#ifdef CMSG_LEN return (CMSG_LEN(len));#else ISC_SOCKADDR_LEN_T hdrlen; hdrlen = (ISC_SOCKADDR_LEN_T)CMSG_DATA(NULL); /* XXX */ return (hdrlen + len);#endif}static inline ISC_SOCKADDR_LEN_Tcmsg_space(ISC_SOCKADDR_LEN_T len) {#ifdef CMSG_SPACE return (CMSG_SPACE(len));#else struct msghdr msg; struct cmsghdr *cmsgp; /* * XXX: The buffer length is an ad-hoc value, but should be enough * in a practical sense. */ char dummybuf[sizeof(struct cmsghdr) + 1024]; memset(&msg, 0, sizeof(msg)); msg.msg_control = dummybuf; msg.msg_controllen = sizeof(dummybuf); cmsgp = (struct cmsghdr *)dummybuf; cmsgp->cmsg_len = cmsg_len(len); cmsgp = CMSG_NXTHDR(&msg, cmsgp); if (cmsgp != NULL) return ((char *)cmsgp - (char *)msg.msg_control); else return (0);#endif }#endif /* USE_CMSG *//* * Process control messages received on a socket. */static voidprocess_cmsg(isc_socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) {#ifdef USE_CMSG struct cmsghdr *cmsgp;#ifdef ISC_PLATFORM_HAVEIPV6 struct in6_pktinfo *pktinfop;#endif#ifdef SO_TIMESTAMP struct timeval *timevalp;#endif#endif /* * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined. * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined. 
* They are all here, outside of the CPP tests, because it is * more consistent with the usual ISC coding style. */ UNUSED(sock); UNUSED(msg);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -