📄 socket.c
字号:
/* * NET An implementation of the SOCKET network access protocol. * * Version: @(#)socket.c 1.1.93 18/02/95 * * Authors: Orest Zborowski, <obz@Kodak.COM> * Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * * Fixes: * Anonymous : NOTSOCK/BADF cleanup. Error fix in * shutdown() * Alan Cox : verify_area() fixes * Alan Cox : Removed DDI * Jonathan Kamens : SOCK_DGRAM reconnect bug * Alan Cox : Moved a load of checks to the very * top level. * Alan Cox : Move address structures to/from user * mode above the protocol layers. * Rob Janssen : Allow 0 length sends. * Alan Cox : Asynchronous I/O support (cribbed from the * tty drivers). * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) * Jeff Uphoff : Made max number of sockets command-line * configurable. * Matti Aarnio : Made the number of sockets dynamic, * to be allocated when needed, and mr. * Uphoff's max is used as max to be * allowed to allocate. * Linus : Argh. removed all the socket allocation * altogether: it's in the inode now. * Alan Cox : Made sock_alloc()/sock_release() public * for NetROM and future kernel nfsd type * stuff. * Alan Cox : sendmsg/recvmsg basics. * Tom Dyas : Export net symbols. * Marcin Dalecki : Fixed problems with CONFIG_NET="n". * Alan Cox : Added thread locking to sys_* calls * for sockets. May have errors at the * moment. * Kevin Buhr : Fixed the dumb errors in the above. * Andi Kleen : Some small cleanups, optimizations, * and fixed a copy_from_user() bug. * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) * Tigran Aivazian : Made listen(2) backlog sanity checks * protocol-independent * * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * * This module is effectively the top level interface to the BSD socket * paradigm. 
* */#include <linux/config.h>#include <linux/mm.h>#include <linux/smp_lock.h>#include <linux/socket.h>#include <linux/file.h>#include <linux/net.h>#include <linux/interrupt.h>#include <linux/netdevice.h>#include <linux/proc_fs.h>#include <linux/wanrouter.h>#include <linux/netlink.h>#include <linux/rtnetlink.h>#include <linux/init.h>#include <linux/poll.h>#include <linux/cache.h>#include <linux/module.h>#include <linux/highmem.h>#if defined(CONFIG_KMOD) && defined(CONFIG_NET)#include <linux/kmod.h>#endif#include <asm/uaccess.h>#include <net/sock.h>#include <net/scm.h>#include <linux/netfilter.h>static int sock_no_open(struct inode *irrelevant, struct file *dontcare);static loff_t sock_lseek(struct file *file, loff_t offset, int whence);static ssize_t sock_read(struct file *file, char *buf, size_t size, loff_t *ppos);static ssize_t sock_write(struct file *file, const char *buf, size_t size, loff_t *ppos);static int sock_mmap(struct file *file, struct vm_area_struct * vma);static int sock_close(struct inode *inode, struct file *file);static unsigned int sock_poll(struct file *file, struct poll_table_struct *wait);static int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);static int sock_fasync(int fd, struct file *filp, int on);static ssize_t sock_readv(struct file *file, const struct iovec *vector, unsigned long count, loff_t *ppos);static ssize_t sock_writev(struct file *file, const struct iovec *vector, unsigned long count, loff_t *ppos);static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more);/* * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear * in the operation structures but are done directly via the socketcall() multiplexor. 
*/static struct file_operations socket_file_ops = { llseek: sock_lseek, read: sock_read, write: sock_write, poll: sock_poll, ioctl: sock_ioctl, mmap: sock_mmap, open: sock_no_open, /* special open code to disallow open via /proc */ release: sock_close, fasync: sock_fasync, readv: sock_readv, writev: sock_writev, sendpage: sock_sendpage};/* * The protocol list. Each protocol is registered in here. */static struct net_proto_family *net_families[NPROTO];#ifdef CONFIG_SMPstatic atomic_t net_family_lockct = ATOMIC_INIT(0);static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;/* The strategy is: modifications net_family vector are short, do not sleep and veeery rare, but read access should be free of any exclusive locks. */static void net_family_write_lock(void){ spin_lock(&net_family_lock); while (atomic_read(&net_family_lockct) != 0) { spin_unlock(&net_family_lock); current->policy |= SCHED_YIELD; schedule(); spin_lock(&net_family_lock); }}static __inline__ void net_family_write_unlock(void){ spin_unlock(&net_family_lock);}static __inline__ void net_family_read_lock(void){ atomic_inc(&net_family_lockct); spin_unlock_wait(&net_family_lock);}static __inline__ void net_family_read_unlock(void){ atomic_dec(&net_family_lockct);}#else#define net_family_write_lock() do { } while(0)#define net_family_write_unlock() do { } while(0)#define net_family_read_lock() do { } while(0)#define net_family_read_unlock() do { } while(0)#endif/* * Statistics counters of the socket lists */static union { int counter; char __pad[SMP_CACHE_BYTES];} sockets_in_use[NR_CPUS] __cacheline_aligned = {{0}};/* * Support routines. Move socket addresses back and forth across the kernel/user * divide and look after the messy bits. */#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - 16 for IP, 16 for IPX, 24 for IPv6, about 80 for AX.25 must be at least one bigger than the AF_UNIX size (see net/unix/af_unix.c :unix_mkname()). 
*/ /** * move_addr_to_kernel - copy a socket address into kernel space * @uaddr: Address in user space * @kaddr: Address in kernel space * @ulen: Length in user space * * The address is copied into kernel space. If the provided address is * too long an error code of -EINVAL is returned. If the copy gives * invalid addresses -EFAULT is returned. On a success 0 is returned. */int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr){ if(ulen<0||ulen>MAX_SOCK_ADDR) return -EINVAL; if(ulen==0) return 0; if(copy_from_user(kaddr,uaddr,ulen)) return -EFAULT; return 0;}/** * move_addr_to_user - copy an address to user space * @kaddr: kernel space address * @klen: length of address in kernel * @uaddr: user space address * @ulen: pointer to user length field * * The value pointed to by ulen on entry is the buffer length available. * This is overwritten with the buffer space used. -EINVAL is returned * if an overlong buffer is specified or a negative buffer size. -EFAULT * is returned if either the buffer or the length field are not * accessible. * After copying the data up to the limit the user specifies, the true * length of the data is written over the length limit the user * specified. Zero is returned for a success. */ int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen){ int err; int len; if((err=get_user(len, ulen))) return err; if(len>klen) len=klen; if(len<0 || len> MAX_SOCK_ADDR) return -EINVAL; if(len) { if(copy_to_user(uaddr,kaddr,len)) return -EFAULT; } /* * "fromlen shall refer to the value before truncation.." 
* 1003.1g */ return __put_user(klen, ulen);}#define SOCKFS_MAGIC 0x534F434Bstatic int sockfs_statfs(struct super_block *sb, struct statfs *buf){ buf->f_type = SOCKFS_MAGIC; buf->f_bsize = 1024; buf->f_namelen = 255; return 0;}static struct super_operations sockfs_ops = { statfs: sockfs_statfs,};static struct super_block * sockfs_read_super(struct super_block *sb, void *data, int silent){ struct inode *root = new_inode(sb); if (!root) return NULL; root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; root->i_uid = root->i_gid = 0; root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; sb->s_blocksize = 1024; sb->s_blocksize_bits = 10; sb->s_magic = SOCKFS_MAGIC; sb->s_op = &sockfs_ops; sb->s_root = d_alloc(NULL, &(const struct qstr) { "socket:", 7, 0 }); if (!sb->s_root) { iput(root); return NULL; } sb->s_root->d_sb = sb; sb->s_root->d_parent = sb->s_root; d_instantiate(sb->s_root, root); return sb;}static struct vfsmount *sock_mnt;static DECLARE_FSTYPE(sock_fs_type, "sockfs", sockfs_read_super, FS_NOMOUNT);static int sockfs_delete_dentry(struct dentry *dentry){ return 1;}static struct dentry_operations sockfs_dentry_operations = { d_delete: sockfs_delete_dentry,};/* * Obtains the first available file descriptor and sets it up for use. * * This functions creates file structure and maps it to fd space * of current process. On success it returns file descriptor * and file struct implicitly stored in sock->file. * Note that another thread may close file descriptor before we return * from this function. We use the fact that now we do not refer * to socket after mapping. If one day we will need it, this * function will inincrement ref. count on file by 1. * * In any case returned fd MAY BE not valid! * This race condition is inavoidable * with shared fd spaces, we cannot solve is inside kernel, * but we take care of internal coherence yet. 
*/static int sock_map_fd(struct socket *sock){ int fd; struct qstr this; char name[32]; /* * Find a file descriptor suitable for return to the user. */ fd = get_unused_fd(); if (fd >= 0) { struct file *file = get_empty_filp(); if (!file) { put_unused_fd(fd); fd = -ENFILE; goto out; } sprintf(name, "[%lu]", sock->inode->i_ino); this.name = name; this.len = strlen(name); this.hash = sock->inode->i_ino; file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); if (!file->f_dentry) { put_filp(file); put_unused_fd(fd); fd = -ENOMEM; goto out; } file->f_dentry->d_op = &sockfs_dentry_operations; d_add(file->f_dentry, sock->inode); file->f_vfsmnt = mntget(sock_mnt); sock->file = file; file->f_op = sock->inode->i_fop = &socket_file_ops; file->f_mode = 3; file->f_flags = O_RDWR; file->f_pos = 0; fd_install(fd, file); }out: return fd;}extern __inline__ struct socket *socki_lookup(struct inode *inode){ return &inode->u.socket_i;}/** * sockfd_lookup - Go from a file number to its socket slot * @fd: file handle * @err: pointer to an error code return * * The file handle passed in is locked and the socket it is bound * too is returned. If an error occurs the err pointer is overwritten * with a negative errno code and NULL is returned. The function checks * for both invalid handles and passing a handle which is not a socket. * * On a success the socket object pointer is returned. */struct socket *sockfd_lookup(int fd, int *err){ struct file *file; struct inode *inode; struct socket *sock; if (!(file = fget(fd))) { *err = -EBADF; return NULL; } inode = file->f_dentry->d_inode; if (!inode->i_sock || !(sock = socki_lookup(inode))) { *err = -ENOTSOCK; fput(file); return NULL; } if (sock->file != file) { printk(KERN_ERR "socki_lookup: socket file changed!\n"); sock->file = file; } return sock;}extern __inline__ void sockfd_put(struct socket *sock){ fput(sock->file);}/** * sock_alloc - allocate a socket * * Allocate a new inode and socket object. 
The two are bound together * and initialised. The socket is then returned. If we are out of inodes * NULL is returned. */struct socket *sock_alloc(void){ struct inode * inode; struct socket * sock; inode = get_empty_inode(); if (!inode) return NULL; inode->i_sb = sock_mnt->mnt_sb; sock = socki_lookup(inode); inode->i_mode = S_IFSOCK|S_IRWXUGO; inode->i_sock = 1; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; sock->inode = inode; init_waitqueue_head(&sock->wait); sock->fasync_list = NULL; sock->state = SS_UNCONNECTED; sock->flags = 0; sock->ops = NULL; sock->sk = NULL; sock->file = NULL; sockets_in_use[smp_processor_id()].counter++; return sock;}/* * In theory you can't get an open on this inode, but /proc provides * a back door. Remember to keep it shut otherwise you'll let the * creepy crawlies in. */ static int sock_no_open(struct inode *irrelevant, struct file *dontcare){ return -ENXIO;}/** * sock_release - close a socket * @sock: socket to close * * The socket is released from the protocol stack if it has a release * callback, and the inode is then released if the socket is bound to * an inode not a file. */ void sock_release(struct socket *sock){ if (sock->ops) sock->ops->release(sock); if (sock->fasync_list) printk(KERN_ERR "sock_release: fasync list not empty!\n"); sockets_in_use[smp_processor_id()].counter--; if (!sock->file) { iput(sock->inode); return; } sock->file=NULL;}int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size){ int err; struct scm_cookie scm; err = scm_send(sock, msg, &scm); if (err >= 0) { err = sock->ops->sendmsg(sock, msg, size, &scm); scm_destroy(&scm); } return err;}int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags){ struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); size = sock->ops->recvmsg(sock, msg, size, flags, &scm); if (size >= 0) scm_recv(sock, msg, &scm, flags); return size;}/* * Sockets are not seekable. 
*/

/*
 *	llseek handler for socket files: fails with -ESPIPE regardless of
 *	the offset and whence that were passed in.
 */
static loff_t sock_lseek(struct file *file, loff_t offset, int whence)
{
	return -ESPIPE;
}

/*
 *	Read data from a socket. ubuf is a user mode pointer.
 *
 *	A read that does not use the file's own position pointer is
 *	refused with -ESPIPE, and a zero-length read returns 0 at once.
 *	Otherwise the request is wrapped in a single-element iovec with no
 *	address and no control data and handed to sock_recvmsg(), whose
 *	result is returned. MSG_DONTWAIT is passed when the file is marked
 *	O_NONBLOCK.
 *
 *	NOTE(review): earlier wording said the user area ubuf...ubuf+size-1
 *	is checked for writability before asking the protocol; no such check
 *	is visible in this function - confirm where it happens.
 */

static ssize_t sock_read(struct file *file, char *ubuf,
			 size_t size, loff_t *ppos)
{
	struct socket *sock;
	struct iovec iov;
	struct msghdr msg;
	int flags;

	/* Only the file's own position is accepted. */
	if (ppos != &file->f_pos)
		return -ESPIPE;
	if (size==0)		/* Match SYS5 behaviour */
		return 0;

	sock = socki_lookup(file->f_dentry->d_inode);

	/* One buffer, no peer address, no ancillary data. */
	msg.msg_name=NULL;
	msg.msg_namelen=0;
	msg.msg_iov=&iov;
	msg.msg_iovlen=1;
	msg.msg_control=NULL;
	msg.msg_controllen=0;
	iov.iov_base=ubuf;
	iov.iov_len=size;
	flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;

	return sock_recvmsg(sock, &msg, size, flags);
}


/*
 *	Write data to a socket. ubuf is a user mode pointer.
 *
 *	As for reads, a write that does not use the file's own position
 *	pointer is refused with -ESPIPE and a zero-length write returns 0.
 *
 *	NOTE(review): earlier wording said the user area ubuf..ubuf+size-1
 *	is verified to be readable by the user process; no such check is
 *	visible in the part of the function shown here - confirm.
 */

static ssize_t sock_write(struct file *file, const char *ubuf,
			  size_t size, loff_t *ppos)
{
	struct socket *sock;
	struct msghdr msg;
	struct iovec iov;

	if (ppos != &file->f_pos)
		return -ESPIPE;
	if(size==0)		/* Match SYS5 behaviour */
		return 0;

	sock = socki_lookup(file->f_dentry->d_inode);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -