📄 af_unix.c
字号:
/* * NET4: Implementation of BSD Unix domain sockets. * * Authors: Alan Cox, <alan.cox@linux.org> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $ * * Fixes: * Linus Torvalds : Assorted bug cures. * Niibe Yutaka : async I/O support. * Carsten Paeth : PF_UNIX check, address fixes. * Alan Cox : Limit size of allocated blocks. * Alan Cox : Fixed the stupid socketpair bug. * Alan Cox : BSD compatibility fine tuning. * Alan Cox : Fixed a bug in connect when interrupted. * Alan Cox : Sorted out a proper draft version of * file descriptor passing hacked up from * Mike Shaver's work. * Marty Leisner : Fixes to fd passing * Nick Nevin : recvmsg bugfix. * Alan Cox : Started proper garbage collector * Heiko EiBfeldt : Missing verify_area check * Alan Cox : Started POSIXisms * Andreas Schwab : Replace inode by dentry for proper * reference counting * Kirk Petersen : Made this a module * Christoph Rohland : Elegant non-blocking accept/connect algorithm. * Lots of bug fixes. * Alexey Kuznetsov : Repaired (I hope) bugs introduced * by the above two patches. * Andrea Arcangeli : If possible we block in connect(2) * if the max backlog of the listen socket * has been reached. This won't break * old apps and it will avoid a huge amount * of socks hashed (this is for unix_gc() * performance reasons). * Security fix that limits the max * number of socks to 2*max_files and * the number of skb queueable in the * dgram receiver. * Artur Skawina : Hash function optimizations * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8) * Malcolm Beattie : Set peercred for socketpair * Michal Ostrowski : Module initialization cleanup. * Arnaldo C. 
Melo : Remove MOD_{INC,DEC}_USE_COUNT, * the core infrastructure is doing that * for all net proto families now (2.5.69+) * * * Known differences from reference BSD that was tested: * * [TO FIX] * ECONNREFUSED is not returned from one end of a connected() socket to the * other the moment one end closes. * fstat() doesn't return st_dev=0, and give the blksize as high water mark * and a fake inode identifier (nor the BSD first socket fstat twice bug). * [NOT TO FIX] * accept() returns a path name even if the connecting socket has closed * in the meantime (BSD loses the path and gives up). * accept() returns 0 length path for an unbound connector. BSD returns 16 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) * socketpair(...SOCK_RAW..) doesn't panic the kernel. * BSD af_unix apparently has connect forgetting to block properly. * (need to check this with the POSIX spec in detail) * * Differences from 2.0.0-11-... (ANK) * Bug fixes and improvements. * - client shutdown killed server socket. * - removed all useless cli/sti pairs. * * Semantic changes/extensions. * - generic control message passing. * - SCM_CREDENTIALS control message. * - "Abstract" (not FS based) socket bindings. * Abstract names are sequences of bytes (not zero terminated) * started by 0, so that this name space does not intersect * with BSD names. 
*/#include <linux/module.h>#include <linux/config.h>#include <linux/kernel.h>#include <linux/major.h>#include <linux/signal.h>#include <linux/sched.h>#include <linux/errno.h>#include <linux/string.h>#include <linux/stat.h>#include <linux/dcache.h>#include <linux/namei.h>#include <linux/socket.h>#include <linux/un.h>#include <linux/fcntl.h>#include <linux/termios.h>#include <linux/sockios.h>#include <linux/net.h>#include <linux/in.h>#include <linux/fs.h>#include <linux/slab.h>#include <asm/uaccess.h>#include <linux/skbuff.h>#include <linux/netdevice.h>#include <net/sock.h>#include <linux/tcp.h>#include <net/af_unix.h>#include <linux/proc_fs.h>#include <linux/seq_file.h>#include <net/scm.h>#include <linux/init.h>#include <linux/poll.h>#include <linux/smp_lock.h>#include <linux/rtnetlink.h>#include <linux/mount.h>#include <net/checksum.h>#include <linux/security.h>int sysctl_unix_max_dgram_qlen = 10;kmem_cache_t *unix_sk_cachep;struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];rwlock_t unix_table_lock = RW_LOCK_UNLOCKED;static atomic_t unix_nr_socks = ATOMIC_INIT(0);#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)/* * SMP locking strategy: * hash table is protected with rwlock unix_table_lock * each socket state is protected by separate rwlock. 
*/static inline unsigned unix_hash_fold(unsigned hash){ hash ^= hash>>16; hash ^= hash>>8; return hash&(UNIX_HASH_SIZE-1);}#define unix_peer(sk) (unix_sk(sk)->peer)static inline int unix_our_peer(struct sock *sk, struct sock *osk){ return unix_peer(osk) == sk;}static inline int unix_may_send(struct sock *sk, struct sock *osk){ return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));}static struct sock *unix_peer_get(struct sock *s){ struct sock *peer; unix_state_rlock(s); peer = unix_peer(s); if (peer) sock_hold(peer); unix_state_runlock(s); return peer;}static inline void unix_release_addr(struct unix_address *addr){ if (atomic_dec_and_test(&addr->refcnt)) kfree(addr);}/* * Check unix socket name: * - should be not zero length. * - if started by not zero, should be NULL terminated (FS object) * - if started by zero, it is abstract name. */ static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp){ if (len <= sizeof(short) || len > sizeof(*sunaddr)) return -EINVAL; if (!sunaddr || sunaddr->sun_family != AF_UNIX) return -EINVAL; if (sunaddr->sun_path[0]) { /* * This may look like an off by one error but it is * a bit more subtle. 108 is the longest valid AF_UNIX * path for a binding. sun_path[108] doesn't as such * exist. However in kernel space we are guaranteed that * it is a valid memory location in our kernel * address buffer. 
*/ if (len > sizeof(*sunaddr)) len = sizeof(*sunaddr); ((char *)sunaddr)[len]=0; len = strlen(sunaddr->sun_path)+1+sizeof(short); return len; } *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0)); return len;}static void __unix_remove_socket(struct sock *sk){ sk_del_node_init(sk);}static void __unix_insert_socket(struct hlist_head *list, struct sock *sk){ BUG_TRAP(sk_unhashed(sk)); sk_add_node(sk, list);}static inline void unix_remove_socket(struct sock *sk){ write_lock(&unix_table_lock); __unix_remove_socket(sk); write_unlock(&unix_table_lock);}static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk){ write_lock(&unix_table_lock); __unix_insert_socket(list, sk); write_unlock(&unix_table_lock);}static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, int len, int type, unsigned hash){ struct sock *s; struct hlist_node *node; sk_for_each(s, node, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) goto found; } s = NULL;found: return s;}static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, int len, int type, unsigned hash){ struct sock *s; read_lock(&unix_table_lock); s = __unix_find_socket_byname(sunname, len, type, hash); if (s) sock_hold(s); read_unlock(&unix_table_lock); return s;}static struct sock *unix_find_socket_byinode(struct inode *i){ struct sock *s; struct hlist_node *node; read_lock(&unix_table_lock); sk_for_each(s, node, &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; if(dentry && dentry->d_inode == i) { sock_hold(s); goto found; } } s = NULL;found: read_unlock(&unix_table_lock); return s;}static inline int unix_writable(struct sock *sk){ return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;}static void unix_write_space(struct sock *sk){ read_lock(&sk->sk_callback_lock); if (unix_writable(sk)) { if (sk->sk_sleep && 
waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk, 2, POLL_OUT); } read_unlock(&sk->sk_callback_lock);}/* When dgram socket disconnects (or changes its peer), we clear its receive * queue of packets arrived from previous peer. First, it allows to do * flow control based only on wmem_alloc; second, sk connected to peer * may receive messages only from that peer. */static void unix_dgram_disconnected(struct sock *sk, struct sock *other){ if (skb_queue_len(&sk->sk_receive_queue)) { skb_queue_purge(&sk->sk_receive_queue); wake_up_interruptible_all(&unix_sk(sk)->peer_wait); /* If one link of bidirectional dgram pipe is disconnected, * we signal error. Messages are lost. Do not make this, * when peer was not connected to us. */ if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { other->sk_err = ECONNRESET; other->sk_error_report(other); } }}static void unix_sock_destructor(struct sock *sk){ struct unix_sock *u = unix_sk(sk); skb_queue_purge(&sk->sk_receive_queue); BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); BUG_TRAP(sk_unhashed(sk)); BUG_TRAP(!sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { printk("Attempt to release alive unix socket: %p\n", sk); return; } if (u->addr) unix_release_addr(u->addr); atomic_dec(&unix_nr_socks);#ifdef UNIX_REFCNT_DEBUG printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));#endif}static int unix_release_sock (struct sock *sk, int embrion){ struct unix_sock *u = unix_sk(sk); struct dentry *dentry; struct vfsmount *mnt; struct sock *skpair; struct sk_buff *skb; int state; unix_remove_socket(sk); /* Clear state */ unix_state_wlock(sk); sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; dentry = u->dentry; u->dentry = NULL; mnt = u->mnt; u->mnt = NULL; state = sk->sk_state; sk->sk_state = TCP_CLOSE; unix_state_wunlock(sk); wake_up_interruptible_all(&u->peer_wait); skpair=unix_peer(sk); if (skpair!=NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == 
SOCK_SEQPACKET) { unix_state_wlock(skpair); /* No more writes */ skpair->sk_shutdown = SHUTDOWN_MASK; if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) skpair->sk_err = ECONNRESET; unix_state_wunlock(skpair); skpair->sk_state_change(skpair); read_lock(&skpair->sk_callback_lock); sk_wake_async(skpair,1,POLL_HUP); read_unlock(&skpair->sk_callback_lock); } sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; } /* Try to flush out this socket. Throw out buffers at least */ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { if (state==TCP_LISTEN) unix_release_sock(skb->sk, 1); /* passed fds are erased in the kfree_skb hook */ kfree_skb(skb); } if (dentry) { dput(dentry); mntput(mnt); } sock_put(sk); /* ---- Socket is dead now and most probably destroyed ---- */ /* * Fixme: BSD difference: In BSD all sockets connected to use get * ECONNRESET and we die on the spot. In Linux we behave * like files and pipes do and wait for the last * dereference. * * Can't we simply set sock->err? * * What the above comment does talk about? 
--ANK(980817) */ if (atomic_read(&unix_tot_inflight)) unix_gc(); /* Garbage collect fds */ return 0;}static int unix_listen(struct socket *sock, int backlog){ int err; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); err = -EOPNOTSUPP; if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET) goto out; /* Only stream/seqpacket sockets accept */ err = -EINVAL; if (!u->addr) goto out; /* No listens on an unbound socket */ unix_state_wlock(sk); if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) goto out_unlock; if (backlog > sk->sk_max_ack_backlog) wake_up_interruptible_all(&u->peer_wait); sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; /* set credentials so connect can copy them */ sk->sk_peercred.pid = current->tgid; sk->sk_peercred.uid = current->euid; sk->sk_peercred.gid = current->egid; err = 0;out_unlock: unix_state_wunlock(sk);out: return err;}static int unix_release(struct socket *);static int unix_bind(struct socket *, struct sockaddr *, int);static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags);static int unix_socketpair(struct socket *, struct socket *);static int unix_accept(struct socket *, struct socket *, int);static int unix_getname(struct socket *, struct sockaddr *, int *, int);static unsigned int unix_poll(struct file *, struct socket *, poll_table *);static int unix_ioctl(struct socket *, unsigned int, unsigned long);static int unix_shutdown(struct socket *, int);static int unix_stream_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t);static int unix_stream_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, int);static int unix_dgram_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t);static int unix_dgram_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, int);static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int);static struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = 
THIS_MODULE, .release = unix_release, .bind = unix_bind, .connect = unix_stream_connect, .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, .poll = unix_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = unix_stream_sendmsg, .recvmsg = unix_stream_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage,};static struct proto_ops unix_dgram_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, .bind = unix_bind, .connect = unix_dgram_connect, .socketpair = unix_socketpair, .accept = sock_no_accept, .getname = unix_getname, .poll = datagram_poll, .ioctl = unix_ioctl, .listen = sock_no_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -