📄 af_unix.c
字号:
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan.cox@linux.org>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Version:	$Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amount
 *					of socks hashed (this for unix_gc()
 *					performances reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skb queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations
 *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>

int sysctl_unix_max_dgram_qlen __read_mostly = 10;

/*
 * Socket hash table: UNIX_HASH_SIZE + 1 chains, all guarded by
 * unix_table_lock.  The extra chain at index UNIX_HASH_SIZE is the one
 * named unix_sockets_unbound below.
 */
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
static DEFINE_SPINLOCK(unix_table_lock);
/* Incremented elsewhere; decremented in unix_sock_destructor(). */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* Last chain of the table (presumably sockets without a bound address). */
#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])

/*
 * True when the socket's address hash differs from UNIX_HASH_SIZE.
 * Dereferences ->addr, so it must only be used on a socket that has one.
 */
#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)

/*
 * Start a walk over every chain of unix_socket_table (indices 0 through
 * UNIX_HASH_SIZE inclusive).  Returns the head socket of the first
 * non-empty chain and leaves that chain's index in *i, or NULL when all
 * chains are empty.
 */
static struct sock *first_unix_socket(int *i)
{
	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

/*
 * Continue a walk begun by first_unix_socket(): return the socket after
 * s in its chain, or the head of the next non-empty chain (updating *i),
 * or NULL once the table is exhausted.
 */
static struct sock *next_unix_socket(int *i, struct sock *s)
{
	struct sock *next = sk_next(s);
	/* More in this chain? */
	if (next)
		return next;
	/* Look for next non-empty chain. */
	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

/*
 * Iterate s over every socket in the table; i must be an int lvalue
 * because its address is passed to the helpers above.
 */
#define forall_unix_sockets(i, s) \
	for (s = first_unix_socket(&(i)); s; s = next_unix_socket(&(i),(s)))

#ifdef CONFIG_SECURITY_NETWORK
/* Copy the security id held in the scm cookie into the skb (UNIXSID slot). */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Load the security id stored in the skb (UNIXSID slot) into the scm cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers are empty stubs. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate rwlock.
 */

/*
 * Reduce a checksum to a table index: XOR the value with itself shifted
 * right by 16 and then by 8, and mask the result with UNIX_HASH_SIZE-1.
 */
static inline unsigned unix_hash_fold(__wsum n)
{
	unsigned hash = (__force unsigned)n;
	hash ^= hash>>16;
	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

/* Peer pointer of a unix socket; expands to an lvalue and is assigned to below. */
#define unix_peer(sk) (unix_sk(sk)->peer)

/* Non-zero when osk's peer is sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

/* Non-zero when osk has no peer at all, or its peer is sk. */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
}

/*
 * Read s's peer under the state lock of s.  A non-NULL result is returned
 * with a reference already taken via sock_hold(); NULL means no peer.
 */
static struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}

/* Drop one reference on addr and kfree() it when the count reaches zero. */
static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- should be not zero length.
 *		- if started by not zero, should be NULL terminated (FS object)
 *		- if started by zero, it is abstract name.
 *
 *	Returns the (possibly recomputed) name length, or -EINVAL when len
 *	is out of range, sunaddr is NULL or the family is not AF_UNIX.
 *	*hashp is written only for names whose first path byte is zero; the
 *	path-name branch returns without touching it.
 */
static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len]=0;
		/* Length becomes: path string + its NUL + sizeof(short). */
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
	return len;
}

/* Unlink sk from its hash chain; takes no lock itself (see the wrapper below). */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

/* Link sk onto list; sk is expected to be unhashed (checked with BUG_TRAP). */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));
	sk_add_node(sk, list);
}

/* __unix_remove_socket() performed under unix_table_lock. */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

/* __unix_insert_socket() performed under unix_table_lock. */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

/*
 * Search chain unix_socket_table[hash ^ type] for a socket whose address
 * has length len and the same bytes as sunname.  Returns the match or
 * NULL.  No lock and no reference are taken here; the wrapper below
 * supplies both.
 */
static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
					      int len, int type, unsigned hash)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

/*
 * Locked lookup by name: runs __unix_find_socket_byname() under
 * unix_table_lock and takes a reference (sock_hold) on a hit.
 */
static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

/*
 * Look up the socket whose dentry refers to inode i.  Only the chain
 * selected by i->i_ino & (UNIX_HASH_SIZE - 1) is searched, under
 * unix_table_lock.  A match is returned with a reference held
 * (sock_hold); NULL if none is found.
 */
static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;
	struct hlist_node *node;

	spin_lock(&unix_table_lock);
	sk_for_each(s, node,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->dentry;

		if(dentry && dentry->d_inode == i)
		{
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Writable while four times the allocated write memory fits in sk_sndbuf. */
static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

/*
 * When the socket is writable, wake any sleepers on sk->sk_sleep and
 * send the POLL_OUT async notification.  Everything runs under a read
 * lock on sk->sk_callback_lock.
 */
static void unix_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (unix_writable(sk)) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible_sync(sk->sk_sleep);
		sk_wake_async(sk, 2, POLL_OUT);
	}
	read_unlock(&sk->sk_callback_lock);
}

/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	/* Nothing to do (no purge, no wakeup, no error) when the queue is empty. */
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

/*
 * Socket destructor: purges the receive queue and sanity-checks that no
 * write memory is outstanding, that the socket is unhashed and that it
 * is detached from its struct socket.  If SOCK_DEAD is not set it logs
 * a message and returns early, in which case the address is not released
 * and unix_nr_socks is not decremented.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
	BUG_TRAP(sk_unhashed(sk));
	BUG_TRAP(!sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_dec(&unix_nr_socks);
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
#endif
}

/*
 * Tear down a unix socket.
 *
 * @sk:      socket being released.
 * @embrion: non-zero forces ECONNRESET onto a stream/seqpacket peer even
 *           when sk's receive queue is empty; the recursive call below
 *           passes 1 for sockets still queued on a listening socket.
 *
 * Sequence: unhash sk; under the state lock orphan it, mark it fully
 * shut down and closed, and detach dentry/mnt; wake peer_wait sleepers;
 * notify and drop the peer; drain the receive queue; release the
 * dentry/mount pair; drop a reference on sk; finally run the garbage
 * collector if any fds are in flight.  Always returns 0.
 */
static int unix_release_sock (struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	dentry = u->dentry;
	u->dentry = NULL;
	mnt = u->mnt;
	u->mnt = NULL;
	/* Remember the old state: a listener's queue needs special draining. */
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair=unix_peer(sk);

	if (skpair!=NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			/* Unread data (or an embryo) means the peer sees a reset. */
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			read_lock(&skpair->sk_callback_lock);
			sk_wake_async(skpair,1,POLL_HUP);
			read_unlock(&skpair->sk_callback_lock);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* For a listener, each queued skb carries a socket in skb->sk; release it too. */
		if (state==TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to use get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc(); /* Garbage collect fds */

	return 0;
}

/*
 * listen() handler.
 *
 * Returns -EOPNOTSUPP unless the socket type is SOCK_STREAM or
 * SOCK_SEQPACKET, -EINVAL when the socket has no address or is in a
 * state other than TCP_CLOSE/TCP_LISTEN, and 0 on success.  On success
 * the backlog and state are updated and the caller's pid/euid/egid are
 * recorded in sk_peercred.
 */
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
		goto out; /* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out; /* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	/* The backlog grew: wake everything sleeping on peer_wait. */
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog = backlog;
	sk->sk_state = TCP_LISTEN;
	/* set credentials so connect can copy them */
	sk->sk_peercred.pid = task_tgid_vnr(current);
	sk->sk_peercred.uid = current->euid;
	sk->sk_peercred.gid = current->egid;
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}

/* Forward declarations of the handlers referenced by the proto_ops tables. */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);

static const struct proto_ops unix_stream_ops = {
	.family = PF_UNIX,
	.owner = THIS_MODULE,
	.release = unix_release,
	.bind = unix_bind,
	.connect = unix_stream_connect,
	.socketpair = unix_socketpair,
	.accept = unix_accept,
	.getname = unix_getname,
	.poll = unix_poll,
	.ioctl = unix_ioctl,
	.listen = unix_listen,
	.shutdown = unix_shutdown,
	.setsockopt = sock_no_setsockopt,
	.getsockopt = sock_no_getsockopt,
	.sendmsg = unix_stream_sendmsg,
	.recvmsg = unix_stream_recvmsg,
	.mmap = sock_no_mmap,
	.sendpage = sock_no_sendpage,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -