📄 af_unix.c
字号:
/* * NET3: Implementation of BSD Unix domain sockets. * * Authors: Alan Cox, <alan.cox@linux.org> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * Version: $Id: af_unix.c,v 1.108 2000/11/10 04:02:04 davem Exp $ * * Fixes: * Linus Torvalds : Assorted bug cures. * Niibe Yutaka : async I/O support. * Carsten Paeth : PF_UNIX check, address fixes. * Alan Cox : Limit size of allocated blocks. * Alan Cox : Fixed the stupid socketpair bug. * Alan Cox : BSD compatibility fine tuning. * Alan Cox : Fixed a bug in connect when interrupted. * Alan Cox : Sorted out a proper draft version of * file descriptor passing hacked up from * Mike Shaver's work. * Marty Leisner : Fixes to fd passing * Nick Nevin : recvmsg bugfix. * Alan Cox : Started proper garbage collector * Heiko EiBfeldt : Missing verify_area check * Alan Cox : Started POSIXisms * Andreas Schwab : Replace inode by dentry for proper * reference counting * Kirk Petersen : Made this a module * Christoph Rohland : Elegant non-blocking accept/connect algorithm. * Lots of bug fixes. * Alexey Kuznetosv : Repaired (I hope) bugs introduces * by above two patches. * Andrea Arcangeli : If possible we block in connect(2) * if the max backlog of the listen socket * is been reached. This won't break * old apps and it will avoid huge amount * of socks hashed (this for unix_gc() * performances reasons). * Security fix that limits the max * number of socks to 2*max_files and * the number of skb queueable in the * dgram receiver. * Artur Skawina : Hash function optimizations * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8) * Malcolm Beattie : Set peercred for socketpair * Michal Ostrowski : Module initialization cleanup. * * * Known differences from reference BSD that was tested: * * [TO FIX] * ECONNREFUSED is not returned from one end of a connected() socket to the * other the moment one end closes. * fstat() doesn't return st_dev=NODEV, and give the blksize as high water mark * and a fake inode identifier (nor the BSD first socket fstat twice bug). * [NOT TO FIX] * accept() returns a path name even if the connecting socket has closed * in the meantime (BSD loses the path and gives up). * accept() returns 0 length path for an unbound connector. BSD returns 16 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) * socketpair(...SOCK_RAW..) doesn't panic the kernel. * BSD af_unix apparently has connect forgetting to block properly. * (need to check this with the POSIX spec in detail) * * Differences from 2.0.0-11-... (ANK) * Bug fixes and improvements. * - client shutdown killed server socket. * - removed all useless cli/sti pairs. * * Semantic changes/extensions. * - generic control message passing. * - SCM_CREDENTIALS control message. * - "Abstract" (not FS based) socket bindings. * Abstract names are sequences of bytes (not zero terminated) * started by 0, so that this name space does not intersect * with BSD names. */#include <linux/module.h>#include <linux/config.h>#include <linux/kernel.h>#include <linux/major.h>#include <linux/signal.h>#include <linux/sched.h>#include <linux/errno.h>#include <linux/string.h>#include <linux/stat.h>#include <linux/socket.h>#include <linux/un.h>#include <linux/fcntl.h>#include <linux/termios.h>#include <linux/sockios.h>#include <linux/net.h>#include <linux/in.h>#include <linux/fs.h>#include <linux/malloc.h>#include <asm/uaccess.h>#include <linux/skbuff.h>#include <linux/netdevice.h>#include <net/sock.h>#include <net/tcp.h>#include <net/af_unix.h>#include <linux/proc_fs.h>#include <net/scm.h>#include <linux/init.h>#include <linux/poll.h>#include <linux/smp_lock.h>#include <asm/checksum.h>#define min(a,b) (((a)<(b))?(a):(b))int sysctl_unix_max_dgram_qlen = 10;unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];rwlock_t unix_table_lock = RW_LOCK_UNLOCKED;static atomic_t unix_nr_socks = ATOMIC_INIT(0);#define unix_sockets_unbound (unix_socket_table[UNIX_HASH_SIZE])#define UNIX_ABSTRACT(sk) ((sk)->protinfo.af_unix.addr->hash!=UNIX_HASH_SIZE)/* SMP locking strategy. * hash table is protceted with rwlock unix_table_lock * each socket state is protected by separate rwlock. */extern __inline__ unsigned unix_hash_fold(unsigned hash){ hash ^= hash>>16; hash ^= hash>>8; return hash&(UNIX_HASH_SIZE-1);}#define unix_peer(sk) ((sk)->pair)extern __inline__ int unix_our_peer(unix_socket *sk, unix_socket *osk){ return unix_peer(osk) == sk;}extern __inline__ int unix_may_send(unix_socket *sk, unix_socket *osk){ return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));}static __inline__ unix_socket * unix_peer_get(unix_socket *s){ unix_socket *peer; unix_state_rlock(s); peer = unix_peer(s); if (peer) sock_hold(peer); unix_state_runlock(s); return peer;}extern __inline__ void unix_release_addr(struct unix_address *addr){ if (atomic_dec_and_test(&addr->refcnt)) kfree(addr);}/* * Check unix socket name: * - should be not zero length. * - if started by not zero, should be NULL terminated (FS object) * - if started by zero, it is abstract name. */ static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp){ if (len <= sizeof(short) || len > sizeof(*sunaddr)) return -EINVAL; if (!sunaddr || sunaddr->sun_family != AF_UNIX) return -EINVAL; if (sunaddr->sun_path[0]) { /* * This may look like an off by one error but it is * a bit more subtle. 108 is the longest valid AF_UNIX * path for a binding. sun_path[108] doesnt as such * exist. However in kernel space we are guaranteed that * it is a valid memory location in our kernel * address buffer. */ if (len > sizeof(*sunaddr)) len = sizeof(*sunaddr); ((char *)sunaddr)[len]=0; len = strlen(sunaddr->sun_path)+1+sizeof(short); return len; } *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0)); return len;}static void __unix_remove_socket(unix_socket *sk){ unix_socket **list = sk->protinfo.af_unix.list; if (list) { if (sk->next) sk->next->prev = sk->prev; if (sk->prev) sk->prev->next = sk->next; if (*list == sk) *list = sk->next; sk->protinfo.af_unix.list = NULL; sk->prev = NULL; sk->next = NULL; __sock_put(sk); }}static void __unix_insert_socket(unix_socket **list, unix_socket *sk){ BUG_TRAP(sk->protinfo.af_unix.list==NULL); sk->protinfo.af_unix.list = list; sk->prev = NULL; sk->next = *list; if (*list) (*list)->prev = sk; *list=sk; sock_hold(sk);}static __inline__ void unix_remove_socket(unix_socket *sk){ write_lock(&unix_table_lock); __unix_remove_socket(sk); write_unlock(&unix_table_lock);}static __inline__ void unix_insert_socket(unix_socket **list, unix_socket *sk){ write_lock(&unix_table_lock); __unix_insert_socket(list, sk); write_unlock(&unix_table_lock);}static unix_socket *__unix_find_socket_byname(struct sockaddr_un *sunname, int len, int type, unsigned hash){ unix_socket *s; for (s=unix_socket_table[hash^type]; s; s=s->next) { if(s->protinfo.af_unix.addr->len==len && memcmp(s->protinfo.af_unix.addr->name, sunname, len) == 0) return s; } return NULL;}static __inline__ unix_socket *unix_find_socket_byname(struct sockaddr_un *sunname, int len, int type, unsigned hash){ unix_socket *s; read_lock(&unix_table_lock); s = __unix_find_socket_byname(sunname, len, type, hash); if (s) sock_hold(s); read_unlock(&unix_table_lock); return s;}static unix_socket *unix_find_socket_byinode(struct inode *i){ unix_socket *s; read_lock(&unix_table_lock); for (s=unix_socket_table[i->i_ino & (UNIX_HASH_SIZE-1)]; s; s=s->next) { struct dentry *dentry = s->protinfo.af_unix.dentry; if(dentry && dentry->d_inode == i) { sock_hold(s); break; } } read_unlock(&unix_table_lock); return s;}static __inline__ int unix_writable(struct sock *sk){ return ((atomic_read(&sk->wmem_alloc)<<2) <= sk->sndbuf);}static void unix_write_space(struct sock *sk){ read_lock(&sk->callback_lock); if (unix_writable(sk)) { if (sk->sleep && waitqueue_active(sk->sleep)) wake_up_interruptible(sk->sleep); sk_wake_async(sk, 2, POLL_OUT); } read_unlock(&sk->callback_lock);}/* When dgram socket disconnects (or changes its peer), we clear its receive * queue of packets arrived from previous peer. First, it allows to do * flow control based only on wmem_alloc; second, sk connected to peer * may receive messages only from that peer. */static void unix_dgram_disconnected(struct sock *sk, struct sock *other){ if (skb_queue_len(&sk->receive_queue)) { skb_queue_purge(&sk->receive_queue); wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait); /* If one link of bidirectional dgram pipe is disconnected, * we signal error. Messages are lost. Do not make this, * when peer was not connected to us. */ if (!other->dead && unix_peer(other) == sk) { other->err = ECONNRESET; other->error_report(other); } }}static void unix_sock_destructor(struct sock *sk){ skb_queue_purge(&sk->receive_queue); BUG_TRAP(atomic_read(&sk->wmem_alloc) == 0); BUG_TRAP(sk->protinfo.af_unix.list==NULL); BUG_TRAP(sk->socket==NULL); if (sk->dead==0) { printk("Attempt to release alive unix socket: %p\n", sk); return; } if (sk->protinfo.af_unix.addr) unix_release_addr(sk->protinfo.af_unix.addr); atomic_dec(&unix_nr_socks);#ifdef UNIX_REFCNT_DEBUG printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));#endif MOD_DEC_USE_COUNT;}static int unix_release_sock (unix_socket *sk, int embrion){ struct dentry *dentry; struct vfsmount *mnt; unix_socket *skpair; struct sk_buff *skb; int state; unix_remove_socket(sk); /* Clear state */ unix_state_wlock(sk); sock_orphan(sk); sk->shutdown = SHUTDOWN_MASK; dentry = sk->protinfo.af_unix.dentry; sk->protinfo.af_unix.dentry=NULL; mnt = sk->protinfo.af_unix.mnt; sk->protinfo.af_unix.mnt=NULL; state = sk->state; sk->state = TCP_CLOSE; unix_state_wunlock(sk); wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait); skpair=unix_peer(sk); if (skpair!=NULL) { if (sk->type==SOCK_STREAM) { unix_state_wlock(skpair); skpair->shutdown=SHUTDOWN_MASK; /* No more writes*/ if (!skb_queue_empty(&sk->receive_queue) || embrion) skpair->err = ECONNRESET; unix_state_wunlock(skpair); skpair->state_change(skpair); read_lock(&skpair->callback_lock); sk_wake_async(skpair,1,POLL_HUP); read_unlock(&skpair->callback_lock); } sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; } /* Try to flush out this socket. Throw out buffers at least */ while((skb=skb_dequeue(&sk->receive_queue))!=NULL) { if (state==TCP_LISTEN) unix_release_sock(skb->sk, 1); /* passed fds are erased in the kfree_skb hook */ kfree_skb(skb); } if (dentry) { dput(dentry); mntput(mnt); } sock_put(sk); /* ---- Socket is dead now and most probably destroyed ---- */ /* * Fixme: BSD difference: In BSD all sockets connected to use get * ECONNRESET and we die on the spot. In Linux we behave * like files and pipes do and wait for the last * dereference. * * Can't we simply set sock->err? * * What the above comment does talk about? --ANK(980817) */ if (atomic_read(&unix_tot_inflight)) unix_gc(); /* Garbage collect fds */ return 0;}static int unix_listen(struct socket *sock, int backlog){ int err; struct sock *sk = sock->sk; err = -EOPNOTSUPP; if (sock->type!=SOCK_STREAM) goto out; /* Only stream sockets accept */ err = -EINVAL; if (!sk->protinfo.af_unix.addr) goto out; /* No listens on an unbound socket */ unix_state_wlock(sk); if (sk->state != TCP_CLOSE && sk->state != TCP_LISTEN) goto out_unlock; if (backlog > sk->max_ack_backlog) wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait); sk->max_ack_backlog=backlog; sk->state=TCP_LISTEN; /* set credentials so connect can copy them */ sk->peercred.pid = current->pid; sk->peercred.uid = current->euid; sk->peercred.gid = current->egid; err = 0;out_unlock: unix_state_wunlock(sk);out: return err;}extern struct proto_ops unix_stream_ops;extern struct proto_ops unix_dgram_ops;static struct sock * unix_create1(struct socket *sock){ struct sock *sk; if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files) return NULL; MOD_INC_USE_COUNT; sk = sk_alloc(PF_UNIX, GFP_KERNEL, 1); if (!sk) { MOD_DEC_USE_COUNT; return NULL; } atomic_inc(&unix_nr_socks); sock_init_data(sock,sk); sk->write_space = unix_write_space; sk->max_ack_backlog = sysctl_unix_max_dgram_qlen; sk->destruct = unix_sock_destructor; sk->protinfo.af_unix.dentry=NULL; sk->protinfo.af_unix.mnt=NULL; sk->protinfo.af_unix.lock = RW_LOCK_UNLOCKED; atomic_set(&sk->protinfo.af_unix.inflight, 0); init_MUTEX(&sk->protinfo.af_unix.readsem);/* single task reading lock */ init_waitqueue_head(&sk->protinfo.af_unix.peer_wait); sk->protinfo.af_unix.list=NULL; unix_insert_socket(&unix_sockets_unbound, sk); return sk;}static int unix_create(struct socket *sock, int protocol){ if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; sock->state = SS_UNCONNECTED; switch (sock->type) { case SOCK_STREAM: sock->ops = &unix_stream_ops; break; /* * Believe it or not BSD has AF_UNIX, SOCK_RAW though * nothing uses it. */ case SOCK_RAW: sock->type=SOCK_DGRAM; case SOCK_DGRAM: sock->ops = &unix_dgram_ops; break; default: return -ESOCKTNOSUPPORT; } return unix_create1(sock) ? 0 : -ENOMEM;}static int unix_release(struct socket *sock){ unix_socket *sk = sock->sk; if (!sk) return 0; sock->sk = NULL; return unix_release_sock (sk, 0);}static int unix_autobind(struct socket *sock){ struct sock *sk = sock->sk; static u32 ordernum = 1; struct unix_address * addr; int err; down(&sk->protinfo.af_unix.readsem); err = 0; if (sk->protinfo.af_unix.addr) goto out; err = -ENOMEM; addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); if (!addr) goto out; memset(addr, 0, sizeof(*addr) + sizeof(short) + 16); addr->name->sun_family = AF_UNIX; atomic_set(&addr->refcnt, 1);retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); write_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; if (__unix_find_socket_byname(addr->name, addr->len, sock->type, addr->hash)) { write_unlock(&unix_table_lock); /* Sanity yield. It is unusual case, but yet... */ if (!(ordernum&0xFF)) { current->policy |= SCHED_YIELD; schedule(); } goto retry; } addr->hash ^= sk->type; __unix_remove_socket(sk); sk->protinfo.af_unix.addr = addr; __unix_insert_socket(&unix_socket_table[addr->hash], sk); write_unlock(&unix_table_lock); err = 0;out: up(&sk->protinfo.af_unix.readsem); return err;}static unix_socket *unix_find_other(struct sockaddr_un *sunname, int len, int type, unsigned hash, int *error){ unix_socket *u; struct nameidata nd; int err = 0; if (sunname->sun_path[0]) { if (path_init(sunname->sun_path, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &nd)) err = path_walk(sunname->sun_path, &nd); if (err) goto fail; err = permission(nd.dentry->d_inode,MAY_WRITE); if (err) goto put_fail; err = -ECONNREFUSED; if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) goto put_fail; u=unix_find_socket_byinode(nd.dentry->d_inode); if (!u) goto put_fail; path_release(&nd); err=-EPROTOTYPE; if (u->type != type) { sock_put(u); goto fail; } } else { err = -ECONNREFUSED; u=unix_find_socket_byname(sunname, len, type, hash); if (!u) goto fail; } return u;put_fail: path_release(&nd);fail: *error=err; return NULL;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -