📄 futex.c
	return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != current->pid)
		return -EPERM;
	/*
	 * First take all the futex related locks:
	 */
	if (fshared)
		down_read(fshared);

	ret = get_futex_key(uaddr, fshared, &key);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
retry_unlocked:
	spin_lock(&hb->lock);

	/*
	 * To avoid races, try to do the TID -> 0 atomic transition
	 * again. If it succeeds then we can return without waking
	 * anyone else up:
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		pagefault_disable();
		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
		pagefault_enable();
	}

	if (unlikely(uval == -EFAULT))
		goto pi_faulted;
	/*
	 * Rare case: we managed to release the lock atomically,
	 * no need to wake anyone else up:
	 */
	if (unlikely(uval == current->pid))
		goto out_unlock;

	/*
	 * Ok, other tasks may need to be woken up - check waiters
	 * and do the wakeup if necessary:
	 */
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (!match_futex (&this->key, &key))
			continue;
		ret = wake_futex_pi(uaddr, uval, this);
		/*
		 * The atomic access to the futex value
		 * generated a pagefault, so retry the
		 * user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		goto out_unlock;
	}
	/*
	 * No waiters - kernel unlocks the futex:
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		ret = unlock_futex_pi(uaddr, uval);
		if (ret == -EFAULT)
			goto pi_faulted;
	}

out_unlock:
	spin_unlock(&hb->lock);
out:
	if (fshared)
		up_read(fshared);

	return ret;

pi_faulted:
	/*
	 * We have to r/w *(int __user *)uaddr, but we can't modify it
	 * non-atomically. Therefore, if get_user below is not
	 * enough, we need to handle the fault ourselves, while
	 * still holding the mmap_sem.
	 *
	 * ... and hb->lock. --ANK
	 */
	spin_unlock(&hb->lock);

	if (attempt++) {
		ret = futex_handle_fault((unsigned long)uaddr, fshared,
					 attempt);
		if (ret)
			goto out;
		goto retry_unlocked;
	}

	if (fshared)
		up_read(fshared);

	ret = get_user(uval, uaddr);
	if (!ret && (uval != -EFAULT))
		goto retry;

	return ret;
}

static int futex_close(struct inode *inode, struct file *filp)
{
	struct futex_q *q = filp->private_data;

	unqueue_me(q);
	kfree(q);

	return 0;
}

/* This is one-shot: once it's gone off you need a new fd */
static unsigned int futex_poll(struct file *filp,
			       struct poll_table_struct *wait)
{
	struct futex_q *q = filp->private_data;
	int ret = 0;

	poll_wait(filp, &q->waiters, wait);

	/*
	 * plist_node_empty() is safe here without any lock.
	 * q->lock_ptr != 0 is not safe, because of ordering against wakeup.
	 */
	if (plist_node_empty(&q->list))
		ret = POLLIN | POLLRDNORM;

	return ret;
}

static const struct file_operations futex_fops = {
	.release	= futex_close,
	.poll		= futex_poll,
};

/*
 * Signal allows caller to avoid the race which would occur if they
 * set the sigio stuff up afterwards.
 */
static int futex_fd(u32 __user *uaddr, int signal)
{
	struct futex_q *q;
	struct file *filp;
	int ret, err;
	struct rw_semaphore *fshared;
	static unsigned long printk_interval;

	if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
		printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
		       "will be removed from the kernel in June 2007\n",
		       current->comm);
	}

	ret = -EINVAL;
	if (!valid_signal(signal))
		goto out;

	ret = get_unused_fd();
	if (ret < 0)
		goto out;
	filp = get_empty_filp();
	if (!filp) {
		put_unused_fd(ret);
		ret = -ENFILE;
		goto out;
	}
	filp->f_op = &futex_fops;
	filp->f_path.mnt = mntget(futex_mnt);
	filp->f_path.dentry = dget(futex_mnt->mnt_root);
	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;

	if (signal) {
		err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1);
		if (err < 0) {
			goto error;
		}
		filp->f_owner.signum = signal;
	}

	q = kmalloc(sizeof(*q), GFP_KERNEL);
	if (!q) {
		err = -ENOMEM;
		goto error;
	}
	q->pi_state = NULL;

	fshared = &current->mm->mmap_sem;
	down_read(fshared);
	err = get_futex_key(uaddr, fshared, &q->key);

	if (unlikely(err != 0)) {
		up_read(fshared);
		kfree(q);
		goto error;
	}

	/*
	 * queue_me() must be called before releasing mmap_sem, because
	 * key->shared.inode needs to be referenced while holding it.
	 */
	filp->private_data = q;

	queue_me(q, ret, filp);
	up_read(fshared);

	/* Now we map fd to filp, so userspace can access it */
	fd_install(ret, filp);
out:
	return ret;
error:
	put_unused_fd(ret);
	put_filp(filp);
	ret = err;
	goto out;
}

/*
 * Support for robust futexes: the kernel cleans up held futexes at
 * thread exit time.
 *
 * Implementation: user-space maintains a per-thread list of locks it
 * is holding. Upon do_exit(), the kernel carefully walks this list,
 * and marks all locks that are owned by this thread with the
 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
 * always manipulated with the lock held, so the list is private and
 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
 * field, to allow the kernel to clean up if the thread dies after
 * acquiring the lock, but just before it could have added itself to
 * the list. There can only be one such pending lock.
 */

/**
 * sys_set_robust_list - set the robust-futex list head of a task
 * @head: pointer to the list-head
 * @len: length of the list-head, as userspace expects
 */
asmlinkage long
sys_set_robust_list(struct robust_list_head __user *head,
		    size_t len)
{
	/*
	 * The kernel knows only one size for now:
	 */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}

/**
 * sys_get_robust_list - get the robust-futex list head of a task
 * @pid: pid of the process [zero for current task]
 * @head_ptr: pointer to a list-head pointer, the kernel fills it in
 * @len_ptr: pointer to a length field, the kernel fills in the header size
 */
asmlinkage long
sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
		    size_t __user *len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;

	if (!pid)
		head = current->robust_list;
	else {
		struct task_struct *p;

		ret = -ESRCH;
		rcu_read_lock();
		p = find_task_by_pid(pid);
		if (!p)
			goto err_unlock;
		ret = -EPERM;
		if ((current->euid != p->euid) && (current->euid != p->uid) &&
		    !capable(CAP_SYS_PTRACE))
			goto err_unlock;
		head = p->robust_list;
		rcu_read_unlock();
	}

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}

/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, nval, mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == curr->pid) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest. Set the OWNER_DIED bit atomically
		 * via cmpxchg, and if the value had FUTEX_WAITERS
		 * set, wake up a waiter (if any). (We have to do a
		 * futex_wake() even if OWNER_DIED is already set -
		 * to handle the rare but possible case of recursive
		 * thread-death.) The rest of the cleanup is done in
		 * userspace.
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);

		if (nval == -EFAULT)
			return -1;

		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here. The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi) {
			if (uval & FUTEX_WAITERS)
				futex_wake(uaddr, &curr->mm->mmap_sem, 1);
		}
	}
	return 0;
}

/*
 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
 */
static inline int fetch_robust_entry(struct robust_list __user **entry,
				     struct robust_list __user * __user *head,
				     int *pi)
{
	unsigned long uentry;

	if (get_user(uentry, (unsigned long __user *)head))
		return -EFAULT;

	*entry = (void __user *)(uentry & ~1UL);
	*pi = uentry & 1;

	return 0;
}

/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned long futex_offset;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);

	while (entry != &head->list) {
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
					       curr, pi))
				return;
		/*
		 * Fetch the next entry in the list:
		 */
		if (fetch_robust_entry(&entry, &entry->next, &pi))
			return;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}
}

long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
	      u32 __user *uaddr2, u32 val2, u32 val3)
{
	int ret;
	int cmd = op & FUTEX_CMD_MASK;
	struct rw_semaphore *fshared = NULL;

	if (!(op & FUTEX_PRIVATE_FLAG))
		fshared = &current->mm->mmap_sem;

	switch (cmd) {
	case FUTEX_WAIT:
		ret = futex_wait(uaddr, fshared, val, timeout);
		break;
	case FUTEX_WAKE:
		ret = futex_wake(uaddr, fshared, val);
		break;
	case FUTEX_FD:
		/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
		ret = futex_fd(uaddr, val);
		break;
	case FUTEX_REQUEUE:
		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
		break;
	case FUTEX_CMP_REQUEUE:
		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3);
		break;
	case FUTEX_WAKE_OP:
		ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
		break;
	case FUTEX_LOCK_PI:
		ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
		break;
	case FUTEX_UNLOCK_PI:
		ret = futex_unlock_pi(uaddr, fshared);
		break;
	case FUTEX_TRYLOCK_PI:
		ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
		break;
	default:
		ret = -ENOSYS;
	}
	return ret;
}

asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
			  struct timespec __user *utime, u32 __user *uaddr2,
			  u32 val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		if (cmd == FUTEX_WAIT)
			t = ktime_add(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}

static int futexfs_get_sb(struct file_system_type *fs_type,
			  int flags, const char *dev_name, void *data,
			  struct vfsmount *mnt)
{
	return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt);
}

static struct file_system_type futex_fs_type = {
	.name		= "futexfs",
	.get_sb		= futexfs_get_sb,
	.kill_sb	= kill_anon_super,
};

static int __init init(void)
{
	int i = register_filesystem(&futex_fs_type);

	if (i)
		return i;

	futex_mnt = kern_mount(&futex_fs_type);
	if (IS_ERR(futex_mnt)) {
		unregister_filesystem(&futex_fs_type);
		return PTR_ERR(futex_mnt);
	}

	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
		plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
__initcall(init);
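For context, the FUTEX_WAIT and FUTEX_WAKE commands dispatched by do_futex() above are normally driven from user space through the raw futex syscall. The following is a minimal illustrative sketch, not part of futex.c: the wrapper name futex(), the shared word futex_word, and the thread setup are assumptions made here for demonstration, FUTEX_PRIVATE_FLAG is omitted, and error handling is minimal. Build with -lpthread.

/* Illustrative user-space sketch (assumed example, not kernel code). */
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <linux/futex.h>

static int futex_word;	/* the 32-bit futex word both threads share */

/* Thin wrapper over the raw syscall; uaddr2/val3 are unused here. */
static long futex(int *uaddr, int op, int val, const struct timespec *timeout)
{
	return syscall(SYS_futex, uaddr, op, val, timeout, NULL, 0);
}

static void *waker(void *arg)
{
	sleep(1);
	__sync_lock_test_and_set(&futex_word, 1);	/* publish the condition */
	futex(&futex_word, FUTEX_WAKE, 1, NULL);	/* wake at most one waiter */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waker, NULL);
	/*
	 * FUTEX_WAIT sleeps only while the word still equals the expected
	 * value 0; a spurious or racy return is handled by re-checking.
	 */
	while (futex_word == 0)
		futex(&futex_word, FUTEX_WAIT, 0, NULL);
	pthread_join(t, NULL);
	printf("woken\n");
	return 0;
}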