futex.c
        drop_futex_key_refs(&q->key);
        return ret;
}

/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
{
        WARN_ON(plist_node_empty(&q->list));
        plist_del(&q->list, &q->list.plist);

        BUG_ON(!q->pi_state);
        free_pi_state(q->pi_state);
        q->pi_state = NULL;

        spin_unlock(q->lock_ptr);

        drop_futex_key_refs(&q->key);
}

/*
 * Fixup the pi_state owner with current.
 *
 * Must be called with hash bucket lock held and mm->sem held for non
 * private futexes.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
                                struct task_struct *curr)
{
        u32 newtid = curr->pid | FUTEX_WAITERS;
        struct futex_pi_state *pi_state = q->pi_state;
        u32 uval, curval, newval;
        int ret;

        /* Owner died? */
        if (pi_state->owner != NULL) {
                spin_lock_irq(&pi_state->owner->pi_lock);
                WARN_ON(list_empty(&pi_state->list));
                list_del_init(&pi_state->list);
                spin_unlock_irq(&pi_state->owner->pi_lock);
        } else
                newtid |= FUTEX_OWNER_DIED;

        pi_state->owner = curr;

        spin_lock_irq(&curr->pi_lock);
        WARN_ON(!list_empty(&pi_state->list));
        list_add(&pi_state->list, &curr->pi_state_list);
        spin_unlock_irq(&curr->pi_lock);

        /*
         * We own it, so we have to replace the pending owner
         * TID. This must be atomic as we have to preserve the
         * owner died bit here.
         */
        ret = get_futex_value_locked(&uval, uaddr);

        while (!ret) {
                newval = (uval & FUTEX_OWNER_DIED) | newtid;

                pagefault_disable();
                curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
                pagefault_enable();

                if (curval == -EFAULT)
                        ret = -EFAULT;
                if (curval == uval)
                        break;
                uval = curval;
        }
        return ret;
}

/*
 * In case we must use restart_block to restart a futex_wait,
 * we encode in the 'arg3' shared capability
 */
#define ARG3_SHARED  1

static long futex_wait_restart(struct restart_block *restart);

static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
                      u32 val, ktime_t *abs_time)
{
        struct task_struct *curr = current;
        DECLARE_WAITQUEUE(wait, curr);
        struct futex_hash_bucket *hb;
        struct futex_q q;
        u32 uval;
        int ret;
        struct hrtimer_sleeper t;
        int rem = 0;

        q.pi_state = NULL;
 retry:
        if (fshared)
                down_read(fshared);

        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
                goto out_release_sem;

        hb = queue_lock(&q, -1, NULL);

        /*
         * Access the page AFTER the futex is queued.
         * Order is important:
         *
         *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
         *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
         *
         * The basic logical guarantee of a futex is that it blocks ONLY
         * if cond(var) is known to be true at the time of blocking, for
         * any cond.  If we queued after testing *uaddr, that would open
         * a race condition where we could block indefinitely with
         * cond(var) false, which would violate the guarantee.
         *
         * A consequence is that futex_wait() can return zero and absorb
         * a wakeup when *uaddr != val on entry to the syscall.  This is
         * rare, but normal.
         *
         * For shared futexes, we hold the mmap semaphore, so the mapping
         * cannot have changed since we looked it up in get_futex_key.
         */
        ret = get_futex_value_locked(&uval, uaddr);

        if (unlikely(ret)) {
                queue_unlock(&q, hb);

                /*
                 * If we would have faulted, release mmap_sem, fault it in and
                 * start all over again.
                 */
                if (fshared)
                        up_read(fshared);

                ret = get_user(uval, uaddr);

                if (!ret)
                        goto retry;
                return ret;
        }
        ret = -EWOULDBLOCK;
        if (uval != val)
                goto out_unlock_release_sem;

        /* Only actually queue if *uaddr contained val. */
        __queue_me(&q, hb);

        /*
         * Now the futex is queued and we have checked the data, we
         * don't want to hold mmap_sem while we sleep.
         */
        if (fshared)
                up_read(fshared);

        /*
         * There might have been scheduling since the queue_me(), as we
         * cannot hold a spinlock across the get_user() in case it
         * faults, and we cannot just set TASK_INTERRUPTIBLE state when
         * queueing ourselves into the futex hash. This code thus has to
         * rely on the futex_wake() code removing us from hash when it
         * wakes us up.
         */

        /* add_wait_queue is the barrier after __set_current_state. */
        __set_current_state(TASK_INTERRUPTIBLE);
        add_wait_queue(&q.waiters, &wait);
        /*
         * !plist_node_empty() is safe here without any lock.
         * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
         */
        if (likely(!plist_node_empty(&q.list))) {
                if (!abs_time)
                        schedule();
                else {
                        hrtimer_init(&t.timer, CLOCK_MONOTONIC,
                                     HRTIMER_MODE_ABS);
                        hrtimer_init_sleeper(&t, current);
                        t.timer.expires = *abs_time;

                        hrtimer_start(&t.timer, t.timer.expires,
                                      HRTIMER_MODE_ABS);

                        /*
                         * the timer could have already expired, in which
                         * case current would be flagged for rescheduling.
                         * Don't bother calling schedule.
                         */
                        if (likely(t.task))
                                schedule();

                        hrtimer_cancel(&t.timer);

                        /* Flag if a timeout occurred */
                        rem = (t.task == NULL);
                }
        }
        __set_current_state(TASK_RUNNING);

        /*
         * NOTE: we don't remove ourselves from the waitqueue because
         * we are the only user of it.
         */

        /* If we were woken (and unqueued), we succeeded, whatever. */
        if (!unqueue_me(&q))
                return 0;
        if (rem)
                return -ETIMEDOUT;

        /*
         * We expect signal_pending(current), but another thread may
         * have handled it for us already.
         */
        if (!abs_time)
                return -ERESTARTSYS;
        else {
                struct restart_block *restart;
                restart = &current_thread_info()->restart_block;
                restart->fn = futex_wait_restart;
                restart->arg0 = (unsigned long)uaddr;
                restart->arg1 = (unsigned long)val;
                restart->arg2 = (unsigned long)abs_time;
                restart->arg3 = 0;
                if (fshared)
                        restart->arg3 |= ARG3_SHARED;
                return -ERESTART_RESTARTBLOCK;
        }

 out_unlock_release_sem:
        queue_unlock(&q, hb);

 out_release_sem:
        if (fshared)
                up_read(fshared);
        return ret;
}


static long futex_wait_restart(struct restart_block *restart)
{
        u32 __user *uaddr = (u32 __user *)restart->arg0;
        u32 val = (u32)restart->arg1;
        ktime_t *abs_time = (ktime_t *)restart->arg2;
        struct rw_semaphore *fshared = NULL;

        restart->fn = do_no_restart_syscall;
        if (restart->arg3 & ARG3_SHARED)
                fshared = &current->mm->mmap_sem;
        return (long)futex_wait(uaddr, fshared, val, abs_time);
}


/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block, it does PI, etc. (Due to
 * races the kernel might see a 0 value of the futex too.)
 */
static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                         int detect, ktime_t *time, int trylock)
{
        struct hrtimer_sleeper timeout, *to = NULL;
        struct task_struct *curr = current;
        struct futex_hash_bucket *hb;
        u32 uval, newval, curval;
        struct futex_q q;
        int ret, lock_taken, ownerdied = 0, attempt = 0;

        if (refill_pi_state_cache())
                return -ENOMEM;

        if (time) {
                to = &timeout;
                hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
                hrtimer_init_sleeper(to, current);
                to->timer.expires = *time;
        }

        q.pi_state = NULL;
 retry:
        if (fshared)
                down_read(fshared);

        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
                goto out_release_sem;

 retry_unlocked:
        hb = queue_lock(&q, -1, NULL);

 retry_locked:
        ret = lock_taken = 0;

        /*
         * To avoid races, we attempt to take the lock here again
         * (by doing a 0 -> TID atomic cmpxchg), while holding all
         * the locks. It will most likely not succeed.
         */
        newval = current->pid;

        pagefault_disable();
        curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
        pagefault_enable();

        if (unlikely(curval == -EFAULT))
                goto uaddr_faulted;

        /*
         * Detect deadlocks. In case of REQUEUE_PI this is a valid
         * situation and we return success to user space.
         */
        if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
                ret = -EDEADLK;
                goto out_unlock_release_sem;
        }

        /*
         * Surprise - we got the lock. Just return to userspace:
         */
        if (unlikely(!curval))
                goto out_unlock_release_sem;

        uval = curval;

        /*
         * Set the WAITERS flag, so the owner will know it has someone
         * to wake at next unlock
         */
        newval = curval | FUTEX_WAITERS;

        /*
         * There are two cases, where a futex might have no owner (the
         * owner TID is 0): OWNER_DIED. We take over the futex in this
         * case. We also do an unconditional take over, when the owner
         * of the futex died.
         *
         * This is safe as we are protected by the hash bucket lock !
         */
        if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
                /* Keep the OWNER_DIED bit */
                newval = (curval & ~FUTEX_TID_MASK) | current->pid;
                ownerdied = 0;
                lock_taken = 1;
        }

        pagefault_disable();
        curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
        pagefault_enable();

        if (unlikely(curval == -EFAULT))
                goto uaddr_faulted;
        if (unlikely(curval != uval))
                goto retry_locked;

        /*
         * We took the lock due to owner died take over.
         */
        if (unlikely(lock_taken))
                goto out_unlock_release_sem;

        /*
         * We don't have the lock. Look up the PI state (or create it if
         * we are the first waiter):
         */
        ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);

        if (unlikely(ret)) {
                switch (ret) {

                case -EAGAIN:
                        /*
                         * Task is exiting and we just wait for the
                         * exit to complete.
                         */
                        queue_unlock(&q, hb);
                        if (fshared)
                                up_read(fshared);
                        cond_resched();
                        goto retry;

                case -ESRCH:
                        /*
                         * No owner found for this futex. Check if the
                         * OWNER_DIED bit is set to figure out whether
                         * this is a robust futex or not.
                         */
                        if (get_futex_value_locked(&curval, uaddr))
                                goto uaddr_faulted;

                        /*
                         * We simply start over in case of a robust
                         * futex. The code above will take the futex
                         * and return happy.
                         */
                        if (curval & FUTEX_OWNER_DIED) {
                                ownerdied = 1;
                                goto retry_locked;
                        }
                default:
                        goto out_unlock_release_sem;
                }
        }

        /*
         * Only actually queue now that the atomic ops are done:
         */
        __queue_me(&q, hb);

        /*
         * Now the futex is queued and we have checked the data, we
         * don't want to hold mmap_sem while we sleep.
         */
        if (fshared)
                up_read(fshared);

        WARN_ON(!q.pi_state);
        /*
         * Block on the PI mutex:
         */
        if (!trylock)
                ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
        else {
                ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
                /* Fixup the trylock return value: */
                ret = ret ? 0 : -EWOULDBLOCK;
        }

        if (fshared)
                down_read(fshared);
        spin_lock(q.lock_ptr);

        if (!ret) {
                /*
                 * Got the lock. We might not be the anticipated owner
                 * if we did a lock-steal - fix up the PI-state in
                 * that case:
                 */
                if (q.pi_state->owner != curr)
                        ret = fixup_pi_state_owner(uaddr, &q, curr);
        } else {
                /*
                 * Catch the rare case, where the lock was released
                 * when we were on the way back before we locked the
                 * hash bucket.
                 */
                if (q.pi_state->owner == curr &&
                    rt_mutex_trylock(&q.pi_state->pi_mutex)) {
                        ret = 0;
                } else {
                        /*
                         * Paranoia check. If we did not take the lock
                         * in the trylock above, then we should not be
                         * the owner of the rtmutex, neither the real
                         * nor the pending one:
                         */
                        if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
                                printk(KERN_ERR "futex_lock_pi: ret = %d "
                                       "pi-mutex: %p pi-state %p\n", ret,
                                       q.pi_state->pi_mutex.owner,
                                       q.pi_state->owner);
                }
        }

        /* Unqueue and drop the lock */
        unqueue_me_pi(&q);
        if (fshared)
                up_read(fshared);

        return ret != -EINTR ? ret : -ERESTARTNOINTR;

 out_unlock_release_sem:
        queue_unlock(&q, hb);

 out_release_sem:
        if (fshared)
                up_read(fshared);
        return ret;

 uaddr_faulted:
        /*
         * We have to r/w  *(int __user *)uaddr, but we can't modify it
         * non-atomically.  Therefore, if get_user below is not
         * enough, we need to handle the fault ourselves, while
         * still holding the mmap_sem.
         *
         * ... and hb->lock. :-) --ANK
         */
        queue_unlock(&q, hb);

        if (attempt++) {
                ret = futex_handle_fault((unsigned long)uaddr, fshared,
                                         attempt);
                if (ret)
                        goto out_release_sem;
                goto retry_unlocked;
        }

        if (fshared)
                up_read(fshared);

        ret = get_user(uval, uaddr);
        if (!ret && (uval != -EFAULT))
                goto retry;

        return ret;
}

/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
{
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
        u32 uval;
        struct plist_head *head;
        union futex_key key;
        int ret, attempt = 0;

 retry:
        if (get_user(uval, uaddr))
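
The ordering comment in futex_wait() above describes the userspace half of the protocol. As an illustration only (this is not part of futex.c; the names flag, sys_futex, wait_for_flag and signal_flag are invented for the example), a minimal waiter/waker pair over the raw futex(2) syscall might look like the following sketch:

/* Illustrative userspace sketch of the waiter/waker protocol described
 * in the futex_wait() comment above - not part of futex.c. */
#include <limits.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

static int32_t flag;                    /* 0 = event not yet signalled */

static long sys_futex(int32_t *uaddr, int op, int32_t val)
{
        /* futex(2) has no glibc wrapper here; the unused timeout,
         * uaddr2 and val3 arguments are passed as NULL/0. */
        return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

static void wait_for_flag(void)
{
        int32_t val;

        /* Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
         * FUTEX_WAIT sleeps only if *uaddr still equals val, so a wakeup
         * racing with the load is not lost - the syscall just returns. */
        while ((val = __sync_fetch_and_add(&flag, 0)) == 0) /* atomic read */
                sys_futex(&flag, FUTEX_WAIT, val);
}

static void signal_flag(void)
{
        /* Userspace waker: var = new; futex_wake(&var); */
        __sync_lock_test_and_set(&flag, 1);
        sys_futex(&flag, FUTEX_WAKE, INT_MAX);  /* wake all waiters */
}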
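
Similarly, the comments before futex_lock_pi() and futex_unlock_pi() refer to the userspace fast path of PI futexes: an uncontended lock is a 0 -> TID cmpxchg done entirely in userspace, and the kernel slow path is entered only when that transition fails. A rough sketch under the same caveats (names invented; no error handling, retries or robust-list support, unlike a real implementation such as glibc's PI mutexes):

/* Illustrative userspace fast path for a PI futex - not part of futex.c. */
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

static int32_t pi_lock;         /* 0 = unlocked, otherwise owner TID (+ flag bits) */

static void pi_mutex_lock(void)
{
        int32_t tid = (int32_t)syscall(SYS_gettid);

        /* Fast path: the 0 -> TID transition happens without the kernel. */
        if (__sync_bool_compare_and_swap(&pi_lock, 0, tid))
                return;

        /* Contended: FUTEX_LOCK_PI queues us, boosts the owner, etc. */
        syscall(SYS_futex, &pi_lock, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
}

static void pi_mutex_unlock(void)
{
        int32_t tid = (int32_t)syscall(SYS_gettid);

        /* Fast path: TID -> 0 succeeds only while no FUTEX_WAITERS bit is set. */
        if (__sync_bool_compare_and_swap(&pi_lock, tid, 0))
                return;

        /* Waiters present: let the kernel hand the lock to the top waiter. */
        syscall(SYS_futex, &pi_lock, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
}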