futex.c
        drop_futex_key_refs(&q->key);
        return ret;
}

/*
 * PI futexes can not be requeued and must remove themself from the
 * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
 * and dropped here.
 */
static void unqueue_me_pi(struct futex_q *q)
{
        WARN_ON(plist_node_empty(&q->list));
        plist_del(&q->list, &q->list.plist);

        BUG_ON(!q->pi_state);
        free_pi_state(q->pi_state);
        q->pi_state = NULL;

        spin_unlock(q->lock_ptr);

        drop_futex_key_refs(&q->key);
}

/*
 * Fixup the pi_state owner with current.
 *
 * Must be called with hash bucket lock held and mm->sem held for non
 * private futexes.
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
                                struct task_struct *curr)
{
        u32 newtid = curr->pid | FUTEX_WAITERS;
        struct futex_pi_state *pi_state = q->pi_state;
        u32 uval, curval, newval;
        int ret;

        /* Owner died? */
        if (pi_state->owner != NULL) {
                spin_lock_irq(&pi_state->owner->pi_lock);
                WARN_ON(list_empty(&pi_state->list));
                list_del_init(&pi_state->list);
                spin_unlock_irq(&pi_state->owner->pi_lock);
        } else
                newtid |= FUTEX_OWNER_DIED;

        pi_state->owner = curr;

        spin_lock_irq(&curr->pi_lock);
        WARN_ON(!list_empty(&pi_state->list));
        list_add(&pi_state->list, &curr->pi_state_list);
        spin_unlock_irq(&curr->pi_lock);

        /*
         * We own it, so we have to replace the pending owner
         * TID. This must be atomic as we have to preserve the
         * owner died bit here.
         */
        ret = get_futex_value_locked(&uval, uaddr);

        while (!ret) {
                newval = (uval & FUTEX_OWNER_DIED) | newtid;

                pagefault_disable();
                curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
                pagefault_enable();

                if (curval == -EFAULT)
                        ret = -EFAULT;
                if (curval == uval)
                        break;
                uval = curval;
        }
        return ret;
}

/*
 * In case we must use restart_block to restart a futex_wait,
 * we encode in the 'arg3' shared capability
 */
#define ARG3_SHARED  1

static long futex_wait_restart(struct restart_block *restart);

static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
                      u32 val, ktime_t *abs_time)
{
        struct task_struct *curr = current;
        DECLARE_WAITQUEUE(wait, curr);
        struct futex_hash_bucket *hb;
        struct futex_q q;
        u32 uval;
        int ret;
        struct hrtimer_sleeper t;
        int rem = 0;

        q.pi_state = NULL;
 retry:
        if (fshared)
                down_read(fshared);

        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
                goto out_release_sem;

        hb = queue_lock(&q, -1, NULL);

        /*
         * Access the page AFTER the futex is queued.
         * Order is important:
         *
         *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
         *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
         *
         * The basic logical guarantee of a futex is that it blocks ONLY
         * if cond(var) is known to be true at the time of blocking, for
         * any cond.  If we queued after testing *uaddr, that would open
         * a race condition where we could block indefinitely with
         * cond(var) false, which would violate the guarantee.
         *
         * A consequence is that futex_wait() can return zero and absorb
         * a wakeup when *uaddr != val on entry to the syscall.  This is
         * rare, but normal.
         *
         * For shared futexes, we hold the mmap semaphore, so the mapping
         * cannot have changed since we looked it up in get_futex_key.
         */
        ret = get_futex_value_locked(&uval, uaddr);

        if (unlikely(ret)) {
                queue_unlock(&q, hb);

                /*
                 * If we would have faulted, release mmap_sem, fault it in and
                 * start all over again.
                 */
                if (fshared)
                        up_read(fshared);

                ret = get_user(uval, uaddr);

                if (!ret)
                        goto retry;
                return ret;
        }
        ret = -EWOULDBLOCK;
        if (uval != val)
                goto out_unlock_release_sem;

        /* Only actually queue if *uaddr contained val. */
        __queue_me(&q, hb);

        /*
         * Now the futex is queued and we have checked the data, we
         * don't want to hold mmap_sem while we sleep.
         */
        if (fshared)
                up_read(fshared);

        /*
         * There might have been scheduling since the queue_me(), as we
         * cannot hold a spinlock across the get_user() in case it
         * faults, and we cannot just set TASK_INTERRUPTIBLE state when
         * queueing ourselves into the futex hash. This code thus has to
         * rely on the futex_wake() code removing us from hash when it
         * wakes us up.
         */

        /* add_wait_queue is the barrier after __set_current_state. */
        __set_current_state(TASK_INTERRUPTIBLE);
        add_wait_queue(&q.waiters, &wait);
        /*
         * !plist_node_empty() is safe here without any lock.
         * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
         */
        if (likely(!plist_node_empty(&q.list))) {
                if (!abs_time)
                        schedule();
                else {
                        hrtimer_init(&t.timer, CLOCK_MONOTONIC,
                                     HRTIMER_MODE_ABS);
                        hrtimer_init_sleeper(&t, current);
                        t.timer.expires = *abs_time;

                        hrtimer_start(&t.timer, t.timer.expires,
                                      HRTIMER_MODE_ABS);

                        /*
                         * the timer could have already expired, in which
                         * case current would be flagged for rescheduling.
                         * Don't bother calling schedule.
                         */
                        if (likely(t.task))
                                schedule();

                        hrtimer_cancel(&t.timer);

                        /* Flag if a timeout occurred */
                        rem = (t.task == NULL);
                }
        }
        __set_current_state(TASK_RUNNING);

        /*
         * NOTE: we don't remove ourselves from the waitqueue because
         * we are the only user of it.
         */

        /* If we were woken (and unqueued), we succeeded, whatever. */
        if (!unqueue_me(&q))
                return 0;
        if (rem)
                return -ETIMEDOUT;

        /*
         * We expect signal_pending(current), but another thread may
         * have handled it for us already.
         */
        if (!abs_time)
                return -ERESTARTSYS;
        else {
                struct restart_block *restart;
                restart = &current_thread_info()->restart_block;
                restart->fn = futex_wait_restart;
                restart->arg0 = (unsigned long)uaddr;
                restart->arg1 = (unsigned long)val;
                restart->arg2 = (unsigned long)abs_time;
                restart->arg3 = 0;
                if (fshared)
                        restart->arg3 |= ARG3_SHARED;
                return -ERESTART_RESTARTBLOCK;
        }

 out_unlock_release_sem:
        queue_unlock(&q, hb);

 out_release_sem:
        if (fshared)
                up_read(fshared);
        return ret;
}


static long futex_wait_restart(struct restart_block *restart)
{
        u32 __user *uaddr = (u32 __user *)restart->arg0;
        u32 val = (u32)restart->arg1;
        ktime_t *abs_time = (ktime_t *)restart->arg2;
        struct rw_semaphore *fshared = NULL;

        restart->fn = do_no_restart_syscall;
        if (restart->arg3 & ARG3_SHARED)
                fshared = &current->mm->mmap_sem;
        return (long)futex_wait(uaddr, fshared, val, abs_time);
}


/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block, it does PI, etc. (Due to
 * races the kernel might see a 0 value of the futex too.)
 */
static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                         int detect, ktime_t *time, int trylock)
{
        struct hrtimer_sleeper timeout, *to = NULL;
        struct task_struct *curr = current;
        struct futex_hash_bucket *hb;
        u32 uval, newval, curval;
        struct futex_q q;
        int ret, lock_taken, ownerdied = 0, attempt = 0;

        if (refill_pi_state_cache())
                return -ENOMEM;

        if (time) {
                to = &timeout;
                hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
                hrtimer_init_sleeper(to, current);
                to->timer.expires = *time;
        }

        q.pi_state = NULL;
 retry:
        if (fshared)
                down_read(fshared);

        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
                goto out_release_sem;

 retry_unlocked:
        hb = queue_lock(&q, -1, NULL);

 retry_locked:
        ret = lock_taken = 0;

        /*
         * To avoid races, we attempt to take the lock here again
         * (by doing a 0 -> TID atomic cmpxchg), while holding all
         * the locks. It will most likely not succeed.
         */
        newval = current->pid;

        pagefault_disable();
        curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
        pagefault_enable();

        if (unlikely(curval == -EFAULT))
                goto uaddr_faulted;

        /*
         * Detect deadlocks. In case of REQUEUE_PI this is a valid
         * situation and we return success to user space.
         */
        if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
                ret = -EDEADLK;
                goto out_unlock_release_sem;
        }

        /*
         * Surprise - we got the lock. Just return to userspace:
         */
        if (unlikely(!curval))
                goto out_unlock_release_sem;

        uval = curval;

        /*
         * Set the WAITERS flag, so the owner will know it has someone
         * to wake at next unlock
         */
        newval = curval | FUTEX_WAITERS;

        /*
         * There are two cases, where a futex might have no owner (the
         * owner TID is 0): OWNER_DIED. We take over the futex in this
         * case. We also do an unconditional take over, when the owner
         * of the futex died.
         *
         * This is safe as we are protected by the hash bucket lock !
         */
        if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
                /* Keep the OWNER_DIED bit */
                newval = (curval & ~FUTEX_TID_MASK) | current->pid;
                ownerdied = 0;
                lock_taken = 1;
        }

        pagefault_disable();
        curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
        pagefault_enable();

        if (unlikely(curval == -EFAULT))
                goto uaddr_faulted;
        if (unlikely(curval != uval))
                goto retry_locked;

        /*
         * We took the lock due to owner died take over.
         */
        if (unlikely(lock_taken))
                goto out_unlock_release_sem;

        /*
         * We don't have the lock. Look up the PI state (or create it if
         * we are the first waiter):
         */
        ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);

        if (unlikely(ret)) {
                switch (ret) {

                case -EAGAIN:
                        /*
                         * Task is exiting and we just wait for the
                         * exit to complete.
                         */
                        queue_unlock(&q, hb);
                        if (fshared)
                                up_read(fshared);
                        cond_resched();
                        goto retry;

                case -ESRCH:
                        /*
                         * No owner found for this futex. Check if the
                         * OWNER_DIED bit is set to figure out whether
                         * this is a robust futex or not.
                         */
                        if (get_futex_value_locked(&curval, uaddr))
                                goto uaddr_faulted;

                        /*
                         * We simply start over in case of a robust
                         * futex. The code above will take the futex
                         * and return happy.
                         */
                        if (curval & FUTEX_OWNER_DIED) {
                                ownerdied = 1;
                                goto retry_locked;
                        }
                default:
                        goto out_unlock_release_sem;
                }
        }

        /*
         * Only actually queue now that the atomic ops are done:
         */
        __queue_me(&q, hb);

        /*
         * Now the futex is queued and we have checked the data, we
         * don't want to hold mmap_sem while we sleep.
         */
        if (fshared)
                up_read(fshared);

        WARN_ON(!q.pi_state);
        /*
         * Block on the PI mutex:
         */
        if (!trylock)
                ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
        else {
                ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
                /* Fixup the trylock return value: */
                ret = ret ? 0 : -EWOULDBLOCK;
        }

        if (fshared)
                down_read(fshared);
        spin_lock(q.lock_ptr);

        if (!ret) {
                /*
                 * Got the lock. We might not be the anticipated owner
                 * if we did a lock-steal - fix up the PI-state in
                 * that case:
                 */
                if (q.pi_state->owner != curr)
                        ret = fixup_pi_state_owner(uaddr, &q, curr);
        } else {
                /*
                 * Catch the rare case, where the lock was released
                 * when we were on the way back before we locked the
                 * hash bucket.
                 */
                if (q.pi_state->owner == curr &&
                    rt_mutex_trylock(&q.pi_state->pi_mutex)) {
                        ret = 0;
                } else {
                        /*
                         * Paranoia check. If we did not take the lock
                         * in the trylock above, then we should not be
                         * the owner of the rtmutex, neither the real
                         * nor the pending one:
                         */
                        if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
                                printk(KERN_ERR "futex_lock_pi: ret = %d "
                                       "pi-mutex: %p pi-state %p\n", ret,
                                       q.pi_state->pi_mutex.owner,
                                       q.pi_state->owner);
                }
        }

        /* Unqueue and drop the lock */
        unqueue_me_pi(&q);
        if (fshared)
                up_read(fshared);

        return ret != -EINTR ? ret : -ERESTARTNOINTR;

 out_unlock_release_sem:
        queue_unlock(&q, hb);

 out_release_sem:
        if (fshared)
                up_read(fshared);
        return ret;

 uaddr_faulted:
        /*
         * We have to r/w  *(int __user *)uaddr, but we can't modify it
         * non-atomically.  Therefore, if get_user below is not
         * enough, we need to handle the fault ourselves, while
         * still holding the mmap_sem.
         *
         * ... and hb->lock. :-) --ANK
         */
        queue_unlock(&q, hb);

        if (attempt++) {
                ret = futex_handle_fault((unsigned long)uaddr, fshared,
                                         attempt);
                if (ret)
                        goto out_release_sem;
                goto retry_unlocked;
        }

        if (fshared)
                up_read(fshared);

        ret = get_user(uval, uaddr);
        if (!ret && (uval != -EFAULT))
                goto retry;

        return ret;
}

/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
{
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
        u32 uval;
        struct plist_head *head;
        union futex_key key;
        int ret, attempt = 0;

 retry:
        if (get_user(uval, uaddr))
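
The ordering comment in futex_wait() above describes the userspace half of the protocol. As an illustration only (this is not part of futex.c; the names flag, sys_futex, wait_for_flag and signal_flag are invented for the example), a minimal waiter/waker pair over the raw futex(2) syscall might look like the following sketch:

/* Illustrative userspace sketch of the waiter/waker protocol described
 * in the futex_wait() comment above - not part of futex.c. */
#include <limits.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

static int32_t flag;                    /* 0 = event not yet signalled */

static long sys_futex(int32_t *uaddr, int op, int32_t val)
{
        /* futex(2) has no glibc wrapper here; the unused timeout,
         * uaddr2 and val3 arguments are passed as NULL/0. */
        return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

static void wait_for_flag(void)
{
        int32_t val;

        /* Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
         * FUTEX_WAIT sleeps only if *uaddr still equals val, so a wakeup
         * racing with the load is not lost - the syscall just returns. */
        while ((val = __sync_fetch_and_add(&flag, 0)) == 0) /* atomic read */
                sys_futex(&flag, FUTEX_WAIT, val);
}

static void signal_flag(void)
{
        /* Userspace waker: var = new; futex_wake(&var); */
        __sync_lock_test_and_set(&flag, 1);
        sys_futex(&flag, FUTEX_WAKE, INT_MAX);  /* wake all waiters */
}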
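
Similarly, the comments before futex_lock_pi() and futex_unlock_pi() refer to the userspace fast path of PI futexes: an uncontended lock is a 0 -> TID cmpxchg done entirely in userspace, and the kernel slow path is entered only when that transition fails. A rough sketch under the same caveats (names invented; no error handling, retries or robust-list support, unlike a real implementation such as glibc's PI mutexes):

/* Illustrative userspace fast path for a PI futex - not part of futex.c. */
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

static int32_t pi_lock;         /* 0 = unlocked, otherwise owner TID (+ flag bits) */

static void pi_mutex_lock(void)
{
        int32_t tid = (int32_t)syscall(SYS_gettid);

        /* Fast path: the 0 -> TID transition happens without the kernel. */
        if (__sync_bool_compare_and_swap(&pi_lock, 0, tid))
                return;

        /* Contended: FUTEX_LOCK_PI queues us, boosts the owner, etc. */
        syscall(SYS_futex, &pi_lock, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
}

static void pi_mutex_unlock(void)
{
        int32_t tid = (int32_t)syscall(SYS_gettid);

        /* Fast path: TID -> 0 succeeds only while no FUTEX_WAITERS bit is set. */
        if (__sync_bool_compare_and_swap(&pi_lock, tid, 0))
                return;

        /* Waiters present: let the kernel hand the lock to the top waiter. */
        syscall(SYS_futex, &pi_lock, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
}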