futex.c
	 */
	spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 */
		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;

		spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		return ret;
	}

	pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make 'p'
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	pi_state->owner = p;
	spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	*ps = pi_state;

	return 0;
}

/*
 * The hash bucket lock must be held when this is called.
 * Afterwards, the futex_q must not be accessed.
 */
static void wake_futex(struct futex_q *q)
{
	plist_del(&q->list, &q->list.plist);
	if (q->filp)
		send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
	/*
	 * The lock in wake_up_all() is a crucial memory barrier after the
	 * plist_del() and also before assigning to q->lock_ptr.
	 */
	wake_up_all(&q->waiters);
	/*
	 * The waiting task can free the futex_q as soon as this is written,
	 * without taking any locks. This must come last.
	 *
	 * A memory barrier is required here to prevent the following store
	 * to lock_ptr from getting ahead of the wakeup. Clearing the lock
	 * at the end of wake_up_all() does not prevent this store from
	 * moving.
	 */
	smp_wmb();
	q->lock_ptr = NULL;
}

static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
	struct task_struct *new_owner;
	struct futex_pi_state *pi_state = this->pi_state;
	u32 curval, newval;

	if (!pi_state)
		return -EINVAL;

	spin_lock(&pi_state->pi_mutex.wait_lock);
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/*
	 * This happens when we have stolen the lock and the original
	 * pending owner did not enqueue itself back on the rt_mutex.
	 * That's not a tragedy. We know that way that a lock waiter
	 * is on the fly. We make the futex_q waiter the pending owner.
	 */
	if (!new_owner)
		new_owner = this->task;

	/*
	 * We pass it to the next owner. (The WAITERS bit is always
	 * kept enabled while there is PI state around. We must also
	 * preserve the owner died bit.)
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		int ret = 0;

		newval = FUTEX_WAITERS | new_owner->pid;

		pagefault_disable();
		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
		pagefault_enable();

		if (curval == -EFAULT)
			ret = -EFAULT;
		if (curval != uval)
			ret = -EINVAL;
		if (ret) {
			spin_unlock(&pi_state->pi_mutex.wait_lock);
			return ret;
		}
	}

	spin_lock_irq(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	spin_unlock_irq(&pi_state->owner->pi_lock);

	spin_lock_irq(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	spin_unlock_irq(&new_owner->pi_lock);

	spin_unlock(&pi_state->pi_mutex.wait_lock);
	rt_mutex_unlock(&pi_state->pi_mutex);

	return 0;
}

static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
{
	u32 oldval;

	/*
	 * There is no waiter, so we unlock the futex. The owner died
	 * bit need not be preserved here. We are the owner:
	 */
	pagefault_disable();
	oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
	pagefault_enable();

	if (oldval == -EFAULT)
		return oldval;
	if (oldval != uval)
		return -EAGAIN;

	return 0;
}

/*
 * Express the locking dependencies for lockdep:
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 <= hb2) {
		spin_lock(&hb1->lock);
		if (hb1 < hb2)
			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
	} else { /* hb1 > hb2 */
		spin_lock(&hb2->lock);
		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
	}
}

/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
		      int nr_wake)
{
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	struct plist_head *head;
	union futex_key key;
	int ret;

	if (fshared)
		down_read(fshared);

	ret = get_futex_key(uaddr, fshared, &key);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
	spin_lock(&hb->lock);
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, &key)) {
			if (this->pi_state) {
				ret = -EINVAL;
				break;
			}
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&hb->lock);
out:
	if (fshared)
		up_read(fshared);
	return ret;
}

/*
 * Wake up waiters hashed on the first futex and, depending on the
 * result of the atomic op applied to *uaddr2, waiters on the second
 * futex as well:
 */
static int
futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
	      u32 __user *uaddr2,
	      int nr_wake, int nr_wake2, int op)
{
	union futex_key key1, key2;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head;
	struct futex_q *this, *next;
	int ret, op_ret, attempt = 0;

retryfull:
	if (fshared)
		down_read(fshared);

	ret = get_futex_key(uaddr1, fshared, &key1);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, fshared, &key2);
	if (unlikely(ret != 0))
		goto out;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

retry:
	double_lock_hb(hb1, hb2);

	op_ret = futex_atomic_op_inuser(op, uaddr2);
	if (unlikely(op_ret < 0)) {
		u32 dummy;

		spin_unlock(&hb1->lock);
		if (hb1 != hb2)
			spin_unlock(&hb2->lock);

#ifndef CONFIG_MMU
		/*
		 * we don't get EFAULT from MMU faults if we don't have an MMU,
		 * but we might get them from range checking
		 */
		ret = op_ret;
		goto out;
#endif

		if (unlikely(op_ret != -EFAULT)) {
			ret = op_ret;
			goto out;
		}

		/*
		 * futex_atomic_op_inuser needs to both read and write
		 * *(int __user *)uaddr2, but we can't modify it
		 * non-atomically. Therefore, if get_user below is not
		 * enough, we need to handle the fault ourselves, while
		 * still holding the mmap_sem.
		 */
		if (attempt++) {
			ret = futex_handle_fault((unsigned long)uaddr2,
						 fshared, attempt);
			if (ret)
				goto out;
			goto retry;
		}

		/*
		 * If we would have faulted, release mmap_sem,
		 * fault it in and start all over again.
		 */
		if (fshared)
			up_read(fshared);

		ret = get_user(dummy, uaddr2);
		if (ret)
			return ret;

		goto retryfull;
	}

	head = &hb1->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, &key1)) {
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		head = &hb2->chain;

		op_ret = 0;
		plist_for_each_entry_safe(this, next, head, list) {
			if (match_futex(&this->key, &key2)) {
				wake_futex(this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
out:
	if (fshared)
		up_read(fshared);
	return ret;
}

/*
 * Requeue all waiters hashed on one physical page to another
 * physical page.
 */
static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
			 u32 __user *uaddr2,
			 int nr_wake, int nr_requeue, u32 *cmpval)
{
	union futex_key key1, key2;
	struct futex_hash_bucket *hb1, *hb2;
	struct plist_head *head1;
	struct futex_q *this, *next;
	int ret, drop_count = 0;

retry:
	if (fshared)
		down_read(fshared);

	ret = get_futex_key(uaddr1, fshared, &key1);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, fshared, &key2);
	if (unlikely(ret != 0))
		goto out;

	hb1 = hash_futex(&key1);
	hb2 = hash_futex(&key2);

	double_lock_hb(hb1, hb2);

	if (likely(cmpval != NULL)) {
		u32 curval;

		ret = get_futex_value_locked(&curval, uaddr1);

		if (unlikely(ret)) {
			spin_unlock(&hb1->lock);
			if (hb1 != hb2)
				spin_unlock(&hb2->lock);

			/*
			 * If we would have faulted, release mmap_sem, fault
			 * it in and start all over again.
			 */
			if (fshared)
				up_read(fshared);

			ret = get_user(curval, uaddr1);

			if (!ret)
				goto retry;

			return ret;
		}
		if (curval != *cmpval) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	head1 = &hb1->chain;
	plist_for_each_entry_safe(this, next, head1, list) {
		if (!match_futex(&this->key, &key1))
			continue;
		if (++ret <= nr_wake) {
			wake_futex(this);
		} else {
			/*
			 * If key1 and key2 hash to the same bucket, no need to
			 * requeue.
			 */
			if (likely(head1 != &hb2->chain)) {
				plist_del(&this->list, &hb1->chain);
				plist_add(&this->list, &hb2->chain);
				this->lock_ptr = &hb2->lock;
#ifdef CONFIG_DEBUG_PI_LIST
				this->list.plist.lock = &hb2->lock;
#endif
			}
			this->key = key2;
			get_futex_key_refs(&key2);
			drop_count++;

			if (ret - nr_wake >= nr_requeue)
				break;
		}
	}

out_unlock:
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);

	/* drop_futex_key_refs() must be called outside the spinlocks. */
	while (--drop_count >= 0)
		drop_futex_key_refs(&key1);

out:
	if (fshared)
		up_read(fshared);
	return ret;
}

/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *
queue_lock(struct futex_q *q, int fd, struct file *filp)
{
	struct futex_hash_bucket *hb;

	q->fd = fd;
	q->filp = filp;

	init_waitqueue_head(&q->waiters);

	get_futex_key_refs(&q->key);
	hb = hash_futex(&q->key);
	q->lock_ptr = &hb->lock;

	spin_lock(&hb->lock);
	return hb;
}

static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
{
	int prio;

	/*
	 * The priority used to register this element is
	 * - either the real thread-priority for the real-time threads
	 *   (i.e. threads with a priority lower than MAX_RT_PRIO)
	 * - or MAX_RT_PRIO for non-RT threads.
	 * Thus, all RT-threads are woken first in priority order, and
	 * the others are woken last, in FIFO order.
	 */
	prio = min(current->normal_prio, MAX_RT_PRIO);

	plist_node_init(&q->list, prio);
#ifdef CONFIG_DEBUG_PI_LIST
	q->list.plist.lock = &hb->lock;
#endif
	plist_add(&q->list, &hb->chain);
	q->task = current;
	spin_unlock(&hb->lock);
}

static inline void
queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
{
	spin_unlock(&hb->lock);
	drop_futex_key_refs(&q->key);
}

/*
 * queue_me and unqueue_me must be called as a pair, each
 * exactly once. They are called with the hashed spinlock held.
 */

/* The key must be already stored in q->key. */
static void queue_me(struct futex_q *q, int fd, struct file *filp)
{
	struct futex_hash_bucket *hb;

	hb = queue_lock(q, fd, filp);
	__queue_me(q, hb);
}

/* Return 1 if we were still queued (i.e. 0 means we were woken) */
static int unqueue_me(struct futex_q *q)
{
	spinlock_t *lock_ptr;
	int ret = 0;

	/* In the common case we don't take the spinlock, which is nice. */
retry:
	lock_ptr = q->lock_ptr;
	barrier();
	if (lock_ptr != 0) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock. This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock(). It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock(). It cannot,
		 * however, change back to the original value. Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		WARN_ON(plist_node_empty(&q->list));
		plist_del(&q->list, &q->list.plist);

		BUG_ON(q->pi_state);

		spin_unlock(lock_ptr);
		ret = 1;
	}
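
For orientation, the sketch below shows how the paths above are typically reached from user space: FUTEX_WAKE ends up in futex_wake(), while a plain FUTEX_WAIT queues a futex_q via queue_me() and later unqueue_me(). This is a minimal illustration and not part of futex.c; it assumes Linux with glibc's syscall() wrapper, and the helper names user_futex_wait()/user_futex_wake() are made up for this example, not kernel or libc API.

/* Illustrative userspace sketch only -- hypothetical helpers, not kernel code. */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>

/*
 * Block while *uaddr still holds 'val'. The kernel re-checks the value
 * under the hash bucket lock before sleeping, so a concurrent change
 * simply returns immediately (errno == EAGAIN/EWOULDBLOCK).
 */
static long user_futex_wait(uint32_t *uaddr, uint32_t val)
{
	return syscall(SYS_futex, uaddr, FUTEX_WAIT, val, NULL, NULL, 0);
}

/* Wake up to 'nr' tasks queued on the futex at uaddr (the futex_wake() path). */
static long user_futex_wake(uint32_t *uaddr, int nr)
{
	return syscall(SYS_futex, uaddr, FUTEX_WAKE, nr, NULL, NULL, 0);
}

A waiter would typically store an expected value in the shared word, call user_futex_wait() with that value, and a waker would change the word first and then call user_futex_wake(); the value re-check in the kernel is what closes the race between those two steps.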