📄 futex.c
	return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != current->pid)
		return -EPERM;
	/*
	 * First take all the futex related locks:
	 */
	if (fshared)
		down_read(fshared);

	ret = get_futex_key(uaddr, fshared, &key);
	if (unlikely(ret != 0))
		goto out;

	hb = hash_futex(&key);
retry_unlocked:
	spin_lock(&hb->lock);

	/*
	 * To avoid races, try to do the TID -> 0 atomic transition
	 * again. If it succeeds then we can return without waking
	 * anyone else up:
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		pagefault_disable();
		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
		pagefault_enable();
	}

	if (unlikely(uval == -EFAULT))
		goto pi_faulted;
	/*
	 * Rare case: we managed to release the lock atomically,
	 * no need to wake anyone else up:
	 */
	if (unlikely(uval == current->pid))
		goto out_unlock;

	/*
	 * Ok, other tasks may need to be woken up - check waiters
	 * and do the wakeup if necessary:
	 */
	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (!match_futex (&this->key, &key))
			continue;
		ret = wake_futex_pi(uaddr, uval, this);
		/*
		 * The atomic access to the futex value
		 * generated a pagefault, so retry the
		 * user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		goto out_unlock;
	}
	/*
	 * No waiters - kernel unlocks the futex:
	 */
	if (!(uval & FUTEX_OWNER_DIED)) {
		ret = unlock_futex_pi(uaddr, uval);
		if (ret == -EFAULT)
			goto pi_faulted;
	}

out_unlock:
	spin_unlock(&hb->lock);
out:
	if (fshared)
		up_read(fshared);

	return ret;

pi_faulted:
	/*
	 * We have to r/w *(int __user *)uaddr, but we can't modify it
	 * non-atomically. Therefore, if get_user below is not
	 * enough, we need to handle the fault ourselves, while
	 * still holding the mmap_sem.
	 *
	 * ... and hb->lock. --ANK
	 */
	spin_unlock(&hb->lock);

	if (attempt++) {
		ret = futex_handle_fault((unsigned long)uaddr, fshared,
					 attempt);
		if (ret)
			goto out;
		goto retry_unlocked;
	}

	if (fshared)
		up_read(fshared);

	ret = get_user(uval, uaddr);
	if (!ret && (uval != -EFAULT))
		goto retry;

	return ret;
}

static int futex_close(struct inode *inode, struct file *filp)
{
	struct futex_q *q = filp->private_data;

	unqueue_me(q);
	kfree(q);

	return 0;
}

/* This is one-shot: once it's gone off you need a new fd */
static unsigned int futex_poll(struct file *filp,
			       struct poll_table_struct *wait)
{
	struct futex_q *q = filp->private_data;
	int ret = 0;

	poll_wait(filp, &q->waiters, wait);

	/*
	 * plist_node_empty() is safe here without any lock.
	 * q->lock_ptr != 0 is not safe, because of ordering against wakeup.
	 */
	if (plist_node_empty(&q->list))
		ret = POLLIN | POLLRDNORM;

	return ret;
}

static const struct file_operations futex_fops = {
	.release	= futex_close,
	.poll		= futex_poll,
};

/*
 * Signal allows caller to avoid the race which would occur if they
 * set the sigio stuff up afterwards.
 */
static int futex_fd(u32 __user *uaddr, int signal)
{
	struct futex_q *q;
	struct file *filp;
	int ret, err;
	struct rw_semaphore *fshared;
	static unsigned long printk_interval;

	if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
		printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
		       "will be removed from the kernel in June 2007\n",
		       current->comm);
	}

	ret = -EINVAL;
	if (!valid_signal(signal))
		goto out;

	ret = get_unused_fd();
	if (ret < 0)
		goto out;
	filp = get_empty_filp();
	if (!filp) {
		put_unused_fd(ret);
		ret = -ENFILE;
		goto out;
	}
	filp->f_op = &futex_fops;
	filp->f_path.mnt = mntget(futex_mnt);
	filp->f_path.dentry = dget(futex_mnt->mnt_root);
	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;

	if (signal) {
		err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1);
		if (err < 0) {
			goto error;
		}
		filp->f_owner.signum = signal;
	}

	q = kmalloc(sizeof(*q), GFP_KERNEL);
	if (!q) {
		err = -ENOMEM;
		goto error;
	}
	q->pi_state = NULL;

	fshared = &current->mm->mmap_sem;
	down_read(fshared);
	err = get_futex_key(uaddr, fshared, &q->key);

	if (unlikely(err != 0)) {
		up_read(fshared);
		kfree(q);
		goto error;
	}

	/*
	 * queue_me() must be called before releasing mmap_sem, because
	 * key->shared.inode needs to be referenced while holding it.
	 */
	filp->private_data = q;

	queue_me(q, ret, filp);
	up_read(fshared);

	/* Now we map fd to filp, so userspace can access it */
	fd_install(ret, filp);
out:
	return ret;
error:
	put_unused_fd(ret);
	put_filp(filp);
	ret = err;
	goto out;
}

/*
 * Support for robust futexes: the kernel cleans up held futexes at
 * thread exit time.
 *
 * Implementation: user-space maintains a per-thread list of locks it
 * is holding. Upon do_exit(), the kernel carefully walks this list,
 * and marks all locks that are owned by this thread with the
 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
 * always manipulated with the lock held, so the list is private and
 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
 * field, to allow the kernel to clean up if the thread dies after
 * acquiring the lock, but just before it could have added itself to
 * the list. There can only be one such pending lock.
 */

/**
 * sys_set_robust_list - set the robust-futex list head of a task
 * @head: pointer to the list-head
 * @len: length of the list-head, as userspace expects
 */
asmlinkage long
sys_set_robust_list(struct robust_list_head __user *head,
		    size_t len)
{
	/*
	 * The kernel knows only one size for now:
	 */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}

/**
 * sys_get_robust_list - get the robust-futex list head of a task
 * @pid: pid of the process [zero for current task]
 * @head_ptr: pointer to a list-head pointer, the kernel fills it in
 * @len_ptr: pointer to a length field, the kernel fills in the header size
 */
asmlinkage long
sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
		    size_t __user *len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;

	if (!pid)
		head = current->robust_list;
	else {
		struct task_struct *p;

		ret = -ESRCH;
		rcu_read_lock();
		p = find_task_by_pid(pid);
		if (!p)
			goto err_unlock;
		ret = -EPERM;
		if ((current->euid != p->euid) && (current->euid != p->uid) &&
		    !capable(CAP_SYS_PTRACE))
			goto err_unlock;
		head = p->robust_list;
		rcu_read_unlock();
	}

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();

	return ret;
}

/*
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
	u32 uval, nval, mval;

retry:
	if (get_user(uval, uaddr))
		return -1;

	if ((uval & FUTEX_TID_MASK) == curr->pid) {
		/*
		 * Ok, this dying thread is truly holding a futex
		 * of interest. Set the OWNER_DIED bit atomically
		 * via cmpxchg, and if the value had FUTEX_WAITERS
		 * set, wake up a waiter (if any). (We have to do a
		 * futex_wake() even if OWNER_DIED is already set -
		 * to handle the rare but possible case of recursive
		 * thread-death.) The rest of the cleanup is done in
		 * userspace.
		 */
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);

		if (nval == -EFAULT)
			return -1;

		if (nval != uval)
			goto retry;

		/*
		 * Wake robust non-PI futexes here. The wakeup of
		 * PI futexes happens in exit_pi_state():
		 */
		if (!pi) {
			if (uval & FUTEX_WAITERS)
				futex_wake(uaddr, &curr->mm->mmap_sem, 1);
		}
	}
	return 0;
}

/*
 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
 */
static inline int fetch_robust_entry(struct robust_list __user **entry,
				     struct robust_list __user * __user *head,
				     int *pi)
{
	unsigned long uentry;

	if (get_user(uentry, (unsigned long __user *)head))
		return -EFAULT;

	*entry = (void __user *)(uentry & ~1UL);
	*pi = uentry & 1;

	return 0;
}

/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
 * We silently return on any sign of list-walking problem.
 */
void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *pending;
	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
	unsigned long futex_offset;

	/*
	 * Fetch the list head (which was registered earlier, via
	 * sys_set_robust_list()):
	 */
	if (fetch_robust_entry(&entry, &head->list.next, &pi))
		return;
	/*
	 * Fetch the relative futex offset:
	 */
	if (get_user(futex_offset, &head->futex_offset))
		return;
	/*
	 * Fetch any possibly pending lock-add first, and handle it
	 * if it exists:
	 */
	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
		return;

	if (pending)
		handle_futex_death((void __user *)pending + futex_offset,
				   curr, pip);

	while (entry != &head->list) {
		/*
		 * A pending lock might already be on the list, so
		 * don't process it twice:
		 */
		if (entry != pending)
			if (handle_futex_death((void __user *)entry + futex_offset,
					       curr, pi))
				return;
		/*
		 * Fetch the next entry in the list:
		 */
		if (fetch_robust_entry(&entry, &entry->next, &pi))
			return;
		/*
		 * Avoid excessively long or circular lists:
		 */
		if (!--limit)
			break;

		cond_resched();
	}
}

long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
	      u32 __user *uaddr2, u32 val2, u32 val3)
{
	int ret;
	int cmd = op & FUTEX_CMD_MASK;
	struct rw_semaphore *fshared = NULL;

	if (!(op & FUTEX_PRIVATE_FLAG))
		fshared = &current->mm->mmap_sem;

	switch (cmd) {
	case FUTEX_WAIT:
		ret = futex_wait(uaddr, fshared, val, timeout);
		break;
	case FUTEX_WAKE:
		ret = futex_wake(uaddr, fshared, val);
		break;
	case FUTEX_FD:
		/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
		ret = futex_fd(uaddr, val);
		break;
	case FUTEX_REQUEUE:
		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
		break;
	case FUTEX_CMP_REQUEUE:
		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3);
		break;
	case FUTEX_WAKE_OP:
		ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
		break;
	case FUTEX_LOCK_PI:
		ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
		break;
	case FUTEX_UNLOCK_PI:
		ret = futex_unlock_pi(uaddr, fshared);
		break;
	case FUTEX_TRYLOCK_PI:
		ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
		break;
	default:
		ret = -ENOSYS;
	}
	return ret;
}

asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
			  struct timespec __user *utime, u32 __user *uaddr2,
			  u32 val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		if (cmd == FUTEX_WAIT)
			t = ktime_add(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}

static int futexfs_get_sb(struct file_system_type *fs_type,
			  int flags, const char *dev_name, void *data,
			  struct vfsmount *mnt)
{
	return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt);
}

static struct file_system_type futex_fs_type = {
	.name		= "futexfs",
	.get_sb		= futexfs_get_sb,
	.kill_sb	= kill_anon_super,
};

static int __init init(void)
{
	int i = register_filesystem(&futex_fs_type);

	if (i)
		return i;

	futex_mnt = kern_mount(&futex_fs_type);
	if (IS_ERR(futex_mnt)) {
		unregister_filesystem(&futex_fs_type);
		return PTR_ERR(futex_mnt);
	}

	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
		plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
		spin_lock_init(&futex_queues[i].lock);
	}

	return 0;
}
__initcall(init);
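For context, the FUTEX_WAIT and FUTEX_WAKE commands dispatched by do_futex() above are normally driven from user space through the raw futex syscall. The following is a minimal illustrative sketch, not part of futex.c: the wrapper name futex(), the shared word futex_word, and the thread setup are assumptions made here for demonstration, FUTEX_PRIVATE_FLAG is omitted, and error handling is minimal. Build with -lpthread.

/* Illustrative user-space sketch (assumed example, not kernel code). */
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <linux/futex.h>

static int futex_word;	/* the 32-bit futex word both threads share */

/* Thin wrapper over the raw syscall; uaddr2/val3 are unused here. */
static long futex(int *uaddr, int op, int val, const struct timespec *timeout)
{
	return syscall(SYS_futex, uaddr, op, val, timeout, NULL, 0);
}

static void *waker(void *arg)
{
	sleep(1);
	__sync_lock_test_and_set(&futex_word, 1);	/* publish the condition */
	futex(&futex_word, FUTEX_WAKE, 1, NULL);	/* wake at most one waiter */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waker, NULL);
	/*
	 * FUTEX_WAIT sleeps only while the word still equals the expected
	 * value 0; a spurious or racy return is handled by re-checking.
	 */
	while (futex_word == 0)
		futex(&futex_word, FUTEX_WAIT, 0, NULL);
	pthread_join(t, NULL);
	printf("woken\n");
	return 0;
}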