⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 futex.c

📁 linux 2.6.19 kernel source code before patching
💻 C
📖 第 1 页 / 共 4 页
字号:
/*
 *  Fast Userspace Mutexes (which I call "Futexes!").
 *  (C) Rusty Russell, IBM 2002
 *
 *  Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
 *  (C) Copyright 2003 Red Hat Inc, All Rights Reserved
 *
 *  Removed page pinning, fix privately mapped COW pages and other cleanups
 *  (C) Copyright 2003, 2004 Jamie Lokier
 *
 *  Robust futex support started by Ingo Molnar
 *  (C) Copyright 2006 Red Hat Inc, All Rights Reserved
 *  Thanks to Thomas Gleixner for suggestions, analysis and fixes.
 *
 *  PI-futex support started by Ingo Molnar and Thomas Gleixner
 *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *  Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *
 *  PRIVATE futexes by Eric Dumazet
 *  Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
 *
 *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
 *  enough at me, Linus for the original (flawed) idea, Matthew
 *  Kirkwood for proof-of-concept implementation.
 *
 *  "The futexes are also cursed."
 *  "But they come in a choice of three flavours!"
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/futex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
#include <linux/module.h>
#include <asm/futex.h>

#include "rtmutex_common.h"

/* Number of hash-table bits: 16 buckets on small systems, 256 otherwise. */
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)

/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex pi_mutex;

	struct task_struct *owner;	/* current owner task; NULL while dying */
	atomic_t refcount;		/* shared by all waiters on this futex */

	union futex_key key;		/* futex this state is attached to */
};

/*
 * We use this hashed waitqueue instead of a normal wait_queue_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 * The order of wakeup is always to make the first condition true, then
 * wake up q->waiters, then make the second condition true.
 */
struct futex_q {
	struct plist_node list;		/* priority-sorted entry in the bucket */
	wait_queue_head_t waiters;	/* tasks sleeping on this futex_q */

	/* Which hash list lock to use: */
	spinlock_t *lock_ptr;

	/* Key which the futex is hashed on: */
	union futex_key key;

	/* For fd, sigio sent using these: */
	int fd;
	struct file *filp;

	/* Optional priority inheritance state: */
	struct futex_pi_state *pi_state;
	struct task_struct *task;
};

/*
 * Split the global futex_lock into every hash list lock.
 */
struct futex_hash_bucket {
	spinlock_t lock;		/* protects 'chain' */
	struct plist_head chain;	/* list of futex_q hashed here */
};

static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];

/* Futex-fs vfsmount entry: */
static struct vfsmount *futex_mnt;

/*
 * We hash on the keys returned from get_futex_key (see below).
 */
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	/* Hash the (word, ptr) pair, seeded with the in-page offset bits. */
	u32 hash = jhash2((u32*)&key->both.word,
			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
			  key->both.offset);
	return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
}

/*
 * Return 1 if two futex_keys are equal, 0 otherwise.
 */
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
	return (key1->both.word == key2->both.word
		&& key1->both.ptr == key2->both.ptr
		&& key1->both.offset == key2->both.offset);
}

/**
 * get_futex_key - Get parameters which are the keys for a futex.
 * @uaddr: virtual address of the futex
 * @fshared: NULL for a PROCESS_PRIVATE futex,
 *	&current->mm->mmap_sem for a PROCESS_SHARED futex
 * @key: address where result is stored.
 *
 * Returns a negative error code or 0
 * The key words are stored in *key on success.
 *
 * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode,
 * offset_within_page).  For private mappings, it's (uaddr, current->mm).
 * We can usually work out the index without swapping in the page.
 *
 * fshared is NULL for PROCESS_PRIVATE futexes
 * For other futexes, it points to &current->mm->mmap_sem and
 * caller must have taken the reader lock. but NOT any spinlocks.
 */
int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
		  union futex_key *key)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	struct page *page;
	int err;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = address % PAGE_SIZE;
	if (unlikely((address % sizeof(u32)) != 0))
		return -EINVAL;
	address -= key->both.offset;

	/*
	 * PROCESS_PRIVATE futexes are fast.
	 * As the mm cannot disappear under us and the 'key' only needs
	 * virtual address, we dont even have to find the underlying vma.
	 * Note : We do have to check 'uaddr' is a valid user address,
	 *        but access_ok() should be faster than find_vma()
	 */
	if (!fshared) {
		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
			return -EFAULT;
		key->private.mm = mm;
		key->private.address = address;
		return 0;
	}
	/*
	 * The futex is hashed differently depending on whether
	 * it's in a shared or private mapping.  So check vma first.
	 */
	vma = find_extend_vma(mm, address);
	if (unlikely(!vma))
		return -EFAULT;

	/*
	 * Permissions.
	 */
	if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
		return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.  Therefore we use
	 * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
	 * mappings of _writable_ handles.
	 */
	if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
		key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
		key->private.mm = mm;
		key->private.address = address;
		return 0;
	}

	/*
	 * Linear file mappings are also simple.
	 */
	key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
	key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
	if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
		key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
				     + vma->vm_pgoff);
		return 0;
	}

	/*
	 * We could walk the page table to read the non-linear
	 * pte, and get the page index without fetching the page
	 * from swap.  But that's a lot of code to duplicate here
	 * for a rare case, so we simply fetch the page.
	 */
	err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
	if (err >= 0) {
		key->shared.pgoff =
			page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
		put_page(page);
		return 0;
	}
	return err;
}
EXPORT_SYMBOL_GPL(get_futex_key);

/*
 * Take a reference to the resource addressed by a key.
 * Can be called while holding spinlocks.
 *
 */
inline void get_futex_key_refs(union futex_key *key)
{
	/* PROCESS_PRIVATE futexes take no reference (mm cannot go away). */
	if (key->both.ptr == 0)
		return;
	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
		case FUT_OFF_INODE:
			atomic_inc(&key->shared.inode->i_count);
			break;
		case FUT_OFF_MMSHARED:
			atomic_inc(&key->private.mm->mm_count);
			break;
	}
}
EXPORT_SYMBOL_GPL(get_futex_key_refs);

/*
 * Drop a reference to the resource addressed by a key.
 * The hash bucket spinlock must not be held.
 */
void drop_futex_key_refs(union futex_key *key)
{
	if (key->both.ptr == 0)
		return;
	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
		case FUT_OFF_INODE:
			iput(key->shared.inode);
			break;
		case FUT_OFF_MMSHARED:
			mmdrop(key->private.mm);
			break;
	}
}
EXPORT_SYMBOL_GPL(drop_futex_key_refs);

/*
 * Read a user-space futex word with page faults disabled.
 * Safe to call while holding a hash-bucket spinlock; returns
 * -EFAULT instead of faulting the page in.
 */
static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
	pagefault_enable();

	return ret ? -EFAULT : 0;
}

/*
 * Fault handling.
 * if fshared is non NULL, current->mm->mmap_sem is already held
 */
static int futex_handle_fault(unsigned long address,
			      struct rw_semaphore *fshared, int attempt)
{
	struct vm_area_struct * vma;
	struct mm_struct *mm = current->mm;
	int ret = -EFAULT;

	/* Give up after a bounded number of fault-in attempts. */
	if (attempt > 2)
		return ret;

	if (!fshared)
		down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (vma && address >= vma->vm_start &&
	    (vma->vm_flags & VM_WRITE)) {
		switch (handle_mm_fault(mm, vma, address, 1)) {
		case VM_FAULT_MINOR:
			ret = 0;
			current->min_flt++;
			break;
		case VM_FAULT_MAJOR:
			ret = 0;
			current->maj_flt++;
			break;
		}
	}
	if (!fshared)
		up_read(&mm->mmap_sem);
	return ret;
}

/*
 * PI code:
 */

/*
 * Pre-allocate one pi_state per task so that later allocation can
 * happen in contexts where GFP_KERNEL allocation is not possible.
 */
static int refill_pi_state_cache(void)
{
	struct futex_pi_state *pi_state;

	if (likely(current->pi_state_cache))
		return 0;

	pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);

	if (!pi_state)
		return -ENOMEM;

	INIT_LIST_HEAD(&pi_state->list);
	/* pi_mutex gets initialized later */
	pi_state->owner = NULL;
	atomic_set(&pi_state->refcount, 1);

	current->pi_state_cache = pi_state;

	return 0;
}

/*
 * Hand out the cached pi_state; caller must have called
 * refill_pi_state_cache() first (WARN_ON otherwise).
 */
static struct futex_pi_state * alloc_pi_state(void)
{
	struct futex_pi_state *pi_state = current->pi_state_cache;

	WARN_ON(!pi_state);
	current->pi_state_cache = NULL;

	return pi_state;
}

/*
 * Drop a reference; on the last put either recycle the pi_state into
 * the per-task cache or free it.
 */
static void free_pi_state(struct futex_pi_state *pi_state)
{
	if (!atomic_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		spin_lock_irq(&pi_state->owner->pi_lock);
		list_del_init(&pi_state->list);
		spin_unlock_irq(&pi_state->owner->pi_lock);

		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
	}

	if (current->pi_state_cache)
		kfree(pi_state);
	else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		atomic_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}

/*
 * Look up the task based on what TID userspace gave us.
 * We dont trust it.
 */
static struct task_struct * futex_find_get_task(pid_t pid)
{
	struct task_struct *p;

	rcu_read_lock();
	p = find_task_by_pid(pid);
	/* Reject lookups across uid boundaries. */
	if (!p || ((current->euid != p->euid) && (current->euid != p->uid)))
		p = ERR_PTR(-ESRCH);
	else
		get_task_struct(p);

	rcu_read_unlock();

	return p;
}

/*
 * This task is holding PI mutexes at exit time => bad.
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 */
void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;
	struct futex_hash_bucket *hb;
	union futex_key key;

	/*
	 * We are a ZOMBIE and nobody can enqueue itself on
	 * pi_state_list anymore, but we have to be careful
	 * versus waiters unqueueing themselves:
	 */
	spin_lock_irq(&curr->pi_lock);
	while (!list_empty(head)) {

		next = head->next;
		pi_state = list_entry(next, struct futex_pi_state, list);
		key = pi_state->key;
		hb = hash_futex(&key);

		/*
		 * Lock ordering: hb->lock nests outside pi_lock, so drop
		 * pi_lock before taking the bucket lock, then retake it.
		 */
		spin_unlock_irq(&curr->pi_lock);

		spin_lock(&hb->lock);

		spin_lock_irq(&curr->pi_lock);
		/*
		 * We dropped the pi-lock, so re-check whether this
		 * task still owns the PI-state:
		 */
		if (head->next != next) {
			spin_unlock(&hb->lock);
			continue;
		}

		WARN_ON(pi_state->owner != curr);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		pi_state->owner = NULL;
		spin_unlock_irq(&curr->pi_lock);

		rt_mutex_unlock(&pi_state->pi_mutex);

		spin_unlock(&hb->lock);

		spin_lock_irq(&curr->pi_lock);
	}
	spin_unlock_irq(&curr->pi_lock);
}

/*
 * Find or attach the pi_state for the futex at *key whose user-space
 * value is uval.  On success *ps holds a referenced pi_state.
 * Caller holds the hash-bucket lock.
 */
static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
		union futex_key *key, struct futex_pi_state **ps)
{
	struct futex_pi_state *pi_state = NULL;
	struct futex_q *this, *next;
	struct plist_head *head;
	struct task_struct *p;
	pid_t pid = uval & FUTEX_TID_MASK;

	head = &hb->chain;

	plist_for_each_entry_safe(this, next, head, list) {
		if (match_futex(&this->key, key)) {
			/*
			 * Another waiter already exists - bump up
			 * the refcount and return its pi_state:
			 */
			pi_state = this->pi_state;
			/*
			 * Userspace might have messed up non PI and PI futexes
			 */
			if (unlikely(!pi_state))
				return -EINVAL;

			WARN_ON(!atomic_read(&pi_state->refcount));
			WARN_ON(pid && pi_state->owner &&
				pi_state->owner->pid != pid);

			atomic_inc(&pi_state->refcount);
			*ps = pi_state;

			return 0;
		}
	}

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0
	 */
	if (!pid)
		return -ESRCH;
	p = futex_find_get_task(pid);
	if (IS_ERR(p))
		return PTR_ERR(p);

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -