📄 fork.c
字号:
/*
* linux/kernel/fork.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
/*
* 'fork.c' contains the help-routines for the 'fork' system call
* (see also entry.S and others).
* Fork is rather simple, once you get the hang of it, but the memory
* management can be a bitch. See 'mm/memory.c': 'copy_page_tables()'
*/
#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/init.h>
#include <linux/unistd.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
/* The idle threads do not count.. */
int nr_threads;
int nr_running;
int max_threads;
unsigned long total_forks; /* Handle normal Linux uptimes. */
int last_pid;
struct task_struct *pidhash[PIDHASH_SZ];
void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
{
unsigned long flags;
wq_write_lock_irqsave(&q->lock, flags);
wait->flags = 0;
__add_wait_queue(q, wait);
wq_write_unlock_irqrestore(&q->lock, flags);
}
void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
{
unsigned long flags;
wq_write_lock_irqsave(&q->lock, flags);
wait->flags = WQ_FLAG_EXCLUSIVE;
__add_wait_queue_tail(q, wait);
wq_write_unlock_irqrestore(&q->lock, flags);
}
void remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
{
unsigned long flags;
wq_write_lock_irqsave(&q->lock, flags);
__remove_wait_queue(q, wait);
wq_write_unlock_irqrestore(&q->lock, flags);
}
void __init fork_init(unsigned long mempages)
{
/*
* The default maximum number of threads is set to a safe
* value: the thread structures can take up at most half
* of memory.
*/
max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 2;
init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
}
/* Protects next_safe and last_pid. */
spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;
static int get_pid(unsigned long flags)
{
static int next_safe = PID_MAX;
struct task_struct *p;
if (flags & CLONE_PID)
return current->pid;
spin_lock(&lastpid_lock);
if((++last_pid) & 0xffff8000) {
last_pid = 300; /* Skip daemons etc. */
goto inside;
}
if(last_pid >= next_safe) {
inside:
next_safe = PID_MAX;
read_lock(&tasklist_lock);
repeat:
for_each_task(p) {
if(p->pid == last_pid ||
p->pgrp == last_pid ||
p->session == last_pid) {
if(++last_pid >= next_safe) {
if(last_pid & 0xffff8000)
last_pid = 300;
next_safe = PID_MAX;
}
goto repeat;
}
if(p->pid > last_pid && next_safe > p->pid)
next_safe = p->pid;
if(p->pgrp > last_pid && next_safe > p->pgrp)
next_safe = p->pgrp;
if(p->session > last_pid && next_safe > p->session)
next_safe = p->session;
}
read_unlock(&tasklist_lock);
}
spin_unlock(&lastpid_lock);
return last_pid;
}
static inline int dup_mmap(struct mm_struct * mm)
{
struct vm_area_struct * mpnt, *tmp, **pprev;
int retval;
flush_cache_mm(current->mm);
mm->locked_vm = 0;
mm->mmap = NULL;
mm->mmap_avl = NULL;
mm->mmap_cache = NULL;
mm->map_count = 0;
mm->cpu_vm_mask = 0;
mm->swap_cnt = 0;
mm->swap_address = 0;
pprev = &mm->mmap;
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
struct file *file;
retval = -ENOMEM;
if(mpnt->vm_flags & VM_DONTCOPY)
continue;
tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (!tmp)
goto fail_nomem;
*tmp = *mpnt;
tmp->vm_flags &= ~VM_LOCKED;
tmp->vm_mm = mm;
mm->map_count++;
tmp->vm_next = NULL;
file = tmp->vm_file;
if (file) {
struct inode *inode = file->f_dentry->d_inode;
get_file(file);
if (tmp->vm_flags & VM_DENYWRITE)
atomic_dec(&inode->i_writecount);
/* insert tmp into the share list, just after mpnt */
spin_lock(&inode->i_mapping->i_shared_lock);
if((tmp->vm_next_share = mpnt->vm_next_share) != NULL)
mpnt->vm_next_share->vm_pprev_share =
&tmp->vm_next_share;
mpnt->vm_next_share = tmp;
tmp->vm_pprev_share = &mpnt->vm_next_share;
spin_unlock(&inode->i_mapping->i_shared_lock);
}
/* Copy the pages, but defer checking for errors */
retval = copy_page_range(mm, current->mm, tmp);
if (!retval && tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
/*
* Link in the new vma even if an error occurred,
* so that exit_mmap() can clean up the mess.
*/
*pprev = tmp;
pprev = &tmp->vm_next;
if (retval)
goto fail_nomem;
}
retval = 0;
if (mm->map_count >= AVL_MIN_MAP_COUNT)
build_mmap_avl(mm);
fail_nomem:
flush_tlb_mm(current->mm);
return retval;
}
spinlock_t mmlist_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;
#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
static struct mm_struct * mm_init(struct mm_struct * mm)
{
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);
init_MUTEX(&mm->mmap_sem);
mm->page_table_lock = SPIN_LOCK_UNLOCKED;
mm->pgd = pgd_alloc();
if (mm->pgd)
return mm;
free_mm(mm);
return NULL;
}
/*
* Allocate and initialize an mm_struct.
*/
struct mm_struct * mm_alloc(void)
{
struct mm_struct * mm;
mm = allocate_mm();
if (mm) {
memset(mm, 0, sizeof(*mm));
return mm_init(mm);
}
return NULL;
}
/*
* Called when the last reference to the mm
* is dropped: either by a lazy thread or by
* mmput. Free the page directory and the mm.
*/
inline void __mmdrop(struct mm_struct *mm)
{
if (mm == &init_mm) BUG();
pgd_free(mm->pgd);
destroy_context(mm);
free_mm(mm);
}
/*
* Decrement the use count and release all resources for an mm.
*/
void mmput(struct mm_struct *mm)
{
if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
list_del(&mm->mmlist);
spin_unlock(&mmlist_lock);
exit_mmap(mm);
mmdrop(mm);
}
}
/* Please note the differences between mmput and mm_release.
* mmput is called whenever we stop holding onto a mm_struct,
* error success whatever.
*
* mm_release is called after a mm_struct has been removed
* from the current process.
*
* This difference is important for error handling, when we
* only half set up a mm_struct for a new process and need to restore
* the old one. Because we mmput the new mm_struct before
* restoring the old one. . .
* Eric Biederman 10 January 1998
*/
void mm_release(void)
{
struct task_struct *tsk = current;
/* notify parent sleeping on vfork() */
if (tsk->flags & PF_VFORK) {
tsk->flags &= ~PF_VFORK;
up(tsk->p_opptr->vfork_sem);
}
}
static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
struct mm_struct * mm, *oldmm;
int retval;
tsk->min_flt = tsk->maj_flt = 0;
tsk->cmin_flt = tsk->cmaj_flt = 0;
tsk->nswap = tsk->cnswap = 0;
tsk->mm = NULL;
tsk->active_mm = NULL;
/*
* Are we cloning a kernel thread?
*
* We need to steal a active VM for that..
*/
oldmm = current->mm;
if (!oldmm)
return 0;
if (clone_flags & CLONE_VM) {
atomic_inc(&oldmm->mm_users);
mm = oldmm;
goto good_mm;
}
retval = -ENOMEM;
mm = allocate_mm();
if (!mm)
goto fail_nomem;
/* Copy the current MM stuff.. */
memcpy(mm, oldmm, sizeof(*mm));
if (!mm_init(mm))
goto fail_nomem;
down(&oldmm->mmap_sem);
retval = dup_mmap(mm);
up(&oldmm->mmap_sem);
/*
* Add it to the mmlist after the parent.
*
* Doing it this way means that we can order
* the list, and fork() won't mess up the
* ordering significantly.
*/
spin_lock(&mmlist_lock);
list_add(&mm->mmlist, &oldmm->mmlist);
spin_unlock(&mmlist_lock);
if (retval)
goto free_pt;
/*
* child gets a private LDT (if there was an LDT in the parent)
*/
copy_segments(tsk, mm);
if (init_new_context(tsk,mm))
goto free_pt;
good_mm:
tsk->mm = mm;
tsk->active_mm = mm;
return 0;
free_pt:
mmput(mm);
fail_nomem:
return retval;
}
static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
{
struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
/* We don't need to lock fs - think why ;-) */
if (fs) {
atomic_set(&fs->count, 1);
fs->lock = RW_LOCK_UNLOCKED;
fs->umask = old->umask;
read_lock(&old->lock);
fs->rootmnt = mntget(old->rootmnt);
fs->root = dget(old->root);
fs->pwdmnt = mntget(old->pwdmnt);
fs->pwd = dget(old->pwd);
if (old->altroot) {
fs->altrootmnt = mntget(old->altrootmnt);
fs->altroot = dget(old->altroot);
} else {
fs->altrootmnt = NULL;
fs->altroot = NULL;
}
read_unlock(&old->lock);
}
return fs;
}
struct fs_struct *copy_fs_struct(struct fs_struct *old)
{
return __copy_fs_struct(old);
}
static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
{
if (clone_flags & CLONE_FS) {
atomic_inc(¤t->fs->count);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -