📄 fork.c
/*
 *  linux/kernel/fork.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 *  'fork.c' contains the help-routines for the 'fork' system call
 * (see also entry.S and others).
 * Fork is rather simple, once you get the hang of it, but the memory
 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
 */

#include <linux/slab.h>
#include <linux/init.h>
#include <linux/unistd.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
#include <linux/mnt_namespace.h>
#include <linux/personality.h>
#include <linux/mempolicy.h>
#include <linux/sem.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/iocontext.h>
#include <linux/key.h>
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/cgroup.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/jiffies.h>
#include <linux/tracehook.h>
#include <linux/futex.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/rcupdate.h>
#include <linux/ptrace.h>
#include <linux/mount.h>
#include <linux/audit.h>
#include <linux/memcontrol.h>
#include <linux/profile.h>
#include <linux/rmap.h>
#include <linux/acct.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/freezer.h>
#include <linux/delayacct.h>
#include <linux/taskstats_kern.h>
#include <linux/random.h>
#include <linux/tty.h>
#include <linux/proc_fs.h>
#include <linux/blkdev.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

/*
 * Protected counters by write_lock_irq(&tasklist_lock)
 */
unsigned long total_forks;	/* Handle normal Linux uptimes. */
int nr_threads;			/* The idle threads do not count.. */
int max_threads;		/* tunable limit on nr_threads */

DEFINE_PER_CPU(unsigned long, process_counts) = 0;

__cacheline_aligned DEFINE_RWLOCK(tasklist_lock);	/* outer */

int nr_processes(void)
{
	int cpu;
	int total = 0;

	for_each_online_cpu(cpu)
		total += per_cpu(process_counts, cpu);

	return total;
}

#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
# define alloc_task_struct()	kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
# define free_task_struct(tsk)	kmem_cache_free(task_struct_cachep, (tsk))
static struct kmem_cache *task_struct_cachep;
#endif

#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
{
#ifdef CONFIG_DEBUG_STACK_USAGE
	gfp_t mask = GFP_KERNEL | __GFP_ZERO;
#else
	gfp_t mask = GFP_KERNEL;
#endif
	return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
}

static inline void free_thread_info(struct thread_info *ti)
{
	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
#endif

/* SLAB cache for signal_struct structures (tsk->signal) */
static struct kmem_cache *signal_cachep;

/* SLAB cache for sighand_struct structures (tsk->sighand) */
struct kmem_cache *sighand_cachep;

/* SLAB cache for files_struct structures (tsk->files) */
struct kmem_cache *files_cachep;

/* SLAB cache for fs_struct structures (tsk->fs) */
struct kmem_cache *fs_cachep;

/* SLAB cache for vm_area_struct structures */
struct kmem_cache *vm_area_cachep;

/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;

void free_task(struct task_struct *tsk)
{
	prop_local_destroy_single(&tsk->dirties);
	free_thread_info(tsk->stack);
	rt_mutex_debug_task_free(tsk);
	free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);

void __put_task_struct(struct task_struct *tsk)
{
	WARN_ON(!tsk->exit_state);
	WARN_ON(atomic_read(&tsk->usage));
	WARN_ON(tsk == current);

	security_task_free(tsk);
	free_uid(tsk->user);
	put_group_info(tsk->group_info);
	delayacct_tsk_free(tsk);

	if (!profile_handoff_task(tsk))
		free_task(tsk);
}

/*
 * macro override instead of weak attribute alias, to workaround
 * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
 */
#ifndef arch_task_cache_init
#define arch_task_cache_init()
#endif

void __init fork_init(unsigned long mempages)
{
#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
#endif
	/* create a slab on which task_structs can be allocated */
	task_struct_cachep =
		kmem_cache_create("task_struct", sizeof(struct task_struct),
			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
#endif

	/* do the arch specific task caches init */
	arch_task_cache_init();

	/*
	 * The default maximum number of threads is set to a safe
	 * value: the thread structures can take up at most half
	 * of memory.
	 */
	max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);

	/*
	 * we need to allow at least 20 threads to boot a system
	 */
	if (max_threads < 20)
		max_threads = 20;

	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
	init_task.signal->rlim[RLIMIT_SIGPENDING] =
		init_task.signal->rlim[RLIMIT_NPROC];
}
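/*
 * Worked example of the sizing in fork_init() above, using illustrative
 * values only (THREAD_SIZE and PAGE_SIZE are architecture-dependent;
 * 8 KiB and 4 KiB are typical of 32-bit x86):
 *
 *	divisor     = 8 * THREAD_SIZE / PAGE_SIZE = 8 * 8192 / 4096 = 16
 *	max_threads = mempages / 16 = 262144 / 16 = 16384   (1 GiB of RAM)
 *
 * and the default RLIMIT_NPROC becomes max_threads / 2 = 8192.
 */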
int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
					       struct task_struct *src)
{
	*dst = *src;
	return 0;
}

static struct task_struct *dup_task_struct(struct task_struct *orig)
{
	struct task_struct *tsk;
	struct thread_info *ti;
	int err;

	prepare_to_copy(orig);

	tsk = alloc_task_struct();
	if (!tsk)
		return NULL;

	ti = alloc_thread_info(tsk);
	if (!ti) {
		free_task_struct(tsk);
		return NULL;
	}

	err = arch_dup_task_struct(tsk, orig);
	if (err)
		goto out;

	tsk->stack = ti;

	err = prop_local_init_single(&tsk->dirties);
	if (err)
		goto out;

	setup_thread_stack(tsk, orig);

#ifdef CONFIG_CC_STACKPROTECTOR
	tsk->stack_canary = get_random_int();
#endif

	/* One for us, one for whoever does the "release_task()" (usually parent) */
	atomic_set(&tsk->usage, 2);
	atomic_set(&tsk->fs_excl, 0);
#ifdef CONFIG_BLK_DEV_IO_TRACE
	tsk->btrace_seq = 0;
#endif
	tsk->splice_pipe = NULL;
	return tsk;

out:
	free_thread_info(ti);
	free_task_struct(tsk);
	return NULL;
}

#ifdef CONFIG_MMU
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
	struct vm_area_struct *mpnt, *tmp, **pprev;
	struct rb_node **rb_link, *rb_parent;
	int retval;
	unsigned long charge;
	struct mempolicy *pol;

	down_write(&oldmm->mmap_sem);
	flush_cache_dup_mm(oldmm);
	/*
	 * Not linked in yet - no deadlock potential:
	 */
	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);

	mm->locked_vm = 0;
	mm->mmap = NULL;
	mm->mmap_cache = NULL;
	mm->free_area_cache = oldmm->mmap_base;
	mm->cached_hole_size = ~0UL;
	mm->map_count = 0;
	cpus_clear(mm->cpu_vm_mask);
	mm->mm_rb = RB_ROOT;
	rb_link = &mm->mm_rb.rb_node;
	rb_parent = NULL;
	pprev = &mm->mmap;

	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
		struct file *file;

		if (mpnt->vm_flags & VM_DONTCOPY) {
			long pages = vma_pages(mpnt);
			mm->total_vm -= pages;
			vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
								-pages);
			continue;
		}
		charge = 0;
		if (mpnt->vm_flags & VM_ACCOUNT) {
			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
			if (security_vm_enough_memory(len))
				goto fail_nomem;
			charge = len;
		}
		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
		if (!tmp)
			goto fail_nomem;
		*tmp = *mpnt;
		pol = mpol_dup(vma_policy(mpnt));
		retval = PTR_ERR(pol);
		if (IS_ERR(pol))
			goto fail_nomem_policy;
		vma_set_policy(tmp, pol);
		tmp->vm_flags &= ~VM_LOCKED;
		tmp->vm_mm = mm;
		tmp->vm_next = NULL;
		anon_vma_link(tmp);
		file = tmp->vm_file;
		if (file) {
			struct inode *inode = file->f_path.dentry->d_inode;
			get_file(file);
			if (tmp->vm_flags & VM_DENYWRITE)
				atomic_dec(&inode->i_writecount);

			/* insert tmp into the share list, just after mpnt */
			spin_lock(&file->f_mapping->i_mmap_lock);
			tmp->vm_truncate_count = mpnt->vm_truncate_count;
			flush_dcache_mmap_lock(file->f_mapping);
			vma_prio_tree_add(tmp, mpnt);
			flush_dcache_mmap_unlock(file->f_mapping);
			spin_unlock(&file->f_mapping->i_mmap_lock);
		}

		/*
		 * Clear hugetlb-related page reserves for children. This only
		 * affects MAP_PRIVATE mappings. Faults generated by the child
		 * are not guaranteed to succeed, even if read-only
		 */
		if (is_vm_hugetlb_page(tmp))
			reset_vma_resv_huge_pages(tmp);

		/*
		 * Link in the new vma and copy the page table entries.
		 */
		*pprev = tmp;
		pprev = &tmp->vm_next;

		__vma_link_rb(mm, tmp, rb_link, rb_parent);
		rb_link = &tmp->vm_rb.rb_right;
		rb_parent = &tmp->vm_rb;

		mm->map_count++;
		retval = copy_page_range(mm, oldmm, mpnt);

		if (tmp->vm_ops && tmp->vm_ops->open)
			tmp->vm_ops->open(tmp);

		if (retval)
			goto out;
	}
	/* a new mm has just been created */
	arch_dup_mmap(oldmm, mm);
	retval = 0;
out:
	up_write(&mm->mmap_sem);
	flush_tlb_mm(oldmm);
	up_write(&oldmm->mmap_sem);
	return retval;
fail_nomem_policy:
	kmem_cache_free(vm_area_cachep, tmp);
fail_nomem:
	retval = -ENOMEM;
	vm_unacct_memory(charge);
	goto out;
}
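/*
 * Userspace side note (not part of the original file): a process can opt a
 * region out of this copy entirely with madvise(addr, len, MADV_DONTFORK),
 * which sets VM_DONTCOPY on the affected vmas; the VM_DONTCOPY branch at
 * the top of the vma loop above then skips them, so the child starts life
 * without those mappings.
 */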
static inline int mm_alloc_pgd(struct mm_struct * mm)
{
	mm->pgd = pgd_alloc(mm);
	if (unlikely(!mm->pgd))
		return -ENOMEM;
	return 0;
}

static inline void mm_free_pgd(struct mm_struct * mm)
{
	pgd_free(mm, mm->pgd);
}
#else
#define dup_mmap(mm, oldmm)	(0)
#define mm_alloc_pgd(mm)	(0)
#define mm_free_pgd(mm)
#endif /* CONFIG_MMU */

__cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);

#define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))

#include <linux/init_task.h>

static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
{
	atomic_set(&mm->mm_users, 1);
	atomic_set(&mm->mm_count, 1);
	init_rwsem(&mm->mmap_sem);
	INIT_LIST_HEAD(&mm->mmlist);
	mm->flags = (current->mm) ? current->mm->flags
				  : MMF_DUMP_FILTER_DEFAULT;
	mm->core_state = NULL;
	mm->nr_ptes = 0;
	set_mm_counter(mm, file_rss, 0);
	set_mm_counter(mm, anon_rss, 0);
	spin_lock_init(&mm->page_table_lock);
	rwlock_init(&mm->ioctx_list_lock);
	mm->ioctx_list = NULL;
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->cached_hole_size = ~0UL;
	mm_init_owner(mm, p);

	if (likely(!mm_alloc_pgd(mm))) {
		mm->def_flags = 0;
		mmu_notifier_mm_init(mm);
		return mm;
	}

	free_mm(mm);
	return NULL;
}

/*
 * Allocate and initialize an mm_struct.
 */
struct mm_struct * mm_alloc(void)
{
	struct mm_struct * mm;

	mm = allocate_mm();
	if (mm) {
		memset(mm, 0, sizeof(*mm));
		mm = mm_init(mm, current);
	}
	return mm;
}

/*
 * Called when the last reference to the mm
 * is dropped: either by a lazy thread or by
 * mmput. Free the page directory and the mm.
 */
void __mmdrop(struct mm_struct *mm)
{
	BUG_ON(mm == &init_mm);
	mm_free_pgd(mm);
	destroy_context(mm);
	mmu_notifier_mm_destroy(mm);
	free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);

/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
	might_sleep();

	if (atomic_dec_and_test(&mm->mm_users)) {
		exit_aio(mm);
		exit_mmap(mm);
		set_mm_exe_file(mm, NULL);
		if (!list_empty(&mm->mmlist)) {
			spin_lock(&mmlist_lock);
			list_del(&mm->mmlist);
			spin_unlock(&mmlist_lock);
		}
		put_swap_token(mm);
		mmdrop(mm);
	}
}
EXPORT_SYMBOL_GPL(mmput);

/**
 * get_task_mm - acquire a reference to the task's mm
 *
 * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
 * this kernel workthread has transiently adopted a user mm with use_mm,
 * to do its AIO) is not set and if so returns a reference to it, after
 * bumping up the use count.  User must release the mm via mmput()
 * after use.  Typically used by /proc and ptrace.
 */
struct mm_struct *get_task_mm(struct task_struct *task)
{
	struct mm_struct *mm;

	task_lock(task);
	mm = task->mm;
	if (mm) {
		if (task->flags & PF_KTHREAD)
			mm = NULL;
		else
			atomic_inc(&mm->mm_users);
	}
	task_unlock(task);
	return mm;
}
EXPORT_SYMBOL_GPL(get_task_mm);
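/*
 * Illustrative caller (not part of fork.c) sketching the contract documented
 * above: get_task_mm() either returns NULL (kernel thread, or a task with no
 * user address space) or returns the mm with mm_users raised, and the caller
 * must drop that reference with mmput().  The function name and the choice
 * of total_vm as the field to sample are assumptions made for illustration.
 */
static unsigned long example_sample_total_vm(struct task_struct *task)
{
	struct mm_struct *mm;
	unsigned long total_vm = 0;

	mm = get_task_mm(task);
	if (!mm)
		return 0;			/* no user mm to look at */

	total_vm = mm->total_vm;		/* mm stays valid while we hold the reference */

	mmput(mm);				/* may free the mm if we held the last reference */
	return total_vm;
}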
/* Please note the differences between mmput and mm_release.
 * mmput is called whenever we stop holding onto a mm_struct,
 * error success whatever.
 *
 * mm_release is called after a mm_struct has been removed
 * from the current process.
 *
 * This difference is important for error handling, when we
 * only half set up a mm_struct for a new process and need to restore
 * the old one.  Because we mmput the new mm_struct before
 * restoring the old one. . .
 * Eric Biederman 10 January 1998
 */
void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
	struct completion *vfork_done = tsk->vfork_done;

	/* Get rid of any cached register state */
	deactivate_mm(tsk, mm);

	/* notify parent sleeping on vfork() */
	if (vfork_done) {
		tsk->vfork_done = NULL;
		complete(vfork_done);
	}

	/*
	 * If we're exiting normally, clear a user-space tid field if
	 * requested.  We leave this alone when dying by signal, to leave
	 * the value intact in a core dump, and to save the unnecessary
	 * trouble otherwise.  Userland only wants this done for a sys_exit.
	 */
	if (tsk->clear_child_tid
	    && !(tsk->flags & PF_SIGNALED)
	    && atomic_read(&mm->mm_users) > 1) {
		u32 __user * tidptr = tsk->clear_child_tid;
		tsk->clear_child_tid = NULL;

		/*
		 * We don't check the error code - if userspace has
		 * not set up a proper pointer then tough luck.
		 */
		put_user(0, tidptr);
		sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
	}
}

/*
 * Allocate a new mm structure and copy contents from the
 * mm structure of the passed in task structure.
 */
struct mm_struct *dup_mm(struct task_struct *tsk)
{
	struct mm_struct *mm, *oldmm = current->mm;
	int err;

	if (!oldmm)
		return NULL;

	mm = allocate_mm();
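The clear_child_tid handling in mm_release() above is the kernel half of a userspace thread-join handshake: a thread library registers a tid word with CLONE_CHILD_CLEARTID (or set_tid_address()), and on a normal exit the kernel stores 0 there and issues a FUTEX_WAKE on it. Below is a minimal sketch of the waiting side, assuming a glibc-style library; the function name and loop are illustrative, not taken from any particular implementation.

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Block until the kernel's mm_release() has zeroed the child's tid word. */
static void example_wait_for_child_exit(volatile int *child_tid)
{
	int tid;

	while ((tid = *child_tid) != 0) {
		/*
		 * FUTEX_WAIT returns on a wake-up, on EAGAIN (the word no
		 * longer holds 'tid'), or on a signal; in every case we just
		 * re-read the word and stop once it has become zero.
		 */
		syscall(SYS_futex, child_tid, FUTEX_WAIT, tid, NULL, NULL, 0);
	}
}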