📄 fork.c
/*
 *  linux/kernel/fork.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 *  'fork.c' contains the help-routines for the 'fork' system call
 * (see also entry.S and others).
 * Fork is rather simple, once you get the hang of it, but the memory
 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
 */

#include <linux/slab.h>
#include <linux/init.h>
#include <linux/unistd.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
#include <linux/mnt_namespace.h>
#include <linux/personality.h>
#include <linux/mempolicy.h>
#include <linux/sem.h>
#include <linux/file.h>
#include <linux/key.h>
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/jiffies.h>
#include <linux/futex.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/rcupdate.h>
#include <linux/ptrace.h>
#include <linux/mount.h>
#include <linux/audit.h>
#include <linux/profile.h>
#include <linux/rmap.h>
#include <linux/acct.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/freezer.h>
#include <linux/delayacct.h>
#include <linux/taskstats_kern.h>
#include <linux/random.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

/*
 * Protected counters by write_lock_irq(&tasklist_lock)
 */
unsigned long total_forks;	/* Handle normal Linux uptimes. */
int nr_threads; 		/* The idle threads do not count.. */

int max_threads;		/* tunable limit on nr_threads */

DEFINE_PER_CPU(unsigned long, process_counts) = 0;

__cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */

int nr_processes(void)
{
	int cpu;
	int total = 0;

	for_each_online_cpu(cpu)
		total += per_cpu(process_counts, cpu);

	return total;
}

#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
# define alloc_task_struct()	kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
# define free_task_struct(tsk)	kmem_cache_free(task_struct_cachep, (tsk))
static struct kmem_cache *task_struct_cachep;
#endif

/* SLAB cache for signal_struct structures (tsk->signal) */
static struct kmem_cache *signal_cachep;

/* SLAB cache for sighand_struct structures (tsk->sighand) */
struct kmem_cache *sighand_cachep;

/* SLAB cache for files_struct structures (tsk->files) */
struct kmem_cache *files_cachep;

/* SLAB cache for fs_struct structures (tsk->fs) */
struct kmem_cache *fs_cachep;

/* SLAB cache for vm_area_struct structures */
struct kmem_cache *vm_area_cachep;

/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;

void free_task(struct task_struct *tsk)
{
	free_thread_info(tsk->stack);
	rt_mutex_debug_task_free(tsk);
	free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);

void __put_task_struct(struct task_struct *tsk)
{
	WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
	WARN_ON(atomic_read(&tsk->usage));
	WARN_ON(tsk == current);

	security_task_free(tsk);
	free_uid(tsk->user);
	put_group_info(tsk->group_info);
	delayacct_tsk_free(tsk);

	if (!profile_handoff_task(tsk))
		free_task(tsk);
}

void __init fork_init(unsigned long mempages)
{
#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
#endif
	/* create a slab on which task_structs can be allocated */
	task_struct_cachep =
		kmem_cache_create("task_struct", sizeof(struct task_struct),
			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
#endif

	/*
	 * The default maximum number of threads is set to a safe
	 * value: the thread structures can take up at most half
	 * of memory.
	 */
	max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);

	/*
	 * we need to allow at least 20 threads to boot a system
	 */
	if(max_threads < 20)
		max_threads = 20;

	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
	init_task.signal->rlim[RLIMIT_SIGPENDING] =
		init_task.signal->rlim[RLIMIT_NPROC];
}

static struct task_struct *dup_task_struct(struct task_struct *orig)
{
	struct task_struct *tsk;
	struct thread_info *ti;

	prepare_to_copy(orig);

	tsk = alloc_task_struct();
	if (!tsk)
		return NULL;

	ti = alloc_thread_info(tsk);
	if (!ti) {
		free_task_struct(tsk);
		return NULL;
	}

	*tsk = *orig;
	tsk->stack = ti;
	setup_thread_stack(tsk, orig);

#ifdef CONFIG_CC_STACKPROTECTOR
	tsk->stack_canary = get_random_int();
#endif

	/* One for us, one for whoever does the "release_task()" (usually parent) */
	atomic_set(&tsk->usage,2);
	atomic_set(&tsk->fs_excl, 0);
#ifdef CONFIG_BLK_DEV_IO_TRACE
	tsk->btrace_seq = 0;
#endif
	tsk->splice_pipe = NULL;
	return tsk;
}

#ifdef CONFIG_MMU
static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
	struct vm_area_struct *mpnt, *tmp, **pprev;
	struct rb_node **rb_link, *rb_parent;
	int retval;
	unsigned long charge;
	struct mempolicy *pol;

	down_write(&oldmm->mmap_sem);
	flush_cache_dup_mm(oldmm);
	/*
	 * Not linked in yet - no deadlock potential:
	 */
	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);

	mm->locked_vm = 0;
	mm->mmap = NULL;
	mm->mmap_cache = NULL;
	mm->free_area_cache = oldmm->mmap_base;
	mm->cached_hole_size = ~0UL;
	mm->map_count = 0;
	cpus_clear(mm->cpu_vm_mask);
	mm->mm_rb = RB_ROOT;
	rb_link = &mm->mm_rb.rb_node;
	rb_parent = NULL;
	pprev = &mm->mmap;

	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
		struct file *file;

		if (mpnt->vm_flags & VM_DONTCOPY) {
			long pages = vma_pages(mpnt);
			mm->total_vm -= pages;
			vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
								-pages);
			continue;
		}
		charge = 0;
		if (mpnt->vm_flags & VM_ACCOUNT) {
			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
			if (security_vm_enough_memory(len))
				goto fail_nomem;
			charge = len;
		}
		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
		if (!tmp)
			goto fail_nomem;
		*tmp = *mpnt;
		pol = mpol_copy(vma_policy(mpnt));
		retval = PTR_ERR(pol);
		if (IS_ERR(pol))
			goto fail_nomem_policy;
		vma_set_policy(tmp, pol);
		tmp->vm_flags &= ~VM_LOCKED;
		tmp->vm_mm = mm;
		tmp->vm_next = NULL;
		anon_vma_link(tmp);
		file = tmp->vm_file;
		if (file) {
			struct inode *inode = file->f_path.dentry->d_inode;
			get_file(file);
			if (tmp->vm_flags & VM_DENYWRITE)
				atomic_dec(&inode->i_writecount);

			/* insert tmp into the share list, just after mpnt */
			spin_lock(&file->f_mapping->i_mmap_lock);
			tmp->vm_truncate_count = mpnt->vm_truncate_count;
			flush_dcache_mmap_lock(file->f_mapping);
			vma_prio_tree_add(tmp, mpnt);
			flush_dcache_mmap_unlock(file->f_mapping);
			spin_unlock(&file->f_mapping->i_mmap_lock);
		}

		/*
		 * Link in the new vma and copy the page table entries.
		 */
		*pprev = tmp;
		pprev = &tmp->vm_next;

		__vma_link_rb(mm, tmp, rb_link, rb_parent);
		rb_link = &tmp->vm_rb.rb_right;
		rb_parent = &tmp->vm_rb;

		mm->map_count++;
		retval = copy_page_range(mm, oldmm, mpnt);

		if (tmp->vm_ops && tmp->vm_ops->open)
			tmp->vm_ops->open(tmp);

		if (retval)
			goto out;
	}
	/* a new mm has just been created */
	arch_dup_mmap(oldmm, mm);
	retval = 0;
out:
	up_write(&mm->mmap_sem);
	flush_tlb_mm(oldmm);
	up_write(&oldmm->mmap_sem);
	return retval;
fail_nomem_policy:
	kmem_cache_free(vm_area_cachep, tmp);
fail_nomem:
	retval = -ENOMEM;
	vm_unacct_memory(charge);
	goto out;
}

static inline int mm_alloc_pgd(struct mm_struct * mm)
{
	mm->pgd = pgd_alloc(mm);
	if (unlikely(!mm->pgd))
		return -ENOMEM;
	return 0;
}

static inline void mm_free_pgd(struct mm_struct * mm)
{
	pgd_free(mm->pgd);
}
#else
#define dup_mmap(mm, oldmm)	(0)
#define mm_alloc_pgd(mm)	(0)
#define mm_free_pgd(mm)
#endif /* CONFIG_MMU */

 __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);

#define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))

#include <linux/init_task.h>

static struct mm_struct * mm_init(struct mm_struct * mm)
{
	atomic_set(&mm->mm_users, 1);
	atomic_set(&mm->mm_count, 1);
	init_rwsem(&mm->mmap_sem);
	INIT_LIST_HEAD(&mm->mmlist);
	mm->core_waiters = 0;
	mm->nr_ptes = 0;
	set_mm_counter(mm, file_rss, 0);
	set_mm_counter(mm, anon_rss, 0);
	spin_lock_init(&mm->page_table_lock);
	rwlock_init(&mm->ioctx_list_lock);
	mm->ioctx_list = NULL;
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->cached_hole_size = ~0UL;

	if (likely(!mm_alloc_pgd(mm))) {
		mm->def_flags = 0;
		return mm;
	}
	free_mm(mm);
	return NULL;
}

/*
 * Allocate and initialize an mm_struct.
 */
struct mm_struct * mm_alloc(void)
{
	struct mm_struct * mm;

	mm = allocate_mm();
	if (mm) {
		memset(mm, 0, sizeof(*mm));
		mm = mm_init(mm);
	}
	return mm;
}

/*
 * Called when the last reference to the mm
 * is dropped: either by a lazy thread or by
 * mmput. Free the page directory and the mm.
 */
void fastcall __mmdrop(struct mm_struct *mm)
{
	BUG_ON(mm == &init_mm);
	mm_free_pgd(mm);
	destroy_context(mm);
	free_mm(mm);
}

/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
	might_sleep();

	if (atomic_dec_and_test(&mm->mm_users)) {
		exit_aio(mm);
		exit_mmap(mm);
		if (!list_empty(&mm->mmlist)) {
			spin_lock(&mmlist_lock);
			list_del(&mm->mmlist);
			spin_unlock(&mmlist_lock);
		}
		put_swap_token(mm);
		mmdrop(mm);
	}
}
EXPORT_SYMBOL_GPL(mmput);

/**
 * get_task_mm - acquire a reference to the task's mm
 *
 * Returns %NULL if the task has no mm.  Checks PF_BORROWED_MM (meaning
 * this kernel workthread has transiently adopted a user mm with use_mm,
 * to do its AIO) is not set and if so returns a reference to it, after
 * bumping up the use count.  User must release the mm via mmput()
 * after use.  Typically used by /proc and ptrace.
 */
struct mm_struct *get_task_mm(struct task_struct *task)
{
	struct mm_struct *mm;

	task_lock(task);
	mm = task->mm;
	if (mm) {
		if (task->flags & PF_BORROWED_MM)
			mm = NULL;
		else
			atomic_inc(&mm->mm_users);
	}
	task_unlock(task);
	return mm;
}
EXPORT_SYMBOL_GPL(get_task_mm);

/* Please note the differences between mmput and mm_release.
 * mmput is called whenever we stop holding onto a mm_struct,
 * error success whatever.
 *
 * mm_release is called after a mm_struct has been removed
 * from the current process.
 *
 * This difference is important for error handling, when we
 * only half set up a mm_struct for a new process and need to restore
 * the old one.  Because we mmput the new mm_struct before
 * restoring the old one. . .
 * Eric Biederman 10 January 1998
 */
void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
	struct completion *vfork_done = tsk->vfork_done;

	/* Get rid of any cached register state */
	deactivate_mm(tsk, mm);

	/* notify parent sleeping on vfork() */
	if (vfork_done) {
		tsk->vfork_done = NULL;
		complete(vfork_done);
	}

	/*
	 * If we're exiting normally, clear a user-space tid field if
	 * requested.  We leave this alone when dying by signal, to leave
	 * the value intact in a core dump, and to save the unnecessary
	 * trouble otherwise.  Userland only wants this done for a sys_exit.
	 */
	if (tsk->clear_child_tid
	    && !(tsk->flags & PF_SIGNALED)
	    && atomic_read(&mm->mm_users) > 1) {
		u32 __user * tidptr = tsk->clear_child_tid;
		tsk->clear_child_tid = NULL;

		/*
		 * We don't check the error code - if userspace has
		 * not set up a proper pointer then tough luck.
		 */
		put_user(0, tidptr);
		sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
	}
}

/*
 * Allocate a new mm structure and copy contents from the
 * mm structure of the passed in task structure.
 */
static struct mm_struct *dup_mm(struct task_struct *tsk)
{
	struct mm_struct *mm, *oldmm = current->mm;
	int err;

	if (!oldmm)
		return NULL;

	mm = allocate_mm();
	if (!mm)
		goto fail_nomem;

	memcpy(mm, oldmm, sizeof(*mm));

	/* Initializing for Swap token stuff */
	mm->token_priority = 0;
	mm->last_interval = 0;

	if (!mm_init(mm))
		goto fail_nomem;

	if (init_new_context(tsk, mm))
		goto fail_nocontext;

	err = dup_mmap(mm, oldmm);
	if (err)
		goto free_pt;

	mm->hiwater_rss = get_mm_rss(mm);
	mm->hiwater_vm = mm->total_vm;

	return mm;

free_pt:
	mmput(mm);

fail_nomem:
	return NULL;

fail_nocontext:
	/*
	 * If init_new_context() failed, we cannot use mmput() to free the mm
	 * because it calls destroy_context()
	 */
	mm_free_pgd(mm);
	free_mm(mm);
	return NULL;
}

static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
	struct mm_struct * mm, *oldmm;
	int retval;

	tsk->min_flt = tsk->maj_flt = 0;
	tsk->nvcsw = tsk->nivcsw = 0;

	tsk->mm = NULL;
	tsk->active_mm = NULL;

	/*
	 * Are we cloning a kernel thread?
	 *
	 * We need to steal a active VM for that..
	 */
	oldmm = current->mm;
	if (!oldmm)
		return 0;

	if (clone_flags & CLONE_VM) {
		atomic_inc(&oldmm->mm_users);
		mm = oldmm;
		goto good_mm;
	}

	retval = -ENOMEM;
	mm = dup_mm(tsk);
	if (!mm)
		goto fail_nomem;

good_mm:
	/* Initializing for Swap token stuff */
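
A rough usage sketch, not part of fork.c: it shows how a caller such as a /proc handler would pair get_task_mm() and mmput() from the listing above. The wrapper name show_task_vm_size() is made up for illustration; only get_task_mm(), mmput(), and mm->total_vm are taken from the real interfaces.

#include <linux/sched.h>
#include <linux/mm.h>

static unsigned long show_task_vm_size(struct task_struct *task)
{
	struct mm_struct *mm;
	unsigned long pages = 0;

	/* Bumps mm_users under task_lock(), or returns NULL for kernel threads. */
	mm = get_task_mm(task);
	if (mm) {
		/* Safe to dereference the mm while we hold our reference. */
		pages = mm->total_vm;
		/* Drop the reference; the last mmput() tears the mm down. */
		mmput(mm);
	}
	return pages;
}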