📄 039_fs_exec_c.html
字号:
flow: static(header); } /* used to insert page numbers */ div.google_header::before, div.google_footer::before { position: absolute; top: 0; } div.google_footer { flow: static(footer); } /* always consider this element at the start of the doc */ div#google_footer { flow: static(footer, start); } span.google_pagenumber { content: counter(page); } span.google_pagecount { content: counter(pages); } } @page { @top { content: flow(header); } @bottom { content: flow(footer); } } /* end default print css */ /* custom css *//* end custom css */ /* ui edited css */ body { font-family: Verdana; font-size: 10.0pt; line-height: normal; background-color: #ffffff; } .documentBG { background-color: #ffffff; } /* end ui edited css */</style> </head> <body revision="dcbsxfpf_26cx63kjz2:198"> <table align=center cellpadding=0 cellspacing=0 height=5716 width=802>
<tbody>
<tr>
<td height=5716 valign=top width=802>
<pre>2007-12-14 <br><br>binfmt 跳过了, 但是exec就不跳过了,这次看了吧.<br><br>这里的exec是各种binfmt的管理单元...<br><br><font size=5><b>1.binfmt管理</b></font><br>/*<br> * This structure defines the functions that are used to load the binary formats that<br> * linux accepts.<br> */<br>struct <font color=#3333ff><b>linux_binfmt </b></font>{ //别和linux_binprm混淆了 ....<br> struct linux_binfmt * next;<br> struct module *module;<br> int (*load_binary)(struct linux_binprm *, struct pt_regs * regs);<br> int (*load_shlib)(struct file *);<br> int (*core_dump)(long signr, struct pt_regs * regs, struct file * file);<br> unsigned long min_coredump; /* minimal dump size */<br>};<br><br>static struct <font color=#3333ff><b>linux_binfmt</b></font> *formats;<br>static rwlock_t<font color=#3333ff><b> binfmt_lock</b></font> = RW_LOCK_UNLOCKED;<br><br>就是一个链表,注册和注销非常直接简单.<br>int <font color=#000099><b>register_binfmt</b></font>(struct linux_binfmt * fmt)<br>int <font color=#000099><b>unregister_binfmt</b></font>(struct linux_binfmt * fmt)<br><br><font size=5><b>2.Core Dump</b></font><br>asmlinkage long <font color=#3333ff><b>sys_uselib</b></font>(const char * library); /*就是打开文件名为library的文件,再调用fmt->load_shlib(file)*/<br>int <font color=#3333ff><b>do_coredump</b></font>(long signr, struct pt_regs * regs) /*core dump 是通过信号来触发的*/<br>{<br> struct linux_binfmt * binfmt;<br> char corename[6+sizeof(current->comm)];<font color=#3366ff> /*current->comm是进程对应的bin文件名*/</font><br> struct file * file;<br> struct inode * inode;<br><br> lock_kernel();<br> <font color=#3333ff>........//sanity check,略过</font><br> memcpy(corename,"core.", 5); <font color=#3333ff>//core.binfilename就是coredump的文件名</font><br><br> file = filp_open(corename, O_CREAT | 2 | O_TRUNC | O_NOFOLLOW, 0600); /*创建coredump文件,准备写...*/<br> ..........//略<br> if (inode->i_nlink > 1)<br> goto close_fail; /* multiple links - don't dump */ <br> <font color=#3333ff> /*i_nlink > 1 表示该文件有多个硬链接,此时不产生core dump; 符号链接的情况已由上面filp_open的O_NOFOLLOW拒绝 */</font><br> ...//more check<br> if (!binfmt->core_dump(signr, regs, file)) 
/*调用binfmt的core dump*/<br> goto close_fail;<br> ........//略....<br>}<br><br><font size=5><b>3.execve</b></font><br><br>do_execve 是载入bin运行的核心函数了,推荐阅读材料:<br><a href=http://www.iecc.com/linker/linker10.html id=qssy title="dynamic linker and loader">dynamic linker and loader</a> <br><a href=http://www.iecc.com/linker/ id=wpcp title="linker and loader">linker and loader</a> <br>/*<br> * sys_execve() executes a new program.<br> */<br>asmlinkage int <font color=#3333ff><b>sys_execve</b></font>(abi64_no_regargs, struct pt_regs regs)<br>{<br> int error;<br> char * filename;<br><br> filename = getname((char *) (long)regs.regs[4]);<br> error = PTR_ERR(filename);<br> if (IS_ERR(filename))<br> goto out;<br> error = <font color=#3333ff>do_execve<font color=#006600>(filename, (char **) (long)regs.regs[5],<br> (char **) (long)regs.regs[6], &amp;regs);</font></font><br> putname(filename);<br><br>out:<br> return error;<br>}<br>以前读这个函数最大的问题是 bprm.p的理解, 总是和建立的main函数调用栈对不上号.原因在于,一开始的时候bprm.p<br>是一个size,典型大小是 PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *) :128k. 
最后到调用具体的binfmt的load_binary<br>函数的时候,bprm.p是剩余的空间.(copy_strings会把已经占用的空间从bprm.p中刨去).<br>copy_strings 和 setup_arg_pages 是理解其涵义的关键函数.<br>先看个图示:注意page的映射方式<br><div id=xm-4 style="PADDING:1em 0pt; TEXT-ALIGN:left"><img src=039_fs_exec_c_images/dcbsxfpf_27crdp7whb.gif style="WIDTH:568px; HEIGHT:348px"></div><br><br>从<font color=#3333ff><b>setup_arg_pages可以看出来页面的映射方式,同时参考上图<br></b></font>int <font color=#3333ff><b>setup_arg_pages</b></font>(struct linux_binprm *bprm)<br>{<br> unsigned long stack_base;<br> struct vm_area_struct *mpnt;<br> int i;<br><br> <font color=#3333ff>stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;<br><br> bprm->p += stack_base;</font><br> if (bprm->loader)<br> bprm->loader += stack_base;<br> bprm->exec += stack_base;<br><br> mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);<br> if (!mpnt) <br> return -ENOMEM; <br> <br> down(&amp;current->mm->mmap_sem);<br> { <font color=#3333ff>[vma->start,vma->end) 左闭右开区间</font><br> .......<br> <font color=#3333ff>mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; []<br> mpnt->vm_end = STACK_TOP; /*STACK_TOP :0XC0000000 不属于这个vma的,见find_vma*/<br> ........<br></font> } <br><br> for (i = 0 ; i < MAX_ARG_PAGES ; i++) {<br> struct page *page = bprm->page[i];<br> if (page) {<br> bprm->page[i] = NULL;<br> current->mm->rss++;<br> put_dirty_page(current,page,stack_base);<br> }<br> stack_base += PAGE_SIZE;<br> }<br> up(&amp;current->mm->mmap_sem);<br> <br> return 0;<br>}<br><font color=#cc0000>可以想象,相应的copy_strings必须从page[31]的高地址向低地址逐个写入参数:</font><br>/*<br> * 'copy_strings()' copies argument/envelope strings from user<br> * memory to free pages in kernel mem. 
These are in a format ready<br> * to be put directly into the top of new user memory.<br> */<br>int<font color=#3333ff><b> copy_strings</b></font>(int argc,char ** argv, struct linux_binprm *bprm) <br>{<br> while (argc-- > 0) {<br> ...<br> if (get_user(str, argv+argc) || !str || !(len = strnlen_user(str, bprm->p))) <br> return -EFAULT;<br> .........<br> bprm->p -= len; <font color=#3333ff>/*预留出这个字符串的空间*/</font><br> /* XXX: add architecture specific overflow check here. */ <br><br> pos = bprm->p;<br> while (len > 0) {<br> .............<br><br> <font color=#3333ff>offset = pos % PAGE_SIZE;<br> i = pos/PAGE_SIZE;<br> page = bprm->page[i]; /*这样计算偏移和page,肯定是最后的page[31]先分配*/</font><br> new = 0;<br> ...........//alloc page<br> kaddr = kmap(page);<br> ........//copy<br><br> pos += bytes_to_copy;<br> str += bytes_to_copy;<br> len -= bytes_to_copy;<br> }<br> }<br> return 0;<br>}<br><font color=#ff0000>最后看execve的过程......................</font><br>/*<br> * sys_execve() executes a new program.<br> */<br>int <font color=#3333ff><b>do_execve</b></font>(char * filename, char ** argv, char ** envp, struct pt_regs * regs)<br>{<br> file = open_exec(filename); <font color=#3333ff>//打开可执行文件....,内核内怎么读写文件的又一个例子,open..</font><br> ....//sanity check 略 <br><br> bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); <br> <font color=#3333ff> //bprm.p 现在是一个size:当前可用于参数传递的空间大小 (<font color=#cc0000>注意是剩余的空间大小阿....</font>)<br> //预留了一个指针 (怪异)</font><br> memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); /*copy arc的时候会分配page*/<br> ........//略<br> if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) { <font color=#3333ff>//看看有多少个参数</font><br> {<br> 第一个是bin的文件名..<br> .....<br> } <br><br> if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) { <font color=#3333ff>/*多少个环境变量*/</font><br> {<br> /env也是必须有的... <br> ... 
<br> }<br> retval = <b>prepare_binprm</b>(&bprm); <font color=#3333ff>/*e_uid/uid e_gid/gid capability 的处理*/<br> /*并读入一小段文件内容,以便判定是哪种bin....*/<br></font> if (retval < 0) <br> goto out; <br><br> retval = copy_strings_kernel(1, &bprm.filename, &bprm); <br> <font color=#000099> <font color=#3333ff>/*copy 1个参数,从bprm.filename->bprm->page, bprm->p代表当前可用于参数传递的空间大小.*/</font></font><br> if (retval < 0) <br> goto out; <br><br> bprm.exec = bprm.p;<br> retval = copy_strings(bprm.envc, envp, &bprm); <font color=#3333ff>/*copy all env*/</font><br> if (retval < 0) <br> goto out; <br><br> retval = copy_strings(bprm.argc, argv, &bprm); <font color=#3333ff>/*copy all argv*/</font><br> if (retval < 0) <br> goto out; <br><br> retval = search_binary_handler(&bprm,regs); /*找到对应的linux<font color=#3333ff><b>_binfmt 调用fmt->load_binary; </b></font>*/<br> if (retval >= 0)<br> /* execve success */<br> return retval;<br> ............ <font color=#3333ff>//错误处理,略...</font><br><br> return retval;<br>}<br><br>这里的工作只是copy参数到指定页面; 映射页面<font color=#3333ff><b>(setup_arg_pages)</b></font>和建立调用堆栈需要具体的binfmt来做,以elf为例吧.<br><br>static int <font color=#3333ff><b>load_elf_binary</b></font>(struct linux_binprm * bprm, struct pt_regs * regs)<br>{<br> ........<br><br> /* Get the exec-header */<br> elf_ex = *((struct elfhdr *) bprm->buf); /*<b>prepare_binprm</b>已经读入一小段了*/<br><br> ......//判定是否是elf<br><br> /* Now read in all of the header information */<br> ........<br> retval = kernel_read(bprm->file, elf_ex.e_phoff, (char *) elf_phdata, size);<br> ....<br> fd_install(elf_exec_fileno = retval, bprm->file);<font color=#3333ff> //读入完整头,并设置到已打开文件表</font><br><br> elf_ppnt = elf_phdata;<br> elf_bss = 0;<br> elf_brk = 0;<br><br> start_code = ~0UL;<br> end_code = 0;<br> start_data = 0;<br> end_data = 0;<br> <font color=#3333ff> /*下面加载shared libraries 的interpreter,具体分析elf再说吧....*/</font><br> for (i = 0; i < elf_ex.e_phnum; i++) {<br> ..........<br> }<br><br> <font color=#3333ff>/* Some simple consistency checks for the 
interpreter */</font><br> if (elf_interpreter) {<br> ..............<br> }<br><br> <font color=#3333ff>/* OK, we are done with that, now set up the arg stuff,<br> and then start this sucker up */<br> //我们关注下 argc argv envc env的设置..........</font><br> if (!bprm->sh_bang) {<br> ........<br> }<br><br> /* Flush all traces of the currently running executable */<br> retval = flush_old_exec(bprm);<br> <font color=#3333ff> /*释放老的sig ctx copy一份新的;释放老的mm(用户页面和pagetable),建立并切换到新的mm<br> (新的4g virtual address space); 释放老的文件;清空 fpu,thread,从thread group摘除<br> */</font><br> if (retval)<br> goto out_free_dentry;<br><br> /* OK, This is the point of no return */<br> current->mm->start_data = 0;<br> current->mm->end_data = 0;<br> current->mm->end_code = 0;<br> current->mm->mmap = NULL;<br> current->flags &= ~PF_FORKNOEXEC;<br> elf_entry = (unsigned long) elf_ex.e_entry;<br><br> /* Do this immediately, since STACK_TOP as used in setup_arg_pages<br> may depend on the personality. */<br> SET_PERSONALITY(elf_ex, ibcs2_interpreter);<br><br> /* Do this so that we can load the interpreter, if need be. We will<br> change some of these later */<br> current->mm->rss = 0;<br> <b>setup_arg_pages</b>(bprm); /* XXX: check error */<br> <font color=#3333ff>/*上面看过了映射页面+setup vma,不过这里是ia32_setup_arg_pages,差别不大*/</font><br> <b>current->mm->start_stack = bprm->p;</b><br style=FONT-WEIGHT:bold><br> /* Try and get dynamic programs out of the way of the default mmap<br> base, as well as whatever program they might try to exec. This<br> is because the brk will follow the loader, and is not movable. */<br><br> load_bias = ELF_PAGESTART(elf_ex.e_type==ET_DYN ? ELF_ET_DYN_BASE : 0);<br><br> <font color=#3366ff>/* Now we do a little grungy work by mmaping the ELF image into<br> the correct location in memory. At this point, we assume that<br> the image should be loaded at fixed address, not at a variable<br> address. 
*/</font><br><br> old_fs = get_fs();<br> set_fs(get_ds());<br> for(i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) {<br> int elf_prot = 0, elf_flags;<br> .........<br> }<br> set_fs(old_fs);<br><font color=#009900><br> </font><font color=#009900>elf_entry += load_bias;<br> elf_bss += load_bias;<br> elf_brk += load_bias;<br> start_code += load_bias;<br> end_code += load_bias;<br> start_data += load_bias;<br> end_data += load_bias;</font><br><br> if (elf_interpreter) {/*加载elf <font color=#3333ff>interpreter</font>*/<br> if (interpreter_type == INTERPRETER_AOUT)<br> elf_entry = load_aout_interp(&interp_ex,<br> interpreter);<br> else<br> elf_entry = load_elf_interp(&interp_elf_ex,<br> interpreter,<br> &interp_load_addr);<br> ..............<br> }<br> <font color=#3333ff> .......下面的函数是真正建立argc argv...的代码</font><br> bprm->p = (unsigned long)<br> <font color=#000099><b>create_elf_tables</b></font>(<font color=#009900>(char *)bprm->p,<br> bprm->argc,<br> bprm->envc,<br> (interpreter_type == <font color=#cc0000>INTERPRETER_ELF</font> ? &elf_ex : <font color=#cc0000>NULL</font>),<br> load_addr, load_bias,<br> interp_load_addr,<br> (interpreter_type == <font color=#cc0000>INTERPRETER_AOUT</font> ? 0 : 1)</font>);<br> /* N.B. passed_fileno might not be initialized? 
*/<br> if (interpreter_type == INTERPRETER_AOUT)<br> <font color=#009900> current->mm->arg_start += strlen(passed_fileno) + 1;<br> current->mm->start_brk = current->mm->brk = elf_brk;<br> current->mm->end_code = end_code;<br> current->mm->start_code = start_code;<br> current->mm->start_data = start_data;<br> current->mm->end_data = end_data;<br> current->mm->start_stack = bprm->p;</font><br><br> /* Calling set_brk effectively mmaps the pages that we need<br> * for the bss and break sections<br> */<br> set_brk(elf_bss, elf_brk);<br><br> padzero(elf_bss);<br> .............<br>}<br>static elf_addr_t * <br><font color=#000099><b>create_elf_tables</b></font>(char *p, int argc, int envc,<br> struct elfhdr * exec,<br> unsigned long load_addr,<br> unsigned long load_bias,<br> unsigned long interp_load_addr, int ibcs)<br>{<br> elf_caddr_t *argv;<br> elf_caddr_t *envp;<br> elf_addr_t *sp, *csp;<br> char *k_platform, *u_platform;<br> long hwcap;<br> size_t platform_len = 0;<br><br> /*<br> * Get hold of platform and hardware capabilities masks for<br> * the machine we are running on. In some cases (Sparc), <br> * this info is impossible to get, in others (i386) it is<br> * merely difficult.<br> */<br> hwcap = ELF_HWCAP;<br> k_platform = ELF_PLATFORM;<br> if (k_platform) { <font color=#3333ff>//多传递一个平台特定的elf优化参数</font><br> platform_len = strlen(k_platform) + 1;<br> u_platform = p - platform_len;<br> __copy_to_user(u_platform, k_platform, platform_len);<br> } else<br> u_platform = p;<br><br> <font color=#3333ff>/*<br> * Force 16 byte _final_ alignment here for generality.<br> * Leave an extra 16 bytes free so that on the PowerPC we<br> * can move the aux table up to start on a 16-byte boundary.<br> */<br> /*sp是current stack top*/ /*为各个参数预留空间:*/</font><br> sp = (elf_addr_t *)((~15UL & (unsigned long)(u_platform)) - 16UL);<br> csp = sp;<br> csp -= ((exec ? DLINFO_ITEMS*2 : 4) + (k_platform ? 2 : 0));<br> csp -= envc+1;<br> csp -= argc+1;<br> csp -= (!ibcs ? 
3 : 1); /* argc itself */<br> if ((unsigned long)csp & 15UL)<br> sp -= ((unsigned long)csp & 15UL) / sizeof(*sp);<br><br> /*<br> * Put the ELF interpreter info on the stack<br> */<br>#define NEW_AUX_ENT(nr, id, val) \<br> __put_user ((id), sp+(nr*2)); \<br> __put_user ((val), sp+(nr*2+1)); \<br><br> sp -= 2;<br> NEW_AUX_ENT(0, AT_NULL, 0);<br> if (k_platform) {<br> sp -= 2;<br> NEW_AUX_ENT(0, AT_PLATFORM, (elf_addr_t)(unsigned long) u_platform);<br> }<br> sp -= 3*2;<br> NEW_AUX_ENT(0, AT_HWCAP, hwcap);<br> NEW_AUX_ENT(1, AT_PAGESZ, ELF_EXEC_PAGESIZE);<br> NEW_AUX_ENT(2, AT_CLKTCK, CLOCKS_PER_SEC);<br><br> if (exec) {<br> sp -= 10*2;<br><br> NEW_AUX_ENT(0, AT_PHDR, load_addr + exec->e_phoff);<br> NEW_AUX_ENT(1, AT_PHENT, sizeof (struct elf_phdr));<br> NEW_AUX_ENT(2, AT_PHNUM, exec->e_phnum);<br> NEW_AUX_ENT(3, AT_BASE, interp_load_addr);<br> NEW_AUX_ENT(4, AT_FLAGS, 0);<br> NEW_AUX_ENT(5, AT_ENTRY, load_bias + exec->e_entry);<br> NEW_AUX_ENT(6, AT_UID, (elf_addr_t) current->uid);<br> NEW_AUX_ENT(7, AT_EUID, (elf_addr_t) current->euid);<br> NEW_AUX_ENT(8, AT_GID, (elf_addr_t) current->gid);<br> NEW_AUX_ENT(9, AT_EGID, (elf_addr_t) current->egid);<br> }<br>#undef NEW_AUX_ENT<br> <font color=#3333ff> /*这里才到了建立argc,argv,...的代码, 倒是也不难了....*/</font><br> sp -= envc+1;<br> envp = (elf_caddr_t *) sp;<br> sp -= argc+1;<br> argv = (elf_caddr_t *) sp;<br> if (!ibcs) {<br> __put_user((elf_addr_t)(unsigned long) envp,--sp);<br> __put_user((elf_addr_t)(unsigned long) argv,--sp);<br> }<br><br> __put_user((elf_addr_t)argc,--sp);<br> current->mm->arg_start = (unsigned long) p;<br> while (argc-->0) {<br> __put_user((elf_caddr_t)(unsigned long)p,argv++);<br> p += strlen_user(p);<br> }<br> __put_user(NULL, argv);<br> current->mm->arg_end = current->mm->env_start = (unsigned long) p;<br> while (envc-->0) {<br> __put_user((elf_caddr_t)(unsigned long)p,envp++);<br> p += strlen_user(p);<br> }<br> __put_user(NULL, envp);<br> current->mm->env_end = (unsigned long) p;<br> return 
sp;<br>}<br><font color=#3333ff size=3>传递的参数真是不少啊.......... (怎么不检查空间够不够啊.... 预留的4个byte是啥意思...)</font><br><br><font size=5><b>4. execve辅助函数<br><br></b></font>int flush_old_exec(struct linux_binprm * bprm)<br>{<br> char * name;<br> int i, ch, retval;<br> struct signal_struct * oldsig;<br><br> /*<br> * Make sure we have a private signal table<br> */<br> oldsig = current->sig;<br> retval = make_private_signals(); <font color=#3333ff>/*分配并拷贝新的sigact*/</font><br> if (retval) goto flush_failed;<br><br> /* <br> * Release all of the old mmap stuff<br> */<br> retval = exec_mmap();<font color=#000099> <font color=#3333ff>/*有了对mm的分析,理解这个函数自然是不难的...*/<br> /*建立新的虚拟空间,释放所有对原有mm的引用:用户页面.. CLEAR page table*/<br> /*fork的进程共享所有页面,但是并没有共享pgd哦...见copy_mm->mm_init*/<br> <br></font></font> if (retval) goto mmap_failed;<br><br> /* This is the point of no return */<br> release_old_signals(oldsig); <font color=#3333ff>/*释放老的sigact*/</font><br><br> current->sas_ss_sp = current->sas_ss_size = 0;<br> /*设置好current->comm*/<br> .........<br><br> flush_thread();<font color=#3333ff> /*clear fpu 和 debug regs*/</font><br><br> de_thread(current);<font color=#3333ff> /*摘链表的操作*/</font><br><br> ........<br> <br> flush_signal_handlers(current); <font color=#3333ff>/*不是IGN就换成DFL(不忽略就改为default)*/</font><br> flush_old_files(current->files); <font color=#3333ff>/*如果有close on exec 就close 文件*/</font><br><br> return 0;<br><br>mmap_failed:<br> .......<br>}<br><font color=#3333ff>map页面到指定的虚拟地址</font><br>void <font color=#000099><b>put_dirty_page</b></font>(struct task_struct * tsk, struct page *page, unsigned long address)<br><font color=#3333ff><br>capabilities 的继承和设置策略.<br>/*<br> * This function is used to produce the new IDs and capabilities<br> * from the old ones and the file's capabilities.<br> *<br> * The formula used for evolving capabilities is:<br> *<br> * pI' = pI<br> * (***) pP' = (fP & X) | (fI & pI)<br> * pE' = pP' & fE [NB. 
fE is 0 or ~0]<br> *<br> * I=Inheritable, P=Permitted, E=Effective // p=process, f=file<br> * ' indicates post-exec(), and X is the global 'cap_bset'.<br> *<br> */</font><br>void compute_creds(struct linux_binprm *bprm) <br><br><br><br><br>done. 2007.12.15<br><br></pre>
</td>
</tr>
</tbody>
</table></body></html>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -