⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 amd64-linux.elf-entry.s

📁 UPX 源代码
💻 S
字号:
/*  amd64-linux.elf-entry.S -- Linux program entry point & decompressor (Elf binary)
**
**  This file is part of the UPX executable compressor.
**
**  Copyright (C) 1996-2007 Markus Franz Xaver Johannes Oberhumer
**  Copyright (C) 1996-2007 Laszlo Molnar
**  Copyright (C) 2000-2007 John F. Reiser
**  All Rights Reserved.
**
**  UPX and the UCL library are free software; you can redistribute them
**  and/or modify them under the terms of the GNU General Public License as
**  published by the Free Software Foundation; either version 2 of
**  the License, or (at your option) any later version.
**
**  This program is distributed in the hope that it will be useful,
**  but WITHOUT ANY WARRANTY; without even the implied warranty of
**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
**  GNU General Public License for more details.
**
**  You should have received a copy of the GNU General Public License
**  along with this program; see the file COPYING.
**  If not, write to the Free Software Foundation, Inc.,
**  59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**
**  Markus F.X.J. Oberhumer              Laszlo Molnar
**  <mfx@users.sourceforge.net>          <ml1050@users.sourceforge.net>
**
**  John F. Reiser
**  <jreiser@users.sourceforge.net>
*/

/* AT&T syntax, GNU as.  This is the in-place stub prepended to a compressed
** ELF executable: it decompresses ("unfolds") the rest of the loader, then
** jumps into it.
** NOTE(review): %arg1..%arg5{,l,b} and %sys4 come from arch/amd64/regs.h --
** presumably the SysV argument registers (%rdi,%rsi,%rdx,%rcx,%r8) with %r10
** as the 4th syscall register; confirm against regs.h.
*/
#include "arch/amd64/macros.S"
#include "arch/amd64/regs.h"

/* Sizes of / offsets into on-disk structures (ELF headers and UPX's own
** l_info / p_info / b_info records).  Keep in sync with the packer. */
sz_Ehdr= 64
sz_Phdr= 56

sz_l_info= 12
  l_lsize= 8

sz_p_info= 12

sz_b_info= 12
  sz_unc= 0
  sz_cpr= 4
  b_method= 8

/* Constants from <sys/mman.h> / <asm/unistd.h> */
PROT_READ=  1
PROT_WRITE= 2
PROT_EXEC=  4
MAP_PRIVATE= 2
MAP_FIXED=     0x10
MAP_ANONYMOUS= 0x20
SYS_mmap= 9  // 64-bit mode only!

PAGE_SHIFT= 12
PAGE_MASK= (~0<<PAGE_SHIFT)
PAGE_SIZE= -PAGE_MASK

M_NRV2B_LE32=2  // ../conf.h
M_NRV2D_LE32=5
M_NRV2E_LE32=8

  section ELFMAINX
_start: .globl _start
        call main  // push &decompress
ret_main:

/* Returns 0 on success; non-zero on failure.
*/
decompress:  // (uchar const *src, size_t lsrc, uchar *dst, u32 &ldst, uint method)

/* Arguments according to calling convention */
#define src  %arg1
#define lsrc %arg2
#define dst  %arg3
#define ldst %arg4  /* Out: actually a reference: &len_dst */
#define meth %arg5l
#define methb %arg5b

        // Saved state is popped again at 'eof:' -- keep push order in sync.
        push %rbp; push %rbx  // C callable
        push ldst
        push dst
        addq src,lsrc; push lsrc  // &input_eof

  section NRV_HEAD

/* Working registers */
#define off  %eax  /* XXX: 2GB */
#define len  %ecx  /* XXX: 2GB */
#define lenq %rcx
#define bits %ebx
#define disp %rbp

        movq src,%rsi  // hardware src for movsb, lodsb
        movq dst,%rdi  // hardware dst for movsb
        xor bits,bits  // empty; force refill
        xor len,len  // create loop invariant
        orq $(~0),disp  // -1: initial displacement
        call setup  // push &getbit [TUNED]
ra_setup:

/* AMD64 branch prediction is much worse if there are more than 3 branches
   per 16-byte block.  The jnextb would suffer unless inlined.  getnextb is OK
   using closed subroutine to save space, and should be OK on cycles because
   CALL+RET should be predicted.  getnextb could partially expand, using closed
   subroutine only for refill.
*/
/* jump on next bit {0,1} with prediction {y==>likely, n==>unlikely} */
/* Prediction omitted for now. */
/* On refill: prefetch next byte, for latency reduction on literals and offsets. */
#define jnextb0np jnextb0yp
#define jnextb0yp GETBITp; jnc
#define jnextb1np jnextb1yp
#define jnextb1yp GETBITp; jc
#define GETBITp \
        addl bits,bits; jnz 0f; \
        movl (%rsi),bits; subq $-4,%rsi; \
        adcl bits,bits; movb (%rsi),%dl; \
0:
/* Same, but without prefetch (not useful for length of match.)
*/
#define jnextb0n jnextb0y
#define jnextb0y GETBIT; jnc
#define jnextb1n jnextb1y
#define jnextb1y GETBIT; jc
#define GETBIT \
        addl bits,bits; jnz 0f; \
        movl (%rsi),bits; subq $-4,%rsi; \
        adcl bits,bits; \
0:

/* rotate next bit into bottom bit of reg */
#define getnextbp(reg) call *%r11; adcl reg,reg
#define getnextb(reg)  getnextbp(reg)

/* Out-of-line bit fetch: shift 'bits' left; Carry = bit consumed.
** When the 32-bit buffer empties (result 0), refill from *%rsi. */
getbit:
        addl bits,bits; jz refill  // Carry= next bit
        rep; ret
refill:
        movl (%rsi),bits; subq $-4,%rsi  // next 32 bits; set Carry
        adcl bits,bits  // LSB= 1 (CarryIn); CarryOut= next bit
        movb (%rsi),%dl  // speculate: literal, or bottom 8 bits of offset
        rep; ret

/* Copy 'len' bytes of history from %rdi+disp to %rdi (disp is negative).
** 4-byte chunks when the regions cannot overlap, else byte-at-a-time. */
copy:  // In: len, %rdi, disp;  Out: 0==len, %rdi, disp;  trashes %rax, %rdx
        leaq (%rdi,disp),%rax; cmpl $5,len  // <=3 is forced
        movb (%rax),%dl; jbe copy1  // <=5 for better branch predict
        cmpq $-4,disp;   ja  copy1  // 4-byte chunks would overlap
        subl $4,len  // adjust for termination cases
copy4:
        movl (%rax),%edx; addq $4,      %rax; subl $4,len
        movl %edx,(%rdi); leaq  4(%rdi),%rdi; jnc copy4
        addl $4,len; movb (%rax),%dl; jz copy0
copy1:
        incq %rax; movb %dl,(%rdi); subl $1,len
                   movb (%rax),%dl
        leaq 1(%rdi),%rdi;          jnz copy1
copy0:
        rep; ret

/* Reached via 'call setup': the return address (== ra_setup == getbit) is
** popped into %r11 so getnextbp can 'call *%r11' with no relocation. */
setup:
        cld
        pop %r11  // addq $ getbit - ra_setup,%r11  # &getbit

  section NRV2E
#include "arch/amd64/nrv2e_d.S"

  section NRV2D
#include "arch/amd64/nrv2d_d.S"

  section NRV2B
#include "arch/amd64/nrv2b_d.S"

#include "arch/amd64/lzma_d.S"

  section NRV_TAIL
        // empty

  section ELFMAINY
/* Common exit from the decompressors: unwind the state pushed at
** 'decompress:' and report consumed-src / produced-dst lengths. */
eof:
        pop %rcx  // &input_eof
        movq %rsi,%rax; subq %rcx,%rax  // src -= eof;  // return 0: good; else: bad
        pop %rdx;       subq %rdx,%rdi  // dst -= original dst
        pop %rcx;            movl %edi,(%rcx)  // actual length used at dst  XXX: 4GB
        pop %rbx; pop %rbp
        ret

/* These from /usr/include/asm-x86_64/unistd.h */
__NR_write =  1
__NR_exit  = 60

/* mmap failed (e.g. under SELinux policy): write a diagnostic to stderr
** and _exit(127).  'call L72' pushes &message as the return address. */
msg_SELinux:
        push $ L71 - L70; pop %arg3  // length
        call L72
L70:
        .asciz "PROT_EXEC|PROT_WRITE failed.\n"
L71:
        // IDENTSTR goes here

  section ELFMAINZ
L72:
        pop %arg2  // message text
        push $2; pop %arg1  // fd stderr
        push $ __NR_write; pop %rax
        syscall
die:
        push $127; pop %arg1
        push $ __NR_exit; pop %rax
        syscall

/* Decompress the rest of this loader, and jump to it.
   Map a page to hold the decompressed bytes.  Logically this could
   be done by setting .p_memsz for our first PT_LOAD.  But as of 2005-11-09,
   linux 2.6.14 only does ".bss expansion" on the PT_LOAD that describes the
   highest address.  [I regard this as a bug, and it makes the kernel's
   fs/binfmt_elf.c complicated, buggy, and insecure.]  For us, that is the 2nd
   PT_LOAD, which is the only way that linux allows to set the brk() for the
   uncompressed program.  [This is a significant kernel misfeature.]
*/
unfold:
        pop %rbx  // &b_info

/* Get some pages.  If small, then get 1 page located just after the end
   of the first PT_LOAD of the compressed program.  This will still be below
   all of the uncompressed program.  If large (>=3MB uncompressed), then enough
   to duplicate the entire compressed PT_LOAD, plus 1 page, located just after
   the brk() of the _un_compressed program.  The address and length are pre-
   calculated by PackLinuxElf64amd::defineSymbols().
   ADRM/LENM/ADRC/ADRX/LENU/ADRU/JMPU below are symbols patched in by the
   packer at compress time; they are not defined in this file. */
        movl $ ADRM,%edi  // XXX: 4GB
        push $ PROT_READ | PROT_WRITE | PROT_EXEC; pop %arg3
        movl $ LENM,%esi  // XXX: 4GB
        push $ MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS; pop %sys4
        subl %arg5l,%arg5l  //; subl %arg6l,%arg6l  // MAP_ANON ==> ignore offset
        push $ SYS_mmap; pop %rax
        syscall  // %rax= result; trashes %rcx,%r11 only
        cmpl %eax,%edi; jne msg_SELinux  // XXX: 4GB

/* Load the addresses and lengths.
   XXX: 2GB  Note that  PUSH $imm32      sign-extends to 64 bits.
   XXX: 4GB  Note that  MOVL $imm32,reg  zero-extends to 64-bits.
   (Use an temporary register to obtain 4GB range on PUSH constant.)
*/
        push $ JMPU  // for unmap in fold
        push $ ADRU  // for unmap in fold
        movl $ ADRC,%esi
        push $ LENU  // for unmap in fold
        push $ ADRX  // for upx_main
        push %r15  // LENX for upx_main
        movl %edi,%edx  //  ADRM
        subl %esi,%edx  // (ADRM - ADRC) == relocation amount
        je L80  // no copy
        addl      %edx,%ebp  // update &decompress
        //addl      %edx,%ebx  // copy==>no overlap; unfolded before overwrite
        movl %ebx,%ecx  // XXX: 4GB  [&b_info] beyond end of decompressor
        subl %esi,%ecx  // byte length
        shrl $3,%ecx  // qwords to copy
        cld
        rep; movsq
L80:
        xchgl %eax,%edi

/* Decompress the folded part of this stub, then execute it. */
        movl %ebx,%esi  // %arg2l= &b_info
        push %rax  // ret_addr after decompression
               xchgl %eax,%arg3l  // %arg3= dst for unfolding  XXX: 4GB
        lodsl; push %rax          // allocate slot on stack
               movq  %rsp,%arg4   // &len_dst ==> used by lzma for EOF
        lodsl; xchgl %eax,%arg1l  // sz_cpr  XXX: 4GB
        lodsl; movzbl %al,%arg5l  // b_method
              xchg %arg1l,%arg2l  // XXX: 4GB
        call *%rbp  // decompress
               pop %rcx  // discard len_dst
        ret  // into the just-unfolded code (its address was pushed above)

main:
////    int3  # uncomment for debugging
        pop %rbp  // &decompress
        // sz_pack2 word sits 4 bytes below _start; %rbp==ret_main, so
        // subtract (ret_main - _start) to reach it PC-relatively.
        movl -4-(ret_main - _start)(%rbp),%r15d  // sz_pack2: length before stub
        subl $ sz_Ehdr + 2*sz_Phdr + sz_l_info + sz_p_info,%r15d  // XXX: 4GB
        call unfold  // push &b_info
        // { b_info={sz_unc, sz_cpr, {4 char}}, folded_loader...}

/*__XTHEENDX__*/
/* vi:ts=8:et:nowrap */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -