📄 cleanasm.py
字号:
#! /usr/bin/env python
## vim:set ts=4 sw=4 et: -*- coding: utf-8 -*-
#
#  cleanasm.py --
#
#  This file is part of the UPX executable compressor.
#
#  Copyright (C) 1996-2007 Markus Franz Xaver Johannes Oberhumer
#  All Rights Reserved.
#
#  UPX and the UCL library are free software; you can redistribute them
#  and/or modify them under the terms of the GNU General Public License as
#  published by the Free Software Foundation; either version 2 of
#  the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; see the file COPYING.
#  If not, write to the Free Software Foundation, Inc.,
#  59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
#  Markus F.X.J. Oberhumer              Laszlo Molnar
#  <mfx@users.sourceforge.net>          <ml1050@users.sourceforge.net>
#

# NOTE(review): this file is Python 2 code (print statement, dict.has_key,
# string.rstrip); it will not run unmodified under Python 3.
import getopt, os, re, string, sys


# Global settings holder; main() updates label_prefix and verbose from the
# command line.  Attributes are read directly from the class (opts.verbose),
# no instance is created in the visible code.
class opts:
    label_prefix = ".L"
    verbose = 0
    # optimizer flags
    # (only call_rewrite is consulted in the visible part of main())
    auto_inline = 1
    call_rewrite = 1
    loop_rewrite = 1
    mov_rewrite = 1


# Maps an external "__xxx" helper name to [macro name, number].
# NOTE(review): inline_map is not referenced in the visible part of this file;
# the meaning of the second field is not shown here -- TODO confirm.
inline_map = {
    "__aNNalshl": ["M_aNNalshl", 1],
    "__aNahdiff": ["M_aNahdiff", 1],
    "__PIA": ["M_PIA", 999],
    "__PTS": ["M_PTS", 999],
    "__PTC": ["M_PTC", 999],
    "__U4M": ["M_U4M", 999],
}


# /***********************************************************************
# // main
# ************************************************************************/

# Entry point.  argv is a full argument vector (argv[0] is skipped); after
# option parsing exactly two positional arguments are required: the input
# file and the output file.
#
# Options: -q/--quiet decrements opts.verbose, -v/--verbose increments it,
# --label-prefix=STR replaces opts.label_prefix.
#
# NOTE(review): the visible source is truncated inside "pass 2"; whatever
# follows (remaining rewrites, writing ofile) is not shown in this chunk.
def main(argv):
    shortopts, longopts = "qv", [
        "label-prefix=", "quiet", "verbose"
    ]
    xopts, args = getopt.gnu_getopt(argv[1:], shortopts, longopts)
    for opt, optarg in xopts:
        # "if 0: pass" is a no-op that lets every real option be an elif
        if 0: pass
        elif opt in ["-q", "--quiet"]: opts.verbose = opts.verbose - 1
        elif opt in ["-v", "--verbose"]: opts.verbose = opts.verbose + 1
        elif opt in ["--label-prefix"]: opts.label_prefix = optarg
        else: assert 0, ("getopt problem:", opt, optarg, xopts, args)

    #
    assert opts.label_prefix
    assert len(args) == 2
    ifile = args[0]
    # ofile is not used in the visible portion of main()
    ofile = args[1]
    # read ifile
    # NOTE(review): the file object is never explicitly closed.
    lines = open(ifile, "rb").readlines()
    # strip trailing whitespace from every line, then drop lines that became
    # empty (filter(None, ...) keeps only truthy items)
    lines = filter(None, map(string.rstrip, lines))
    #
    #
    # True for the call/jump/loop mnemonics listed below; pass 1 uses this to
    # decide whether hex literals in the operands get converted to decimal.
    def inst_has_label(inst):
        return inst in [
            "call", "ja", "jae", "jb", "jbe", "jcxz", "je",
            "jg", "jge", "jl", "jle", "jmp", "jne", "loop",
        ]
    # label key -> 4-element list v, shared by parse_label() and add_label():
    #   v[0], v[1]  1, 2 for an "external 2-byte label"; 0, 0 for a local one
    #   v[2]        name (set to the key)
    #   v[3]        usage counter
    labels = {}
    # Extract a label from the operand string of a call/jump instruction.
    # Three operand shapes are tried in order and the first match wins:
    #   "... 2 __name" or "... R_386_PC16 __name"  -> external, key "__name"
    #   "0x<hex>"                                  -> local, key is the hex text
    #   "<hex> <..."                               -> local, key is the hex text
    # Returns (k, v); asserts if nothing matched, or if k is already in
    # labels with different v[0:2].  Does not modify labels itself.
    def parse_label(inst, args):
        k = v = None
        m = re.search(r"^(.*?)\b(2|R_386_PC16)\s+(__\w+)$", args)
        if m and k is None:
            # external 2-byte label
            k, v = m.group(3).strip(), [1, 2, None, 0]
        m = re.search("^0x([0-9a-z]+)$", args)
        if m and k is None:
            # local label
            k, v = m.group(1).strip(), [0, 0, None, 0]
        m = re.search("^([0-9a-z]+)\s+<", args)
        if m and k is None:
            # local label
            k, v = m.group(1).strip(), [0, 0, None, 0]
        assert k and v, (inst, args)
        v[2] = k                # new name
        if labels.has_key(k):
            assert labels[k][:2] == v[:2]
        return k, v
    # Register label k (storing v on first sight, otherwise checking that
    # v[0:2] agrees with the stored entry), bump its usage counter, return k.
    def add_label(k, v):
        if labels.has_key(k):
            assert labels[k][:2] == v[:2]
        else:
            labels[k] = v
        labels[k][3] += 1       # usage counter
        return k

    # Output records built by pass 1, one per instruction:
    #   [label, inst, args, args_label]
    # An entry whose inst is "*DEL*" is treated as deleted.
    olines = []
    # Match a run of non-deleted olines entries against instruction patterns.
    #
    #   pos    index in olines where the scan starts
    #   mlen   number of entries to collect; the sign gives the scan
    #          direction (negative = walk backwards from pos)
    #   m      list of [inst_pattern, args_pattern] pairs; only the LAST
    #          abs(mlen) pairs are compared (indices -abs(mlen) .. -1)
    #   debug  if true, print the collected candidates
    #
    # Deleted entries are skipped while collecting.  The collected entries
    # are compared in ascending position order.  Returns a list of
    # [pos, m0, m1] (one per matched entry, m0/m1 being the truthy match
    # results), or [] if the scan runs off either end of olines or any
    # pattern fails.
    def omatch(pos, mlen, m, debug=0):
        assert len(m) >= abs(mlen)
        def sgn(x):
            if x < 0: return -1
            if x > 0: return 1
            return 0
        # A pattern containing "^", "*" or "$" is used as a regular
        # expression searched in the lower-cased text; any other pattern is
        # compared case-insensitively for equality.  A None text never
        # matches.
        def match(a, b):
            if b is None:
                return False
            if "^" in a or "*" in a or "$" in a:
                # regexp
                return re.search(a, b.lower())
            else:
                return a.lower() == b.lower()
        # collect abs(mlen) positions of live (non-deleted) entries
        mpos = []
        while len(mpos) != abs(mlen):
            if pos < 0 or pos >= len(olines):
                return []
            o = olines[pos]
            if o[1] != "*DEL*":
                mpos.append(pos)
            pos += sgn(mlen)
        # a backward scan collected positions in descending order
        if mlen < 0:
            mpos.reverse()
        if debug and 1: print mlen, m, [olines[x] for x in mpos]
        dpos = []
        # negative indexing lines up the tail of m with all of mpos
        i = -abs(mlen)
        while i < 0:
            pos = mpos[i]
            o = olines[pos]
            assert o[1] != "*DEL*"
            assert len(m[i]) == 2, (i, m)
            m0 = match(m[i][0], o[1])
            m1 = match(m[i][1], o[2])
            if not m0 or not m1:
                return []
            dpos.append([pos, m0, m1])
            i += 1
        assert len(dpos) == abs(mlen)
        return dpos
    # Mark every entry listed in dpos as deleted, then overwrite entry i with
    # the given inst/args and clear its args_label.
    def orewrite_inst(i, inst, args, dpos):
        for pos, m0, m1 in dpos:
            olines[pos][1] = "*DEL*"
        olines[i][1] = inst
        olines[i][2] = args
        olines[i][3] = None
    # Mark every entry listed in dpos as deleted, then turn entry i into a
    # reference to label k: args is cleared and args_label is set to the
    # registered label key (add_label also bumps the usage counter).
    # NOTE(review): not called in the visible portion of main().
    def orewrite_call(i, k, v, dpos):
        for pos, m0, m1 in dpos:
            olines[pos][1] = "*DEL*"
        v[2] = k
        olines[i][2] = None
        olines[i][3] = add_label(k, v)

    #
    # pass 1
    # Parse the input listing into olines.  The "objdump" comment below
    # suggests the input is an objdump disassembly -- TODO confirm.
    func = None
    for i in range(len(lines)):
        l = lines[i]
        # function header line: 8-16 zeros, then "<name>:" (optionally
        # "<.text.name>:"); leading/trailing underscores are stripped
        m = re.search(r"^0{8,16}\s*<(\.text\.)?(\w+)>:", l)
        if m:
            func = re.sub(r"^_+|_+$", "", m.group(2))
        # only lines belonging to LzmaDecode are processed
        if not func in ["LzmaDecode"]:
            continue
        # instruction line: "<addr>: <mnemonic><rest>"
        m = re.search(r"^(\s*[0-9a-z]+):\s+(\w+)(.*)", l)
        if not m:
            continue
        label = m.group(1).strip()
        inst = m.group(2).strip()
        args = ""
        if m.group(3):
            args = m.group(3).strip()
        if not inst_has_label(inst):
            # rewrite every 0x... literal in the operands as decimal
            def hex2int(m): return str(int(m.group(0), 16))
            args = re.sub(r"\b0x[0-9a-fA-F]+\b", hex2int, args)
        #
        if 1 and inst in ["movl",] and re.search(r"\b[de]s\b", args):
            # work around a bug in objdump 2.17 (fixed in binutils 2.18)
            inst = "mov"
        m = re.search(r"^(.+?)\b(0|0x0)\s+(\w+):\s+(1|2|R_386_16|R_386_PC16)\s+(__\w+)$", args)
        if m:
            # 1 or 2 byte reloc
            # replace the zero placeholder and the reloc annotation with the
            # referenced "__name" symbol
            args = m.group(1) + m.group(5)
        olines.append([label, inst, args, None])

    #
    # pass 2
    # Peephole rewrites on olines; matched neighbours are marked "*DEL*"
    # in place, so the order of the checks below matters.
    for i in range(len(olines)):
        label, inst, args, args_label = olines[i]
        #
        if inst == "*DEL*":
            continue
        #
        if opts.call_rewrite and inst in ["call"]:
            k, v = parse_label(inst, args)
            if v[:2] == [1, 2]:     # external 2-byte
                if k == "__aNahdiff":
                    # four pushes directly before the call: two bp-relative
                    # words of any offset, then [bp+6], then [bp+8]; on a
                    # match the pushes and the call itself are all deleted
                    s = [
                        ["push", "word ptr [bp+8]"],
                        ["push", "word ptr [bp+6]"],
                        ["push", r"word ptr \[bp([+-](\d+))\]$"],
                        ["push", r"word ptr \[bp([+-](\d+))\]$"],
                    ]
                    dpos = omatch(i-1, -4, s)
                    if dpos:
                        orewrite_inst(i, "*DEL*", "", dpos)
                        continue
                if k in ["__LMUL", "__U4M",]:
                    # "mov bx,768; xor cx,cx; call; shl ax,1; rcl dx,1"
                    # collapses into the single M_U4M_dxax_0x0600 macro
                    s1 = [
                        ["mov", "bx,768"],      # 0x300
                        ["xor", "cx,cx"],
                    ]
                    s2 = [
                        ["shl", "ax,1"],
                        ["rcl", "dx,1"],
                    ]
                    dpos1 = omatch(i-1, -2, s1)
                    dpos2 = omatch(i+1, 2, s2)
                    if dpos1 and dpos2:
                        orewrite_inst(i, "M_U4M_dxax_0x0600", "", dpos1 + dpos2)
                        continue
                    s = [
                        ["mov", "bx,word ptr [bx]"],
                        ["xor", "cx,cx"],
                    ]
                    dpos = omatch(i-1, -2, s, debug=0)
                    # this rewrite is disabled by the "0 and"
                    if 0 and dpos:
                        orewrite_inst(i, "M_U4M_dxax_00bx_ptr", "", dpos)
                        continue
                    # mlen == -1 compares only s[-1] ("xor cx,cx") against
                    # the previous live instruction
                    dpos = omatch(i-1, -1, s)
                    if dpos:
                        orewrite_inst(i, "M_U4M_dxax_00bx", "", dpos)
                        continue
    # NOTE(review): the visible source ends here, inside pass 2.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -