⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 common.py

📁 网络蜘蛛
💻 PY
📖 第 1 页 / 共 2 页
字号:
    # SetObject('USER_AGENT', value)
    global RegisterObj
    RegisterObj.USER_AGENT = user_agent

def SetUserDebug(message):
    """ Remember an error message related to user settings
    in the config file/project file.

    A non-empty message is appended to RegisterObj.userdebug unless an
    equal message is already stored there; empty messages are ignored.
    Finish() prints the collected messages. """

    # NOTE(review): an older comment here said this function was replaced
    # by a two-argument call SetUserDebug('USER_AGENT', value). That does
    # not match this signature, so it has been dropped - confirm.

    # A plain membership test replaces the former list.index() call
    # wrapped in a bare except, which would also have hidden unrelated
    # errors.
    if message and message not in RegisterObj.userdebug:
        RegisterObj.userdebug.append(message)

def varprint(arg, *args):
    """ Our custom printing function.

    Builds one line from str(arg) followed by str() of every extra
    argument, separated by single spaces and terminated by a newline,
    and writes it to sys.stdout. If RegisterObj.writeflag is set, the
    same line is also written to RegisterObj.ofs and flushed; an IOError
    raised there is printed and otherwise ignored. """

    line = ' '.join([str(arg)] + [str(a) for a in args]) + '\n'
    sys.stdout.write(line)

    if RegisterObj.writeflag:
        try:
            RegisterObj.ofs.write(line)
            RegisterObj.ofs.flush()
        except IOError as e:
            # "except ... as" and print() parse on Python 2.6+ and 3.
            print(e)

def Initialize():
    """ Initialize the globals module. This
    initializes the registry object and a basic
    config object in the registry.

    Returns -1 without doing anything if RegisterObj.ini is already 1;
    otherwise returns None after storing a fresh HarvestManStateObject
    in RegisterObj.config and trying to open the configured log file
    for writing. """

    if RegisterObj.ini == 1:
        return -1

    RegisterObj.ini = 1

    from config import HarvestManStateObject

    cfg = HarvestManStateObject()
    RegisterObj.config = cfg

    try:
        RegisterObj.ofs = open(cfg.logfile, 'w')
    except (IOError, OSError) as e:
        # open() raises IOError on Python 2, so catching only OSError
        # let a failed open escape this handler.
        print(e)
    else:
        # The original assigned a local "writeflag" here, so the flag
        # that varprint() checks on the registry was never set.
        RegisterObj.writeflag = 1

def Finish():
    """ Clean up this module. This function
    can be called at program exit or when
    handling signals to clean up.

    Flushes and closes the log stream if one is open, resets the
    writeflag/ini/ofs fields of RegisterObj, prints any messages
    collected in RegisterObj.userdebug and then empties that list. """

    # The stream is None after a previous Finish() and may be unset if
    # the log file never opened; calling flush() on it would raise.
    ofs = getattr(RegisterObj, 'ofs', None)
    if ofs is not None:
        ofs.flush()
        ofs.close()

    RegisterObj.writeflag = 0
    RegisterObj.ini = 0
    RegisterObj.ofs = None

    # inform user of config file errors
    if RegisterObj.userdebug:
        print("Some errors were found in your configuration, please correct them!")
        for pos, message in enumerate(RegisterObj.userdebug):
            # Same "N : message" layout as before, numbered from 1.
            print('%s : %s' % (pos + 1, message))

    RegisterObj.userdebug = []

def info(arg, *args):
    """ Emit a basic informational message when verbosity is 1 or more """

    # Level 1 covers the basic messages such as project info
    # and the final download stats; level 0 prints nothing.
    if GetObject('config').verbosity >= 1:
        varprint(arg, *args)

def moreinfo(arg, *args):
    """ Emit a more detailed message when verbosity is 2 or more """

    # Level 2 adds detailed information regarding each downloaded
    # link on top of the basic info; lower levels print nothing here.
    if GetObject('config').verbosity >= 2:
        varprint(arg, *args)

def extrainfo(arg, *args):
    """ Emit an extra-information message when verbosity is 3 or more """

    # Level 3 adds more information on each link, information on each
    # thread downloading the link, and some further extra information.
    if GetObject('config').verbosity >= 3:
        varprint(arg, *args)

def debug(arg, *args):
    """ Emit a debug message when verbosity is 4 or more """

    # Level 4 prints maximum information plus extra debugging
    # information; lower levels print nothing here.
    if GetObject('config').verbosity >= 4:
        varprint(arg, *args)

def moredebug(arg, *args):
    """ Emit a verbose debug message when verbosity is 5 or more """

    # Level 5 prints maximum information plus maximum debugging
    # information; lower levels print nothing here.
    if GetObject('config').verbosity >= 5:
        varprint(arg, *args)


def wasOrWere(val):
    """ Pick the verb matching a count: 'were' above one, else 'was' """

    if val > 1:
        verb = 'were'
    else:
        verb = 'was'
    return verb

def plural(str_val):
    """ Return the plural of a word when its count is more than one.

    str_val is a (word, count) pair, passed as a single argument exactly
    as with the former tuple-parameter signature. For a count above one,
    a trailing 'y' preceded by a non-vowel becomes 'ies' and every other
    word gets 's' appended; otherwise the word is returned unchanged. """

    # Unpack inside the body: tuple parameters in the signature were
    # removed in Python 3, and the old name shadowed the builtin str.
    word, count = str_val

    if count <= 1:
        return word

    # endswith() and slicing are safe for an empty word, where indexing
    # the last character raised IndexError. Vowel + 'y' words such as
    # "key" take a plain 's' instead of becoming "keies".
    if word.endswith('y') and word[-2:-1].lower() not in ('a', 'e', 'i', 'o', 'u'):
        return word[:-1] + 'ies'
    return word + 's'

# File type identification tables.
# These are the precursor of a more generic file identifier
# based on the '/etc/magic' file on unices.

# Maps a type name to [offset, (signature strings, ...)]: a file is of
# that type when one of the strings is found at that offset in its head.
# NOTE(review): "BM6" looks narrower than the usual two-byte "BM" bitmap
# marker - confirm whether the third character is intended.
signatures = dict(
    gif=[0, ("GIF87a", "GIF89a")],
    jpeg=[6, ("JFIF",)],
    bmp=[0, ("BM6",)],
)

# Common extension aliases: other extensions accepted for each type name.
aliases = dict(
    gif=(),
    jpeg=("jpg", "jpe", "jfif"),
    bmp=("dib",),
)

def bin_crypt(data):
    """ Encryption using binascii and obfuscation.

    Returns the hex encoding of obfuscate(data). An empty string is
    returned as ''. If a TypeError or ValueError is raised, a message is
    printed and the input is returned unchanged. """

    if data == '':
        return ''

    try:
        return binascii.hexlify(obfuscate(data))
    except (TypeError, ValueError) as e:
        # One handler replaces two identical ones; "as" and print()
        # parse on both Python 2.6+ and Python 3.
        print('Error in encrypting data: < %s > %s' % (data, e))
        return data

def bin_decrypt(data):
    """ Decrypttion using binascii and deobfuscation """

    if data=='':
        return ''

    try:
        return unobfuscate(binascii.unhexlify(data))
    except TypeError, e:
        print 'Error in decrypting data: <',data,'>', e
        return data
    except ValueError, e:
        print 'Error in decrypting data: <',data,'>', e
        return data


def obfuscate(data):
    """ Obfuscate a string using repeated xor.

    The first output character is data[0] xor data[1]; every later
    output character is data[x] xor the previous output character. The
    result has the same length as the input. Input shorter than two
    characters has no second character to xor with and is returned
    unchanged. """

    # The original indexed data[1] unconditionally and so raised
    # IndexError on a one-character string.
    if len(data) < 2:
        return data

    prev = ord(data[0]) ^ ord(data[1])
    codes = [prev]
    for ch in data[1:]:
        prev ^= ord(ch)
        codes.append(prev)

    # Join once instead of growing a string one character at a time.
    return ''.join([chr(code) for code in codes])

def unobfuscate(data):
    """ Unobfuscate a xor obfuscated string.

    Every character from the second onwards is recovered as data[x] xor
    data[x-1]; the first character is data[0] xor the recovered second
    character. Input shorter than two characters is returned unchanged. """

    if len(data) < 2:
        return data

    codes = [ord(ch) for ch in data]

    # Characters 1..n-1 of the plain text. The original built these in
    # reverse and flipped them with reduce(), which raised TypeError on
    # two-character input and is not a builtin on Python 3.
    tail = [codes[x] ^ codes[x - 1] for x in range(1, len(codes))]
    first = codes[0] ^ tail[0]

    return ''.join([chr(code) for code in [first] + tail])


def filetype(filename, usemagicfile=False):
    """ Return filetype of a file by reading its
    signature.

    Reads the first 20 bytes of the file and returns the first key of
    the module-level signatures table for which one of the signature
    strings sits at the table's offset. Returns '' when the file does
    not exist, cannot be opened or read, or matches nothing.
    usemagicfile is accepted but not used here. """

    fullpath = os.path.abspath(filename)
    if not os.path.exists(fullpath):
        return ''

    try:
        # The with-block closes the handle on every path; the original
        # never closed it.
        with open(fullpath, 'rb') as f:
            sigbuffer = f.read(20)
    except (IOError, OSError) as e:
        print(e)
        return ''

    for key, (offset, sigs) in signatures.items():
        for sig in sigs:
            # The buffer is bytes, so a text signature must be encoded
            # before comparing; on Python 2 str is already bytes.
            if not isinstance(sig, bytes):
                sig = sig.encode('ascii')
            # Test the exact offset. find() reports only the first
            # occurrence, so an earlier stray copy of the signature hid
            # a valid one at the required position.
            if sigbuffer.startswith(sig, offset):
                return key

    return ''

def rename(filename):
    """ Rename a file by looking at its signature.

    If filetype() recognises the file and its extension is neither the
    detected type name nor one of that type's aliases, the extension is
    replaced with the type name. The old -> new path mapping is stored
    in RegisterObj.oldnewmappings and the new path in
    RegisterObj.modfilename. Returns the new path, or '' when nothing
    was renamed (unknown type, extension already fine, type missing from
    the aliases table, or os.rename failed). """

    ftype = filetype(filename)
    if not ftype:
        return ''

    fullpath = os.path.abspath(filename)
    root, ext = os.path.splitext(fullpath)
    # extension without the leading dot, lower-cased
    extn = ext[1:].lower()

    if extn == ftype:
        return ''
    # A type with no aliases entry is left alone, as before.
    if ftype not in aliases or extn in aliases[ftype]:
        return ''

    newname = root + '.' + ftype
    try:
        os.rename(fullpath, newname)
    except OSError as e:
        print(e)
        return ''

    # build up a dictionary of oldfilename => newfilename
    # mappings, this will be useful later
    RegisterObj.oldnewmappings[fullpath] = newname
    RegisterObj.modfilename = newname
    return newname

def htmlLikeFile(filename):
    """ Tell whether a filename looks like an html file or a possible
        candidate for generating one; returns 1 if so, else 0 """

    # Note: dynamic server-side scripts (php, psp, asp, pl, jsp and cgi)
    # are treated as possible html candidates for now, although they
    # might really be generating non-html content (like dynamic images.)
    import os

    html_like = ('.htm', '.html', '.php', '.asp', '.jsp', '.psp', '.pl', '.cgi')
    root, suffix = os.path.splitext(filename)
    return 1 if suffix.lower() in html_like else 0



⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -