⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 config.py

📁 网络蜘蛛
💻 PY
📖 第 1 页 / 共 3 页
字号:

        for option, value in optlist:
            # first parse arguments with no options
            if option in ('-h', '--help'):
                self.print_help()
                sys.exit(0)
            elif option in ('-v', '--version'):
                self.print_version_info()
                sys.exit(0)
            elif option in ('-C', '--configfile'):
                if self.check_value(value):
                    self.set_option('CONFIGFILE', self.process_value(value))
                    # No need to parse further values
                    return -1
            elif option in ('--PF', '--projectfile'):
                if self.check_value(value):
                    self.set_option('PROJECTFILE', self.process_value(value))
                    import HarvestManUtils

                    projector = HarvestManUtils.HarvestManProjectManager()

                    if projector.read_project() == 0:
                        # No need to parse further values
                        return 0

            elif option in ('-U', '--url'):
                if self.check_value(value): self.set_option('project.url', self.process_value(value))
            elif option in ('-B', '--basedir'):
                if self.check_value(value): self.set_option('project.basedir', self.process_value(value))
            elif option in ('-P', '--project'):
                if self.check_value(value): self.set_option('project.name', self.process_value(value))
            elif option in ('-H', '--html'):
                if self.check_value(value): self.set_option('download.html', self.process_value(value))
            elif option in ('-I', '--images'):
                if self.check_value(value): self.set_option('download.images', self.process_value(value))
            elif option in ('-S', '--getcss'):
                if self.check_value(value): self.set_option('download.linkedstylesheets', self.process_value(value))
            elif option in ('-i', '--getimages'):
                if self.check_value(value): self.set_option('download.linkedimages', self.process_value(value))
            elif option in ('-r', '--retryfailed'):
                if self.check_value(value): self.set_option('download.retryfailed', self.process_value(value))
            elif option in ('-l', '--localise'):
                if self.check_value(value): self.set_option('indexer.localise', self.process_value(value))
            elif option in ('-l', '--browsepage'):
                if self.check_value(value): self.set_option('display.browsepage', self.process_value(value))
            elif option in ('-c', '--checkfiles'):
                if self.check_value(value): self.set_option('download.checkfiles', self.process_value(value))
            elif option in ('-j', '--jitlocalise'):
                if self.check_value(value): self.set_option('indexer.jitlocalise', self.process_value(value))
            elif option in ('--hp', '--htmlparser'):
                if self.check_value(value): self.set_option('parser.htmlparser', self.process_value(value))
            elif option in ('-t', '--usethreads'):
                if self.check_value(value): self.set_option('system.usethreads', self.process_value(value))
            elif option in ('-s', '--threadpoolsize'):
                if self.check_value(value): self.set_option('system.threadpoolsize', self.process_value(value))
            elif option in ('-o', '--timeout'):
                if self.check_value(value): self.set_option('system.threadtimeout', self.process_value(value))
            elif option in ('--rn', '--renamefiles'):
                if self.check_value(value): self.set_option('download.rename', self.process_value(value))
            elif option in ('--fl', '--fetchlevel'):
                if self.check_value(value): self.set_option('download.fetchlevel', self.process_value(value))
            elif option in ('-F', '--fastmode'):
                if self.check_value(value): self.set_option('system.fastmode', self.process_value(value))
            elif option in ('--MT', '--maxthreads'):
                if self.check_value(value): self.set_option('system.maxtrackers', self.process_value(value))
            elif option in ('--M1', '--maxdirs'):
                if self.check_value(value): self.set_option('control.maxextdirs', self.process_value(value))
            elif option in ('--M2', '--maxservers'):
                if self.check_value(value): self.set_option('control.maxextservers', self.process_value(value))
            elif option in ('--M', '--maxfiles'):
                if self.check_value(value): self.set_option('control.maxfiles', self.process_value(value))
            elif option in ('--F1', '--urlfilter'):
                if self.check_value(value): self.set_option('control.urlfilter', self.process_value(value))
            elif option in ('--F2', '--serverfilter'):
                if self.check_value(value): self.set_option('control.serverfilter', self.process_value(value))
            elif option in ('--ep', '--epagelinks'):
                if self.check_value(value): self.set_option('control.extpagelinks', self.process_value(value))
            elif option in ('--es', '--eserverlinks'):
                if self.check_value(value): self.set_option('control.extserverlinks', self.process_value(value))
            elif option in ('--d1', '--depth'):
                if self.check_value(value): self.set_option('control.depth', self.process_value(value))
            elif option in ('--d2', '--extdepth'):
                if self.check_value(value): self.set_option('control.extdepth', self.process_value(value))
            elif option in ('--R', '--rep'):
                if self.check_value(value): self.set_option('control.robots', self.process_value(value))
            elif option in ('-E', '--errorfile'):
                if self.check_value(value): self.set_option('files.errorfile', self.process_value(value))
            elif option in ('-L', '--logfile'):
                if self.check_value(value): self.set_option('files.logfile', self.process_value(value))
            elif option in ('--UL', '--urllistfile'):
                if self.check_value(value): self.set_option('files.urllistfile', self.process_value(value))
            elif option in ('--in', '--intranet'):
                if self.check_value(value): self.set_option('download.intranet', self.process_value(value))
            elif option in ('-p', '--proxy'):
                if self.check_value(value): self.set_option('network.proxyserver', self.process_value(value))
            elif option in ('-u', '--puser'):
                if self.check_value(value): self.set_option('network.proxyuser', self.process_value(value))
            elif option in ('-w', '--ppasswd'):
                if self.check_value(value): self.set_option('network.proxypasswd', self.process_value(value))
            elif option in ('--pp', '--pport'):
                if self.check_value(value): self.set_option('network.proxyport', self.process_value(value))
            elif option in ('-k', '--cookies'):
                if self.check_value(value): self.set_option('download.cookies', self.process_value(value))
            elif option in ('--pc', '--pagecache'):
                if self.check_value(value): self.set_option('control.pagecache', self.process_value(value))
            elif option in ('--nc', '--connections'):
                if self.check_value(value): self.set_option('control.connections', self.process_value(value))
            elif option in ('--po', '--prjtimeout'):
                if self.check_value(value): self.set_option('control.projtimeout', self.process_value(value))
            elif option in ('--js', '--javascript'):
                if self.check_value(value): self.set_option('download.javascript', self.process_value(value))
            elif option in ('--ja', '--javaapplet'):
                if self.check_value(value): self.set_option('download.javaapplet', self.process_value(value))
            else:
                print 'Ignoring invalid option ', option

        # Error in option value
        if self._error:
            print self._error, value
            return -1

        return 1

    def check_value(self, value):
        """ Validate an option value supplied on the command line.

        Any non-empty value is accepted and 1 is returned. An empty
        (false) value is rejected: an error message is stored in
        self._error and 0 is returned. """

        if value:
            # Being non-empty is the only requirement at present
            return 1

        self._error='Error in option value, value should not be empty!'
        return 0

    def process_value(self, value):
        """ Convert a command line option value into the form
        stored by this class.

        Conversions are tried in this order:

          1. 'yes' / 'on' (in any letter case) become 1 and
             'no' / 'off' become 0.
          2. A value accepted by int() is returned as an integer.
          3. Anything else is returned unchanged as str(value).

        The value must be a non-empty string; the null check is
        expected to have been done before this is called. """

        val = value.lower()
        if val in ('yes', 'on'):
            return 1
        if val in ('no', 'off'):
            return 0

        try:
            return int(val)
        except ValueError:
            # Not a number, so hand the text back as it was given.
            # Only ValueError is caught so that unrelated failures
            # (e.g. a keyboard interrupt) are not silently hidden.
            return str(value)

    def print_help(self):
        """ Prints the help information """

        print PROG_HELP % {'appname' : self.appname,
                           'version' : self.version }

    def print_version_info(self):
        """ Print version information """

        print 'Version: ', self.version

    def __fix(self):
        """ Fix errors in config variables """

        # Fix url error
        # Check for protocol strings
        # http://
        url = self.url

        pindex = -1
        pindex = url.find('http://')
        if pindex == -1:
            # ftp://
            pindex = url.find('ftp://')
            if pindex == -1:
                # https://
                pindex = url.find('https://')
                if pindex == -1:
                    # www.
                    pindex = url.find('www.')
                    if pindex == -1:
                        # prepend http:// to it
                        url = 'http://' + url


        self.url = url

    def parse_config_file(self):
        """ Opens the configuration file and parses it.

        The file named by self.configfile is read line by line.
        An option line has the form

            NAME value

        where the first space separates the option name from its
        value. Tabs count as four spaces and leading whitespace on
        a line is ignored. Lines containing no space are skipped,
        as are lines whose first word begins with '#' or ';;'
        (comments). Anything following a '#' or ';;' inside the
        value is treated as a trailing comment and dropped, and the
        value is stripped of surrounding whitespace.

        Names found in self.Options() are stored with
        self.set_option(name, value); for any other name a warning
        is printed. If the file cannot be opened an error is
        printed and the program exits with status 2. """

        cfgfile = self.configfile

        # open config file
        try:
            cf=open(cfgfile, 'r')
        except IOError:
            print 'Fatal error: Cannot find config file', cfgfile
            sys.exit(2)

        # Parsing config file. The try/finally makes sure the file
        # is closed even when handling an option raises.
        try:
            for line in cf:
                # strip '\n' from the string and
                # replace tabs with spaces
                line = line.replace('\n', '').replace('\t', '    ')
                # Drop indentation. Without this a blank-looking or
                # indented line yields an empty option name, and
                # looking at its first character fails.
                line = line.lstrip()

                index = line.find(' ')
                if index == -1: continue
                name = line[:index]

                # Any line beginning with a '#' is a comment.
                # Mod: From v (1.2alpha) the config file format
                # is changed. We also support ';;' as the comment
                # character (it is the default now)
                if name.startswith('#') or name.startswith(';;'):
                    continue

                # Get value string
                value = line[(index+1):]
                # Modification: Allow comments in the config line also
                # Egs: URL http://www.python.org # The url for download
                for marker in ('#', ';;'):
                    cut = value.find(marker)
                    if cut != -1:
                        value = value[:cut]

                # strip any surrounding spaces
                value = value.strip()
                if name in self.Options().keys():
                    self.set_option(name, value)
                else:
                    print 'Invalid config option', name
        finally:
            cf.close()

    def get_program_options(self):
        """ Collect the program options.

        The command line arguments are parsed first. When that
        parse returns -1 the configuration file is parsed as well.
        In every case the config variables are fixed up last. """

        if self.parse_arguments() == -1:
            # fall back to the config file
            self.parse_config_file()

        # fix errors in config variables
        self.__fix()

    # This is important for the dictionary + attribute mechanism
    # to work.
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            return None

    def __setattr__(self, name, value):
        self[name] = value



⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -