⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 config.py

📁 网络蜘蛛
💻 PY
📖 第 1 页 / 共 3 页
字号:
                            'download.fetchlevel' : ('fetchlevel', 'int'),
                            'download.linkedstylesheets' : ('getstylesheets', 'int'),
                            'download.linkedimages' : ('getimagelinks', 'int'),
                            # Two new variables added (Oct 1 2003)
                            'download.javascript'   : ('javascript', 'int'),
                            'download.javaapplet'   : ('javaapplet', 'int'),
                            # Two new variables added (Sep 04 2003)
                            'download.cookies' : ('cookies', 'int'),
                            'control.pagecache' : ('pagecache', 'int'),
                            'control.depth' : ('depth', 'int'),
                            'control.extdepth' : ('extdepth', 'int'),
                            'control.maxextdirs' : ('maxextdirs', 'int'),
                            'control.maxextservers' : ('maxextservers', 'int'),
                            'download.retryfailed' : ('retryfailed', 'int'),
                            'control.robots' : ('robots', 'int'),
                            'control.extpagelinks' : ('epagelinks', 'int'),
                            'control.extserverlinks' : ('eserverlinks', 'int'),
                            'control.connections' : ('connections', 'int'),
                            'indexer.localise' : ('localise', 'int'),
                            'download.html': ('html', 'int'),
                            'system.fastmode' : ('fastmode', 'int'),
                            'control.maxfiles' : ('maxfiles', 'int'),
                            'download.images' : ('images', 'int'),
                            'control.urlfilter' : ('urlfilter', 'str'),
                            'control.serverfilter' : ('serverfiler', 'str'),
                            'control.wordfilter' : ('wordfilter', 'str'),
                            'control.urlfilterre' : (('inclfilter', 'list'), ('exclfilter', 'list'),
                                                   ('allfilters', 'list')),
                            'control.serverfilterre' : (('serverinclfilter', 'list'),
                                                      ('serverexclfilter', 'list'),
                                                      ('allserverfilters', 'list')),
                            'control.urlpriority' : ('urlpriority', 'str'),
                            'control.serverpriority' : ('serverpriority', 'str'),
                            'control.urlprioritydict' : ('urlprioritydict', 'dict'),
                            'control.serverprioritydict' : ('serverprioritydict', 'dict'),
                            'control.tidyhtml' : ('tidyhtml', 'int'),
                            'control.projtimeout' : ('projtimeout', 'float'),
                            'control.subdomain'   : ('subdomain', 'int'),
                            'control.skipqueryforms' : ('skipqueryforms', 'int'),
                            'control.requests'    : ('requests', 'int'),
                            'control.bytes'       : ('bytes', 'float')
                            }

    def set_option(self, option, value, negate=0):
        """ Set the config variable(s) mapped to the passed option.

        option -- option name, i.e. a key of the options table
                  (for example 'control.depth')
        value  -- the new value. A non-tuple value is used for an
                  option that maps to one variable; a tuple supplies
                  one item per variable, in table order, for an
                  option that maps to several variables
        negate -- if true, store the logical 'not' of each converted
                  value instead of the value itself

        Returns 1 on success. Returns -1, without changing any
        variable, if the option is unknown, the value is an empty
        string, the shape of the value does not match the option,
        or a value cannot be converted to its declared type. """

        # Constructors for the type names used in the options table.
        # A plain lookup is used instead of eval() on the type name.
        casts = {'int': int, 'float': float, 'str': str,
                 'list': list, 'dict': dict,
                 'tuple': tuple, 'bool': bool}

        if option not in self.__options:
            return -1

        # an empty string is never accepted as a value
        if value == "":
            return -1

        spec = self.__options[option]
        # A multi-variable option is a tuple of (variable, type)
        # tuples; a single-variable option is one such tuple.
        multi = type(spec[0]) is tuple

        # A tuple value only makes sense for a multi-variable option
        # and vice versa; anything else is a caller error.
        if multi != (type(value) is tuple):
            return -1

        if multi:
            # need exactly one value per variable
            if len(spec) != len(value):
                return -1
            # entries that are not (variable, type) pairs are skipped
            todo = [(entry[0], entry[1], item)
                    for entry, item in zip(spec, value)
                    if len(entry) == 2]
        else:
            todo = [(spec[0], spec[1], value)]

        # Convert everything first so that a failure half way
        # through cannot leave the option partially applied.
        converted = []
        for key, type_name, raw in todo:
            cast = casts.get(type_name)
            if cast is None:
                return -1
            try:
                fval = cast(raw)
            except (TypeError, ValueError):
                return -1
            # NOTE: negation is applied whatever the type is; for
            # str/list/dict values this stores a bool that tells
            # whether the converted value was empty.
            if negate:
                fval = not fval
            converted.append((key, fval))

        # set the variables on the config object
        for key, fval in converted:
            self[key] = fval

        return 1

    def get_variable(self, option):
        """ Return the current value of the variable(s) mapped to
        the passed option.

        For an option that maps to a single variable, the value of
        that variable is returned. For an option that maps to several
        variables, a list with the value of each variable, in table
        order, is returned. Returns None if the option is not known.
        A variable that has not been set yet is reported as None. """

        if option not in self.__options:
            return None

        def current(name):
            # Values are stored as items of the config object (see
            # set_option), so read them back by item using the name
            # held in 'name'. Plain attribute syntax would look up an
            # attribute literally called after the local variable.
            try:
                return self[name]
            except KeyError:
                return None

        spec = self.__options[option]

        if type(spec[0]) is not tuple:
            # single (variable, type) entry
            return current(spec[0])

        # tuple of (variable, type) entries
        return [current(entry[0]) for entry in spec]

    def get_variable_type(self, option):
        """ Return the type name(s) declared for the passed option.

        The result is a single type name (such as 'int') for an
        option that maps to one variable, and a list of type names,
        in table order, for an option that maps to several variables.
        None is returned when the option is not in the options table. """

        # unknown option: nothing to report
        if option not in self.__options.keys():
            return None

        spec = self.__options[option]

        # a tuple of (variable, type) entries => one type per entry
        if type(spec[0]) is tuple:
            return [entry[1] for entry in spec]

        # a lone (variable, type) entry
        return spec[1]


    def Options(self):
        """ Return the options table, i.e. the dictionary that maps
        option names to their (variable, type) entries. The dictionary
        itself is returned, not a copy. """

        table = self.__options
        return table

    def parse_arguments(self):
        """ Parse the command line arguments """

        # This function has 3 return values
        # -1 => no cmd line arguments/invalid cmd line arguments
        # ,so force program to read config file.
        # 0 => existing project file supplied in cmd line
        # 1 => all options correctly read from cmd line
        import sys

        # return value
        res=0
        # if no cmd line arguments, then use config file,
        # return -1
        if len(sys.argv)==1:
            return -1

        # Otherwise parse the arguments, the command line arguments
        # are the same as the variables(dictionary keys) of this class.
        # Description
        # Options needing no arguments
        #
        # -h => prints help
        # -v => prints version info

        # Options needing arguments (short type)
        #
        # -U => url
        # -P => project
        # -B => base directory
        # -b => browse page
        # -C => new config filename
        # -V => verbosity
        # -M => max files setting
        # -s => thread pool size
        # -o => timeout limit
        # -E => error file name
        # -L => logfile name
        # -p => proxy server
        # -u => username for proxy
        # -w => password for proxy
        # -H => html flag
        # -I => image flag
        # -S => get stylesheets downloaded
        # -i => get images downloaded
        # -r => retry failed links (can have an option but not necessary)
        # -l => localise links
        # -t => threads flag
        # -F => fastmode flag
        # -R => REP flag
        # -n => site username
        # -d => site password
        # -k => cookies support

        # Long type options
        # --url => url
        # --project => project
        # --help => help string
        # --version => version string
        # --basedir => base directory
        # --browsepage => create a project browse page?
        # --configfile => config filename
        # --projectfile => project filename
        # --verbosity => verbosity
        # --html => html
        # --images => images
        # --getcss => css setting
        # --getimages => image setting
        # --ep/--epagelinks => external page links
        # --es/--eserverlinks => external server links
        # --d1/--depth => depth setting
        # --d2/--extdepth => external depth setting
        # --M1/--maxdirs => maxdirs setting
        # --M2/--maxservers => maxservers setting
        # --maxfiles => maxfiles setting
        # --rep => Robot Exclusion Principle setting
        # --F1/--urlfilter => url filters
        # --F2/--serverfilter => server filters
        # --retryfailed => retry failed setting
        # --localise => localise links setting
        # --MT/--maxthreads => maxthreads setting
        # --usethreads => usethreads setting
        # --threadpoolsize => thread pool size setting
        # --timeout => timeout setting
        # --rn/--renamefiles => renamefiles setting
        # --fl/--fetchlevel => fetch level setting
        # --fastmode => fast mode setting
        # --errorfile => errorfile setting
        # --logfile => logfile setting
        # --UL/--urlslistfile => urls list file
        # --in/--intranet => intranet setting
        # --proxy => proxy server setting
        # --puser => proxy user setting
        # --ppasswd => proxy passwd setting
        # --pp/--pport => proxy port setting
        # --username => site username
        # --userpasswd => site password
        # --pc/--pagecache => support for webpage caching/update
        # --cookies => support for cookies
        # --nc/--connections => number of network connections
        # --po/--prjtimeout => timeout value in seconds for the project
        # --js/--javascript => option for controlling download of javascript files
        # --ja/--javaapplet => option for controlling download of java applet files

        shortoptionsstring = 'hvU:P:B:b:C:V:M:s:o:E:L:p:u:w:H:I:i:t:F:R:S:l:r:c:j:n:d:k:'
        longoptions = [ "configfile=", "projectfile=",
                        "url=", "project=", "help", "version", "basedir=", "browsepage=","verbosity=",
                        "html=", "images=", "getcss=", "getimages=", "epagelinks=", "eserverlinks=",
                        "depth=","d1=", "extdepth=", "d2=", "M1=", "maxdirs=", "M2=", "maxservers=",
                        "maxfiles=", "rep=", "F1=", "urlfilter=", "F2=", "serverfilter=", "retryfailed=",
                        "localise=", "MT=", "maxthreads=", "usethreads", "threadpoolsize=", "timeout=",
                        "rn", "renamefiles", "fl=", "fetchlevel=", "fastmode=", "errorfile=", "logfile=",
                        "UL=", "urlslistfile=", "in", "intranet=", "proxy=", "puser=", "ppasswd=", "pp=",
                        "pport=", "jitlocalise", "checkfiles", "htmlparser=", "hp=", "username=",
                        "userpasswd=", "cookies=", "pagecache=", "pc=", "nc=", "connections=", "po=",
                        "prjtimeout=", "js==", "javascript=", "ja=", "javaapplet="
                      ]

        arguments = sys.argv[1:]
        try:
            optlist, args = getopt.getopt(arguments, shortoptionsstring, longoptions)
        except getopt.GetoptError, e:
            print 'Error: ',e
            return -1

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -