⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 genconfig.py

📁 网络蜘蛛
💻 PY
📖 第 1 页 / 共 2 页
字号:
                print 'Please enter y, n or <Enter>.'
            elif res==' ':
                # Abort signalled using <space> key
                # generate the config file
                print '<Space> key pressed! '
                print 'Aborting dialog...'
                self.GenConfigFile()            

        if res=='y': return 1
        elif res=='n': return 0
        elif res=='': return expected
            
    def UserDialog(self):
        """Interactively ask the user for every config setting.

        Each answer is stored as an attribute on self. For optional
        questions an empty answer leaves the corresponding attribute
        untouched, unless an explicit fallback is assigned below.

        Returns 0 if the user declines at the first question and 1
        once every question has been asked.

        Raises ValueError if a numeric answer cannot be converted
        by int() or float().
        """

        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print('You are about to create a config file for the "HarvestMan" program.\n')
        print('You will be asked questions to which you can answer yes or no')
        print('and questions for which you need to type a response string.')
        print('If you press the [Enter] key for any question, the default value of')
        print('its setting will be used. If you want to discontinue at any time,')
        print('press the <space> key as the answer to a question. (If you abort,')
        print('the program will assume default values for the rest of the options.)')
        print('')
        res=self.MakeYesOrNoQuery('Do you want to continue')
        if res==0: return 0

        # Project basics
        self.project=self.MakeStringQuery('Enter the name of this HarvestMan project: ')
        self.url=self.MakeStringQuery('Enter the starting url for this project: ')
        self.basedir=self.MakeStringQuery('Enter the base directory : ')
        self.images=self.MakeYesOrNoQuery('Download images')
        self.html=self.MakeYesOrNoQuery('Download html pages')
        self.getimagelinks=self.MakeYesOrNoQuery('Always get images linked from a page')
        self.getstylesheets=self.MakeYesOrNoQuery('Always get stylesheets associated to a page')

        # Proxy stuff
        proxy=self.MakeYesOrNoQuery('Do you connect to internet through a proxy server', 0)
        if proxy:
            server=self.MakeStringQuery('Enter your proxy server\'s name/ip: ')
            port=self.MakeStringQuery('Enter the proxy port: ', 0)
            if not port:
                # No port given: fall back to port 80
                port=80
            else:
                port=int(port)
            # change for version 1.1 (port is specified separately)
            self.proxy=server
            self.proxyport=port
            auth=self.MakeYesOrNoQuery('Does your proxy need authentication', 0)
            if auth:
                self.puser=self.MakeStringQuery('Enter Proxy Username: ')
                # NOTE(review): getpass is used only when os.name is not
                # 'posix'; on posix the password goes through the ordinary
                # string query. Confirm this platform check is intended.
                if os.name != 'posix':
                    import getpass
                    self.ppasswd=getpass.getpass('Enter Proxy Password: ')
                else:
                    self.ppasswd=self.MakeStringQuery('Enter Proxy Password: ')

        # Cookie stuff
        self.cookies = self.MakeYesOrNoQuery('Enable Cookies Support')
        # Javascript/java stuff
        self.javascript = self.MakeYesOrNoQuery('Fetch javascripts')
        self.javaapplet = self.MakeYesOrNoQuery('Download java applets')

        # Threading and network limits
        self.usethreads=self.MakeYesOrNoQuery('Multithreaded')
        mthreads=self.MakeStringQuery('Maximum number of trackers(threads) to run: ', 0, '(More threads you run, the faster is the download)')
        if mthreads:
            self.maxtrackers = int(mthreads)
            # The pool size is asked only when a tracker count was given
            threadpoolsize=self.MakeStringQuery('Size of the thread pool: ', 0)
            if threadpoolsize:
                self.threadpoolsize=int(threadpoolsize)
            else:
                self.threadpoolsize=20
        maxconn=self.MakeStringQuery("Maximum number of simultaneous network connections: ", 0)
        if maxconn:
            self.connections=int(maxconn)
        timeout=self.MakeStringQuery('Timeout value(in seconds) for a download thread: ', 0)
        if timeout: self.timeout=int(timeout)

        # Crawl scope
        self.rep=self.MakeYesOrNoQuery('Obey Robot Exclusion Principle')
        depth=self.MakeStringQuery('Depth of url rel. to starting directory (default is 10) : ', 0)
        if depth: self.depth=int(depth)
        self.eserverlinks=self.MakeYesOrNoQuery('Fetch Pages from External Servers', 0)
        if self.eserverlinks:
            maxextserverlinks=self.MakeStringQuery('Limit of number of external servers: ', 0)
            if maxextserverlinks: self.maxextservers=int(maxextserverlinks)
            edepth=self.MakeStringQuery('Depth of external urls (relative to base server): ', 0)
            if edepth: self.extdepth=int(edepth)
        self.epagelinks=self.MakeYesOrNoQuery('Fetch Pages from External Directories')
        if self.epagelinks:
            maxextpagelinks=self.MakeStringQuery('Limit of number of external directories: ', 0)
            if maxextpagelinks: self.maxextdirs=int(maxextpagelinks)
            edepth=self.MakeStringQuery('Depth of external directories : ', 0)
            # NOTE(review): this stores into self.extdepth, the same
            # attribute written for external servers above - confirm.
            if edepth: self.extdepth=int(edepth)

        # Filters and priorities
        # (local is not called 'filter' so the builtin stays usable)
        usefilter=self.MakeYesOrNoQuery('Filter Urls', 0)
        if usefilter:
            self.urlfilter=self.MakeStringQuery('Enter/Paste url filter regular expression here: ', 0)
        sfilter=self.MakeYesOrNoQuery('Filter External Servers', 0)
        if sfilter:
            self.serverfilter=self.MakeStringQuery('Enter/Paste server filter regular expression here: ', 0)
        priority1= self.MakeYesOrNoQuery('Apply user priorities for urls',0)
        if priority1:
            self.urlpriority=self.MakeStringQuery('Enter/Paste url priority string here:', 0)
        priority2= self.MakeYesOrNoQuery('Apply  priorities for servers',0)
        if priority2:
            self.serverpriority=self.MakeStringQuery('Enter/Paste server priority string here:', 0)
        self.retry=self.MakeYesOrNoQuery('Retry failed links')
        self.renamefiles=self.MakeYesOrNoQuery('Try to rename dynamically generated files', 0)
        self.subdomain=self.MakeYesOrNoQuery('Consider subdomains in web servers as external servers', 0)
        self.skipqueryforms=self.MakeYesOrNoQuery('Skip server-side query forms', 1)

        # NOTE(review): self.localise is assigned only when the second
        # query returns 1, or when the first query is answered falsy;
        # any other result of the second query leaves it unchanged.
        # Confirm that relying on the existing value is intended.
        if self.MakeYesOrNoQuery('Localise links in Downloaded Files'):
            if self.MakeYesOrNoQuery('Localise links using absolute path names', 2) == 1:
                self.localise=1
        else:
            self.localise=0

        prjtimeout = self.MakeStringQuery("Timeout value in seconds for the project ", 0)
        if prjtimeout:
            self.projtimeout = float(prjtimeout)
        self.pagecache = self.MakeYesOrNoQuery('Enable support for page caching')
        self.checkfiles=self.MakeYesOrNoQuery('Verify integrity of saved files')
        self.tidyhtml=self.MakeYesOrNoQuery('Tidy html pages before parsing (Helps to remove errors and do more downloads)')

        self.browsepage=self.MakeYesOrNoQuery('Add project information to the browse page')

        # Download limits
        maxfiles=self.MakeStringQuery('Enter maximum number of files to download : ', 0)
        if maxfiles:
            self.maxfiles=int(maxfiles)
        else:
            self.maxfiles=3000
        requests=self.MakeStringQuery('Enter ceiling of number of simultaneous requests to a server: ', 0)
        if requests:
            # Convert like every other numeric answer, so the attribute
            # has the same type as the fallback value below.
            self.requests=int(requests)
        else:
            self.requests=5

        # Log files
        errorfile=self.MakeStringQuery('Error log filename: ', 0)
        if errorfile: self.errorfile=errorfile
        logfile=self.MakeStringQuery('Message log filename: ', 0)
        if logfile: self.logfile = logfile
        dumpurls=self.MakeYesOrNoQuery('Dump crawled urls to a file', 0)
        if dumpurls:
            urlslistfile=self.MakeStringQuery('Url list filename: ', 0)
            # Store the answer that was just read (the name was
            # previously misspelled, raising NameError).
            if urlslistfile: self.urlslistfile=urlslistfile
        self.fastmode=self.MakeYesOrNoQuery("Run in 'fast mode'")
        fetchlevel=self.MakeStringQuery('Which fetch level would you like to set (0, 1 or 2): ', 0)
        if fetchlevel:
            self.fetchlevel=int(fetchlevel)
        verb=self.MakeStringQuery('Verbosity level (0 <=> 5, 0=>minimal messages to 5=> maximum messages) : ',0)
        if verb: self.verbosity=int(verb)

        print('Thank you for your excellent patience.')
        print('In a moment your config file will be generated...')
        return 1

if __name__=="__main__":

    # Extend the module search path with the parent directory
    # before importing the shared 'common' module from it.
    sys.path.append("..")
    from common import *

    # Run the interactive dialog; a false result means the user
    # declined, so no config file is written in that case.
    configer=GenConfig()
    completed=configer.UserDialog()
    if not completed:
        print('Aborting...')
    else:
        configer.GenConfigFile()
        print('You are ready to run HarvestMan using your new config file.')
    
        
            
            

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -