📄 genconfig.py
字号:
print 'Please enter y, n or <Enter>.'
elif res==' ':
# Abort signalled using <space> key
# generate the config file
print '<Space> key pressed! '
print 'Aborting dialog...'
self.GenConfigFile()
if res=='y': return 1
elif res=='n': return 0
elif res=='': return expected
def UserDialog(self):
    """Interactively collect the HarvestMan settings from the user.

    Prints a short introduction, then asks a fixed sequence of questions
    through self.MakeYesOrNoQuery and self.MakeStringQuery and stores each
    answer as an attribute on self.  For the optional string questions an
    empty answer leaves the corresponding attribute untouched, except
    where a fallback value is assigned explicitly below.

    Returns 0 if the user declines at the first question, otherwise 1
    after every question has been asked.

    Raises ValueError if a numeric answer cannot be parsed by int() or
    float().
    """
    # Single-argument print(...) behaves exactly like the print statement
    # on Python 2, so the output is unchanged.
    print('You are about to create a config file for the "HarvestMan" program.\n')
    print('You will be asked questions to which you can answer yes or no')
    print('and questions for which you need to type a response string.')
    print('If you press the [Enter] key for any question, the default value of')
    print('its setting will be used. If you want to discontinue at any time,')
    print('press the <space> key as the answer to a question. (If you abort,')
    print('the program will assume default values for the rest of the options.)')
    print('')

    if self.MakeYesOrNoQuery('Do you want to continue') == 0:
        return 0

    # Basic project settings.
    self.project = self.MakeStringQuery('Enter the name of this HarvestMan project: ')
    self.url = self.MakeStringQuery('Enter the starting url for this project: ')
    self.basedir = self.MakeStringQuery('Enter the base directory : ')

    self.images = self.MakeYesOrNoQuery('Download images')
    self.html = self.MakeYesOrNoQuery('Download html pages')
    self.getimagelinks = self.MakeYesOrNoQuery('Always get images linked from a page')
    self.getstylesheets = self.MakeYesOrNoQuery('Always get stylesheets associated to a page')

    # Proxy settings; everything below is asked only when a proxy is used.
    proxy = self.MakeYesOrNoQuery('Do you connect to internet through a proxy server', 0)
    if proxy:
        server = self.MakeStringQuery('Enter your proxy server\'s name/ip: ')
        port = self.MakeStringQuery('Enter the proxy port: ', 0)
        # An empty answer falls back to port 80.
        port = int(port) if port else 80

        # change for version 1.1 (port is specified separately)
        self.proxy = server
        self.proxyport = port

        auth = self.MakeYesOrNoQuery('Does your proxy need authentication', 0)
        if auth:
            self.puser = self.MakeStringQuery('Enter Proxy Username: ')
            # NOTE(review): the platform test is kept exactly as found;
            # getpass is used only when os.name is not 'posix' -- confirm
            # that this is the intended direction.
            if os.name != 'posix':
                import getpass
                self.ppasswd = getpass.getpass('Enter Proxy Password: ')
            else:
                self.ppasswd = self.MakeStringQuery('Enter Proxy Password: ')

    # Cookie stuff
    self.cookies = self.MakeYesOrNoQuery('Enable Cookies Support')

    # Javascript/java stuff
    self.javascript = self.MakeYesOrNoQuery('Fetch javascripts')
    self.javaapplet = self.MakeYesOrNoQuery('Download java applets')

    # Threading and network limits.
    self.usethreads = self.MakeYesOrNoQuery('Multithreaded')
    mthreads = self.MakeStringQuery('Maximum number of trackers(threads) to run: ', 0, '(More threads you run, the faster is the download)')
    if mthreads:
        self.maxtrackers = int(mthreads)

    threadpoolsize = self.MakeStringQuery('Size of the thread pool: ', 0)
    if threadpoolsize:
        self.threadpoolsize = int(threadpoolsize)
    else:
        self.threadpoolsize = 20

    maxconn = self.MakeStringQuery("Maximum number of simultaneous network connections: ", 0)
    if maxconn:
        self.connections = int(maxconn)

    timeout = self.MakeStringQuery('Timeout value(in seconds) for a download thread: ', 0)
    if timeout:
        self.timeout = int(timeout)

    self.rep = self.MakeYesOrNoQuery('Obey Robot Exclusion Principle')

    # Crawl depth and external servers/directories.
    depth = self.MakeStringQuery('Depth of url rel. to starting directory (default is 10) : ', 0)
    if depth:
        self.depth = int(depth)

    self.eserverlinks = self.MakeYesOrNoQuery('Fetch Pages from External Servers', 0)
    if self.eserverlinks:
        maxextserverlinks = self.MakeStringQuery('Limit of number of external servers: ', 0)
        if maxextserverlinks:
            self.maxextservers = int(maxextserverlinks)
        edepth = self.MakeStringQuery('Depth of external urls (relative to base server): ', 0)
        if edepth:
            self.extdepth = int(edepth)

    self.epagelinks = self.MakeYesOrNoQuery('Fetch Pages from External Directories')
    if self.epagelinks:
        maxextpagelinks = self.MakeStringQuery('Limit of number of external directories: ', 0)
        if maxextpagelinks:
            self.maxextdirs = int(maxextpagelinks)
        # NOTE(review): this writes the same attribute (extdepth) as the
        # external-server depth question above, so the later answer wins.
        edepth = self.MakeStringQuery('Depth of external directories : ', 0)
        if edepth:
            self.extdepth = int(edepth)

    # Filters and priorities (local names chosen so the builtin 'filter'
    # is not shadowed).
    use_urlfilter = self.MakeYesOrNoQuery('Filter Urls', 0)
    if use_urlfilter:
        self.urlfilter = self.MakeStringQuery('Enter/Paste url filter regular expression here: ', 0)

    use_serverfilter = self.MakeYesOrNoQuery('Filter External Servers', 0)
    if use_serverfilter:
        self.serverfilter = self.MakeStringQuery('Enter/Paste server filter regular expression here: ', 0)

    priority1 = self.MakeYesOrNoQuery('Apply user priorities for urls',0)
    if priority1:
        self.urlpriority = self.MakeStringQuery('Enter/Paste url priority string here:', 0)

    priority2 = self.MakeYesOrNoQuery('Apply priorities for servers',0)
    if priority2:
        self.serverpriority = self.MakeStringQuery('Enter/Paste server priority string here:', 0)

    self.retry = self.MakeYesOrNoQuery('Retry failed links')
    self.renamefiles = self.MakeYesOrNoQuery('Try to rename dynamically generated files', 0)
    self.subdomain = self.MakeYesOrNoQuery('Consider subdomains in web servers as external servers', 0)
    self.skipqueryforms = self.MakeYesOrNoQuery('Skip server-side query forms', 1)

    # Link localisation.
    # NOTE(review): the source this was recovered from had lost its
    # indentation.  The 'else' is attached to the outer question, so that
    # answering "no" to localisation stores 0, while any answer other than
    # an explicit "yes" to the absolute-path question leaves self.localise
    # as it was -- confirm against the original file.
    if self.MakeYesOrNoQuery('Localise links in Downloaded Files'):
        if self.MakeYesOrNoQuery('Localise links using absolute path names', 2) == 1:
            self.localise = 1
    else:
        self.localise = 0

    prjtimeout = self.MakeStringQuery("Timeout value in seconds for the project ", 0)
    if prjtimeout:
        self.projtimeout = float(prjtimeout)

    self.pagecache = self.MakeYesOrNoQuery('Enable support for page caching')
    self.checkfiles = self.MakeYesOrNoQuery('Verify integrity of saved files')
    self.tidyhtml = self.MakeYesOrNoQuery('Tidy html pages before parsing (Helps to remove errors and do more downloads)')
    self.browsepage = self.MakeYesOrNoQuery('Add project information to the browse page')

    maxfiles = self.MakeStringQuery('Enter maximum number of files to download : ', 0)
    if maxfiles:
        self.maxfiles = int(maxfiles)
    else:
        self.maxfiles = 3000

    maxrequests = self.MakeStringQuery('Enter ceiling of number of simultaneous requests to a server: ', 0)
    if maxrequests:
        # Stored as an int so it has the same type as the fallback below
        # and as every other numeric setting collected here.
        self.requests = int(maxrequests)
    else:
        self.requests = 5

    # Log files.
    errorfile = self.MakeStringQuery('Error log filename: ', 0)
    if errorfile:
        self.errorfile = errorfile

    logfile = self.MakeStringQuery('Message log filename: ', 0)
    if logfile:
        self.logfile = logfile

    dumpurls = self.MakeYesOrNoQuery('Dump crawled urls to a file', 0)
    if dumpurls:
        urlslistfile = self.MakeStringQuery('Url list filename: ', 0)
        if urlslistfile:
            # Previously assigned from the undefined name 'urllistfile',
            # which raised NameError whenever a filename was entered.
            self.urlslistfile = urlslistfile

    self.fastmode = self.MakeYesOrNoQuery("Run in 'fast mode'")

    fetchlevel = self.MakeStringQuery('Which fetch level would you like to set (0, 1 or 2): ', 0)
    if fetchlevel:
        self.fetchlevel = int(fetchlevel)

    verb = self.MakeStringQuery('Verbosity level (0 <=> 5, 0=>minimal messages to 5=> maximum messages) : ',0)
    if verb:
        self.verbosity = int(verb)

    print('Thank you for your excellent patience.')
    print('In a moment your config file will be generated...')

    return 1
if __name__ == "__main__":
    # Extend the module search path with the parent directory so that the
    # 'common' module imported on the next line can be found there.
    sys.path.append("..")
    from common import *

    configer = GenConfig()

    # UserDialog() returns a false value when the user backs out at the
    # first question; in that case no config file is written.
    dialog_completed = configer.UserDialog()
    if not dialog_completed:
        print('Aborting...')
    else:
        configer.GenConfigFile()
        print('You are ready to run HarvestMan using your new config file.')
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -