# mirrors.py - Mirror management support for multipart downloads in HarvestMan.

class HarvestManMirrorManager(object):
    """ Manages download mirrors (SourceForge mirrors, mirrors loaded
    from a file and searched mirrors) and splits downloads across them """

    # (name, location) pairs of SourceForge download mirrors. The opening
    # of this assignment is reconstructed; the original list may contain
    # more entries before these.
    sf_mirror_info = (('surfnet', 'Amsterdam, The Netherlands'),
                      ('kent', 'Kent, UK'),
                      ('optusnet', 'Sydney, Australia'),
                      ('jaist', 'Ishikawa, Japan'),
                      ('nchc', 'Tainan, Taiwan'))

    sf_mirrors = tuple([HarvestManMirror('http://%s.dl.sourceforge.net' % name[0])
                        for name in sf_mirror_info])
    sf_mirror_domains = tuple([mirror.urlobj.get_full_domain() for mirror in sf_mirrors])
    # print sf_mirror_domains
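    # For illustration: the first sf_mirror_info entry above produces
    # the mirror 'http://surfnet.dl.sourceforge.net', whose full domain
    # 'surfnet.dl.sourceforge.net' ends up in sf_mirror_domains.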

    def __init__(self):
        # List of mirror URLs loaded from a mirror file/other source
        self.filemirrors = []
        # Flag to perform mirror search
        self.mirrorsearch = False
        # List of current mirrors in use
        self.current_mirrors = []
        # List of used mirrors
        self.used_mirrors = []
        # List of mirrors which can be retried because they failed
        # with non-fatal errors
        self.mirrors_to_retry = []
        # List of mirrors which failed (includes the above list)
        self.failed_mirrors = []
        # Mirror retry attempts
        self.retries = 0
        # Used flag
        self.used = False
        # Mirror search object
        self.searcher = HarvestManMirrorSearch()

    def find_mirror(self, urlobj):
        """ Find and return the mirror object for the given URL object """

        mirrors = self.get_mirrors(urlobj, False)
        if mirrors is None:
            return None

        for m in mirrors:
            if m.absolute:
                if m.urlobj == urlobj:
                    return m
            elif m.urlobj == urlobj.baseurl:
                return m

    def load_mirrors(self, mirrorfile):
        """ Load mirror information from the mirror file """

        if mirrorfile:
            for line in open(mirrorfile):
                url = line.strip()
                if url != '':
                    self.filemirrors.append(HarvestManMirror(url))
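
    # An illustrative mirror file (one mirror URL per line; blank
    # lines are skipped):
    #
    #   http://mirror1.example.com/pub
    #   http://mirror2.example.com/pub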

    def mirrors_available(self, urlobj):
        """ Return whether any mirrors are available for this URL """

        return (is_sourceforge_url(urlobj) or len(self.filemirrors) or self.mirrorsearch)
        # return len(self.filemirrors) or (self.mirrorsearch)

    def search_for_mirrors(self, urlobj, find_new = True):
        """ Search for mirrors of the given URL, returning the cached
        results if find_new is False """

        if not find_new:
            return self.searcher.cache

        if self.searcher.can_search():
            mirror_urls = self.searcher.search(urlobj)
            if mirror_urls:
                print '%d mirror URLs found, queuing them for multipart downloads...' % len(mirror_urls)
                return mirror_urls
            else:
                return []
        else:
            print 'Cannot search for new mirrors'
            return []

    def get_mirrors(self, urlobj, find_new=True):
        """ Return the list of mirrors applicable to this URL """

        if is_sourceforge_url(urlobj):
            return self.sf_mirrors
        elif self.filemirrors:
            return self.filemirrors
        elif self.mirrorsearch:
            return self.search_for_mirrors(urlobj, find_new)

    def create_multipart_urls(self, urlobj, numparts):
        """ Create a URL object per mirror for a multipart download
        of the given URL, using at most numparts mirrors """

        urlobjects = []

        mirrors = self.get_mirrors(urlobj) or []
        if len(mirrors) == 0:
            print 'No mirrors found'
            return []
        elif len(mirrors) == 1:
            # Only one mirror - this is of no use
            print 'Only single mirror found'
            return []

        if len(mirrors) < numparts:
            numparts = len(mirrors)

        # Get the list of servers to use. Python seems to sometimes
        # optimize these lists to tuples... This produced an error in
        # Cygwin python, so forcefully coercing them to lists...
        self.current_mirrors = list(mirrors[:numparts])
        self.used_mirrors = list(self.current_mirrors[:])

        for x in range(numparts):
            mirror = self.current_mirrors[x]
            newurlobj = mirror.mirror_url(urlobj)
            urlobjects.append(newurlobj)

        return urlobjects

    def download_multipart_url(self, urlobj, clength, numparts, threadpool):
        """ Download the URL in multiple parts from mirror servers """

        logconsole('Splitting download across mirrors...\n')
        # Note that we are not doing any kind of search for the
        # nearest servers - the mirror list is used as-is.

        # Calculate the size of each piece
        piecesz = clength/numparts
        # List of piece sizes, one per part
        pcsizes = [piecesz]*numparts
        # The last piece also gets the remainder
        pcsizes[-1] += clength % numparts

        # Create a URL object for each piece and set its byte range
        urlobjects = self.create_multipart_urls(urlobj, numparts)
        if len(urlobjects) == 0:
            return MIRRORS_NOT_FOUND

        prev = 0
        for x in range(len(urlobjects)):
            curr = pcsizes[x]
            next = curr + prev
            urlobject = urlobjects[x]
            urlobject.clength = clength
            urlobject.range = (prev, next-1)
            urlobject.mindex = x
            prev = next
            # Push this URL object to the pool
            threadpool.push(urlobject)

        self.used = True
        return URL_PUSHED_TO_POOL
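
    # Worked example: with clength=1000003 and numparts=4, piecesz is
    # 1000003/4 = 250000, so pcsizes becomes [250000, 250000, 250000,
    # 250003] (the last piece absorbs the remainder) and the pushed
    # byte ranges are (0, 249999), (250000, 499999), (500000, 749999)
    # and (750000, 1000002).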

    def get_different_mirror_url(self, urlobj, urlerror):
        """ Return a different mirror URL for a (failed) mirror URL """

        mirror_url = self.find_mirror(urlobj)
        if mirror_url is None:
            return None

        if mirror_url not in self.failed_mirrors:
            self.failed_mirrors.append(mirror_url)

        # If the error was not fatal, the mirror can be retried later
        if not urlerror.fatal:
            if mirror_url not in self.mirrors_to_retry:
                self.mirrors_to_retry.append(mirror_url)

        mirrors = self.get_mirrors(urlobj)
        # Mirrors which have not been used yet (set difference)
        newmirrors = list(set(mirrors).difference(set(self.used_mirrors)))
        # print 'New mirrors=>',newmirrors

        if newmirrors:
            extrainfo("Returning from new mirror list...")
            # Pick the first one (the set difference is unordered)
            new_mirror = newmirrors[0]
            # Replace the failed mirror with the new mirror in
            # current_mirrors
            self.current_mirrors.remove(mirror_url)
            self.current_mirrors.append(new_mirror)
            self.used_mirrors.append(new_mirror)
        elif len(self.mirrors_to_retry)>1:
            extrainfo("Returning from mirrors_to_retry...")
            # We don't want to go back to the same mirror!
            new_mirror = self.mirrors_to_retry.pop(0)
            self.current_mirrors.remove(mirror_url)
            self.current_mirrors.append(new_mirror)
            if new_mirror not in self.used_mirrors:
                self.used_mirrors.append(new_mirror)
        else:
            return None

        self.retries += 1
        return new_mirror.new_mirror_url(urlobj)
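
    # In short: the failed mirror is always recorded in failed_mirrors
    # (and queued in mirrors_to_retry for non-fatal errors); an unused
    # mirror is preferred as the replacement, then a queued retryable
    # mirror, and None is returned when neither is available.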

    def reset(self):
        """ Reset the state """

        self.current_mirrors = []
        self.used_mirrors = []
        self.mirrors_to_retry = []

    def get_stats(self):
        """ Provide statistics """

        statsd = {}
        statsd['filemirrors'] = len(self.filemirrors)
        statsd['usedmirrors'] = len(self.used_mirrors)
        statsd['failedmirrors'] = len(self.failed_mirrors)
        statsd['retries'] = self.retries

        return statsd

    def print_stats(self):
        """ Print statistics to console """

        d = self.get_stats()

        info = ''
        fmirrors = d['filemirrors']
        if fmirrors:
            logconsole("\nPrinting mirror statistics...")
            info = "%d mirrors were loaded from file" % fmirrors

        umirrors = d['usedmirrors']
        if umirrors:
            if info: info += ', '
            info += "%d mirrors were used" % umirrors
        else:
            return

        fldmirrors = d['failedmirrors']
        retries = d['retries']

        if fldmirrors:
            if info: info += ', '
            if fldmirrors > 1:
                info += "%d mirrors failed" % fldmirrors
            else:
                info += "%d mirror failed" % fldmirrors

        if retries:
            info += " (%d mirror retries)" % retries

        logconsole(info)


def is_multipart_download_supported(urlobj):
    """ Check whether this URL (server) supports multipart downloads """

    return is_sourceforge_url(urlobj)

def is_sourceforge_url(urlobj):
    """ Is this a download from sourceforge ? """

    return (urlobj.domain in ('downloads.sourceforge.net', 'prdownloads.sourceforge.net') or
            urlobj.get_full_domain() in HarvestManMirrorManager.sf_mirror_domains)

if __name__ == "__main__":
    import config
    import logger
    import datamgr

    SetAlias(config.HarvestManStateObject())
    cfg = objects.config
    cfg.verbosity = 5

    SetAlias(logger.HarvestManLogger())
    SetLogSeverity()
    SetAlias(datamgr.HarvestManDataManager())

    search = HarvestManMirrorSearch()
    print search.search(urlparser.HarvestManUrl('http://pv-mirror02.mozilla.org/pub/mozilla.org/firefox/releases/2.0.0.11/linux-i686/en-US/firefox-2.0.0.11.tar.gz'))
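
    # A minimal usage sketch of the manager itself. 'DummyPool' and the
    # URL below are illustrative stand-ins; download_multipart_url only
    # calls push() on the pool object it is given.
    class DummyPool:
        def push(self, urlobject):
            print 'pushed piece %d, range %s' % (urlobject.mindex, str(urlobject.range))

    manager = HarvestManMirrorManager()
    url = urlparser.HarvestManUrl('http://downloads.sourceforge.net/foo/foo-1.0.tar.gz')
    print manager.download_multipart_url(url, 1000003, 4, DummyPool())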