⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mirrors.py

📁 Harvestman-最新版本
💻 PY
📖 第 1 页 / 共 2 页
字号:
                      ('surfnet', 'Amsterdam, The Netherlands'),
                      ('kent', 'Kent, UK'),
                      ('optusnet', 'Sydney, Australia'),
                      ('jaist', 'Ishikawa, Japan'),
                      ('nchc', 'Tainan, Taiwan'))
                   

    sf_mirrors = tuple([HarvestManMirror('http://%s.dl.sourceforge.net' % name[0]) for name in sf_mirror_info])

    sf_mirror_domains = tuple([mirror.urlobj.get_full_domain() for mirror in sf_mirrors])
    # print sf_mirror_domains

    def __init__(self):
        """Set up empty mirror bookkeeping state."""
        # Mirrors loaded from an external mirror file (see load_mirrors)
        self.filemirrors = []
        # Whether to fall back to a mirror search when no other source applies
        self.mirrorsearch = False
        # Mirrors currently assigned to pieces of an active download
        self.current_mirrors = []
        # Every mirror handed out so far (superset of current_mirrors)
        self.used_mirrors = []
        # Failed mirrors that may be retried (failed with non-fatal errors)
        self.mirrors_to_retry = []
        # All failed mirrors (includes the retryable ones above)
        self.failed_mirrors = []
        # Number of mirror replacement attempts made so far
        self.retries = 0
        # Set once a multipart download has actually been dispatched
        self.used = False
        # Helper object that performs mirror searches
        self.searcher = HarvestManMirrorSearch()
        
    def find_mirror(self, urlobj):
        """ Return the mirror object which produced the given URL object,
        or None if no mirror matches """

        mirrors = self.get_mirrors(urlobj, False)
        # get_mirrors can return None (or an empty list) when no mirror
        # source is configured
        if not mirrors:
            return None

        for m in mirrors:
            if m.absolute:
                # Absolute mirrors replace the URL wholesale - compare directly
                if m.urlobj == urlobj:
                    return m
            elif m.urlobj == urlobj.baseurl:
                # Relative mirrors only swap the base, so match on the base URL
                return m

        return None
    
    def load_mirrors(self, mirrorfile):
        """ Load mirror information from the mirror file.

        Every non-empty line of the file is taken to be a mirror URL and
        wrapped in a HarvestManMirror object appended to self.filemirrors. """

        if mirrorfile:
            # Use open() instead of the deprecated file() builtin, and make
            # sure the handle is closed even if parsing raises.
            f = open(mirrorfile)
            try:
                for line in f:
                    url = line.strip()
                    if url != '':
                        self.filemirrors.append(HarvestManMirror(url))
            finally:
                f.close()
    
    def mirrors_available(self, urlobj):
        """ Tell whether any source of mirrors exists for the given URL """

        # Sourceforge URLs have built-in mirrors; otherwise fall back to
        # file-loaded mirrors or, failing that, the mirror-search flag.
        return (is_sourceforge_url(urlobj) or
                len(self.filemirrors) or
                self.mirrorsearch)
    
    def search_for_mirrors(self, urlobj, find_new = True):
        """ Search for mirrors of the given URL.

        If find_new is False, return the searcher's cached results instead
        of performing a new search. Returns a (possibly empty) list of
        mirror URLs. """

        if not find_new:
            return self.searcher.cache

        if not self.searcher.can_search():
            print('Cannot search for new mirrors')
            return []

        mirror_urls = self.searcher.search(urlobj)
        if mirror_urls:
            print('%d mirror URLs found, queuing them for multipart downloads...' % len(mirror_urls))
            return mirror_urls

        return []
    
    def get_mirrors(self, urlobj, find_new=True):
        """ Return the mirror list to use for the given URL: sourceforge's
        built-in mirrors, file-loaded mirrors, or search results (in that
        order of preference). Returns None when no source is configured. """

        if is_sourceforge_url(urlobj):
            return self.sf_mirrors
        if self.filemirrors:
            return self.filemirrors
        if self.mirrorsearch:
            return self.search_for_mirrors(urlobj, find_new)
        return None
        
    def create_multipart_urls(self, urlobj, numparts):
        """ Create mirrored URL objects for up to numparts pieces of the
        given URL, one per mirror. Returns an empty list when fewer than
        two mirrors are available (a single mirror gives no parallelism). """

        mirrors = self.get_mirrors(urlobj)
        # get_mirrors can return None when no mirror source is configured;
        # the original code crashed on len(None) here.
        if not mirrors:
            print('No mirrors found')
            return []
        elif len(mirrors) == 1:
            # Only one mirror - this is of no use
            print('Only single mirror found')
            return []

        # Cannot split across more pieces than we have mirrors
        if len(mirrors) < numparts:
            numparts = len(mirrors)

        # Python seems to sometimes optimize these lists to tuples...
        # This produced an error in Cygwin python, so forcefully
        # coercing them to lists...
        self.current_mirrors = list(mirrors[:numparts])
        self.used_mirrors = list(self.current_mirrors[:])

        # Route the URL through each chosen mirror
        urlobjects = []
        for mirror in self.current_mirrors:
            urlobjects.append(mirror.mirror_url(urlobj))

        return urlobjects
    
    def download_multipart_url(self, urlobj, clength, numparts, threadpool):
        """ Download URL multipart from supported servers.

        clength is the content length of the resource. It is split into
        contiguous byte ranges, one per mirror, and each piece is pushed
        to the given threadpool. Returns MIRRORS_NOT_FOUND when no usable
        mirrors exist, URL_PUSHED_TO_POOL otherwise. """

        logconsole('Splitting download across mirrors...\n')

        # Create a URL object for each piece. Note that no nearest-server
        # selection is done - the mirror list is used as-is.
        urlobjects = self.create_multipart_urls(urlobj, numparts)
        if len(urlobjects) == 0:
            return MIRRORS_NOT_FOUND

        # create_multipart_urls may have reduced the piece count to the
        # number of available mirrors, so size the pieces from the actual
        # count - otherwise the tail of the file would never be assigned.
        numparts = len(urlobjects)

        # Size of each piece (integer division); the last piece also gets
        # the remainder so the pieces sum to clength exactly.
        piecesz = clength // numparts
        pcsizes = [piecesz] * numparts
        pcsizes[-1] += clength % numparts

        # Assign a contiguous, non-overlapping byte range to every piece
        prev = 0
        for x in range(numparts):
            end = prev + pcsizes[x]
            urlobject = urlobjects[x]
            urlobject.clength = clength
            urlobject.range = (prev, end - 1)
            urlobject.mindex = x
            prev = end

            # Push this URL object to the pool
            threadpool.push(urlobject)

        self.used = True

        return URL_PUSHED_TO_POOL

    def get_different_mirror_url(self, urlobj, urlerror):
        """ Return a different mirror URL for a (failed) mirror URL.

        The failed mirror is recorded in failed_mirrors; if the error was
        non-fatal it is also queued in mirrors_to_retry. A never-used
        mirror is preferred over retrying a failed one. Returns None when
        no replacement mirror is available. """

        mirror_url = self.find_mirror(urlobj)
        if mirror_url is None:
            return None

        if mirror_url not in self.failed_mirrors:
            self.failed_mirrors.append(mirror_url)

        # If not a fatal error, this mirror can be retried later
        if not urlerror.fatal:
            if mirror_url not in self.mirrors_to_retry:
                self.mirrors_to_retry.append(mirror_url)

        mirrors = self.get_mirrors(urlobj)
        # Mirrors that have never been handed out yet
        newmirrors = list(set(mirrors).difference(set(self.used_mirrors)))

        if newmirrors:
            extrainfo("Returning from new mirror list...")
            # Pick one of the unused mirrors...
            new_mirror = newmirrors[0]
            # Remove the old mirror and replace it with new mirror in
            # current_mirrors
            self.current_mirrors.remove(mirror_url)
            self.current_mirrors.append(new_mirror)
            self.used_mirrors.append(new_mirror)

        elif len(self.mirrors_to_retry) > 1:
            extrainfo("Returning from mirrors_to_retry...")
            # Require at least two entries so we don't immediately go
            # back to the same mirror that just failed
            new_mirror = self.mirrors_to_retry.pop(0)
            self.current_mirrors.remove(mirror_url)
            self.current_mirrors.append(new_mirror)
            if new_mirror not in self.used_mirrors:
                self.used_mirrors.append(new_mirror)
        else:
            return None

        self.retries += 1

        return new_mirror.new_mirror_url(urlobj)

    def reset(self):
        """ Reset the state """

        self.current_mirrors = []
        self.used_mirrors = []
        self.mirrors_to_retry = []

    def get_stats(self):
        """ Provide statistics """

        statsd = {}
        statsd['filemirrors'] = len(self.filemirrors)
        statsd['usedmirrors'] = len(self.used_mirrors)
        statsd['failedmirrors'] = len(self.failed_mirrors)
        statsd['retries'] = self.retries

        return statsd
    
    def print_stats(self):
        """ Print mirror usage statistics to the console.

        The summary line is only printed if at least one mirror was used. """

        d = self.get_stats()

        # Collect the summary fragments and join them once at the end;
        # the old code appended hand-written separators and produced
        # doubled commas and trailing spaces.
        parts = []

        fmirrors = d['filemirrors']
        if fmirrors:
            logconsole("\nPrinting mirror statistics...")
            parts.append("%d mirrors were loaded from file" % fmirrors)

        umirrors = d['usedmirrors']
        if not umirrors:
            # No mirror was ever used - nothing to summarize
            return
        parts.append("%d mirrors were used" % umirrors)

        fldmirrors = d['failedmirrors']
        if fldmirrors:
            if fldmirrors > 1:
                parts.append("%d mirrors failed" % fldmirrors)
            else:
                parts.append("%d mirror failed" % fldmirrors)

        logconsole(', '.join(parts))
        
def is_multipart_download_supported(urlobj):
    """ Check whether this URL (server) supports multipart downloads """
    
    # Currently only sourceforge downloads are split across mirrors.
    return is_sourceforge_url(urlobj)

def is_sourceforge_url(urlobj):
    """ Is this a download from sourceforge ? """

    # Either one of the well-known sourceforge download hosts, or one of
    # the mirror domains registered on HarvestManMirrorManager.
    sf_hosts = ('downloads.sourceforge.net', 'prdownloads.sourceforge.net')
    if urlobj.domain in sf_hosts:
        return True
    return urlobj.get_full_domain() in HarvestManMirrorManager.sf_mirror_domains

if __name__ == "__main__":
    # Ad-hoc manual test: bootstrap the minimal HarvestMan global objects
    # (state/config, logger, data manager) and run a mirror search for a
    # sample mozilla download URL, printing the result.
    import config
    import logger
    import datamgr
    
    SetAlias(config.HarvestManStateObject())
    cfg = objects.config
    cfg.verbosity = 5
    SetAlias(logger.HarvestManLogger())
    SetLogSeverity()
    SetAlias(datamgr.HarvestManDataManager())
    
    search = HarvestManMirrorSearch()
    print search.search(urlparser.HarvestManUrl('http://pv-mirror02.mozilla.org/pub/mozilla.org/firefox/releases/2.0.0.11/linux-i686/en-US/firefox-2.0.0.11.tar.gz'))

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -