connector.py

Harvestman (latest version), Python, page 1 of 5

            return False

        # Create progress object
        prog = self._cfg.progressobj
        prog.setTopic(topic)
        #if n==1:
        prog.set(100, 100)
        #else:
        #    prog.set(n, 100)

        if nolengthmode:
            prog.setNoLengthMode(True)

        if n>0:
            prog.setHasSub(True)
            if not nolengthmode:
                for x in range(1,n+1):
                    prog.setSubTopic(x, subtopics[x-1])
                    prog.setSub(x, 0.0, 100)
        else:
            pass

    def make_tmp_fname(self, filename, directory='.'):
        """ Creates a temporary filename for download """

        random.seed()

        while True:
            fint = int(random.random()*random.random()*10000000)
            fname = ''.join(('.',filename,'#',str(fint)))
            fpath = os.path.join(directory, fname)
            if not os.path.isfile(fpath):
                return fpath

    def create_request(self, urltofetch, lmt='', etag='', useragent=True):
        """ Creates a request object for the URL 'urltofetch' and returns it """

        # This function takes care of adding any additional headers
        # etc in addition to creating the request object.

        # create a request object
        if lmt or etag:
            # print 'Making a head request', lmt, etag
            # Create a head request...
            request = HeadRequest(urltofetch)
            if lmt != '':
                ts = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.localtime(lmt))
                request.add_header('If-Modified-Since', ts)
            if etag != '':
                request.add_header('If-None-Match', etag)
        else:
            request = urllib2.Request(urltofetch)

        # Some sites do not like User-Agent strings and raise a Bad Request
        # (HTTP 400) error. E.g.: http://www.bad-ischl.ooe.gv.at/. In such
        # cases, the connect method sets the useragent flag to False and calls
        # this method again.
        # print 'User agent', self._cfg.USER_AGENT
        if useragent:
            request.add_header('User-Agent', self._cfg.USER_AGENT)

        # Check if any HTTP username/password are required
        username, password = self._cfg.username, self._cfg.passwd
        if username and password:
            # Add basic HTTP auth headers
            authstring = base64.encodestring('%s:%s' % (username, password))
            request.add_header('Authorization','Basic %s' % authstring)

        return request

    def get_url_data(self, url):
        """ Downloads data for the given URL and returns it """

        try:
            urlobj = urlparser.HarvestManUrl(url)
            res = self.connect(urlobj)
            return self._data
        except urlparser.HarvestManUrlError, e:
            error("URL Error: ", e)

    def connect2(self, urlobj, resuming=False):
        """ Connects to the Internet and fetches data for the URL encapsulated
        in the object 'urlobj'. This is the method used by Hget """

        data = ''

        # Reset the http headers
        self._headers.clear()
        retries = self._cfg.retryfailed
        self._numtries = 0

        urltofetch = urlobj.get_full_url()
        filename = urlobj.get_filename()

        dmgr = objects.datamgr
        rulesmgr = objects.rulesmgr

        showprogress = self._cfg.showprogress

        # Flag indicating we are reusing a previously redirected URL
        # to produce an automatic mirror split
        automirror = urlobj.redirected and urlobj.redirected_old

        # print self, urltofetch
        while self._numtries <= retries:

            # If automirror, we don't exit even on fatal errors, since
            # a new request on the old URL can lead to a new mirror
            # which can work. If automirror, we exit only after the number
            # of retries is exhausted.
            if not automirror and self._error.fatal:
                break

            # Reset status
            self._status = 0

            errnum = 0
            try:
                # Reset error
                self._error.reset()

                self._numtries += 1

                request = self.create_request(urltofetch)
                byterange = urlobj.range

                if byterange:
                    range1 = byterange[0]
                    range2 = byterange[-1]

                    # For a repeat connection, don't redownload already downloaded data.
                    if self._fo:
                        datasofar = self._fo.get_datalen()
                        if datasofar: range1 += datasofar

                    # If this was a redirected old URL which we are re-using for
                    # producing further redirections for auto-mirror downloads,
                    # don't add this header now, but add it later, after the
                    # redirection has happened.
                    if not automirror:
                        request.add_header('Range','bytes=%d-%d' % (range1,range2))

                self._freq = urllib2.urlopen(request)

                # Set status to 1
                self._status = 1

                actual_url = self._freq.geturl()

                if actual_url != urltofetch:
                    # Don't do this for mirrors...
                    if not urlobj.trymultipart:
                        logconsole('Redirected to %s...' % actual_url)
                    else:
                        extrainfo('Redirected to %s...' % actual_url)

                    if actual_url.replace(urltofetch, '') != '/':
                        no_change = False
                    else:
                        no_change = True

                    if no_change:
                        if (actual_url[-1] == '/' and urltofetch[-1] != '/'):
                            # Setting directory URL
                            urlobj.set_directory_url()
                    else:
                        # Considerable change
                        urlobj.redirected = True
                        urlobj.url = actual_url
                        debug("Re-resolving URL: Current is %s..." % urlobj.get_full_url())

                        urlobj.wrapper_resolveurl()
                        debug("Re-resolving URL: New is %s..." % urlobj.get_full_url())

                        # Get filename again
                        filename = urlobj.get_filename()

                        # Get URL again
                        if byterange and automirror:
                            # print 'Automirror URL...!'
                            range1, range2 = byterange

                            # For a repeat connection, don't redownload already downloaded data.
                            if self._fo:
                                datasofar = self._fo.get_datalen()
                                if datasofar: range1 += datasofar

                            request = self.create_request(urlobj.get_full_url())
                            request.add_header('Range','bytes=%d-%d' % (range1,range2))
                            self._freq.close()
                            self._freq = urllib2.urlopen(request)

                # Set http headers
                self.set_http_headers()

                encoding = self.get_content_encoding()
                ctype = self.get_content_type()
                clength = int(self.get_content_length())

                if clength==0:
                    clength_str = 'Unknown'
                elif clength>=1024*1024:
                    clength_str = '%dM' % (clength/(1024*1024))
                elif clength>=1024:
                    clength_str = '%dK' % (clength/1024)
                else:
                    clength_str = '%d bytes' % clength

                if showprogress and (resuming or (not urlobj.range)):
                    if clength:
                        logconsole('Length: %d (%s) Type: %s' % (clength, clength_str, ctype))
                        nolengthmode = False
                    else:
                        logconsole('Length: (%s) Type: %s' % (clength_str, ctype))
                        nolengthmode = True

                    logconsole('Content Encoding: %s\n' % encoding)

                # FTP servers do not support HTTP-like byte-range
                # requests. The way to do multipart for FTP is to use
                # the FTP restart (REST) command, but that requires writing
                # new wrappers on top of ftplib instead of the current simpler
                # way of routing everything using urllib2. This is planned
                # for later.
                # However, if mirror search is enabled, we try to do it.
                if urlobj.protocol == 'ftp://' and not self._cfg.mirrorsearch:
                    trynormal = True

                    if self._cfg.forcesplit:
                        logconsole('FTP request, not trying multipart download, defaulting to single thread')
                else:
                    trynormal = False

                # Check constraint on file size
                if (not byterange) and (not trynormal) and (not self._cfg.nomultipart) and self._cfg.forcesplit:
                    # I hate local imports, but there is no other way to do this
                    # except moving the function to this module.
                    from harvestman.lib import mirrors

                    if self._cfg.forcesplit and not self._cfg.mirrorsearch:
                        logconsole('Forcing download into %d parts' % self._cfg.numparts)

                    if (not self._headers.get('accept-ranges', '').lower() == 'bytes') and \
                           (not self._cfg.mirrorfile) and \
                           not mirrors.is_multipart_download_supported(urlobj) and \
                           not (self._cfg.mirrorsearch):

                        logconsole('Checking whether server supports multipart downloads...')
                        # See if the server supports the 'Range' header
                        # by requesting half the length
                        self._headers.clear()

                        # Need to re-create the request in case the URL has changed
                        request = self.create_request(urlobj.get_full_url())
                        request.add_header('Range','bytes=%d-%d' % (0,clength/2))
                        self._freq.close()

                        self._freq = urllib2.urlopen(request)

                        # Set http headers
                        self.set_http_headers()
                        range_result = self._headers.get('accept-ranges', '')
                        if range_result.lower()=='bytes':
                            logconsole('Server supports multipart downloads')
                            self._freq.close()
                        else:
                            logconsole('Server does not support multipart downloads')
                            resp = raw_input('Do you still want to download this URL [y/n] ?')
                            if resp.lower() !='y':
                                logconsole('Aborting download.')
                                return CONNECT_DOWNLOAD_ABORTED
                            else:
                                # Create a fresh request object
                                self._freq.close()
                                request = self.create_request(urlobj.get_full_url())
                                self._freq = urllib2.urlopen(request)

                                logconsole('Downloading %s' % urlobj.get_full_url())
                                trynormal = True
                    else:
                        logconsole('Server supports multipart downloads')

                    if not trynormal:
                        logconsole('Trying multipart download...')
                        urlobj.trymultipart = True

                        ret = dmgr.download_multipart_url(urlobj, clength)
                        if ret == URL_PUSHED_TO_POOL:
                            # Set flag which indicates a multipart
                            # download is in progress
                            self._cfg.multipart = True

                            # Set progress object
                            if showprogress:
                                self.set_progress_object(filename,1,[filename],nolengthmode)

                            return CONNECT_MULTIPART_DOWNLOAD
                        elif ret == MIRRORS_NOT_FOUND:
                            return ret

                # If this is not the first attempt, print a success msg
                if self._numtries>1:
                    extrainfo("Reconnect succeeded => ", urlobj.get_full_url())

                try:
                    # Don't set progress object if multipart download - it
                    # would have been done before.
                    if showprogress and (resuming or (not urlobj.range)):
                        self.set_progress_object(filename,1,[filename],nolengthmode)

                    prog = self._cfg.progressobj

                    mypercent = 0.0

                    # Report fname to calling thread
                    ct = threading.currentThread()

                    # Only set tmpfname if this is a fresh download.
                    if self._tmpfname=='':
                        if not self._cfg.hgetnotemp:
                            if urlobj.trymultipart:
                                localurl = urlobj.mirror_url.get_original_url()
                            else:
                                localurl = urlobj.get_original_url()

                            tmpd = os.path.join(GetMyTempDir(), str(abs(hash(localurl))))
                        else:
                            tmpd = '.'

                        self._tmpfname = self.make_tmp_fname(filename, tmpd)

                    if ct.__class__.__name__ == 'HarvestManUrlThread':
                        ct.set_tmpfname(self._tmpfname)

                    if self._fo==None:
                        self._fo = HarvestManFileObject(self._freq,
                                                        self._tmpfname,
                                                        cle
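
The create_request method above attaches conditional-request headers (If-Modified-Since / If-None-Match) and a Basic-Auth header before fetching. Below is a minimal, standalone sketch of that same header handling using only Python 2's urllib2 and base64. It is not part of connector.py; the URL, timestamp, and credentials are placeholder values.

import time
import base64
import urllib2

# Hypothetical inputs; in connector.py these come from the URL object and config.
url = 'http://example.com/index.html'
last_modified = time.time() - 86400          # pretend we fetched it a day ago
etag = '"abc123"'
username, password = 'user', 'secret'

request = urllib2.Request(url)

# Conditional GET: the server replies 304 Not Modified if nothing changed.
ts = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(last_modified))
request.add_header('If-Modified-Since', ts)
request.add_header('If-None-Match', etag)

# Basic HTTP auth; strip() removes the newline that encodestring appends.
authstring = base64.encodestring('%s:%s' % (username, password)).strip()
request.add_header('Authorization', 'Basic %s' % authstring)

try:
    response = urllib2.urlopen(request)
    print response.getcode(), len(response.read()), 'bytes'
except urllib2.HTTPError, e:
    # A 304 response surfaces as an HTTPError in urllib2.
    print 'Server said:', e.code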
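
connect2 decides whether a URL can be split into parallel parts by probing the server with a Range request and inspecting the Accept-Ranges answer, as seen in the listing. The standalone sketch below illustrates that probe with plain Python 2 urllib2; the URL is a placeholder, and checking the 206 status alongside Accept-Ranges is a simplification of what the class does through set_http_headers.

import urllib2

url = 'http://example.com/some/large/file.iso'   # placeholder URL

# First request: learn the total content length.
resp = urllib2.urlopen(urllib2.Request(url))
clength = int(resp.info().getheader('content-length', '0'))
resp.close()

# Probe: ask for only the first half of the resource.
probe = urllib2.Request(url)
probe.add_header('Range', 'bytes=%d-%d' % (0, clength / 2))
resp = urllib2.urlopen(probe)

accept_ranges = resp.info().getheader('accept-ranges', '').lower()
if resp.getcode() == 206 or accept_ranges == 'bytes':
    # Partial content honoured: the file can be fetched as several
    # byte ranges in parallel, which is what the multipart path does.
    print 'Server supports multipart (byte-range) downloads'
else:
    # The server ignored the Range header and returned the full body.
    print 'Server does not support multipart downloads'
resp.close()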
