utils.py

来自「Harvestman-最新版本」· Python 代码 · 共 521 行 · 第 1/2 页

PY
521
字号
                    except Exception, e:                        debug(str(e))            except OSError, e:                debug('OS Exception ', e)        self._cachefilename = os.path.join(self._cachedir, 'cache')            def read_project_cache(self):        """ Try to read the project cache file """        found = False        # Get cache filename        if not os.path.exists(self._cachefilename):            info("Project cache not found")        cache_obj = Base(self._cachefilename)        if os.path.isfile(self._cachefilename):            try:                cache_obj.open()                found = True            except Exception, e:                logconsole(e)        return (cache_obj, found)    def write_project_cache(self, cache):        """ Commit the project cache to the disk """        cache.commit()            def write_url_headers(self, headerdict):        try:            pickler = HarvestManSerializer()            pickler.dump(headerdict, os.path.join(self._cachedir, 'urlheaders.db'))        except HarvestManSerializerError, e:            logconsole(str(e))            return WRITE_URL_HEADERS_ERROR        return WRITE_URL_HEADERS_OK    class HarvestManProjectManager(object):    """ Utility class to read/write project files """    def __init__(self):        pass    def write_project(self):        """ Write project files """        info('Writing Project Files...')        cfg = objects.config.copy()        pckfile = os.path.join(cfg.basedir, cfg.project + '.hpf')                if os.path.exists(pckfile):            try:                os.remove(pckfile)            except OSError, e:                logconsole(e)                return PROJECT_FILE_REMOVE_ERROR        try:            pickler = HarvestManSerializer()            pickler.dump( cfg, pckfile)        except HarvestManSerializerError, e:            logconsole(str(e))            return PROJECT_FILE_WRITE_ERROR        extrainfo('Done.')                return PROJECT_FILE_WRITE_OK    def read_project(self):        """ Load an existing HarvestMan project file and        crete dictionary for the passed config object """        projectfile = config.projectfile        try:            pickler = HarvestManSerializer()            d = pickler.load(projectfile)            for key in objects.config.keys():                try:                    objects.config[key] = d[key]                except:                    pass            objects.config.fromprojfile = True            return PROJECT_FILE_READ_OK        except HarvestManSerializerError, e:            logconsole(e)            return PROJECT_FILE_READ_ERRORclass HarvestManBrowser(object):    """ Utility class to write the project browse pages """    def __init__(self):        self._projectstartpage = os.path.abspath(objects.queuemgr.get_base_url().get_full_filename())        self._projectstartpage = 'file://' + self._projectstartpage.replace('\\', '/')        self._cfg = objects.config    def make_project_browse_page(self):        """ This creates an xhtml page for browsing the downloaded html pages """        if self._cfg.browsepage == 0:            return        ret = self._add_project_to_browse_page()        if ret == BROWSE_FILE_NOT_FOUND:            return self._make_initial_browse_page()        else:            return ret    def open_project_browse_page(self):        """ Open the project page in the user's web-browser """                import webbrowser        info('Opening project in browser...')        browsefile=os.path.join(self._cfg.basedir, 'index.html')        try:            webbrowser.open(browsefile)            extrainfo('Done.')        except webbrowser.Error, e:            logconsole(e)        return     def _add_project_to_browse_page(self):        """ Append new project information to existing project browser page """        browsefile=os.path.join(self._cfg.basedir, 'index.html')        if not os.path.exists(browsefile):            return BROWSE_FILE_NOT_FOUND        # read contents of file        contents=''        try:            f=open(browsefile, 'r')            contents=f.read()            f.close()        except (IOError, OSError), e:            logconsole(e)            return BROWSE_FILE_READ_ERROR        if not contents:            return BROWSE_FILE_EMPTY        # See if this is a proper browse file created by HARVESTMan        index = contents.find("HARVESTMan SIG:")        if index == -1: return -1        sig=contents[(index+17):(index+32)].strip()        if sig != HARVESTMAN_SIG: return -1        # Locate position to insert project info        index = contents.find(HARVESTMAN_BROWSER_HEADER)        if index == -1: return BROWSE_FILE_INVALID        # get project page        index=contents.rfind('<!-- PROJECTPAGE -->', index)        if index == -1: return BROWSE_FILE_INVALID        newindex=contents.find('<!-- END -->', index)        projpage=contents[(index+29):(newindex-2)]        # get project url        index=contents.find('<!-- PROJECTURL -->', newindex)        if index == -1: return BROWSE_FILE_INVALID        newindex=contents.find('<!-- END -->', index)        prjurl=contents[(index+19):newindex]        if prjurl and prjurl==self._cfg.url:            debug('Duplicate project!')            if projpage:                newcontents=contents.replace(projpage,self._projectstartpage)            if prjurl:                newcontents=contents.replace(prjurl, self._cfg.url)            try:                f=open(browsefile, 'w')                f.write(newcontents)                f.close()                return BROWSE_FILE_WRITE_OK            except OSError, e:                logconsole(e)                return BROWSE_FILE_WRITE_ERROR        else:            # find location of </TR> from this index            index = contents.find('</TR>', newindex)            if index==-1: return BROWSE_FILE_INVALID            newprojectinfo = HARVESTMAN_PROJECTINFO % {'PROJECTNAME': self._cfg.project,                                                       'PROJECTSTARTPAGE': self._projectstartpage,                                                       'PROJECTURL' : self._cfg.url }            # insert this string            newcontents = contents[:index] + '\n' + newprojectinfo + contents[index+5:]            try:                f=open(browsefile, 'w')                f.write(newcontents)                f.close()                return BROWSE_FILE_WRITE_OK                            except OSError, e:                logconsole(e)                return BROWSE_FILE_WRITE_ERROR    def _make_initial_browse_page(self):        """ This creates an xhtml page for browsing the downloaded        files similar to HTTrack copier """        debug('Making fresh page...')        browsefile=os.path.join(self._cfg.basedir, 'index.html')        f=open(browsefile, 'w')        f.write("<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\n\n")        f.write("<head>\n")        f.write("\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\" />\n")        f.write("\t<meta name=\"description\" content=\"" + HARVESTMAN_BOAST + "\" />\n")        f.write("\t<meta name=\"keywords\" content=\"" + HARVESTMAN_KEYWORDS + "\" />\n")        f.write("\t<title>Local index - HARVESTMAN Internet Spider</title>\n")        f.write("<!-- Mirror and index made by HARVESTMAN Internet Spider/" + self._cfg.version + " [ABP, NK '2003] -->\n")        f.write("<style type=\"text/css\">\n")        f.write("<!--\n\n")        f.write(HARVESTMAN_BROWSER_CSS)        f.write("\n\n")        f.write("// -->\n")        f.write("</style>\n")        f.write("</head>\n")        f.write(HARVESTMAN_BROWSER_TABLE1)        s=HARVESTMAN_BROWSER_TABLE2 % {'PER'    : '%',                                         'VERSION': self._cfg.version,                                         'HEADER' : HARVESTMAN_BROWSER_HEADER,                                         'PROJECTNAME': self._cfg.project,                                         'PROJECTSTARTPAGE': self._projectstartpage,                                         'PROJECTURL' : self._cfg.url}        f.write(s)        f.write("<BR><BR><BR><BR>\n")        f.write("<HR width=76%>\n")        s=HARVESTMAN_BROWSER_TABLE3 % {'PER'    : '%',                                         'CREDITS': HARVESTMAN_CREDITS }        f.write(s)        f.write("</body>\n")        # insert signature        sigstr = "<!-- HARVESTMan SIG: <" + HARVESTMAN_SIG + "> -->\n"        f.write(sigstr)        f.write("</html>\n")if __name__=="__main__":    pass

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?