utils.py
来自「Harvestman-最新版本」· Python 代码 · 共 521 行 · 第 1/2 页
PY
521 行
except Exception, e: debug(str(e)) except OSError, e: debug('OS Exception ', e) self._cachefilename = os.path.join(self._cachedir, 'cache') def read_project_cache(self): """ Try to read the project cache file """ found = False # Get cache filename if not os.path.exists(self._cachefilename): info("Project cache not found") cache_obj = Base(self._cachefilename) if os.path.isfile(self._cachefilename): try: cache_obj.open() found = True except Exception, e: logconsole(e) return (cache_obj, found) def write_project_cache(self, cache): """ Commit the project cache to the disk """ cache.commit() def write_url_headers(self, headerdict): try: pickler = HarvestManSerializer() pickler.dump(headerdict, os.path.join(self._cachedir, 'urlheaders.db')) except HarvestManSerializerError, e: logconsole(str(e)) return WRITE_URL_HEADERS_ERROR return WRITE_URL_HEADERS_OK class HarvestManProjectManager(object): """ Utility class to read/write project files """ def __init__(self): pass def write_project(self): """ Write project files """ info('Writing Project Files...') cfg = objects.config.copy() pckfile = os.path.join(cfg.basedir, cfg.project + '.hpf') if os.path.exists(pckfile): try: os.remove(pckfile) except OSError, e: logconsole(e) return PROJECT_FILE_REMOVE_ERROR try: pickler = HarvestManSerializer() pickler.dump( cfg, pckfile) except HarvestManSerializerError, e: logconsole(str(e)) return PROJECT_FILE_WRITE_ERROR extrainfo('Done.') return PROJECT_FILE_WRITE_OK def read_project(self): """ Load an existing HarvestMan project file and crete dictionary for the passed config object """ projectfile = config.projectfile try: pickler = HarvestManSerializer() d = pickler.load(projectfile) for key in objects.config.keys(): try: objects.config[key] = d[key] except: pass objects.config.fromprojfile = True return PROJECT_FILE_READ_OK except HarvestManSerializerError, e: logconsole(e) return PROJECT_FILE_READ_ERRORclass HarvestManBrowser(object): """ Utility class to write the project browse pages """ def __init__(self): self._projectstartpage = os.path.abspath(objects.queuemgr.get_base_url().get_full_filename()) self._projectstartpage = 'file://' + self._projectstartpage.replace('\\', '/') self._cfg = objects.config def make_project_browse_page(self): """ This creates an xhtml page for browsing the downloaded html pages """ if self._cfg.browsepage == 0: return ret = self._add_project_to_browse_page() if ret == BROWSE_FILE_NOT_FOUND: return self._make_initial_browse_page() else: return ret def open_project_browse_page(self): """ Open the project page in the user's web-browser """ import webbrowser info('Opening project in browser...') browsefile=os.path.join(self._cfg.basedir, 'index.html') try: webbrowser.open(browsefile) extrainfo('Done.') except webbrowser.Error, e: logconsole(e) return def _add_project_to_browse_page(self): """ Append new project information to existing project browser page """ browsefile=os.path.join(self._cfg.basedir, 'index.html') if not os.path.exists(browsefile): return BROWSE_FILE_NOT_FOUND # read contents of file contents='' try: f=open(browsefile, 'r') contents=f.read() f.close() except (IOError, OSError), e: logconsole(e) return BROWSE_FILE_READ_ERROR if not contents: return BROWSE_FILE_EMPTY # See if this is a proper browse file created by HARVESTMan index = contents.find("HARVESTMan SIG:") if index == -1: return -1 sig=contents[(index+17):(index+32)].strip() if sig != HARVESTMAN_SIG: return -1 # Locate position to insert project info index = contents.find(HARVESTMAN_BROWSER_HEADER) if index == -1: return BROWSE_FILE_INVALID # get project page index=contents.rfind('<!-- PROJECTPAGE -->', index) if index == -1: return BROWSE_FILE_INVALID newindex=contents.find('<!-- END -->', index) projpage=contents[(index+29):(newindex-2)] # get project url index=contents.find('<!-- PROJECTURL -->', newindex) if index == -1: return BROWSE_FILE_INVALID newindex=contents.find('<!-- END -->', index) prjurl=contents[(index+19):newindex] if prjurl and prjurl==self._cfg.url: debug('Duplicate project!') if projpage: newcontents=contents.replace(projpage,self._projectstartpage) if prjurl: newcontents=contents.replace(prjurl, self._cfg.url) try: f=open(browsefile, 'w') f.write(newcontents) f.close() return BROWSE_FILE_WRITE_OK except OSError, e: logconsole(e) return BROWSE_FILE_WRITE_ERROR else: # find location of </TR> from this index index = contents.find('</TR>', newindex) if index==-1: return BROWSE_FILE_INVALID newprojectinfo = HARVESTMAN_PROJECTINFO % {'PROJECTNAME': self._cfg.project, 'PROJECTSTARTPAGE': self._projectstartpage, 'PROJECTURL' : self._cfg.url } # insert this string newcontents = contents[:index] + '\n' + newprojectinfo + contents[index+5:] try: f=open(browsefile, 'w') f.write(newcontents) f.close() return BROWSE_FILE_WRITE_OK except OSError, e: logconsole(e) return BROWSE_FILE_WRITE_ERROR def _make_initial_browse_page(self): """ This creates an xhtml page for browsing the downloaded files similar to HTTrack copier """ debug('Making fresh page...') browsefile=os.path.join(self._cfg.basedir, 'index.html') f=open(browsefile, 'w') f.write("<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\n\n") f.write("<head>\n") f.write("\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\" />\n") f.write("\t<meta name=\"description\" content=\"" + HARVESTMAN_BOAST + "\" />\n") f.write("\t<meta name=\"keywords\" content=\"" + HARVESTMAN_KEYWORDS + "\" />\n") f.write("\t<title>Local index - HARVESTMAN Internet Spider</title>\n") f.write("<!-- Mirror and index made by HARVESTMAN Internet Spider/" + self._cfg.version + " [ABP, NK '2003] -->\n") f.write("<style type=\"text/css\">\n") f.write("<!--\n\n") f.write(HARVESTMAN_BROWSER_CSS) f.write("\n\n") f.write("// -->\n") f.write("</style>\n") f.write("</head>\n") f.write(HARVESTMAN_BROWSER_TABLE1) s=HARVESTMAN_BROWSER_TABLE2 % {'PER' : '%', 'VERSION': self._cfg.version, 'HEADER' : HARVESTMAN_BROWSER_HEADER, 'PROJECTNAME': self._cfg.project, 'PROJECTSTARTPAGE': self._projectstartpage, 'PROJECTURL' : self._cfg.url} f.write(s) f.write("<BR><BR><BR><BR>\n") f.write("<HR width=76%>\n") s=HARVESTMAN_BROWSER_TABLE3 % {'PER' : '%', 'CREDITS': HARVESTMAN_CREDITS } f.write(s) f.write("</body>\n") # insert signature sigstr = "<!-- HARVESTMan SIG: <" + HARVESTMAN_SIG + "> -->\n" f.write(sigstr) f.write("</html>\n")if __name__=="__main__": pass
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?