⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 utils.py

📁 网络蜘蛛
💻 PY
📖 第 1 页 / 共 2 页
字号:
""" HarvestManUtils.py - Utility classes for harvestman
    program.

    Created: Anand B Pillai on Sep 25 2003.
    
    Author: Anand B Pillai (anandpillai at letterboxes dot org).    

    This contains a class for pickling using compressed data
    streams and another one for writing project files.

    Feb 10 2004   Anand   1.3.1 bug fix release.
    Jun 14 2004   Anand   1.3.9 release.
"""

import os
import pickle
from common import *

# Expected first and second lines of an XML project file: the XML
# declaration and the HarvestManProject DOCTYPE declaration.
HARVESTMAN_XML_HEAD1 = '<?xml version="1.0" encoding="UTF-8"?>'
HARVESTMAN_XML_HEAD2 = '<!DOCTYPE HarvestManProject SYSTEM "HarvestManProject.dtd">'


class HarvestManPicklerError(Exception):
    """ Exception type carrying an arbitrary payload in ``value``.

    The string form of the exception is the string form of that
    payload. """

    def __init__(self, value):
        # Only the payload is recorded; the base-class initialiser is
        # deliberately not called, exactly as before.
        self.value = value

    def __str__(self):
        payload = self.value
        return str(payload)

class HarvestManPickler:
    """ Pickle objects to, and unpickle them from, zlib-compressed files.

    Both methods take a filename string instead of the open file
    stream that the pickle module functions expect. """

    # Redundant with the module-level import that the methods actually
    # use; kept so the class attribute stays available to existing users.
    import pickle

    def __init__(self):
        pass

    def dump(self, obj, filename, binary = False):
        """ Pickle `obj`, compress it with zlib and write it to `filename`.

        obj      -- the object to serialise.
        filename -- path of the file to (over)write.
        binary   -- passed unchanged as the second argument of
                    pickle.dumps.

        Returns 0 on success. Any failure while pickling, compressing
        or writing is re-raised as HarvestManPicklerError carrying the
        original error text. """

        import zlib

        try:
            # Compress the pickled data before it touches the disk.
            cstr = zlib.compress(pickle.dumps(obj, binary))
            # The with-block closes the file even when write() fails.
            with open(filename, 'wb') as stream:
                stream.write(cstr)
        except Exception as e:
            raise HarvestManPicklerError(str(e))

        return 0

    def load(self, filename):
        """ Read `filename`, decompress its contents and unpickle them.

        filename -- path of a file previously produced by dump().

        Returns the unpickled object. Any failure while reading,
        decompressing or unpickling is re-raised as
        HarvestManPicklerError carrying the original error text. """

        import zlib

        try:
            # The with-block closes the file even when read() fails.
            with open(os.path.abspath(filename), 'rb') as stream:
                cstr = stream.read()

            # SECURITY NOTE: unpickling can execute arbitrary code, so
            # this must only be pointed at files from a trusted source.
            obj = pickle.loads(zlib.decompress(cstr))
        except Exception as e:
            raise HarvestManPicklerError(str(e))

        return obj

class HarvestManCacheManager:
    """ Utility class to read/write project cache files """

    def __init__(self, filename):
        # Path of the cache file every method of this class works on.
        self.__cachefilename = filename

    def read_project_cache(self):
        """ Try to read the project cache file.

        Returns None when the cache file does not exist or when the
        configured cache format is not recognised, -1 for the
        (unsupported) 'xml' format, and otherwise the result of
        reading the pickled cache. """

        if not os.path.exists(self.__cachefilename):
            moreinfo("Project cache not found")
            return None

        cfg = GetObject('config')
        if cfg.cachefileformat == 'pickled':
            return self.__read_pickled_cache_file()
        elif cfg.cachefileformat == 'xml': # not supported
            return -1

        return None

    def __read_pickled_cache_file(self):
        """ Load the cache object with HarvestManPickler.

        Returns the loaded object, or None (after printing the error)
        when loading fails. """

        try:
            return HarvestManPickler().load(self.__cachefilename)
        except HarvestManPicklerError as e:
            print(e)
            return None

    def write_project_cache(self, cache_obj, format='pickled'):
        """ Write `cache_obj` to the cache file.

        cache_obj -- the cache data to store.
        format    -- 'pickled' or 'xml'.

        Creates the cache directory if needed, removes any existing
        cache file, drops a Readme.txt next to it (best effort) and
        then writes the cache in the requested format.

        Returns the writer's result (0 on success, -1 on failure), or
        -1 when the directory cannot be created, the old file cannot
        be removed or `format` is not recognised. """

        cachedir = os.path.dirname(self.__cachefilename)
        try:
            # An empty dirname means the cache file lives in the current
            # directory; os.makedirs('') would raise, so skip it.
            if cachedir and not os.path.exists(cachedir):
                os.makedirs(cachedir)
                extrainfo('Created cache directory => ', cachedir)
        except OSError as e:
            debug('OS Exception ', e)
            return -1

        # If file already exists, remove it
        if os.path.exists(self.__cachefilename):
            try:
                os.remove(self.__cachefilename)
            except OSError as e:
                print(e)
                return -1

        # Write a readme.txt file to the cache directory. This is best
        # effort only: a failure is logged and otherwise ignored.
        readmefile = os.path.join(cachedir, "Readme.txt")
        try:
            with open(readmefile, 'w') as fs:
                fs.write(HARVESTMAN_CACHE_README)
        except Exception as e:
            debug(str(e))

        if format == 'pickled':
            return self.__write_pickled_cache_file(cache_obj)
        elif format == 'xml':
            return self.__write_xml_cache_file(cache_obj)

        return -1

    def __write_pickled_cache_file(self, cache_obj):
        """ Dump `cache_obj` with HarvestManPickler.

        Returns 0 on success, -1 (after printing the error) on
        failure. """

        try:
            HarvestManPickler().dump(cache_obj, self.__cachefilename, False)
        except HarvestManPicklerError as e:
            print(e)
            return -1

        return 0

    def __write_xml_cache_file(self, cache_obj):
        """ Write `cache_obj` to the cache file as XML.

        `cache_obj` is iterated and every item is indexed with the
        keys 'location', 'url', 'content-length' and 'checksum'.

        Returns 0 on success, -1 (after printing the error) on
        failure. """

        from xml.sax.saxutils import escape

        cachedir = os.path.dirname(self.__cachefilename)
        # Copy the HarvestMan DTD from the installation
        dtdfile = os.path.join(cachedir, "HarvestManCache.dtd")

        configobj = GetObject('config')

        if not os.path.exists(dtdfile):
            try:
                import shutil
                shutil.copy("./HarvestManCache.dtd", dtdfile)
            except Exception as e:  # Best effort: log and carry on
                debug(str(e))

        def attr(text):
            # Escape a value for use inside a double-quoted attribute.
            return escape(text, {'"': '&quot;'})

        try:
            # The with-block closes the file even if a write fails.
            with open(self.__cachefilename, 'w') as fs:
                fs.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
                fs.write("<!DOCTYPE HarvestManCache SYSTEM \"HarvestManCache.dtd\">\n")
                fs.write("<PROJECT Name=\"" + attr(configobj.project) + "\""
                         + " Starturl=\"" + attr(configobj.url) + "\">\n")

                # Write cache information for every file; the index
                # attribute counts entries starting at 1.
                for index, d in enumerate(cache_obj, 1):
                    filename = d['location']
                    url = d['url']
                    contentlen = d['content-length']
                    # NOTE(review): the checksum is written via str()
                    # as-is; if it is a raw digest rather than hex text
                    # the output will not be readable -- confirm what
                    # callers store under 'checksum'.
                    md5checksum = d['checksum']

                    # URLs routinely contain '&', so every value is
                    # escaped to keep the document well-formed.
                    fs.write("\t<file location=\"" + attr(filename) + "\""
                             + " index=\"" + str(index) + "\">\n")
                    fs.write("\t<url>" + escape(url) + "</url>\n")
                    fs.write("\t<content-length>" + escape(str(contentlen))
                             + "</content-length>\n")
                    fs.write("\t<checksum>" + escape(str(md5checksum))
                             + "</checksum>\n")
                    fs.write("\t</file>\n")

                fs.write("</PROJECT>\n")
        except Exception as e:
            print(e)
            return -1

        return 0

class HarvestManProjectManager(object):
    """ Utility class to read/write project files """

    def __init__(self):
        pass

    def write_project(self, mode='pickled'):
        """ Write project files.

        mode -- 'pickled' or 'xml'; selects which writer is called.
                Any other value writes nothing.

        Always returns None; the writer's own result is not passed
        on. """

        moreinfo('Writing Project Files...')

        if mode == 'pickled':
            self.__write_pickled_project_file()
        elif mode == 'xml':
            self.__write_xml_project_file()

    def read_project(self):
        """ Load the HarvestMan project file named by the 'config'
        object's `projectfile` attribute.

        The first two lines of the file are compared against the XML
        header constants to decide whether the XML or the pickled
        reader is used.

        Returns -1 (after printing the error) when the file cannot be
        opened or read, otherwise whatever the selected reader
        returns. """

        projectfile = GetObject('config').projectfile

        # Find out if it is an XML project file: read the first two
        # lines and look for the xml specification.
        # NOTE(review): the file is opened in text mode; a binary
        # (pickled) project file may fail to decode here under
        # Python 3 -- confirm before porting.
        try:
            pf = open( projectfile )
        except Exception as e:
            print(e)
            return -1

        try:
            # try/finally guarantees the handle is released even when
            # readline() raises.
            try:
                line1 = pf.readline().strip()
                line2 = pf.readline().strip()
            finally:
                pf.close()
        except Exception as e:
            print(e)
            return -1

        # Both header lines must match for the file to count as XML.
        isxml = (line1 == HARVESTMAN_XML_HEAD1 and
                 line2 == HARVESTMAN_XML_HEAD2)

        if isxml:
            # XML parsing code
            return self.__read_xml_project_file()

        return self.__read_pickled_project_file()

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -