⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgmllib.py

📁 在 linux平台上的网页编程的模板
💻 PY
📖 第 1 页 / 共 2 页
字号:
        while len(self.stack) > found:            tag = self.stack[-1]            try:                method = getattr(self, 'end_' + tag)            except AttributeError:                method = None            if method:                self.handle_endtag(tag, method)            else:                self.unknown_endtag(tag)            del self.stack[-1]    # Overridable -- handle start tag    def handle_starttag(self, tag, method, attrs):        method(attrs)    # Overridable -- handle end tag    def handle_endtag(self, tag, method):        method()    # Example -- report an unbalanced </...> tag.    def report_unbalanced(self, tag):        if self.verbose:            print '*** Unbalanced </' + tag + '>'            print '*** Stack:', self.stack    # Example -- handle character reference, no need to override    def handle_charref(self, name):        try:            n = string.atoi(name)        except string.atoi_error:            self.unknown_charref(name)            return        if not 0 <= n <= 255:            self.unknown_charref(name)            return        self.handle_data(chr(n))    # Definition of entities -- derived classes may override    entitydefs = ENTITYDEFS    # Example -- handle entity reference, no need to override    def handle_entityref(self, name):        table = self.entitydefs        if table.has_key(name):            self.handle_data(table[name])        else:            self.unknown_entityref(name)            return    # Example -- handle data, should be overridden    def handle_data(self, data):        pass    # Example -- handle comment, could be overridden    def handle_comment(self, data):        pass    # To be overridden -- handlers for unknown objects    def unknown_starttag(self, tag, attrs): pass    def unknown_endtag(self, tag): pass    def unknown_charref(self, ref): pass    def unknown_entityref(self, ref): pass# --------------------------------------------------------------------# accelerated SGML parserclass FastSGMLParser:    # Interface -- initialize and reset this instance    def __init__(self, verbose=0):        self.verbose = verbose        self.reset()    # Interface -- reset this instance.  Loses all unprocessed data    def reset(self):        self.rawdata = ''        self.stack = []        self.lasttag = '???'        self.nomoretags = 0        self.literal = 0        self.parser = sgmlop.SGMLParser()        self.feed = self.parser.feed        self.parser.register(self)    # For derived classes only -- enter literal mode (CDATA) till EOF    def setnomoretags(self):        self.nomoretags = self.literal = 1 # FIXME!    # For derived classes only -- enter literal mode (CDATA)    def setliteral(self, *args):        self.literal = 1 # FIXME!    # Interface -- feed some data to the parser.  Call this as    # often as you want, with as little or as much text as you    # want (may include '\n').    def feed(self, data): # overridden by reset        self.parser.feed(data)    # Interface -- handle the remaining data    def close(self):        try:            self.parser.close()        finally:            self.parser = None    # Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)    def finish_shorttag(self, tag, data):        self.finish_starttag(tag, [])        self.handle_data(data)        self.finish_endtag(tag)    # Internal -- finish processing of start tag    # Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag    def finish_starttag(self, tag, attrs):        # FIXME: should move this logic into sgmlop!        try:            method = getattr(self, 'start_' + tag)        except AttributeError:            try:                method = getattr(self, 'do_' + tag)            except AttributeError:                self.unknown_starttag(tag, attrs)                return -1            else:                self.handle_starttag(tag, method, attrs)                return 0        else:            self.stack.append(tag)            self.handle_starttag(tag, method, attrs)            return 1    # Internal -- finish processing of end tag    def finish_endtag(self, tag):        if not tag:            found = len(self.stack) - 1            if found < 0:                self.unknown_endtag(tag)                return        else:            if tag not in self.stack:                try:                    method = getattr(self, 'end_' + tag)                except AttributeError:                    self.unknown_endtag(tag)                return            found = len(self.stack)            for i in range(found):                if self.stack[i] == tag: found = i        while len(self.stack) > found:            tag = self.stack[-1]            try:                method = getattr(self, 'end_' + tag)            except AttributeError:                method = None            if method:                self.handle_endtag(tag, method)            else:                self.unknown_endtag(tag)            del self.stack[-1]    # Overridable -- handle start tag    def handle_starttag(self, tag, method, attrs):        method(attrs)    # Overridable -- handle end tag    def handle_endtag(self, tag, method):        method()    # Example -- report an unbalanced </...> tag.    def report_unbalanced(self, tag):        if self.verbose:            print '*** Unbalanced </' + tag + '>'            print '*** Stack:', self.stack    # Example -- handle character reference, no need to override    # def handle_charref(self, name):    #     pass    # Definition of entities -- derived classes may override    entitydefs = ENTITYDEFS    # Example -- handle entity reference, no need to override    def handle_entityref(self, name):        try:            data = self.entitydefs[name]        except KeyError:            self.unknown_entityref(name)        else:            self.handle_data(data)    # Example -- handle data, should be overridden    def handle_data(self, data):        pass    # Example -- handle comment, could be overridden    # def handle_comment(self, data):    #   pass    # To be overridden -- handlers for unknown objects    def unknown_starttag(self, tag, attrs): pass    def unknown_endtag(self, tag): pass    def unknown_charref(self, ref): pass    def unknown_entityref(self, ref): pass#sgmlop = None# pick a suitable parserif sgmlop:    SGMLParser = FastSGMLParserelse:    SGMLParser = SlowSGMLParser# --------------------------------------------------------------------# test stuffclass TestSGMLParser(SGMLParser):    def __init__(self, verbose=0):        self.testdata = ""        SGMLParser.__init__(self, verbose)    def handle_data(self, data):        self.testdata = self.testdata + data        if len(`self.testdata`) >= 70:            self.flush()    def flush(self):        data = self.testdata        if data:            self.testdata = ""            print 'data:', `data`    def handle_comment(self, data):        self.flush()        r = `data`        if len(r) > 68:            r = r[:32] + '...' + r[-32:]        print 'comment:', r    def unknown_starttag(self, tag, attrs):        self.flush()        if not attrs:            print 'start tag: <' + tag + '>'        else:            print 'start tag: <' + tag,            for name, value in attrs:                print name + '=' + '"' + value + '"',            print '>'    def unknown_endtag(self, tag):        self.flush()        print 'end tag: </' + tag + '>'    def unknown_entityref(self, ref):        self.flush()        print '*** unknown entity ref: &' + ref + ';'    def unknown_charref(self, ref):        self.flush()        print '*** unknown char ref: &#' + ref + ';'    def close(self):        SGMLParser.close(self)        self.flush()def test(args = None):    import sys    if not args:        args = sys.argv[1:]    if args and args[0] == '-s':        args = args[1:]        klass = SGMLParser    else:        klass = TestSGMLParser    if args:        file = args[0]    else:        file = 'test.html'    if file == '-':        f = sys.stdin    else:        try:            f = open(file, 'r')        except IOError, msg:            print file, ":", msg            sys.exit(1)    data = f.read()    if f is not sys.stdin:        f.close()    x = klass()    for c in data:        x.feed(c)    x.close()if __name__ == '__main__':    test()

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -