📄 kgp.py

📁 dive int python document
💻 PY
字号:
#!/usr/bin/env python2
"""Kant Generator for Python

Generates mock philosophy based on a context-free grammar

Usage: python kgp.py [options] [source]

Options:
  -g ..., --grammar=...   use specified grammar file or URL
  -h, --help              show this help
  -d                      show debugging information while parsing

Examples:
  kgp.py                  generates several paragraphs of Kantian philosophy
  kgp.py -g husserl.xml   generates several paragraphs of Husserl
  kgp.py "<xref id='paragraph'/>"  generates a paragraph of Kant
  kgp.py template.xml     reads from template.xml to decide what to generate

This program is part of "Dive Into Python", a free Python book for
experienced programmers.  Visit http://diveintopython.org/ for the
latest version.
"""

__author__ = "Mark Pilgrim (mark@diveintopython.org)"
__version__ = "$Revision: 1.4 $"
__date__ = "$Date: 2004/05/05 21:57:19 $"
__copyright__ = "Copyright (c) 2001 Mark Pilgrim"
__license__ = "Python"

from xml.dom import minidom
import random
import toolbox
import sys
import getopt

# Module-wide debug switch; main() sets it to 1 when "-d" is given.
_debug = 0


class NoSourceError(Exception):
    """Raised when no source was given and none can be guessed."""
    pass


class KantGenerator:
    """generates mock philosophy based on a context-free grammar"""

    def __init__(self, grammar, source=None):
        """load the grammar and the source, then generate output once

        grammar -- anything accepted by _load()
        source  -- anything accepted by _load(); when empty or None,
                   a default source is guessed from the grammar
        """
        self.loadGrammar(grammar)
        self.loadSource(source or self.getDefaultSource())
        self.refresh()

    def _load(self, source):
        """load XML input source, return parsed XML document

        The source is handed to toolbox.openAnything, so it can be:
        - a URL of a remote XML file ("http://diveintopython.org/kant.xml")
        - a filename of a local XML file ("~/diveintopython/common/py/kant.xml")
        - standard input ("-")
        - the actual XML document, as a string

        Returns the root element (documentElement) of the parsed document.
        """
        sock = toolbox.openAnything(source)
        try:
            xmldoc = minidom.parse(sock).documentElement
        finally:
            # Release the stream even when the XML is malformed.
            sock.close()
        return xmldoc

    def loadGrammar(self, grammar):
        """load context-free grammar

        Stores the parsed grammar in self.grammar and indexes every
        <ref> element by its "id" attribute in self.refs.
        """
        self.grammar = self._load(grammar)
        self.refs = {}
        for ref in self.grammar.getElementsByTagName("ref"):
            self.refs[ref.attributes["id"].value] = ref

    def loadSource(self, source):
        """load source"""
        self.source = self._load(source)

    def getDefaultSource(self):
        """guess default source of the current grammar

        The default source will be one of the <ref>s that is not
        cross-referenced.  This sounds complicated but it's not.
        Example: The default source for kant.xml is
        "<xref id='section'/>", because 'section' is the one <ref>
        that is not <xref>'d anywhere in the grammar.
        In most grammars, the default source will produce the
        longest (and most interesting) output.

        Raises NoSourceError when every <ref> is cross-referenced.
        """
        # A set gives constant-time membership tests below.
        referenced = set(
            xref.attributes["id"].value
            for xref in self.grammar.getElementsByTagName("xref")
        )
        standaloneXrefs = [e for e in self.refs if e not in referenced]
        if not standaloneXrefs:
            raise NoSourceError("can't guess source, and no source specified")
        return '<xref id="%s"/>' % random.choice(standaloneXrefs)

    def reset(self):
        """reset parser"""
        self.pieces = []
        self.capitalizeNextWord = 0

    def refresh(self):
        """reset output buffer, re-parse entire source file, and return output

        Since parsing involves a good deal of randomness, this is an
        easy way to get new output without having to reload a grammar file
        each time.
        """
        self.reset()
        self.parse(self.source)
        return self.output()

    def output(self):
        """output generated text"""
        return "".join(self.pieces)

    def randomChildElement(self, node):
        """choose a random child element of a node

        This is a utility method used by do_xref and do_choice.
        Only element children are candidates; text and comment
        children are ignored.
        """
        choices = [e for e in node.childNodes
                   if e.nodeType == e.ELEMENT_NODE]
        chosen = random.choice(choices)
        if _debug:
            sys.stderr.write('%s available choices: %s\n' %
                             (len(choices), [e.toxml() for e in choices]))
            sys.stderr.write('Chosen: %s\n' % chosen.toxml())
        return chosen

    def parse(self, node):
        """parse a single XML node

        A parsed XML document (from minidom.parse) is a tree of nodes
        of various types.  Each node is represented by an instance of the
        corresponding Python class (Element for a tag, Text for
        text data, Document for the top-level document).  The following
        statement constructs the name of a class method based on the type
        of node we're parsing ("parse_Element" for an Element node,
        "parse_Text" for a Text node, etc.) and then calls the method.
        """
        parseMethod = getattr(self, "parse_%s" % node.__class__.__name__)
        parseMethod(node)

    def parse_Document(self, node):
        """parse the document node

        The document node by itself isn't interesting (to us), but
        its only child, node.documentElement, is: it's the root node
        of the grammar.
        """
        self.parse(node.documentElement)

    def parse_Text(self, node):
        """parse a text node

        The text of a text node is usually added to the output buffer
        verbatim.  The one exception is that <p class='sentence'> sets
        a flag to capitalize the first letter of the next word.  If
        that flag is set, we capitalize the text and reset the flag.
        """
        text = node.data
        # An empty text node has nothing to capitalize; leave the flag
        # set so the next non-empty text node picks it up.
        if self.capitalizeNextWord and text:
            self.pieces.append(text[0].upper())
            self.pieces.append(text[1:])
            self.capitalizeNextWord = 0
        else:
            self.pieces.append(text)

    def parse_Element(self, node):
        """parse an element

        An XML element corresponds to an actual tag in the source:
        <xref id='...'>, <p chance='...'>, <choice>, etc.
        Each element type is handled in its own method.  Like we did in
        parse(), we construct a method name based on the name of the
        element ("do_xref" for an <xref> tag, etc.) and
        call the method.
        """
        handlerMethod = getattr(self, "do_%s" % node.tagName)
        handlerMethod(node)

    def parse_Comment(self, node):
        """parse a comment

        The grammar can contain XML comments, but we ignore them
        """
        pass

    def do_xref(self, node):
        """handle <xref id='...'> tag

        An <xref id='...'> tag is a cross-reference to a <ref id='...'>
        tag.  <xref id='sentence'/> evaluates to a randomly chosen child of
        <ref id='sentence'>.
        """
        ref_id = node.attributes["id"].value
        self.parse(self.randomChildElement(self.refs[ref_id]))

    def do_p(self, node):
        """handle <p> tag

        The <p> tag is the core of the grammar.  It can contain almost
        anything: freeform text, <choice> tags, <xref> tags, even other
        <p> tags.  If a "class='sentence'" attribute is found, a flag
        is set and the next word will be capitalized.  If a "chance='X'"
        attribute is found, there is an X% chance that the tag will be
        evaluated (and therefore a (100-X)% chance that it will be
        completely ignored)
        """
        # The capitalize flag is set before the chance roll, so it
        # stays set even when this <p> ends up being skipped.
        if node.getAttribute("class") == "sentence":
            self.capitalizeNextWord = 1
        if node.hasAttribute("chance"):
            chance = int(node.getAttribute("chance"))
            if chance <= random.randrange(100):
                return
        for child in node.childNodes:
            self.parse(child)

    def do_choice(self, node):
        """handle <choice> tag

        A <choice> tag contains one or more <p> tags.  One <p> tag
        is chosen at random and evaluated; the rest are ignored.
        """
        self.parse(self.randomChildElement(node))


def usage():
    """print the module docstring as the command-line help text"""
    print(__doc__)


def main(argv):
    """command-line entry point

    argv -- the argument list without the program name.  Recognized
    options are -h/--help, -d and -g/--grammar=...; the remaining
    arguments are concatenated and used as the source.  Exits with
    status 2 on an unrecognized option.
    """
    global _debug
    grammar = "kant.xml"
    try:
        opts, args = getopt.getopt(argv, "hg:d", ["help", "grammar="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt == '-d':
            _debug = 1
        elif opt in ("-g", "--grammar"):
            grammar = arg

    # An empty source makes KantGenerator guess one from the grammar.
    source = "".join(args)

    k = KantGenerator(grammar, source)
    print(k.output())


if __name__ == "__main__":
    main(sys.argv[1:])
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -