📄 markdown.py

📁 SQLAlchemy. 经典的Python ORM框架。学习必看。
💻 PY
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
#!/usr/bin/env python

# The following constant specifies the name used in the usage
# statement displayed for python versions lower than 2.3.  (With
# python2.3 and higher the usage statement is generated by optparse
# and uses the actual name of the executable called.)

EXECUTABLE_NAME_FOR_USAGE = "python markdown.py"

SPEED_TEST = 0

"""
====================================================================
IF YOU ARE LOOKING TO EXTEND MARKDOWN, SEE THE "FOOTNOTES" SECTION
====================================================================

Python-Markdown
===============

Converts Markdown to HTML.  Basic usage as a module:

    import markdown
    html = markdown.markdown(your_text_string)

Started by [Manfred Stienstra](http://www.dwerg.net/).  Continued and
maintained  by [Yuri Takhteyev](http://www.freewisdom.org).

Project website: http://www.freewisdom.org/projects/python-markdown
Contact: yuri [at] freewisdom.org

License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD

Version: 1.5a (July 9, 2006)

For changelog, see end of file
"""

import re, sys, os, random, codecs

# set debug level: 3 none, 2 critical, 1 informative, 0 all
(VERBOSE, INFO, CRITICAL, NONE) = range(4)

MESSAGE_THRESHOLD = CRITICAL


def message(level, text):
    """Print `text` if `level` is at or above MESSAGE_THRESHOLD."""
    if level >= MESSAGE_THRESHOLD:
        # Parenthesised single-argument form prints identically on
        # Python 2 and is also valid Python 3 syntax.
        print(text)


# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------

# all tabs will be expanded to up to this many spaces
TAB_LENGTH = 4
ENABLE_ATTRIBUTES = 1
SMART_EMPHASIS = 1

# --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ----------

# a template for html placeholders
HTML_PLACEHOLDER_PREFIX = "qaodmasdkwaspemas"
HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%dajkqlsmdqpakldnzsdfls"

BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table',
                        'dl', 'ol', 'ul', 'script', 'noscript',
                        'form', 'fieldset', 'iframe', 'math', 'ins',
                        'del', 'hr', 'hr/', 'style']


def is_block_level(tag):
    """Return True if `tag` names a block-level HTML element.

    A tag qualifies when it is listed in BLOCK_LEVEL_ELEMENTS or when
    it starts with "h" followed by a digit.  Tags shorter than two
    characters that are not in the list yield False instead of raising
    IndexError.
    """
    return (tag in BLOCK_LEVEL_ELEMENTS
            or (len(tag) > 1
                and tag[0] == 'h'
                and tag[1] in "0123456789"))


"""
======================================================================
========================== NANODOM ===================================
======================================================================

The three classes below implement some of the most basic DOM
methods.  I use this instead of minidom because I need a simpler
functionality and do not want to require additional libraries.

Importantly, NanoDom does not do normalization, which is what we
want. It also adds extra white space when converting DOM to string
"""


class Document:
    """Minimal document node: owns the root element and creates nodes."""

    def __init__(self):
        # Initialise state up front so createEntityReference() and
        # unlink()-style checks work before a root has been attached.
        self.documentElement = None
        self.entities = {}

    def appendChild(self, child):
        """Install `child` as the root element of this document."""
        self.documentElement = child
        child.parent = self
        # Attaching a root starts a fresh entity-reference cache.
        self.entities = {}

    def createElement(self, tag, textNode=None):
        """Create an Element named `tag` that belongs to this document.

        If `textNode` is a non-empty string, a text node holding it is
        appended to the new element.
        """
        el = Element(tag)
        el.doc = self
        if textNode:
            el.appendChild(self.createTextNode(textNode))
        return el

    def createTextNode(self, text):
        """Create a TextNode holding `text` that belongs to this document."""
        node = TextNode(text)
        node.doc = self
        return node

    def createEntityReference(self, entity):
        """Return the (cached) EntityReference for the entity name."""
        if entity not in self.entities:
            self.entities[entity] = EntityReference(entity)
        return self.entities[entity]

    def toxml(self):
        """Serialize the document by serializing its root element."""
        return self.documentElement.toxml()

    def normalizeEntities(self, text):
        """Escape &, <, > and double quotes in `text`."""
        # "&" must be replaced first so the ampersands introduced by the
        # later replacements are not escaped a second time.
        pairs = [("&", "&amp;"),
                 ("<", "&lt;"),
                 (">", "&gt;"),
                 ("\"", "&quot;")]
        for old, new in pairs:
            text = text.replace(old, new)
        return text

    def find(self, test):
        """Return descendants of the root for which `test(node)` is true."""
        return self.documentElement.find(test)

    def unlink(self):
        """Unlink the element tree and drop the reference to the root."""
        self.documentElement.unlink()
        self.documentElement = None


class Element:
    """Element node with ordered attributes and a list of children."""

    type = "element"

    def __init__(self, tag):
        self.nodeName = tag
        # `attributes` keeps insertion order; values live in the dict.
        self.attributes = []
        self.attribute_values = {}
        self.childNodes = []

    def unlink(self):
        """Recursively unlink child elements, then drop the child list."""
        for child in self.childNodes:
            if child.type == "element":
                child.unlink()
        self.childNodes = None

    def setAttribute(self, attr, value):
        """Set attribute `attr` to `value`, remembering first-set order."""
        if attr not in self.attributes:
            self.attributes.append(attr)
        self.attribute_values[attr] = value

    def insertChild(self, position, child):
        """Insert `child` at index `position` and set its parent."""
        self.childNodes.insert(position, child)
        child.parent = self

    def removeChild(self, child):
        """Remove `child` from the child list."""
        self.childNodes.remove(child)

    def replaceChild(self, oldChild, newChild):
        """Put `newChild` at the position currently held by `oldChild`."""
        position = self.childNodes.index(oldChild)
        self.removeChild(oldChild)
        self.insertChild(position, newChild)

    def appendChild(self, child):
        """Append `child` to the child list and set its parent."""
        self.childNodes.append(child)
        child.parent = self

    def handleAttributes(self):
        """No-op; only text nodes carry {@name=value} markers."""
        pass

    def find(self, test, depth=0):
        """ Returns a list of descendants that pass the test function """
        matched_nodes = []
        for child in self.childNodes:
            if test(child):
                matched_nodes.append(child)
            if child.type == "element":
                matched_nodes += child.find(test, depth + 1)
        return matched_nodes

    def toxml(self):
        """Serialize this element and its subtree to an XML string."""
        # Children may set attributes on this element (text nodes do so
        # for {@name=value} markers), so this must run before the
        # attributes are written out below.
        if ENABLE_ATTRIBUTES:
            for child in self.childNodes:
                child.handleAttributes()

        parts = []
        if self.nodeName in ['h1', 'h2', 'h3', 'h4']:
            parts.append("\n")
        elif self.nodeName in ['li']:
            parts.append("\n ")

        parts.append("<" + self.nodeName)
        for attr in self.attributes:
            value = self.attribute_values[attr]
            value = self.doc.normalizeEntities(value)
            parts.append(' %s="%s"' % (attr, value))

        # An empty blockquote is still written with an explicit close tag.
        if self.childNodes or self.nodeName in ['blockquote']:
            parts.append(">")
            for child in self.childNodes:
                parts.append(child.toxml())
            if self.nodeName == 'p':
                parts.append("\n")
            elif self.nodeName == 'li':
                parts.append("\n ")
            parts.append("</%s>" % self.nodeName)
        else:
            parts.append("/>")

        if self.nodeName in ['p', 'li', 'ul', 'ol',
                             'h1', 'h2', 'h3', 'h4']:
            parts.append("\n")
        return "".join(parts)


class TextNode:
    """Text node; may carry {@name=value} markers for its parent."""

    type = "text"
    attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}')  # {@id=123}

    def __init__(self, text):
        self.value = text

    def attributeCallback(self, match):
        """Copy a matched {@name=value} marker onto the parent element."""
        self.parent.setAttribute(match.group(1), match.group(2))
        # The marker itself is removed from the text.
        return ""

    def handleAttributes(self):
        """Strip attribute markers from the text, applying each to the parent."""
        self.value = self.attrRegExp.sub(self.attributeCallback, self.value)

    def toxml(self):
        """Return the entity-escaped text, indented for <p> and <li> parents."""
        text = self.value
        if not text.startswith(HTML_PLACEHOLDER_PREFIX):
            if self.parent.nodeName == "p":
                text = text.replace("\n", "\n   ")
            elif (self.parent.nodeName == "li"
                  and self.parent.childNodes[0] == self):
                text = "\n     " + text.replace("\n", "\n     ")
        text = self.doc.normalizeEntities(text)
        return text


class EntityReference:
    """Node that serializes as a named entity reference (&name;)."""

    type = "entity_ref"

    def __init__(self, entity):
        self.entity = entity

    def handleAttributes(self):
        """No-op; entity references carry no attribute markers."""
        pass

    def toxml(self):
        """Return the reference in "&name;" form."""
        return "&" + self.entity + ";"


"""
======================================================================
========================== PRE-PROCESSORS ============================
======================================================================

Preprocessors munge source text before we start doing anything too
complicated.

Each preprocessor implements a "run" method that takes a pointer to a
list of lines of the document, modifies it as necessary and returns
either the same pointer or a pointer to a new list.  Preprocessors
must extend markdown.Preprocessor.
"""


class Preprocessor:
    """Base class for all preprocessors."""
    pass


class HeaderPreprocessor(Preprocessor):
    """
       Replaces underlined headers with hashed headers to avoid
       the need for lookahead later.
    """

    def run(self, lines):
        """Rewrite underlined headers in `lines` in place and return it.

        A line followed by a run of "=" becomes "# line"; one followed
        by a run of "-" becomes "## line"; the underline is blanked.
        A "\\n" entry is inserted after every line starting with "#".
        """
        i = -1
        while i + 1 < len(lines):
            i = i + 1
            if not lines[i].strip():
                continue

            if lines[i].startswith("#"):
                lines.insert(i + 1, "\n")

            # Strict "<" so the final line is never looked past; "<="
            # was always true here and raised IndexError on a trailing
            # non-blank line.
            if (i + 1 < len(lines)
                    and lines[i + 1]
                    and lines[i + 1][0] in ['-', '=']):
                underline = lines[i + 1].strip()
                if underline == "=" * len(underline):
                    lines[i] = "# " + lines[i].strip()
                    lines[i + 1] = ""
                elif underline == "-" * len(underline):
                    lines[i] = "## " + lines[i].strip()
                    lines[i + 1] = ""

        return lines


HEADER_PREPROCESSOR = HeaderPreprocessor()


class LinePreprocessor(Preprocessor):
    """Deals with HR lines (needs to be done before processing lists)"""

    def run(self, lines):
        """Replace every horizontal-rule line in `lines` with "<hr />"."""
        for i, line in enumerate(lines):
            if self._isLine(line):
                lines[i] = "<hr />"
        return lines

    def _isLine(self, block):
        """Determines if a block should be replaced with an <HR>"""
        if block.startswith("    "):
            return 0  # a code block
        text = "".join([x for x in block if not x.isspace()])
        if len(text) <= 2:
            return 0
        # RE is defined elsewhere in this module.
        for pattern in ['isline1', 'isline2', 'isline3']:
            m = RE.regExp[pattern].match(text)
            if (m and m.group(1)):
                return 1
        return 0


LINE_PREPROCESSOR = LinePreprocessor()


class LineBreaksPreprocessor(Preprocessor):
    """Replaces double spaces at the end of the lines with <br />."""

    def run(self, lines):
        """Append "<br />" to lines ending in two spaces, except tabbed ones."""
        for i, line in enumerate(lines):
            if (line.endswith("  ")
                    and not RE.regExp['tabbed'].match(line)):
                lines[i] += "<br />"
        return lines


LINE_BREAKS_PREPROCESSOR = LineBreaksPreprocessor()


class HtmlBlockPreprocessor(Preprocessor):
    """Removes html blocks from self.lines"""

    def _get_left_tag(self, block):
        """Return the lower-cased name of the tag that opens `block`."""
        return block[1:].replace(">", " ", 1).split()[0].lower()

    def _get_right_tag(self, left_tag, block):
        """Return the lower-cased text just before the final character of
        `block`, sized to hold "/" + left_tag."""
        return block.rstrip()[-len(left_tag) - 2:-1].lower()

    def _equal_tags(self, left_tag, right_tag):
        """Return True if `right_tag` is accepted as closing `left_tag`."""
        if left_tag in ['?', '?php', 'div']:  # handle PHP, etc.
            return True
        if ("/" + left_tag) == right_tag:
            return True
        elif (left_tag == right_tag[1:]
              and right_tag[0] != "<"):
            return True
        else:
            return False

    def _is_oneliner(self, tag):
        """Return True for tags that never need a closing tag (hr)."""
        return (tag in ['hr', 'hr/'])

    def run(self, lines):
        # NOTE(review): the source excerpt ends part-way through this
        # method.  Only the statements visible in the excerpt are
        # reproduced below, unchanged in logic; the remainder (including
        # whatever is finally returned) must be restored from the full
        # file.
        new_blocks = []
        text = "\n".join(lines)
        text = text.split("\n\n")

        items = []
        left_tag = ''
        right_tag = ''
        in_tag = False  # flag

        for block in text:
            if block.startswith("\n"):
                block = block[1:]

            if not in_tag:

                if block.startswith("<"):

                    left_tag = self._get_left_tag(block)
                    right_tag = self._get_right_tag(left_tag, block)

                    if not (is_block_level(left_tag)
                            or block[1] in ["!", "?", "@", "%"]):
                        new_blocks.append(block)
                        continue

                    if self._is_oneliner(left_tag):
                        new_blocks.append(block.strip())
                        continue

                    if block[1] == "!":
                        # is a comment block
                        left_tag = "--"
                        right_tag = self._get_right_tag(left_tag, block)
                        # keep checking conditions below and maybe just append

                    if (block.rstrip().endswith(">")
                            and self._equal_tags(left_tag, right_tag)):
                        new_blocks.append(
                            self.stash.store(block.strip()))
12 3 4 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -