📄 markdown.py

📁 SQLAlchemy. 经典的Python ORM框架。学习必看。
💻 PY
📖 第 1 页 / 共 4 页
字号:
    def detabbed_fn(self, line) :        """ An auxiliary method to be passed to _findHead """        m = RE.regExp['tabbed'].match(line)        if m:            return m.group(4)        else :            return None    def detectTabbed(self, lines) :        return self._findHead(lines, self.detabbed_fn,                              allowBlank = 1)def print_error(string):    """Print an error string to stderr"""    sys.stderr.write(string +'\n')def dequote(string) :    """ Removes quotes from around a string """    if ( ( string.startswith('"') and string.endswith('"'))         or (string.startswith("'") and string.endswith("'")) ) :        return string[1:-1]    else :        return string"""================================================================================================ CORE MARKDOWN ===================================================================================================This stuff is ugly, so if you are thinking of extending the syntax,see first if you can do it via pre-processors, post-processors,inline patterns or a combination of the three."""class CorePatterns :    """This class is scheduled for removal as part of a refactoring        effort."""    patterns = {        'header':          r'(#*)([^#]*)(#*)', # # A title        'reference-def' :  r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)',                           # [Google]: http://www.google.com/        'containsline':    r'([-]*)$|^([=]*)', # -----, =====, etc.        'ol':              r'[ ]{0,3}[\d]*\.\s+(.*)', # 1. text        'ul':              r'[ ]{0,3}[*+-]\s+(.*)', # "* text"        'isline1':         r'(\**)', # ***        'isline2':         r'(\-*)', # ---        'isline3':         r'(\_*)', # ___        'tabbed':          r'((\t)|(    ))(.*)', # an indented line        'quoted' :         r'> ?(.*)', # a quoted block ("> ...")    }    def __init__ (self) :        self.regExp = {}        for key in self.patterns.keys() :            self.regExp[key] = re.compile("^%s$" % self.patterns[key],                                          re.DOTALL)        self.regExp['containsline'] = re.compile(r'^([-]*)$|^([=]*)$', re.M)RE = CorePatterns()class Markdown:    """ Markdown formatter class for creating an html document from        Markdown text """    def __init__(self, source=None,                 extensions=[],                 extension_configs=None,                 encoding=None,                 safe_mode = True):        """Creates a new Markdown instance.           @param source: The text in Markdown format.           @param encoding: The character encoding of <text>. """        self.safeMode = safe_mode        self.encoding = encoding        self.source = source        self.blockGuru = BlockGuru()        self.registeredExtensions = []        self.stripTopLevelTags = 1        self.docType = ""        self.preprocessors = [ HEADER_PREPROCESSOR,                               LINE_PREPROCESSOR,                               HTML_BLOCK_PREPROCESSOR,                               LINE_BREAKS_PREPROCESSOR,                               # A footnote preprocessor will                               # get inserted here                               REFERENCE_PREPROCESSOR ]        self.postprocessors = [] # a footnote postprocessor will get                                 # inserted later        self.textPostprocessors = [] # a footnote postprocessor will get                                     # inserted later                                         self.prePatterns = []                self.inlinePatterns = [ DOUBLE_BACKTICK_PATTERN,                                BACKTICK_PATTERN,                                ESCAPE_PATTERN,                                IMAGE_LINK_PATTERN,                                IMAGE_REFERENCE_PATTERN,                                REFERENCE_PATTERN,                                LINK_ANGLED_PATTERN,                                LINK_PATTERN,                                AUTOLINK_PATTERN,                                AUTOMAIL_PATTERN,                                HTML_PATTERN,                                ENTITY_PATTERN,                                NOT_STRONG_PATTERN,                                STRONG_EM_PATTERN,                                STRONG_EM_PATTERN_2,                                STRONG_PATTERN,                                STRONG_PATTERN_2,                                EMPHASIS_PATTERN,                                EMPHASIS_PATTERN_2                                # The order of the handlers matters!!!                                ]        self.registerExtensions(extensions = extensions,                                configs = extension_configs)        self.reset()    def registerExtensions(self, extensions, configs) :        if not configs :            configs = {}        for ext in extensions :            extension_module_name = "mdx_" + ext            try :                module = __import__(extension_module_name)            except :                message(CRITICAL,                        "couldn't load extension %s (looking for %s module)"                        % (ext, extension_module_name) )            else :                if configs.has_key(ext) :                    configs_for_ext = configs[ext]                else :                    configs_for_ext = []                extension = module.makeExtension(configs_for_ext)                    extension.extendMarkdown(self, globals())    def registerExtension(self, extension) :        """ This gets called by the extension """        self.registeredExtensions.append(extension)    def reset(self) :        """Resets all state variables so that we can start            with a new text."""        self.references={}        self.htmlStash = HtmlStash()        HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash        REFERENCE_PREPROCESSOR.references = self.references        HTML_PATTERN.stash = self.htmlStash        ENTITY_PATTERN.stash = self.htmlStash        REFERENCE_PATTERN.references = self.references        IMAGE_REFERENCE_PATTERN.references = self.references        for extension in self.registeredExtensions :            extension.reset()    def _transform(self):        """Transforms the Markdown text into a XHTML body document           @returns: A NanoDom Document """        # Setup the document        self.doc = Document()        self.top_element = self.doc.createElement("span")        self.top_element.appendChild(self.doc.createTextNode('\n'))        self.top_element.setAttribute('class', 'markdown')        self.doc.appendChild(self.top_element)        # Fixup the source text        text = self.source.strip()        text = text.replace("\r\n", "\n").replace("\r", "\n")        text += "\n\n"        text = text.expandtabs(TAB_LENGTH)        # Split into lines and run the preprocessors that will work with        # self.lines        self.lines = text.split("\n")        # Run the pre-processors on the lines        for prep in self.preprocessors :            self.lines = prep.run(self.lines)        # Create a NanoDom tree from the lines and attach it to Document        buffer = []        for line in self.lines :            if line.startswith("#") :                self._processSection(self.top_element, buffer)                buffer = [line]            else :                buffer.append(line)        self._processSection(self.top_element, buffer)                #self._processSection(self.top_element, self.lines)        # Not sure why I put this in but let's leave it for now.        self.top_element.appendChild(self.doc.createTextNode('\n'))        # Run the post-processors        for postprocessor in self.postprocessors :            postprocessor.run(self.doc)        return self.doc    def _processSection(self, parent_elem, lines,                        inList = 0, looseList = 0) :        """Process a section of a source document, looking for high           level structural elements like lists, block quotes, code           segments, html blocks, etc.  Some those then get stripped           of their high level markup (e.g. get unindented) and the           lower-level markup is processed recursively.           @param parent_elem: A NanoDom element to which the content                               will be added           @param lines: a list of lines           @param inList: a level           @returns: None"""        if not lines :            return        # Check if this section starts with a list, a blockquote or        # a code block        processFn = { 'ul' :     self._processUList,                      'ol' :     self._processOList,                      'quoted' : self._processQuote,                      'tabbed' : self._processCodeBlock }        for regexp in ['ul', 'ol', 'quoted', 'tabbed'] :            m = RE.regExp[regexp].match(lines[0])            if m :                processFn[regexp](parent_elem, lines, inList)                return        # We are NOT looking at one of the high-level structures like        # lists or blockquotes.  So, it's just a regular paragraph        # (though perhaps nested inside a list or something else).  If        # we are NOT inside a list, we just need to look for a blank        # line to find the end of the block.  If we ARE inside a        # list, however, we need to consider that a sublist does not        # need to be separated by a blank line.  Rather, the following        # markup is legal:        #        # * The top level list item        #        #     Another paragraph of the list.  This is where we are now.        #     * Underneath we might have a sublist.        #        if inList :            start, theRest = self._linesUntil(lines, (lambda line:                             RE.regExp['ul'].match(line)                             or RE.regExp['ol'].match(line)                                              or not line.strip()))            self._processSection(parent_elem, start,                                 inList - 1, looseList = looseList)            self._processSection(parent_elem, theRest,                                 inList - 1, looseList = looseList)        else : # Ok, so it's just a simple block            paragraph, theRest = self._linesUntil(lines, lambda line:                                                 not line.strip())            if len(paragraph) and paragraph[0].startswith('#') :                m = RE.regExp['header'].match(paragraph[0])                if m :                    level = len(m.group(1))                    h = self.doc.createElement("h%d" % level)                    parent_elem.appendChild(h)                    for item in self._handleInlineWrapper2(m.group(2).strip()) :                        h.appendChild(item)                else :                    message(CRITICAL, "We've got a problem header!")            elif paragraph :                list = self._handleInlineWrapper2("\n".join(paragraph))                if ( parent_elem.nodeName == 'li'                     and not (looseList or parent_elem.childNodes)):                    #and not parent_elem.childNodes) :                    # If this is the first paragraph inside "li", don't                    # put <p> around it - append the paragraph bits directly                    # onto parent_elem                    el = parent_elem                else :                    # Otherwise make a "p" element                    el = self.doc.createElement("p")                    parent_elem.appendChild(el)                for item in list :                    el.appendChild(item)            if theRest :                theRest = theRest[1:]  # skip the first (blank) line            self._processSection(parent_elem, theRest, inList)    def _processUList(self, parent_elem, lines, inList) :        self._processList(parent_elem, lines, inList,                         listexpr='ul', tag = 'ul')    def _processOList(self, parent_elem, lines, inList) :        self._processList(parent_elem, lines, inList,                         listexpr='ol', tag = 'ol')    def _processList(self, parent_elem, lines, inList, listexpr, tag) :        """Given a list of document lines starting with a list item,           finds the end of the list, breaks it up, and recursively           processes each list item and the remainder of the text file.           @param parent_elem: A dom element to which the content will be added           @param lines: a list of lines           @param inList: a level           @returns: None"""        ul = self.doc.createElement(tag)  # ul might actually be '<ol>'        parent_elem.appendChild(ul)        looseList = 0        # Make a list of list items        items = []        item = -1        i = 0  # a counter to keep track of where we are        for line in lines :            loose = 0            if not line.strip() :                # If we see a blank line, this _might_ be the end of the list                i += 1                loose = 1                # Find the next non-blank line                for j in range(i, len(lines)) :                    if lines[j].strip() :                        next = lines[j]                        break                else :                    # There is no more text => end of the list                    break                # Check if the next non-blank line is still a part of the list                if ( RE.regExp['ul'].match(next) or                     RE.regExp['ol'].match(next) or                      RE.regExp['tabbed'].match(next) ):                    # get rid of any white space in the line                    items[item].append(line.strip())                    looseList = loose or looseList                    continue                else :                    break # found end of the list            # Now we need to detect list items (at the current level)            # while also detabing child elements if necessary            for expr in ['ul', 'ol', 'tabbed']:                m = RE.regExp[expr].match(line)                if m :                    if expr in ['ul', 'ol'] :  # We are looking at a new item                        if m.group(1) :                            items.append([m.group(1)])                            item += 1                    elif expr == 'tabbed' :  # This line needs to be detabbed                        items[item].append(m.group(4)) #after the 'tab'                    i += 1                    break            else :                items[item].append(line)  # Just regular continuation
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -