📄 markdown.py
字号:
def detabbed_fn(self, line) : """ An auxiliary method to be passed to _findHead """ m = RE.regExp['tabbed'].match(line) if m: return m.group(4) else : return None def detectTabbed(self, lines) : return self._findHead(lines, self.detabbed_fn, allowBlank = 1)def print_error(string): """Print an error string to stderr""" sys.stderr.write(string +'\n')def dequote(string) : """ Removes quotes from around a string """ if ( ( string.startswith('"') and string.endswith('"')) or (string.startswith("'") and string.endswith("'")) ) : return string[1:-1] else : return string"""================================================================================================ CORE MARKDOWN ===================================================================================================This stuff is ugly, so if you are thinking of extending the syntax,see first if you can do it via pre-processors, post-processors,inline patterns or a combination of the three."""class CorePatterns : """This class is scheduled for removal as part of a refactoring effort.""" patterns = { 'header': r'(#*)([^#]*)(#*)', # # A title 'reference-def' : r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)', # [Google]: http://www.google.com/ 'containsline': r'([-]*)$|^([=]*)', # -----, =====, etc. 'ol': r'[ ]{0,3}[\d]*\.\s+(.*)', # 1. text 'ul': r'[ ]{0,3}[*+-]\s+(.*)', # "* text" 'isline1': r'(\**)', # *** 'isline2': r'(\-*)', # --- 'isline3': r'(\_*)', # ___ 'tabbed': r'((\t)|( ))(.*)', # an indented line 'quoted' : r'> ?(.*)', # a quoted block ("> ...") } def __init__ (self) : self.regExp = {} for key in self.patterns.keys() : self.regExp[key] = re.compile("^%s$" % self.patterns[key], re.DOTALL) self.regExp['containsline'] = re.compile(r'^([-]*)$|^([=]*)$', re.M)RE = CorePatterns()class Markdown: """ Markdown formatter class for creating an html document from Markdown text """ def __init__(self, source=None, extensions=[], extension_configs=None, encoding=None, safe_mode = True): """Creates a new Markdown instance. @param source: The text in Markdown format. @param encoding: The character encoding of <text>. """ self.safeMode = safe_mode self.encoding = encoding self.source = source self.blockGuru = BlockGuru() self.registeredExtensions = [] self.stripTopLevelTags = 1 self.docType = "" self.preprocessors = [ HEADER_PREPROCESSOR, LINE_PREPROCESSOR, HTML_BLOCK_PREPROCESSOR, LINE_BREAKS_PREPROCESSOR, # A footnote preprocessor will # get inserted here REFERENCE_PREPROCESSOR ] self.postprocessors = [] # a footnote postprocessor will get # inserted later self.textPostprocessors = [] # a footnote postprocessor will get # inserted later self.prePatterns = [] self.inlinePatterns = [ DOUBLE_BACKTICK_PATTERN, BACKTICK_PATTERN, ESCAPE_PATTERN, IMAGE_LINK_PATTERN, IMAGE_REFERENCE_PATTERN, REFERENCE_PATTERN, LINK_ANGLED_PATTERN, LINK_PATTERN, AUTOLINK_PATTERN, AUTOMAIL_PATTERN, HTML_PATTERN, ENTITY_PATTERN, NOT_STRONG_PATTERN, STRONG_EM_PATTERN, STRONG_EM_PATTERN_2, STRONG_PATTERN, STRONG_PATTERN_2, EMPHASIS_PATTERN, EMPHASIS_PATTERN_2 # The order of the handlers matters!!! ] self.registerExtensions(extensions = extensions, configs = extension_configs) self.reset() def registerExtensions(self, extensions, configs) : if not configs : configs = {} for ext in extensions : extension_module_name = "mdx_" + ext try : module = __import__(extension_module_name) except : message(CRITICAL, "couldn't load extension %s (looking for %s module)" % (ext, extension_module_name) ) else : if configs.has_key(ext) : configs_for_ext = configs[ext] else : configs_for_ext = [] extension = module.makeExtension(configs_for_ext) extension.extendMarkdown(self, globals()) def registerExtension(self, extension) : """ This gets called by the extension """ self.registeredExtensions.append(extension) def reset(self) : """Resets all state variables so that we can start with a new text.""" self.references={} self.htmlStash = HtmlStash() HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash REFERENCE_PREPROCESSOR.references = self.references HTML_PATTERN.stash = self.htmlStash ENTITY_PATTERN.stash = self.htmlStash REFERENCE_PATTERN.references = self.references IMAGE_REFERENCE_PATTERN.references = self.references for extension in self.registeredExtensions : extension.reset() def _transform(self): """Transforms the Markdown text into a XHTML body document @returns: A NanoDom Document """ # Setup the document self.doc = Document() self.top_element = self.doc.createElement("span") self.top_element.appendChild(self.doc.createTextNode('\n')) self.top_element.setAttribute('class', 'markdown') self.doc.appendChild(self.top_element) # Fixup the source text text = self.source.strip() text = text.replace("\r\n", "\n").replace("\r", "\n") text += "\n\n" text = text.expandtabs(TAB_LENGTH) # Split into lines and run the preprocessors that will work with # self.lines self.lines = text.split("\n") # Run the pre-processors on the lines for prep in self.preprocessors : self.lines = prep.run(self.lines) # Create a NanoDom tree from the lines and attach it to Document buffer = [] for line in self.lines : if line.startswith("#") : self._processSection(self.top_element, buffer) buffer = [line] else : buffer.append(line) self._processSection(self.top_element, buffer) #self._processSection(self.top_element, self.lines) # Not sure why I put this in but let's leave it for now. self.top_element.appendChild(self.doc.createTextNode('\n')) # Run the post-processors for postprocessor in self.postprocessors : postprocessor.run(self.doc) return self.doc def _processSection(self, parent_elem, lines, inList = 0, looseList = 0) : """Process a section of a source document, looking for high level structural elements like lists, block quotes, code segments, html blocks, etc. Some those then get stripped of their high level markup (e.g. get unindented) and the lower-level markup is processed recursively. @param parent_elem: A NanoDom element to which the content will be added @param lines: a list of lines @param inList: a level @returns: None""" if not lines : return # Check if this section starts with a list, a blockquote or # a code block processFn = { 'ul' : self._processUList, 'ol' : self._processOList, 'quoted' : self._processQuote, 'tabbed' : self._processCodeBlock } for regexp in ['ul', 'ol', 'quoted', 'tabbed'] : m = RE.regExp[regexp].match(lines[0]) if m : processFn[regexp](parent_elem, lines, inList) return # We are NOT looking at one of the high-level structures like # lists or blockquotes. So, it's just a regular paragraph # (though perhaps nested inside a list or something else). If # we are NOT inside a list, we just need to look for a blank # line to find the end of the block. If we ARE inside a # list, however, we need to consider that a sublist does not # need to be separated by a blank line. Rather, the following # markup is legal: # # * The top level list item # # Another paragraph of the list. This is where we are now. # * Underneath we might have a sublist. # if inList : start, theRest = self._linesUntil(lines, (lambda line: RE.regExp['ul'].match(line) or RE.regExp['ol'].match(line) or not line.strip())) self._processSection(parent_elem, start, inList - 1, looseList = looseList) self._processSection(parent_elem, theRest, inList - 1, looseList = looseList) else : # Ok, so it's just a simple block paragraph, theRest = self._linesUntil(lines, lambda line: not line.strip()) if len(paragraph) and paragraph[0].startswith('#') : m = RE.regExp['header'].match(paragraph[0]) if m : level = len(m.group(1)) h = self.doc.createElement("h%d" % level) parent_elem.appendChild(h) for item in self._handleInlineWrapper2(m.group(2).strip()) : h.appendChild(item) else : message(CRITICAL, "We've got a problem header!") elif paragraph : list = self._handleInlineWrapper2("\n".join(paragraph)) if ( parent_elem.nodeName == 'li' and not (looseList or parent_elem.childNodes)): #and not parent_elem.childNodes) : # If this is the first paragraph inside "li", don't # put <p> around it - append the paragraph bits directly # onto parent_elem el = parent_elem else : # Otherwise make a "p" element el = self.doc.createElement("p") parent_elem.appendChild(el) for item in list : el.appendChild(item) if theRest : theRest = theRest[1:] # skip the first (blank) line self._processSection(parent_elem, theRest, inList) def _processUList(self, parent_elem, lines, inList) : self._processList(parent_elem, lines, inList, listexpr='ul', tag = 'ul') def _processOList(self, parent_elem, lines, inList) : self._processList(parent_elem, lines, inList, listexpr='ol', tag = 'ol') def _processList(self, parent_elem, lines, inList, listexpr, tag) : """Given a list of document lines starting with a list item, finds the end of the list, breaks it up, and recursively processes each list item and the remainder of the text file. @param parent_elem: A dom element to which the content will be added @param lines: a list of lines @param inList: a level @returns: None""" ul = self.doc.createElement(tag) # ul might actually be '<ol>' parent_elem.appendChild(ul) looseList = 0 # Make a list of list items items = [] item = -1 i = 0 # a counter to keep track of where we are for line in lines : loose = 0 if not line.strip() : # If we see a blank line, this _might_ be the end of the list i += 1 loose = 1 # Find the next non-blank line for j in range(i, len(lines)) : if lines[j].strip() : next = lines[j] break else : # There is no more text => end of the list break # Check if the next non-blank line is still a part of the list if ( RE.regExp['ul'].match(next) or RE.regExp['ol'].match(next) or RE.regExp['tabbed'].match(next) ): # get rid of any white space in the line items[item].append(line.strip()) looseList = loose or looseList continue else : break # found end of the list # Now we need to detect list items (at the current level) # while also detabing child elements if necessary for expr in ['ul', 'ol', 'tabbed']: m = RE.regExp[expr].match(line) if m : if expr in ['ul', 'ol'] : # We are looking at a new item if m.group(1) : items.append([m.group(1)]) item += 1 elif expr == 'tabbed' : # This line needs to be detabbed items[item].append(m.group(4)) #after the 'tab' i += 1 break else : items[item].append(line) # Just regular continuation
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -