📄 markdown.py
字号:
#!/usr/bin/env python

# The following constant specifies the name used in the usage
# statement displayed for python versions lower than 2.3.  (With
# python2.3 and higher the usage statement is generated by optparse
# and uses the actual name of the executable called.)

EXECUTABLE_NAME_FOR_USAGE = "python markdown.py"

SPEED_TEST = 0

"""
====================================================================
IF YOU ARE LOOKING TO EXTEND MARKDOWN, SEE THE "FOOTNOTES" SECTION
====================================================================

Python-Markdown
===============

Converts Markdown to HTML.  Basic usage as a module:

    import markdown
    html = markdown.markdown(your_text_string)

Started by [Manfred Stienstra](http://www.dwerg.net/).  Continued and
maintained by [Yuri Takhteyev](http://www.freewisdom.org).

Project website: http://www.freewisdom.org/projects/python-markdown
Contact: yuri [at] freewisdom.org

License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD

Version: 1.5a (July 9, 2006)

For changelog, see end of file
"""

import re, sys, os, random, codecs

# set debug level: 3 none, 2 critical, 1 informative, 0 all
(VERBOSE, INFO, CRITICAL, NONE) = range(4)

MESSAGE_THRESHOLD = CRITICAL


def message(level, text):
    """Print ``text`` when ``level`` is at or above MESSAGE_THRESHOLD."""
    if level >= MESSAGE_THRESHOLD:
        # Parenthesised single-argument form: prints the same text under
        # Python 2 and is also valid syntax under Python 3.
        print(text)


# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------

# all tabs will be expanded to up to this many spaces
TAB_LENGTH = 4
ENABLE_ATTRIBUTES = 1
SMART_EMPHASIS = 1

# --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ----------

# a template for html placeholders
HTML_PLACEHOLDER_PREFIX = "qaodmasdkwaspemas"
HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%dajkqlsmdqpakldnzsdfls"

BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table',
                        'dl', 'ol', 'ul', 'script', 'noscript',
                        'form', 'fieldset', 'iframe', 'math', 'ins',
                        'del', 'hr', 'hr/', 'style']


def is_block_level(tag):
    """Return True if ``tag`` names a block-level element.

    A tag qualifies when it is listed in BLOCK_LEVEL_ELEMENTS or when it
    starts with 'h' followed by a digit (h1, h2, ...).  Tags shorter than
    two characters that are not in the list yield False instead of
    raising IndexError.
    """
    if tag in BLOCK_LEVEL_ELEMENTS:
        return True
    # Length guard: indexing tag[1] on '' or 'h' used to raise IndexError.
    return len(tag) > 1 and tag[0] == 'h' and tag[1] in "0123456789"


"""
======================================================================
========================== NANODOM ===================================
======================================================================

The three classes below implement some of the most basic DOM
methods.  I use this instead of minidom because I need a simpler
functionality and do not want to require additional libraries.

Importantly, NanoDom does not do normalization, which is what we
want. It also adds extra white space when converting DOM to string
"""


class Document:
    """Root of a NanoDom tree; also acts as the factory for its nodes."""

    def appendChild(self, child):
        """Install ``child`` as the document element.

        Also (re)initialises the entity-reference cache, so
        createEntityReference() only works after this has been called.
        """
        self.documentElement = child
        child.parent = self
        self.entities = {}

    def createElement(self, tag, textNode=None):
        """Create an Element named ``tag`` owned by this document.

        If ``textNode`` is a non-empty string, a TextNode holding it is
        appended as the element's first child.
        """
        el = Element(tag)
        el.doc = self
        if textNode:
            el.appendChild(self.createTextNode(textNode))
        return el

    def createTextNode(self, text):
        """Create a TextNode holding ``text`` owned by this document."""
        node = TextNode(text)
        node.doc = self
        return node

    def createEntityReference(self, entity):
        """Return the EntityReference for ``entity``, creating it once."""
        if entity not in self.entities:
            self.entities[entity] = EntityReference(entity)
        return self.entities[entity]

    def toxml(self):
        """Serialise the document element (and its subtree) to a string."""
        return self.documentElement.toxml()

    def normalizeEntities(self, text):
        """Escape the characters & < > and " in ``text`` as HTML entities."""
        # "&" must be handled first so the ampersands introduced by the
        # later replacements are not escaped a second time.
        pairs = [("&", "&amp;"),
                 ("<", "&lt;"),
                 (">", "&gt;"),
                 ("\"", "&quot;")]
        for old, new in pairs:
            text = text.replace(old, new)
        return text

    def find(self, test):
        """Return descendants of the document element that pass ``test``."""
        return self.documentElement.find(test)

    def unlink(self):
        """Unlink the whole tree and drop the document element."""
        self.documentElement.unlink()
        self.documentElement = None


class Element:
    """An element node: a tag name, ordered attributes and child nodes."""

    type = "element"

    def __init__(self, tag):
        self.nodeName = tag
        # Attribute names are kept in a list to preserve insertion order;
        # their values live in the companion dictionary.
        self.attributes = []
        self.attribute_values = {}
        self.childNodes = []

    def unlink(self):
        """Recursively unlink element children, then drop the child list."""
        for child in self.childNodes:
            if child.type == "element":
                child.unlink()
        self.childNodes = None

    def setAttribute(self, attr, value):
        """Set attribute ``attr`` to ``value``, keeping first-set order."""
        if not attr in self.attributes:
            self.attributes.append(attr)
        self.attribute_values[attr] = value

    def insertChild(self, position, child):
        """Insert ``child`` at index ``position`` and set its parent."""
        self.childNodes.insert(position, child)
        child.parent = self

    def removeChild(self, child):
        """Remove ``child`` from the child list."""
        self.childNodes.remove(child)

    def replaceChild(self, oldChild, newChild):
        """Put ``newChild`` where ``oldChild`` currently is."""
        position = self.childNodes.index(oldChild)
        self.removeChild(oldChild)
        self.insertChild(position, newChild)

    def appendChild(self, child):
        """Append ``child`` to the child list and set its parent."""
        self.childNodes.append(child)
        child.parent = self

    def handleAttributes(self):
        """No-op; elements carry no inline attribute markup themselves."""
        pass

    def find(self, test, depth=0):
        """ Returns a list of descendants that pass the test function """
        matched_nodes = []
        for child in self.childNodes:
            if test(child):
                matched_nodes.append(child)
            if child.type == "element":
                matched_nodes += child.find(test, depth+1)
        return matched_nodes

    def toxml(self):
        """Serialise this element and its subtree to a string."""
        if ENABLE_ATTRIBUTES:
            # Let children move any inline attribute markup onto their
            # parent before the opening tag is written.
            for child in self.childNodes:
                child.handleAttributes()

        buffer = ""
        # NOTE(review): the whitespace inside the "\n " literals in this
        # method may have been collapsed in the copy this was rebuilt
        # from; confirm the intended indent widths against a pristine
        # source.
        if self.nodeName in ['h1', 'h2', 'h3', 'h4']:
            buffer += "\n"
        elif self.nodeName in ['li']:
            buffer += "\n "
        buffer += "<" + self.nodeName
        for attr in self.attributes:
            value = self.attribute_values[attr]
            value = self.doc.normalizeEntities(value)
            buffer += ' %s="%s"' % (attr, value)
        # A blockquote always gets an explicit closing tag, even if empty.
        if self.childNodes or self.nodeName in ['blockquote']:
            buffer += ">"
            for child in self.childNodes:
                buffer += child.toxml()
            if self.nodeName == 'p':
                buffer += "\n"
            elif self.nodeName == 'li':
                buffer += "\n "
            buffer += "</%s>" % self.nodeName
        else:
            buffer += "/>"
        if self.nodeName in ['p', 'li', 'ul', 'ol',
                             'h1', 'h2', 'h3', 'h4']:
            buffer += "\n"
        return buffer


class TextNode:
    """A text node; may carry inline ``{@name=value}`` attribute markup."""

    type = "text"
    attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}')  # {@id=123}

    def __init__(self, text):
        self.value = text

    def attributeCallback(self, match):
        """Set the matched name/value pair as an attribute on the parent.

        Returns None; it is used as the ``sub`` callback in
        handleAttributes().
        """
        self.parent.setAttribute(match.group(1), match.group(2))

    def handleAttributes(self):
        """Run attributeCallback over every ``{@name=value}`` in the text."""
        self.value = self.attrRegExp.sub(self.attributeCallback, self.value)

    def toxml(self):
        """Return the text, entity-escaped and indented for its parent."""
        text = self.value
        if not text.startswith(HTML_PLACEHOLDER_PREFIX):
            # NOTE(review): the indent widths inside these literals may
            # have been collapsed in the copy this was rebuilt from;
            # confirm against a pristine source.
            if self.parent.nodeName == "p":
                text = text.replace("\n", "\n ")
            elif (self.parent.nodeName == "li"
                  and self.parent.childNodes[0] == self):
                text = "\n " + text.replace("\n", "\n ")
        text = self.doc.normalizeEntities(text)
        return text


class EntityReference:
    """A named entity reference, serialised as ``&name;``."""

    type = "entity_ref"

    def __init__(self, entity):
        self.entity = entity

    def handleAttributes(self):
        """No-op; entity references carry no attribute markup."""
        pass

    def toxml(self):
        """Return the reference in ``&name;`` form."""
        return "&" + self.entity + ";"


"""
======================================================================
========================== PRE-PROCESSORS ============================
======================================================================

Preprocessors munge source text before we start doing anything too
complicated.

Each preprocessor implements a "run" method that takes a pointer to a
list of lines of the document, modifies it as necessary and returns
either the same pointer or a pointer to a new list.  Preprocessors
must extend markdown.Preprocessor.
"""


class Preprocessor:
    pass


class HeaderPreprocessor(Preprocessor):
    """
    Replaces underlined headers with hashed headers to avoid
    the need for lookahead later.
    """

    def run(self, lines):
        """Rewrite setext-style headers in ``lines`` in place.

        A line followed by a row of '=' becomes "# line"; followed by a
        row of '-' it becomes "## line"; the underline is blanked.  A
        "\\n" entry is inserted after every line starting with '#'.
        Returns the same list object.
        """
        i = -1
        while i+1 < len(lines):
            i = i+1
            if not lines[i].strip():
                continue

            if lines[i].startswith("#"):
                lines.insert(i+1, "\n")

            # Strict '<': the last line has no successor to inspect
            # ('<=' here raised IndexError on lines[i+1]).
            if (i+1 < len(lines)
                    and lines[i+1]
                    and lines[i+1][0] in ['-', '=']):
                underline = lines[i+1].strip()
                if underline == "="*len(underline):
                    lines[i] = "# " + lines[i].strip()
                    lines[i+1] = ""
                elif underline == "-"*len(underline):
                    lines[i] = "## " + lines[i].strip()
                    lines[i+1] = ""

        return lines


HEADER_PREPROCESSOR = HeaderPreprocessor()


class LinePreprocessor(Preprocessor):
    """Deals with HR lines (needs to be done before processing lists)"""

    def run(self, lines):
        """Replace every horizontal-rule line in ``lines`` with "<hr />"."""
        for i in range(len(lines)):
            if self._isLine(lines[i]):
                lines[i] = "<hr />"
        return lines

    def _isLine(self, block):
        """Determines if a block should be replaced with an <HR>"""
        # Four-space indent marks a code block.  The copy this was
        # rebuilt from showed a single space here, which looked like
        # whitespace-collapse damage.
        if block.startswith("    "):
            return 0  # a code block
        text = "".join([x for x in block if not x.isspace()])
        if len(text) <= 2:
            return 0
        for pattern in ['isline1', 'isline2', 'isline3']:
            m = RE.regExp[pattern].match(text)
            if (m and m.group(1)):
                return 1
        else:
            return 0


LINE_PREPROCESSOR = LinePreprocessor()


class LineBreaksPreprocessor(Preprocessor):
    """Replaces double spaces at the end of the lines with <br />."""

    def run(self, lines):
        """Append "<br />" to lines ending in two spaces.

        Lines matched by the 'tabbed' pattern are left alone.
        """
        for i in range(len(lines)):
            # Two trailing spaces, as the class docstring states; the
            # copy this was rebuilt from had collapsed them to one.
            if (lines[i].endswith("  ")
                    and not RE.regExp['tabbed'].match(lines[i])):
                lines[i] += "<br />"
        return lines


LINE_BREAKS_PREPROCESSOR = LineBreaksPreprocessor()


class HtmlBlockPreprocessor(Preprocessor):
    """Removes html blocks from self.lines"""

    def _get_left_tag(self, block):
        """Return the lower-cased name of the tag that opens ``block``."""
        return block[1:].replace(">", " ", 1).split()[0].lower()

    def _get_right_tag(self, left_tag, block):
        """Return the lower-cased tail of ``block`` where a closing tag
        for ``left_tag`` would sit (without the final character)."""
        return block.rstrip()[-len(left_tag)-2:-1].lower()

    def _equal_tags(self, left_tag, right_tag):
        """Return True if ``right_tag`` is accepted as closing ``left_tag``."""
        if left_tag in ['?', '?php', 'div']:  # handle PHP, etc.
            return True
        if ("/" + left_tag) == right_tag:
            return True
        elif (left_tag == right_tag[1:]
              and right_tag[0] != "<"):
            return True
        else:
            return False

    def _is_oneliner(self, tag):
        """Return True for tags that never have a closing tag (hr)."""
        return (tag in ['hr', 'hr/'])

    def run(self, lines):
        new_blocks = []
        text = "\n".join(lines)
        text = text.split("\n\n")

        items = []
        left_tag = ''
        right_tag = ''
        in_tag = False  # flag

        for block in text:
            if block.startswith("\n"):
                block = block[1:]

            if not in_tag:

                if block.startswith("<"):

                    left_tag = self._get_left_tag(block)
                    right_tag = self._get_right_tag(left_tag, block)

                    if not (is_block_level(left_tag)
                            or block[1] in ["!", "?", "@", "%"]):
                        new_blocks.append(block)
                        continue

                    if self._is_oneliner(left_tag):
                        new_blocks.append(block.strip())
                        continue

                    if block[1] == "!":
                        # is a comment block
                        left_tag = "--"
                        right_tag = self._get_right_tag(left_tag, block)
                        # keep checking conditions below and maybe just append

                    if (block.rstrip().endswith(">")
                            and self._equal_tags(left_tag, right_tag)):
                        new_blocks.append(
                            self.stash.store(block.strip()))
                    # NOTE(review): the available source ends at this
                    # point; the remainder of this method (and anything
                    # after it in the module) is not present in this
                    # copy and must be restored from a pristine source.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -