📄 read_markdown.py
字号:
"""Load Markdown files, convert each one to HTML and parse the HTML into an
ElementTree structure.

The collection of ElementTrees is further parsed to generate a table of
contents structure, and is manipulated to replace various markdown-generated
HTML with specific Mako tags before being written to Mako templates, which
then re-access the table of contents structure at runtime.

Much thanks to Alexey Shamrin, who came up with the original idea and did all
the heavy Markdown/Elementtree lifting for this module.
"""

import os
import re
import sys

from toc import TOCElement

try:
    import xml.etree.ElementTree as et
except ImportError:
    try:
        import elementtree.ElementTree as et
    except ImportError:
        # String exceptions are not valid; raise a real exception instance.
        raise ImportError("This module requires ElementTree to run (http://effbot.org/zone/element-index.htm)")

import markdown


# Trailing "# doctest: +FLAG" directives inside code samples.
_DOCTEST_DIRECTIVES = re.compile(r'#\s*doctest:\s*[+-]\w+(,[+-]\w+)*\s*$', re.M)

# "{sql}" marker followed by a SQL statement, up to "{stop}" / blank line / end.
_SQL_POPUP = re.compile(r'{sql}(.*?)\n((?:PRAGMA|BEGIN|SELECT|INSERT|DELETE|ROLLBACK|COMMIT|UPDATE|CREATE|DROP|PRAGMA|DESCRIBE).*?)\n\s*((?:{stop})|\n|$)', re.S)

# "{opensql}" marker followed by a SQL statement, same terminators as above.
_SQL_OPEN = re.compile(r'{opensql}(.*?)\n?((?:PRAGMA|BEGIN|SELECT|INSERT|DELETE|ROLLBACK|COMMIT|UPDATE|CREATE|DROP|PRAGMA|DESCRIBE).*?)\n\s*((?:{stop})|\n|$)', re.S)

# Leading "{python}", "{code}" or "{diagram}" marker with an optional title.
_CODE_MARKER = re.compile(r'\{(python|code|diagram)(?: title="(.*?)"){0,1}\}(.*)', re.S)

# Preamble written at the top of every generated template.  Each directive
# sits on its own line, and the closing "##" comment line is newline
# terminated so that it does not swallow the content written after it.
_HEADER_TEMPLATE = """# -*- coding: utf-8 -*-
<%%inherit file="content_layout.html"/>
<%%page args="toc, extension, paged"/>
<%%namespace name="formatting" file="formatting.html"/>
<%%namespace name="nav" file="nav.html"/>
<%%def name="title()">%s - %s</%%def>
<%%!
    filename = '%s'
%%>
## This file is generated. Edit the .txt files instead of this one.
"""


def _write_content(elem, stream):
    """Write the text of ``elem``, then each child followed by its tail."""
    if elem.text:
        stream.write(elem.text)
    for child in elem:
        dump_tree(child, stream)
        if child.tail:
            stream.write(child.tail)


def dump_tree(elem, stream):
    """Serialize ``elem`` and its descendants to ``stream``.

    Elements whose tag starts with ``MAKO:`` are handed to
    :func:`dump_mako_tag`.  The ``html`` wrapper element contributes only
    its content; its own start and end tags are not written.  Attribute
    values are written as their ``repr()``, and text is written as-is
    (no escaping is applied).
    """
    if elem.tag.startswith('MAKO:'):
        dump_mako_tag(elem, stream)
        return

    is_wrapper = elem.tag == 'html'
    if not is_wrapper:
        if elem.attrib:
            attrs = " ".join(["%s=%s" % (key, repr(val)) for key, val in elem.attrib.items()])
            stream.write("<%s %s>" % (elem.tag, attrs))
        else:
            stream.write("<%s>" % elem.tag)
    _write_content(elem, stream)
    if not is_wrapper:
        stream.write("</%s>" % elem.tag)


def dump_mako_tag(elem, stream):
    """Write a ``MAKO:<name>`` element as a Mako ``<%call>`` block.

    The text after the ``MAKO:`` prefix becomes the called expression and
    the element's attributes become its ``key=value`` arguments; the values
    are written verbatim.
    """
    tag = elem.tag[5:]  # strip the "MAKO:" prefix
    params = ','.join(['%s=%s' % i for i in elem.items()])
    stream.write('<%%call expr="%s(%s)">' % (tag, params))
    _write_content(elem, stream)
    stream.write("</%call>")


def create_toc(filename, tree, tocroot):
    """Build TOC entries for ``tree`` and nest its content into sections.

    Every header element (``h1`` .. ``h9``) is replaced, together with the
    content that follows it up to the next header of the same or a higher
    rank, by a ``MAKO:formatting.section`` element.  A ``TOCElement`` is
    created for each header, parented to the nearest enclosing header of a
    strictly higher rank (or to ``tocroot``).

    :param filename: name passed to each ``TOCElement`` and to
        ``tocroot.get_by_file``.
    :param tree: root element of the parsed document; modified in place.
    :param tocroot: root of the table of contents.
    :return: ``(title, toc_element)`` where ``title`` is the text of the
        first header found (``None`` if there is none) and ``toc_element``
        is ``tocroot.get_by_file(filename)``.
    :raises ValueError: if a header has neither a ``name`` attribute nor
        any leading text to derive a name from.
    """
    title = [None]
    # (header level, TOC element) for each section that is currently open,
    # outermost first.  The root sits at level 0 and is never closed.
    open_sections = [(0, tocroot)]

    def process(tree):
        while True:
            i = find_header_index(tree)
            if i is None:
                return
            node = tree[i]
            taglevel = int(node.tag[1])
            start, end = i, end_of_header(tree, taglevel, i + 1)
            content = tree[start + 1:end]

            description = (node.text or "").strip()
            if title[0] is None:
                title[0] = description

            name = node.get('name')
            if name is None:
                words = description.split()
                if not words:
                    raise ValueError(
                        "header <%s> in %r has no text to derive a name from"
                        % (node.tag, filename))
                name = words[0].lower()

            # Close every open section of the same or a lower rank; whatever
            # is left on top is the enclosing section.  This stays correct
            # when the document skips levels or jumps up by more than one.
            while len(open_sections) > 1 and open_sections[-1][0] >= taglevel:
                open_sections.pop()
            element = TOCElement(filename, name, description, open_sections[-1][1])
            open_sections.append((taglevel, element))

            tag = et.Element("MAKO:formatting.section", path=repr(element.path), paged='paged', extension='extension', toc='toc')
            tag.text = (node.tail or "") + '\n'
            tag.tail = '\n'
            tag[:] = content
            tree[start:end] = [tag]

            # Sub-headers are now children of the new section element.
            process(tag)

    process(tree)
    return (title[0], tocroot.get_by_file(filename))


def literal(s):
    """Return ``s`` wrapped in double quotes."""
    return '"%s"' % s


def index(parent, item):
    """Return the position of ``item`` among the children of ``parent``.

    Children are compared by identity.  Returns ``None`` if ``item`` is not
    a direct child.
    """
    for n, i in enumerate(parent):
        if i is item:
            return n
    return None


def find_header_index(tree):
    """Return the index of the first header child of ``tree``, or ``None``."""
    for i, node in enumerate(tree):
        if is_header(node):
            return i
    return None


def is_header(node):
    """Return True if ``node`` has a tag of the form ``h1`` .. ``h9``."""
    t = node.tag
    return (isinstance(t, str) and len(t) == 2 and t[0] == 'h' and t[1] in '123456789')


def end_of_header(tree, level, start):
    """Return the index where the section opened at ``level`` ends.

    That is the index of the first child at or after ``start`` which is a
    header whose number is ``level`` or smaller, or ``len(tree)`` if there
    is no such child.
    """
    for i, node in enumerate(tree[start:]):
        if is_header(node) and int(node.tag[1]) <= level:
            return start + i
    return len(tree)


def process_rel_href(tree):
    """Replace ``rel:`` links with ``MAKO:nav.toclink`` elements.

    An ``<a>`` whose ``href`` matches ``rel:<path>`` is replaced by a
    toclink element; the link text is passed as ``description`` unless it
    equals the path.  With a ``boldrel:<path>`` href the toclink is wrapped
    in ``<strong>``.  Anchors without an ``href`` are left alone.
    """
    parent = get_parent_map(tree)
    for a in tree.findall('.//a'):
        # An anchor may have no href at all (e.g. a named anchor).
        m = re.match(r'(bold)?rel\:(.+)', a.get('href', ''))
        if not m:
            continue
        (bold, path) = m.group(1, 2)
        text = a.text
        if text == path:
            tag = et.Element("MAKO:nav.toclink", path=repr(path), extension='extension', paged='paged', toc='toc')
        else:
            tag = et.Element("MAKO:nav.toclink", path=repr(path), description=repr(text), extension='extension', paged='paged', toc='toc')
        a_parent = parent[a]
        if bold:
            replacement = et.Element('strong')
            replacement.append(tag)
        else:
            replacement = tag
        replacement.tail = a.tail
        a_parent[index(a_parent, a)] = replacement


def replace_pre_with_mako(tree):
    """Replace marked ``<pre><code>`` blocks with ``MAKO:formatting.code``.

    A block is converted when its text starts with a ``{python}``,
    ``{code}`` or ``{diagram}`` marker (optionally ``title="..."``), or
    when it starts with a doctest prompt (``>>> ``).  The text is read from
    the first child of each ``code`` element, which is where
    :func:`safety_code` puts it, so :func:`safety_code` must have been
    applied to ``tree`` first.
    """
    parents = get_parent_map(tree)

    def splice_code_tag(pre, text, code=None, title=None):
        text = re.sub(_DOCTEST_DIRECTIVES, '', text)
        # process '>>>' to have quotes around it, to work with the pygments
        # syntax highlighter which uses the tokenize module
        text = re.sub(r'>>> ', r'">>>" ', text)

        # Sliders are only used for blocks with no {sql} popups.
        use_sliders = _SQL_POPUP.search(text) is None
        text = _SQL_POPUP.sub(r"""${formatting.poplink()}\1<%call expr="formatting.codepopper()">\2</%call>""", text)
        text = _SQL_OPEN.sub(r"\1<%call expr='formatting.poppedcode()' >\2</%call>\n\n", text)

        tag = et.Element("MAKO:formatting.code", extension='extension', paged='paged', toc='toc')
        if code:
            tag.attrib["syntaxtype"] = repr(code)
        if title:
            tag.attrib["title"] = repr(title)
        if use_sliders:
            tag.attrib['use_sliders'] = True
        tag.text = text

        pre_parent = parents[pre]
        tag.tail = pre.tail
        pre_parent[reverse_parent(pre_parent, pre)] = tag

    for precode in tree.findall('.//pre/code'):
        # safety_code() blanked precode.text and moved the original text
        # into a child element, so both checks below must read it from
        # there; testing precode.text would never match.
        source = precode[0].text or ''
        m = _CODE_MARKER.match(source.lstrip())
        if m:
            code = m.group(1)
            title = m.group(2)
            text = m.group(3)
            text = re.sub(r'{(python|code|diagram).*?}(\n\s*)?', '', text)
            text = re.sub(r'\\\n', r'${r"\\\\" + "\\n\\n"}', text)
            splice_code_tag(parents[precode], text, code=code, title=title)
        elif source.lstrip().startswith('>>> '):
            splice_code_tag(parents[precode], source)


def safety_code(tree):
    """Move the text of every ``code`` element into a ``%text`` child.

    The child gets ``filter="h"`` unless the ``code`` element sits directly
    inside a ``pre``.  The ``code`` element's own text is set to ``""``.
    """
    parents = get_parent_map(tree)
    for code in tree.findall('.//code'):
        tag = et.Element('%text')
        if parents[code].tag != 'pre':
            tag.attrib["filter"] = "h"
        tag.text = code.text
        code.append(tag)
        code.text = ""


def reverse_parent(parent, item):
    """Return the position of ``item`` in ``parent``; same as :func:`index`."""
    return index(parent, item)


def get_parent_map(tree):
    """Return a dict mapping every element below ``tree`` to its parent."""
    # Element.getiterator() no longer exists on current Python versions;
    # fall back to it only where Element.iter() is unavailable.
    walk = getattr(tree, 'iter', None) or tree.getiterator
    return dict((child, parent) for parent in walk() for child in parent)


def header(toc, title, filename):
    """Return the Mako preamble for one generated template.

    :param toc: table of contents; ``toc.root.doctitle`` is used in the
        page title.
    :param title: title of this document.
    :param filename: value assigned to ``filename`` in the template's
        module-level block.
    """
    return _HEADER_TEMPLATE % (toc.root.doctitle, title, filename)


class utf8stream(object):
    """Wrap a stream so that everything written to it is UTF-8 encoded."""

    def __init__(self, stream):
        self.stream = stream

    def write(self, str):
        # The parameter keeps its original name so keyword callers still work.
        self.stream.write(str.encode('utf8'))


def parse_markdown_files(toc, files):
    """Convert ``content/<name>.txt`` to ``output/<name>.html`` for each name.

    Names whose input file does not exist are skipped.  One
    ``<input> -> <output>`` line is printed per converted file.

    :param toc: root of the table of contents, extended by
        :func:`create_toc` for every file processed.
    :param files: iterable of base names, without directory or extension.
    """
    for inname in files:
        infile = 'content/%s.txt' % inname
        if not os.access(infile, os.F_OK):
            continue

        with open(infile) as source:
            html = markdown.markdown(source.read())

        tree = et.fromstring("<html>" + html + "</html>")
        title, _ = create_toc(inname, tree, toc)
        safety_code(tree)
        replace_pre_with_mako(tree)
        process_rel_href(tree)

        outname = 'output/%s.html' % inname
        print("%s -> %s" % (infile, outname))
        # utf8stream emits encoded bytes, so the file is opened in binary
        # mode; the with-block makes sure the output is flushed and closed.
        with open(outname, 'wb') as raw:
            outfile = utf8stream(raw)
            outfile.write(header(toc, title, inname))
            dump_tree(tree, outfile)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -