docfixer.py
    # we may need to split off trailing white space:
    child = children[after - 1]
    data = child.data
    if string.rstrip(data) != data:
        have_last = 0
        child.splitText(len(string.rstrip(data)))
    para = doc.createElement(PARA_ELEMENT)
    prev = None
    indexes = range(start, after)
    indexes.reverse()
    for j in indexes:
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
        parent.appendChild(doc.createTextNode("\n\n"))
        return len(parent.childNodes)
    else:
        nextnode = parent.childNodes[start]
        if nextnode.nodeType == TEXT:
            if nextnode.data and nextnode.data[0] != "\n":
                nextnode.data = "\n" + nextnode.data
        else:
            newnode = doc.createTextNode("\n")
            parent.insertBefore(newnode, nextnode)
            nextnode = newnode
            start = start + 1
        parent.insertBefore(para, nextnode)
        return start + 1


def skip_leading_nodes(children, start=0):
    """Return index into children of a node at which paragraph building
    should begin or a recursive call to fixup_paras_helper() should be
    made (for subsections, etc.).

    When the return value >= len(children), we've built all the paras we
    can from this list of children.
    """
    i = len(children)
    while i > start:
        # skip over leading comments and whitespace:
        child = children[start]
        nodeType = child.nodeType
        if nodeType == TEXT:
            data = child.data
            shortened = string.lstrip(data)
            if shortened:
                if data != shortened:
                    # break into two nodes: whitespace and non-whitespace
                    child.splitText(len(data) - len(shortened))
                    return start + 1
                return start
            # all whitespace, just skip
        elif nodeType == ELEMENT:
            tagName = child.get_tagName()
            if tagName in RECURSE_INTO_PARA_CONTAINERS:
                return start
            if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
                return start
        start = start + 1
    return start


def fixup_rfc_references(doc, fragment):
    for rfcnode in find_all_elements(fragment, "rfc"):
        rfcnode.appendChild(doc.createTextNode(
            "RFC " + rfcnode.getAttribute("num")))


def fixup_signatures(doc, fragment):
    for child in fragment.childNodes:
        if child.nodeType == ELEMENT:
            args = child.getElementsByTagName("args")
            for arg in args:
                fixup_args(doc, arg)
                arg.normalize()
            args = child.getElementsByTagName("constructor-args")
            for arg in args:
                fixup_args(doc, arg)
                arg.normalize()


def fixup_args(doc, arglist):
    for child in arglist.childNodes:
        if child.get_nodeName() == "optional":
            # found it; fix and return
            arglist.insertBefore(doc.createTextNode("["), child)
            optkids = child.childNodes
            while optkids:
                k = optkids[0]
                child.removeChild(k)
                arglist.insertBefore(k, child)
            arglist.insertBefore(doc.createTextNode("]"), child)
            arglist.removeChild(child)
            return fixup_args(doc, arglist)


def fixup_sectionauthors(doc, fragment):
    for sectauth in find_all_elements(fragment, "sectionauthor"):
        section = sectauth.parentNode
        section.removeChild(sectauth)
        sectauth._node.name = "author"
        sectauth.appendChild(doc.createTextNode(
            sectauth.getAttribute("name")))
        sectauth.removeAttribute("name")
        after = section.childNodes[2]
        title = section.childNodes[1]
        if title.get_nodeName() != "title":
            after = section.childNodes[0]
        section.insertBefore(doc.createTextNode("\n "), after)
        section.insertBefore(sectauth, after)


def fixup_verbatims(doc):
    for verbatim in find_all_elements(doc, "verbatim"):
        child = verbatim.childNodes[0]
        if child.nodeType == TEXT \
           and string.lstrip(child.data)[:3] == ">>>":
            verbatim._node.name = "interactive-session"


def add_node_ids(fragment, counter=0):
    fragment._node.node_id = counter
    for node in fragment.childNodes:
        counter = counter + 1
        if node.nodeType == ELEMENT:
            counter = add_node_ids(node, counter)
        else:
            node._node.node_id = counter
    return counter + 1
REFMODINDEX_ELEMENTS = ('refmodindex', 'refbimodindex',
                        'refexmodindex', 'refstmodindex')


def fixup_refmodindexes(fragment):
    # Locate <ref*modindex>...</> co-located with <module>...</>, and
    # remove the <ref*modindex>, replacing it with index=index on the
    # <module> element.
    nodes = find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS)
    d = {}
    for node in nodes:
        parent = node.parentNode
        d[parent._node.node_id] = parent
    del nodes
    map(fixup_refmodindexes_chunk, d.values())


def fixup_refmodindexes_chunk(container):
    # node is probably a <para>; let's see how often it isn't:
    if container.get_tagName() != PARA_ELEMENT:
        bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
    module_entries = find_all_elements(container, "module")
    if not module_entries:
        return
    index_entries = find_all_elements_from_set(container,
                                               REFMODINDEX_ELEMENTS)
    removes = []
    for entry in index_entries:
        children = entry.childNodes
        if len(children) != 0:
            bwrite("--- unexpected number of children for %s node:\n"
                   % entry.get_tagName())
            ewrite(entry.toxml() + "\n")
            continue
        found = 0
        module_name = entry.getAttribute("module")
        for node in module_entries:
            if len(node.childNodes) != 1:
                continue
            this_name = node.childNodes[0].data
            if this_name == module_name:
                found = 1
                node.setAttribute("index", "yes")
        if found:
            removes.append(entry)
    for node in removes:
        container.removeChild(node)


def fixup_bifuncindexes(fragment):
    nodes = find_all_elements(fragment, 'bifuncindex')
    d = {}
    # make sure that each parent is only processed once:
    for node in nodes:
        parent = node.parentNode
        d[parent._node.node_id] = parent
    del nodes
    map(fixup_bifuncindexes_chunk, d.values())


def fixup_bifuncindexes_chunk(container):
    removes = []
    entries = find_all_child_elements(container, "bifuncindex")
    function_entries = find_all_child_elements(container, "function")
    for entry in entries:
        function_name = entry.getAttribute("name")
        found = 0
        for func_entry in function_entries:
            t2 = func_entry.childNodes[0].data
            if t2[-2:] != "()":
                continue
            t2 = t2[:-2]
            if t2 == function_name:
                func_entry.setAttribute("index", "yes")
                func_entry.setAttribute("module", "__builtin__")
                if not found:
                    found = 1
                    removes.append(entry)
    for entry in removes:
        container.removeChild(entry)


def join_adjacent_elements(container, gi):
    queue = [container]
    while queue:
        parent = queue.pop()
        i = 0
        children = parent.get_childNodes()
        nchildren = len(children)
        while i < (nchildren - 1):
            child = children[i]
            if child.nodeName == gi:
                if children[i+1].nodeName == gi:
                    ewrite("--- merging two <%s/> elements\n" % gi)
                    child = children[i]
                    nextchild = children[i+1]
                    nextchildren = nextchild.get_childNodes()
                    while len(nextchildren):
                        node = nextchildren[0]
                        nextchild.removeChild(node)
                        child.appendChild(node)
                    parent.removeChild(nextchild)
                    continue
            if child.nodeType == ELEMENT:
                queue.append(child)
            i = i + 1


_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")


def write_esis(doc, ofp, knownempty):
    for node in doc.childNodes:
        nodeType = node.nodeType
        if nodeType == ELEMENT:
            gi = node.get_tagName()
            if knownempty(gi):
                if node.hasChildNodes():
                    raise ValueError, \
                          "declared-empty node <%s> has children" % gi
                ofp.write("e\n")
            for k, v in node.attributes.items():
                value = v.value
                if _token_rx.match(value):
                    dtype = "TOKEN"
                else:
                    dtype = "CDATA"
                ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
            ofp.write("(%s\n" % gi)
            write_esis(node, ofp, knownempty)
            ofp.write(")%s\n" % gi)
        elif nodeType == TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
        elif nodeType == ENTITY_REFERENCE:
            ofp.write("&%s\n" % node.get_nodeName())
        else:
            raise RuntimeError, "unsupported node type: %s" % nodeType


def convert(ifp, ofp):
    p = esistools.ExtendedEsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    fragment = p.fragment
    normalize(fragment)
    simplify(doc, fragment)
    handle_labels(doc, fragment)
    handle_appendix(doc, fragment)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(fragment, ["function", "method", "cfunction"])
    cleanup_synopses(doc, fragment)
    fixup_descriptors(doc, fragment)
    fixup_verbatims(fragment)
    normalize(fragment)
    fixup_paras(doc, fragment)
    fixup_sectionauthors(doc, fragment)
    fixup_table_structures(doc, fragment)
    fixup_rfc_references(doc, fragment)
    fixup_signatures(doc, fragment)
    add_node_ids(fragment)
    fixup_refmodindexes(fragment)
    fixup_bifuncindexes(fragment)
    # Take care of ugly hacks in the LaTeX markup to avoid LaTeX and
    # LaTeX2HTML screwing with GNU-style long options (the '--' problem).
    join_adjacent_elements(fragment, "option")
    #
    d = {}
    for gi in p.get_empties():
        d[gi] = gi
    if d.has_key("rfc"):
        del d["rfc"]
    knownempty = d.has_key
    #
    try:
        write_esis(fragment, ofp, knownempty)
    except IOError, (err, msg):
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        if err != errno.EPIPE:
            raise


def main():
    if len(sys.argv) == 1:
        ifp = sys.stdin
        ofp = sys.stdout
    elif len(sys.argv) == 2:
        ifp = open(sys.argv[1])
        ofp = sys.stdout
    elif len(sys.argv) == 3:
        ifp = open(sys.argv[1])
        ofp = open(sys.argv[2], "w")
    else:
        usage()
        sys.exit(2)
    convert(ifp, ofp)


if __name__ == "__main__":
    main()
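
# Usage sketch, summarizing only the argument handling in main() above
# (the file names are placeholders):
#
#     docfixer.py                  read ESIS from stdin, write fixed-up ESIS to stdout
#     docfixer.py INFILE           read ESIS from INFILE, write to stdout
#     docfixer.py INFILE OUTFILE   read ESIS from INFILE, write to OUTFILE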