📄 docfixer.py
字号:
if nextnode.data and nextnode.data[0] != "\n":
nextnode.data = "\n" + nextnode.data
else:
newnode = doc.createTextNode("\n")
parent.insertBefore(newnode, nextnode)
nextnode = newnode
start = start + 1
parent.insertBefore(para, nextnode)
return start + 1
def skip_leading_nodes(children, start=0):
    """Return index into children of a node at which paragraph building should
    begin or a recursive call to fixup_paras_helper() should be made (for
    subsections, etc.).

    When the return value >= len(children), we've built all the paras we can
    from this list of children.
    """
    i = len(children)
    while i > start:
        # skip over leading comments and whitespace:
        child = children[start]
        nodeType = child.nodeType
        if nodeType == TEXT:
            data = child.data
            shortened = data.lstrip()
            if shortened:
                # text with leading whitespace followed by real content
                if data != shortened:
                    # break into two nodes: whitespace and non-whitespace
                    child.splitText(len(data) - len(shortened))
                    # paragraph content starts at the non-whitespace half
                    return start + 1
                return start
            # all whitespace, just skip
        elif nodeType == ELEMENT:
            tagName = child.tagName
            if tagName in RECURSE_INTO_PARA_CONTAINERS:
                # caller should recurse into this container
                return start
            if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
                # normal content element: paragraph building starts here
                return start
        start = start + 1
    return start
def fixup_rfc_references(doc, fragment):
    """Give every <pep> and <rfc> element a text child of the form
    "PEP <num>" / "RFC <num>", built from its num attribute."""
    for refnode in find_all_elements_from_set(fragment, ("pep", "rfc")):
        label = refnode.tagName.upper() + " " + refnode.getAttribute("num")
        refnode.appendChild(doc.createTextNode(label))
def fixup_signatures(doc, fragment):
    """Run rewrite_args() over every <args> and <constructor-args>
    element found under the fragment's top-level elements."""
    for node in fragment.childNodes:
        if node.nodeType != ELEMENT:
            continue
        for arg in node.getElementsByTagName("args"):
            rewrite_args(doc, arg)
        for arg in node.getElementsByTagName("constructor-args"):
            rewrite_args(doc, arg)
def rewrite_args(doc, arglist):
    """Flatten <optional> markup inside arglist, then, if the result is
    a single text node, collapse its internal whitespace runs."""
    fixup_args(doc, arglist)
    arglist.normalize()
    kids = arglist.childNodes
    if len(kids) == 1 and kids[0].nodeType == TEXT:
        textnode = kids[0]
        textnode.data = ' '.join(textnode.data.split())
def fixup_args(doc, arglist):
    """Replace each <optional> child of arglist with literal "[" and "]"
    text nodes surrounding that element's former children.

    Handles one <optional> element per pass, then recurses, because the
    child list is mutated in place while being iterated.
    """
    for child in arglist.childNodes:
        if child.nodeName == "optional":
            # found it; fix and return
            arglist.insertBefore(doc.createTextNode("["), child)
            optkids = child.childNodes
            # insertBefore() re-parents child.firstChild, so the live
            # optkids list shrinks on every iteration until empty
            while optkids:
                arglist.insertBefore(child.firstChild, child)
            arglist.insertBefore(doc.createTextNode("]"), child)
            arglist.removeChild(child)
            # restart from scratch since the child list just changed
            return fixup_args(doc, arglist)
def fixup_sectionauthors(doc, fragment):
    """Turn each <sectionauthor name="..."> into an <author> element with
    the name as text content, re-inserted just after the section's
    <title> (or at the front when there is no title).
    """
    for sectauth in find_all_elements(fragment, "sectionauthor"):
        section = sectauth.parentNode
        section.removeChild(sectauth)
        set_tagName(sectauth, "author")
        sectauth.appendChild(doc.createTextNode(
            sectauth.getAttribute("name")))
        sectauth.removeAttribute("name")
        # NOTE(review): assumes the section starts with at least three
        # children laid out as [text, <title>, ...] -- confirm against
        # the output of the earlier fixup passes.
        after = section.childNodes[2]
        title = section.childNodes[1]
        if title.nodeName != "title":
            # no title: insert the author at the very front instead
            after = section.childNodes[0]
        section.insertBefore(doc.createTextNode("\n "), after)
        section.insertBefore(sectauth, after)
def fixup_verbatims(doc):
    """Rename <verbatim> elements whose content begins with the ">>>"
    interpreter prompt to <interactive-session>."""
    for verbatim in find_all_elements(doc, "verbatim"):
        # Fix: guard against an empty <verbatim/> element, for which the
        # original childNodes[0] access raised IndexError.
        if not verbatim.childNodes:
            continue
        child = verbatim.childNodes[0]
        if child.nodeType == TEXT \
           and child.data.lstrip().startswith(">>>"):
            set_tagName(verbatim, "interactive-session")
def add_node_ids(fragment, counter=0):
    """Assign a monotonically increasing node_id to fragment and every
    node beneath it; return the next id past this subtree."""
    fragment.node_id = counter
    for child in fragment.childNodes:
        counter += 1
        if child.nodeType == ELEMENT:
            # elements number their own subtree recursively
            counter = add_node_ids(child, counter)
        else:
            child.node_id = counter
    return counter + 1
def fixup_ulink(doc, fragment):
    """Convert each two-child <ulink> (link text, href element) into a
    single element carrying an href attribute, keeping only the text
    content as children."""
    for ulink in find_all_elements(fragment, "ulink"):
        kids = ulink.childNodes
        assert len(kids) == 2
        text = kids[0]
        href = kids[1]
        href.normalize()
        assert len(href.childNodes) == 1
        assert href.childNodes[0].nodeType == TEXT
        ulink.setAttribute("href", href.childNodes[0].data)
        ulink.removeChild(href)
        # Hoist the text node's children into <ulink> itself; each
        # appendChild() re-parents a node, shrinking the live list.
        content = text.childNodes
        while len(content):
            ulink.appendChild(content[0])
        ulink.removeChild(text)
# Element names that request a module-index entry for a co-located
# <module> element; consumed by fixup_refmodindexes().
REFMODINDEX_ELEMENTS = ('refmodindex', 'refbimodindex',
                        'refexmodindex', 'refstmodindex')
def fixup_refmodindexes(fragment):
    # Locate <ref*modindex>...</> co-located with <module>...</>, and
    # remove the <ref*modindex>, replacing it with index=index on the
    # <module> element.
    nodes = find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS)
    # Key parents by node_id (assigned by add_node_ids()) so each
    # container is processed exactly once even if it holds several
    # index elements.
    containers = {}
    for node in nodes:
        parent = node.parentNode
        containers[parent.node_id] = parent
    del nodes
    # Fix: use an explicit loop instead of map() -- map() was being used
    # purely for its side effects, which is unidiomatic and would become
    # a silent no-op under Python 3's lazy map.
    for container in containers.values():
        fixup_refmodindexes_chunk(container)
def fixup_refmodindexes_chunk(container):
    """For one container, mark each <module> whose text matches a
    <ref*modindex module="..."> entry with index="yes", and remove the
    now-redundant index elements."""
    # node is probably a <para>; let's see how often it isn't:
    if container.tagName != PARA_ELEMENT:
        bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
    module_entries = find_all_elements(container, "module")
    if not module_entries:
        return
    index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS)
    removes = []
    for entry in index_entries:
        children = entry.childNodes
        if len(children) != 0:
            # index elements are expected to be empty; report and skip
            bwrite("--- unexpected number of children for %s node:\n"
                   % entry.tagName)
            ewrite(entry.toxml() + "\n")
            continue
        found = 0
        module_name = entry.getAttribute("module")
        for node in module_entries:
            # only consider <module> elements with a single text child
            if len(node.childNodes) != 1:
                continue
            this_name = node.childNodes[0].data
            if this_name == module_name:
                found = 1
                node.setAttribute("index", "yes")
        if found:
            removes.append(entry)
    # remove after the scan so the NodeList isn't mutated mid-iteration
    for node in removes:
        container.removeChild(node)
def fixup_bifuncindexes(fragment):
    """Locate <bifuncindex> elements and hand each distinct parent node
    to fixup_bifuncindexes_chunk() exactly once."""
    nodes = find_all_elements(fragment, 'bifuncindex')
    # make sure that each parent is only processed once:
    containers = {}
    for node in nodes:
        parent = node.parentNode
        containers[parent.node_id] = parent
    del nodes
    # Fix: explicit loop instead of map() -- map() was being used purely
    # for side effects, which would be a silent no-op under Python 3.
    for container in containers.values():
        fixup_bifuncindexes_chunk(container)
def fixup_bifuncindexes_chunk(container):
    """For one container, mark each <function> whose "name()" text
    matches a <bifuncindex name="..."> entry with index/module
    attributes, and remove the matched index elements."""
    removes = []
    entries = find_all_child_elements(container, "bifuncindex")
    function_entries = find_all_child_elements(container, "function")
    for entry in entries:
        function_name = entry.getAttribute("name")
        found = 0
        for func_entry in function_entries:
            t2 = func_entry.childNodes[0].data
            # only match text of the form "name()"
            if t2[-2:] != "()":
                continue
            t2 = t2[:-2]
            if t2 == function_name:
                func_entry.setAttribute("index", "yes")
                func_entry.setAttribute("module", "__builtin__")
                # queue the index element for removal only once, even if
                # several <function> elements match
                if not found:
                    found = 1
                    removes.append(entry)
    # remove after the scan so the NodeList isn't mutated mid-iteration
    for entry in removes:
        container.removeChild(entry)
def join_adjacent_elements(container, gi):
    """Merge pairs of adjacent elements named gi throughout the tree
    rooted at container, folding the second element's children into the
    first."""
    queue = [container]
    while queue:
        parent = queue.pop()
        i = 0
        children = parent.childNodes
        # NOTE(review): nchildren is captured once; removeChild() below
        # shrinks the live NodeList, so the tail indices can run past
        # the shortened list -- the i < nchildren - 1 bound appears to
        # rely on the merged elements keeping i in range. Confirm.
        nchildren = len(children)
        while i < (nchildren - 1):
            child = children[i]
            if child.nodeName == gi:
                if children[i+1].nodeName == gi:
                    ewrite("--- merging two <%s/> elements\n" % gi)
                    child = children[i]
                    nextchild = children[i+1]
                    nextchildren = nextchild.childNodes
                    # move every child of the second element into the
                    # first; the live list shrinks as nodes move
                    while len(nextchildren):
                        node = nextchildren[0]
                        nextchild.removeChild(node)
                        child.appendChild(node)
                    parent.removeChild(nextchild)
                    # re-test the same index: another sibling may follow
                    continue
            if child.nodeType == ELEMENT:
                queue.append(child)
            i = i + 1
# SGML TOKEN-shaped attribute values: a letter followed by letters,
# digits, "." or "-"; anything else is written as CDATA by write_esis().
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
def write_esis(doc, ofp, knownempty):
    """Recursively write the children of doc to ofp as an ESIS event
    stream ("A" attribute, "(" start-tag, ")" end-tag, "-" data, "&"
    entity reference, "e" empty-element lines).

    knownempty is a predicate returning true for element names declared
    EMPTY; such elements must have no children.
    """
    for node in doc.childNodes:
        nodeType = node.nodeType
        if nodeType == ELEMENT:
            gi = node.tagName
            if knownempty(gi):
                if node.hasChildNodes():
                    raise ValueError, \
                          "declared-empty node <%s> has children" % gi
                ofp.write("e\n")
            for k, value in node.attributes.items():
                # pick the cheapest SGML data type the value allows
                if _token_rx.match(value):
                    dtype = "TOKEN"
                else:
                    dtype = "CDATA"
                ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
            ofp.write("(%s\n" % gi)
            write_esis(node, ofp, knownempty)
            ofp.write(")%s\n" % gi)
        elif nodeType == TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
        elif nodeType == ENTITY_REFERENCE:
            ofp.write("&%s\n" % node.nodeName)
        else:
            raise RuntimeError, "unsupported node type: %s" % nodeType
def convert(ifp, ofp):
    """Parse an ESIS event stream from ifp, run the full pipeline of DOM
    clean-up passes over it, and write the result back to ofp as ESIS.

    The order of the fixup calls matters: later passes (paragraph
    building, node-id assignment, index folding) depend on the
    normalization done by earlier ones.
    """
    events = esistools.parse(ifp)
    toktype, doc = events.getEvent()
    fragment = doc.createDocumentFragment()
    events.expandNode(fragment)

    normalize(fragment)
    simplify(doc, fragment)
    handle_labels(doc, fragment)
    handle_appendix(doc, fragment)
    fixup_trailing_whitespace(doc, fragment, {
        # element -> (before-end-tag, after-end-tag)
        "abstract": ("\n", "\n"),
        "title": ("", "\n"),
        "chapter": ("\n", "\n\n\n"),
        "section": ("\n", "\n\n\n"),
        "subsection": ("\n", "\n\n"),
        "subsubsection": ("\n", "\n\n"),
        "paragraph": ("\n", "\n\n"),
        "subparagraph": ("\n", "\n\n"),
        "description": ("\n", "\n\n"),
        "enumeration": ("\n", "\n\n"),
        "item": ("\n", "\n\n"),
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(fragment, ["function", "method", "cfunction"])
    cleanup_synopses(doc, fragment)
    fixup_descriptors(doc, fragment)
    fixup_verbatims(fragment)
    normalize(fragment)
    fixup_paras(doc, fragment)
    fixup_sectionauthors(doc, fragment)
    fixup_table_structures(doc, fragment)
    fixup_rfc_references(doc, fragment)
    fixup_signatures(doc, fragment)
    fixup_ulink(doc, fragment)
    add_node_ids(fragment)
    fixup_refmodindexes(fragment)
    fixup_bifuncindexes(fragment)
    # Take care of ugly hacks in the LaTeX markup to avoid LaTeX and
    # LaTeX2HTML screwing with GNU-style long options (the '--' problem).
    join_adjacent_elements(fragment, "option")
    # Attempt to avoid trailing blank lines:
    fragment.normalize()
    # NOTE(review): assumes the fragment's last child is a text node
    # (has .data) -- confirm against the output of the passes above.
    if fragment.lastChild.data[-1:] == "\n":
        fragment.lastChild.data = fragment.lastChild.data.rstrip() + "\n"
    #
    # Build the knownempty predicate: element names the parser declared
    # EMPTY, minus those the fixups above gave text content.
    d = {}
    for gi in events.parser.get_empties():
        d[gi] = gi
    for key in ("author", "pep", "rfc"):
        if d.has_key(key):
            del d[key]
    knownempty = d.has_key
    #
    try:
        write_esis(fragment, ofp, knownempty)
    except IOError, (err, msg):
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        if err != errno.EPIPE:
            raise
def main():
    """Command-line driver.

    Usage: no args reads stdin and writes stdout; one arg reads the
    named file; two args also buffer the output and write it to the
    second file only after a successful conversion.
    """
    if len(sys.argv) == 1:
        ifp = sys.stdin
        ofp = sys.stdout
    elif len(sys.argv) == 2:
        ifp = open(sys.argv[1])
        ofp = sys.stdout
    elif len(sys.argv) == 3:
        ifp = open(sys.argv[1])
        # buffer the output so the target file is written atomically
        # at the end, after conversion succeeds
        import StringIO
        ofp = StringIO.StringIO()
    else:
        usage()
        sys.exit(2)
    try:
        convert(ifp, ofp)
    finally:
        # Fix: close the input file when we opened it (it previously
        # leaked); never close stdin.
        if ifp is not sys.stdin:
            ifp.close()
    if len(sys.argv) == 3:
        fp = open(sys.argv[2], "w")
        try:
            fp.write(ofp.getvalue())
        finally:
            fp.close()
        ofp.close()
# Run as a script: convert stdin/argv files per main()'s usage.
if __name__ == "__main__":
    main()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -