📄 esis2sgml.py
字号:
#! /usr/bin/env python

"""Convert ESIS events to SGML or XML markup.

This is limited, but seems sufficient for the ESIS generated by the
latex2esis.py script when run over the Python documentation.
"""

# This should have an explicit option to indicate whether the *INPUT* was
# generated from an SGML or an XML application.

__version__ = '$Revision: 1.1.1.1 $'

import errno
import esistools
import os
import re
import string

try:
    from xml.utils import escape
except ImportError:
    # xml.utils is not part of the standard library; fall back to the
    # standard escape(), which replaces "&", "<" and ">".
    # NOTE(review): presumably equivalent to xml.utils.escape -- confirm.
    from xml.sax.saxutils import escape


AUTOCLOSE = ()

EMPTIES_FILENAME = "../sgml/empties.dat"
LIST_EMPTIES = 0

# Lookup tables from case-normalized names to the spelling used on output.
_elem_map = {}
_attr_map = {}
_token_map = {}

# Replaced by main() with a lower-casing function when name maps are given.
_normalize_case = str


def _lowercase(s):
    """Return *s* converted to lower case."""
    return s.lower()


def map_gi(sgmlgi, map):
    """Return the output spelling for the name *sgmlgi*.

    The name is normalized with ``_normalize_case`` and looked up in *map*.
    A name that is not yet known is recorded in *map* under its normalized
    form and returned unchanged.
    """
    uncased = _normalize_case(sgmlgi)
    try:
        return map[uncased]
    except KeyError:
        # A dict miss raises KeyError (not IndexError); remember the
        # spelling we saw first so later lookups agree with it.
        map[uncased] = sgmlgi
        return sgmlgi


def null_map_gi(sgmlgi, map):
    """Return *sgmlgi* unchanged; used when no name maps are configured."""
    return sgmlgi


def _escape_attr(value):
    """Escape *value* for use inside a double-quoted attribute value."""
    # escape() is not asked to handle quotes here, so do it explicitly;
    # otherwise an embedded '"' would terminate the attribute early.
    return escape(value).replace('"', "&quot;")


def format_attrs(attrs, xml=0):
    """Format the mapping *attrs* as markup attributes.

    Attributes are emitted sorted by name.  The result is either the empty
    string or begins with a single space so that it can be placed directly
    after an element name.

    When *xml* is true every attribute is written as ``name="value"``.
    Otherwise SGML minimization is applied where the value allows it: the
    bare value when it equals the name (or equals ``"no" + name``), and
    ``name=value`` without quotes when the value is a simple token.
    """
    parts = []
    for name, value in sorted(attrs.items()):
        if xml:
            parts.append('%s="%s"' % (name, _escape_attr(value)))
        # this is a little bogus, but should do for now
        elif name == value and isnmtoken(value):
            parts.append(value)
        elif istoken(value):
            if value == "no" + name:
                parts.append(value)
            else:
                parts.append("%s=%s" % (name, value))
        else:
            parts.append('%s="%s"' % (name, _escape_attr(value)))
    if not parts:
        return ""
    return " " + " ".join(parts)


_nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE)


def isnmtoken(s):
    """Return true if *s* starts with a letter and matches ``_nmtoken_rx``."""
    return _nmtoken_rx.match(s) is not None


_token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.IGNORECASE)


def istoken(s):
    """Return true if *s* matches ``_token_rx`` (may start with a digit)."""
    return _token_rx.match(s) is not None


def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
    """Read ESIS events from *ifp* and write markup to *ofp*.

    ifp        -- object with a readline() method yielding one event per line
    ofp        -- object with a write() method
    xml        -- true to write XML (``<x/>`` for empty elements, explicit
                  end tags); false to write SGML with short/omitted end tags
    autoclose  -- element names whose end tags are omitted in SGML output;
                  ignored when *xml* is true
    verbatims  -- element names inside which "---" is left untouched

    The first character of each line selects the event: "-" character data,
    "(" start tag, ")" end tag, "A" attribute for the next start tag,
    "e" marks the next element as empty, "&" entity reference.

    Raises RuntimeError for any other event type.  When the module-level
    LIST_EMPTIES flag is set, the names of the empty elements seen are
    passed to dump_empty_element_names() after the input is exhausted.
    """
    if xml:
        autoclose = ()
    attrs = {}
    lastopened = None
    knownempties = []
    knownempty = 0
    lastempty = 0
    inverbatim = 0
    while True:
        line = ifp.readline()
        if not line:
            break
        event = line[0]
        data = line[1:]
        if data and data[-1] == "\n":
            data = data[:-1]
        if event == "-":
            data = esistools.decode(data)
            data = escape(data)
            if not inverbatim:
                # Replace after escaping so the "&" of the entity reference
                # is not itself escaped; an entity keeps the output within
                # the character set named in the XML declaration.
                data = data.replace("---", "&mdash;")
            ofp.write(data)
            if "\n" in data:
                lastopened = None
            knownempty = 0
            lastempty = 0
        elif event == "(":
            if data == "COMMENT":
                ofp.write("<!--")
                continue
            data = map_gi(data, _elem_map)
            if knownempty and xml:
                ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
            else:
                ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
            if knownempty and data not in knownempties:
                # accumulate knowledge!
                knownempties.append(data)
            attrs = {}
            lastopened = data
            lastempty = knownempty
            knownempty = 0
            inverbatim = data in verbatims
        elif event == ")":
            if data == "COMMENT":
                ofp.write("-->")
                continue
            data = map_gi(data, _elem_map)
            if xml:
                # An empty element was already closed by its "<x/>" tag.
                if not lastempty:
                    ofp.write("</%s>" % data)
            elif data not in knownempties:
                if data in autoclose:
                    pass
                elif lastopened == data:
                    # SGML short end tag for the element just opened.
                    ofp.write("</>")
                else:
                    ofp.write("</%s>" % data)
            lastopened = None
            lastempty = 0
            inverbatim = 0
        elif event == "A":
            # "name type value"; the declared type is not used.
            name, _decltype, value = data.split(" ", 2)
            name = map_gi(name, _attr_map)
            attrs[name] = esistools.decode(value)
        elif event == "e":
            knownempty = 1
        elif event == "&":
            ofp.write("&%s;" % data)
            knownempty = 0
        else:
            raise RuntimeError(
                "unrecognized ESIS event type: '%s'" % event)

    if LIST_EMPTIES:
        dump_empty_element_names(knownempties)


def dump_empty_element_names(knownempties):
    """Merge *knownempties* into the file named by EMPTIES_FILENAME.

    Names already listed in the file (one per line, blank lines ignored)
    are kept; the file is rewritten with the sorted union, one name per
    line.  *knownempties* itself is not modified.
    """
    names = set(knownempties)
    if os.path.isfile(EMPTIES_FILENAME):
        with open(EMPTIES_FILENAME) as fp:
            for line in fp:
                gi = line.strip()
                if gi:
                    names.add(gi)
    with open(EMPTIES_FILENAME, "w") as fp:
        fp.write("\n".join(sorted(names)))
        fp.write("\n")


def update_gi_map(map, names, fromsgml=1):
    """Add the comma-separated *names* to the lookup table *map*.

    When *fromsgml* is true each name is stored under its lower-cased form;
    otherwise it is stored under its own spelling.  Empty entries (for
    example from an empty *names* string) are skipped.
    """
    for name in names.split(","):
        if not name:
            continue
        if fromsgml:
            uncased = name.lower()
        else:
            uncased = name
        map[uncased] = name


def usage():
    """Write a short command-line summary to standard error."""
    import sys
    prog = os.path.basename(sys.argv[0])
    sys.stderr.write(
        "usage: %s [options] [infile [outfile]]\n"
        "  -a NAMES, --autoclose=NAMES  elements with omitted end tags\n"
        "  -d, --declare                write an XML declaration\n"
        "  -e                           record names of empty elements\n"
        "  -s, --sgml                   write SGML\n"
        "  -x, --xml                    write XML (default)\n"
        "  --elements-map=NAMES         output spelling of element names\n"
        "  --attributes-map=NAMES       output spelling of attribute names\n"
        "  --values-map=NAMES           output spelling of value names\n"
        % prog)


def _add_names(existing, new):
    """Return *new* appended to the comma-separated list *existing*."""
    if existing:
        return "%s,%s" % (existing, new)
    return new


def main():
    """Command-line entry point: parse options and run the conversion.

    Reads from standard input or the first positional argument and writes
    to standard output or the second positional argument.  Exits with
    status 2 on a command-line error.  A broken pipe on output is ignored;
    any other IOError propagates.
    """
    import getopt
    import sys
    global LIST_EMPTIES, _normalize_case, map_gi
    #
    autoclose = AUTOCLOSE
    xml = 1
    xmldecl = 0
    elem_names = ''
    attr_names = ''
    value_names = ''
    verbatims = ('verbatim', 'interactive-session')
    try:
        # "a:" and the trailing "=" mark the options that take an argument.
        opts, args = getopt.getopt(sys.argv[1:], "a:desx",
                                   ["autoclose=", "declare", "sgml", "xml",
                                    "elements-map=", "attributes-map=",
                                    "values-map="])
    except getopt.error as exc:
        sys.stderr.write("%s\n" % exc)
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-d", "--declare"):
            xmldecl = 1
        elif opt == "-e":
            LIST_EMPTIES = 1
        elif opt in ("-s", "--sgml"):
            xml = 0
        elif opt in ("-x", "--xml"):
            xml = 1
        elif opt in ("-a", "--autoclose"):
            autoclose = arg.split(",")
        elif opt == "--elements-map":
            elem_names = _add_names(elem_names, arg)
        elif opt == "--attributes-map":
            attr_names = _add_names(attr_names, arg)
        elif opt == "--values-map":
            value_names = _add_names(value_names, arg)
    if len(args) > 2:
        usage()
        sys.exit(2)
    #
    # setup the name maps:
    #
    if elem_names or attr_names or value_names:
        # assume the origin was SGML; ignore case of the names from the ESIS
        # stream but set up conversion tables to get the case right on output
        _normalize_case = _lowercase
        update_gi_map(_elem_map, elem_names)
        update_gi_map(_attr_map, attr_names)
        update_gi_map(_token_map, value_names)
    else:
        map_gi = null_map_gi
    #
    # open input streams:
    #
    ifp = sys.stdin
    ofp = sys.stdout
    opened = []     # only the files opened here are closed on exit
    try:
        if len(args) >= 1:
            ifp = open(args[0])
            opened.append(ifp)
        if len(args) == 2:
            ofp = open(args[1], "w")
            opened.append(ofp)
        #
        # run the conversion:
        #
        try:
            if xml and xmldecl:
                ofp.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
            convert(ifp, ofp, xml=xml, autoclose=autoclose,
                    verbatims=verbatims)
        except IOError as exc:
            # The reader going away (e.g. "| head") is not an error.
            if exc.errno != errno.EPIPE:
                raise
    finally:
        for fp in opened:
            fp.close()


if __name__ == "__main__":
    main()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -