📄 latex2esis.py
字号:
#! /usr/bin/env python
"""Generate ESIS events based on a LaTeX source document and
configuration data.
The conversion is not strong enough to work with arbitrary LaTeX
documents; it has only been designed to work with the highly stylized
markup used in the standard Python documentation. A lot of
information about specific markup is encoded in the control table
passed to the convert() function; changing this table can allow this
tool to support additional LaTeX markups.
The format of the table is largely undocumented; see the commented
headers where the table is specified in main(). There is no provision
to load an alternate table from an external file.
"""
import errno
import getopt
import os
import re
import string
import sys
import UserList
import xml.sax
import xml.sax.saxutils
from types import ListType, StringType, TupleType
from esistools import encode
DEBUG = 0
class LaTeXFormatError(Exception):
pass
class LaTeXStackError(LaTeXFormatError):
def __init__(self, found, stack):
msg = "environment close for %s doesn't match;\n stack = %s" \
% (found, stack)
self.found = found
self.stack = stack[:]
LaTeXFormatError.__init__(self, msg)
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
_text_rx = re.compile(r"[^]~%\\{}]+")
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]", re.MULTILINE)
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
_start_group_rx = re.compile("[ \n]*{")
_start_optional_rx = re.compile("[ \n]*[[]")
ESCAPED_CHARS = "$%#^ {}&~"
def dbgmsg(msg):
if DEBUG:
sys.stderr.write(msg + "\n")
def pushing(name, point, depth):
dbgmsg("pushing <%s> at %s" % (name, point))
def popping(name, point, depth):
dbgmsg("popping </%s> at %s" % (name, point))
class _Stack(UserList.UserList):
def append(self, entry):
if type(entry) is not StringType:
raise LaTeXFormatError("cannot push non-string on stack: "
+ `entry`)
#dbgmsg("%s<%s>" % (" "*len(self.data), entry))
self.data.append(entry)
def pop(self, index=-1):
entry = self.data[index]
del self.data[index]
#dbgmsg("%s</%s>" % (" "*len(self.data), entry))
def __delitem__(self, index):
entry = self.data[index]
del self.data[index]
#dbgmsg("%s</%s>" % (" "*len(self.data), entry))
def new_stack():
if DEBUG:
return _Stack()
return []
class Conversion:
def __init__(self, ifp, ofp, table):
self.write = ofp.write
self.ofp = ofp
self.table = table
L = [s.rstrip() for s in ifp.readlines()]
L.append("")
self.line = string.join(L, "\n")
self.preamble = 1
def convert(self):
self.subconvert()
def subconvert(self, endchar=None, depth=0):
#
# Parses content, including sub-structures, until the character
# 'endchar' is found (with no open structures), or until the end
# of the input data is endchar is None.
#
stack = new_stack()
line = self.line
while line:
if line[0] == endchar and not stack:
self.line = line
return line
m = _comment_rx.match(line)
if m:
text = m.group(1)
if text:
self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
% encode(text))
line = line[m.end():]
continue
m = _begin_env_rx.match(line)
if m:
name = m.group(1)
entry = self.get_env_entry(name)
# re-write to use the macro handler
line = r"\%s %s" % (name, line[m.end():])
continue
m = _end_env_rx.match(line)
if m:
# end of environment
envname = m.group(1)
entry = self.get_entry(envname)
while stack and envname != stack[-1] \
and stack[-1] in entry.endcloses:
self.write(")%s\n" % stack.pop())
if stack and envname == stack[-1]:
self.write(")%s\n" % entry.outputname)
del stack[-1]
else:
raise LaTeXStackError(envname, stack)
line = line[m.end():]
continue
m = _begin_macro_rx.match(line)
if m:
# start of macro
macroname = m.group(1)
if macroname == "c":
# Ugh! This is a combining character...
endpos = m.end()
self.combining_char("c", line[endpos])
line = line[endpos + 1:]
continue
entry = self.get_entry(macroname)
if entry.verbatim:
# magic case!
pos = line.find("\\end{%s}" % macroname)
text = line[m.end(1):pos]
stack.append(entry.name)
self.write("(%s\n" % entry.outputname)
self.write("-%s\n" % encode(text))
self.write(")%s\n" % entry.outputname)
stack.pop()
line = line[pos + len("\\end{%s}" % macroname):]
continue
while stack and stack[-1] in entry.closes:
top = stack.pop()
topentry = self.get_entry(top)
if topentry.outputname:
self.write(")%s\n-\\n\n" % topentry.outputname)
#
if entry.outputname and entry.empty:
self.write("e\n")
#
params, optional, empty = self.start_macro(macroname)
# rip off the macroname
if params:
line = line[m.end(1):]
elif empty:
line = line[m.end(1):]
else:
line = line[m.end():]
opened = 0
implied_content = 0
# handle attribute mappings here:
for pentry in params:
if pentry.type == "attribute":
if pentry.optional:
m = _optional_rx.match(line)
if m and entry.outputname:
line = line[m.end():]
self.dump_attr(pentry, m.group(1))
elif pentry.text and entry.outputname:
# value supplied by conversion spec:
self.dump_attr(pentry, pentry.text)
else:
m = _parameter_rx.match(line)
if not m:
raise LaTeXFormatError(
"could not extract parameter %s for %s: %s"
% (pentry.name, macroname, `line[:100]`))
if entry.outputname:
self.dump_attr(pentry, m.group(1))
line = line[m.end():]
elif pentry.type == "child":
if pentry.optional:
m = _optional_rx.match(line)
if m:
line = line[m.end():]
if entry.outputname and not opened:
opened = 1
self.write("(%s\n" % entry.outputname)
stack.append(macroname)
stack.append(pentry.name)
self.write("(%s\n" % pentry.name)
self.write("-%s\n" % encode(m.group(1)))
self.write(")%s\n" % pentry.name)
stack.pop()
else:
if entry.outputname and not opened:
opened = 1
self.write("(%s\n" % entry.outputname)
stack.append(entry.name)
self.write("(%s\n" % pentry.name)
stack.append(pentry.name)
self.line = skip_white(line)[1:]
line = self.subconvert(
"}", len(stack) + depth + 1)[1:]
self.write(")%s\n" % stack.pop())
elif pentry.type == "content":
if pentry.implied:
implied_content = 1
else:
if entry.outputname and not opened:
opened = 1
self.write("(%s\n" % entry.outputname)
stack.append(entry.name)
line = skip_white(line)
if line[0] != "{":
raise LaTeXFormatError(
"missing content for " + macroname)
self.line = line[1:]
line = self.subconvert("}", len(stack) + depth + 1)
if line and line[0] == "}":
line = line[1:]
elif pentry.type == "text" and pentry.text:
if entry.outputname and not opened:
opened = 1
stack.append(entry.name)
self.write("(%s\n" % entry.outputname)
#dbgmsg("--- text: %s" % `pentry.text`)
self.write("-%s\n" % encode(pentry.text))
elif pentry.type == "entityref":
self.write("&%s\n" % pentry.name)
if entry.outputname:
if not opened:
self.write("(%s\n" % entry.outputname)
stack.append(entry.name)
if not implied_content:
self.write(")%s\n" % entry.outputname)
stack.pop()
continue
if line[0] == endchar and not stack:
self.line = line[1:]
return self.line
if line[0] == "}":
# end of macro or group
macroname = stack[-1]
if macroname:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -