📄 appendix_d.txt

📁 很详细的Python文字处理教程
💻 TXT
📖 第 1 页 / 共 2 页
字号:
上一页 12
                DEF:'blockquote'}
      # Block types that Process_Blocks writes as one empty '<tag />' element
      divs = {
          RULE:    'hr',
          VERTSPC: 'br',
      }

      class Regexen:
          """Compiled line-classification patterns used by Make_Blocks.

          Every attribute is a compiled regular expression meant to be
          tried with ``.match()`` against one right-stripped input line.
          Several patterns overlap (for example ``subsect``, ``subsub`` and
          ``numlist`` lines also satisfy ``body``), so callers must test
          the more specific patterns first.
          """
          def __init__(self):
              # Raw strings keep regex escapes such as \d, \S and \+ from
              # being read as (invalid) string escape sequences.
              # blank line is empty, spaces/dashes only, or a processing
              # instruction
              self.blank    = re.compile(r"^[ -]*$|^  THIS IS [A-Z]+$")
              self.chapter  = re.compile(r"^(CHAPTER|APPENDIX|FRONTMATTER)")
              self.section  = re.compile(r"^SECTION")
              self.subsect  = re.compile(r"^  (TOPIC|PROBLEM|EXERCISE)")
              self.subsub   = re.compile(r"^  [A-Z 0-9-]+:$")  # check before body
              self.modline  = re.compile(r"^  =+$")
              self.pyshell  = re.compile(r"^ +>>>")
              self.codesamp = re.compile(r"^ +#[*]?[-=]+ .+ [-=]+#")
              self.numlist  = re.compile(r"^  \d+[.] ")        # check before body
              self.body     = re.compile(r"^  \S")             # 2 space indent
              self.quote    = re.compile(r"^     ?\S")         # 4-5 space indent
              self.subbody  = re.compile(r"^      +")          # 6+ space indent
              self.rule     = re.compile(r"^  (-\*-|!!!)$")
              self.vertspc  = re.compile(r"^  \+\+\+$")

      def Make_Blocks(fpin=sys.stdin, r=Regexen()):
          """Split the lines of `fpin` into a list of typed blocks.

          Each line is classified with the patterns in `r` and fed through
          a small state machine held in the module globals `state`,
          `laststate` and `blocks`.  A block is a list whose first item is
          the block type and whose remaining items are its text lines.

          fpin -- iterable of text lines (defaults to sys.stdin)
          r    -- object carrying the compiled patterns (a Regexen)

          Returns the block list after it has been passed through
          LookBack().  Raises ValueError for a line that fits no pattern
          and no line-eating state.
          """
          #-- Initialize the globals
          global state, blocks, laststate
          state, laststate = BLANK, BLANK
          blocks = [[BLANK]]
          #-- Break the file into relevant chunks
          # Iterating the file object directly replaces the deprecated
          # xreadlines() call and accepts any iterable of lines.
          for line in fpin:
              line = line.rstrip()            # Normalize line endings
              #-- for "one-line states" just act (no accumulation)
              if r.blank.match(line):
                  # A blank line ends a PYSHELL block by switching back to
                  # the previous state; anywhere else it is stored as an
                  # empty string in the current block.
                  if inState(PYSHELL):        newState(laststate)
                  else:                       blocks[-1].append("")
              elif r.rule.match(line):        newState(RULE)
              elif r.vertspc.match(line):     newState(VERTSPC)
              elif r.chapter.match(line):     newState(CHAPTER)
              elif r.section.match(line):     newState(SECTION)
              elif r.subsect.match(line):     newState(SUBSECT)
              elif r.subsub.match(line):      newState(SUBSUB)
              elif r.modline.match(line):     newState(MODLINE)
              elif r.numlist.match(line):     newState(NUMLIST)
              elif r.pyshell.match(line):
                  if not inState(PYSHELL):    newState(PYSHELL)
              elif r.codesamp.match(line):    newState(CODESAMP)
              #-- now the multi-line states that are self-defining
              elif r.body.match(line):
                  if not inState(BODY):       newState(BODY)
              elif r.quote.match(line):
                  # (a blank line can never reach this branch: it was
                  # already handled by the first test of the chain)
                  if inState(MODLINE):        newState(MODNAME)
                  elif not inState(QUOTE):    newState(QUOTE)
              #-- now the "multi-line states" which eat further lines
              elif inState(MODLINE, PYSHELL, CODESAMP, NUMLIST, DEF):
                  # stay in this state until we get a blank line
                  # ...or other one-line prior type, but shouldn't happen
                  pass
              elif r.subbody.match(line):
                  # Sub-body is tricky: it might belong with several states
                  # (PYSHELL, CODESAMP, NUMLIST, ...) or be a def after
                  # BODY.  Only the BODY case changes state; in every other
                  # state the line is simply accumulated below.
                  if inState(BODY):           newState(DEF)
              else:
                  raise ValueError(
                      "unexpected input block state: %s\n%s" % (state, line))
              # Accumulate the line unless it is blank or its state keeps
              # no text of its own.
              if not (inState(MODLINE, RULE, VERTSPC) or r.blank.match(line)):
                  blocks[-1].append(line)
          return LookBack(blocks)

      def LookBack(blocks):
          """Retag each BODY block that directly precedes a DEF block as TERM.

          blocks -- list of blocks, each a list whose first item is its type

          The block types are snapshotted first, so a retagged block does
          not influence later comparisons.  `blocks` is modified in place
          and also returned.
          """
          kinds = [blk[0] for blk in blocks]
          for idx, pair in enumerate(zip(kinds, kinds[1:])):
              if pair == (BODY, DEF):
                  blocks[idx][0] = TERM
          return blocks

      def newState(name):
          """Switch the global state machine to `name`.

          A fresh block ``[name]`` is appended to the global `blocks` list
          unless `name` is BLANK or MODLINE.  The state being left is
          remembered in the global `laststate`.
          """
          global state, laststate, blocks
          opens_block = name not in (BLANK, MODLINE)
          if opens_block:
              blocks.append([name])
          laststate, state = state, name

      def inState(*names):
          """Tell whether the global `state` equals any of the given names."""
          for candidate in names:
              if candidate == state:
                  return True
          return False

      def Process_Blocks(blocks, fpout=sys.stdout, title=html_title):
          """Write `blocks` to `fpout` as an HTML document.

          blocks -- list of blocks; each is a list whose first item is the
                    block type and whose remaining items are text lines
          fpout  -- object with a write() method (defaults to sys.stdout)
          title  -- value substituted into the `html_open` template

          The output is `html_open`, then one fragment per block chosen by
          its type, then `html_close`.  A block type with no known
          rendering produces no HTML; its type is written to sys.stderr.
          """
          fpout.write(html_open % title)
          for block in blocks:        # Massage each block as needed
              typ, lines = block[0], block[1:]
              tag = markup.get(typ, None)
              div = divs.get(typ, None)
              if tag is not None:
                  # Explicit loop instead of map(): map() is lazy on
                  # Python 3 and would write nothing, and a loop states
                  # the side effect plainly.
                  for chunk in wrap_html(lines, tag):
                      fpout.write(chunk)
              elif div is not None:
                  fpout.write('<%s />\n' % div)
              elif typ in (PYSHELL, CODESAMP):
                  fpout.write(fixcode('\n'.join(lines),style=typ))
              elif typ in (MODNAME,):
                  mod = '<hr/><h3 class="module">%s</h3>'%'\n'.join(lines)
                  fpout.write(mod)
              elif typ in (TERM,):
                  terms = '<br />\n'.join(lines)
                  fpout.write('<h4 class="libfunc">%s</h4>\n' % terms)
              else:
                  sys.stderr.write(typ+'\n')
          fpout.write(html_close)

      #-- Functions for start of block-type state
      def wrap_html(lines, tag):
          """Yield one '<tag>...</tag>' string per paragraph found in `lines`.

          The lines are joined with newlines and split on empty lines;
          empty paragraphs are skipped.  Each paragraph goes through
          escape(), Typography() and URLify(), in that order, before it is
          wrapped.
          """
          template = '<%s>%s</%s>\n'
          paragraphs = '\n'.join(lines).split('\n\n')
          for chunk in paragraphs:
              if not chunk:
                  continue
              marked_up = URLify(Typography(escape(chunk)))
              yield template % (tag, marked_up, tag)

      def fixcode(block, style=CODESAMP):
          """Return the HTML for one code sample or interactive session.

          block -- raw (still indented) text of the sample
          style -- block type; only CODESAMP blocks are searched for a
                   title line

          The block is shifted left with LeftMargin().  For CODESAMP a
          leading '#----- title -----#' line supplies the title and is
          blanked out of the code.  If the title or the first line mentions
          '.py', 'py_', 'Python'/'python' or '>>>', the block is rendered
          through ParsePython; otherwise it is escaped and stripped.  The
          result is the `code_block` template filled with title and code.
          """
          block = LeftMargin(block)           # Move to left
          # Pull out title if available
          title = 'Code Sample'
          if style==CODESAMP:
              # NOTE(review): Regexen.codesamp also accepts '=' rules while
              # this pattern only accepts '-' -- confirm that is intended.
              re_title = re.compile(r'^#\*?\-+ (.+) \-+#$', re.M)
              if_title = re_title.match(block)
              if if_title:
                  title = if_title.group(1)
                  block = re_title.sub('', block)  # take title out of code
          # Decide if it is Python code
          # split() keeps a one-line block whole; slicing up to find('\n')
          # would cut off its last character when find() returns -1.
          firstline = block.split('\n', 1)[0]
          if re.search(r'\.py_?|[Pp]ython|>>>', title+firstline):
              # Has .py, py_, Python/python, or >>> on first line/title
              block = ParsePython(block.rstrip()).toHTML()
              return code_block % (Typography(title), block)
          # elif the-will-and-the-way-is-there-to-format-language-X: ...
          else:
              return code_block % (Typography(title), escape(block).strip())

      def LeftMargin(txt):
          """Shift a text block left by the indentation of its first line.

          The margin is the number of spaces (at most 12) that precede the
          first non-space character of `txt`.  That many spaces are removed
          from the start of every line that has them; lines with less
          indentation are left untouched.  If the first line is empty or
          indented by more than 12 spaces, the text is returned unchanged.
          """
          for width in range(12, -1, -1):
              # raw string: '\S' is not a valid string escape
              if re.match('(?sm)' + ' ' * width + r'\S', txt):
                  break
          return re.sub('(?sm)^' + ' ' * width, '', txt)

      def URLify(txt):
          """Turn image shorthands and bare URLs in `txt` into HTML tags.

          ``{Alt Text: http://site.org/img.png}`` becomes an <img> tag, and
          every other http/ftp/file URL that is followed by whitespace or
          the end of the text is wrapped in an <a> tag.  Returns the
          converted text.
          """
          # Conv special IMG URL's: {Alt Text: http://site.org/img.png}
          # (don't actually try quite as hard to validate URL though)
          # [^}]* stops at the first closing brace, so two images in one
          # paragraph are not merged into a single tag.
          txt = re.sub(r'(?sm)\{(.*?):\s*(http://[^}]*)\}',
                       r'<img src="\2" alt="\1">', txt)
          # Convert regular URL's.  The lookbehind skips URLs that follow
          # '=' or '"' (e.g. the src attribute written above) without
          # consuming the preceding character, so that character is no
          # longer dropped from the output; '|$' lets a URL end the text.
          url_pattern = (r'(?<![="])'
                         r'((?:http|ftp|file)://(?:[^ \n\r<\)]+))(\s|$)')
          txt = re.sub(url_pattern, r'<a href="\1">\1</a>\2', txt)
          return txt

      def Typography(txt):
          """Convert the inline markup conventions in `txt` to HTML tags.

          The rules are applied one after another, in this order:
          [module], *strong*, -emphasis-, _citation_, 'function()' and
          `library.func()`.  A marker only counts when it is preceded by
          the start of a line or an opening character and followed by
          whitespace or punctuation.  Returns the converted text.
          """
          flags = re.M | re.S
          rules = (
              # [module] names
              (r"""([\(\s'/">]|^)\[(.*?)\]([<\s\.\),:;'"?!/-])""",
               '\\1<i class="module">\\2</i>\\3'),
              # *strongly emphasize* words
              (r"""([\(\s'/"]|^)\*(.*?)\*([\s\.\),:;'"?!/-])""",
               '\\1<strong>\\2</strong>\\3'),
              # -emphasize- words
              (r"""([\(\s'/"]|^)-(.+?)-([\s\.\),:;'"?!/])""",
               '\\1<em>\\2</em>\\3'),
              # _Book Title_ citations
              (r"""([\(\s'/"]|^)_(.*?)_([\s\.\),:;'"?!/-])""",
               '\\1<cite>\\2</cite>\\3'),
              # 'Function()' names
              (r"""([\(\s/"]|^)'(.*?)'([\s\.\),:;"?!/-])""",
               "\\1<code>\\2</code>\\3"),
              # `library.func()` names
              (r"""([\(\s/"]|^)`(.*?)`([\s\.\),:;"?!/-])""",
               '\\1<i class="libfunc">\\2</i>\\3'),
          )
          for pattern, replacement in rules:
              txt = re.compile(pattern, flags).sub(replacement, txt)
          return txt

      if __name__ == '__main__':
          # Script mode: read the text from stdin, write HTML to stdout.
          blocks = Make_Blocks()
          if sys.argv[1:]:
              # first command-line argument overrides the default title
              Process_Blocks(blocks, title=sys.argv[1])
          else:
              Process_Blocks(blocks)
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -