content.py

来自「SumatraPDF是一款小型开源的pdf阅读工具。虽然玲珑小巧（只有800多K」· Python 代码 · 共 583 行 · 第 1/2 页
583 行
#  Content (c) 2002, 2004, 2006, 2007, 2008 David Turner <david@freetype.org>
#
#  This file contains routines used to parse the content of documentation
#  comment blocks and build more structured objects out of them.
#

from sources import *
from utils import *
import string, re


# this regular expression is used to detect code sequences. these
# are simply code fragments embedded in '{' and '}' like in:
#
#  {
#    x = y + z;
#    if ( zookoo == 2 )
#    {
#      foobar();
#    }
#  }
#
# note that indentation of the starting and ending accolades must be
# exactly the same. the code sequence can contain accolades at greater
# indentation
#
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )


# this regular expression is used to isolate identifiers from
# other text
#
re_identifier = re.compile( r'(\w*)' )


# we collect macros ending in `_H'; while outputting the object data, we use
# this info together with the object's file location to emit the appropriate
# header file macro and name before the object itself
#
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )


#############################################################################
#
# The DocCode class is used to store source code lines.
#
#   'self.lines' contains a set of source code lines that will be dumped as
#   HTML in a <PRE> tag.
#
#   The object is filled line by line by the parser; it strips the leading
#   "margin" space from each input line before storing it in 'self.lines'.
#
class  DocCode:

    def  __init__( self, margin, lines ):
        self.lines = []
        self.words = None

        # remove margin spaces
        for l in lines:
            if string.strip( l[:margin] ) == "":
                l = l[margin:]
            self.lines.append( l )

    def  dump( self, prefix = "", width = 60 ):
        lines = self.dump_lines( 0, width )
        for l in lines:
            print prefix + l

    def  dump_lines( self, margin = 0, width = 60 ):
        result = []
        for l in self.lines:
            result.append( " " * margin + l )
        return result



#############################################################################
#
# The DocPara class is used to store "normal" text paragraph.
#
#   'self.words' contains the list of words that make up the paragraph
#
class  DocPara:

    def  __init__( self, lines ):
        self.lines = None
        self.words = []
        for l in lines:
            l = string.strip( l )
            self.words.extend( string.split( l ) )

    def  dump( self, prefix = "", width = 60 ):
        lines = self.dump_lines( 0, width )
        for l in lines:
            print prefix + l

    def  dump_lines( self, margin = 0, width = 60 ):
        cur    = ""  # current line
        col    = 0   # current width
        result = []

        for word in self.words:
            ln = len( word )
            if col > 0:
                ln = ln + 1

            if col + ln > width:
                result.append( " " * margin + cur )
                cur = word
                col = len( word )
            else:
                if col > 0:
                    cur = cur + " "
                cur = cur + word
                col = col + ln

        if col > 0:
            result.append( " " * margin + cur )

        return result



#############################################################################
#
#  The DocField class is used to store a list containing either DocPara or
#  DocCode objects. Each DocField also has an optional "name" which is used
#  when the object corresponds to a field or value definition
#
class  DocField:

    def  __init__( self, name, lines ):
        self.name  = name  # can be None for normal paragraphs/sources
        self.items = []    # list of items

        mode_none  = 0     # start parsing mode
        mode_code  = 1     # parsing code sequences
        mode_para  = 3     # parsing normal paragraph

        margin     = -1    # current code sequence indentation
        cur_lines  = []

        # now analyze the markup lines to see if they contain paragraphs,
        # code sequences or fields definitions
        #
        start = 0
        mode  = mode_none

        for l in lines:
            # are we parsing a code sequence ?
            if mode == mode_code:
                m = re_code_end.match( l )
                if m and len( m.group( 1 ) ) <= margin:
                    # that's it, we finised the code sequence
                    code = DocCode( 0, cur_lines )
                    self.items.append( code )
                    margin    = -1
                    cur_lines = []
                    mode      = mode_none
                else:
                    # nope, continue the code sequence
                    cur_lines.append( l[margin:] )
            else:
                # start of code sequence ?
                m = re_code_start.match( l )
                if m:
                    # save current lines
                    if cur_lines:
                        para = DocPara( cur_lines )
                        self.items.append( para )
                        cur_lines = []

                    # switch to code extraction mode
                    margin = len( m.group( 1 ) )
                    mode   = mode_code
                else:
                    if not string.split( l ) and cur_lines:
                        # if the line is empty, we end the current paragraph,
                        # if any
                        para = DocPara( cur_lines )
                        self.items.append( para )
                        cur_lines = []
                    else:
                        # otherwise, simply add the line to the current
                        # paragraph
                        cur_lines.append( l )

        if mode == mode_code:
            # unexpected end of code sequence
            code = DocCode( margin, cur_lines )
            self.items.append( code )
        elif cur_lines:
            para = DocPara( cur_lines )
            self.items.append( para )

    def  dump( self, prefix = "" ):
        if self.field:
            print prefix + self.field + " ::"
            prefix = prefix + "----"

        first = 1
        for p in self.items:
            if not first:
                print ""
            p.dump( prefix )
            first = 0

    def  dump_lines( self, margin = 0, width = 60 ):
        result = []
        nl     = None

        for p in self.items:
            if nl:
                result.append( "" )

            result.extend( p.dump_lines( margin, width ) )
            nl = 1

        return result



# this regular expression is used to detect field definitions
#
re_field = re.compile( r"\s*(\w*|\w(\w|\.)*\w)\s*::" )



class  DocMarkup:

    def  __init__( self, tag, lines ):
        self.tag    = string.lower( tag )
        self.fields = []

        cur_lines = []
        field     = None
        mode      = 0

        for l in lines:
            m = re_field.match( l )
            if m:
                # we detected the start of a new field definition

                # first, save the current one
                if cur_lines:
                    f = DocField( field, cur_lines )
                    self.fields.append( f )
                    cur_lines = []
                    field     = None

                field     = m.group( 1 )   # record field name
                ln        = len( m.group( 0 ) )
                l         = " " * ln + l[ln:]
                cur_lines = [l]
            else:
                cur_lines.append( l )

        if field or cur_lines:
            f = DocField( field, cur_lines )
            self.fields.append( f )

    def  get_name( self ):
        try:
            return self.fields[0].items[0].words[0]
        except:
            return None

    def  get_start( self ):
        try:
            result = ""
            for word in self.fields[0].items[0].words:
                result = result + " " + word
            return result[1:]
        except:
            return "ERROR"

    def  dump( self, margin ):
        print " " * margin + "<" + self.tag + ">"
        for f in self.fields:
            f.dump( "  " )
        print " " * margin + "</" + self.tag + ">"



class  DocChapter:

    def  __init__( self, block ):
        self.block    = block
        self.sections = []
        if block:
content.py - 源码说明

本页面展示了「SumatraPDF是一款小型开源的pdf阅读工具。虽然玲珑小巧（只有800多KB）」中的 content.py 源码文件，采用 Python 编程语言编写，共 583 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与SumatraPDF相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?