📄 lexer.py

📁 convert C programs for use in C++ compiler environment
💻 PY
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
import string
import wstring
import sys
import os

"""
   C to C++ Library
   Functions for scanning C source and header files.
   (c) 2001-2005 by D.G. Sureau
   
	Modified: August 30 2005
	             by Georg Wittenburg   

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    webmaster@scriptol.org
    http://www.scriptol.org

   Some language conventions....
    int a = 0;             in source, the definition of a variable
    extern int a;          in header, the declaration of a variable
    int a() { ... }        in source, int a()  is the interface of a function
    int a();               in header, int a()  is the prototype of a function
    Inside a class, members are attributes (variables) or methods (functions).
   Statements of a program live in a "source" file;
   declarations and prototypes live in a "header" file,
   also called an include file.
"""

# Integer truth values returned by the predicates of this module.
TRUE = 1
FALSE = 0

# Extensions for C++ header and source files.
hpp = ".hpp"
cpp = ".cpp"

# Keywords that may appear in the type part of a declaration.
types = [
    "extern", "*", "void", "unsigned", "signed", "char", "short", "int",
    "integer", "long", "double", "float", "far", "near", "huge",
    "interrupt", "static", "struct", "register", "volatile", "const",
    "struct",  # listed twice in the original; kept so the list is unchanged
]

# Storage-class and qualifier keywords.
modifiers = [
    "extern", "static", "const", "volatile", "auto", "inline", "near", "far",
]

# Type names treated as already known.
predefined = [
    "FILE", "ffblk", "ftime", "_Cdecl", "size_t", "div_t", "ldiv_t",
    "wchar_t",
]

# Every word accepted as part of a declarator.
declarator = types + modifiers + predefined

# New types of struct; starts empty.
typstruct = []

# Tokens that start a control construct.
constructs = ["if", "switch", "while", "for", "case", "{", "else"]

# These tokens are relevant to identify declarations and constructs;
# they can be used on partially converted headers.
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated by Python, which previously merged "until" and "case" into
# the single word "untilcase".
reserved = [ "extern", "*", "void", "unsigned", "signed", "char", "short",
           "int", "integer", "long", "double", "float", "register",
           "far", "near", "huge" , "interrupt", "static", "volatile",
           "const", "auto",
           "struct", "typedef", "enum", "union",
           "if", "else", "switch", "while", "for", "do", "until",
           "case", "default",
           "goto", "break", "continue", "inline", "asm",
           "private", "protected", "public" ]

# Single-character delimiters. The comma after '\"' matters: without it
# Python concatenates '\"' and '\t' into one two-character entry.
symbols = [ '+', '-', '*', '^', '/', '<', '>', '=',
            '|', '&', '~', '[', ']', '(', ')', '{', '}',
            '?', ':', ',', ';', '%', ' ', '\'', '\"',
            '\t', '\r', '\n' ]

# The same delimiters as one string, usable as a delimiter set.
symstr = "+-*^/<>=|&~[](){}?:,;% \'\"\t\r\n"


#---------------------------------- Utilities


def iswindows():
    """Return TRUE when os.name is "msdos" or "nt", FALSE otherwise."""
    if os.name in ("msdos", "nt"):
        return TRUE
    return FALSE

def oscase(str):     # make lower under Windows
    """Return *str* lower-cased when iswindows() is true, else unchanged.

    Uses the str method rather than string.lower(), which no longer exists
    in Python 3. The parameter keeps its original name (shadowing the
    builtin) so existing callers are unaffected.
    """
    if iswindows():
        return str.lower()
    return str


def nextalpha(line, i):
    """Return the index of the first non-space character at or after *i*.

    Only the blank character ' ' is skipped. When no such character
    exists the scan stops at len(line); an *i* already past the end is
    returned unchanged.
    """
    end = len(line)
    while i < end and line[i] == ' ':
        i += 1
    return i

def nextspace(line, i):
    """Return the index of the first ' ' character at or after *i*.

    When there is none the scan stops at len(line); an *i* already past
    the end is returned unchanged.
    """
    end = len(line)
    while i < end and line[i] != ' ':
        i += 1
    return i



def isident(word):
    """Tell whether *word* is an identifier.

    Returns TRUE when the word is not listed in `reserved`, starts with
    an ASCII letter and continues with ASCII letters, digits or
    underscores. Returns FALSE otherwise, including for an empty word
    (which used to raise IndexError).
    """
    if not word: return FALSE
    if word in reserved: return FALSE
    # string.ascii_letters replaces string.letters, which is gone in
    # Python 3. A leading underscore is still rejected, as before.
    if word[0] not in string.ascii_letters: return FALSE
    allowed = string.ascii_letters + string.digits + '_'
    for c in word[1:]:
        if c not in allowed: return FALSE
    return TRUE


def getident(line):
    """Return the first identifier following a list of types, or None.

    Comments and compiler directives are removed first. The words
    "typedef" (in any letter case) and those in `types` are skipped; the
    first remaining word is returned if isident() accepts it, otherwise
    the result is None.
    """
    line = removecomment(line)
    line = removedirective(line)
    # '*' is both a delimiter and a keyword in C, hence the third argument.
    for w in wstring.split(line, symstr, '*'):
        # str.lower() replaces string.lower(), removed in Python 3.
        if w.lower() == "typedef": continue
        if w in types: continue      # Skipping type keywords
        if isident(w): return w      # Identifier returned
        return None                  # Anything else is not of concern
    return None


def getvarnames(line):
    """Return the identifiers of a multiple variable declaration without assignment.

    Ex:   int i,j,k;     char *aaa[], b[][]
    Each word has anything from its first '[' onward cut off; words in
    `types` and words rejected by isident() are left out.
    """
    vlist = []
    for w in wstring.split(line, " *;,\t\r\n", '*'):
        # str.find() replaces string.find(), removed in Python 3.
        i = w.find('[')
        if i != -1:
            w = w[:i]
        w = wstring.strip(w)
        # A word that was only "[...]" is now empty; skip it instead of
        # handing an empty string to isident().
        if not w: continue
        if w in types: continue
        if isident(w): vlist.append(w)
    return vlist


def initialized(vardef):
    """Tell whether *vardef* has an initializer.

    True when an '=' occurs before the first ';'. A definition without
    any '=' used to be reported as initialized, because the "not found"
    result -1 compared lower than the position of ';'. As before, the
    result is False when there is no ';'.
    """
    equals = vardef.find('=')
    if equals == -1:
        return False
    return equals < vardef.find(';')

def isglobal(line):
    """A global variable: the answer is whatever isstatic() gives for *line*."""
    static = isstatic(line)
    return static

def isstatic(line):
    """Tell whether *line* is a static declaration.

    The first word, or the second one when the first is "extern", must be
    "static" (letter case ignored). Returns FALSE for None, for lines
    shorter than "static int", and when the expected word is missing
    (which used to raise IndexError).
    """
    if line is None:  return FALSE
    if len(line) < 10: return FALSE    # min is "static int"
    words = wstring.split(line, " \t\r\n*;[](){}?:,", "*")
    if not words: return FALSE
    i = 0
    # str.lower() replaces string.lower(), removed in Python 3.
    if words[0].lower() == "extern": i = 1
    if i >= len(words): return FALSE
    return words[i].lower() == "static"

def isextern(line):
    """Tell whether *line* is an external declaration.

    True when the first word is "extern" (letter case ignored). Returns
    FALSE for None, for lines shorter than "int a;", and when the line
    yields no word (which used to raise IndexError).
    """
    if line is None:  return FALSE
    if len(line) < 6: return FALSE    # min is "int a;"
    words = wstring.split(line, " \t\r\n*;[](){}?:,", "*")
    if not words: return FALSE
    # str.lower() replaces string.lower(), removed in Python 3.
    return words[0].lower() == "extern"

def isdeclaration(line):
    """Tell whether *line* is a typed declaration.

    A typed declaration is a list of type keywords followed by an
    identifier. A leading "extern" is ignored. Returns FALSE for None,
    for lines shorter than "int a;", and when no word is left to examine
    (which used to raise IndexError).
    """
    if line is None:  return FALSE
    if len(line) < 6: return FALSE    # min is "int a;"

    words = wstring.split(line, " \t\r\n*;[](){}?:,", "*")

    if words and words[0] == "extern":
        del words[0]
    if not words: return FALSE

    first = words[0]
    if first == "*":       return FALSE   # not valid as first keyword
    if first not in types: return FALSE   # not typed

    # first being a type keyword, then scanning the line
    for w in words:
        if w in types: continue
        if isident(w): return TRUE
        return FALSE
    return TRUE


def isdefinition(line):
    """Tell whether *line* starts the definition of a variable, array or function.

    A function returning an integer may be declared without a type (it
    is implicitly typed). Only the part left of the first ( [ ; = or ,
    is examined, and the line is accepted when that part is either
    - a lone identifier, the delimiter being '(' and the line not ending
      with ';' (so a call is not taken for a definition), or
    - one or several declarator keywords, optionally with identifiers
      after them.
    A left part containing "extern", starting with '*', or holding a
    word that is neither a declarator nor an identifier is rejected.
    """
    if line is None: return FALSE
    line = wstring.strip(line)
    if len(line) < 3: return FALSE          # minimum length:   f()

    # Find the first delimiter; everything left of it is analysed.
    cut = len(line)
    delimiter = ''
    for pos, ch in enumerate(line):
        if ch in ('(', '[', ';', '=', ','):
            cut = pos
            delimiter = ch
            break

    # Function name without type, and not a call (terminated by semicolon).
    # A logical "and" replaces the bitwise "&" used on booleans before.
    soleident = (delimiter == '(') and (line[-1:] != ';')

    left = line[:cut]
    if left == "": return FALSE
    words = wstring.split(left, " *", '*')

    if len(words) == 0: return FALSE
    if len(words) == 1 and isident(words[0]):
        return soleident       # True is an identifier followed by (

    # Handling case of the *ident= assignment inside block
    if words[0] == '*': return FALSE

    tflag = FALSE
    iflag = FALSE
    for w in words:
        if w == "extern": return FALSE   # External declarations are ignored
        if w in declarator:
            if iflag: return FALSE       # Type following ident?
            tflag = TRUE
            continue
        if isident(w):
            iflag = TRUE
            continue
        return FALSE                     # Neither type nor identifier
    return tflag    # Found a type followed or not by an identifier then (=;,[


def istypstruct(line):
    """Tell whether *line* is an instance of a typedef struct.

    Assumes the line is a variable declaration. The words are scanned in
    order: a word found in `typstruct` gives TRUE, words found in `types`
    are skipped, and any other word ends the scan with FALSE.
    NOTE(review): `vardelimiters` is not defined in the visible part of
    this file; presumably it is defined further down -- confirm.
    """
    for token in wstring.split(line, vardelimiters, "*"):
        if token in typstruct:
            return TRUE
        if token in types:
            continue
        if isident(token):
            return FALSE      # an identifier came before any struct typedef
        break                 # neither type nor identifier: stop scanning
    return FALSE


def addsemicolon(line):
    """Return *line* passed through wstring.strip, with ';' appended if it does not end with one."""
    stripped = wstring.strip(line)
    if stripped[-1:] != ';':
        stripped = stripped + ';'
    return stripped

def addextern(str):
    """Return *str* with "extern " put in front unless its first word is already "extern".

    A string without any word (empty or all whitespace) is returned
    unchanged. The str method split() replaces string.split(), which no
    longer exists in Python 3. The parameter keeps its original name
    (shadowing the builtin) so existing callers are unaffected.
    """
    words = str.split()
    if words and words[0] != "extern":
        return "extern " + str
    return str


def removextern(line):
    """Return *line* without its first "extern" keyword (letter case ignored).

    Only a whole word is removed, so an identifier that merely contains
    "extern" (e.g. "externals") is no longer cut apart. The whitespace
    around the keyword is kept. A line without the keyword is returned
    unchanged.
    """
    import re    # local import: the module header does not import re
    found = re.search(r"\bextern\b", line, re.IGNORECASE)
    if found is None:
        return line
    return line[:found.start()] + line[found.end():]

def addstatic(str):
    """Return *str* with "static " put in front unless its first word is already "static".

    A string without any word (empty or all whitespace) is returned
    unchanged. The str method split() replaces string.split(), which no
    longer exists in Python 3. The parameter keeps its original name
    (shadowing the builtin) so existing callers are unaffected.
    """
    words = str.split()
    if words and words[0] != "static":
        return "static " + str
    return str

def removestatic(line):
    """Return *line* without its first "static" keyword (letter case ignored).

    Only a whole word is removed, so an identifier that merely contains
    "static" (e.g. "statics") is no longer cut apart. The whitespace
    around the keyword is kept. A line without the keyword is returned
    unchanged.
    """
    import re    # local import: the module header does not import re
    found = re.search(r"\bstatic\b", line, re.IGNORECASE)
    if found is None:
        return line
    return line[:found.start()] + line[found.end():]

def removeglobal(line):
    """Return whatever removestatic() returns for *line*."""
    cleaned = removestatic(line)
    return cleaned


#---------------------------------- Comments

# Processing a comment block.
# A block of comments is assumed to start on a line with no statement
# to its left and to finish on a line with no statement to its right.
# Trailing and embedded comments are processed elsewhere.

def opencomment(line):
    """Tell whether *line* opens a comment block that it does not close.

    TRUE when the line starts with "/*" and does not end with "*/"
    (trailing whitespace ignored). The end test used line[:-2], which is
    the line without its last two characters, so a closed one-line
    comment was reported as open.
    """
    if line[0:2] != "/*": return FALSE
    body = line.rstrip()
    # At least four characters are needed so that the '*' of the opener
    # is not mistaken for the start of the terminator (as in "/*/").
    if len(body) >= 4 and body[-2:] == "*/": return FALSE
    return TRUE

def closecomment(line):
    """Tell whether the comment is still open after *line*.

    Called only when a comment line or block is opened. Returns FALSE
    when the line contains the terminator "*/", TRUE otherwise. The
    earlier character scan remembered a '*' forever, so any later '/'
    (as in "a * b / c") was taken for a terminator.
    """
    if "*/" in line: return FALSE
    return TRUE

def embeddedcomment(line):
    """Tell whether *line* holds a comment starting and ending in the same line.

    The terminator is searched after the opener, so a stray "*/" earlier
    in the line no longer hides a later complete comment, and "/*/" is
    not taken for a closed comment. Returns FALSE for None.
    """
    if line is None: return FALSE
    # str.find() replaces string.find(), removed in Python 3.
    start = line.find("/*")
    if start == -1: return FALSE
    if line.find("*/", start + 2) == -1: return FALSE
    return TRUE

def unterminated(line):
    """Tell whether *line* contains a comment that is opened but not closed.

    Complete "/* ... */" comments are stepped over one by one; TRUE is
    returned when an opener has no terminator after it, FALSE when no
    opener is left (or the line is None).
    """
    if line is None: return FALSE
    pos = 0
    while 1:
        # str.find() replaces string.find(), removed in Python 3.
        start = line.find("/*", pos)
        if start == -1: return FALSE       # comment not found
        end = line.find("*/", start + 2)   # searching for end of comment
        if end == -1: return TRUE
        pos = end + 2                      # go on with the trailing part


# Processing embedded comment

def commentfound(line):
    """Return TRUE when *line* contains the comment opener "/*", FALSE otherwise."""
    # The substring test replaces string.find(), removed in Python 3.
    if "/*" in line: return TRUE
    return FALSE

def nocomment(line):
    """Return *line* without its first comment.

    A line without "/*" is returned unchanged. When the comment is
    closed on the line, the text before and after it is joined; when it
    is not closed, everything from the opener onward is dropped.
    """
    # str.find() replaces string.find(), removed in Python 3.
    start = line.find("/*")
    if start == -1:
        return line
    end = line.find("*/", start + 2)
    if end == -1:
        return line[:start]                 # comment runs to the end of line
    return line[:start] + line[end + 2:]    # removing the comment


def removecomment(line):
    """Apply nocomment() to *line* for as long as commentfound() reports a comment, then return it."""
    while 1:
        if not commentfound(line):
            return line
        line = nocomment(line)

#----------------------------------- Compiler directives

def removedirective(line):
    """Return the part of *line* that follows a "__name__" compiler directive.

    A directive starts and ends with "__" and may be followed by a
    parenthesised "(...)" block, which is skipped as well. A line without
    two "__" markers is returned unchanged. A directive at the very end
    of the line, or an unbalanced "(...)" block, now yields "" instead of
    raising IndexError.
    NOTE(review): as before, the text in front of the directive is
    dropped too -- confirm that callers rely on this.
    """
    # str.find() replaces string.find(), removed in Python 3.
    start = line.find("__")
    if start == -1: return line
    end = line.find("__", start + 2)
    if end == -1: return line
    offset = end + 2    # first character after the directive name
    length = len(line)
    if offset >= length or line[offset] != '(':
        return line[offset:]
    # Skip the "(...)" block, counting nested parentheses.
    depth = 0
    while offset < length:
        ch = line[offset]
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        offset += 1
        if depth == 0: break
    return line[offset:]


#----------------------------------- Define statement

def isdefine(line):
    """Tell whether *line* is a define statement.

    TRUE when the line is at least 9 characters long and starts with
    "#define " (letter case of the keyword ignored). Returns FALSE for
    None.
    """
    if line is None:   return FALSE
    if len(line) < 9:  return FALSE
    if line[0] != '#': return FALSE
    # str.lower() replaces string.lower(), removed in Python 3.
    if line[1:8].lower() != "define ": return FALSE
    return TRUE


def opendef(line):
    """Tell whether *line* starts a macro or pragma; true for all but an alias.

    A line starting with '#' gives TRUE, except a "#define" whose name
    (as returned by splitdefine) contains no '(' -- an alias -- which
    gives FALSE. Empty lines and lines not starting with '#' give FALSE.
    """
    if len(line) == 0: return FALSE
    if line[0] != "#": return FALSE
    if line[1:7] == "define":
        key, value = splitdefine(line)
        # The substring test replaces string.find(), removed in Python 3.
        if '(' not in key: return FALSE
    return TRUE

def closedef(line):
    """Return TRUE while still inside a multiline macro.

    Called only when a define block is opened. After wstring.chop and
    wstring.strip, the line continues the macro when it ends with a
    backslash. The slice line[-1:] is used so that a line left empty by
    the stripping gives FALSE instead of raising IndexError.
    """
    line = wstring.chop(line)
    line = wstring.strip(line)
    if line[-1:] == "\\": return TRUE
    return FALSE


def splitdefine(line):
    """Return (name, definition) taken from a define statement.

    The first 8 characters (the keyword and one whitespace) are skipped,
    then any further blanks. The name runs up to the next blank; the
    definition is what follows that blank, passed through wstring.strip.
    """
    rest = line[8:]
    begin = nextalpha(rest, 0)
    stop = nextspace(rest, begin)
    return rest[begin:stop], wstring.strip(rest[stop + 1:])


#---------------------------------- Curly brace enclosed block

# Processing a block to be skipped

# Nesting depth of curly braces; updated by closeblock().
blocklevel = 0

def openblock(line):
    """Return TRUE when openrecord() accepts *line* or the line contains '{'.

    Returns FALSE otherwise, and for None.
    """
    if line is None:
        return FALSE
    if openrecord(line) or '{' in line:
        return TRUE
    return FALSE


def closeblock(line):
    """Return TRUE while inside a multiline block.

    Called only when a block is opened. The module-level `blocklevel`
    counter is raised for every '{' and lowered (never below zero) for
    every '}'. FALSE is returned as soon as a '}' brings it to zero; the
    rest of the line is then not examined.
    """
    global blocklevel
    for ch in line:
        if ch == '{':
            blocklevel += 1
        elif ch == '}':
            blocklevel = max(blocklevel - 1, 0)
            if blocklevel == 0:
                return FALSE
    return TRUE


# Process any kind of record to be skipped
12 3 下一页
💿 文件大小 141 K
👤 上传用户 zergwyk
📂 所属分类编译器/解释器
🏷️ 相关标签

#environment #compiler #programs #convert
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -