📄 unaopparser.pas
字号:
(*
----------------------------------------------
unaOPParser.dpr
Object Pascal parser
----------------------------------------------
This source code cannot be used without
proper license granted to you as a private
person or an entity by the Lake of Soft, Ltd
Visit http://lakeofsoft.com/ for more information.
Copyright (c) 2002 Lake of Soft, Ltd
All rights reserved
----------------------------------------------
created by:
Lake, 03 Apr 2002
modified by:
Lake, 03 Apr 2002
----------------------------------------------
*)
{$I unaDef.inc}
unit
unaOPParser;
interface
uses
Windows, unaTypes, unaClasses;
type
//
unaOPTokenType = (
unaoptEOF,
//
unaoptIdentifier,
unaoptNumber,
unaoptChar,
unaoptString,
unaoptPunctuationMark,
unaoptAssigment,
unaoptComment,
//
unaoptError
);
//
// -- unaObjectPascalToken --
//
unaObjectPascalToken = class
private
f_value: WideString;
f_type: unaOPTokenType;
//
procedure addChar(c: char);
public
function get(def: byte): byte; overload;
function get(def: char): char; overload;
function get(def: WideChar): WideChar; overload;
function get(def: int): int; overload;
function get(def: unsigned): unsigned; overload;
function get(const def: string): string; overload;
function get(const def: WideString): WideString; overload;
function get(def: double): double; overload;
function isToken(const value: string): bool; overload;
function isToken(const value: WideString): bool; overload;
//
property tokenType: unaOPTokenType read f_type write f_type;
property value: WideString read f_value;
end;
//
// -- unaObjectPascalParser --
//
unaObjectPascalParser = class
private
f_lineNum: unsigned;
f_isAnsiText: bool;
//
f_stream: unaAbstractStream;
f_token: unaObjectPascalToken;
f_lastToken: unaObjectPascalToken;
public
constructor create(const fileName: string; isAnsiText: bool); overload;
constructor create(const script: WideString); overload;
procedure AfterConstruction(); override;
destructor Destroy(); override;
//
function nextToken(token: unaObjectPascalToken = nil): bool;
function getLineNum(): unsigned;
//
property token: unaObjectPascalToken read f_lastToken;
end;
implementation
uses
unaUtils
{$IFDEF __SYSUTILS_H_}
, SysUtils
{$ENDIF}
;
{ unaObjectPascalToken }
// -- --
procedure unaObjectPascalToken.addChar(c: char);
begin
f_value := f_value + c;
end;
// -- --
function unaObjectPascalToken.get(def: byte): byte;
begin
result := str2IntByte(f_value, def);
end;
// -- --
function unaObjectPascalToken.get(def: char): char;
begin
if (0 < length(f_value)) then
result := char(f_value[1])
else
result := def;
end;
// -- --
function unaObjectPascalToken.get(def: double): double;
begin
// to do
result := 0;
end;
// -- --
function unaObjectPascalToken.get(def: int): int;
begin
result := str2IntInt(f_value, def);
end;
// -- --
function unaObjectPascalToken.get(const def: string): string;
begin
result := f_value;
end;
// -- --
function unaObjectPascalToken.get(def: unsigned): unsigned;
begin
result := str2IntUnsigned(f_value, def);
end;
// -- --
function unaObjectPascalToken.get(def: WideChar): WideChar;
begin
if (0 < length(f_value)) then
result := f_value[1]
else
result := def;
end;
// -- --
function unaObjectPascalToken.get(const def: WideString): WideString;
begin
result := f_value;
end;
// -- --
function unaObjectPascalToken.isToken(const value: string): bool;
begin
result := (0 = compareStr(lowerCase(f_value), lowerCase(value)));
end;
// -- --
function unaObjectPascalToken.isToken(const value: WideString): bool;
begin
result := (0 = compareStr(lowerCase(f_value), lowerCase(value)));
end;
{ unaObjectPascalParser }
// -- --
procedure unaObjectPascalParser.afterConstruction();
begin
inherited;
//
f_token := unaObjectPascalToken.create();
end;
// -- --
constructor unaObjectPascalParser.create(const fileName: string; isAnsiText: bool);
begin
inherited create();
//
f_stream := unaFileStream.createStream(fileName, GENERIC_READ);
f_isAnsiText := isAnsiText;
end;
// -- --
constructor unaObjectPascalParser.create(const script: WideString);
begin
f_stream := unaMemoryStream.create();
f_stream.write(script);
f_isAnsiText := false;
end;
// -- --
destructor unaObjectPascalParser.destroy();
begin
inherited;
//
freeAndNil(f_token);
freeAndNil(f_stream);
end;
// -- --
function unaObjectPascalParser.getLineNum: unsigned;
begin
result := f_lineNum + 1;
end;
// -- --
function unaObjectPascalParser.nextToken(token: unaObjectPascalToken): bool;
var
c: array[0..1] of char;
mode: byte;
noAdd: bool;
begin
if (nil = token) then
token := f_token;
//
token.f_type := unaoptEOF;
token.f_value := '';
//
mode := 0; // white space
//
while (1 <= f_stream.getAvailableSize()) do begin
//
f_stream.read(@c, 2, false);
noAdd := false;
if (#10 = c[0]) then
inc(f_lineNum);
//
case (mode) of
0: begin // "white space" mode
case (c[0]) of
'A'..'Z',
'a'..'z',
'_': begin
mode := 1; // identifier
token.f_type := unaoptIdentifier;
end;
'/': begin
if ('/' = c[1]) then begin
mode := 2; // "//" comment
token.f_type := unaoptComment;
end
else begin
mode := 10; // end of parse
token.f_type := unaoptPunctuationMark;
end;
end;
'(': begin
if ('*' = c[1]) then begin
mode := 3; // "(*" comment
token.f_type := unaoptComment;
end
else begin
mode := 10; // end of parse
token.f_type := unaoptPunctuationMark;
end;
end;
'{': begin
mode := 4; // start of "{" comment
token.f_type := unaoptComment;
end;
'0'..'9': begin
mode := 5; // start of number
token.f_type := unaoptNumber;
end;
':' : begin
if ('=' = c[1]) then begin
f_stream.read(@c, 2);
token.addChar(c[0]);
token.addChar(c[1]);
mode := 11; // assigment
token.f_type := unaoptAssigment;
end
else begin
mode := 10; // end of parse
token.f_type := unaoptPunctuationMark;
end;
end;
'#': begin
if (c[1] in ['0'..'9']) then begin
mode := 7; // start of # char
token.f_type := unaoptChar;
end
else begin
mode := 10; // end of parse
token.f_type := unaoptError;
end;
end;
'''': begin
noAdd := true; // do not add '
mode := 8; // start of char or string
token.f_type := unaoptString;
end;
#0..#32:
mode := 0; // white space
'"', #128..#255: begin
mode := 10;
token.f_type := unaoptError; // invalid symbol
end;
else begin
mode := 10; // punctuation mark
token.f_type := unaoptPunctuationMark;
end;
end; // end case (c), mode = 0
end; // mode 0
1: begin // "identifier" mode
case (c[0]) of
'a'..'z',
'A'..'Z',
'0'..'9',
'_': ; // continue
else
mode := 11; // stop
end; // end case (c[0]), mode = 1
end; // mode 1
2: begin // '"//" comment' mode
case (c[0]) of
#13, #10:
mode := 11; // stop
end; // end case (c[0]), mode = 2
end; // mode 2
3: begin // '"(*" comment' mode
case (c[0]) of
'*':
if (')' = c[1]) then begin
f_stream.read(@c, 1);
token.addChar(c[0]);
mode := 10; // stop
end;
end; // end case (c[0]), mode = 3
end; // mode 3
4: begin // '"{" comment' mode
case (c[0]) of
'}':
mode := 10; // stop
end; // end case (c[0]), mode = 4
end; // mode 4
5: begin // "number" mode
case (c[0]) of
'0'..'9': ; // ok to continue
'.':
if ('.' = c[1]) then // .. sign follows digit
mode := 11 // stop now
else
if (c[1] in ['0'..'9', 'e', 'E']) then
if (0 > pos('.', token.value)) then
// ok to continue
else begin
// have found two dots - indicate error
token.f_type := unaoptError;
mode := 10; // stop
end
else
mode := 10; // stop now - end of digit
'e', 'E': begin
case (c[1]) of
'-', '+': begin
f_stream.read(@c, 1); // add 'E'
token.addChar(c[0]);
end;
end;
mode := 6; // switch do "float number" mode
end;
else
mode := 11; // stop
end; // end case (c[0]), mode = 5
end; // mode 5
6: begin // "float number" mode
case (c[0]) of
'0'..'9': ; // ok to continue
else
mode := 11; // stop
end; // end case (c[0]), mode = 6
end; // mode 6
7: begin // "# char" mode
case (c[0]) of
'0'..'9': ; // ok to continue
else
mode := 11; // stop
end; // end case (c[0]), mode = 7
end; // mode 7
8: begin // "" mode
case (c[0]) of
'''': begin
noAdd := true;
if ('''' = c[1]) then // this is double '' - skip it
f_stream.read(@c, 1)
else
mode := 10; // end of string - stop
end;
#13, #10: // unterminated string - indicate error
mode := 10;
end; // end case (c[0]), mode = 8
end; // mode 8
end; // end of case
//
if (11 <> mode) then begin
f_stream.read(@c, 1);
if (not noAdd and (0 <> mode)) then
token.addChar(c[0]);
end;
if (10 <= mode) then
break;
end;
//
result := (unaoptError <> token.f_type);
f_lastToken := token;
end;
end.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -