{ uwparser.pas }
{ Written by Frank Plagge
Copyright (c) 1998 by Frank Plagge, Elsterweg 39, 38446 Wolfsburg, Germany
All rights reserved
*****************************************************************************
Permission to use, copy, modify, and distribute this software and its
documentation without fee for any purpose is hereby granted, provided that
the above copyright notice appears on all copies and that both that copyright
notice and this permission notice appear in all supporting documentation.
NO REPRESENTATIONS ARE MADE ABOUT THE SUITABILITY OF THIS SOFTWARE FOR ANY
PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
NEITHER FRANK PLAGGE OR ANY OTHER PERSON SHALL BE LIABLE FOR ANY DAMAGES
SUFFERED BY THE USE OF THIS SOFTWARE.
*****************************************************************************
Description:
This module contains the component TWParser. It contains the complete
lexical analysis of a stream source. This is a necessary basis to realize
a parser for any language. The analysis produces a stream of tokens.
}
{$B-}
unit uWParser;
interface
uses
Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs;
type
{ Set type for the special chars. }
TCharSet = set of Char;
{ Enumeration with the possible result token. }
TTokenType = ( ttComment, ttEof, ttError,
ttHexDecimal, ttIdentifier, ttInteger,
ttKeyword, ttReal, ttSpecialChar, ttString );
TTokenSubType = ( tsNone, tsComment1Block, tsComment2Block, tsCommentLine );
{ A token contains the token type, the belonging text and the position }
{ within the source stream. }
TToken = class
Token: TTokenType; // Token type.
SubType: TTokenSubType;// Token sub-type
Text: string; // Representing token text.
Position: Integer; // Positition within the file stream.
Row, Column: Integer; // Row and column within source file stream.
Tag: Integer; // User defined information.
public
procedure CopyFrom( SrcToken: TToken ); // The procedure copies contents
// from another token to the current.
end;
{ This event type is called if a token from the input stream is read
- Token is the actual read token,
- AddToList determines if the token is added to the list of read tokens (default is true)
- Stop determines if the analysis is stopped now (default is false); a stopped analysis
goes on with the method Restart. }
TTokenEvent = procedure (Sender: TObject; Token: TToken; var AddToList, Stop: Boolean) of object;
TWParser = class(TComponent) // TWParser is a main scanner component.
private
{ Private }
CommentBlock1Phase: Boolean; // Boolean flag variable. The flag is true
// if a comment block 1 reading phase is active
CommentBlock2Phase: Boolean; // Boolean flag variable. The flag is true
// if a comment block 2 reading phase is active
TokenList: TList; // The list of read tokens.
EAState: Byte; // A condition of the state machine.
EAText: string; // A string that belongs to a token.
EAToken: TTokenType; // Recognized token type.
EASubType: TTokenSubType; // Recognized token sub-type.
EARow: Integer; // Row of the first token char.
EAColumn: Integer; // Column of the first token char.
EAPosition: Integer; // Position of the first token char within the stream.
Eof: Boolean; // Indicates if end of file of the stream is reached.
NextChar: Char; // Next not processed char in the source stream.
FIdentChars: TCharSet; // Chars allowed within an identifier.
FAllowFigures: Boolean; // Allow figures within identifiers.
FAllowIdentifier: Boolean; // Allow identifiers.
FCaseSensitive: Boolean; // Detecting keywords case sensitive.
FCharacters: TCharSet; // Allowed special chars.
FCommentLine: string; // Introducing comment chars.
FComment1Begin: string; // Introducing comment block 1 begining string.
FComment1End: string; // Introducing comment block 1 ending string.
FComment2Begin: string; // Introducing comment block 2 begining string.
FComment2End: string; // Introducing comment block 2 ending string.
FKeywords: TStringList; // List of defined keywords.
FOnTokenRead: TTokenEvent; // User defined event if a new token is read.
SourceStream: TStream; // Input stream for lexical analysis.
SourceY: Integer; // Actual row within source stream.
SourceX: Integer; // Actual column within source stream.
FOwnSourceStream : boolean; // If it's true, free SourceStream when it's getting destroyes
procedure EASwitch0( ch: Char ); // Process a char at state 0 ( start).
procedure EASwitch1( ch: Char ); // Process a char at state 1.
procedure EASwitch3( ch: Char ); // Process a char at state 3.
procedure EASwitch5( ch: Char ); // Process a char at state 5.
procedure EASwitch7( ch: Char ); // Process a char at state 7.
procedure EASwitch9( ch: Char ); // Process a char at state 9.
procedure EASwitch11( ch: Char );// Process a char at state 11.
procedure EASwitch13( ch: Char );// Process a char at state 13.
procedure EASwitch14( ch: Char );// Process a char at state 14.
procedure EASwitch16( ch: Char );// Process a char at state 16.
procedure EASwitch18( ch: Char );// Process a char at state 18.
procedure EASwitch19( ch: Char );// Process a char at state 19.
function EqualStr( First, Second: string ) : Boolean; // Compare two strings depending on case sensitive set.
function GetAdditionalChars: string; // Read method for property AdditinalChars.
function GetCount: Integer; // Read method for property Count.
function GetToken( Index: Integer ) : TToken; // Read method for property Token.
function GetSpecialChars: string; // Read method for property SpecialChars.
function GetVersion: string; // Read the internal version number.
function IsKeyword( Value: string ) : Boolean; // Test if a string is a keyword.
function LookAheadChar: Char; // Get the net char without reading it.
function LookAheadStr( Count: Integer) : string;// Get the next count char without reading it.
procedure ProcessChar; // Process the available char.
procedure ReadCh( var ch: Char ); // Read a char from the source stream.
procedure ReadToken; // Read the next token.
procedure SkipChars( Count: Integer ); // Skip a number of chars.
procedure SetAdditionalChars( Value: string ); // Write method for AdditionalChars.
procedure SetKeywords( Value: TStringList ); // Write method for property Keywords.
procedure SetSpecialChars( Value: string ); // Write method for SpecialChars.
protected
{ Protected }
public
{ Public }
constructor Create(AOwner: TComponent); override; // Create the scanner component.
destructor Destroy; override; // Destroy the scanner component.
(* This is the main method taking a source stream to tokens. the
number of read tokens is available via the property count. the read
tokens are available in the array Token. before analyzing a new source stream
the results of a previously analysis are deleted. *)
procedure Analyze( Source: TStream ); // Analyze the input file.
(* The token list produced by the stream analysis can be cleared by this
method. *)
procedure ClearTokenList; // Clear token list.
(* A formerly stopped analysis goes on working *)
procedure Restart; // Restart a former broken analysis.
(* This property contains the number of token read by the method analyze. *)
property Count: Integer read GetCount; // Number of found token.
(* This property gives an easy access to the available token. a little example
shows the access:
for i := 1 to Count do WorkWithToken( Token[i-1] ); *)
property Token[Index: Integer] : TToken read GetToken;// Array with found token.
(* This property contains the internal implementation version. maybe this is
useful, all my self developed components have such a version control
mechanism. *)
property Version: string read GetVersion; // Internal implementation version.
{ If OwnSourceStream is true, SourceStream will be destryed when the component is getting destroyed }
property OwnSourceStream : boolean read FOwnSourceStream write FOwnSourceStream;
{The function search a token of specific type and name. Parameter Name may be blank.
In this event the first found token of the specific type is returned.
Unrequired parameter aStartIndex defines a index to start search from.
Search starts from begin if the parameter is missed. }
function FindToken(aTokenName : String; aTokenType : TTokenType; aStartIndex : integer = 0) : Integer;
{ The function returns true if a token, pointer by aIndex parameter, is of
specific type and the token text is equal the specific text. The function
is not case-sensetive. }
function IsToken(aIndex : Integer; aTokenType : TTokenType; aText : String) : boolean; overload;
{ The function returns true if a token is of specific type and the token text
is equal the specific text. The function is not case-sensetive. }
function IsToken(aToken : TToken; aTokenType : TTokenType; aText : String) : boolean; overload;
{ The function returns the original text of tokens from the source stream
pointed by aStartIndex and aEndIndex index parameters in Token array. }
function GetSourceString(aStartIndex, aEndIndex: Integer) : String;
published
(* Every language specific letter that is allowed within identifiers
can be integrated here. the letters 'a' to 'z' and 'A' to 'Z' are the
standard letters for identifiers. if any other additional letter is needed
it can be entered in this property. the scanner component sorts the string
automatical.
Note: The default value is ''. *)
property AdditionalChars: string read GetAdditionalChars write SetAdditionalChars; // Specifies an additional chars which allowed in a identifier.
(* If this property is true, figures are allowed within every identifier or
keyword. if this property is false no figures are allowed and every figure
will cause a ttIdentifier or ttKeyword token. the figure will be interpreted
as the beginning of a new token with the type ttInteger.
Note: The default value is True. *)
property AllowFigures: Boolean read FAllowFigures write FAllowFigures; // Specifies if the numbers (figures) allowed.
(* If this property is true, every identifier which is not a keyword will
result a ttIdentifier token. if this property is false every non keyword
identifier will cause a ttError token.
Note: The default value is True. *)
property AllowIdentifier: Boolean read FAllowIdentifier write FAllowIdentifier; // Specifies if the identifiers allowed.
(* This property contains the leading char for a comment. every comment is
introduced with this string and is ended by the end of line.
Note: The default value is // *)
property CommentLine: string read FCommentLine write FCommentLine; // Begining of Comment Line text.
(* This property contains the introducing string for a block comment of type 1.
Every block comment is introduced with this string and is ended by
the Comment1End string.
Note: The default value is {. *)
property Comment1Begin: string read FComment1Begin write FComment1Begin; // Begining of the coment block of type 2 definition.
(* This property contains the ending string for a block comment of type 1.
Every block comment is introduced with the Comment1Begin string.
Note: The default value is }. *)
property Comment1End: string read FComment1End write FComment1End; // Ending of the coment block of type 1 definition.
(* This property contains the introducing string for a block comment of type 2.
Every block comment is introduced with this string and is ended by
the Comment2End string.
Note: The default value is (*. *)
property Comment2Begin: string read FComment2Begin write FComment2Begin; // Begining of the coment block of type 2 definition.
(* This property contains the ending string for a block comment of type 2.
Every block comment is introduced with the Comment2Begin string. *)
property Comment2End: string read FComment2End write FComment2End; // Ending of the coment block of type 2 definition.
(* The keyword of the scanner are stored in the property Keywords. the
scanner component sorts list of keywords automatical. if the scanner reads
an identifier and the identifier is a member of the keywords it will result
a token ttKeyword *)
property Keywords: TStringList read FKeywords write SetKeywords; // List of reserved keywords.
(* If this property is true the compare if an identifier is made case.
Note: The default value is False. *)
property KeywordsCaseSensitive: Boolean read FCaseSensitive write FCaseSensitive; // Enables keywords case sensitive recognition.
(* Every special chars that results a ttSpecialChar token have to be entered
in this string. the scanner component sorts the string automatical.
Note: The default value is ''. *)
property SpecialChars: string read GetSpecialChars write SetSpecialChars; // Define Special Chars.
(* This user defined event is called if a new token is read from the input
stream. This event is called atfer the token is read and before it is stored
in the internal list of the scanner component. If any application dependent
changes before registering the new token are neccessary it is possible to
change every part of the new token. *)
property OnTokenRead: TTokenEvent read FOnTokenRead write FOnTokenRead; // This event fires off each time
// when the new token is found.
end;
implementation
const
cVersion: string = '1.06';
WhiteSpace: TCharSet = [' ', #9, #10, #13]; // known white spaces
// copy a token contents from another token
procedure TToken.CopyFrom( SrcToken: TToken );
begin
Token := SrcToken.Token;
SubType := SrcToken.SubType;
Text := SrcToken.Text;
Position := SrcToken.Position;
Row := SrcToken.Row;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -