{ uwparser.pas }
{ Written by Frank Plagge
Copyright (c) 1998 by Frank Plagge, Elsterweg 39, 38446 Wolfsburg, Germany
All rights reserved
*****************************************************************************
Permission to use, copy, modify, and distribute this software and its
documentation without fee for any purpose is hereby granted, provided that
the above copyright notice appears on all copies and that both that copyright
notice and this permission notice appear in all supporting documentation.
NO REPRESENTATIONS ARE MADE ABOUT THE SUITABILITY OF THIS SOFTWARE FOR ANY
PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
NEITHER FRANK PLAGGE OR ANY OTHER PERSON SHALL BE LIABLE FOR ANY DAMAGES
SUFFERED BY THE USE OF THIS SOFTWARE.
*****************************************************************************
Description:
This module contains the component TWParser. It contains the complete
lexical analysis of a stream source. This is a necessary basis to realize
a parser for any language. The analysis produces a stream of tokens.
}
{$B-}
unit uWParser;
interface
uses
Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs;
type
{ Set type for the special chars. }
TCharSet = set of Char;
{ Enumeration with the possible result token. }
TTokenType = ( ttComment, ttEof, ttError,
ttHexDecimal, ttIdentifier, ttInteger,
ttKeyword, ttReal, ttSpecialChar, ttString );
TTokenSubType = ( tsNone, tsComment1Block, tsComment2Block, tsCommentLine );
{ A token contains the token type, the belonging text and the position }
{ within the source stream. }
TToken = class
Token: TTokenType; // Token type.
SubType: TTokenSubType;// Token sub-type
Text: string; // Representing token text.
Position: Integer; // Positition within the file stream.
Row, Column: Integer; // Row and column within source file stream.
Tag: Integer; // User defined information.
public
procedure CopyFrom( SrcToken: TToken ); // The procedure copies contents
// from another token to the current.
end;
{ This event type is called if a token from the input stream is read
- Token is the actual read token,
- AddToList determines if the token is added to the list of read tokens (default is true)
- Stop determines if the analysis is stopped now (default is false); a stopped analysis
goes on with the method Restart. }
TTokenEvent = procedure (Sender: TObject; Token: TToken; var AddToList, Stop: Boolean) of object;
TWParser = class(TComponent) // TWParser is a main scanner component.
private
{ Private }
CommentBlock1Phase: Boolean; // Boolean flag variable. The flag is true
// if a comment block 1 reading phase is active
CommentBlock2Phase: Boolean; // Boolean flag variable. The flag is true
// if a comment block 2 reading phase is active
TokenList: TList; // The list of read tokens.
EAState: Byte; // A condition of the state machine.
EAText: string; // A string that belongs to a token.
EAToken: TTokenType; // Recognized token type.
EASubType: TTokenSubType; // Recognized token sub-type.
EARow: Integer; // Row of the first token char.
EAColumn: Integer; // Column of the first token char.
EAPosition: Integer; // Position of the first token char within the stream.
Eof: Boolean; // Indicates if end of file of the stream is reached.
NextChar: Char; // Next not processed char in the source stream.
FIdentChars: TCharSet; // Chars allowed within an identifier.
FAllowFigures: Boolean; // Allow figures within identifiers.
FAllowIdentifier: Boolean; // Allow identifiers.
FCaseSensitive: Boolean; // Detecting keywords case sensitive.
FCharacters: TCharSet; // Allowed special chars.
FCommentLine: string; // Introducing comment chars.
FComment1Begin: string; // Introducing comment block 1 begining string.
FComment1End: string; // Introducing comment block 1 ending string.
FComment2Begin: string; // Introducing comment block 2 begining string.
FComment2End: string; // Introducing comment block 2 ending string.
FKeywords: TStringList; // List of defined keywords.
FOnTokenRead: TTokenEvent; // User defined event if a new token is read.
SourceStream: TStream; // Input stream for lexical analysis.
SourceY: Integer; // Actual row within source stream.
SourceX: Integer; // Actual column within source stream.
FOwnSourceStream : boolean; // If it's true, free SourceStream when it's getting destroyes
procedure EASwitch0( ch: Char ); // Process a char at state 0 ( start).
procedure EASwitch1( ch: Char ); // Process a char at state 1.
procedure EASwitch3( ch: Char ); // Process a char at state 3.
procedure EASwitch5( ch: Char ); // Process a char at state 5.
procedure EASwitch7( ch: Char ); // Process a char at state 7.
procedure EASwitch9( ch: Char ); // Process a char at state 9.
procedure EASwitch11( ch: Char );// Process a char at state 11.
procedure EASwitch13( ch: Char );// Process a char at state 13.
procedure EASwitch14( ch: Char );// Process a char at state 14.
procedure EASwitch16( ch: Char );// Process a char at state 16.
procedure EASwitch18( ch: Char );// Process a char at state 18.
procedure EASwitch19( ch: Char );// Process a char at state 19.
function EqualStr( First, Second: string ) : Boolean; // Compare two strings depending on case sensitive set.
function GetAdditionalChars: string; // Read method for property AdditinalChars.
function GetCount: Integer; // Read method for property Count.
function GetToken( Index: Integer ) : TToken; // Read method for property Token.
function GetSpecialChars: string; // Read method for property SpecialChars.
function GetVersion: string; // Read the internal version number.
function IsKeyword( Value: string ) : Boolean; // Test if a string is a keyword.
function LookAheadChar: Char; // Get the net char without reading it.
function LookAheadStr( Count: Integer) : string;// Get the next count char without reading it.
procedure ProcessChar; // Process the available char.
procedure ReadCh( var ch: Char ); // Read a char from the source stream.
procedure ReadToken; // Read the next token.
procedure SkipChars( Count: Integer ); // Skip a number of chars.
procedure SetAdditionalChars( Value: string ); // Write method for AdditionalChars.
procedure SetKeywords( Value: TStringList ); // Write method for property Keywords.
procedure SetSpecialChars( Value: string ); // Write method for SpecialChars.
protected
{ Protected }
public
{ Public }
constructor Create(AOwner: TComponent); override; // Create the scanner component.
destructor Destroy; override; // Destroy the scanner component.
(* This is the main method taking a source stream to tokens. the
number of read tokens is available via the property count. the read
tokens are available in the array Token. before analyzing a new source stream
the results of a previously analysis are deleted. *)
procedure Analyze( Source: TStream ); // Analyze the input file.
(* The token list produced by the stream analysis can be cleared by this
method. *)
procedure ClearTokenList; // Clear token list.
(* A formerly stopped analysis goes on working *)
procedure Restart; // Restart a former broken analysis.
(* This property contains the number of token read by the method analyze. *)
property Count: Integer read GetCount; // Number of found token.
(* This property gives an easy access to the available token. a little example
shows the access:
for i := 1 to Count do WorkWithToken( Token[i-1] ); *)
property Token[Index: Integer] : TToken read GetToken;// Array with found token.
(* This property contains the internal implementation version. maybe this is
useful, all my self developed components have such a version control
mechanism. *)
property Version: string read GetVersion; // Internal implementation version.
{ If OwnSourceStream is true, SourceStream will be destryed when the component is getting destroyed }
property OwnSourceStream : boolean read FOwnSourceStream write FOwnSourceStream;
{The function search a token of specific type and name. Parameter Name may be blank.
In this event the first found token of the specific type is returned.
Unrequired parameter aStartIndex defines a index to start search from.
Search starts from begin if the parameter is missed. }
function FindToken(aTokenName : String; aTokenType : TTokenType; aStartIndex : integer = 0) : Integer;
{ The function returns true if a token, pointer by aIndex parameter, is of
specific type and the token text is equal the specific text. The function
is not case-sensetive. }
function IsToken(aIndex : Integer; aTokenType : TTokenType; aText : String) : boolean; overload;
{ The function returns true if a token is of specific type and the token text
is equal the specific text. The function is not case-sensetive. }
function IsToken(aToken : TToken; aTokenType : TTokenType; aText : String) : boolean; overload;
{ The function returns the original text of tokens from the source stream
pointed by aStartIndex and aEndIndex index parameters in Token array. }
function GetSourceString(aStartIndex, aEndIndex: Integer) : String;
published
(* Every language specific letter that is allowed within identifiers
can be integrated here. the letters 'a' to 'z' and 'A' to 'Z' are the
standard letters for identifiers. if any other additional letter is needed
it can be entered in this property. the scanner component sorts the string
automatical.
Note: The default value is ''. *)
property AdditionalChars: string read GetAdditionalChars write SetAdditionalChars; // Specifies an additional chars which allowed in a identifier.
(* If this property is true, figures are allowed within every identifier or
keyword. if this property is false no figures are allowed and every figure
will cause a ttIdentifier or ttKeyword token. the figure will be interpreted
as the beginning of a new token with the type ttInteger.
Note: The default value is True. *)
property AllowFigures: Boolean read FAllowFigures write FAllowFigures; // Specifies if the numbers (figures) allowed.
(* If this property is true, every identifier which is not a keyword will
result a ttIdentifier token. if this property is false every non keyword
identifier will cause a ttError token.
Note: The default value is True. *)
property AllowIdentifier: Boolean read FAllowIdentifier write FAllowIdentifier; // Specifies if the identifiers allowed.
(* This property contains the leading char for a comment. every comment is
introduced with this string and is ended by the end of line.
Note: The default value is // *)
property CommentLine: string read FCommentLine write FCommentLine; // Begining of Comment Line text.
(* This property contains the introducing string for a block comment of type 1.
Every block comment is introduced with this string and is ended by
the Comment1End string.
Note: The default value is {. *)
property Comment1Begin: string read FComment1Begin write FComment1Begin; // Begining of the coment block of type 2 definition.
(* This property contains the ending string for a block comment of type 1.
Every block comment is introduced with the Comment1Begin string.
Note: The default value is }. *)
property Comment1End: string read FComment1End write FComment1End; // Ending of the coment block of type 1 definition.
(* This property contains the introducing string for a block comment of type 2.
Every block comment is introduced with this string and is ended by
the Comment2End string.
Note: The default value is (*. *)
property Comment2Begin: string read FComment2Begin write FComment2Begin; // Begining of the coment block of type 2 definition.
(* This property contains the ending string for a block comment of type 2.
Every block comment is introduced with the Comment2Begin string. *)
property Comment2End: string read FComment2End write FComment2End; // Ending of the coment block of type 2 definition.
(* The keyword of the scanner are stored in the property Keywords. the
scanner component sorts list of keywords automatical. if the scanner reads
an identifier and the identifier is a member of the keywords it will result
a token ttKeyword *)
property Keywords: TStringList read FKeywords write SetKeywords; // List of reserved keywords.
(* If this property is true the compare if an identifier is made case.
Note: The default value is False. *)
property KeywordsCaseSensitive: Boolean read FCaseSensitive write FCaseSensitive; // Enables keywords case sensitive recognition.
(* Every special chars that results a ttSpecialChar token have to be entered
in this string. the scanner component sorts the string automatical.
Note: The default value is ''. *)
property SpecialChars: string read GetSpecialChars write SetSpecialChars; // Define Special Chars.
(* This user defined event is called if a new token is read from the input
stream. This event is called atfer the token is read and before it is stored
in the internal list of the scanner component. If any application dependent
changes before registering the new token are neccessary it is possible to
change every part of the new token. *)
property OnTokenRead: TTokenEvent read FOnTokenRead write FOnTokenRead; // This event fires off each time
// when the new token is found.
end;
implementation
const
cVersion: string = '1.06';
WhiteSpace: TCharSet = [' ', #9, #10, #13]; // known white spaces
// copy a token contents from another token
procedure TToken.CopyFrom( SrcToken: TToken );
begin
Token := SrcToken.Token;
SubType := SrcToken.SubType;
Text := SrcToken.Text;
Position := SrcToken.Position;
Row := SrcToken.Row;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -