📄 cplex.pas
字号:
{*******************************************************************
* *
* COMPONENT for MS DOS and Windows source code. *
* *
* (c) 1992, Roderic D. M. Page *
* *
* Language: Turbo Pascal (Pascal with object-oriented extensions) *
* Compiler: Turbo Pascal 6.0 (MS DOS) *
* Turbo Pascal for Windows 1.0 (WINDOWS) *
* *
* Notes: Program interface is currently Windows specific. *
* *
*******************************************************************}
{$I CPDIR.INC}
{*
Simple lexical scanner.
*}
unit cplex;
{
History
=======
2/28/91 Support for streams added.
Note that stream returns a null (#0) character
signalling the end of the file.
3/1/91 LEXOBJ.Init modified so that initialization routine
calls STREAMOBJ.GetChar. This is the last character
read from the stream. Assumes that there is always a
character read from the stream (this is ensured by
STREAMOBJ.Init).
4/3/91 Tabs are processed by GetToken.
5/3/91 Buffer outputs to logfile.
6/18/91 Tokens can include "." for compatibility with PAUP.
#0 ("NULL") character now handled separately.
BM(NH)
3 Jan 1992 Tabs in input file [!..] comments are now expanded
so that TAB characters are not added to the display
buffer.
19 Feb 1992 Code cleaned up, Windows API IsCharAlpha used to test for
letters (allows for foreign language letters).
26 Jun 1992 GetNonSpaceSymbol added for processing data matrices.
14 Jul 1992 Now stores output comments so that program can
store them if it wishes.
Oct 1992 Characters now classified using table lookup.
30 Oct 1992 Tokens ditto.
}
interface
uses
{$IFDEF WINDOWS}
{$IFDEF DEBUG}
WinCrt,
{$ENDIF}
WinProcs,
Strings,
{$ENDIF}
cpstream, { Buffered stream }
cpwbuf, { Text buffer }
cputil; { UpCaseStr }
const
EF = #26; { ASCII end of file marker }
COMMENTSIZE = 1024; { 1K buffer for \[!] comments }
type
CHARCLASS = (LET, DIG, LPAR, RPAR, POINT, SEM, COLON, MINUS, COMMA,
AST, EQL, QUOTE, UNDER, LBRACK, RBRACK, EXCL, HSH, ENDFL,
CRET, LFEED, FIXEDTAB, SPACE, QUEST, NULL, OTHR);
{ Kinds of characters that will be encountered }
CLASSARRAY = array[0..254] of CharClass;
{ Table }
const
CHARCLASSARRAY: CLASSARRAY = (
{ 0 1 2 3 4 5 6 7 8 9 }
{0} NULL, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, FIXEDTAB,
{1} LFEED, OTHR, OTHR, CRET, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{2} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,ENDFL, OTHR, OTHR, OTHR,
{3} OTHR, OTHR,SPACE, EXCL, OTHR, HSH, OTHR, OTHR, OTHR, QUOTE,
{4} LPAR, RPAR, AST, OTHR,COMMA,MINUS,POINT, OTHR, DIG, DIG,
{5} DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG,COLON, SEM,
{6} OTHR, EQL, OTHR,QUEST, OTHR, LET, LET, LET, LET, LET,
{7} LET, LET, LET, LET, LET, LET, LET, LET, LET, LET,
{8} LET, LET, LET, LET, LET, LET, LET, LET, LET, LET,
{9} LET,LBRACK,OTHR,RBRACK,OTHR,UNDER, OTHR, LET, LET, LET,
{10} LET, LET, LET, LET, LET, LET, LET, LET, LET, LET,
{11} LET, LET, LET, LET, LET, LET, LET, LET, LET, LET,
{12} LET, LET, LET, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{13} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{14} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{15} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{16} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{17} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{18} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{19} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{20} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{21} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{22} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{23} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{24} OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR, OTHR,
{25} OTHR, OTHR, OTHR, OTHR, OTHR);
{ }
type
STATES = (START, QUIT, ACCEPTED, GET_ID, GET_STRING,
GET_QUOTE, GET_NUMBER, GET_REAL, GET_COMMENT, GET_ECHO);
ACTIONS = (N, A, S, AS, AES, E, AE, ES, LE);
TOKENTYPES = (ttIDENTIFIER, ttNUMBER, ttOPENPAR, ttCLOSEPAR,
ttPERIOD, ttSEMICOLON, ttCOLON, ttMINUS, ttCOMMA,
ttASTERIX, ttEQUALS, ttHASH, ttSPACE, ttEOFL,ttOTHER,
ttBADTOKEN);
TOKENCLASSARRAY = array[CHARCLASS] of TOKENTYPES;
const
TOKENARRAY:TOKENCLASSARRAY =
(ttIDENTIFIER, ttNUMBER, ttOPENPAR, ttCLOSEPAR, ttPERIOD,
ttSEMICOLON, ttCOLON, ttMINUS, ttCOMMA, ttASTERIX, ttEQUALS,
ttOTHER, ttOTHER, ttOTHER, ttOTHER, ttOTHER, ttHASH,
ttEOFL, ttOTHER, ttOTHER, ttOTHER, ttSPACE, ttOTHER,
ttOTHER, ttOTHER);
type
LEXOBJ_PTR = ^LEXOBJ;
{ pointer to [LEXOBJ] }
LEXOBJ = object
{ lexical scanner }
f : STREAMOBJ_PTR; { input stream }
b : BUFFEROBJ; { input buffer }
tt : TOKENTYPES; { token }
curchar : char; { current character }
szComment : array[0..1024] of char; { output comments buffer }
constructor Init (S: STREAMOBJ_PTR);
{ Sets input stream to <\b S> and clears the output comments buffer}
destructor Done;
{ Abstract }
function EndOfFile:Boolean;
{ True if the current character is either null (\0) or
^Z (ASCII 26) }
function EndOfFileMarker:Boolean;
{ True if the current token is the end of file marker }
function FindCharClass (ch:char):CharClass;
{ Classify the character <\b ch> }
function GetComment:PChar;
{ Return pointer to output comments buffer }
function GetNonSpaceSymbol:char;
{ Return the next character ignoring spaces, tabs, and comments }
function GoodToken:Boolean;
{ True if current token is valid }
function Posn:string;
{ Return the current row and column in the input stream }
function SymbolType:CHARCLASS;
{ Return the type of the current character }
function TheCurChar:Char;
{ Return the current character }
function Token:string;
{ Return the current token }
function UpCaseToken:string;
{ Return the current token in UPPERCASE form }
procedure ClearComment;
{ Clear the output comments buffer }
procedure GetNonSpaceToken;
{ Get the next token ignoring spaces, tabs, and comments }
procedure GetSymbol;
{ Get the next symbol in the input stream }
procedure GetToken;
{ Get the next token in the input stream }
procedure ReadLine;
{ Skip CR/LF }
{$IFDEF DEBUG}
procedure ShowTokenType; { }
{$ENDIF}
private
st : STATES;
ac : ACTIONS;
echo : Boolean;
curcharclass: CHARCLASS;
function EndOfLine:Boolean;
procedure ClassifyChar (ch:char);
procedure ClassifyToken;
procedure DoAction;
procedure Re_set;
procedure ReadChar;
end;
implementation
{-----------------------------Init-----------------------------------------}
constructor LexObj.Init (S:STREAMOBJ_PTR);
{ Assign stream }
begin
f := S;
szComment[0] := #0;
end;
{-----------------------------Done-----------------------------------------}
{ Abstract }
destructor LEXOBJ.Done;
begin
end;
{-----------------------------GetComment-----------------------------------}
{ Return comment and clear comment buffer }
function LEXOBJ.GetComment:PChar;
begin
GetComment := StrNew (szComment);
szComment[0] := #0;
end;
{-----------------------------ClearComment---------------------------------}
procedure LEXOBJ.ClearComment;
begin
szComment[0] := #0;
end;
{-----------------------------DoAction-------------------------------------}
{ Do current action. }
procedure LexObj.DoAction;
var
lpBuf : PChar;
begin
case ac of
N: begin end; { no action }
A: B.AppendChar (curchar);
S: GetSymbol;
AS: begin B.AppendChar (curChar); GetSymbol; end;
AES: begin B.AppendChar (curChar); GetSymbol; end;
E: begin end;
AE: B.AppendChar (CurChar);
ES: GetSymbol;
LE: begin
{ Output comment in display buffer and
store in comment buffer for
later call by program. }
lpBuf := B.GetBufferText;
write (NewLog, lpBuf);
if (Strlen (lpBuf) + Strlen (szComment) < COMMENTSIZE) then
StrCat (szComment, lpBuf);
B.Clear;
end;
end;
end;
{-----------------------------Re_Set---------------------------------------}
{ Clear text buffer, mark last posn in stream, get first symbol. }
procedure LexObj.Re_set;
begin
B.Clear;
st := START;
echo := false;
f^.MarkLastPosn;
CurChar := f^.GetChar;
{ Fix for stream }
if CurChar = #0 then
CurChar := EF;
ClassifyChar (CurChar);
end;
{-----------------------------EndOfFile------------------------------------}
function LexObj.EndOfFile:Boolean;
begin
EndOfFile := ((CurChar = EF) or (CurChar = #0));
end;
{-----------------------------EndOfLine------------------------------------}
function LexObj.EndOfLine:Boolean;
begin
EndOfLine := (CurChar = CR);
end;
{-----------------------------EndOfFileMarker------------------------------}
function LexObj.EndOfFileMarker:Boolean;
begin
EndOfFileMarker := (tt = ttEOFL);
end;
{-----------------------------ReadChar-------------------------------------}
procedure LexObj.ReadChar;
begin
CurChar := f^.ReadChar;
end;
{-----------------------------ReadLine-------------------------------------}
procedure LexObj.ReadLine;
begin
while (CurChar in [CR,LF]) do
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -