📄 crtable.pas
字号:
UNIT CRTable;
{ renamed from Modula version CRT to avoid name space problems with CRT unit }
(* CRTable Table Handler
======= =============
(1) handles a symbol table for terminals, pragmas and nonterminals
(2) handles a table for character classes (for scanner generation)
(3) handles a top-down graph for productions
(4) computes various sets (start symbols, followers, any sets)
(5) contains procedures for grammar tests
--------------------------------------------------------------------*)
INTERFACE
USES Sets;
CONST
(* The following are chosen to ensure that data segments remain within the
64K limit imposed by Dos 16 bit systems. Manipulate them at your peril
if you need to handle large grammars! *)
maxSymbols = 256; (* max number of symbols
(terminals+nonterminals+pragmas) *)
maxTerminals = 256; (* max number of terminals *)
maxNt = 128; (* max number of nonterminals *)
maxNodes = 1500; (* max number of top-down graph nodes *)
maxClasses = 250; (* max number of character classes *)
maxSemLen = 64000; (* max length of a semantic text *)
normTrans = 0; (* DFA transition during normal scanning *)
contextTrans = 1; (* DFA transition during scanning of right context *)
maxList = 150; (* max array size in FindCircularProductions *)
maxLiterals = 127; (* max number of literal terminals *)
(* node types *)
unknown = 0;
t = 1; (* terminal symbol *)
pr = 2; (* pragma *)
nt = 3; (* nonterminal symbol *)
class = 4; (* character class *)
chart = 5; (* single character *)
wt = 6; (* weak terminal symbol *)
any = 7; (* symbol ANY *)
eps = 8; (* empty alternative *)
sync = 9; (* symbol SYNC *)
sem = 10; (* semantic action *)
alt = 11; (* alternative *)
iter = 12; (* iteration *)
opt = 13; (* option *)
noSym = -1;
eofSy = 0;
(* token kinds *)
classToken = 0; (* token class *)
litToken = 1; (* literal (e.g. keyword) not recognized by DFA *)
classLitToken = 2; (* token class that can also match a literal *)
MaxNameLength = 39; (* max length of token name *)
TYPE
Name = STRING [MaxNameLength];
Position = RECORD (* position of stretch of source text *)
beg: LONGINT; (* start relative to beginning of file *)
len: LONGINT; (* length *)
col: INTEGER; (* column number of start position *)
END;
SymbolNode = RECORD (* node of symbol table *)
typ: INTEGER; (* nt, t, pr, unknown *)
name, (* symbol name *)
constant: Name; (* named constant of symbol *)
struct: INTEGER; (* typ = nt: index of first node of syntax graph *)
(* typ = t: token kind: literal, class, ... *)
deletable: BOOLEAN; (* typ = nt: TRUE, if nonterminal is deletable *)
attrPos: Position; (* position of attributes in source text *)
semPos: Position; (* typ = pr: pos of sem action in source text *)
(* typ = nt: pos of local decls in source text *)
line: INTEGER; (* source text line number of symbol in this node *)
END;
GraphNode = RECORD (* node of top-down graph *)
typ : INTEGER; (* nt,sts,wts,chart,class,any,eps,sem,sync,alt,
iter,opt*)
next: INTEGER; (* to successor node *)
(* next < 0: to successor of enclosing structure *)
p1: INTEGER; (* typ IN {nt, t, wt}: index to symbol table *)
(* typ = any: index to anyset *)
(* typ = sync: index to syncset *)
(* typ = alt:
index of first node of first alternative *)
(* typ IN {iter, opt}: first node in subexpression *)
(* typ = chart: ordinal character value *)
(* typ = class: index of character class *)
p2: INTEGER; (* typ = alt:
index of first node of second alternative *)
(* typ IN {chart, class}: transition code *)
pos: Position; (* typ IN {nt, t, wt}:
source pos of actual attributes *)
(* typ = sem: source pos of sem action *)
line: INTEGER; (* source text line number of item in this node *)
END;
{ The next is nasty - we really want different sizes of sets
CRTSet = ARRAY [0 .. maxTerminals DIV 16 (*Sets.size*) ] OF BITSET;
}
CRTSet = Sets.BITARRAY;
{ This means some hacking later on from the original Modula stuff }
MarkList = ARRAY [0 .. maxNodes DIV Sets.size] OF BITSET;
VAR
maxT: INTEGER; (* terminals stored from 0 .. maxT in symbol table *)
maxP: INTEGER; (* pragmas stored from maxT+1..maxP in symbol table *)
firstNt: INTEGER; (* index of first nt: available after CompSymbolSets *)
lastNt: INTEGER; (* index of last nt: available after CompSymbolSets *)
maxC: INTEGER; (* index of last character class *)
nNodes: INTEGER; (* index of last top-down graph node *)
root: INTEGER; (* index of root node, filled by ATG *)
useDeclPos,
semDeclPos: Position; (* position of global semantic declarations *)
genScanner: BOOLEAN; (* TRUE: a scanner shall be generated *)
hasUses: BOOLEAN; (* TRUE: attribute grammar had USES clause *)
ignoreCase: BOOLEAN; (* TRUE: scanner treats lower case as upper case *)
symNames: BOOLEAN; (* TRUE: symbol names have to be assigned *)
ignored: CRTSet; (* characters ignored by the scanner *)
ddt: ARRAY ['A' .. 'Z'] OF BOOLEAN;
(* parameter, debug and test switches *)
PROCEDURE NewName (n: Name; s: STRING);
(* Inserts the pair (n, s) in the token symbol name table *)
FUNCTION NewSym (typ: INTEGER; n: Name; line: INTEGER): INTEGER;
(* Generates a new symbol with type t and name n and returns its index *)
PROCEDURE GetSym (sp: INTEGER; VAR sn: SymbolNode);
(* Gets symbol node with index sp in sn. *)
PROCEDURE PutSym (sp: INTEGER; sn: SymbolNode);
(* Replaces symbol node with index sp by sn. *)
FUNCTION FindSym (n: Name): INTEGER;
(* Gets symbol index for identifier with name n. *)
FUNCTION NewSet (s: CRTSet): INTEGER;
(* Stores s as a new set and returns its index. *)
PROCEDURE CompFirstSet (gp: INTEGER; VAR fs: CRTSet);
(* Computes start symbols of graph gp. *)
PROCEDURE CompExpected (gp, sp: INTEGER; VAR exp: CRTSet);
(* Computes all symbols expected at location gp in graph of symbol sp. *)
PROCEDURE CompDeletableSymbols;
(* Marks deletable nonterminals and prints them. *)
PROCEDURE CompSymbolSets;
(* Collects first-sets, follow-sets, any-sets, and sync-sets. *)
PROCEDURE PrintSymbolTable;
(* Prints the symbol table (for tracing). *)
PROCEDURE XRef;
(* Produces a cross reference listing of all symbols. *)
FUNCTION NewClass (n: Name; cset: CRTSet): INTEGER;
(* Defines a new character class and returns its index *)
FUNCTION ClassWithName (n: Name): INTEGER;
(* Searches for a class with the given name. Returns its index or -1 *)
FUNCTION ClassWithSet (s: CRTSet): INTEGER;
(* Searches for a class with the given set. Returns its index or -1 *)
PROCEDURE GetClass (n: INTEGER; VAR s: CRTSet);
(* Returns character class n *)
PROCEDURE GetClassName (n: INTEGER; VAR name: Name);
(* Returns the name of class n *)
PROCEDURE GetSet (nr: INTEGER; VAR s: CRTSet);
(* Gives access to precomputed symbol sets *)
FUNCTION NewNode (typ, p1, line: INTEGER): INTEGER;
(* Generates a new graph node with typ, p1, and source line number
line and returns its index. *)
PROCEDURE ClearMarkList (VAR m: MarkList);
(* Clears all elements of m *)
FUNCTION IsInMarkList (VAR s: MarkList; x: INTEGER): BOOLEAN;
(* Returns x IN s *)
PROCEDURE InclMarkList (VAR s : MarkList; x : INTEGER);
(* s.INCL(x) *)
PROCEDURE GetNode (gp: INTEGER; VAR n: GraphNode);
(* Gets graph node with index gp in n. *)
PROCEDURE PutNode (gp: INTEGER; n: GraphNode);
(* Replaces graph node with index gp by n. *)
PROCEDURE ConcatAlt (VAR gL1, gR1: INTEGER; gL2, gR2: INTEGER);
(* Makes (gL2, gR2) an alternative of the graph (gL1, gR1).
The resulting graph is identified by (gL1, gR1). *)
PROCEDURE ConcatSeq (VAR gL1, gR1: INTEGER; gL2, gR2: INTEGER);
(* Concatenates graph (gL1, gR1) with graph (gL2, gR2) via next-chain.
The resulting graph is identified by (gL1, gR1). *)
PROCEDURE MakeFirstAlt (VAR gL, gR: INTEGER);
(* Generates an alt-node with (gL, gR) as its first and only alternative *)
PROCEDURE MakeIteration (VAR gL, gR: INTEGER);
(* Encloses the graph (gL, gR) into an iteration construct.
The resulting graph is identified by (gL, gR). *)
PROCEDURE MakeOption (VAR gL, gR: INTEGER);
(* Encloses the graph (gL, gR) into an option construct.
The resulting graph is identified by (gL, gR). *)
PROCEDURE CompleteGraph (gp: INTEGER);
(* Lets right ends of graph gp be 0 *)
PROCEDURE StrToGraph (s: STRING; VAR gL, gR: INTEGER);
(* Generates linear graph from characters in s *)
FUNCTION DelGraph (gp: INTEGER): BOOLEAN;
(* TRUE, if (sub) graph with root gp is deletable. *)
FUNCTION DelNode (gn: GraphNode): BOOLEAN;
(* TRUE, if graph node gn is deletable, i.e. can be derived into the
empty string. *)
PROCEDURE PrintGraph;
(* Prints the graph (for tracing). *)
PROCEDURE FindCircularProductions (VAR ok: BOOLEAN);
(* Finds and prints the circular part of the grammar.
ok = TRUE means no circular part. *)
PROCEDURE LL1Test (VAR ll1: BOOLEAN);
(* Checks if the grammar satisfies the LL(1) conditions.
ll1 = TRUE means no LL(1)-conflicts. *)
PROCEDURE TestCompleteness (VAR ok: BOOLEAN);
(* ok = TRUE, if all nonterminals have productions. *)
PROCEDURE TestIfAllNtReached (VAR ok: BOOLEAN);
(* ok = TRUE, if all nonterminals can be reached from the start symbol. *)
PROCEDURE TestIfNtToTerm (VAR ok: BOOLEAN);
(* ok = TRUE, if all nonterminals can be reduced to terminals. *)
PROCEDURE AssignSymNames (default: BOOLEAN; VAR thereExists: BOOLEAN);
PROCEDURE Restriction (n, limit: INTEGER);
(* Signal compiler restriction and abort program *)
IMPLEMENTATION
USES CRS;
CONST
maxSetNr = 50; (* max. number of symbol sets *)
maxNames = 100; (* max. number of declared token names *)
TYPE
FirstSets = ARRAY [0 .. maxNt] OF
RECORD
ts : CRTSet; (* terminal symbols *)
ready : BOOLEAN; (* TRUE = ts is complete *)
END;
FollowSets = ARRAY [0 .. maxNt] OF
RECORD
ts : CRTSet; (* terminal symbols *)
nts : CRTSet; (* nts whose start set is to be included in ts *)
END;
CharClass =
RECORD
name : Name; (* class name *)
cset : INTEGER (* ptr to set representing the class *)
END;
SymbolTable = ARRAY [0 .. maxSymbols] OF SymbolNode;
ClassTable = ARRAY [0 .. maxClasses] OF CharClass;
GraphList = ARRAY [0 .. maxNodes] OF GraphNode;
SymbolSet = ARRAY [0 .. maxSetNr] OF CRTSet;
NameTable = ARRAY [1 .. maxNames] OF
RECORD
name, definition : Name
END;
VAR
(* moved symbol table to the heap Fri 08-20-1993 to allow larger one *)
st : ^ SymbolTable; (* symbol table for terminals,
pragmas, and nonterminals *)
gn : ^ GraphList; (* top-down graph *)
tt : NameTable; (* table of token name declarations *)
first : FirstSets; (* first[i] = first symbols of st[i+firstNt] *)
follow : FollowSets; (* follow[i] = followers of st[i+firstNt] *)
chClass : ClassTable; (* character classes *)
cset : SymbolSet; (* cset[0] = all SYNC symbols *)
maxSet : INTEGER; (* index of last symbol set *)
lastName, dummyName : INTEGER; (* for unnamed character classes *)
ch : CHAR;
PROCEDURE WriteText (VAR f : TEXT; s : STRING; w : INTEGER);
VAR
slen, i : INTEGER;
BEGIN
slen := Length(s);
FOR i := 1 TO w DO
IF i <= slen THEN Write(f, s[i]) ELSE Write(f, ' ');
END;
(* Restriction Implementation restriction
----------------------------------------------------------------------*)
PROCEDURE Restriction (n, limit : INTEGER);
BEGIN
WriteLn;
WriteLn('Restriction ', n:1);
CASE n OF
1 : Write(' Too many graph nodes');
2 : Write(' Too many symbols');
3 : Write(' Too many sets');
4 : Write( 'Too many character classes');
5 : Write( 'Too many symbol sets');
6 : Write( 'Too many token names');
7 : Write( 'Too many states');
8 : Write( 'Semantic text buffer overflow');
9 : Write( 'Circular check buffer overflow');
10 : Write( 'Too many literal terminals');
-1 : Write( 'Compiler error');
END;
IF n > 0 THEN WriteLn(' (limited to ', limit:1, ')');
(* maybe we want CRX.WriteStatistics; *)
HALT
END;
(* MovePragmas Move pragmas after terminals
----------------------------------------------------------------------*)
PROCEDURE MovePragmas;
VAR
i : INTEGER;
BEGIN
IF maxP > firstNt THEN
BEGIN
i := maxSymbols - 1; maxP := maxT;
WHILE i > lastNt DO BEGIN
INC(maxP);
IF maxP >= firstNt THEN Restriction(2, maxSymbols);
st^[maxP] := st^[i]; DEC(i)
END;
END
END;
(* ClearMarkList Clear mark list m
----------------------------------------------------------------------*)
PROCEDURE ClearMarkList (VAR m : MarkList);
VAR
i : INTEGER;
BEGIN
i := 0;
WHILE i < maxNodes DIV Sets.size DO BEGIN m[i] := []; INC(i) END;
END;
FUNCTION IsInMarkList (VAR s : MarkList; x : INTEGER) : BOOLEAN;
BEGIN
IsInMarkList := x MOD size IN s[x DIV size]
END;
PROCEDURE InclMarkList (VAR s : MarkList; x : INTEGER);
BEGIN
s[x DIV size] := s[x DIV size] + [x MOD size]
END;
(* GetNode Get node with index gp in n
----------------------------------------------------------------------*)
PROCEDURE GetNode (gp : INTEGER; VAR n : GraphNode);
BEGIN
n := gn^[gp]
END;
(* PutNode Replace node with index gp by n
----------------------------------------------------------------------*)
PROCEDURE PutNode (gp : INTEGER; n : GraphNode);
BEGIN
gn^[gp] := n
END;
(* NewName Collects a user defined token name
----------------------------------------------------------------------*)
PROCEDURE NewName (n : Name; s : STRING);
BEGIN
IF lastName = maxNames
THEN Restriction(6, maxNames)
ELSE
BEGIN
INC(lastName); symNames := TRUE;
tt[lastName].name := n; tt[lastName].definition := s;
END;
END;
(* NewSym Generate a new symbol and return its index
----------------------------------------------------------------------*)
FUNCTION NewSym (typ : INTEGER; n : Name; line : INTEGER) : INTEGER;
VAR
i : INTEGER;
BEGIN
IF maxT + 1 = firstNt
THEN Restriction(2, maxSymbols)
ELSE
BEGIN
CASE typ OF
t : BEGIN INC(maxT); i := maxT; END;
pr : BEGIN DEC(maxP); DEC(firstNt); DEC(lastNt); i := maxP; END;
nt, unknown : BEGIN DEC(firstNt); i := firstNt; END;
END;
IF maxT + 1 >= firstNt THEN Restriction(2, maxSymbols);
st^[i].typ := typ; st^[i].name := n;
st^[i].constant := ''; (* Bug fix - PDT *)
st^[i].struct := 0; st^[i].deletable := FALSE;
st^[i].attrPos.beg := -1; st^[i].semPos.beg := -1;
st^[i].line := line;
END;
NewSym := i;
END;
(* GetSym Get symbol sp in sn
----------------------------------------------------------------------*)
PROCEDURE GetSym (sp : INTEGER; VAR sn : SymbolNode);
BEGIN
sn := st^[sp]
END;
(* PutSym Replace symbol with index snix by sn
----------------------------------------------------------------------*)
PROCEDURE PutSym (sp : INTEGER; sn : SymbolNode);
BEGIN
st^[sp] := sn
END;
(* FindSym Find index of symbol with name n
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -