📄 lexbase.pas
字号:
{
This module collects the basic data types and operations used in the TP
Lex program, and other basic stuff that does not belong anywhere else:
- Lex input and output files and corresponding bookkeeping information
used by the parser
- symbolic character constants
- dynamically allocated strings and character classes
- integer sets
- generic quicksort and hash table routines
- utilities for list-generating
- other tiny utilities
Copyright (c) 1990-92 Albert Graef <ag@muwiinfa.geschichte.uni-mainz.de>
Copyright (C) 1996 Berend de Boer <berend@pobox.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
$Revision: 2 $
$Modtime: 96-08-01 10:21 $
$History: LEXBASE.PAS $
*
* ***************** Version 2 *****************
* User: Berend Date: 96-10-10 Time: 21:16
* Updated in $/Lex and Yacc/tply
* Updated for protected mode, windows and Delphi 1.X and 2.X.
}
unit LexBase;
interface
const
(* symbolic character constants: *)
bs = #8; (* backspace character *)
tab = #9; (* tab character *)
nl = #10; (* newline character *)
cr = #13; (* carriage return *)
ff = #12; (* form feed character *)
var
(* Filenames: *)
lfilename : String;
pasfilename : String;
lstfilename : String;
codfilename : String;
codfilepath : String; { Under linux, binary and conf file
are not in the same path}
(* Lex input, output, list and code template file: *)
yyin, yylst, yyout, yycod : Text;
(* the following values are initialized and updated by the parser: *)
line : String; (* current input line *)
lno : Integer; (* current line number *)
const
max_elems = 100; (* maximum size of integer sets *)
type
(* String and character class pointers: *)
StrPtr = ^String;
CClass = set of Char;
CClassPtr = ^CClass;
(* Sorted integer sets: *)
IntSet = array [0..max_elems] of Integer;
(* word 0 is size *)
IntSetPtr = ^IntSet;
(* Regular expressions: *)
RegExpr = ^Node;
NodeType = (mark_node, (* marker node *)
char_node, (* character node *)
str_node, (* string node *)
cclass_node, (* character class node *)
star_node, (* star node *)
plus_node, (* plus node *)
opt_node, (* option node *)
cat_node, (* concatenation node *)
alt_node); (* alternatives node (|) *)
Node = record case node_type : NodeType of
mark_node : (rule, pos : Integer);
char_node : (c : Char);
str_node : (str : StrPtr);
cclass_node : (cc : CClassPtr);
star_node, plus_node, opt_node : (r : RegExpr);
cat_node, alt_node : (r1, r2 : RegExpr);
end;
(* Some standard character classes: *)
const
letters : CClass = ['A'..'Z','a'..'z','_'];
digits : CClass = ['0'..'9'];
alphanums : CClass = ['A'..'Z','a'..'z','_','0'..'9'];
(* Operations: *)
(* Strings and character classes: *)
function newStr(str : String) : StrPtr;
(* creates a string pointer (only the space actually needed for the given
string is allocated) *)
function newCClass(cc : CClass) : CClassPtr;
(* creates a CClass pointer *)
(* Integer sets (set arguments are passed by reference even if they are not
modified, for greater efficiency): *)
procedure empty(var M : IntSet);
(* initializes M as empty *)
procedure singleton(var M : IntSet; i : Integer);
(* initializes M as a singleton set containing the element i *)
procedure include(var M : IntSet; i : Integer);
(* include i in M *)
procedure exclude(var M : IntSet; i : Integer);
(* exclude i from M *)
procedure setunion(var M, N : IntSet);
(* adds N to M *)
procedure setminus(var M, N : IntSet);
(* removes N from M *)
procedure intersect(var M, N : IntSet);
(* removes from M all elements NOT in N *)
function size(var M : IntSet) : Integer;
(* cardinality of set M *)
function member(i : Integer; var M : IntSet) : Boolean;
(* tests for membership of i in M *)
function isempty(var M : IntSet) : Boolean;
(* checks whether M is an empty set *)
function equal(var M, N : IntSet) : Boolean;
(* checks whether M and N are equal *)
function subseteq(var M, N : IntSet) : Boolean;
(* checks whether M is a subset of N *)
function newIntSet : IntSetPtr;
(* creates a pointer to an empty integer set *)
(* Constructors for regular expressions: *)
const epsExpr : RegExpr = nil;
(* empty regular expression *)
function markExpr(rule, pos : Integer) : RegExpr;
(* markers are used to denote endmarkers of rules, as well as other
special positions in rules, e.g. the position of the lookahead
operator; they are considered nullable; by convention, we use
the following pos numbers:
- 0: endmarker position
- 1: lookahead operator position *)
function charExpr(c : Char) : RegExpr;
(* character c *)
function strExpr(str : StrPtr) : RegExpr;
(* "str" *)
function cclassExpr(cc : CClassPtr) : RegExpr;
(* [str] where str are the literals in cc *)
function starExpr(r : RegExpr) : RegExpr;
(* r* *)
function plusExpr(r : RegExpr) : RegExpr;
(* r+ *)
function optExpr(r : RegExpr) : RegExpr;
(* r? *)
function mnExpr(r : RegExpr; m, n : Integer) : RegExpr;
(* constructor expanding expression r{m,n} to the corresponding
alt expression r^m|...|r^n *)
function catExpr(r1, r2 : RegExpr) : RegExpr;
(* r1r2 *)
function altExpr(r1, r2 : RegExpr) : RegExpr;
(* r1|r2 *)
(* Unifiers for regular expressions:
The following predicates check whether the specified regular
expression r is of the denoted type; if the predicate succeeds,
the other arguments of the predicate are instantiated to the
corresponding values. *)
function is_epsExpr(r : RegExpr) : Boolean;
(* empty regular expression *)
function is_markExpr(r : RegExpr; var rule, pos : Integer) : Boolean;
(* marker expression *)
function is_charExpr(r : RegExpr; var c : Char) : Boolean;
(* character c *)
function is_strExpr(r : RegExpr; var str : StrPtr) : Boolean;
(* "str" *)
function is_cclassExpr(r : RegExpr; var cc : CClassPtr) : Boolean;
(* [str] where str are the literals in cc *)
function is_starExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
(* r1* *)
function is_plusExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
(* r1+ *)
function is_optExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
(* r1? *)
function is_catExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
(* r1r2 *)
function is_altExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
(* r1|r2 *)
(* Quicksort: *)
type
OrderPredicate = function (i, j : Integer) : Boolean;
SwapProc = procedure (i, j : Integer);
procedure quicksort(lo, hi: Integer;
less : OrderPredicate;
swap : SwapProc);
(* General inplace sorting procedure based on the quicksort algorithm.
This procedure can be applied to any sequential data structure;
only the corresponding routines less which compares, and swap which
swaps two elements i,j of the target data structure, must be
supplied as appropriate for the target data structure.
- lo, hi: the lower and higher indices, indicating the elements to
be sorted
- less(i, j): should return true if element no. i `is less than'
element no. j, and false otherwise; any total quasi-ordering may
be supplied here (if neither less(i, j) nor less(j, i) then elements
i and j are assumed to be `equal').
- swap(i, j): should swap the elements with index i and j *)
(* Generic hash table routines (based on quadratic rehashing; hence the
table size must be a prime number): *)
type
TableLookupProc = function(k : Integer) : String;
TableEntryProc = procedure(k : Integer; symbol : String);
function key(symbol : String;
table_size : Integer;
lookup : TableLookupProc;
entry : TableEntryProc) : Integer;
(* returns a hash table key for symbol; inserts the symbol into the
table if necessary
- table_size is the symbol table size and must be a fixed prime number
- lookup is the table lookup procedure which should return the string
at key k in the table ('' if entry is empty)
- entry is the table entry procedure which is assumed to store the
given symbol at the given location *)
function definedKey(symbol : String;
table_size : Integer;
lookup : TableLookupProc) : Boolean;
(* checks the table to see if symbol is in the table *)
(* Utility routines: *)
function min(i, j : Integer) : Integer;
function max(i, j : Integer) : Integer;
(* minimum and maximum of two integers *)
function nchars(cc : CClass) : Integer;
(* returns the cardinality (number of characters) of a character class *)
function upper(str : String) : String;
(* returns str converted to uppercase *)
function strip(str : String) : String;
(* returns str with leading and trailing blanks stripped off *)
function blankStr(str : String) : String;
(* returns string of same length as str, with all non-whitespace characters
replaced by blanks *)
function intStr(i : Integer) : String;
(* returns the string representation of i *)
function isInt(str : String; var i : Integer) : Boolean;
(* checks whether str represents an integer; if so, returns the
value of it in i *)
function path(filename : String) : String;
(* returns the path in filename *)
function root(filename : String) : String;
(* returns root (i.e. extension stripped from filename) of
filename *)
function addExt(filename, ext : String) : String;
(* if filename has no extension and last filename character is not '.',
add extension ext to filename *)
function file_size(filename : String) : LongInt;
(* determines file size in bytes *)
(* Utility functions for list generating routines: *)
function charStr(c : char; reserved : CClass) : String;
(* returns a print name for character c, using the standard escape
conventions; reserved is the class of `reserved' special characters
which should be quoted with \ (\ itself is always quoted) *)
function singleQuoteStr(str : String) : String;
(* returns print name of str enclosed in single quotes, using the
standard escape conventions *)
function doubleQuoteStr(str : String) : String;
(* returns print name of str enclosed in double quotes, using the
standard escape conventions *)
function cclassStr(cc : CClass) : String;
(* returns print name of character class cc, using the standard escape
conventions; if cc contains more than 128 elements, the complement
notation (^) is used; if cc is the class of all (non-null) characters
except newline, the period notation is used *)
function cclassOrCharStr(cc : CClass) : String;
(* returns a print name for character class cc (either cclassStr, or,
if cc contains only one element, character in single quotes) *)
function regExprStr(r : RegExpr) : String;
(* unparses a regular expression *)
implementation
uses LexMsgs;
(* String and character class pointers: *)
function newStr(str : String) : StrPtr;
var strp : StrPtr;
begin
getmem(strp, succ(length(str)));
move(str, strp^, succ(length(str)));
newStr := strp;
end(*newStr*);
function newCClass(cc : CClass) : CClassPtr;
var ccp : CClassPtr;
begin
new(ccp);
ccp^ := cc;
newCClass := ccp;
end(*newCClass*);
(* Integer sets: *)
procedure empty(var M : IntSet);
begin
M[0] := 0;
end(*empty*);
procedure singleton(var M : IntSet; i : Integer);
begin
M[0] := 1; M[1] := i;
end(*singleton*);
procedure include(var M : IntSet; i : Integer);
var l, r, k : Integer;
begin
(* binary search: *)
l := 1; r := M[0];
k := l + (r-l) div 2;
while (l<r) and (M[k]<>i) do
begin
if M[k]<i then
l := succ(k)
else
r := pred(k);
k := l + (r-l) div 2;
end;
if (k>M[0]) or (M[k]<>i) then
begin
if M[0]>=max_elems then fatal(intset_overflow);
if (k<=M[0]) and (M[k]<i) then
begin
move(M[k+1], M[k+2], (M[0]-k)*sizeOf(Integer));
M[k+1] := i;
end
else
begin
move(M[k], M[k+1], (M[0]-k+1)*sizeOf(Integer));
M[k] := i;
end;
inc(M[0]);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -