📄 csregex.pas
字号:
destructor TcsReExpr.Destroy;
begin
FMatches.Free;
inherited Destroy;
end;
procedure TcsReExpr.CheckRegExp;
begin
if regexp_t.buffer = '' then
raise Exception.Create('No compiled pattern available.');
if regexp_t.Translate <> '' then
if Length(regexp_t.Translate) <> 256 then
raise Exception.Create('Translate table length error.');
if regexp_t.fastmap <> '' then
if Length(regexp_t.fastmap) <> 256 then
raise Exception.Create('Fastmap table length error.');
end;
function TcsReExpr.GetMatches: TStringList;
begin
Result := nil;
if not FNoChange then begin
DoMatch;
FNoChange := True;
end;
Result := FMatches;
end;
procedure TcsReExpr.SetUseFastmap(const fstm: boolean);
begin
if fstm <> FUseFastmap then begin
if fstm then
SetLength(regexp_t.fastmap, 256)
else
SetLength(regexp_t.fastmap, 0);
FStyleChange := True;
FUseFastmap := fstm;
end;
end;
procedure TcsReExpr.SetCanBeEmpty(const BeEm: boolean);
begin
if BeEm <> FCanBeEmpty then begin
FStyleChange := True;
FCanBeEmpty := BeEm;
end;
end;
procedure TcsReExpr.SetStr(const str: string);
begin
FNoChange := False;
FStr := str;
UniqueString(FStr);
FBuffer := PChar(FStr);
FBufferSize := Length(FStr);
FBufferEnd := PChar(integer(FBuffer) + FBufferSize);
if FActive then DoMatch;
end;
procedure TcsReExpr.SetSyntaxStyles(const NewStyles: TmkreSyntaxStyles);
const
Syntax: array[low(TmkreSyntaxStyle)..high(TmkreSyntaxStyle)] of integer = (
RE_NO_BK_PARENS, //* no quoting for parentheses */
RE_NO_BK_VBAR, //* no quoting for vertical bar */
RE_BK_PLUS_QM, //* quoting needed for + and ? */
RE_TIGHT_VBAR, //* | binds tighter than ^ and $ */
RE_NEWLINE_OR, //* treat newline as or */
RE_CONTEXT_INDEP_OPS, //* ^$?*+ are special in all contexts */
RE_ANSI_HEX, //* ansi sequences (\n etc) and \xhh */
RE_NO_GNU_EXTENSIONS, //* no gnu extensions */
RE_HIGHCHARSWHITESPACE); //* characters above 127 are whitespace */
var
i: TmkreSyntaxStyle;
begin
if NewStyles <> FSyntaxStyles then begin
FStyle := 0;
for i := low(TmkreSyntaxStyle) to high(TmkreSyntaxStyle) do
if i in NewStyles then FStyle := FStyle + Syntax[i];
FSyntaxStyles := NewStyles;
FStyleChange := True;
FNoChange := False;
end;
end;
// Original code starts here
{$IFDEF ver90}
procedure Assert(const blnAssertion: boolean; const strOnError: string);
begin
if not blnAssertion then
raise Exception.Create(SreAssertion + strOnError);
end;
{$ENDIF}
procedure New_state(var state: Tmatch_state; const nregs: integer);
var
i: integer;
begin
for i := 0 to nregs - 1 do begin
state._start[i] := nil;
state._end[i] := nil;
state.changed[i] := 0;
end;
state.stack.current := @state.stack.first;
state.stack.first.prev := nil;
state.stack.first.next := nil;
state.stack.index := 0;
state.level := 0;
state.count := 0;
state.point := 0;
end;
// Free any memory that might have been malloc'd
procedure Free_state(var state: Tmatch_state);
begin
while (state.stack.first.next <> nil) do begin
state.stack.current := state.stack.first.next;
state.stack.first.next := state.stack.current.next;
dispose(state.stack.current);
state.stack.current := nil;
end;
end;
function short(const a: word): integer;
begin
if (a > $7FFF) then Result := a - $10000
else Result := a;
end;
procedure TcsReExpr.re_compile_fastmap_aux(
var code: PChar; pos: integer; const visited: PChar; var can_be_null: char;
const fastmap: PChar);
var
a, b: integer;
syntaxcode: char;
begin
if visited[pos] <> #0 then exit;
visited[pos] := #1;
while True do
begin
case regexp_compiled_ops(ord(code[pos])) of // > ord
Cend: begin
// inc(pos);
can_be_null := #1;
Exit;
end;
Cbol,
Cbegbuf,
Cendbuf,
Cwordbeg,
Cwordend,
Cwordbound,
Cnotwordbound:
begin
inc(pos);
for a := 0 to 255 do
fastmap[a] := #1;
end;
Csyntaxspec:
begin
inc(pos);
syntaxcode := code[pos];
// inc(pos);
for a := 0 to 255 do
if (ord(re_syntax_table[a]) and ord(syntaxcode)) > 0 then // integer > ord
fastmap[a] := #1;
Exit;
end;
Cnotsyntaxspec:
begin
inc(pos);
syntaxcode := code[pos];
// inc(pos);
for a := 0 to 255 do
if not ((ord(re_syntax_table[a]) and ord(syntaxcode)) > 0) then // integer > ord
fastmap[a] := #1;
Exit;
end;
Ceol:
begin
// inc(pos);
fastmap[10] := #1; // was 13
//can match null, but only at end of buffer
if can_be_null = #0 then can_be_null := #2;
Exit;
end;
Cset:
begin
inc(pos);
for a := 0 to 31 do
if code[pos + a] <> #0 then
for b := 0 to 7 do
if (ord(code[pos + a]) and (1 shl b)) > 0 then // integer > ord
fastmap[(a shl 3) + b] := #1;
// inc(pos, 32);
Exit;
end;
Cexact:
begin
inc(pos);
fastmap[ord(code[pos])] := #1; // integer > ord
Exit;
end;
Canychar:
begin
// inc(pos);
for a := 0 to 255 do
if a <> 10 then // was 13
fastmap[a] := #1;
exit;
end;
Cstart_memory,
Cend_memory: inc(pos, 2);
Cmatch_memory:
begin
// inc(pos);
for a := 0 to 255 do
fastmap[a] := #1;
can_be_null := #1;
Exit;
end;
Cjump,
Cdummy_failure_jump,
Cupdate_failure_jump,
Cstar_jump:
begin
inc(pos);
a := ord(code[pos]) + ord(code[pos + 1]) shl 8; // integer > ord
inc(pos, 2); // check a for sign!
pos := pos + short(a);
if visited[pos] <> #0 then
{/* argh... the regexp contains empty loops. This is not
good, as this may cause a failure stack overflow when
matching. Oh well. */
/* this path leads nowhere; pursue other paths. */}
Exit;
visited[pos] := #1;
end;
Cfailure_jump:
begin
inc(pos);
a := ord(code[pos]) + ord(code[pos + 1]) shl 8; // integer > ord
inc(pos, 2); // check a for sign!
a := pos + short(a);
re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
end;
Crepeat1: inc(pos, 3);
else
begin
raise ERegularExpression.Create(SreUnknowRE);
end;
end;
end;
end;
function TcsReExpr.re_do_compile_fastmap(
const bufferstr: string; const pos: integer; var can_be_null: char;
const fastmap: PChar): boolean;
var
small_visited: array[0..511] of char;
Ext_visited: string;
visited, buffer: PChar;
begin
if Length(bufferstr) <= SizeOf(small_visited) then
visited := small_visited
else begin
SetLength(Ext_visited, Length(bufferstr));
visited := @Ext_visited[1];
end;
can_be_null := #0;
FillChar(fastmap^, 256, 0);
FillChar(visited^, Length(bufferstr), 0);
buffer := @bufferstr[1];
re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);
Result := true;
end;
{: This computes the fastmap for the regexp. For this to have any effect, the
calling program must have initialized the fastmap field to point to an array
of 256 characters. }
procedure TcsReExpr.re_compile_fastmap;
begin
if (regexp_t.fastmap = '') or (regexp_t.fastmap_accurate) then Exit;
if not (re_do_compile_fastmap(regexp_t.buffer, 0, regexp_t.can_be_null, PChar(regexp_t.fastmap))) then
Exit;
if regexp_t.buffer[1] = Char(Cbol) then regexp_t.anchor := 1 //begline
else
if regexp_t.buffer[1] = Char(Cbegbuf) then regexp_t.anchor := 2 //begbuf
else regexp_t.anchor := 0; //none
regexp_t.fastmap_accurate := True;
end;
{: star is coded as:
1: failure_jump 2
... code for operand of star
star_jump 1
2: ... code after star
We change the star_jump to update_failure_jump if we can determine \
that it is safe to do so; otherwise we change it to an ordinary \
jump.
plus is coded as
jump 2
1: failure_jump 3
2: ... code for operand of plus
star_jump 1
3: ... code after plus
For star_jump considerations this is processed identically to star.
*
}
function TcsReExpr.re_optimize_star_jump(
var code: PChar): boolean;
label
make_normal_jump, loop_p1;
var
map: array[0..255] of char;
can_be_null: char;
p1, p2: PChar;
ch: char;
a, b: integer;
num_instructions: integer;
begin
Result := False;
num_instructions := 0;
a := short(byte(code[0]) + byte(code[1]) * 256);
inc(code, 2);
p1 := code + a + 3; //skip the failure_jump
//Check that the jump is within the pattern
if (p1 < @regexp_t.buffer[1]) or
((integer(@regexp_t.buffer[1]) + length(regexp_t.buffer)) < integer(p1)) then
Exit;
Assert((p1[-3] = char(Cfailure_jump)), 'No Cfailure_jump');
p2 := code;
//p1 points inside loop, p2 points to after loop
if not re_do_compile_fastmap(regexp_t.buffer, integer(p2) - integer(@regexp_t.buffer[1]), can_be_null, map) then
goto make_normal_jump;
{/* If we might introduce a new update point inside the
* loop, we can't optimize because then update_jump would
* update a wrong failure point. Thus we have to be
* quite careful here.
*/}
//loop until we find something that consumes a character
loop_p1:
inc(num_instructions);
case regexp_compiled_ops(ord(p1[0])) of
Cbol,
Ceol,
Cbegbuf,
Cendbuf,
Cwordbeg,
Cwordend,
Cwordbound,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -