⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 csregex.pas

📁 Delphi script parser
💻 PAS
📖 第 1 页 / 共 5 页
字号:
destructor TcsReExpr.Destroy;
begin
  FMatches.Free;
  inherited Destroy;
end;

procedure TcsReExpr.CheckRegExp;
begin
  if regexp_t.buffer = '' then
    raise Exception.Create('No compiled pattern available.');
  if regexp_t.Translate <> '' then
    if Length(regexp_t.Translate) <> 256 then
      raise Exception.Create('Translate table length error.');
  if regexp_t.fastmap <> '' then
    if Length(regexp_t.fastmap) <> 256 then
      raise Exception.Create('Fastmap table length error.');
end;

function TcsReExpr.GetMatches: TStringList;
begin
  Result := nil;
  if not FNoChange then begin
    DoMatch;
    FNoChange := True;
  end;
  Result := FMatches;
end;

procedure TcsReExpr.SetUseFastmap(const fstm: boolean);
begin
  if fstm <> FUseFastmap then begin
    if fstm then
      SetLength(regexp_t.fastmap, 256)
    else
      SetLength(regexp_t.fastmap, 0);
    FStyleChange := True;
    FUseFastmap := fstm;
  end;
end;

procedure TcsReExpr.SetCanBeEmpty(const BeEm: boolean);
begin
  if BeEm <> FCanBeEmpty then begin
    FStyleChange := True;
    FCanBeEmpty := BeEm;
  end;
end;

procedure TcsReExpr.SetStr(const str: string);
begin
  FNoChange := False;
  FStr := str;
  UniqueString(FStr);
  FBuffer := PChar(FStr);
  FBufferSize := Length(FStr);
  FBufferEnd := PChar(integer(FBuffer) + FBufferSize);
  if FActive then DoMatch;
end;

procedure TcsReExpr.SetSyntaxStyles(const NewStyles: TmkreSyntaxStyles);
const
  Syntax: array[low(TmkreSyntaxStyle)..high(TmkreSyntaxStyle)] of integer = (
    RE_NO_BK_PARENS, //* no quoting for parentheses */
    RE_NO_BK_VBAR, //* no quoting for vertical bar */
    RE_BK_PLUS_QM, //* quoting needed for + and ? */
    RE_TIGHT_VBAR, //* | binds tighter than ^ and $ */
    RE_NEWLINE_OR, //* treat newline as or */
    RE_CONTEXT_INDEP_OPS, //* ^$?*+ are special in all contexts */
    RE_ANSI_HEX, //* ansi sequences (\n etc) and \xhh */
    RE_NO_GNU_EXTENSIONS, //* no gnu extensions */
    RE_HIGHCHARSWHITESPACE); //* characters above 127 are whitespace */
var
  i: TmkreSyntaxStyle;
begin
  if NewStyles <> FSyntaxStyles then begin
    FStyle := 0;
    for i := low(TmkreSyntaxStyle) to high(TmkreSyntaxStyle) do
      if i in NewStyles then FStyle := FStyle + Syntax[i];
    FSyntaxStyles := NewStyles;
    FStyleChange := True;
    FNoChange := False;
  end;
end;

// Original code starts here

{$IFDEF ver90}

procedure Assert(const blnAssertion: boolean; const strOnError: string);
begin
  if not blnAssertion then
    raise Exception.Create(SreAssertion + strOnError);
end;
{$ENDIF}

procedure New_state(var state: Tmatch_state; const nregs: integer);
var
  i: integer;
begin
  for i := 0 to nregs - 1 do begin
    state._start[i] := nil;
    state._end[i] := nil;
    state.changed[i] := 0;
  end;
  state.stack.current := @state.stack.first;
  state.stack.first.prev := nil;
  state.stack.first.next := nil;
  state.stack.index := 0;
  state.level := 0;
  state.count := 0;
  state.point := 0;
end;

// Free any memory that might have been malloc'd

procedure Free_state(var state: Tmatch_state);
begin
  while (state.stack.first.next <> nil) do begin
    state.stack.current := state.stack.first.next;
    state.stack.first.next := state.stack.current.next;
    dispose(state.stack.current);
    state.stack.current := nil;
  end;
end;

function short(const a: word): integer;
begin
  if (a > $7FFF) then Result := a - $10000
  else Result := a;
end;

procedure TcsReExpr.re_compile_fastmap_aux(
  var code: PChar; pos: integer; const visited: PChar; var can_be_null: char;
  const fastmap: PChar);
var
  a, b: integer;
  syntaxcode: char;
begin
  if visited[pos] <> #0 then exit;
  visited[pos] := #1;
  while True do
  begin
    case regexp_compiled_ops(ord(code[pos])) of // > ord
      Cend: begin
//              inc(pos);
          can_be_null := #1;
          Exit;
        end;
      Cbol,
        Cbegbuf,
        Cendbuf,
        Cwordbeg,
        Cwordend,
        Cwordbound,
        Cnotwordbound:
        begin
          inc(pos);
          for a := 0 to 255 do
            fastmap[a] := #1;
        end;
      Csyntaxspec:
        begin
          inc(pos);
          syntaxcode := code[pos];
//            inc(pos);
          for a := 0 to 255 do
            if (ord(re_syntax_table[a]) and ord(syntaxcode)) > 0 then // integer > ord
              fastmap[a] := #1;
          Exit;
        end;
      Cnotsyntaxspec:
        begin
          inc(pos);
          syntaxcode := code[pos];
//            inc(pos);
          for a := 0 to 255 do
            if not ((ord(re_syntax_table[a]) and ord(syntaxcode)) > 0) then // integer > ord
              fastmap[a] := #1;
          Exit;
        end;
      Ceol:
        begin
//            inc(pos);
          fastmap[10] := #1; // was 13
            //can match null, but only at end of buffer
          if can_be_null = #0 then can_be_null := #2;
          Exit;
        end;
      Cset:
        begin
          inc(pos);
          for a := 0 to 31 do
            if code[pos + a] <> #0 then
              for b := 0 to 7 do
                if (ord(code[pos + a]) and (1 shl b)) > 0 then // integer > ord
                  fastmap[(a shl 3) + b] := #1;
//            inc(pos, 32);
          Exit;
        end;
      Cexact:
        begin
          inc(pos);
          fastmap[ord(code[pos])] := #1; // integer > ord
          Exit;
        end;
      Canychar:
        begin
//            inc(pos);
          for a := 0 to 255 do
            if a <> 10 then // was 13
              fastmap[a] := #1;
          exit;
        end;
      Cstart_memory,
        Cend_memory: inc(pos, 2);
      Cmatch_memory:
        begin
//            inc(pos);
          for a := 0 to 255 do
            fastmap[a] := #1;
          can_be_null := #1;
          Exit;
        end;
      Cjump,
        Cdummy_failure_jump,
        Cupdate_failure_jump,
        Cstar_jump:
        begin
          inc(pos);
          a := ord(code[pos]) + ord(code[pos + 1]) shl 8; // integer > ord
          inc(pos, 2); // check a for sign!
          pos := pos + short(a);
          if visited[pos] <> #0 then
            {/* argh... the regexp contains empty loops.  This is not
                good, as this may cause a failure stack overflow when
                matching.  Oh well. */
             /* this path leads nowhere; pursue other paths. */}
            Exit;
          visited[pos] := #1;
        end;
      Cfailure_jump:
        begin
          inc(pos);
          a := ord(code[pos]) + ord(code[pos + 1]) shl 8; // integer > ord
          inc(pos, 2); // check a for sign!
          a := pos + short(a);
          re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
        end;
      Crepeat1: inc(pos, 3);
    else
      begin
        raise ERegularExpression.Create(SreUnknowRE);
      end;
    end;
  end;
end;

function TcsReExpr.re_do_compile_fastmap(
  const bufferstr: string; const pos: integer; var can_be_null: char;
  const fastmap: PChar): boolean;
var
  small_visited: array[0..511] of char;
  Ext_visited: string;
  visited, buffer: PChar;
begin
  if Length(bufferstr) <= SizeOf(small_visited) then
    visited := small_visited
  else begin
    SetLength(Ext_visited, Length(bufferstr));
    visited := @Ext_visited[1];
  end;
  can_be_null := #0;
  FillChar(fastmap^, 256, 0);
  FillChar(visited^, Length(bufferstr), 0);
  buffer := @bufferstr[1];
  re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);
  Result := true;
end;

{: This computes the fastmap for the regexp.  For this to have any effect, the
   calling program must have initialized the fastmap field to point to an array
   of 256 characters. }

procedure TcsReExpr.re_compile_fastmap;
begin
  if (regexp_t.fastmap = '') or (regexp_t.fastmap_accurate) then Exit;
  if not (re_do_compile_fastmap(regexp_t.buffer, 0, regexp_t.can_be_null, PChar(regexp_t.fastmap))) then
    Exit;
  if regexp_t.buffer[1] = Char(Cbol) then regexp_t.anchor := 1 //begline
  else
    if regexp_t.buffer[1] = Char(Cbegbuf) then regexp_t.anchor := 2 //begbuf
    else regexp_t.anchor := 0; //none
  regexp_t.fastmap_accurate := True;
end;


{: star is coded as:
1: failure_jump 2
   ... code for operand of star
   star_jump 1
2: ... code after star

We change the star_jump to update_failure_jump if we can determine \
that it is safe to do so; otherwise we change it to an ordinary \
jump.

plus is coded as

    jump 2
1: failure_jump 3
2: ... code for operand of plus
   star_jump 1
3: ... code after plus

For star_jump considerations this is processed identically to star.
*
}

function TcsReExpr.re_optimize_star_jump(
  var code: PChar): boolean;
label
  make_normal_jump, loop_p1;
var
  map: array[0..255] of char;
  can_be_null: char;
  p1, p2: PChar;
  ch: char;
  a, b: integer;
  num_instructions: integer;
begin
  Result := False;
  num_instructions := 0;

  a := short(byte(code[0]) + byte(code[1]) * 256);

  inc(code, 2);
  p1 := code + a + 3; //skip the failure_jump
  //Check that the jump is within the pattern
  if (p1 < @regexp_t.buffer[1]) or
    ((integer(@regexp_t.buffer[1]) + length(regexp_t.buffer)) < integer(p1)) then
    Exit;
  Assert((p1[-3] = char(Cfailure_jump)), 'No Cfailure_jump');
  p2 := code;
  //p1 points inside loop, p2 points to after loop
  if not re_do_compile_fastmap(regexp_t.buffer, integer(p2) - integer(@regexp_t.buffer[1]), can_be_null, map) then
    goto make_normal_jump;
{/* If we might introduce a new update point inside the
  * loop, we can't optimize because then update_jump would
  * update a wrong failure point.  Thus we have to be
  * quite careful here.
  */}

  //loop until we find something that consumes a character
  loop_p1:
  inc(num_instructions);
  case regexp_compiled_ops(ord(p1[0])) of
    Cbol,
      Ceol,
      Cbegbuf,
      Cendbuf,
      Cwordbeg,
      Cwordend,
      Cwordbound,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -