⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 csregex.pas

📁 Delphi script parser
💻 PAS
📖 第 1 页 / 共 5 页
字号:
      Cnotwordbound:
      begin
        inc(p1);
        goto loop_p1;
      end;
    Cstart_memory,
      Cend_memory:
      begin
        inc(p1, 2);
        goto loop_p1;
      end;
    Cexact:
      begin
        inc(p1);
        ch := p1[0];
        inc(p1);
        if (map[ord(ch)]) <> #0 then
          goto make_normal_jump;
      end;
    Canychar:
      begin
        inc(p1);
        for b := 0 to 255 do
          if (b <> 10) and (map[b] <> #0) then // was 13
            goto make_normal_jump;
      end;
    Cset:
      begin
        inc(p1);
        for b := 0 to 255 do
            // Check This!!!!
          if ((ord(p1[b div 8]) and (1 shl (b and 7))) <> 0) and // integer > ord
            (map[b] <> #0) then
            goto make_normal_jump;
        inc(p1, 32);
      end;
  else
    goto make_normal_jump;
  end;

  //now we know that we can't backtrack.
  while p1 <> (p2 - 3) do begin
    inc(num_instructions);
    case regexp_compiled_ops(ord(p1[0])) of
      Cend: Exit;
      Cbol,
        Ceol,
        Canychar,
        Cbegbuf,
        Cendbuf,
        Cwordbeg,
        Cwordend,
        Cwordbound,
        Cnotwordbound: inc(p1);
      Cset: inc(p1, 33);
      Cexact,
        Cstart_memory,
        Cend_memory,
        Cmatch_memory,
        Csyntaxspec,
        Cnotsyntaxspec: inc(p1, 2);
      Cjump,
        Cstar_jump,
        Cfailure_jump,
        Cupdate_failure_jump,
        Cdummy_failure_jump: goto make_normal_jump;
    else
      Exit;
    end;
  end;
  //make_update_jump:
  dec(code, 3);
  inc(a, 3);
  code[0] := char(Cupdate_failure_jump);
  code[1] := char(a and 255);
  code[2] := char(a shr 8);
  if num_instructions > 1 then begin
    Result := True;
    Exit;
  end;
  assert(num_instructions = 1, 'No instructions found!');
  {/* if the only instruction matches a single character, we can do
  * better */}
  p1 := code + 3 + a; //start of sole instruction
  if (p1[0] = char(Cset)) or (p1[0] = char(Cexact)) or (p1[0] = char(Canychar)) or
    (p1[0] = char(Csyntaxspec)) or (p1[0] = char(Cnotsyntaxspec)) then
    code[0] := char(Crepeat1);
  Result := True;
  Exit;

  make_normal_jump:
  dec(code, 3);
  code[0] := char(Cjump);
  Result := True;
end;

function TcsReExpr.re_optimize: boolean;
var
  code: PChar;
begin
  Result := False;
  code := @regexp_t.buffer[1];
  while True do begin
    case regexp_compiled_ops(ord(code[0])) of
      Cend:
        begin
          Result := True;
          Exit;
        end;
      Canychar,
        Cbol,
        Ceol,
        Cbegbuf,
        Cendbuf,
        Cwordbeg,
        Cwordend,
        Cwordbound,
        Cnotwordbound: inc(code);
      Cset: inc(code, 33);
      Cexact,
        Cstart_memory,
        Cend_memory,
        Cmatch_memory,
        Csyntaxspec,
        Cnotsyntaxspec: inc(code, 2);
      Cstar_jump: begin
          inc(code);
          if not re_optimize_star_jump(code) then Exit;
        end;
      Cupdate_failure_jump,
        Cjump,
        Cdummy_failure_jump,
        Cfailure_jump,
        Crepeat1: inc(code, 3);
    else
      Exit;
    end;
  end;
end;

function TcsReExpr.hex_char_to_decimal(
  const ch: char): char;
begin
  Result := #16; // error
  if (ch >= '0') and (ch <= '9') then
    Result := char(ord(ch) - ord('0'));
  if (ch >= 'a') and (ch <= 'f') then
    Result := char(ord(ch) - ord('a') + 10);
  if (ch >= 'A') and (ch <= 'F') then
    Result := char(ord(ch) - ord('A') + 10);
end;

function TcsReExpr.Ansi_Translate(
  const ch: char; const size: integer; var pos: integer;
  const regex, translate: string): char;
var
  gethex_ch, gethex_value: char;
begin
  Result := #0;
  case ch of
    'a', 'A': Result := #7; // audible bell
    'b', 'B': Result := #8; // backspace
    'f', 'F': Result := #12; // form feed
    'n', 'N': Result := #10; // line feed
    'r', 'R': Result := #13; // carriage return
    't', 'T': Result := #9; // tab
    'v', 'V': Result := #11; // vertical tab
    'x', 'X': begin // hex code
        if Pos > Size then
          raise ERegularExpression.Create('Regular expression ends prematurely');
        gethex_ch := regex[pos];
        inc(pos);
        gethex_value := hex_char_to_decimal(gethex_ch);
        if (gethex_value = #16) then
          raise ERegularExpression.Create('No valid hex value');
        if Pos > Size then
          raise ERegularExpression.Create('Regular expression ends prematurely');
        gethex_ch := regex[pos];
        inc(pos);
        gethex_ch := hex_char_to_decimal(gethex_ch);
        if (gethex_value = #16) then
          raise ERegularExpression.Create('');
        Result := char(ord(gethex_value) * 16 + ord(gethex_ch));
      end;
  else
    if translate <> '' then
      Result := translate[ord(ch)];
  end;
end;

procedure TcsReExpr.re_compile_initialize;
var
  a, i: integer;
begin
//  FillChar(bufp.re_syntax_table, 256, 0); // not nessesary
  for a := ord('a') to ord('z') do re_syntax_table[a] := char(Sword);
  for a := ord('A') to ord('Z') do re_syntax_table[a] := char(Sword);
  for a := ord('0') to ord('9') do re_syntax_table[a] := char(Sword or Sdigit or Shexdigit);
  for a := ord('0') to ord('7') do re_syntax_table[a] := char(ord(re_syntax_table[a]) + Soctaldigit); // integer > ord
  for a := ord('a') to ord('f') do re_syntax_table[a] := char(ord(re_syntax_table[a]) + Shexdigit); // integer > ord
  for a := ord('A') to ord('F') do re_syntax_table[a] := char(ord(re_syntax_table[a]) + Shexdigit); // integer > ord
  re_syntax_table[ord('_')] := char(Sword);
  for a := 9 to 13 do re_syntax_table[a] := char(Swhitespace);
  re_syntax_table[ord(' ')] := char(Swhitespace);

  if (FStyle and RE_HIGHCHARSWHITESPACE) = RE_HIGHCHARSWHITESPACE then begin
    for a := 128 to 255 do re_syntax_table[a] := char(ord(re_syntax_table[a]) + Swhitespace);
  end;

  for i := 0 to 255 do begin
    regexp_plain_ops[i] := Rnormal;
    regexp_quoted_ops[i] := Rnormal;
  end;
  for a := ord('0') to ord('9') do regexp_quoted_ops[a] := Rmemory;
  regexp_plain_ops[ord('\')] := Rquote;
  if (FStyle and RE_NO_BK_PARENS) = RE_NO_BK_PARENS then begin
    regexp_plain_ops[ord('(')] := Ropenpar;
    regexp_plain_ops[ord(')')] := Rclosepar;
  end else begin
    regexp_quoted_ops[ord('(')] := Ropenpar;
    regexp_quoted_ops[ord(')')] := Rclosepar;
  end;

  if (FStyle and RE_NO_BK_VBAR) = RE_NO_BK_VBAR then
    regexp_plain_ops[ord('|')] := Ror
  else
    regexp_quoted_ops[ord('|')] := Ror;
  regexp_plain_ops[ord('*')] := Rstar;
  if (FStyle and RE_BK_PLUS_QM) = RE_BK_PLUS_QM then begin
    regexp_quoted_ops[ord('+')] := Rplus;
    regexp_quoted_ops[ord('?')] := Roptional;
  end else begin
    regexp_plain_ops[ord('+')] := Rplus;
    regexp_plain_ops[ord('?')] := Roptional;
  end;

  if (FStyle and RE_NEWLINE_OR) = RE_NEWLINE_OR then
    regexp_plain_ops[10] := Ror; // was 13
  regexp_plain_ops[ord('[')] := Ropenset;
  regexp_plain_ops[ord('^')] := Rbol;
  regexp_plain_ops[ord('$')] := Reol;
  regexp_plain_ops[ord('.')] := Ranychar;
  if not ((FStyle and RE_NO_GNU_EXTENSIONS) = RE_NO_GNU_EXTENSIONS) then begin
    regexp_quoted_ops[ord('d')] := RDigitChar; // RJ 2000-04-01 special for digits 0-9
    regexp_quoted_ops[ord('D')] := RNotDigitChar; // RJ 2000-04-01 special for digits 0-9
    regexp_quoted_ops[ord('w')] := Rwordchar;
    regexp_quoted_ops[ord('W')] := Rnotwordchar;
    regexp_quoted_ops[ord('<')] := Rwordbeg;
    regexp_quoted_ops[ord('>')] := Rwordend;
    regexp_quoted_ops[ord('b')] := Rwordbound;
    regexp_quoted_ops[ord('B')] := Rnotwordbound;
    regexp_quoted_ops[ord('`')] := Rbegbuf;
    regexp_quoted_ops[44] := Rendbuf; // '
  end;
  if (FStyle and RE_ANSI_HEX) = RE_ANSI_HEX then
    regexp_quoted_ops[ord('v')] := Rextended_memory;
  for a := 0 to ord(Rnum_ops) - 1 do
    regexp_precedences[a] := #4;
  if (FStyle and RE_TIGHT_VBAR) > 0 then begin
    regexp_precedences[ord(Ror)] := #3;
    regexp_precedences[ord(Rbol)] := #2;
    regexp_precedences[ord(Reol)] := #2;
  end else begin
    regexp_precedences[ord(Ror)] := #2;
    regexp_precedences[ord(Rbol)] := #3;
    regexp_precedences[ord(Reol)] := #3;
  end;
  regexp_precedences[ord(Rclosepar)] := #1;
  regexp_precedences[ord(Rend)] := #0;
  regexp_context_indep_ops := (FStyle and RE_CONTEXT_INDEP_OPS) > 0;
  regexp_ansi_sequences := (FStyle and RE_ANSI_HEX) > 0;

  re_compile_initialized := True;
end;

procedure TcsReExpr.Inser_Jump(
  const pos: integer; const opcode_type: regexp_compiled_ops;
  const addr: integer; var pattern_offset: integer; var pattern: string);
var
  a, disp: integer;
begin
  for a := pattern_offset - 1 downto pos do
    pattern[a + 3] := pattern[a];
  pattern[pos] := char(opcode_type);
  //PUT_ADDR(offset,addr)
  disp := addr - (pos + 1) - 2;
  pattern[pos + 1] := char(disp and 255);
  pattern[pos + 2] := char((disp shr 8) and 255);
  inc(pattern_offset, 3);
end;

{: This compiles the regexp (given in regex and length in regex_size).
   This empty string if the regexp compiled successfully, and an error message
   if an error was encountered.
   The translate field must be set to point to a valid translation table, or
   empty if it is not used. }

function TcsReExpr.re_compile_pattern: string;
label
  normal_char, store_opcode_and_arg, store_opcode;
var
  i, pos, current_level, level: integer;
  op: regexp_syntax_op;
  opcode: regexp_compiled_ops;
  pattern_offset: integer;
  starts: array[0..NUM_LEVELS * MAX_NESTING] of integer;
  starts_base: integer;
  future_jumps: array[0..MAX_NESTING] of integer;
  num_jumps: integer;
  a, ch: char;
  pattern: string;
  translate: string;
  next_register: integer;
  paren_depth: integer;
  num_open_registers: integer;
  open_registers: array[0..RE_NREGS] of integer;
  beginning_context: boolean;

  size, disp: integer;

  complement, firstchar, range: boolean;
  prev, offset: integer;
begin
  pattern_offset := 0;
  ch := #0;
  if not re_compile_initialized then
    re_compile_initialize;
  regexp_t.fastmap_accurate := false;
  regexp_t.uses_registers := True;
  regexp_t.num_registers := 1;
  translate := regexp_t.translate;
  pattern := '';
  pattern_offset := 1;
  try
    starts_base := 0;
    num_jumps := 0;
    current_level := 0;
    Starts[starts_base + current_level] := pattern_offset; {SET_LEVEL_START}
    num_open_registers := 0;
    next_register := 1;
    paren_depth := 0;
    beginning_context := True;
    op := Rnum_ops; // maybe wrong, just give it a try
    {we use Rend dummy to ensure that pending jumps are updated
     (due to low priority of Rend) before exiting the loop.}
    size := Length(FPattern);
    pos := 1;
    while op <> Rend do begin
      if pos > size then op := Rend
      else begin
        if pos > size then
          raise ERegularExpression.Create(SreEndPrem);
        ch := FPattern[pos];
        inc(pos);
        if translate <> '' then ch := translate[ord(ch)];
        op := regexp_plain_ops[ord(ch)];
        if op = RQuote then begin
          if pos > size then
            raise ERegularExpression.Create(SreEndPrem);
          ch := FPattern[pos];
          inc(pos);
          op := regexp_quoted_ops[ord(ch)];
          if (op = Rnormal) and regexp_ansi_sequences then

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -