📄 csregex.pas
字号:
Cnotwordbound:
begin
inc(p1);
goto loop_p1;
end;
Cstart_memory,
Cend_memory:
begin
inc(p1, 2);
goto loop_p1;
end;
Cexact:
begin
inc(p1);
ch := p1[0];
inc(p1);
if (map[ord(ch)]) <> #0 then
goto make_normal_jump;
end;
Canychar:
begin
inc(p1);
for b := 0 to 255 do
if (b <> 10) and (map[b] <> #0) then // was 13
goto make_normal_jump;
end;
Cset:
begin
inc(p1);
for b := 0 to 255 do
// Check This!!!!
if ((ord(p1[b div 8]) and (1 shl (b and 7))) <> 0) and // integer > ord
(map[b] <> #0) then
goto make_normal_jump;
inc(p1, 32);
end;
else
goto make_normal_jump;
end;
//now we know that we can't backtrack.
while p1 <> (p2 - 3) do begin
inc(num_instructions);
case regexp_compiled_ops(ord(p1[0])) of
Cend: Exit;
Cbol,
Ceol,
Canychar,
Cbegbuf,
Cendbuf,
Cwordbeg,
Cwordend,
Cwordbound,
Cnotwordbound: inc(p1);
Cset: inc(p1, 33);
Cexact,
Cstart_memory,
Cend_memory,
Cmatch_memory,
Csyntaxspec,
Cnotsyntaxspec: inc(p1, 2);
Cjump,
Cstar_jump,
Cfailure_jump,
Cupdate_failure_jump,
Cdummy_failure_jump: goto make_normal_jump;
else
Exit;
end;
end;
//make_update_jump:
dec(code, 3);
inc(a, 3);
code[0] := char(Cupdate_failure_jump);
code[1] := char(a and 255);
code[2] := char(a shr 8);
if num_instructions > 1 then begin
Result := True;
Exit;
end;
assert(num_instructions = 1, 'No instructions found!');
{/* if the only instruction matches a single character, we can do
* better */}
p1 := code + 3 + a; //start of sole instruction
if (p1[0] = char(Cset)) or (p1[0] = char(Cexact)) or (p1[0] = char(Canychar)) or
(p1[0] = char(Csyntaxspec)) or (p1[0] = char(Cnotsyntaxspec)) then
code[0] := char(Crepeat1);
Result := True;
Exit;
make_normal_jump:
dec(code, 3);
code[0] := char(Cjump);
Result := True;
end;
function TcsReExpr.re_optimize: boolean;
var
code: PChar;
begin
Result := False;
code := @regexp_t.buffer[1];
while True do begin
case regexp_compiled_ops(ord(code[0])) of
Cend:
begin
Result := True;
Exit;
end;
Canychar,
Cbol,
Ceol,
Cbegbuf,
Cendbuf,
Cwordbeg,
Cwordend,
Cwordbound,
Cnotwordbound: inc(code);
Cset: inc(code, 33);
Cexact,
Cstart_memory,
Cend_memory,
Cmatch_memory,
Csyntaxspec,
Cnotsyntaxspec: inc(code, 2);
Cstar_jump: begin
inc(code);
if not re_optimize_star_jump(code) then Exit;
end;
Cupdate_failure_jump,
Cjump,
Cdummy_failure_jump,
Cfailure_jump,
Crepeat1: inc(code, 3);
else
Exit;
end;
end;
end;
function TcsReExpr.hex_char_to_decimal(
const ch: char): char;
begin
Result := #16; // error
if (ch >= '0') and (ch <= '9') then
Result := char(ord(ch) - ord('0'));
if (ch >= 'a') and (ch <= 'f') then
Result := char(ord(ch) - ord('a') + 10);
if (ch >= 'A') and (ch <= 'F') then
Result := char(ord(ch) - ord('A') + 10);
end;
function TcsReExpr.Ansi_Translate(
const ch: char; const size: integer; var pos: integer;
const regex, translate: string): char;
var
gethex_ch, gethex_value: char;
begin
Result := #0;
case ch of
'a', 'A': Result := #7; // audible bell
'b', 'B': Result := #8; // backspace
'f', 'F': Result := #12; // form feed
'n', 'N': Result := #10; // line feed
'r', 'R': Result := #13; // carriage return
't', 'T': Result := #9; // tab
'v', 'V': Result := #11; // vertical tab
'x', 'X': begin // hex code
if Pos > Size then
raise ERegularExpression.Create('Regular expression ends prematurely');
gethex_ch := regex[pos];
inc(pos);
gethex_value := hex_char_to_decimal(gethex_ch);
if (gethex_value = #16) then
raise ERegularExpression.Create('No valid hex value');
if Pos > Size then
raise ERegularExpression.Create('Regular expression ends prematurely');
gethex_ch := regex[pos];
inc(pos);
gethex_ch := hex_char_to_decimal(gethex_ch);
if (gethex_value = #16) then
raise ERegularExpression.Create('');
Result := char(ord(gethex_value) * 16 + ord(gethex_ch));
end;
else
if translate <> '' then
Result := translate[ord(ch)];
end;
end;
procedure TcsReExpr.re_compile_initialize;
var
a, i: integer;
begin
// FillChar(bufp.re_syntax_table, 256, 0); // not nessesary
for a := ord('a') to ord('z') do re_syntax_table[a] := char(Sword);
for a := ord('A') to ord('Z') do re_syntax_table[a] := char(Sword);
for a := ord('0') to ord('9') do re_syntax_table[a] := char(Sword or Sdigit or Shexdigit);
for a := ord('0') to ord('7') do re_syntax_table[a] := char(ord(re_syntax_table[a]) + Soctaldigit); // integer > ord
for a := ord('a') to ord('f') do re_syntax_table[a] := char(ord(re_syntax_table[a]) + Shexdigit); // integer > ord
for a := ord('A') to ord('F') do re_syntax_table[a] := char(ord(re_syntax_table[a]) + Shexdigit); // integer > ord
re_syntax_table[ord('_')] := char(Sword);
for a := 9 to 13 do re_syntax_table[a] := char(Swhitespace);
re_syntax_table[ord(' ')] := char(Swhitespace);
if (FStyle and RE_HIGHCHARSWHITESPACE) = RE_HIGHCHARSWHITESPACE then begin
for a := 128 to 255 do re_syntax_table[a] := char(ord(re_syntax_table[a]) + Swhitespace);
end;
for i := 0 to 255 do begin
regexp_plain_ops[i] := Rnormal;
regexp_quoted_ops[i] := Rnormal;
end;
for a := ord('0') to ord('9') do regexp_quoted_ops[a] := Rmemory;
regexp_plain_ops[ord('\')] := Rquote;
if (FStyle and RE_NO_BK_PARENS) = RE_NO_BK_PARENS then begin
regexp_plain_ops[ord('(')] := Ropenpar;
regexp_plain_ops[ord(')')] := Rclosepar;
end else begin
regexp_quoted_ops[ord('(')] := Ropenpar;
regexp_quoted_ops[ord(')')] := Rclosepar;
end;
if (FStyle and RE_NO_BK_VBAR) = RE_NO_BK_VBAR then
regexp_plain_ops[ord('|')] := Ror
else
regexp_quoted_ops[ord('|')] := Ror;
regexp_plain_ops[ord('*')] := Rstar;
if (FStyle and RE_BK_PLUS_QM) = RE_BK_PLUS_QM then begin
regexp_quoted_ops[ord('+')] := Rplus;
regexp_quoted_ops[ord('?')] := Roptional;
end else begin
regexp_plain_ops[ord('+')] := Rplus;
regexp_plain_ops[ord('?')] := Roptional;
end;
if (FStyle and RE_NEWLINE_OR) = RE_NEWLINE_OR then
regexp_plain_ops[10] := Ror; // was 13
regexp_plain_ops[ord('[')] := Ropenset;
regexp_plain_ops[ord('^')] := Rbol;
regexp_plain_ops[ord('$')] := Reol;
regexp_plain_ops[ord('.')] := Ranychar;
if not ((FStyle and RE_NO_GNU_EXTENSIONS) = RE_NO_GNU_EXTENSIONS) then begin
regexp_quoted_ops[ord('d')] := RDigitChar; // RJ 2000-04-01 special for digits 0-9
regexp_quoted_ops[ord('D')] := RNotDigitChar; // RJ 2000-04-01 special for digits 0-9
regexp_quoted_ops[ord('w')] := Rwordchar;
regexp_quoted_ops[ord('W')] := Rnotwordchar;
regexp_quoted_ops[ord('<')] := Rwordbeg;
regexp_quoted_ops[ord('>')] := Rwordend;
regexp_quoted_ops[ord('b')] := Rwordbound;
regexp_quoted_ops[ord('B')] := Rnotwordbound;
regexp_quoted_ops[ord('`')] := Rbegbuf;
regexp_quoted_ops[44] := Rendbuf; // '
end;
if (FStyle and RE_ANSI_HEX) = RE_ANSI_HEX then
regexp_quoted_ops[ord('v')] := Rextended_memory;
for a := 0 to ord(Rnum_ops) - 1 do
regexp_precedences[a] := #4;
if (FStyle and RE_TIGHT_VBAR) > 0 then begin
regexp_precedences[ord(Ror)] := #3;
regexp_precedences[ord(Rbol)] := #2;
regexp_precedences[ord(Reol)] := #2;
end else begin
regexp_precedences[ord(Ror)] := #2;
regexp_precedences[ord(Rbol)] := #3;
regexp_precedences[ord(Reol)] := #3;
end;
regexp_precedences[ord(Rclosepar)] := #1;
regexp_precedences[ord(Rend)] := #0;
regexp_context_indep_ops := (FStyle and RE_CONTEXT_INDEP_OPS) > 0;
regexp_ansi_sequences := (FStyle and RE_ANSI_HEX) > 0;
re_compile_initialized := True;
end;
procedure TcsReExpr.Inser_Jump(
const pos: integer; const opcode_type: regexp_compiled_ops;
const addr: integer; var pattern_offset: integer; var pattern: string);
var
a, disp: integer;
begin
for a := pattern_offset - 1 downto pos do
pattern[a + 3] := pattern[a];
pattern[pos] := char(opcode_type);
//PUT_ADDR(offset,addr)
disp := addr - (pos + 1) - 2;
pattern[pos + 1] := char(disp and 255);
pattern[pos + 2] := char((disp shr 8) and 255);
inc(pattern_offset, 3);
end;
{: This compiles the regexp (given in regex and length in regex_size).
This empty string if the regexp compiled successfully, and an error message
if an error was encountered.
The translate field must be set to point to a valid translation table, or
empty if it is not used. }
function TcsReExpr.re_compile_pattern: string;
label
normal_char, store_opcode_and_arg, store_opcode;
var
i, pos, current_level, level: integer;
op: regexp_syntax_op;
opcode: regexp_compiled_ops;
pattern_offset: integer;
starts: array[0..NUM_LEVELS * MAX_NESTING] of integer;
starts_base: integer;
future_jumps: array[0..MAX_NESTING] of integer;
num_jumps: integer;
a, ch: char;
pattern: string;
translate: string;
next_register: integer;
paren_depth: integer;
num_open_registers: integer;
open_registers: array[0..RE_NREGS] of integer;
beginning_context: boolean;
size, disp: integer;
complement, firstchar, range: boolean;
prev, offset: integer;
begin
pattern_offset := 0;
ch := #0;
if not re_compile_initialized then
re_compile_initialize;
regexp_t.fastmap_accurate := false;
regexp_t.uses_registers := True;
regexp_t.num_registers := 1;
translate := regexp_t.translate;
pattern := '';
pattern_offset := 1;
try
starts_base := 0;
num_jumps := 0;
current_level := 0;
Starts[starts_base + current_level] := pattern_offset; {SET_LEVEL_START}
num_open_registers := 0;
next_register := 1;
paren_depth := 0;
beginning_context := True;
op := Rnum_ops; // maybe wrong, just give it a try
{we use Rend dummy to ensure that pending jumps are updated
(due to low priority of Rend) before exiting the loop.}
size := Length(FPattern);
pos := 1;
while op <> Rend do begin
if pos > size then op := Rend
else begin
if pos > size then
raise ERegularExpression.Create(SreEndPrem);
ch := FPattern[pos];
inc(pos);
if translate <> '' then ch := translate[ord(ch)];
op := regexp_plain_ops[ord(ch)];
if op = RQuote then begin
if pos > size then
raise ERegularExpression.Create(SreEndPrem);
ch := FPattern[pos];
inc(pos);
op := regexp_quoted_ops[ord(ch)];
if (op = Rnormal) and regexp_ansi_sequences then
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -