📄 regexpr.c
字号:
{ \
bufp->allocated = alloc; \
bufp->buffer = pattern; \
bufp->used = pattern_offset; \
}
/*
 * GETHEX(var): read the next two pattern characters with NEXTCHAR, convert
 * each with hex_char_to_decimal() and store the combined byte in `var`
 * (first character is the high nibble, second the low nibble).
 *
 * A conversion result of 16 is treated as "not a hex digit" and transfers
 * control to the enclosing function's `hex_error` label.  The enclosing
 * function must therefore provide that label, plus whatever NEXTCHAR needs.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and is safe as the unbraced body of an if/else.
 */
#define GETHEX(var) \
    do { \
        unsigned char gethex_ch, gethex_hi, gethex_lo; \
        NEXTCHAR(gethex_ch); \
        gethex_hi = hex_char_to_decimal(gethex_ch); \
        if (gethex_hi == 16) \
            goto hex_error; \
        NEXTCHAR(gethex_ch); \
        gethex_lo = hex_char_to_decimal(gethex_ch); \
        if (gethex_lo == 16) \
            goto hex_error; \
        (var) = gethex_hi * 16 + gethex_lo; \
    } while (0)
/*
 * ANSI_TRANSLATE(ch): rewrite `ch`, the character that followed a
 * backslash, in place.
 *
 *   a/A -> 7  (bell)             b/B -> 8  (backspace)
 *   f/F -> 12 (form feed)        n/N -> 10 (line feed)
 *   r/R -> 13 (carriage return)  t/T -> 9  (tab)
 *   v/V -> 11 (vertical tab)
 *   x/X -> the byte given by the next two hex digits (see GETHEX, which
 *          may jump to the caller's error labels)
 *
 * Any other character is passed through the `translate` table when one is
 * installed, and is otherwise left unchanged.
 *
 * `ch` must be a modifiable lvalue; it is evaluated more than once.  The
 * body is wrapped in do { } while (0) so the macro is a single statement
 * and can be used as the unbraced body of an if/else.
 */
#define ANSI_TRANSLATE(ch) \
    do { \
        switch (ch) \
        { \
        case 'a': \
        case 'A': \
            (ch) = 7;   /* audible bell */ \
            break; \
        case 'b': \
        case 'B': \
            (ch) = 8;   /* backspace */ \
            break; \
        case 'f': \
        case 'F': \
            (ch) = 12;  /* form feed */ \
            break; \
        case 'n': \
        case 'N': \
            (ch) = 10;  /* line feed */ \
            break; \
        case 'r': \
        case 'R': \
            (ch) = 13;  /* carriage return */ \
            break; \
        case 't': \
        case 'T': \
            (ch) = 9;   /* tab */ \
            break; \
        case 'v': \
        case 'V': \
            (ch) = 11;  /* vertical tab */ \
            break; \
        case 'x':       /* hex code */ \
        case 'X': \
            GETHEX(ch); \
            break; \
        default: \
            /* other characters passed through */ \
            if (translate) \
                (ch) = translate[(unsigned char)(ch)]; \
            break; \
        } \
    } while (0)
/*
 * re_compile_pattern - compile a regular expression into matcher byte code.
 *
 * Parameters:
 *   regex  pattern text; only the first `size` bytes are examined.
 *   size   number of bytes in `regex`.
 *   bufp   pattern object.  Its `translate`, `buffer` and `allocated`
 *          fields are read on entry; if no buffer is present (or its
 *          recorded allocation is 0) a 256-byte buffer is malloc'ed here.
 *          `used`, `fastmap_accurate`, `uses_registers` and
 *          `num_registers` are reset on entry, and SET_FIELDS is invoked
 *          before every return to write the compile state back into bufp.
 *
 * Returns NULL on success, otherwise a pointer to a constant message:
 *   "Optimization error", "Badly placed special character",
 *   "Bad match register number", "Bad hexadecimal number",
 *   "Badly placed parenthesis", "Out of memory",
 *   "Regular expression ends prematurely" or
 *   "Regular expression too complex".
 *
 * Operation: each pattern character is mapped to an operator through
 * regexp_plain_ops (or regexp_quoted_ops after the quote character) and
 * the operator's precedence level drives a small precedence parser.
 * starts[] records, per level, the code offset at which the current
 * operand of that level began; future_jumps[] records jumps emitted for
 * alternation whose targets are patched once the precedence level drops.
 *
 * The helper macros used here (NEXTCHAR, ALLOC, STORE, SET_LEVEL_START,
 * CURRENT_LEVEL_START, PUSH_LEVEL_STARTS, POP_LEVEL_STARTS, PUT_ADDR,
 * INSERT_JUMP, SETBIT, SET_FIELDS) are defined outside this function and
 * refer to its local variables by name, so the locals must keep their
 * names.  Several of them may also jump to the error labels at the end.
 */
char *re_compile_pattern(unsigned char *regex, int size, regexp_t bufp)
{
    int a;
    int pos;
    int op;
    int current_level;
    int level;
    int opcode;
    int pattern_offset = 0, alloc;
    int starts[NUM_LEVELS * MAX_NESTING];
    int starts_base;
    int future_jumps[MAX_NESTING];
    int num_jumps;
    unsigned char ch = '\0';
    unsigned char *pattern;
    unsigned char *translate;
    int next_register;
    int paren_depth;
    int num_open_registers;
    int open_registers[RE_NREGS];
    int beginning_context;

    if (!re_compile_initialized)
        re_compile_initialize();

    bufp->used = 0;
    bufp->fastmap_accurate = 0;
    bufp->uses_registers = 1;
    bufp->num_registers = 1;
    translate = bufp->translate;
    pattern = bufp->buffer;
    alloc = bufp->allocated;
    if (alloc == 0 || pattern == NULL)
    {
        alloc = 256;
        pattern = malloc(alloc);
        if (!pattern)
            goto out_of_memory;
    }

    pattern_offset = 0;
    starts_base = 0;
    num_jumps = 0;
    current_level = 0;
    SET_LEVEL_START;
    num_open_registers = 0;
    next_register = 1;
    paren_depth = 0;
    beginning_context = 1;
    op = -1;
    /* we use Rend dummy to ensure that pending jumps are updated
       (due to low priority of Rend) before exiting the loop. */
    pos = 0;
    while (op != Rend)
    {
        /* Fetch the next operator; running out of input yields Rend. */
        if (pos >= size)
            op = Rend;
        else
        {
            NEXTCHAR(ch);
            if (translate)
                ch = translate[(unsigned char)ch];
            op = regexp_plain_ops[(unsigned char)ch];
            if (op == Rquote)
            {
                /* The quoted character is looked up untranslated. */
                NEXTCHAR(ch);
                op = regexp_quoted_ops[(unsigned char)ch];
                if (op == Rnormal && regexp_ansi_sequences)
                    ANSI_TRANSLATE(ch);
            }
        }
        level = regexp_precedences[op];
        /* printf("ch='%c' op=%d level=%d current_level=%d
           curlevstart=%d\n", ch, op, level, current_level,
           CURRENT_LEVEL_START); */
        if (level > current_level)
        {
            /* Entering tighter-binding levels: each starts here. */
            for (current_level++; current_level < level; current_level++)
                SET_LEVEL_START;
            SET_LEVEL_START;
        }
        else if (level < current_level)
        {
            /* Dropping to a looser level: resolve the pending jumps that
               were recorded at or after this level's start. */
            current_level = level;
            for (; num_jumps > 0 &&
                   future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
                 num_jumps--)
                PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
        }

        switch (op)
        {
        case Rend:
        {
            break;
        }
        case Rnormal:
        {
        normal_char:
            opcode = Cexact;
        store_opcode_and_arg: /* opcode & ch must be set */
            SET_LEVEL_START;
            ALLOC(2);
            STORE(opcode);
            STORE(ch);
            break;
        }
        case Ranychar:
        {
            opcode = Canychar;
        store_opcode:
            SET_LEVEL_START;
            ALLOC(1);
            STORE(opcode);
            break;
        }
        case Rquote:
        {
            /* Rquote was already consumed while fetching the operator. */
            abort();
            /*NOTREACHED*/
        }
        case Rbol:
        {
            /* Only an anchor at the start of a (sub)expression. */
            if (!beginning_context)
            {
                if (regexp_context_indep_ops)
                    goto op_error;
                else
                    goto normal_char;
            }
            opcode = Cbol;
            goto store_opcode;
        }
        case Reol:
        {
            /* Only an anchor when followed by end of pattern, by the
               alternation operator ('\174' is '|') or by a closing
               parenthesis; '\134' is the backslash that precedes them
               in the backslash-quoted syntaxes. */
            if (!((pos >= size) ||
                  ((regexp_syntax & RE_NO_BK_VBAR) ?
                   (regex[pos] == '\174') :
                   (pos+1 < size && regex[pos] == '\134' &&
                    regex[pos+1] == '\174')) ||
                  ((regexp_syntax & RE_NO_BK_PARENS)?
                   (regex[pos] == ')'):
                   (pos+1 < size && regex[pos] == '\134' &&
                    regex[pos+1] == ')'))))
            {
                if (regexp_context_indep_ops)
                    goto op_error;
                else
                    goto normal_char;
            }
            opcode = Ceol;
            goto store_opcode;
            /* NOTREACHED */
            break;
        }
        case Roptional:
        {
            if (beginning_context)
            {
                if (regexp_context_indep_ops)
                    goto op_error;
                else
                    goto normal_char;
            }
            if (CURRENT_LEVEL_START == pattern_offset)
                break; /* ignore empty patterns for ? */
            ALLOC(3);
            INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
                        pattern_offset + 3);
            break;
        }
        case Rstar:
        case Rplus:
        {
            if (beginning_context)
            {
                if (regexp_context_indep_ops)
                    goto op_error;
                else
                    goto normal_char;
            }
            if (CURRENT_LEVEL_START == pattern_offset)
                break; /* ignore empty patterns for + and * */
            ALLOC(9);
            INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
                        pattern_offset + 6);
            INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START);
            if (op == Rplus) /* jump over initial failure_jump */
                INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
                            CURRENT_LEVEL_START + 6);
            break;
        }
        case Ror:
        {
            ALLOC(6);
            INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
                        pattern_offset + 6);
            if (num_jumps >= MAX_NESTING)
                goto too_complex;
            /* Emit a jump with a zero placeholder address; it is patched
               when the precedence level drops (see PUT_ADDR above). */
            STORE(Cjump);
            future_jumps[num_jumps++] = pattern_offset;
            STORE(0);
            STORE(0);
            SET_LEVEL_START;
            break;
        }
        case Ropenpar:
        {
            SET_LEVEL_START;
            /* Groups beyond RE_NREGS still nest but get no register. */
            if (next_register < RE_NREGS)
            {
                bufp->uses_registers = 1;
                ALLOC(2);
                STORE(Cstart_memory);
                STORE(next_register);
                open_registers[num_open_registers++] = next_register;
                bufp->num_registers++;
                next_register++;
            }
            paren_depth++;
            PUSH_LEVEL_STARTS;
            current_level = 0;
            SET_LEVEL_START;
            break;
        }
        case Rclosepar:
        {
            if (paren_depth <= 0)
                goto parenthesis_error;
            POP_LEVEL_STARTS;
            current_level = regexp_precedences[Ropenpar];
            paren_depth--;
            if (paren_depth < num_open_registers)
            {
                bufp->uses_registers = 1;
                ALLOC(2);
                STORE(Cend_memory);
                num_open_registers--;
                STORE(open_registers[num_open_registers]);
            }
            break;
        }
        case Rmemory:
        {
            if (ch == '0')
                goto bad_match_register;
            assert(ch >= '0' && ch <= '9');
            bufp->uses_registers = 1;
            opcode = Cmatch_memory;
            ch -= '0';
            goto store_opcode_and_arg;
        }
        case Rextended_memory:
        {
            /* Two decimal digits follow: `ch` receives the first (tens)
               digit and `a` the second (units) digit, so the digits read
               left to right as the register number. */
            NEXTCHAR(ch);
            if (ch < '0' || ch > '9')
                goto bad_match_register;
            NEXTCHAR(a);
            if (a < '0' || a > '9')
                goto bad_match_register;
            ch = 10 * (ch - '0') + (a - '0');
            if (ch == 0 || ch >= RE_NREGS)
                goto bad_match_register;
            bufp->uses_registers = 1;
            opcode = Cmatch_memory;
            goto store_opcode_and_arg;
        }
        case Ropenset:
        {
            int complement;
            int prev;
            int offset;
            int range;
            int firstchar;

            /* Emit Cset followed by a 256-bit membership bitmap. */
            SET_LEVEL_START;
            ALLOC(1+256/8);
            STORE(Cset);
            offset = pattern_offset;
            for (a = 0; a < 256/8; a++)
                STORE(0);
            NEXTCHAR(ch);
            if (translate)
                ch = translate[(unsigned char)ch];
            if (ch == '\136') /* '^': complemented set */
            {
                complement = 1;
                NEXTCHAR(ch);
                if (translate)
                    ch = translate[(unsigned char)ch];
            }
            else
                complement = 0;
            prev = -1;
            range = 0;
            firstchar = 1;
            /* '\135' is ']'; as the first member it is taken literally. */
            while (ch != '\135' || firstchar)
            {
                firstchar = 0;
                if (regexp_ansi_sequences && ch == '\134') /* backslash */
                {
                    NEXTCHAR(ch);
                    ANSI_TRANSLATE(ch);
                }
                if (range)
                {
                    /* Upper end of "prev-ch": set every bit in between. */
                    for (a = prev; a <= (int)ch; a++)
                        SETBIT(pattern, offset, a);
                    prev = -1;
                    range = 0;
                }
                else if (prev != -1 && ch == '-')
                    range = 1;
                else
                {
                    SETBIT(pattern, offset, ch);
                    prev = ch;
                }
                NEXTCHAR(ch);
                if (translate)
                    ch = translate[(unsigned char)ch];
            }
            /* A '-' just before the closing bracket is a literal '-'. */
            if (range)
                SETBIT(pattern, offset, '-');
            if (complement)
            {
                for (a = 0; a < 256/8; a++)
                    pattern[offset+a] ^= 0xff;
            }
            break;
        }
        case Rbegbuf:
        {
            opcode = Cbegbuf;
            goto store_opcode;
        }
        case Rendbuf:
        {
            opcode = Cendbuf;
            goto store_opcode;
        }
        case Rwordchar:
        {
            opcode = Csyntaxspec;
            ch = Sword;
            goto store_opcode_and_arg;
        }
        case Rnotwordchar:
        {
            opcode = Cnotsyntaxspec;
            ch = Sword;
            goto store_opcode_and_arg;
        }
        case Rwordbeg:
        {
            opcode = Cwordbeg;
            goto store_opcode;
        }
        case Rwordend:
        {
            opcode = Cwordend;
            goto store_opcode;
        }
        case Rwordbound:
        {
            opcode = Cwordbound;
            goto store_opcode;
        }
        case Rnotwordbound:
        {
            opcode = Cnotwordbound;
            goto store_opcode;
        }
        default:
        {
            abort();
        }
        }
        /* Context-dependent operators are only special at the very start,
           right after an opening parenthesis, or right after '|'. */
        beginning_context = (op == Ropenpar || op == Ror);
    }

    /* A non-zero starts_base means a parenthesis was left open. */
    if (starts_base != 0)
        goto parenthesis_error;
    assert(num_jumps == 0);
    ALLOC(1);
    STORE(Cend);
    SET_FIELDS;
    if(!re_optimize(bufp))
        return "Optimization error";
    return NULL;

op_error:
    SET_FIELDS;
    return "Badly placed special character";

bad_match_register:
    SET_FIELDS;
    return "Bad match register number";

hex_error:
    SET_FIELDS;
    return "Bad hexadecimal number";

parenthesis_error:
    SET_FIELDS;
    return "Badly placed parenthesis";

out_of_memory:
    SET_FIELDS;
    return "Out of memory";

ends_prematurely:
    SET_FIELDS;
    return "Regular expression ends prematurely";

too_complex:
    SET_FIELDS;
    return "Regular expression too complex";
}
/*
 * Retire the helper macros that are private to the pattern compiler so
 * they cannot leak into the code that follows (NEXTCHAR in particular is
 * redefined just below with different semantics).  ANSI_TRANSLATE is
 * retired together with GETHEX because it expands to GETHEX.
 */
#undef CHARAT
#undef NEXTCHAR
#undef GETHEX
#undef ANSI_TRANSLATE
#undef ALLOC
#undef STORE
#undef CURRENT_LEVEL_START
#undef SET_LEVEL_START
#undef PUSH_LEVEL_STARTS
#undef POP_LEVEL_STARTS
#undef PUT_ADDR
#undef INSERT_JUMP
#undef SETBIT
#undef SET_FIELDS
/*
 * PREFETCH: jump to the enclosing function's `fail` label when the text
 * pointer has reached `textend`, i.e. there is no character left to read.
 *
 * Wrapped in do { } while (0) so that the embedded `if` can never capture
 * an `else` that follows the macro invocation.
 */
#define PREFETCH \
    do { \
        if (text == textend) \
            goto fail; \
    } while (0)

/*
 * NEXTCHAR(var): after a PREFETCH check, store the next text byte in `var`
 * (advancing `text`) and, when a `translate` table is installed, map the
 * byte through it.  `var` must be a modifiable lvalue usable as an index.
 *
 * Wrapped in do { } while (0) so all three steps form one statement; as
 * separate statements only the PREFETCH would be governed by a preceding
 * unbraced `if`.
 */
#define NEXTCHAR(var) \
    do { \
        PREFETCH; \
        (var) = (unsigned char)*text++; \
        if (translate) \
            (var) = translate[(var)]; \
    } while (0)
int re_match(regexp_t bufp, unsigned char *string, int size, int pos,
regexp_registers_t old_regs)
{
unsigned char *code;
unsigned char *translate;
unsigned char *text;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -