📄 regex.c
字号:
break; case Ropenpar: SET_LEVEL_START; if (next_register < RE_NREGS) { bufp->uses_registers = 1; ALLOC(2); STORE(Cstart_memory); STORE(next_register); open_registers[num_open_registers++] = next_register; next_register++; } paren_depth++; PUSH_LEVEL_STARTS; current_level = 0; SET_LEVEL_START; break; case Rclosepar: if (paren_depth <= 0) goto parenthesis_error; POP_LEVEL_STARTS; current_level = regexp_precedences[Ropenpar]; paren_depth--; if (paren_depth < num_open_registers) { bufp->uses_registers = 1; ALLOC(2); STORE(Cend_memory); num_open_registers--; STORE(open_registers[num_open_registers]); } break; case Rmemory: if (ch == '0') goto bad_match_register; assert(ch >= '0' && ch <= '9'); bufp->uses_registers = 1; opcode = Cmatch_memory; ch -= '0'; goto store_opcode_and_arg; case Rextended_memory: NEXTCHAR(ch); if (ch < '0' || ch > '9') goto bad_match_register; NEXTCHAR(a); if (a < '0' || a > '9') goto bad_match_register; ch = 10 * (a - '0') + ch - '0'; if (ch <= 0 || ch >= RE_NREGS) goto bad_match_register; bufp->uses_registers = 1; opcode = Cmatch_memory; goto store_opcode_and_arg; case Ropenset: { int complement,prev,offset,range,firstchar; SET_LEVEL_START; ALLOC(1+256/8); STORE(Cset); offset = pattern_offset; for (a = 0; a < 256/8; a++) STORE(0); NEXTCHAR(ch); if (translate) ch = translate[(unsigned char)ch]; if (ch == '\136') { complement = 1; NEXTCHAR(ch); if (translate) ch = translate[(unsigned char)ch]; } else complement = 0; prev = -1; range = 0; firstchar = 1; while (ch != '\135' || firstchar) { firstchar = 0; if (regexp_ansi_sequences && ch == '\134') { NEXTCHAR(ch); ANSI_TRANSLATE(ch); } if (range) { for (a = prev; a <= (int)ch; a++) SETBIT(pattern, offset, a); prev = -1; range = 0; } else if (prev != -1 && ch == '-') range = 1; else { SETBIT(pattern, offset, ch); prev = ch; } NEXTCHAR(ch); if (translate) ch = translate[(unsigned char)ch]; } if (range) SETBIT(pattern, offset, '-'); if (complement) { for (a = 0; a < 256/8; a++) pattern[offset+a] ^= 0xff; } break; } case Rbegbuf: opcode = Cbegbuf; goto store_opcode; case Rendbuf: opcode = Cendbuf; goto store_opcode; case Rwordchar: opcode = Csyntaxspec; ch = Sword; goto store_opcode_and_arg; case Rnotwordchar: opcode = Cnotsyntaxspec; ch = Sword; goto store_opcode_and_arg; case Rwordbeg: opcode = Cwordbeg; goto store_opcode; case Rwordend: opcode = Cwordend; goto store_opcode; case Rwordbound: opcode = Cwordbound; goto store_opcode; case Rnotwordbound: opcode = Cnotwordbound; goto store_opcode;#ifdef emacs case Remacs_at_dot: opcode = Cemacs_at_dot; goto store_opcode; case Remacs_syntaxspec: NEXTCHAR(ch); if (translate) ch = translate[(unsigned char)ch]; opcode = Csyntaxspec; ch = syntax_spec_code[(unsigned char)ch]; goto store_opcode_and_arg; case Remacs_notsyntaxspec: NEXTCHAR(ch); if (translate) ch = translate[(unsigned char)ch]; opcode = Cnotsyntaxspec; ch = syntax_spec_code[(unsigned char)ch]; goto store_opcode_and_arg;#endif /* emacs */ default: abort(); } beginning_context = (op == Ropenpar || op == Ror); } if (starts_base != 0) goto parenthesis_error; assert(num_jumps == 0); ALLOC(1); STORE(Cend); SET_FIELDS; return NULL; op_error: SET_FIELDS; return "Badly placed special character"; bad_match_register: SET_FIELDS; return "Bad match register number"; hex_error: SET_FIELDS; return "Bad hexadecimal number"; parenthesis_error: SET_FIELDS; return "Badly placed parenthesis"; out_of_memory: SET_FIELDS; return "Out of memory"; ends_prematurely: SET_FIELDS; return "Regular expression ends prematurely"; too_complex: SET_FIELDS; return "Regular expression too complex";}#undef CHARAT#undef NEXTCHAR#undef GETHEX#undef ALLOC#undef STORE#undef CURRENT_LEVEL_START#undef SET_LEVEL_START#undef PUSH_LEVEL_STARTS#undef POP_LEVEL_STARTS#undef PUT_ADDR#undef INSERT_JUMP#undef SETBIT#undef SET_FIELDSstatic void hre_compile_fastmap_aux(code, pos, visited, can_be_null, fastmap)char *code, *visited, *can_be_null, *fastmap;int pos;{ int a, b, syntaxcode; if (visited[pos]) return; /* we have already been here */ visited[pos] = 1; for (;;) switch (code[pos++]) { case Cend: *can_be_null = 1; return; case Cbol: case Cbegbuf: case Cendbuf: case Cwordbeg: case Cwordend: case Cwordbound: case Cnotwordbound:#ifdef emacs case Cemacs_at_dot:#endif /* emacs */ break; case Csyntaxspec: syntaxcode = code[pos++]; for (a = 0; a < 256; a++) if (SYNTAX(a) == syntaxcode) fastmap[a] = 1; return; case Cnotsyntaxspec: syntaxcode = code[pos++]; for (a = 0; a < 256; a++) if (SYNTAX(a) != syntaxcode) fastmap[a] = 1; return; case Ceol: fastmap['\n'] = 1; if (*can_be_null == 0) *can_be_null = 2; /* can match null, but only at end of buffer*/ return; case Cset: for (a = 0; a < 256/8; a++) if (code[pos + a] != 0) for (b = 0; b < 8; b++) if (code[pos + a] & (1 << b)) fastmap[(a << 3) + b] = 1; pos += 256/8; return; case Cexact: fastmap[(unsigned char)code[pos]] = 1; return; case Canychar: for (a = 0; a < 256; a++) if (a != '\n') fastmap[a] = 1; return; case Cstart_memory: case Cend_memory: pos++; break; case Cmatch_memory: /* should this ever happen for sensible patterns??? */ *can_be_null = 1; return; case Cjump: case Cdummy_failure_jump: case Cupdate_failure_jump: case Cstar_jump: a = (unsigned char)code[pos++]; a |= (unsigned char)code[pos++] << 8; pos += (int)(short)a; if (visited[pos]) { /* argh... the regexp contains empty loops. This is not good, as this may cause a failure stack overflow when matching. Oh well. */ /* this path leads nowhere; pursue other paths. */ return; } visited[pos] = 1; break; case Cfailure_jump: a = (unsigned char)code[pos++]; a |= (unsigned char)code[pos++] << 8; a = pos + (int)(short)a; hre_compile_fastmap_aux(code, a, visited, can_be_null, fastmap); break; default: abort(); /* probably some opcode is missing from this switch */ /*NOTREACHED*/ }}static int re_do_compile_fastmap(buffer, used, pos, can_be_null, fastmap)char *buffer, *fastmap, *can_be_null;int used, pos;{ char small_visited[512], *visited; if (used <= sizeof(small_visited)) visited = small_visited; else { visited = malloc(used); if (!visited) return 0; } *can_be_null = 0; memset(fastmap, 0, 256); memset(visited, 0, used); hre_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap); if (visited != small_visited) free(visited); return 1;}void hre_compile_fastmap(bufp)regexp_t bufp;{ if (!bufp->fastmap || bufp->fastmap_accurate) return; assert(bufp->used > 0); if (!re_do_compile_fastmap(bufp->buffer, bufp->used, 0, &bufp->can_be_null, bufp->fastmap)) return; if (bufp->buffer[0] == Cbol) bufp->anchor = 1; /* begline */ else if (bufp->buffer[0] == Cbegbuf) bufp->anchor = 2; /* begbuf */ else bufp->anchor = 0; /* none */ bufp->fastmap_accurate = 1;}#define INITIAL_FAILURES 128 /* initial # failure points to allocate */#define MAX_FAILURES 4100 /* max # of failure points before failing */int hre_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop)regexp_t bufp;char *string1, *string2;int size1, size2, pos, mstop;regexp_registers_t regs;{ struct failure_point { char *text, *partend, *code; } *failure_stack_start, *failure_sp, *failure_stack_end, initial_failure_stack[INITIAL_FAILURES]; char *code, *translate, *text, *textend, *partend, *part_2_end; char *regstart_text[RE_NREGS], *regstart_partend[RE_NREGS]; char *regend_text[RE_NREGS], *regend_partend[RE_NREGS]; int a, b, ch, reg, regch, match_end; char *regtext, *regpartend, *regtextend;#define PREFETCH \ MACRO_BEGIN \ if (text == partend) \ { \ if (text == textend) \ goto fail; \ text = string2; \ partend = part_2_end; \ } \ MACRO_END#define NEXTCHAR(var) \ MACRO_BEGIN \ PREFETCH; \ (var) = (unsigned char)*text++; \ if (translate) \ (var) = (unsigned char)translate[(var)]; \ MACRO_END assert(pos >= 0 && size1 >= 0 && size2 >= 0 && mstop >= 0); assert(mstop <= size1 + size2); assert(pos <= mstop); if (pos <= size1) { text = string1 + pos; if (mstop <= size1) { partend = string1 + mstop; textend = partend; } else { partend = string1 + size1; textend = string2 + mstop - size1; } part_2_end = string2 + mstop - size1; } else { text = string2 + pos - size1; partend = string2 + mstop - size1; textend = partend; part_2_end = partend; } if (bufp->uses_registers && regs != NULL) for (a = 0; a < RE_NREGS; a++) regend_text[a] = NULL; code = bufp->buffer; translate = bufp->translate; failure_stack_start = failure_sp = initial_failure_stack; failure_stack_end = initial_failure_stack + INITIAL_FAILURES;#if 0 /* hre_search_2 has already done this, and otherwise we get little benefit from this. So I'll leave this out. */ if (bufp->fastmap_accurate && !bufp->can_be_null && text != textend && !bufp->fastmap[translate ? (unsigned char)translate[(unsigned char)*text] : (unsigned char)*text]) return -1; /* it can't possibly match */#endif continue_matching: for (;;) { switch (*code++) { case Cend: if (partend != part_2_end) match_end = text - string1; else match_end = text - string2 + size1; if (regs) { regs->start[0] = pos; regs->end[0] = match_end; if (!bufp->uses_registers) { for (a = 1; a < RE_NREGS; a++) { regs->start[a] = -1; regs->end[a] = -1; } } else { for (a = 1; a < RE_NREGS; a++) { if (regend_text[a] == NULL) { regs->start[a] = -1; regs->end[a] = -1; continue; } if (regstart_partend[a] != part_2_end) regs->start[a] = regstart_text[a] - string1; else regs->start[a] = regstart_text[a] - string2 + size1; if (regend_partend[a] != part_2_end) regs->end[a] = regend_text[a] - string1; else regs->end[a] = regend_text[a] - string2 + size1; } } } if (failure_stack_start != initial_failure_stack) free((char *)failure_stack_start); return match_end - pos; case Cbol: if (text == string1 || text[-1] == '\n') /* text[-1] always valid */ break; goto fail; case Ceol: if (text == string2 + size2 || (text == string1 + size1 ? (size2 == 0 || *string2 == '\n') : *text == '\n')) break; goto fail; case Cset: NEXTCHAR(ch); if (code[ch/8] & (1<<(ch & 7))) { code += 256/8; break; } goto fail; case Cexact: NEXTCHAR(ch); if (ch != (unsigned char)*code++) goto fail; break; case Canychar: NEXTCHAR(ch); if (ch == '\n') goto fail; break; case Cstart_memory: reg = *code++; regstart_text[reg] = text; regstart_partend[reg] = partend; break; case Cend_memory: reg = *code++; regend_text[reg] = text; regend_partend[reg] = partend; break; case Cmatch_memory: reg = *code++; if (regend_text[reg] == NULL) goto fail; /* or should we just match nothing? */ regtext = regstart_text[reg]; regtextend = regend_text[reg]; if (regstart_partend[reg] == regend_partend[reg]) regpartend = regtextend; else regpartend = string1 + size1; for (;regtext != regtextend;) { NEXTCHAR(ch); if (regtext == regpartend) regtext = string2; regch = (unsigned char)*regtext++; if (translate) regch = (unsigned char)translate[regch]; if (regch != ch) goto fail; } break; case Cstar_jump: /* star is coded as: 1: failure_jump 2 ... code for operand of star star_jump 1 2: ... code after star We change the star_jump to update_failure_jump if we can determine that it is safe to do so; otherwise we change it to an ordinary jump. plus is coded as jump 2 1: failure_jump 3 2: ... code for operand of plus star_jump 1 3: ... code after plus For star_jump considerations this is processed identically to star. */ a = (unsigned char)*code++; a |= (unsigned char)*code++ << 8; a = (int)(short)a; { char map[256], can_be_null; char *p1, *p2; p1 = code + a + 3; /* skip the failure_jump */ assert(p1[-3] == Cfailure_jump);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -