📄 regex.c
字号:
p2 = code; /* p1 points inside loop, p2 points to after loop */ if (!re_do_compile_fastmap(bufp->buffer, bufp->used, p2 - bufp->buffer, &can_be_null, map)) goto make_normal_jump; /* If we might introduce a new update point inside the loop, we can't optimize because then update_jump would update a wrong failure point. Thus we have to be quite careful here. */ loop_p1: /* loop until we find something that consumes a character */ switch (*p1++) { case Cbol: case Ceol: case Cbegbuf: case Cendbuf: case Cwordbeg: case Cwordend: case Cwordbound: case Cnotwordbound:#ifdef emacs case Cemacs_at_dot:#endif /* emacs */ goto loop_p1; case Cstart_memory: case Cend_memory: p1++; goto loop_p1; case Cexact: ch = (unsigned char)*p1++; if (map[ch]) goto make_normal_jump; break; case Canychar: for (b = 0; b < 256; b++) if (b != '\n' && map[b]) goto make_normal_jump; break; case Cset: for (b = 0; b < 256; b++) if ((p1[b >> 3] & (1 << (b & 7))) && map[b]) goto make_normal_jump; p1 += 256/8; break; default: goto make_normal_jump; } /* now we know that we can't backtrack. */ while (p1 != p2 - 3) { switch (*p1++) { case Cend: abort(); /* we certainly shouldn't get this inside loop */ /*NOTREACHED*/ case Cbol: case Ceol: case Canychar: case Cbegbuf: case Cendbuf: case Cwordbeg: case Cwordend: case Cwordbound: case Cnotwordbound:#ifdef emacs case Cemacs_at_dot:#endif /* emacs */ break; case Cset: p1 += 256/8; break; case Cexact: case Cstart_memory: case Cend_memory: case Cmatch_memory: case Csyntaxspec: case Cnotsyntaxspec: p1++; break; case Cjump: case Cstar_jump: case Cfailure_jump: case Cupdate_failure_jump: case Cdummy_failure_jump: goto make_normal_jump; default: printf("regex.c: processing star_jump: unknown op %d\n", p1[-1]); break; } } goto make_update_jump; } make_normal_jump: /* printf("changing to normal jump\n"); */ code -= 3; *code = Cjump; break; make_update_jump: /* printf("changing to update jump\n"); */ code -= 2; a += 3; /* jump to after the Cfailure_jump */ code[-1] = Cupdate_failure_jump; code[0] = a & 0xff; code[1] = a >> 8; /* fall to next case */ case Cupdate_failure_jump: failure_sp[-1].text = text; failure_sp[-1].partend = partend; /* fall to next case */ case Cjump: a = (unsigned char)*code++; a |= (unsigned char)*code++ << 8; code += (int)(short)a; break; case Cdummy_failure_jump: case Cfailure_jump: if (failure_sp == failure_stack_end) { if (failure_stack_start != initial_failure_stack) goto error; failure_stack_start = (struct failure_point *) malloc(MAX_FAILURES * sizeof(*failure_stack_start)); failure_stack_end = failure_stack_start + MAX_FAILURES; memcpy((char *)failure_stack_start, (char *)initial_failure_stack, INITIAL_FAILURES * sizeof(*failure_stack_start)); failure_sp = failure_stack_start + INITIAL_FAILURES; } a = (unsigned char)*code++; a |= (unsigned char)*code++ << 8; a = (int)(short)a; if (code[-3] == Cdummy_failure_jump) { /* this is only used in plus */ assert(*code == Cfailure_jump); b = (unsigned char)code[1]; b |= (unsigned char)code[2] << 8; failure_sp->code = code + (int)(short)b + 3; failure_sp->text = NULL; code += a; } else { failure_sp->code = code + a; failure_sp->text = text; failure_sp->partend = partend; } failure_sp++; break; case Cbegbuf: if (text == string1) break; goto fail; case Cendbuf: if (size2 == 0 ? text == string1 + size1 : text == string2 + size2) break; goto fail; case Cwordbeg: if (text == string2 + size2) goto fail; if (size2 == 0 && text == string1 + size1) goto fail; if (SYNTAX(text == string1 + size1 ? *string1 : *text) != Sword) goto fail; if (text == string1) break; if (SYNTAX(text[-1]) != Sword) break; goto fail; case Cwordend: if (text == string1) goto fail; if (SYNTAX(text[-1]) != Sword) goto fail; if (text == string2 + size2) break; if (size2 == 0 && text == string1 + size1) break; if (SYNTAX(*text) == Sword) goto fail; break; case Cwordbound: /* Note: as in gnu regexp, this also matches at the beginning and end of buffer. */ if (text == string1 || text == string2 + size2 || (size2 == 0 && text == string1 + size1)) break; if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword)) break; goto fail; case Cnotwordbound: /* Note: as in gnu regexp, this never matches at the beginning and end of buffer. */ if (text == string1 || text == string2 + size2 || (size2 == 0 && text == string1 + size1)) goto fail; if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword))) goto fail; break; case Csyntaxspec: NEXTCHAR(ch); if (SYNTAX(ch) != (unsigned char)*code++) goto fail; break; case Cnotsyntaxspec: NEXTCHAR(ch); if (SYNTAX(ch) != (unsigned char)*code++) break; goto fail;#ifdef emacs case Cemacs_at_dot: if (PTR_CHAR_POS((unsigned char *)text) + 1 != point) goto fail; break;#endif /* emacs */ default: abort(); /*NOTREACHED*/ } } abort(); /*NOTREACHED*/ fail: if (failure_sp != failure_stack_start) { failure_sp--; text = failure_sp->text; if (text == NULL) goto fail; partend = failure_sp->partend; code = failure_sp->code; goto continue_matching; } if (failure_stack_start != initial_failure_stack) free((char *)failure_stack_start); return -1; error: if (failure_stack_start != initial_failure_stack) free((char *)failure_stack_start); return -2;}#undef PREFETCH#undef NEXTCHAR#undef PUSH_FAILUREint hre_match(bufp, string, size, pos, regs)regexp_t bufp;char *string;int size, pos;regexp_registers_t regs;{ return hre_match_2(bufp, string, size, (char *)NULL, 0, pos, regs, size);}int hre_search_2(bufp, string1, size1, string2, size2, pos, range, regs, mstop)regexp_t bufp;char *string1, *string2;int size1, size2, pos, range, mstop;regexp_registers_t regs;{ char *fastmap, *translate, *text, *partstart, *partend; int dir, ret; char anchor; /* printf ("hre_search_2\n"); printf ("string1 (%s) with size %d\n", string1, size1); printf ("string2 (%s) with size %d\n", string2, size2); printf ("pos %d, range %d, mstop %d\n", pos, range, mstop); fflush(stdout);*/ assert(size1 >= 0 && size2 >= 0 && pos >= 0 && mstop >= 0); assert(pos + range + 1 >= 0 && pos + range - 1 <= size1 + size2); assert(pos <= mstop); fastmap = bufp->fastmap; translate = bufp->translate; if (fastmap && !bufp->fastmap_accurate) hre_compile_fastmap(bufp); anchor = bufp->anchor; if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */ fastmap = NULL; if (range < 0) { dir = -1; range = -range; } else dir = 1; if (anchor == 2) if (pos != 0) return -1; else range = 0; for (; range >= 0; range--, pos += dir) { if (fastmap) { if (dir == 1) { /* searching forwards */ if (pos < size1) { text = string1 + pos; if (pos + range > size1) partend = string1 + size1; else partend = string1 + pos + range; } else { text = string2 + pos - size1; partend = string2 + pos + range - size1; } partstart = text; if (translate) while (text != partend && !fastmap[(unsigned char) translate[(unsigned char)*text]]) text++; else while (text != partend && !fastmap[(unsigned char)*text]) text++; pos += text - partstart; range -= text - partstart; if (pos == size1 + size2 && bufp->can_be_null == 0) return -1; } else { /* searching backwards */ if (pos <= size1) { text = string1 + pos; partstart = string1 + pos - range; } else { text = string2 + pos - size1; if (range < pos - size1) partstart = string2 + pos - size1 - range; else partstart = string2; } partend = text; if (translate) while (text != partstart && !fastmap[(unsigned char) translate[(unsigned char)*text]]) text--; else while (text != partstart && !fastmap[(unsigned char)*text]) text--; pos -= partend - text; range -= partend - text; } } if (anchor == 1) { /* anchored to begline */ if (pos > 0 && (pos <= size1 ? string1[pos - 1] : string2[pos - size1 - 1]) != '\n') continue; } assert(pos >= 0 && pos <= size1 + size2); ret = hre_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop); if (ret >= 0) return pos; if (ret == -2) return -2; } return -1;}int hre_search(bufp, string, size, startpos, range, regs)regexp_t bufp;char *string;int size, startpos, range;regexp_registers_t regs;{ return hre_search_2(bufp, string, size, (char *)NULL, 0, startpos, range, regs, size);}static struct re_pattern_buffer hre_comp_buf;char *hre_comp(s)char *s;{ if (s == NULL) { if (!hre_comp_buf.buffer) return "Out of memory"; return NULL; } if (!hre_comp_buf.buffer) { /* the buffer will be allocated automatically */ hre_comp_buf.fastmap = malloc(256); hre_comp_buf.translate = NULL; } return hre_compile_pattern(s, strlen(s), &hre_comp_buf);}#ifndef c_plusplusint hre_exec(s)char *s;{ int len = strlen(s); return hre_search(&hre_comp_buf, s, len, 0, len, (regexp_registers_t)NULL) >= 0;}#endif#ifdef TEST_REGEXPint main(){ char buf[500]; char *cp; struct re_pattern_buffer exp; struct re_registers regs; int a,pos; char fastmap[256]; exp.allocated = 0; exp.buffer = 0; exp.translate = NULL; exp.fastmap = fastmap; /* hre_set_syntax(RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_ANSI_HEX); */ while (1) { printf("Enter regexp:\n"); gets(buf); cp=hre_compile_pattern(buf, strlen(buf), &exp); if (cp) { printf("Error: %s\n", cp); continue; } hre_compile_fastmap(&exp); printf("dump:\n"); for (pos = 0; pos < exp.used;) { printf("%d: ", pos); switch (exp.buffer[pos++]) { case Cend: strcpy(buf, "end"); break; case Cbol: strcpy(buf, "bol"); break; case Ceol: strcpy(buf, "eol"); break; case Cset: strcpy(buf, "set "); for (a = 0; a < 256/8; a++) sprintf(buf+strlen(buf)," %02x", (unsigned char)exp.buffer[pos++]); break; case Cexact: sprintf(buf, "exact '%c' 0x%x", exp.buffer[pos], (unsigned char)exp.buffer[pos]); pos++; break; case Canychar: strcpy(buf, "anychar"); break; case Cstart_memory: sprintf(buf, "start_memory %d", exp.buffer[pos++]); break; case Cend_memory: sprintf(buf, "end_memory %d", exp.buffer[pos++]); break; case Cmatch_memory: sprintf(buf, "match_memory %d", exp.buffer[pos++]); break; case Cjump: case Cdummy_failure_jump: case Cstar_jump: case Cfailure_jump: case Cupdate_failure_jump: a = (unsigned char)exp.buffer[pos++]; a += (unsigned char)exp.buffer[pos++] << 8; a = (int)(short)a; switch (exp.buffer[pos-3]) { case Cjump: cp = "jump"; break; case Cstar_jump: cp = "star_jump"; break; case Cfailure_jump: cp = "failure_jump"; break; case Cupdate_failure_jump: cp = "update_failure_jump"; break; case Cdummy_failure_jump: cp = "dummy_failure_jump"; break; default: cp = "unknown jump"; break; } sprintf(buf, "%s %d", cp, a + pos); break; case Cbegbuf: strcpy(buf,"begbuf"); break; case Cendbuf: strcpy(buf,"endbuf"); break; case Cwordbeg: strcpy(buf,"wordbeg"); break; case Cwordend: strcpy(buf,"wordend"); break; case Cwordbound: strcpy(buf,"wordbound"); break; case Cnotwordbound: strcpy(buf,"notwordbound"); break; default: sprintf(buf, "unknown code %d", (unsigned char)exp.buffer[pos - 1]); break; } printf("%s\n", buf); } printf("can_be_null = %d uses_registers = %d anchor = %d\n", exp.can_be_null, exp.uses_registers, exp.anchor); printf("fastmap:"); for (a = 0; a < 256; a++) if (exp.fastmap[a]) printf(" %d", a); printf("\n"); printf("Enter strings. An empty line terminates.\n"); while (fgets(buf, sizeof(buf), stdin)) { if (buf[0] == '\n') break; a = hre_search(&exp, buf, strlen(buf), 0, strlen(buf), ®s); printf("search returns %d\n", a); if (a != -1) { for (a = 0; a < RE_NREGS; a++) { printf("buf %d: %d to %d\n", a, regs.start[a], regs.end[a]); } } } }}#endif /* TEST_REGEXP */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -