📄 re.c
字号:
/***************************************************************************
 * This program is Copyright (C) 1986, 1987, 1988 by Jonathan Payne.  JOVE *
 * is provided to you without charge, and with no warranty.  You may give  *
 * away copies of JOVE, including sources, provided that this notice is    *
 * included in all the files.                                              *
 ***************************************************************************/

/* search package */

#include "jove.h"
#include "re.h"
#include "ctype.h"

private void
	search proto((int, bool, bool));

private int
	do_comp proto((struct RE_block *,int));

private char
	searchstr[128];		/* global search string */

char	rep_search[128],	/* replace search string */
	rep_str[128];		/* contains replacement string */

bool	CaseIgnore = OFF,	/* ignore case? */
	WrapScan = OFF,		/* wrap at end of buffer? */
	UseRE = OFF;		/* use regular expressions */

#define cind_cmp(a, b)	(CharUpcase(a) == CharUpcase(b))

/* Pattern reader state: REptr walks the pattern being compiled and
 * REpeekc holds a single pushed-back character (-1 when empty).
 */
private int	REpeekc;
private char	*REptr;

/* Fetch the next pattern character.
 *
 * A pushed-back character in REpeekc is returned (and cleared) first.
 * Otherwise the next character at REptr is consumed.  At the end of the
 * pattern '\0' is returned and REptr is not advanced, so further calls
 * keep returning '\0'.
 */
private int
REgetc()
{
	int	c;

	if ((c = REpeekc) != -1)
		REpeekc = -1;
	else if (*REptr)
		c = *REptr++;
	else
		c = '\0';

	return c;
}

/* Compiled-pattern verbs.  Every verb is even so that the low bit
 * (STAR) can be or'ed into a verb to mean "any number of these".
 */
#define STAR	01	/* Match any number of last RE. */
#define AT_BOL	2	/* ^ */
#define AT_EOL	4	/* $ */
#define AT_BOW	6	/* \< */
#define AT_EOW	8	/* \> */
#define OPENP	10	/* \( */
#define CLOSEP	12	/* \) */
#define CURLYB	14	/* \{ */

#define NOSTR	14	/* Codes <= NOSTR can't be *'d. */

#define ANYC	(NOSTR+2)	/* . */
#define NORMC	(ANYC+2)	/* normal character */
#define CINDC	(NORMC+2)	/* case independent character */
#define ONE_OF	(CINDC+2)	/* [xxx] */
#define NONE_OF	(ONE_OF+2)	/* [^xxx] */
#define BACKREF	(NONE_OF+2)	/* \# */
#define EOP	(BACKREF+2)	/* end of pattern */

/* ONE_OF/NONE_OF is represented as a bit vector.
 * These symbols parameterize the representation.
 */

#define BYTESIZE	8
#define SETSIZE		(NCHARS / BYTESIZE)
#define SETBYTE(c)	((c) / BYTESIZE)
#define SETBIT(c)	(1 << ((c) % BYTESIZE))

#define NPAR	10	/* [0-9] - 0th is the entire matched string, i.e. & */

/* Compiler output state shared by REcompile and do_comp: comp_ptr is
 * the next free byte of the compile buffer, alt_p the next free slot in
 * the list of alternates and alt_endp the limit checked before a new
 * alternate is recorded.
 */
private char	*comp_ptr,
		**alt_p,
		**alt_endp;

/* Compile `pattern' into re_blk.
 *
 * pattern - NUL-terminated search pattern.
 * re      - true to compile as a full regular expression (OKAY_RE);
 *           otherwise the pattern is compiled as NORM, where '.', '['
 *           and '*' are taken literally.
 * re_blk  - receives the compiled code (r_compbuf), the NULL-terminated
 *           list of alternates (r_alternates), and the r_nparens,
 *           r_anchored and r_firstc fields.
 *
 * After compiling, a pattern with a single alternate is inspected for
 * two facts: r_anchored is set when it starts with ^, and r_firstc is
 * set to the upper-cased first literal character when it starts with a
 * literal string.
 */
void
REcompile(pattern, re, re_blk)
char	*pattern;
bool	re;
struct RE_block	*re_blk;
{
	REptr = pattern;
	REpeekc = -1;
	comp_ptr = re_blk->r_compbuf;
	alt_p = re_blk->r_alternates;
	alt_endp = alt_p + NALTS;
	*alt_p++ = comp_ptr;
	re_blk->r_nparens = 0;
	(void) do_comp(re_blk, re ? OKAY_RE : NORM);
	*alt_p = NULL;

	re_blk->r_anchored = NO;
	re_blk->r_firstc = '\0';
	/* do a little post processing */
	if (re_blk->r_alternates[1] == NULL) {
		char	*p;

		p = re_blk->r_alternates[0];
		for (;;) {
			switch (*p) {
			case OPENP:
			case CLOSEP:
				/* verb + paren number: skip both bytes */
				p += 2;
				continue;

			case AT_BOW:
			case AT_EOW:
				/* single-byte verbs */
				p += 1;
				continue;

			case AT_BOL:
				re_blk->r_anchored = YES;
				/* don't set firstc -- won't work */
				break;

			case NORMC:
			case CINDC:
				/* p[0] is the verb, p[1] the count, p[2] the first char */
				re_blk->r_firstc = CharUpcase(p[2]);
				break;

			default:
				break;
			}
			break;
		}
	}
}

/* compile the pattern into an internal code
 *
 * Reads characters through REgetc() and appends compiled code at
 * comp_ptr until the pattern ends or, inside \{ ... \}, until the
 * current alternative ends.
 *
 * re_blk - block being filled in; r_compbuf bounds the output and
 *          r_nparens numbers the \( \) groups.
 * kind   - OKAY_RE: full regular expression, wrapped in an implied
 *                   \( ... \) pair;
 *          NORM:    '.', '[' and '*' are ordinary characters;
 *          IN_CB:   one alternative of a \{a,b,c\} list (this function
 *                   calls itself with IN_CB for each alternative).
 *
 * Returns 0 when compilation stopped at \}, otherwise 1 (end of
 * pattern, or a ',' separating \{ \} alternatives).
 *
 * Errors are reported through complain(), which is relied on not to
 * return (see the NOTREACHED marker below).
 *
 * Layout of the code emitted here:
 *	NORMC/CINDC	verb, count, then `count' characters
 *	NORMC|STAR etc.	verb, then the single character (no count)
 *	OPENP/CLOSEP	verb, paren number
 *	BACKREF		verb, group number
 *	ONE_OF/NONE_OF	verb, SETSIZE bytes of bit vector
 *	CURLYB		verb, number of alternatives, then for each
 *			alternative a length byte followed by its code
 */
private int
do_comp(re_blk, kind)
struct RE_block	*re_blk;
int	kind;
{
	char	*this_verb,
		*prev_verb,
		*start_p,
		*comp_endp;
	int	parens[NPAR],
		*parenp,
		c,
		ret_code;

	parenp = parens;
	this_verb = NULL;
	ret_code = 1;
	/* leave slack so the short fixed-size emits below cannot overrun */
	comp_endp = &re_blk->r_compbuf[COMPSIZE - 6];

	/* wrap the whole expression around (implied) parens */
	if (kind == OKAY_RE) {
		*comp_ptr++ = OPENP;
		*comp_ptr++ = re_blk->r_nparens;
		*parenp++ = re_blk->r_nparens++;
	}

	start_p = comp_ptr;

	while ((c = REgetc()) != '\0') {
		if (comp_ptr > comp_endp) {
toolong:
			complain("Search string too long/complex.");
		}
		prev_verb = this_verb;
		this_verb = comp_ptr;

		if (kind == NORM && strchr(".[*", c) != NULL)
			goto defchar;
		switch (c) {
		case '\\':
			switch (c = REgetc()) {
			case '\0':
				complain("[Premature end of pattern]");
				/*NOTREACHED*/

			case '{':
			    {
				char	*wcntp;		/* word count */

				*comp_ptr++ = CURLYB;
				wcntp = comp_ptr;
				*comp_ptr++ = 0;
				for (;;) {
					int	comp_val;
					char	*comp_len;

					/* reserve the length byte, then compile
					 * one alternative after it
					 */
					comp_len = comp_ptr++;
					comp_val = do_comp(re_blk, IN_CB);
					*comp_len = comp_ptr - comp_len;
					(*wcntp) += 1;
					if (comp_val == 0)
						break;
				}
				break;
			    }

			case '}':
				if (kind != IN_CB)
					complain("Unexpected \\}.");
				ret_code = 0;
				goto outahere;

			case '(':
				/* r_nparens alone does not bound the depth of
				 * the local paren stack: \| resets it to 0
				 * while leaving the stack as deep as it was.
				 * Check the stack itself so the push below can
				 * never run past parens[NPAR - 1].
				 */
				if (re_blk->r_nparens >= NPAR
				    || parenp >= &parens[NPAR])
					complain("Too many ('s; max is %d.", NPAR);
				*comp_ptr++ = OPENP;
				*comp_ptr++ = re_blk->r_nparens;
				*parenp++ = re_blk->r_nparens++;
				break;

			case ')':
				if (parenp == parens)
					complain("Too many )'s.");
				*comp_ptr++ = CLOSEP;
				*comp_ptr++ = *--parenp;
				break;

			case '|':
				if (alt_p >= alt_endp)
					complain("Too many alternates; max %d.", NALTS);
				/* close off previous alternate */
				/* NOTE(review): if the local paren stack is
				 * empty here (kind NORM or IN_CB with no open
				 * \(, or after a stray \)), the pop below reads
				 * before parens[].  Behaviour is left as it
				 * was; confirm the intended handling.
				 */
				*comp_ptr++ = CLOSEP;
				*comp_ptr++ = *--parenp;
				*comp_ptr++ = EOP;
				*alt_p++ = comp_ptr;

				/* start a new one */
				re_blk->r_nparens = 0;
				*comp_ptr++ = OPENP;
				*comp_ptr++ = re_blk->r_nparens;
				*parenp++ = re_blk->r_nparens++;
				start_p = comp_ptr;
				break;

			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				*comp_ptr++ = BACKREF;
				*comp_ptr++ = c - '0';
				break;

			case '<':
				*comp_ptr++ = AT_BOW;
				break;

			case '>':
				*comp_ptr++ = AT_EOW;
				break;

			default:
				goto defchar;
			}
			break;

		case ',':
			/* only a separator inside \{ ... \} */
			if (kind != IN_CB)
				goto defchar;
			goto outahere;

		case '.':
			*comp_ptr++ = ANYC;
			break;

		case '^':
			/* special only at the start of an alternate */
			if (comp_ptr == start_p) {
				*comp_ptr++ = AT_BOL;
				break;
			}
			goto defchar;

		case '$':
			/* special only when followed by end of pattern
			 * or by a backslash; the peeked character is
			 * left in REpeekc for the next REgetc()
			 */
			if ((REpeekc = REgetc()) != '\0' && REpeekc != '\\')
				goto defchar;
			*comp_ptr++ = AT_EOL;
			break;

		case '[':
		    {
			int	chrcnt;

			*comp_ptr++ = ONE_OF;
			if (comp_ptr + SETSIZE >= comp_endp)
				goto toolong;
			byte_zero(comp_ptr, (size_t) SETSIZE);
			if ((REpeekc = REgetc()) == '^') {
				*this_verb = NONE_OF;
				/* Get it for real this time. */
				(void) REgetc();
			}
			chrcnt = 0;
			while ((c = REgetc()) != ']' && c != '\0') {
				if (c == '\\') {
					/* backslash quotes the next character */
					c = REgetc();
					if (c == '\0')
						break;
				} else if ((REpeekc = REgetc()) == '-') {
					int	i;

					i = c;
					(void) REgetc();	/* reread '-' */
					c = REgetc();
					if (c == '\0')
						break;
					/* set i .. c-1 here; c itself is set below */
					while (i < c) {
						comp_ptr[SETBYTE(i)] |= SETBIT(i);
						i += 1;
					}
				}
				comp_ptr[SETBYTE(c)] |= SETBIT(c);
				chrcnt += 1;
			}
			if (c == '\0')
				complain("Missing ].");
			if (chrcnt == 0)
				complain("Empty [].");
			comp_ptr += SETSIZE;
			break;
		    }

		case '*':
			if (prev_verb == NULL || *prev_verb <= NOSTR || (*prev_verb&STAR)!=0)
				goto defchar;

			if (*prev_verb == NORMC || *prev_verb == CINDC) {
				char	lastc = comp_ptr[-1];

				/* The * operator applies only to the
				 * previous character.  Since we were
				 * building a string-matching command
				 * (NORMC or CINDC), we must split it
				 * up and work with the last character.
				 *
				 * Note that the STARed versions of these
				 * commands do not operate on strings, and
				 * so do not need or have character counts.
				 */

				if (prev_verb[1] == 1) {
					/* Only one char in string:
					 * delete old command.
					 */
					this_verb = prev_verb;
				} else {
					/* Several chars in string:
					 * strip off the last.
					 * New verb is derived from old.
					 */
					prev_verb[1] -= 1;
					this_verb -= 1;
					*this_verb = *prev_verb;
				}
				comp_ptr = this_verb + 1;
				*comp_ptr++ = lastc;
			} else {
				/* This command is just the previous one,
				 * whose verb we will modify.
				 */
				this_verb = prev_verb;
			}
			*this_verb |= STAR;
			break;

		default:
defchar:
			if ((prev_verb == NULL) ||
			    !(*prev_verb == NORMC || *prev_verb == CINDC)) {
				/* create new string command */
				*comp_ptr++ = (CaseIgnore) ? CINDC : NORMC;
				*comp_ptr++ = 0;
			} else {
				/* merge this into previous string command */
				this_verb = prev_verb;
			}
			this_verb[1] += 1;
			*comp_ptr++ = c;
			break;
		}
	}
outahere:

	/* End of pattern, let's do some error checking. */
	if (kind == OKAY_RE) {
		/* A stray \) may already have popped the implied paren;
		 * report it before popping rather than reading parens[-1].
		 */
		if (parenp == parens)
			complain("Unmatched ()'s.");
		*comp_ptr++ = CLOSEP;
		*comp_ptr++ = *--parenp;
	}
	if (parenp != parens)
		complain("Unmatched ()'s.");
	if (kind == IN_CB && c == '\0')	/* end of pattern with missing \}. */
		complain("Missing \\}.");
	*comp_ptr++ = EOP;

	return ret_code;
}

private char	*pstrtlst[NPAR],	/* index into re_blk->r_lbuf */
		*pendlst[NPAR],
		*REbolp,	/* begining-of-line pointer */
		*locrater,	/* roof of last substitution */
		*loc1,	/* start of matched text */
		*loc2;	/* roof of matched text */

int	REbom,		/* beginning and end columns of match */
	REeom,
	REdelta;	/* increase in line length due to last re_dosub */

/* Compare the text recorded for group n (pstrtlst[n] up to pendlst[n])
 * with the text at linep.  Returns YES once the comparison has reached
 * the end of the group, NO at the first differing character.  linep is
 * a by-value copy, so the caller's position is not advanced.
 */
private bool
backref(n, linep)
int	n;
register char	*linep;
{
	register char	*backsp,
			*backep;

	backsp = pstrtlst[n];
	backep = pendlst[n];
	while (*backsp++ == *linep++)
		if (backsp >= backep)
			return YES;
	return NO;
}

/* Test character c against the bit vector at comp_ptr.
 *
 * af is the answer to give when c's bit is set: the function returns af
 * if c is in the set and !af otherwise.  The terminating '\0' is never
 * a member, whatever af is.
 */
private bool
member(comp_ptr, c, af)
register char	*comp_ptr;
register int	c;
bool	af;
{
	if (c == '\0')
		return NO;	/* try to match EOL always fails */
	if (comp_ptr[SETBYTE(c)] & SETBIT(c))
		return af;
	return !af;
}

private bool
REmatch(linep, comp_ptr)
register char	*linep,
		*comp_ptr;
{
	char	*first_p;
	register int	n;

	for (;;) switch (*comp_ptr++) {
	case NORMC:
		n = *comp_ptr++;
		while (--n >= 0)
			if (*linep++ != *comp_ptr++)
				return NO;
		continue;

	case CINDC:	/* case independent comparison */
		n = *comp_ptr++;
		while (--n >= 0)
			if (!cind_cmp(*linep++, *comp_ptr++))
				return NO;
		continue;

	case EOP:
		loc2 = linep;
		REeom = (loc2 - REbolp);
		return YES;	/* Success! */

	case AT_BOL:
		if (linep == REbolp && linep != locrater)
			continue;
		return NO;

	case AT_EOL:
		if (*linep == '\0')
			continue;
		return NO;

	case ANYC:
		if (*linep++ != '\0')
			continue;
		return NO;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -