📄 regexp.c
字号:
/*+------------------------------------------------------------------------- regexp.c -- regular expression functions made sane wht@n4hgf.Mt-Park.GA.US Defined functions: advance(lp,ep) compile(pattern,ep,endbuf,seof) ecmp(a,b,count) getrnge(regexp) regexp_compile(regexp,cmpbuf,cmpbuf_size,emsg) regexp_operation(match_str,regexp_str,rtn_value) regexp_scan(cmpbuf,str_to_search,match,matchlen) step(p1,p2)--------------------------------------------------------------------------*//*+:EDITS:*//*:09-10-1992-14:00-wht@n4hgf-ECU release 3.20 *//*:08-22-1992-15:39-wht@n4hgf-ECU release 3.20 BETA *//*:08-28-1991-14:07-wht@n4hgf2-SVR4 cleanup by aega84!lh *//*:07-25-1991-12:59-wht@n4hgf-ECU release 3.10 *//*:08-14-1990-20:40-wht@n4hgf-ecu3.00-flush old edit history */#include <stdio.h>#ifdef USE_PROTOS# include "protos.h"#endif#include "ecuerror.h"#include "esd.h"#include "var.h"#include <setjmp.h>#define CBRA 2#define CCHR 4#define CDOT 8#define CCL 12#define CDOL 20#define CCEOF 22#define CKET 24#define CBACK 36#define STAR 01#define RNGE 03#define NBRA 9#define PLACE(c) ep[c >> 3] |= bittab[c & 07]#define ISTHERE(c) (ep[c >> 3] & bittab[c & 07])void getrnge();int advance();int ecmp();extern int proc_level;extern int proctrace;char *braslist[NBRA];char *braelist[NBRA];int nbra;int ebra;char *match_start;char *match_end;char *locs;int sed;int nodelim;int circf;int low;int size;unsigned char bittab[] = { 1,2,4,8,16,32,64,128 };jmp_buf compile_error_jmpbuf;/*+------------------------------------------------------------------------- compile(pattern,ep,endbuf,seof)--------------------------------------------------------------------------*/voidcompile(pattern,ep,endbuf,seof)char *pattern;register char *ep;char *endbuf;int seof;{ register char *sp = pattern; register c; register eof = seof; char *lastep = pattern; int cclcnt; char bracket[NBRA],*bracketp; int closed; char neg; int lc; int i,cflg; lastep = 0; if((c = *sp++) == eof || c == '\n') { if(c == '\n') { --sp; nodelim = 1; } if(*ep == 0 && !sed) longjmp(compile_error_jmpbuf,41); return; } bracketp = bracket; circf = closed = nbra = ebra = 0; if(c == '^') circf++; else --sp; while(1) { if(ep >= endbuf) longjmp(compile_error_jmpbuf,50); c = *sp++; if(c != '*' && ((c != '\\') || (*sp != 0x7B))) lastep = ep; if(c == eof) { *ep++ = CCEOF; return; } switch(c) { case '.': *ep++ = CDOT; continue; case '\n': if(!sed) { --sp; *ep++ = CCEOF; nodelim = 1; return; } else longjmp(compile_error_jmpbuf,36); case '*': if(lastep==0 || *lastep==CBRA || *lastep==CKET) goto defchar; *lastep |= STAR; continue; case '$': if(*sp != eof && *sp != '\n') goto defchar; *ep++ = CDOL; continue; case '[': if(&ep[17] >= endbuf) longjmp(compile_error_jmpbuf,50); *ep++ = CCL; lc = 0; for(i = 0; i < 16; i++) ep[i] = 0; neg = 0; if((c = *sp++) == '^') { neg = 1; c = *sp++; } do { if(c == '\0' || c == '\n') longjmp(compile_error_jmpbuf,49); if(c == '-' && lc != 0) { if((c = *sp++) == ']') { PLACE('-'); break; } while(lc < c) { PLACE(lc); lc++; } } if(c == '\\') { switch(c = *sp++) { case 'n': c = '\n'; break; } } lc = c; PLACE(c); } while((c = *sp++) != ']'); if(neg) { for(cclcnt = 0; cclcnt < 16; cclcnt++) ep[cclcnt] ^= -1; ep[0] &= 0376; } ep += 16; continue; case '\\': switch(c = *sp++) { case 0x28: /* open paren */ if(nbra >= NBRA) longjmp(compile_error_jmpbuf,43); *bracketp++ = nbra; *ep++ = CBRA; *ep++ = nbra++; continue; case 0x29: /* close paren */ if(bracketp <= bracket || ++ebra != nbra) longjmp(compile_error_jmpbuf,42); *ep++ = CKET; *ep++ = *--bracketp; closed++; continue; case 0x7B: /* open brace */ if(lastep == (char *) (0)) goto defchar; *lastep |= RNGE; cflg = 0;nlim: c = *sp++; i = 0; do { if('0' <= c && c <= '9') i = 10 * i + c - '0'; else longjmp(compile_error_jmpbuf,16); } while(((c = *sp++) != '\\') && (c != ',')); if(i >= 255) longjmp(compile_error_jmpbuf,11); *ep++ = i; if(c == ',') { if(cflg++) longjmp(compile_error_jmpbuf,44); if((c = *sp++) == '\\') *ep++ = 255; else { --sp; goto nlim; /* get 2'nd number */ } } if(*sp++ != 0x7D) /* close brace */ longjmp(compile_error_jmpbuf,45); if(!cflg) /* one number */ *ep++ = i; else if((ep[-1] & 0377) < (ep[-2] & 0377)) longjmp(compile_error_jmpbuf,46); continue; case '\n': longjmp(compile_error_jmpbuf,36); case 'n': c = '\n'; goto defchar; default: if(c >= '1' && c <= '9') { if((c -= '1') >= closed) longjmp(compile_error_jmpbuf,25); *ep++ = CBACK; *ep++ = c; continue; } } /* Drop through to default to use \ to turn off special chars */defchar: default: lastep = ep; *ep++ = CCHR; *ep++ = c; } }} /* end of compile *//*+------------------------------------------------------------------------- step(p1,p2)--------------------------------------------------------------------------*/intstep(p1,p2)register char *p1,*p2;{ register c; if(circf) { match_start = p1; return(advance(p1,p2)); } /* fast check for first character */ if(*p2==CCHR) { c = p2[1]; do { if(*p1 != c) continue; if(advance(p1,p2)) { match_start = p1; return(1); } } while(*p1++); return(0); } /* regular algorithm */ do { if(advance(p1,p2)) { match_start = p1; return(1); } } while(*p1++); return(0);} /* end of step *//*+------------------------------------------------------------------------- advance(lp,ep)--------------------------------------------------------------------------*/intadvance(lp,ep)register char *lp,*ep;{ register char *curlp; char c; char *bbeg; int ct; while(1) switch(*ep++) { case CCHR: if(*ep++ == *lp++) continue; return(0); case CDOT: if(*lp++) continue; return(0); case CDOL: if(*lp==0) continue; return(0); case CCEOF: match_end = lp; return(1); case CCL: c = *lp++ & 0177; if(ISTHERE(c)) { ep += 16; continue; } return(0); case CBRA: braslist[*ep++] = lp; continue; case CKET: braelist[*ep++] = lp; continue; case CCHR|RNGE: c = *ep++; getrnge(ep); while(low--) if(*lp++ != c) return(0); curlp = lp; while(size--) if(*lp++ != c) break; if(size < 0) lp++; ep += 2; goto star; case CDOT|RNGE: getrnge(ep); while(low--) if(*lp++ == '\0') return(0); curlp = lp; while(size--) if(*lp++ == '\0') break; if(size < 0) lp++; ep += 2; goto star; case CCL|RNGE: getrnge(ep + 16); while(low--) { c = *lp++ & 0177; if(!ISTHERE(c)) return(0); } curlp = lp; while(size--) { c = *lp++ & 0177; if(!ISTHERE(c)) break; } if(size < 0) lp++; ep += 18; /* 16 + 2 */ goto star; case CBACK: bbeg = braslist[*ep]; ct = braelist[*ep++] - bbeg; if(ecmp(bbeg,lp,ct)) { lp += ct; continue; } return(0); case CBACK|STAR: bbeg = braslist[*ep]; ct = braelist[*ep++] - bbeg; curlp = lp; while(ecmp(bbeg,lp,ct)) lp += ct; while(lp >= curlp) { if(advance(lp,ep)) return(1); lp -= ct; } return(0); case CDOT|STAR: curlp = lp; while(*lp++); goto star; case CCHR|STAR: curlp = lp; while(*lp++ == *ep); ep++; goto star; case CCL|STAR: curlp = lp; do { c = *lp++ & 0177; } while(ISTHERE(c)); ep += 16; goto star;star: do { if(--lp == locs) break; if(advance(lp,ep)) return(1); } while(lp > curlp); return(0); }} /* end of advance *//*+------------------------------------------------------------------------- getrnge(regexp)--------------------------------------------------------------------------*/voidgetrnge(regexp)register char *regexp;{ low = *regexp++ & 0377; size = ((*regexp & 0377) == 255) ? 20000 : (*regexp & 0377) - low;} /* end of getrnge *//*+------------------------------------------------------------------------- ecmp(a,b,count)--------------------------------------------------------------------------*/intecmp(a,b,count)register char *a,*b;register count;{ while(count--) if(*a++ != *b++) return(0); return(1);} /* end of ecmp *//*+------------------------------------------------------------------------- itmp = regexp_compile(regexp,cmpbuf,cmpbuf_size,emsg)returns 0 if no compile error,else error occurred (*emsg points to error message text)--------------------------------------------------------------------------*/intregexp_compile(regexp,cmpbuf,cmpbuf_size,emsg)char *regexp;char *cmpbuf;int cmpbuf_size;char **emsg;{ register int itmp; static char errm[40]; if(itmp = setjmp(compile_error_jmpbuf)) { switch(itmp) { case 11: *emsg = "Range endpoint too large"; break; case 16: *emsg = "Bad number"; break; case 25: *emsg = "\"\\digit\" out of range"; break; case 36: *emsg = "Illegal or missing delimiter"; break; case 41: *emsg = "No previous regular expression"; break; case 42: *emsg = "More \\)'s than \\('s in regular expression"; break; case 43: *emsg = "More \\('s than \\)'s in regular expression"; break; case 44: *emsg = "More than 2 numbers in \\{ \\}"; break; case 45: *emsg = "} expected after \\"; break; case 46: *emsg = "First number exceeds second in \\{ \\}"; break; case 49: *emsg = "[] imbalance"; break; case 50: *emsg = "Regular expression too complex"; break; default: sprintf(errm,"Unknown regexp compile error %d",itmp); *emsg = errm; break; } return(itmp); } compile(regexp,cmpbuf,cmpbuf + cmpbuf_size,0); return(0);} /* end of regexp_compile *//*+------------------------------------------------------------------------- regexp_scan(cmpbuf,str_to_search,&match,&matchlen)return 1 if string match found, else 0if string matches, match receives pointer to first byte, matchlen = lengthof matching string--------------------------------------------------------------------------*/regexp_scan(cmpbuf,str_to_search,match,matchlen)char *cmpbuf;char *str_to_search;char **match;int *matchlen;{ register int itmp = step(str_to_search,cmpbuf); if(itmp) { *match = match_start; *matchlen = (int)(match_end - match_start); } return(itmp);} /* end of regexp_scan *//*+------------------------------------------------------------------------- regexp_operation(match_str,regexp_str,rtn_value)one stop operation used by procedure language:determine if 'match_str' matches 'regexp_str', returning the index ofthe match in *rtn_value and setting #I0 to the length of the matchreturns 0 if no error, else eFATAL_ALREADY after printing the error message--------------------------------------------------------------------------*/intregexp_operation(match_str,regexp_str,rtn_value)char *match_str;char *regexp_str;long *rtn_value;{#define CMPBUF_SIZE 512char cmpbuf[CMPBUF_SIZE];char *emsg;char *match;int matchlen; if(regexp_compile(regexp_str,cmpbuf,sizeof(cmpbuf),&emsg)) { pprintf("regexp compile error: %s\n",emsg); return(eFATAL_ALREADY); } if(regexp_scan(cmpbuf,match_str,&match,&matchlen)) { *rtn_value = (long)(match - match_str); iv[0] = (long)matchlen; if(proc_level && proctrace) pprintf("%match set $i00 = %ld\n",iv[0]); } else *rtn_value = -1L; return(0);} /* end of regexp_operation *//* vi: set tabstop=4 shiftwidth=4: *//* end of regexp.c */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -