📄 regen.c
字号:
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>
#include "dfa.h"

/*
 * regen: compile three fixed regular expressions into DFAs and print
 * them on standard output:
 *	re[0]	the literal "^From "
 *	re[1]	the alternation of all keywords[] patterns
 *	re[2]	the alternation of all ignore[] patterns
 * Lower-case letters outside character classes are expanded so that
 * they match either case (see strcpycase).
 */

/*
 * Patterns for text to be ignored.
 */
char *ignore[] = {
	/* HTML that isn't A, IMG, or FONT */
	/* Must have a space somewhere to avoid catching <email@address> */
	"<[ \n\r]*("
		"[^aif]|"
		"a[^> \t\r\n]|"
		"i[^mM \t\r\n]|"
		"im[^gG \t\r\n]|"
		"img[^> \t\r\n]|"
		"f[^oO \t\r\n]|"
		"fo[^Nn \t\r\n]|"
		"fon[^tT \t\r\n]|"
		"font[^> \r\t\n]"
	")[^>]*[ \t\n\r][^>]*>",
	"<[ \n\r]*("
		"i|im|f|fo|fon"
	")[ \t\r\n][^>]*>",

	/* ignore html comments */
	"<!--([^\\-]|-[^\\-]|--[^>]|\n)*-->",

	/* random mail strings */
	"^message-id:.*\n([ ].*\n)*",
	"^in-reply-to:.*\n([ ].*\n)*",
	"^references:.*\n([ ].*\n)*",
	"^date:.*\n([ ].*\n)*",
	"^delivery-date:.*\n([ ].*\n)*",
	"e?smtp id .*",
	"^ id.*",
	"boundary=.*",
	"name=\"",
	"filename=\"",
	"news:<[^>]+>",
	"^--[^ ]*$",

	/* base64 encoding */
	"^[0-9a-zA-Z+\\-=/]+$",

	/* uu encoding */
	"^[!-Z]+$",

	/* little things */
	".",
	"\n",
};

/*
 * Patterns for the tokens of interest.
 * NOTE(review): the range starting at ¡ has no visible upper bound
 * here ("¡-]"); a character may have been lost -- verify against the
 * upstream source before relying on this pattern.
 */
char *keywords[] =
{
	"([a-zA-Z'`$!¡-]|[0-9]([.,][0-9])*)+",
};

/* set by -d; makes dregcomp print each expression before compiling it */
int debug;

/*
 * Compile the regular expression buf and convert it with dregcvt.
 * Either failure is fatal.  The intermediate Reprog is freed; the
 * returned Dreprog belongs to the caller.
 */
Dreprog*
dregcomp(char *buf)
{
	Reprog *r;
	Dreprog *d;

	if(debug)
		print(">>> '%s'\n", buf);

	r = regcomp(buf);
	if(r == nil)
		sysfatal("regcomp");
	d = dregcvt(r);
	if(d == nil)
		sysfatal("dregcomp");
	free(r);
	return d;
}

/*
 * Copy the regular expression s to d, replacing each lower-case ASCII
 * letter found outside a [...] character class with the two-letter
 * class [xX], so the result matches either case.
 *
 * A backslash quotes the character that follows it: that character is
 * copied unchanged and takes no part in bracket counting.  (Earlier
 * the escape state was tracked but never consulted, so "\a" became
 * "\[aA]" and a quoted bracket upset the class depth.)
 *
 * The output can be up to four times the length of s and is NOT
 * NUL-terminated.  Returns a pointer just past the last byte written.
 */
char*
strcpycase(char *d, char *s)
{
	int cc, esc;

	cc = 0;		/* bracket depth: non-zero while inside [...] */
	esc = 0;	/* set when the previous character was an unquoted \ */
	for(; *s; s++){
		if(esc){
			/* quoted character: copy verbatim */
			*d++ = *s;
			esc = 0;
			continue;
		}
		if(*s == '\\'){
			*d++ = *s;
			esc = 1;
			continue;
		}
		if(*s == '[')
			cc++;
		else if(*s == ']')
			cc--;
		if(!cc && 'a' <= *s && *s <= 'z'){
			*d++ = '[';
			*d++ = *s;
			*d++ = *s+'A'-'a';
			*d++ = ']';
		}else
			*d++ = *s;
	}
	return d;
}

/*
 * Error hook called by the regexp library; any compile error is fatal.
 */
void
regerror(char *msg)
{
	sysfatal("regerror: %s", msg);
}

/*
 * Write pat[0]|pat[1]|...|pat[npat-1] into buf, each pattern passed
 * through strcpycase, and NUL-terminate the result.  Dies instead of
 * writing past buf[nbuf-1].
 */
static void
joinpats(char *buf, int nbuf, char **pat, int npat)
{
	char *s, *e;
	int i;

	s = buf;
	e = buf+nbuf;
	for(i=0; i<npat; i++){
		/* worst case: every byte becomes [xX], plus '|' and the final NUL */
		if(4*strlen(pat[i])+2 > e-s)
			sysfatal("pattern buffer too small");
		if(i != 0)
			*s++ = '|';
		s = strcpycase(s, pat[i]);
	}
	*s = 0;
}

/*
 * Fill re[0..2] with the compiled "^From ", keywords and ignore
 * expressions, in that order.
 */
void
buildre(Dreprog *re[3])
{
	static char buf[16384];

	re[0] = dregcomp("^From ");

	joinpats(buf, sizeof buf, keywords, nelem(keywords));
	re[1] = dregcomp(buf);

	joinpats(buf, sizeof buf, ignore, nelem(ignore));
	re[2] = dregcomp(buf);
}

/*
 * Print the usage message on standard error and exit.
 */
void
usage(void)
{
	fprint(2, "usage: regen [-d]\n");
	exits("usage");
}

/*
 * Parse the optional -d flag, build the three expressions and print
 * their DFAs to file descriptor 1.
 */
void
main(int argc, char **argv)
{
	Dreprog *re[3];
	Biobuf b;

	ARGBEGIN{
	case 'd':
		debug = 1;
		break;
	default:
		usage();
	}ARGEND

	if(argc != 0)
		usage();

	buildre(re);
	Binit(&b, 1, OWRITE);
	Bprintdfa(&b, re[0]);
	Bprintdfa(&b, re[1]);
	Bprintdfa(&b, re[2]);
	exits(0);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -