nwords.l
来自「<B>Digital的Unix操作系统VAX 4.2源码</B>」· L 代码 · 共 501 行
L
501 行
%{
/*
 * nwords.l -- break out words, output cap + word(inverted)
 *
 * Lex scanner that reads text on stdin (or the files named on the
 * command line) and writes one line per token to stdout:
 *
 *	OUT1(tag)  prints "<tag>:<yytext>", where <tag> is printed with %c.
 *	OUT()      prints yytext reversed, one character at a time,
 *	           followed by a newline (used for words that were not
 *	           found by lookup() and not resolved by a suffix test).
 *	OUTN(s)    prints the string s followed by a newline.
 *
 * The tag constants (NOUN, POS, NOUN_ADJ, END, ...) and the helpers
 * lookup(), abbrev(), getd(), getab(), ygetd() and the suffix
 * functions (cy, fy, ly, ...) are not defined in this file;
 * presumably they come from names.h and the .c files included below.
 *
 * Scanner state kept in globals:
 *	first	set to 1 after sentence-ending punctuation, cleared when a
 *		word is emitted; used to decide whether a leading capital
 *		should be remembered in `cap'.
 *	cap	set when the current word had its initial capital folded
 *		to lower case before the dictionary lookup.
 *	dot	set by rules whose token ended in a '.' that may also
 *		end a sentence; the next rule that tests it emits `fin'.
 *	qflg	toggles on each run of quote characters.
 *	nflg	set when "n't" was stripped from the word being looked up.
 *
 * Character classes defined below:
 *	L lower-case letter, N digit, C upper-case letter,
 *	A any letter, P letter or digit.
 *
 * NOTE(review): the actions write into yytext past the matched text
 * (see ahead() and the {C}{C}+ rule); this assumes yytext is a char
 * array, as in classic lex -- confirm before building with a lex
 * whose yytext is a pointer.
 */
#ifndef lint
static char sccsid[] = "@(#)nwords.l 4.2 (Berkeley) 82/11/06";
#endif /* not lint */

#include <stdio.h>
#include <ctype.h>
/* OUT uses the global loop index i; wrapped so it is a single statement */
#define OUT()	do { for(i=yyleng-1;i>=0; i--)putchar(yytext[i]); putchar('\n'); } while(0)
#define OUT1(nam)	printf("%c:%s\n",nam,yytext)
#define OUTN(string)	printf("%s\n",string)
#include "names.h"
#include "nhash.c"
#include "dict.c"
#include "ydict.c"
#include "abbrev.c"
char nt[] = "D:n't";
char qs[] = "c:'s";
char fin[] = "E:.";
int NOCAPS = 0;		/* if set all caps are turned to lower case */
int i,j;
int dot = 0;
int first = 1;
int qflg,nflg;
int cap = 0;
%}
%p 3000
%a 3300
%o 4500
L	[a-z]
N	[0-9]
C	[A-Z]
A	[a-zA-Z]
P	[a-zA-Z0-9]
%%
^[.!].+[\n]	{
		/* line starting with '.' or '!': copied through after a ':' */
		if(dot){
			OUTN(fin);
			dot = 0;
			first = 1;
		}
		printf(":%s",yytext);
		}
May	{
		/* not at sentence start: emit as NOUN; otherwise fold and look up */
		if(first == 0){
			OUT1(NOUN);
		}
		else {
			first = 0;
			yytext[0] = tolower(yytext[0]);
			cap = 1;
			goto wd;
		}
		}
"U.S."	{
		OUT1(NOUN);
		}
{C}{L}*'[s]	{
		/* capitalized word followed by 's */
		pos(1);
		if(first==1)first=0;
		}
{C}+['][s]	{
		if(NOCAPS)
			for(i=0;i<yyleng;i++)
				if(isupper(yytext[i]))yytext[i] = tolower(yytext[i]);
		OUT1(POS);
		}
{P}+([-]{P}+)+	{
		/* hyphenated compound */
		if(NOCAPS)
			for(i=0;i<yyleng;i++)
				if(isupper(yytext[i]))yytext[i] = tolower(yytext[i]);
		OUT1(NOUN_ADJ);
		}
{C}{C}+	{
		/* run of capitals; a directly following 's' is absorbed */
		if(NOCAPS)
			for(i=0;i<yyleng;i++)
				yytext[i] = tolower(yytext[i]);
		if((i=input()) == 's'){
			yytext[yyleng++] = 's';
			yytext[yyleng] = '\0';
			OUT1(PNOUN);
		}
		else {
			unput(i);
			if(!NOCAPS)
				for(i=0;i<yyleng;i++)yytext[i] = tolower(yytext[i]);
			goto wd;
		}
		}
[LD][']{C}{L}*	{
		if(NOCAPS){
			yytext[0] = tolower(yytext[0]);
			yytext[2] = tolower(yytext[2]);
		}
		OUT1(NOUN_ADJ);
		}
{C}{L}*	{
		/* capitalized word: remember the capital unless it starts a sentence */
		if(first==1)
			first=0;
		else
			cap = 1;
		if(yyleng==1 && yytext[0] == 'I'){
			/* the word "I" is looked up unchanged */
			cap = 0;
			goto wd;
		}
		yytext[0] = tolower(yytext[0]);
		goto wd;
		}
{N}":"{N}{N}	{
		OUT1(NOUN_ADJ);
		}
({N}*[,])*({N}+".")+[ \t\n]+{C}	{
		/*
		 * number, '.', white space, capital: the last '.' is taken as
		 * a sentence end; the capital is pushed back for rescanning.
		 */
		for(i=yyleng-1;i>0;i--)
			if(yytext[i] == '.')break;
		unput(yytext[yyleng-1]);
		yytext[i] = '\0';
		OUT1(NOUN_ADJ);
		OUTN(fin);
		first = 1;
		}
([hH]e"/"[sS]he)|([sS]he"/"[hH]e)	{
		if(NOCAPS)
			if(isupper(yytext[0]))yytext[0] = tolower(yytext[0]);
		OUT1(PRONS);
		}
([hH]is"/"[hH]er)|([hH]er"/"[hH]is)	{
		if(NOCAPS)
			if(isupper(yytext[0]))yytext[0] = tolower(yytext[0]);
		OUT1(POS);
		}
[ \t`]*[a-zA-Z0-9.]*("\/"[a-zA-Z0-9.]+)+[']*	{
		/* slash-separated token */
		if(yytext[yyleng-1] == '.'){
			if(ahead() == 0)dot=1;
		}
		if(NOCAPS)
			for(i=0;i<yyleng;i++)
				if(isupper(yytext[i]))yytext[i] = tolower(yytext[i]);
		OUT1(NOUN_ADJ);
		}
{N}+([,]{N}+)*("."{N}+)*[']*[s]*	{
		OUT1(NOUN_ADJ);
		}
{N}*([,]{N}+)*("."{N}+)+[']*[s]*	{
		OUT1(NOUN_ADJ);
		}
{N}+([,]{N}+)*("."{N}*)*[']*[s]*	{
		if(yytext[yyleng-1] == '.')dot=1;
		OUT1(NOUN_ADJ);
		}
({A}*{N}+{A}*)+	{
		/*
		 * Peek at the next character: a '.' may continue the token
		 * (ahead() decides).  Any other character is pushed back;
		 * the original consumed it, which silently dropped the
		 * punctuation following the token.
		 */
		if((i=input()) == '.')
			ahead();
		else if(i > 0)
			unput(i);
		if(NOCAPS)
			for(i=0;i<yyleng;i++)
				if(isupper(yytext[i]))yytext[i]=tolower(yytext[i]);
		OUT1(NOUN_ADJ);
		}
{N}+[%]	{
		OUT1(NOUN_ADJ);
		}
"$"{N}+([,]{N}+)*("."{N}*)*	{
		if(yytext[yyleng-1] == '.')dot=1;
		OUT1(NOUN);
		}
[Aa]"."[ ]*[Mm]"."	{
		OUT1(ADJ_ADV);
		}
[Pp]"."[ ]*[Mm]"."	{
		OUT1(ADJ_ADV);
		}
"a."[ ]*"d."	{
		OUT1(ADJ_ADV);
		}
"b."[ ]*"c."	{
		OUT1(ADJ_ADV);
		}
"i."[ ]*"e."	{
		OUT1(PREP);
		}
"e."[ ]*"g."	{
		OUT1(PREP);
		}
"etc."[ \n]*[,)]*	{
		/* emit "etc." itself, then classify by the last matched character */
		i = yytext[4];
		yytext[4] = '\0';
		OUT1(NOUN);
		yytext[4] = i;
		yytext[0] = yytext[yyleng-1];
		yytext[1] = '\0';
		if(yytext[0] == ',' || yytext[0] == ')')
			OUT1(',');
		else {
			OUTN(fin);
			first = 1;
		}
		}
"et al."	{
		OUT1(NOUN);
		}
in"."[ \n]*{C}	{
		/* "in." before a capital: emit "in", end the sentence, rescan the capital */
		unput(yytext[yyleng-1]);
		yytext[2] = '\0';
		OUT1(PREP);
		OUTN(fin);
		first = 1;
		}
Ph"."[ ]*[Dd]"."	{
		OUT1(ADJ);
		}
[A-Z]"."	{
		dot=1;
		OUT1(NOUN);
		}
can't	{
		/* keep "can"; wd emits nt after the word when nflg is set */
		yytext[3]='\0';
		yyleng -= 2;
		nflg=1;
		goto wd;
		}
won't	{
		OUT1('X');
		}
ain't	{
		OUT1('g');
		}
{L}+n't	{
		nflg=1;
		yytext[yyleng-3]='\0';
		yyleng -= 3;
		goto wd;
		}
[A-Z]{L}+n't	{
		yytext[0] = tolower(yytext[0]);
		nflg=1;
		yytext[yyleng-3]='\0';
		yyleng -= 3;
		goto wd;
		}
o'clock	{
		OUT1(ADV);
		}
{L}+'[s]	{
		pos(0);
		}
'll	{
		OUT1(lookup("will",1,0));
		}
've	{
		OUT1(lookup("have",1,0));
		}
're	{
		OUT1(lookup("are",1,0));
		}
'd	{
		OUT1(lookup("had",1,0));
		}
'm	{
		OUT1(lookup("am",1,0));
		}
'ld	{
		OUT1(lookup("would",1,0));
		}
{L}+	{
		/*
		 * wd: common word handler; other rules jump here after
		 * normalizing yytext/yyleng.  A nonzero lookup() result is
		 * used as the tag; otherwise words ending in 'y' are tried
		 * against the suffix functions via look().
		 */
wd:
		if((j = lookup(yytext,1,0)) != 0){
			first=0;
			if(cap){
				if(!NOCAPS)
					yytext[0] = toupper(yytext[0]);
				cap = 0;
				if(dot)OUTN(fin);
			}
			dot=0;
			OUT1(j);
			if(nflg==1){
				nflg=0;
				OUTN(nt);
			}
		}
		else{
			first = dot=0;
			/*
			 * yyleng checks keep yytext[yyleng-2] and
			 * yytext[yyleng-3] inside the buffer for one- and
			 * two-letter words (the original read before the
			 * start of yytext in those cases).
			 */
			if(yyleng >= 2 && yytext[yyleng-1] == 'y' && cap == 0){
				switch(yytext[yyleng-2]){
				case 'c':
					look(cy,yyleng-2,NOUN);
					break;
				case 'f':
					look(fy,yyleng-2,VERB);
					break;
				case 'l':
					look(ly,yyleng-2,ADV);
					break;
				case 'g':
					if(yyleng >= 3 && yytext[yyleng-3] == 'o'){
						OUT1(NOUN);
						break;
					}
					look(gy,yyleng-2,ADJ);
					break;
				case 'r':
					switch(yyleng >= 3 ? yytext[yyleng-3] : '\0'){
					case 'a':
						look(ary,yyleng-3,ADJ);
						break;
					case 'o':
						look(ory,yyleng-3,ADJ);
						break;
					case 'e':
						look(ery,yyleng-3,NOUN);
						break;
					default:
						look(ry,yyleng-2,NOUN);
					}
					break;
				case 't':
					if(yyleng >= 3 && yytext[yyleng-3] == 'i')
						look(ity,yyleng-3,NOUN);
					else
						look(ty,yyleng-2,ADJ);
					break;
				default:
					OUT();
				}
			}
			else {
				if(cap){
					if(!NOCAPS)yytext[0] = toupper(yytext[0]);
					cap = 0;
					OUT1(NOUN_ADJ);
				}
				else {
					OUT();
				}
			}
		}
		}
[\n]	;
[ ]+	;
[\t]+	;
";"	{
		OUT1(';');
		first=1;
		}
(\"|`|')+	{
		/* any run of quote characters toggles qflg */
		if(dot){
			OUTN(fin);
			dot=0;
		}
		if(qflg==1){
			qflg=0;
			OUT1('"');
		}
		else {
			qflg=1;
			first=1;
			OUT1('"');
		}
		}
".\""	{
		qflg=0;
		first=1;
		OUT1(END);
		}
"..."	{
		OUT1(',');
		}
"/."	{
		first = 1;
		OUT1(END);
		}
{A}{A}+"."	{
		/* word followed by '.': abbreviation, dotted token, or sentence end */
		yytext[yyleng-1] = '\0';
		if((j=abbrev(yytext,1,0)) != 0){
			if(isupper(yytext[0])){
				if(NOCAPS)yytext[0] = tolower(yytext[0]);
				if(first == 1)first=0;
			}
			yytext[yyleng-1] = '.';
			OUT1(j);
		}
		else {
			/*
			 * Drop the slot of the overwritten '.' before calling
			 * ahead(): when ahead() extends the token it writes
			 * its own '.' at yytext[yyleng].  The original left
			 * the NUL in place, so the appended text was never
			 * printed.  When ahead() returns 0 it has pushed a
			 * '.' back onto the input and the bare word goes to wd.
			 */
			yyleng--;
			j = ahead();
			for(i=0;i<yyleng;i++)
				if(isupper(yytext[i])){
					yytext[i] = tolower(yytext[i]);
					if(i == 0)cap = 1;
					else cap = 0;
				}
			if(j == 0)goto wd;
			OUT1(NOUN_ADJ);
		}
		}
"."	{
		first=1;
		OUT1(END);
		}
"!\""	{
		qflg=0;
		first=1;
		OUT1(END);
		}
"!"	{
		first=1;
		OUT1(END);
		}
"?\""	{
		qflg=0;
		first=1;
		OUT1(END);
		}
"?"	{
		first=1;
		OUT1(END);
		}
":"	{
		OUT1(',');
		first=1;
		}
[-]+	{
		OUT1(',');
		first=1;
		}
","	{
		OUT1(',');
		}
(\[|\(|\{|\]|\)|\})	{
		OUT1(',');
		}
.	{
		/* any other character is ignored */
		/* fprintf(stderr,"nwords funny char: %c\n",yytext[0])*/
		;
		}
%%
/*
 * look -- classify yytext by a suffix function.
 *
 * Temporarily terminates yytext at index n, calls (*f)(yytext,1,0),
 * restores the saved character, then emits yytext tagged with the
 * function's result if it is nonzero, or with cc otherwise.
 */
look(f,n,cc)
char (*f)();
int n;
char cc;
{
	int nn;
	char save;

	save=yytext[n];
	yytext[n] = '\0';
	nn=(*f)(yytext,1,0);
	yytext[n] = save;
	if(nn != 0){
		OUT1(nn);
	}
	else {
		OUT1(cc);
	}
}

/*
 * pos -- handle a token that contains an apostrophe (word + 's).
 *
 * flg == 1 means the token began with a capital; it is folded to lower
 * case for the lookup.  yytext is cut at the last apostrophe and looked
 * up; on success the stem is emitted with the lookup tag followed by
 * qs.  Otherwise the apostrophe is restored (and the capital too,
 * unless NOCAPS) and the whole token is emitted as POS.
 */
pos(flg){
	int ii,j;

	if(flg == 1)yytext[0] = tolower(yytext[0]);
	/* callers' patterns guarantee an apostrophe is present */
	for(ii=yyleng-1;yytext[ii] != '\''; ii--);
	yytext[ii] = '\0';
	if((j=lookup(yytext,1,0)) != 0){
		yyleng = ii;
		OUT1(j);
		OUTN(qs);
	}
	else{
		if(flg==1 && !NOCAPS)yytext[0] = toupper(yytext[0]);
		yytext[ii] = '\'';
		OUT1(POS);
	}
}

char *filename="-";

/*
 * main -- emit the leading ":" line, call getd(), getab() and ygetd(),
 * then scan stdin, or each file argument in turn by reopening stdin on
 * it.  `fin' is written after each successful scan.  Returns the number
 * of files that could not be opened.
 */
main(argc,argv)
int argc;
char *argv[];
{
	register int rc=0;

	putchar(':');
	putchar('\n');
	getd();
	getab();
	ygetd();
	if(argc<=1) {
		yylex();
		OUTN(fin);
	}else{
		while(argc>1) {
			if(freopen(argv[1],"r",stdin)==NULL) {
				fprintf(stderr,"%s: cannot open\n", argv[1]);
				rc++;
			}else{
				filename=argv[1];
				yylex();
				OUTN(fin);
			}
			argc--; argv++;
		}
	}
	return(rc);
}

/*
 * ahead -- decide whether a '.' is internal to the current token.
 *
 * Reads the character after the '.'.  If it is alphanumeric, appends
 * '.', that character and everything up to the next white space (or
 * end of input) to yytext, pushes the terminating character back and
 * returns 1.  Otherwise pushes the character and a '.' back onto the
 * input and returns 0.
 *
 * Changes from the original: the first character after the '.' is now
 * stored (it used to be read and dropped), and the copy loop stops at
 * end of input instead of looping forever, since isspace() is false
 * for the value input() returns at end of file.
 *
 * NOTE(review): there is still no bound on how much is appended to
 * yytext; a very long run of non-blank characters can overrun it.
 */
ahead(){
	register int c;

	if(isalnum((c=input()))){
		yytext[yyleng++] = '.';
		yytext[yyleng++] = c;
		while((c=input()) > 0 && !isspace(c))
			yytext[yyleng++] = c;
		yytext[yyleng] = '\0';
		if(c > 0)
			unput(c);
		return(1);
	}
	if(c > 0)
		unput(c);
	unput('.');
	return(0);
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?