📄 sprog.c
字号:
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include "code.h"

/*
 * Affix-stripping spelling checker.
 * Reads one word per line from standard input, tries to recognise it
 * by looking it up (optionally after removing prefixes and suffixes),
 * and reports the result on standard output.
 */

/* fig leaves for possibly signed char quantities */
#define ISUPPER(c)	isupper((c)&0xff)
#define ISLOWER(c)	islower((c)&0xff)
#define ISALPHA(c)	isalpha((c)&0xff)
#define ISDIGIT(c)	isdigit((c)&0xff)
#define ISVOWEL(c)	voweltab[(c)&0xff]
#define Tolower(c)	(ISUPPER(c)? (c)-'A'+'a': (c))
/* pack two characters into one int so a pair can be a switch case */
#define pair(a,b)	(((a)<<8) | (b))
/* trysuff advances the derivation level by DLEV on every call */
#define DLEV		2
/* deriv[] entries are only written for levels below DSIZ */
#define DSIZ		40

typedef	long	Bits;
/* non-zero when any bit of f is set in h */
#define Set(h, f)	((long)(h) & (f))

/*
 * Suffix handlers.  All share one signature so they can be stored in
 * Suftab: (ep, d, a, lev, flag), where ep points into word[].
 */
Bits	nop(char*, char*, char*, int, int);
Bits	strip(char*, char*, char*, int, int);
Bits	ize(char*, char*, char*, int, int);
Bits	i_to_y(char*, char*, char*, int, int);
Bits	ily(char*, char*, char*, int, int);
Bits	subst(char*, char*, char*, int, int);
Bits	CCe(char*, char*, char*, int, int);
Bits	tion(char*, char*, char*, int, int);
Bits	an(char*, char*, char*, int, int);
Bits	s(char*, char*, char*, int, int);
Bits	es(char*, char*, char*, int, int);
Bits	bility(char*, char*, char*, int, int);
Bits	y_to_e(char*, char*, char*, int, int);
Bits	VCe(char*, char*, char*, int, int);

Bits	trypref(char*, char*, int, int);
Bits	tryword(char*, char*, int, int);
Bits	trysuff(char*, int, int);
Bits	dict(char*, char*);
void	typeprint(Bits);
void	pcomma(char*);

void	ise(void);
int	ordinal(void);
char*	skipv(char*);
int	inun(char*, Bits);
char*	ztos(char*);
void	readdict(char*);

/*
 * One prefix-table entry: the prefix text and a flag word.
 * The tables are terminated by an entry whose s is 0.
 * NOTE(review): the consumer (trypref) is outside this view.
 */
typedef	struct	Ptab	Ptab;
struct	Ptab
{
	char*	s;
	int	flag;
};

/*
 * One suffix rule, as used by trysuff:
 *	suf		the suffix spelled backwards (compared from the end of the word)
 *	p1,n1,d1,a1	first handler; called as p1(ep-n1, d1, a1, lev+1, flag|STOP)
 *	flag		flag bits handed to the handlers (with STOP or'ed in)
 *	affixable	tested against the caller's flag; no common bit rejects the word
 *	p2,n2,d2,a2	optional second handler, tried only when p1 returns 0
 * A table ends with an entry whose suf is 0.
 */
typedef	struct	Suftab	Suftab;
struct	Suftab
{
	char	*suf;
	Bits	(*p1)(char*, char*, char*, int, int);
	int	n1;
	char	*d1;
	char	*a1;
	int	flag;
	int	affixable;
	Bits	(*p2)(char*, char*, char*, int, int);
	int	n2;
	char	*d2;
	char	*a2;
};

/*
 * Suffix tables, one per final letter of the word.
 * trysuff scans a table in order and stops at the first entry whose
 * suffix matches and is preceded by a vowel, so entry order matters.
 */
Suftab	staba[] = {
	{"aibohp",subst,1,"-e+ia","",NOUN, NOUN},
	0
};

Suftab	stabc[] =
{
	{"cai",strip,1,"","+c",N_AFFIX, ADJ|NOUN},
	{"citsi",strip,2,"","+ic",N_AFFIX, ADJ | N_AFFIX | NOUN},
	{"citi",ize,1,"-e+ic","",N_AFFIX, ADJ },
	{"cihparg",i_to_y,1,"-y+ic","",NOUN, ADJ|NOUN },
	{"cipocs",ize,1,"-e+ic","",NOUN, ADJ },
	{"cirtem",i_to_y,1,"-y+ic","",NOUN, ADJ },
	{"cigol",i_to_y,1,"-y+ic","",NOUN, ADJ },
	{"cimono",i_to_y,1,"-y+ic","",NOUN, ADJ },
	{"cibohp",subst,1,"-e+ic","",NOUN, ADJ },
	0
};

Suftab	stabd[] =
{
	{"de",strip,1,"","+d",ED,ADJ |COMP,i_to_y,2,"-y+ied","+ed"},
	{"dooh",ily,4,"-y+ihood","+hood",NOUN | ADV, NOUN},
	0
};

Suftab	stabe[] =
{
	/*
	 * V_affix for comment ->commence->commentment??
	 */
	{"ecna",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
	{"ecne",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
	{"elbaif",i_to_y,4,"-y+iable","",V_IRREG,ADJ},
	{"elba",CCe,4,"-e+able","+able",V_AFFIX,ADJ},
	{"evi",subst,0,"-ion+ive","",N_AFFIX | V_AFFIX,NOUN | N_AFFIX| ADJ},
	{"ezi",CCe,3,"-e+ize","+ize",N_AFFIX|ADJ ,V_AFFIX | VERB |ION | COMP},
	{"ekil",strip,4,"","+like",N_AFFIX ,ADJ},
	0
};

Suftab	stabg[] =
{
	{"gniee",strip,3,"","+ing",V_IRREG ,ADJ|NOUN},
	{"gnikam",strip,6,"","+making",NOUN,NOUN},
	{"gnipeek",strip,7,"","+keeping",NOUN,NOUN},
	{"gni",CCe,3,"-e+ing","+ing",V_IRREG ,ADJ|ED|NOUN},
	0
};

Suftab	stabl[] =
{
	{"ladio",strip,2,"","+al",NOUN |ADJ,ADJ},
	{"laci",strip,2,"","+al",NOUN |ADJ,ADJ |NOUN|N_AFFIX},
	{"latnem",strip,2,"","+al",N_AFFIX,ADJ},
	{"lanoi",strip,2,"","+al",N_AFFIX,ADJ|NOUN},
	{"luf",ily,3,"-y+iful","+ful",N_AFFIX,ADJ | NOUN},
	0
};

Suftab	stabm[] =
{
	/* congregational + ism */
	/* NOTE(review): "ism" is the only a1 string without a leading '+' -- confirm intended */
	{"msi",CCe,3,"-e+ism","ism",N_AFFIX|ADJ,NOUN},
	{"margo",subst,-1,"-ph+m","",NOUN,NOUN},
	0
};

Suftab	stabn[] =
{
	{"noitacifi",i_to_y,6,"-y+ication","",ION,NOUN | N_AFFIX},
	{"noitazi",ize,4,"-e+ation","",ION,NOUN| N_AFFIX},
	{"noit",tion,3,"-e+ion","+ion",ION,NOUN| N_AFFIX | V_AFFIX |VERB|ACTOR},
	{"naino",an,3,"","+ian",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
	{"namow",strip,5,"","+woman",MAN,PROP_COLLECT|N_AFFIX},
	{"nam",strip,3,"","+man",MAN,PROP_COLLECT | N_AFFIX | VERB},
	{"na",an,1,"","+n",NOUN|PROP_COLLECT,NOUN | N_AFFIX},
	{"nemow",strip,5,"","+women",MAN,PROP_COLLECT},
	/* NOTE(review): suffix is "men" but the message says "+man" -- confirm intended */
	{"nem",strip,3,"","+man",MAN,PROP_COLLECT},
	{"nosrep",strip,6,"","+person",MAN,PROP_COLLECT},
	0
};

Suftab	stabp[] =
{
	{"pihs",strip,4,"","+ship",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
	0
};

Suftab	stabr[] =
{
	{"rehparg",subst,1,"-y+er","",ACTOR,NOUN,strip,2,"","+er"},
	{"reyhparg",nop,0,"","",0,NOUN},
	{"reyl",nop,0,"","",0,NOUN},
	{"rekam",strip,5,"","+maker",NOUN,NOUN},
	{"repeek",strip,6,"","+keeper",NOUN,NOUN},
	{"re",strip,1,"","+r",ACTOR,NOUN | N_AFFIX|VERB|ADJ,	i_to_y,2,"-y+ier","+er"},
	{"rota",tion,2,"-e+or","",ION,NOUN| N_AFFIX|_Y},
	{"rotc",tion,2,"","+or",ION,NOUN| N_AFFIX},
	{"rotp",tion,2,"","+or",ION,NOUN| N_AFFIX},
	0
};

Suftab	stabs[] =
{
	{"ssen",ily,4,"-y+iness","+ness",ADJ|ADV,NOUN| N_AFFIX},
	{"ssel",ily,4,"-y+iless","+less",NOUN | PROP_COLLECT,ADJ },
	{"se",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH ,	es,2,"-y+ies","+es"},
	{"s'",s,2,"","+'s",PROP_COLLECT | NOUN,DONT_TOUCH },
	{"s",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH },
	0
};

Suftab	stabt[] =
{
	{"tnem",strip,4,"","+ment",V_AFFIX,NOUN | N_AFFIX | ADJ|VERB},
	{"tse",strip,2,"","+st",EST,DONT_TOUCH,	i_to_y,3,"-y+iest","+est" },
	{"tsigol",i_to_y,2,"-y+ist","",N_AFFIX,NOUN | N_AFFIX},
	{"tsi",CCe,3,"-e+ist","+ist",N_AFFIX|ADJ,NOUN | N_AFFIX|COMP},
	0
};

Suftab	staby[] =
{
	{"ycna",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
	{"ycne",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
	{"ytilib",bility,5,"-le+ility","",ADJ | V_AFFIX,NOUN | N_AFFIX},
	/* the nop entries stop a shorter rule further down from matching */
	{"ytisuo",nop,0,"","",NOUN},
	{"ytilb",nop,0,"","",0,NOUN},
	{"yti",CCe,3,"-e+ity","+ity",ADJ ,NOUN | N_AFFIX },
	{"ylb",y_to_e,1,"-e+y","",ADJ,ADV},
	{"ylc",nop,0,"","",0},
	{"ylelb",nop,0,"","",0},
	{"ylelp",nop,0,"","",0},
	{"yl",ily,2,"-y+ily","+ly",ADJ,ADV|COMP},
	{"yrtem",subst,0,"-er+ry","",NOUN,NOUN | N_AFFIX},
	{"y",CCe,1,"-e+y","+y",_Y,ADJ|COMP},
	0
};

/* shared empty table for letters that have no suffix rules */
Suftab	stabz[] =
{
	0
};

/* indexed by (last letter of the word) - 'a'; see trysuff */
Suftab*	suftab[] =
{
	staba,
	stabz,
	stabc,
	stabd,
	stabe,
	stabz,
	stabg,
	stabz,
	stabz,
	stabz,
	stabz,
	stabl,
	stabm,
	stabn,
	stabz,
	stabp,
	stabz,
	stabr,
	stabs,
	stabt,
	stabz,
	stabz,
	stabz,
	stabz,
	staby,
	stabz,
};

/*
 * Prefix tables, one per letter 'a'..'z'.
 * NOTE(review): presumably selected by the first letter of the word in
 * trypref, which is outside this view.
 */
Ptab	ptaba[] =
{
	"anti", 0,
	"auto", 0,
	0
};

Ptab	ptabb[] =
{
	"bio", 0,
	0
};

Ptab	ptabc[] =
{
	"counter", 0,
	0
};

Ptab	ptabd[] =
{
	"dis", 0,
	0
};

Ptab	ptabe[] =
{
	"electro", 0,
	0
};

Ptab	ptabf[] =
{
	"femto", 0,
	0
};

Ptab	ptabg[] =
{
	"geo", 0,
	"giga", 0,
	0
};

Ptab	ptabh[] =
{
	"hyper", 0,
	0
};

Ptab	ptabi[] =
{
	"immuno", 0,
	"im", IN,
	"intra", 0,
	"inter", 0,
	"in", IN,
	"ir", IN,
	"iso", 0,
	0
};

Ptab	ptabj[] =
{
	0
};

Ptab	ptabk[] =
{
	"kilo", 0,
	0
};

Ptab	ptabl[] =
{
	0
};

Ptab	ptabm[] =
{
	"magneto", 0,
	"mega", 0,
	"meta", 0,
	"micro", 0,
	"mid", 0,
	"milli", 0,
	"mini", 0,
	"mis", 0,
	"mono", 0,
	"multi", 0,
	0
};

Ptab	ptabn[] =
{
	"nano", 0,
	"neuro", 0,
	"non", 0,
	0
};

Ptab	ptabo[] =
{
	"out", 0,
	"over", 0,
	0
};

Ptab	ptabp[] =
{
	"para", 0,
	"photo", 0,
	"pico", 0,
	"poly", 0,
	"pre", 0,
	"pseudo", 0,
	"psycho", 0,
	0
};

Ptab	ptabq[] =
{
	"quasi", 0,
	0
};

Ptab	ptabr[] =
{
	"radio", 0,
	"re", 0,
	0
};

Ptab	ptabs[] =
{
	"semi", 0,
	"stereo", 0,
	"sub", 0,
	"super", 0,
	0
};

Ptab	ptabt[] =
{
	"tele", 0,
	"tera", 0,
	"thermo", 0,
	0
};

Ptab	ptabu[] =
{
	"ultra", 0,
	"under", 0,	/*must precede un*/
	"un", IN,
	0
};

Ptab	ptabv[] =
{
	0
};

Ptab	ptabw[] =
{
	0
};

Ptab	ptabx[] =
{
	0
};

Ptab	ptaby[] =
{
	0
};

Ptab	ptabz[] =
{
	0
};

Ptab*	preftab[] =
{
	ptaba,
	ptabb,
	ptabc,
	ptabd,
	ptabe,
	ptabf,
	ptabg,
	ptabh,
	ptabi,
	ptabj,
	ptabk,
	ptabl,
	ptabm,
	ptabn,
	ptabo,
	ptabp,
	ptabq,
	ptabr,
	ptabs,
	ptabt,
	ptabu,
	ptabv,
	ptabw,
	ptabx,
	ptaby,
	ptabz,
};

/* one step of a derivation: a message and whether it names a suffix or prefix */
typedef	struct
{
	char	*mesg;
	enum	{ NONE, SUFF, PREF} type;
} Deriv;

/* option flags, counted up in main's argument loop (nflag is not set here) */
int	aflag;
int	cflag;
int	fflag;
int	vflag;
int	xflag;
int	nflag;
char	word[500];		/* working copy of the current word; may be case-folded */
char*	original;		/* the word exactly as read from input */
Deriv	emptyderiv;		/* all-zero value used to clear deriv[] slots */
Deriv	deriv[DSIZ+3];
char	affix[DSIZ*10];	/* 10 is longest affix message */
int	prefcount;
int	suffcount;
char*	acmeid;			/* with -a: the part of the line before the word */
char	space[300000];	/* must be as large as "words"+"space" in pcode run */
Bits	encode[2048];	/* must be as long as "codes" in pcode run */
int	nencode;
char	voweltab[256];		/* voweltab[c] is 1 for a, e, i, o, u, y in either case */
char*	spacep[128*128+1];	/* pointer to words starting with 'xx' */
Biobuf	bin;
Biobuf	bout;

char*	codefile = "/sys/lib/amspell";
char*	brfile = "/sys/lib/brspell";
char*	Usage = "usage";

/*
 * Parse options, load the dictionary named by codefile, then check
 * every line of standard input as one word.
 *
 * Options handled here:
 *	-a	input lines are "x:y:word"; unknown words are echoed as "x:y:word"
 *	-b	call ise() and, unless -f was already given, use brfile
 *	-c	print one character per word: '-' unknown, '+' known
 *	-C	like -c, but known words print a digit built from the
 *		suffix and prefix counts
 *	-v	print the affix string and the word for derived words
 *	-x	counted in xflag (used outside this view)
 *	-f file	use file as the dictionary
 *
 * Does not return; the loop ends through exits() at end of input.
 */
void
main(int argc, char *argv[])
{
	char *ep, *cp;
	char *dp;
	int j, i, c;
	int low;
	Bits h;

	Binit(&bin, 0, OREAD);
	Binit(&bout, 1, OWRITE);
	for(i=0; c = "aeiouyAEIOUY"[i]; i++)
		voweltab[c] = 1;
	while(argc > 1) {
		if(argv[1][0] != '-')
			break;
		/* `continue' inside the switch moves on to the next option letter */
		for(i=1; c = argv[1][i]; i++)
		switch(c) {
		default:
			/* NOTE(review): the usage text does not mention -a */
			fprint(2, "usage: spell [-bcCvx] [-f file]\n");
			exits(Usage);

		case 'a':
			aflag++;
			continue;

		case 'b':
			ise();
			if(!fflag)
				codefile = brfile;
			continue;

		case 'C':		/* for "correct" */
			vflag++;
			/* fall through */
		case 'c':		/* for ocr */
			cflag++;
			continue;

		case 'v':
			vflag++;
			continue;

		case 'x':
			xflag++;
			continue;

		case 'f':
			if(argc <= 2) {
				fprint(2, "spell: -f requires another argument\n");
				exits(Usage);
			}
			argv++;
			argc--;
			codefile = argv[1];
			fflag++;
			/* the file name used up the next argument; skip the rest of this one */
			goto brk;
		}
	brk:
		argv++;
		argc--;
	}
	readdict(codefile);
	if(argc > 1) {
		fprint(2, "usage: spell [-bcCvx] [-f file]\n");
		exits(Usage);
	}
	if(aflag)
		cflag = vflag = 0;

	for(;;) {
		affix[0] = 0;
		original = Brdline(&bin, '\n');
		if(original == 0)
			exits(0);
		/* replace the trailing newline with a terminator */
		original[Blinelen(&bin)-1] = 0;
		low = 0;

		if(aflag) {
			/* skip past the second ':'; a line without two of them ends the run */
			acmeid = original;
			while(*original != ':')
				if(*original++ == 0)
					exits(0);
			while(*++original != ':')
				if(*original == 0)
					exits(0);
			*original++ = 0;
		}
		/* copy into word[], truncating to fit, and count lower-case letters */
		for(ep=word,dp=original; j = *dp; ep++,dp++) {
			if(ISLOWER(j))
				low++;
			if(ep >= word+sizeof(word)-1)
				break;
			*ep = j;
		}
		*ep = 0;

		if(ISDIGIT(word[0]) && ordinal())
			continue;

		h = 0;
		/* no lower-case letters at all: try as is, else fold all but the first */
		if(!low && !(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)))
			for(cp=original+1,dp=word+1; dp<ep; dp++,cp++)
				*dp = Tolower(*cp);
		if(!h)
		for(;;) {	/* at most twice */
			if(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH))
				break;
			if(h = trysuff(ep,0,ALL|STOP|DONT_TOUCH))
				break;
			if(!ISUPPER(word[0]))
				break;
			/*
			 * Retry with the first letter folded too.  The copy is
			 * bounded because original may be longer than word[];
			 * ep still marks the (possibly truncated) end.
			 */
			cp = original;
			dp = word;
			while(dp < word+sizeof(word)-1 && (*dp = *cp++)) {
				if(!low)
					*dp = Tolower(*dp);
				dp++;
			}
			*dp = 0;
			word[0] = Tolower(word[0]);
		}

		if(cflag) {
			if(!h || Set(h,STOP))
				print("-");
			else if(!vflag)
				print("+");
			else
				print("%c",'0' + (suffcount>0) +
				   (prefcount>4? 8: 2*prefcount));
		} else if(!h || Set(h,STOP)) {
			/* not recognised (or a STOP word): report it */
			if(aflag)
				Bprint(&bout, "%s:%s\n", acmeid, original);
			else
				Bprint(&bout, "%s\n", original);
		} else if(affix[0] != 0 && affix[0] != '.')
			print("%s\t%s\n", affix, original);
	}
	/* not reached */
}

/*	strip exactly one suffix and do
 *	indicated routine(s), which may recursively
 *	strip suffixes
 *
 *	ep	one past the last character of the current stem in word[]
 *	lev	derivation level of the caller; raised by DLEV here
 *	flag	bits matched against each rule's affixable field (MONO is cleared)
 *
 *	Returns the handler's result, or 0 when no rule applies.
 */
Bits
trysuff(char* ep, int lev, int flag)
{
	Suftab *t;
	char *cp, *sp;
	Bits h = 0;
	int initchar;

	flag &= ~MONO;
	lev += DLEV;
	if(lev < DSIZ) {
		deriv[lev] = emptyderiv;
		deriv[lev-1] = emptyderiv;
	}
	/* an empty stem has no last character; ep[-1] would lie outside word[] */
	if(ep <= word)
		return h;
	initchar = ep[-1];
	if(!ISLOWER(initchar))
		return h;
	for(t=suftab[initchar-'a']; sp=t->suf; t++) {
		/* suffixes are stored reversed: compare backwards from ep */
		cp = ep;
		while(*sp)
			/* a suffix longer than the stem cannot match; stay inside word[] */
			if(cp <= word || *--cp != *sp++)
				goto next;
		/* require a vowel somewhere before the n1 characters to be removed */
		for(sp=ep-t->n1; --sp >= word && !ISVOWEL(*sp);)
			;
		if(sp < word)
			continue;
		if(!(t->affixable & flag))
			return 0;
		h = (*t->p1)(ep-t->n1, t->d1, t->a1, lev+1, t->flag|STOP);
		if(!h && t->p2!=0) {
			/* discard what the failed first attempt recorded */
			if(lev < DSIZ) {
				deriv[lev] = emptyderiv;
				deriv[lev+1] = emptyderiv;
			}
			h = (*t->p2)(ep-t->n2, t->d2, t->a2, lev, t->flag|STOP);
		}
		/* only the first matching rule is ever tried */
		break;
	next:;
	}
	return h;
}

/*
 * Handler that always fails.  Used by table entries that exist only to
 * stop trysuff from going on to a later, shorter suffix.
 */
Bits
nop(char* ep, char* d, char* a, int lev, int flag)
{
	USED(ep, d, a, lev, flag);
	return 0;
}

/*
 * Like strip, but returns 0 without trying when the characters at the
 * cut (ep[-1], ep[0]) form one of the listed vowel pairs, or when
 * ep[0], ep[-1] and ep[-2] are all the same character.
 */
Bits
cstrip(char* ep, char* d, char* a, int lev, int flag)
{
	int temp = ep[0];

	if(ISVOWEL(temp) && ISVOWEL(ep[-1])) {
		switch(pair(ep[-1],ep[0])) {
		case pair('a', 'a'):
		case pair('a', 'e'):
		case pair('a', 'i'):
		case pair('e', 'a'):
		case pair('e', 'e'):
		case pair('e', 'i'):
		case pair('i', 'i'):
		case pair('o', 'a'):
			return 0;
		}
	} else
	if(temp==ep[-1]&&temp==ep[-2])
		return 0;
	return strip(ep,d,a,lev,flag);
}

/*
 * Try the stem that ends at ep, in this order:
 *   1. trypref on the stem as it stands; a result with MONO set is
 *      discarded when both *ep and ep[-2] are vowels;
 *   2. if the suffix starts with a vowel and the stem ends in a doubled
 *      non-vowel, trypref on the stem minus one letter, with MONO added;
 *   3. trysuff, to remove a further suffix.
 * d is unused; a is handed to trypref.
 */
Bits
strip(char* ep, char* d, char* a, int lev, int flag)
{
	Bits h = trypref(ep, a, lev, flag);

	USED(d);
	if(Set(h,MONO) && ISVOWEL(*ep) && ISVOWEL(ep[-2]))
		h = 0;
	if(h)
		return h;
	if(ISVOWEL(*ep) && !ISVOWEL(ep[-1]) && ep[-1]==ep[-2]) {
		h = trypref(ep-1,a,lev,flag|MONO);
		if(h)
			return h;
	}
	return trysuff(ep,lev,flag);
}

Bits
s(char* ep, char* d, char* a, int lev, int flag)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -