⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 msgclass.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
字号:
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include "msgdb.h"

/*
 * msgclass: classify a tokenized message against a set of word-count
 * databases (one per class, given with -d) and print the winning class
 * together with the per-class probabilities and the words that drove
 * the decision.
 */

/* Print the command synopsis on standard error and exit with status "usage". */
void
usage(void)
{
	fprint(2, "usage: upas/msgclass [-a] [-d name dbfile]... [-l lockfile] [-m mul] [-t thresh] [tokenfile ...]\n");
	exits("usage");
}

enum
{
	MAXBEST = 32,	/* capacity of best[] */
	MAXLEN = 64,	/* bytes kept of a word, including the NUL */
	MAXTAB = 256,	/* maximum number of -d classes */
};

/* One class database named on the command line with -d. */
typedef struct Ndb Ndb;
struct Ndb
{
	char *name;	/* class name printed in the output */
	char *file;	/* database file name passed to mdopen */
	Msgdb *db;	/* handle returned by mdopen */
	double p;	/* normalized probability of this class for the input */
	long nmsg;	/* value stored under "*From*"; used as the class's message count */
};

/* Per-word statistics across all classes. */
typedef struct Word Word;
struct Word
{
	char s[MAXLEN];		/* the word, truncated to fit */
	int count[MAXTAB];	/* weighted occurrence count per class */
	double p[MAXTAB];	/* normalized, clamped probability per class */
	double mp;		/* largest entry of p[] */
	int mi; /* w.p[w.mi] = w.mp */
	int nmsg;
};

Ndb db[MAXTAB];
int ndb;

int add;	/* -a: fold the input counts into the winning database */
/* NOTE(review): mul is zero unless -m is given, so -a without -m writes back unchanged counts - confirm intended. */
int mul;
Msgdb *indb;	/* word counts of the input being classified */

Word best[MAXBEST];	/* most specific words so far, sorted by descending mp */
int mbest = 15;		/* how many words to keep in best[] */
int nbest;		/* number of valid entries in best[] */

void process(Biobuf*, char*);
void lockfile(char*);

/*
 * Insert *w, labelled with the word s, into best[], which is kept
 * sorted by descending mp and holds at most mbest entries.  A word
 * that would land at or beyond position mbest is discarded; otherwise
 * the last entry is dropped if the table is already full.  The new
 * word goes after any existing entries with an equal mp.
 */
void
noteword(Word *w, char *s)
{
	int i;

	/* find the slot just past the last entry whose mp is >= w->mp */
	for(i=nbest-1; i>=0; i--)
		if(w->mp < best[i].mp)
			break;
	i++;

	if(i >= mbest)
		return;
	if(nbest == mbest)
		nbest--;	/* make room by dropping the weakest entry */
	if(i < nbest)
		memmove(&best[i+1], &best[i], (nbest-i)*sizeof(best[0]));
	best[i] = *w;
	strecpy(best[i].s, best[i].s+MAXLEN, s);
	nbest++;
}

/*
 * Read the token files (or standard input), open the class databases,
 * pick the mbest most specific words, combine their per-class
 * probabilities, and print the verdict.  With -a, the input counts
 * (scaled by mul) are added to the winning class's database.
 */
void
main(int argc, char **argv)
{
	int i, bad, m, tot, nn, j;
	Biobuf bin, *b, bout;
	char *s, *lf;
	double totp, p, thresh;
	long n;
	Word w;

	lf = nil;
	thresh = 0;
	ARGBEGIN{
	case 'a':
		add = 1;
		break;
	case 'd':
		if(ndb >= MAXTAB)
			sysfatal("too many db classes");
		db[ndb].name = EARGF(usage());
		db[ndb].file = EARGF(usage());
		ndb++;
		break;
	case 'l':
		lf = EARGF(usage());
		break;
	case 'm':
		mul = atoi(EARGF(usage()));
		break;
	case 't':
		thresh = atof(EARGF(usage()));
		break;
	default:
		usage();
	}ARGEND

	if(ndb == 0){
		fprint(2, "must have at least one -d option\n");
		usage();
	}

	/* accumulate the input word counts into indb */
	indb = mdopen(nil, 1);
	if(argc == 0){
		Binit(&bin, 0, OREAD);
		process(&bin, "<stdin>");
		Bterm(&bin);
	}else{
		bad = 0;
		for(i=0; i<argc; i++){
			if((b = Bopen(argv[i], OREAD)) == nil){
				fprint(2, "opening %s: %r\n", argv[i]);
				bad = 1;
				continue;
			}
			process(b, argv[i]);
			Bterm(b);
		}
		if(bad)
			exits("open inputs");
	}

	lockfile(lf);

	/* open every class database; report all failures before giving up */
	bad = 0;
	for(i=0; i<ndb; i++){
		if((db[i].db = mdopen(db[i].file, 0)) == nil){
			fprint(2, "opendb %s: %r\n", db[i].file);
			bad = 1;
			continue;	/* never query a database that failed to open */
		}
		db[i].nmsg = mdget(db[i].db, "*From*");
	}
	if(bad)
		exits("open databases");

	/* run conditional probabilities of input words, getting 15 most specific */
	mdenum(indb);
	nbest = 0;
	while(mdnext(indb, &s, &n) >= 0){
		tot = 0;
		totp = 0.0;
		for(i=0; i<ndb; i++){
			/* counts from the first class are weighted three times */
			nn = mdget(db[i].db, s)*(i==0 ? 3 : 1);
			tot += nn;
			w.count[i] = nn;
			/*
			 * A class with no recorded messages would divide by
			 * zero; treat any occurrence there as certain and no
			 * occurrence as impossible, which is what the clamp
			 * below yields for very small message counts.
			 */
			if(db[i].nmsg > 0)
				p = w.count[i]/(double)db[i].nmsg;
			else
				p = w.count[i] > 0 ? 1.0 : 0.0;
			if(p >= 1.0)
				p = 1.0;
			w.p[i] = p;
			totp += p;
		}
		/* ignore words seen fewer than twice overall */
		if(tot < 2)
			continue;

		/* normalize across classes and clamp away from 0 and 1 */
		w.mp = 0.0;
		for(i=0; i<ndb; i++){
			p = w.p[i];
			p /= totp;
			if(p < 0.001)
				p = 0.001;
			else if(p > 0.999)
				p = 0.999;
			if(p > w.mp){
				w.mp = p;
				w.mi = i;
			}
			w.p[i] = p;
		}
		noteword(&w, s);
	}

	/* compute conditional probabilities of message classes using 15 most specific */
	totp = 0.0;
	for(i=0; i<ndb; i++){
		p = 1.0;
		for(j=0; j<nbest; j++)
			p *= best[j].p[i];
		db[i].p = p;
		totp += p;
	}
	for(i=0; i<ndb; i++)
		db[i].p /= totp;

	/* pick the most probable class; below thresh means no verdict */
	m = 0;
	for(i=1; i<ndb; i++)
		if(db[i].p > db[m].p)
			m = i;

	Binit(&bout, 1, OWRITE);
	if(db[m].p < thresh)
		m = -1;
	if(m >= 0)
		Bprint(&bout, "%s", db[m].name);
	else
		Bprint(&bout, "inconclusive");
	for(j=0; j<ndb; j++)
		Bprint(&bout, " %s=%g", db[j].name, db[j].p);
	Bprint(&bout, "\n");

	/* one line per deciding word with its per-class probabilities */
	for(i=0; i<nbest; i++){
		Bprint(&bout, "%s", best[i].s);
		for(j=0; j<ndb; j++)
			Bprint(&bout, " %s=%g", db[j].name, best[i].p[j]);
		Bprint(&bout, "\n");
	}
	Bterm(&bout);

	if(m >= 0 && add){
		mdenum(indb);
		while(mdnext(indb, &s, &n) >= 0)
			mdput(db[m].db, s, mdget(db[m].db, s)+n*mul);
		mdclose(db[m].db);
	}
	exits(nil);
}

/*
 * Read newline-terminated lines from b and add them to indb.  Text
 * after the last space on a line is parsed with atoi as the count for
 * the text before it; a line with no space counts once.  The second
 * argument (the input's name) is unused.
 */
void
process(Biobuf *b, char*)
{
	char *s;
	char *p;
	long n;

	while((s = Brdline(b, '\n')) != nil){
		s[Blinelen(b)-1] = 0;	/* overwrite the newline */
		if((p = strrchr(s, ' ')) != nil){
			*p++ = 0;
			n = atoi(p);
		}else
			n = 1;
		mdput(indb, s, mdget(indb, s)+n);
	}
}

int tpid;	/* pid of the child forked by lockfile */

/* atexit handler: post the note "die" to the child forked by lockfile. */
void
killtickle(void)
{
	postnote(PNPROC, tpid, "die");
}

/*
 * Acquire the lock file s by opening it, retrying while the open fails
 * with an error mentioning "file is locked" or "exclusive lock".  The
 * delay between attempts starts at 50ms and grows by half each time
 * until it is at least a second; after more than 120 seconds of
 * waiting, or on any other error, the program exits via sysfatal.
 * A nil s means no locking is wanted.
 *
 * On success a child process is forked that keeps its copy of the
 * descriptor and calls dirfstat on it every 30 seconds (presumably to
 * keep the lock from timing out - confirm against the file server's
 * exclusive-use semantics).  The parent closes its copy and arranges
 * for the child to be killed at exit.
 */
void
lockfile(char *s)
{
	int fd, t, w;
	char err[ERRMAX];

	if(s == nil)
		return;
	w = 50;
	t = 0;
	for(;;){
		fd = open(s, OREAD);
		if(fd >= 0)
			break;
		rerrstr(err, sizeof err);
		/* only a lock conflict is worth retrying */
		if(strstr(err, "file is locked")==nil && strstr(err, "exclusive lock")==nil)
			break;
		sleep(w);
		t += w;
		if(w < 1000)
			w = (w*3)/2;
		if(t > 120*1000)
			break;
	}
	if(fd < 0)
		sysfatal("could not lock %s", s);
	switch(tpid = fork()){
	case -1:
		sysfatal("fork: %r");
	case 0:
		for(;;){
			sleep(30*1000);
			free(dirfstat(fd));
		}
		_exits(nil);
	default:
		break;
	}
	close(fd);
	atexit(killtickle);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -