⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bayes.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
字号:
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>
#include "hash.h"

/*
 * bayes: score message word-hashes against a set of box word-hashes.
 *
 * Arguments are "boxhash ... ~ msghash ...".  Every box hash is loaded
 * into tab[]; then, for each message hash, the mbest words with the
 * highest per-box probability are collected in best[], the per-box
 * products of those probabilities are normalized, and the box with the
 * largest share is printed together with that share.
 */

enum
{
	MAXTAB = 256,	/* capacity of tab[] and of the per-word arrays */
	MAXBEST = 32,	/* capacity of best[]; upper limit for -m */
};

typedef struct Table Table;
struct Table
{
	char *file;	/* command-line name of the box hash */
	Hash *hash;	/* contents read by hread */
	int nmsg;	/* count of the "*nmsg*" entry, or 1 if absent/zero */
};

typedef struct Word Word;
struct Word
{
	Stringtab *s;	/* from hmsg */
	int count[MAXTAB];	/* counts from each table */
	double p[MAXTAB];	/* probabilities from each table */
	double mp;	/* max probability */
	int mi;		/* w.p[w.mi] = w.mp */
};

Table tab[MAXTAB];
int ntab;

Word best[MAXBEST];	/* kept sorted by descending mp */
int mbest;		/* how many words to keep (set by -m, default 15) */
int nbest;		/* how many entries of best[] are in use */

int debug;

/*
 * Print the usage message on fd 2 and exit with status "usage".
 */
void
usage(void)
{
	fprint(2, "usage: bayes [-Dk] [-m maxword] boxhash ... ~ msghash ...\n");
	exits("usage");
}

/*
 * Allocate n zeroed bytes (mallocz with clr=1); sysfatal when the
 * allocation fails, so the result is never nil.
 */
void*
emalloc(int n)
{
	void *v;

	v = mallocz(n, 1);
	if(v == nil)
		sysfatal("out of memory");
	return v;
}

/*
 * Offer *w to the best[] list.
 *
 * best[0..nbest-1] is ordered by descending mp.  The word is inserted
 * after every entry whose mp is strictly greater, i.e. ahead of entries
 * with an equal mp.  If that slot is at or beyond mbest the word is
 * ignored; if the list is already full the last entry is dropped to
 * make room.  The Word is copied by value.
 */
void
noteword(Word *w)
{
	int i;

	/* scan from the tail for the last entry that still beats w */
	for(i=nbest-1; i>=0; i--)
		if(w->mp < best[i].mp)
			break;
	i++;

	if(i >= mbest)
		return;
	if(nbest == mbest)
		nbest--;	/* full: the current last entry falls off */
	if(i < nbest)
		memmove(&best[i+1], &best[i], (nbest-i)*sizeof(best[0]));
	best[i] = *w;
	nbest++;
}

/*
 * Read the hash stored in file s into a freshly allocated Hash.
 * sysfatal if the file cannot be opened.
 * NOTE(review): Bopenlock and Breadhash are defined outside this file;
 * the meaning of Breadhash's third argument (1) should be confirmed there.
 */
Hash*
hread(char *s)
{
	Hash *h;
	Biobuf *b;

	if((b = Bopenlock(s, OREAD)) == nil)
		sysfatal("open %s: %r", s);
	h = emalloc(sizeof(Hash));
	Breadhash(b, h, 1);
	Bterm(b);
	return h;
}

/*
 * Options:
 *	-D		after each result, print one line per kept word
 *	-k		append the kept words and their probabilities to the result line
 *	-m maxword	keep at most maxword words (no more than MAXBEST)
 */
void
main(int argc, char **argv)
{
	int i, j, a, mi, oi, tot, keywords;
	double totp, p, xp[MAXTAB];
	Hash *hmsg;
	Word w;
	Stringtab *s, *t;
	Biobuf bout;

	mbest = 15;
	keywords = 0;
	ARGBEGIN{
	case 'D':
		debug = 1;
		break;
	case 'k':
		keywords = 1;
		break;
	case 'm':
		mbest = atoi(EARGF(usage()));
		if(mbest > MAXBEST)
			sysfatal("cannot keep more than %d words", MAXBEST);
		break;
	default:
		usage();
	}ARGEND

	/* locate the "~" that separates box hashes from message hashes */
	for(i=0; i<argc; i++)
		if(strcmp(argv[i], "~") == 0)
			break;
	if(i > MAXTAB)
		sysfatal("cannot handle more than %d tables", MAXTAB);
	/*
	 * Need at least one box before "~" and one message after it.
	 * Without a box, xp[0] and tab[0].file below would be used unset.
	 */
	if(i == 0 || i+1 >= argc)
		usage();

	for(i=0; i<argc; i++){
		if(strcmp(argv[i], "~") == 0)
			break;
		tab[ntab].file = argv[i];
		tab[ntab].hash = hread(argv[i]);
		s = findstab(tab[ntab].hash, "*nmsg*", 6, 1);
		/* nmsg is used as a divisor below, so never let it be zero */
		if(s == nil || s->count == 0)
			tab[ntab].nmsg = 1;
		else
			tab[ntab].nmsg = s->count;
		ntab++;
	}

	Binit(&bout, 1, OWRITE);
	oi = ++i;	/* index of the first message hash */
	for(a=i; a<argc; a++){
		hmsg = hread(argv[a]);
		nbest = 0;
		for(s=hmsg->all; s; s=s->link){
			w.s = s;
			tot = 0;
			totp = 0.0;
			for(i=0; i<ntab; i++){
				t = findstab(tab[i].hash, s->str, s->n, 0);
				if(t == nil)
					w.count[i] = 0;
				else
					w.count[i] = t->count;
				tot += w.count[i];
				/* fraction of this box's messages, capped at 1 */
				p = w.count[i]/(double)tab[i].nmsg;
				if(p >= 1.0)
					p = 1.0;
				w.p[i] = p;
				totp += p;
			}

			if(tot < 5){		/* word does not appear enough; give to box 0 */
				w.p[0] = 0.5;
				for(i=1; i<ntab; i++)
					w.p[i] = 0.1;
				w.mp = 0.5;
				w.mi = 0;
				noteword(&w);
				continue;
			}

			/*
			 * Normalize across boxes, clamp to [0.01, 0.99], and
			 * remember the largest.  tot >= 5 guarantees totp > 0.
			 */
			w.mp = 0.0;
			for(i=0; i<ntab; i++){
				p = w.p[i];
				p /= totp;
				if(p < 0.01)
					p = 0.01;
				else if(p > 0.99)
					p = 0.99;
				if(p > w.mp){
					w.mp = p;
					w.mi = i;
				}
				w.p[i] = p;
			}
			noteword(&w);
		}

		/* per-box product over the kept words, then normalize to sum 1 */
		totp = 0.0;
		for(i=0; i<ntab; i++){
			p = 1.0;
			for(j=0; j<nbest; j++)
				p *= best[j].p[i];
			xp[i] = p;
			totp += p;
		}
		for(i=0; i<ntab; i++)
			xp[i] /= totp;

		/* first box with the largest share wins */
		mi = 0;
		for(i=1; i<ntab; i++)
			if(xp[i] > xp[mi])
				mi = i;

		/* prefix with the message name only when there are several */
		if(oi != argc-1)
			Bprint(&bout, "%s: ", argv[a]);
		Bprint(&bout, "%s %f", tab[mi].file, xp[mi]);
		if(keywords){
			for(i=0; i<nbest; i++){
				Bprint(&bout, " ");
				Bwrite(&bout, best[i].s->str, best[i].s->n);
				Bprint(&bout, " %f", best[i].p[mi]);
			}
		}
		Bprint(&bout, "\n");

		if(debug){
			for(i=0; i<nbest; i++){
				Bwrite(&bout, best[i].s->str, best[i].s->n);
				Bprint(&bout, " %f", best[i].p[mi]);
				/* also show the box this word favours, when it is not the winner */
				if(best[i].p[mi] < best[i].mp)
					Bprint(&bout, " (%f %s)", best[i].mp, tab[best[i].mi].file);
				Bprint(&bout, "\n");
			}
		}

		/*
		 * best[].s points at Stringtab entries reached through
		 * hmsg->all, so hmsg may only be released after the last
		 * output that dereferences them.
		 * NOTE(review): freehash is defined outside this file;
		 * presumably it frees those entries - confirm there.
		 */
		freehash(hmsg);
	}
	Bterm(&bout);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -