⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcode.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
字号:
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include "code.h"

/*
 * read an annotated spelling list in the form
 *	word <tab> affixcode [ , affixcode ] ...
 * print a reencoded version
 *	octal <tab> word
 *
 * NOTE(review): the "octal <tab> word" line above is the original
 * description; what pdict() actually writes is the binary layout
 * documented in the comment preceding pdict().
 */

enum
{
	Maxprefix	= 15,		/* largest shared-prefix count that fits in the 0x7800 field */
	Maxcode		= 0x07ff,	/* largest encodes[] index that fits in the 0x07ff field */
};

typedef	struct	Dict	Dict;
struct	Dict
{
	char*	word;		/* NUL-terminated word, stored in space[] */
	int	encode;		/* value returned by typecode() for this word */
};

Dict	words[200000];		/* every word read so far */
char	space[500000];		/* backing store for the word strings */
long	encodes[4094];		/* distinct affix bit masks seen so far */
long	nspace;			/* bytes of space[] in use */
long	nwords;			/* entries of words[] in use */
int	ncodes;			/* entries of encodes[] in use */
Biobuf	bout;			/* buffered standard output */

void	readinput(int f);
long	typecode(char *str);
int	wcmp(void*, void*);
void	pdict(void);
void	sput(int);
void	lput(long);

/*
 * Read every file named on the command line (or standard input when
 * none is given), sort the collected words, and write the encoded
 * dictionary to standard output.  Statistics go to standard error.
 */
void
main(int argc, char *argv[])
{
	int f;

	/*
	 * NOTE(review): bout is never flushed explicitly; presumably
	 * libbio flushes write buffers at exits() -- confirm in bio(2).
	 */
	Binit(&bout, 1, OWRITE);
	nwords = 0;
	nspace = 0;
	ncodes = 0;
	if(argc <= 1)
		readinput(0);
	while(argc > 1) {
		f = open(argv[1], OREAD);
		if(f < 0) {
			fprint(2, "Cannot open %s\n", argv[1]);
			exits("open");
		}
		readinput(f);
		close(f);	/* readinput() does not close the descriptor */
		argc--;
		argv++;
	}
	fprint(2, "words = %ld; space = %ld; codes = %d\n",
		nwords, nspace, ncodes);
	qsort(words, nwords, sizeof(words[0]), wcmp);
	pdict();
	exits(0);
}

/*
 * qsort comparison: order two Dict entries by strcmp of their words.
 */
int
wcmp(void *a, void *b)
{
	return strcmp(((Dict*)a)->word, ((Dict*)b)->word);
}

/*
 * Read newline-terminated lines from descriptor f.  On each line the
 * first whitespace-delimited token is the word; whatever follows the
 * separating whitespace is handed to typecode() as the affix list.
 * The word is copied into space[] and recorded in words[].
 * Exits with an error if either table would overflow.
 */
void
readinput(int f)
{
	long i;
	char *code, *line, *bword;
	Biobuf buf;

	Binit(&buf, f, OREAD);
	while((line = Brdline(&buf, '\n')) != nil) {
		/* replace the trailing newline with a terminator */
		line[Blinelen(&buf)-1] = 0;

		/* isolate the word: skip leading blanks, scan to next blank */
		code = line;
		while(isspace(*code))
			code++;
		bword = code;
		while(*code && !isspace(*code))
			code++;
		i = code-bword;

		/* make sure there is room BEFORE writing anything */
		if(nwords >= nelem(words)) {
			fprint(2, "words array too small\n");
			exits("words");
		}
		if(nspace+i+1 > nelem(space)) {
			fprint(2, "space array too small\n");
			exits("space");
		}

		memmove(space+nspace, bword, i);
		words[nwords].word = space+nspace;
		nspace += i;
		space[nspace] = 0;
		nspace++;

		/* step past the separator to the start of the affix list */
		if(*code) {
			*code++ = 0;
			while(isspace(*code))
				code++;
		}
		words[nwords].encode = typecode(code);
		nwords++;
	}
	Bterm(&buf);
}

/*
 * An affix code name and the bit(s) it contributes; the bit values
 * come from code.h.
 */
typedef	struct	Class	Class;
struct	Class
{
	char*	codename;
	long	bits;
};

/*
 * One table per initial letter, each terminated by an entry whose
 * codename is nil.
 */
Class	codea[]  =
{
	{ "a", ADJ },
	{ "adv", ADV },
	{ 0 }
};
Class	codec[] =
{
	{ "comp", COMP },
	{ 0 }
};
Class	coded[] =
{
	{ "d", DONT_TOUCH},
	{ 0 }
};
Class	codee[] =
{
	{ "ed",	ED },
	{ "er", ACTOR },
	{ 0 }
};
Class	codei[] =
{
	{ "in", IN },
	{ "ion", ION },
	{ 0 }
};
Class	codem[] =
{
	{ "man", MAN },
	{ "ms", MONO },
	{ 0 }
};
Class	coden[] =
{
	{ "n", NOUN },
	{ "na", N_AFFIX },
	{ "nopref", NOPREF },
	{ 0 }
};
Class	codep[] =
{
	{ "pc", PROP_COLLECT },
	{ 0 }
};
Class	codes[] =
{
	{ "s", STOP },
	{ 0 }
};
Class	codev[] =
{
	{ "v", VERB },
	{ "va", V_AFFIX },
	{ "vi", V_IRREG },
	{ 0 }
};
Class	codey[] =
{
	{ "y", _Y },
	{ 0 }
};
Class	codez[] =
{
	{ 0 }
};

/*
 * Indexed by (first letter - 'a'); letters with no codes share the
 * empty table codez.
 */
Class*	codetab[] =
{
	codea,	/* a */
	codez,	/* b */
	codec,	/* c */
	coded,	/* d */
	codee,	/* e */
	codez,	/* f */
	codez,	/* g */
	codez,	/* h */
	codei,	/* i */
	codez,	/* j */
	codez,	/* k */
	codez,	/* l */
	codem,	/* m */
	coden,	/* n */
	codez,	/* o */
	codep,	/* p */
	codez,	/* q */
	codez,	/* r */
	codes,	/* s */
	codez,	/* t */
	codez,	/* u */
	codev,	/* v */
	codez,	/* w */
	codez,	/* x */
	codey,	/* y */
	codez,	/* z */
};

/*
 * Translate a comma-separated list of affix code names into an index
 * into encodes[].  The bits of all named codes are or'ed together;
 * the resulting mask is looked up in encodes[] and appended if new.
 *
 * A token that cannot be matched (including an empty token or one
 * that does not start with a lower-case letter) produces a diagnostic
 * and a return value of 0, as in the original unknown-code path.
 *
 * NOTE(review): a token matches the first table entry of which it is
 * a prefix (e.g. "ad" selects "adv").  That is the original
 * behaviour and is preserved here; confirm whether it is intended.
 */
long
typecode(char *str)
{
	Class *p;
	long code;
	int n, i;
	char *s;

	code = 0;
	for(;;) {
		/* s marks the end of the current token */
		for(s=str; *s != 0 && *s != ','; s++)
			;
		n = s-str;

		/* only 'a'..'z' may index codetab; anything else is unknown */
		p = nil;
		if(*str >= 'a' && *str <= 'z')
			for(p = codetab[*str-'a']; p->codename != nil; p++)
				if(strncmp(str, p->codename, n) == 0)
					break;
		if(p == nil || p->codename == nil) {
			fprint(2, "Unknown affix code \"%s\"\n", str);
			return 0;
		}

		code |= p->bits;
		if(*s == 0)
			break;
		str = s+1;
	}

	for(i=0; i<ncodes; i++)
		if(encodes[i] == code)
			return i;

	/*
	 * a new mask: it must fit both in encodes[] and in the
	 * 11-bit index field that pdict() writes
	 */
	if(ncodes >= nelem(encodes) || ncodes > Maxcode) {
		fprint(2, "encodes array too small\n");
		exits("encodes");
	}
	encodes[i] = code;
	ncodes++;
	return i;
}

/*
 * Write the low 16 bits of s to bout, most significant byte first.
 */
void
sput(int s)
{
	Bputc(&bout, s>>8);
	Bputc(&bout, s);
}

/*
 * Write the low 32 bits of l to bout, most significant byte first.
 */
void
lput(long l)
{
	Bputc(&bout, l>>24);
	Bputc(&bout, l>>16);
	Bputc(&bout, l>>8);
	Bputc(&bout, l);
}

/*
 * spit out the encoded dictionary
 * all numbers are encoded big-endian.
 *	struct
 *	{
 *		short	ncodes;
 *		long	encodes[ncodes];
 *		struct
 *		{
 *			short	encode;
 *			char	word[*];
 *		} words[*];
 *	};
 * 0x8000 flag for code word
 * 0x7800 count of number of common bytes with previous word
 * 0x07ff index into codes array for affixes
 *
 * words[] must already be sorted.  Only the part of each word that
 * differs from its predecessor (after at most Maxprefix shared
 * bytes) is written.  The total byte count is reported on standard
 * error.
 */
void
pdict(void)
{
	long i, count;
	int encode, j, c;
	char *lastword, *thisword, *word;

	sput(ncodes);
	for(i=0; i<ncodes; i++)
		lput(encodes[i]);
	count = ncodes*4 + 2;

	lastword = "";
	for(i=0; i<nwords; i++) {
		word = words[i].word;

		/* j = number of leading bytes shared with the previous word */
		thisword = word;
		for(j=0; *thisword == *lastword; j++) {
			if(*thisword == 0) {
				fprint(2, "identical words: %s\n", word);
				break;
			}
			thisword++;
			lastword++;
		}
		if(j > Maxprefix)
			j = Maxprefix;

		encode = words[i].encode;
		c = (1<<15) | (j<<11) | encode;
		sput(c);
		count += 2;

		/* emit the unshared tail, without its terminator */
		for(thisword=word+j; c = *thisword; thisword++) {
			Bputc(&bout, c);
			count++;
		}
		lastword = word;
	}
	fprint(2, "output bytes = %ld\n", count);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -