📄 common.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
字号:
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>
#include "spam.h"

enum {
	Quanta	= 8192,		/* bytes requested per Bread() while looking for the header end */
	Minbody = 6000,		/* readmsg tries to have at least this much body in the buffer */
	HdrMax	= 15,		/* size of the scratch buffer used by endofhdr() */
};

typedef struct keyword Keyword;
typedef struct word Word;

/* a literal prefix and its length; tables end with a nil string */
struct word{
	char	*string;
	int	n;
};

/* maps an action name from the pattern file to its action code */
struct	keyword{
	char	*string;
	int	value;
};

/* tag prefixes that mark a message as HTML (see htmlchk) */
Word	htmlcmds[] =
{
	"html",		4,
	"!doctype html", 13,
	0,
};

/* tag prefixes that are kept in the canonical text once HTML has been seen */
Word	hrefs[] =
{
	"a href=",	7,
	"a title=",	8,
	"a target=",	9,
	"base href=",	10,
	"img src=",	8,
	"img border=",	11,
	"form action=", 12,
	"!--",		3,
	0,
};

/*
 *	RFC822 header keywords to look for for fractured header.
 *	all lengths must be less than HdrMax defined above.
 */
Word	hdrwords[] =
{
	"cc:",			3,
	"bcc:", 		4,
	"to:",			3,
	0,			0,
};

/* action names accepted in the pattern file; the nil entry yields Nactions */
Keyword	keywords[] =
{
	"header",	HoldHeader,
	"line",		SaveLine,
	"hold",		Hold,
	"dump",		Dump,
	"loff",		Lineoff,
	0,		Nactions,
};

/* per-action pattern lists, indexed by action code; filled in by parsepats() */
Patterns patterns[] = {
[Dump]		{ "DUMP:", 0, 0 },
[HoldHeader]	{ "HEADER:", 0, 0 },
[Hold]		{ "HOLD:", 0, 0 },
[SaveLine]	{ "LINE:", 0, 0 },
[Lineoff]	{ "LINEOFF:", 0, 0 },
[Nactions]	{ 0, 0, 0 },
};

static char*	endofhdr(char*, char*);
static	int	escape(char**);
static	int	extract(char*);
static	int	findkey(char*);
static	int	hash(int);
static	int	isword(Word*, char*, int);
static	void	parsealt(Biobuf*, char*, Spat**);

/*
 *	The canonicalizer: convert input to canonical representation
 */

/*
 *	Read a message from bp into a buffer grown with Realloc.
 *
 *	If hsize is non-nil, chunks of Quanta bytes are read until
 *	endofhdr() finds the end of the header, input runs out, or
 *	Maxread bytes have been read without finding it.  If hsize
 *	is nil only the first chunk is read.  One further read is
 *	then attempted so that up to Minbody bytes follow the header.
 *
 *	On return *hsize (if non-nil) is the offset of the end of
 *	the header and *bufsize (if non-nil) is the number of bytes
 *	read.  The buffer always has a NUL stored after the last
 *	byte read.  Returns 0 on a read error or on empty input;
 *	nothing stays allocated in either case.
 */
char*
readmsg(Biobuf *bp, int *hsize, int *bufsize)
{
	char *p, *buf;
	int n, offset, eoh, bsize, delta;

	buf = 0;
	offset = 0;
	if(bufsize)
		*bufsize = 0;
	if(hsize)
		*hsize = 0;
	for(;;) {
		buf = Realloc(buf, offset+Quanta+1);
		n = Bread(bp, buf+offset, Quanta);
		if(n < 0){
			free(buf);
			return 0;
		}
		p = buf+offset;			/* start of this chunk */
		offset += n;			/* end of this chunk */
		buf[offset] = 0;
		if(n == 0){
			if(offset == 0){
				/* empty input: release the chunk allocated above */
				free(buf);
				return 0;
			}
			break;
		}
		if(hsize == 0)			/* don't process header */
			break;
		if(p != buf && p[-1] == '\n')	/* check for EOH across buffer split */
			p--;
		p = endofhdr(p, buf+offset);
		if(p)
			break;
		if(offset >= Maxread)		/* gargantuan header - just punt*/
		{
			if(hsize)
				*hsize = offset;
			if(bufsize)
				*bufsize = offset;
			return buf;
		}
	}
	eoh = p-buf;				/* End of header */
	bsize = offset - eoh;			/* amount of body already read */

		/* Read at least Minbody bytes of the body */
	if (bsize < Minbody){
		delta = Minbody-bsize;
		buf = Realloc(buf, offset+delta+1);
		n = Bread(bp, buf+offset, delta);
		if(n > 0) {
			offset += n;
			buf[offset] = 0;
		}
	}
	if(hsize)
		*hsize = eoh;
	if(bufsize)
		*bufsize = offset;
	return buf;
}

/*
 *	Return 1 if the first len bytes of text begin with any
 *	entry of the nil-terminated table wp, 0 otherwise.
 */
static	int
isword(Word *wp, char *text, int len)
{
	for(;wp->string; wp++)
		if(len >= wp->n && strncmp(text, wp->string, wp->n) == 0)
			return 1;
	return 0;
}

/*
 *	Look for the end of the header in [raw, end): a pair of
 *	adjacent newlines whose following text does not start with
 *	one of hdrwords (compared in lower case).  A blank line that
 *	is followed by such a keyword is treated as part of a
 *	fractured header and the scan continues.
 *
 *	Returns a pointer to the byte after the second newline, or
 *	0 if no end of header was found.  The scan reads p[1] and
 *	the text after the blank line, so the caller must keep a NUL
 *	at *end (readmsg does).
 */
static char*
endofhdr(char *raw, char *end)
{
	int i;
	char *p, *q;
	char buf[HdrMax];

	/*
 	 * can't use strchr to search for newlines because
	 * there may be embedded NULL's.
	 */
	for(p = raw; p < end; p++){
		if(*p != '\n' || p[1] != '\n')
			continue;
		p++;
		/* collect the lower-cased start of the next line, up to ':' or newline */
		for(i = 0, q = p+1; i < sizeof(buf) && *q; q++){
			buf[i++] = tolower(*q);
			if(*q == ':' || *q == '\n')
				break;
		}
		if(!isword(hdrwords, buf, i))
			return p+1;
	}
	return 0;
}

/*
 *	Canonicalize the tag text starting at text (just after '<')
 *	into a local buffer and test it against table wp.  '=' escapes
 *	are decoded with escape(), NULs and carriage returns are
 *	dropped, runs of white space become one blank and letters are
 *	lower-cased.  Collection stops at '>', at end, or when the
 *	local buffer is full.
 *
 *	*n (if non-nil) receives the number of input bytes consumed,
 *	including the '>' when one was found.  Returns the result of
 *	isword() on the collected text.
 */
static	int
htmlmatch(Word *wp, char *text, char *end, int *n)
{
	char *cp;
	int i, c, lastc;
	char buf[MaxHtml];

	/*
	 * extract a string up to '>'
	 */

	i = lastc = 0;
	cp = text;
	while (cp < end && i < sizeof(buf)-1){
		c = *cp++;
		if(c == '=')
			c = escape(&cp);
		switch(c){
		case 0:
		case '\r':
			continue;
		case '>':
			goto out;
		case '\n':
		case ' ':
		case '\t':
			if(lastc == ' ')
				continue;
			c = ' ';
			break;
		default:
			c = tolower(c);
			break;
		}
		buf[i++] = lastc = c;
	}
out:
	buf[i] = 0;
	if(n)
		*n = cp-text;
	return isword(wp, buf, i);
}

/*
 *	Decode the escape that follows an '='.  *msg points at the
 *	byte after the '=' and is advanced past whatever is consumed.
 *
 *	"=\n"	the newline is skipped and the byte after it is
 *		consumed and returned
 *	"=2e"	returns '.'	"=2f"	returns '/'
 *	"=20"	returns ' '	"=3d"	returns '='
 *	(hex letters in either case); anything else returns '='
 *	and consumes nothing.
 */
static int
escape(char **msg)
{
	int c;
	char *p;

	p = *msg;
	c = *p;
	if(c == '\n'){
		p++;
		c = *p++;
	} else
	if(c == '2'){
		c = tolower(p[1]);
		if(c == 'e'){
			p += 2;
			c = '.';
		}else
		if(c == 'f'){
			p += 2;
			c = '/';
		}else
		if(c == '0'){
			p += 2;
			c = ' ';
		}
		else c = '=';
	} else {
		if(c == '3' && tolower(p[1]) == 'd')
			p += 2;
		c = '=';
	}
	*msg = p;
	return c;
}

/*
 *	Decide what to do with the tag that starts at *msg (the byte
 *	after a '<').  Returns '<' when the tag should be copied into
 *	the output, leaving *msg alone.  Otherwise the tag is skipped:
 *	*msg is advanced beyond it and the byte that followed the tag
 *	is returned as the current character.
 *
 *	ishtml is static: once a tag from htmlcmds has been seen, all
 *	later calls only keep tags listed in hrefs.
 */
static int
htmlchk(char **msg, char *end)
{
	int n;
	char *p;
	static int ishtml;

	p = *msg;
	if(ishtml == 0){
		ishtml = htmlmatch(htmlcmds, p, end, &n);

		/* If not an HTML keyword, check if it's
		 * an HTML comment (<!comment>).  if so,
		 * skip over it; otherwise copy it in.
		 */
		if(ishtml == 0 && *p != '!')	/* not comment */
			return '<';		/* copy it */
	} else if(htmlmatch(hrefs, p, end, &n))	/* if special HTML string  */
		return '<';			/* copy it */

	/*
	 * this is an uninteresting HTML command; skip over it.
	 */
	p += n;
	*msg = p+1;
	return *p;
}

/*
 * decode a base 64 encode body
 *
 * The text in [msg, end) is decoded into a temporary buffer,
 * which is then canonicalized into buf as body text.
 */
void
conv64(char *msg, char *end, char *buf, int bufsize)
{
	int len, i;
	char *cp;

	len = end - msg;
	if(len < 0)		/* keep the allocation size positive */
		len = 0;
	i = (len*3)/4+1;	/* room for max chars + null */
	cp = Malloc(i);
	len = dec64((uchar*)cp, i, msg, len);
	if(len < 0)
		len = 0;
	else if(len >= i)
		len = i-1;
	/*
	 * convert()'s helpers may look at the byte at cp+len, so
	 * store the terminator the allocation left room for.
	 */
	cp[len] = 0;
	convert(cp, cp+len, buf, bufsize, 1);
	free(cp);
}

/*
 *	Copy [msg, end) to buf in canonical form: NULs and carriage
 *	returns are dropped, runs of blanks, tabs and newlines become
 *	a single blank, and letters are lower-cased.  When isbody is
 *	non-zero, '<' is handed to htmlchk() and '=' to escape() first.
 *
 *	At most bufsize characters are stored and a NUL is then
 *	written after them, so buf must have room for bufsize+1 bytes.
 *
 *	Returns 1 if, with isbody zero, the text contained
 *	"content-transfer-encoding: base64" (any case after the first
 *	'c', which must be followed by lower-case "on"), else 0.
 */
int
convert(char *msg, char *end, char *buf, int bufsize, int isbody)
{
	char *p;
	int c, lastc, base64;

	lastc = 0;
	base64 = 0;
	while(msg < end && bufsize > 0){
		c = *msg++;

		/*
		 * In the body only, try to strip most HTML and
		 * replace certain MIME escape sequences with the character
		 */
		if(isbody) {
			/* repeat while input is being consumed: a skipped tag may expose another '<' or '=' */
			do{
				p = msg;
				if(c == '<')
					c = htmlchk(&msg, end);
				if(c == '=')
					c = escape(&msg);
			} while(p != msg && p < end);
		}
		switch(c){
		case 0:
		case '\r':
			continue;
		case '\t':
		case ' ':
		case '\n':
			if(lastc == ' ')
				continue;
			c = ' ';
			break;
		case 'C':	/* check for MIME base 64 encoding in header */
		case 'c':
			if(isbody == 0)
			if(msg < end-32 && *msg == 'o' && msg[1] == 'n')
			if(cistrncmp(msg+2, "tent-transfer-encoding: base64", 30) == 0)
				base64 = 1;
			c = 'c';
			break;
		default:
			c = tolower(c);
			break;
		}
		*buf++ = c;
		lastc = c;
		bufsize--;
	}
	*buf = 0;
	return base64;
}

/*
 *	The pattern parser: build data structures from the pattern file
 */

/* bucket index for the string-pattern hash chains: low 7 bits of c */
static int
hash(int c)
{
	return c & 127;
}

/*
 *	Map an action name to its action code.  Unknown names fall
 *	through to the nil entry of keywords[] and yield Nactions.
 */
static	int
findkey(char *val)
{
	Keyword *kp;

	for(kp = keywords; kp->string; kp++)
		if(strcmp(val, kp->string) == 0)
				break;
	return kp->value;
}

#define	whitespace(c)	((c) == ' ' || (c) == '\t')

/*
 *	Read the pattern file on bp and add each pattern to patterns[].
 *
 *	Blank lines and lines starting with '#' are ignored.  Each
 *	other line is "action:pattern"; a leading '*' marks a literal
 *	string pattern, otherwise the pattern is a regular expression.
 *	Lines with no ':', an unknown action, an empty pattern, or a
 *	regular expression that fails to compile are skipped.  Text
 *	after "~~" is handed to parsealt() as a list of alternates.
 */
void
parsepats(Biobuf *bp)
{
	Pattern *p, *new;
	char *cp, *qp;
	int type, action, n, h;
	Spat *spat;

	for(;;){
		cp = Brdline(bp, '\n');
		if(cp == 0)
			break;
		cp[Blinelen(bp)-1] = 0;		/* replace the newline with a terminator */
		while(*cp == ' ' || *cp == '\t')
			cp++;
		if(*cp == '#' || *cp == 0)
			continue;
		type = regexp;
		if(*cp == '*'){
			type = string;
			cp++;
		}
		qp = strchr(cp, ':');
		if(qp == 0)
			continue;
		*qp = 0;
		if(debug)
			fprint(2, "action = %s\n", cp);
		action = findkey(cp);
		if(action >= Nactions)
			continue;
		cp = qp+1;
		n = extract(cp);
		if(n <= 0 || *cp == 0)
			continue;

		/* split off the alternates; n becomes the length of the pattern proper */
		qp = strstr(cp, "~~");
		if(qp){
			*qp = 0;
			n = strlen(cp);
		}
		if(debug)
			fprint(2, " Pattern: `%s'\n", cp);

			/* Hook regexps into a chain */
		if(type == regexp) {
			new = Malloc(sizeof(Pattern));
			new->action = action;
			new->pat = regcomp(cp);
			if(new->pat == 0){
				free(new);
				continue;
			}
			new->type = regexp;
			new->alt = 0;
			new->next = 0;

			if(qp)
				parsealt(bp, qp+2, &new->alt);

			new->next = patterns[action].regexps;
			patterns[action].regexps = new;
			continue;

		}
			/* not a Regexp - hook strings into Pattern hash chain */
		spat = Malloc(sizeof(*spat));
		spat->next = 0;
		spat->alt = 0;
		spat->len = n;
		spat->string = Malloc(n+1);
		spat->c1 = cp[1];		/* second character, used by matchpat as a quick filter */
		strcpy(spat->string, cp);

		if(qp)
			parsealt(bp, qp+2, &spat->alt);

		p = patterns[action].strings;
		if(p == 0) {
			p = Malloc(sizeof(Pattern));
			memset(p, 0, sizeof(*p));
			p->action = action;
			p->type = string;
			patterns[action].strings = p;
		}
		h = hash(*spat->string);
		spat->next = p->spat[h];
		p->spat[h] = spat;
	}
}

/*
 *	Parse the "~~"-separated alternates that start at cp and push
 *	each non-empty one onto the list *head.  When the text ends
 *	right after a "~~" the list continues on the next line of bp
 *	that is non-empty after extract().  Only the string and next
 *	fields of each new Spat are set here.
 */
static void
parsealt(Biobuf *bp, char *cp, Spat** head)
{
	char *p;
	Spat *alt;

	while(cp){
		if(*cp == 0){		/*escaped newline*/
			do{
				cp = Brdline(bp, '\n');
				if(cp == 0)
					return;
				cp[Blinelen(bp)-1] = 0;
			} while(extract(cp) <= 0 || *cp == 0);
		}

		p = cp;
		cp = strstr(p, "~~");
		if(cp){
			*cp = 0;
			cp += 2;
		}
		if(strlen(p)){
			alt = Malloc(sizeof(*alt));
			alt->string = strdup(p);
			alt->next = *head;
			*head = alt;
		}
	}
}

/*
 *	Canonicalize a pattern in place.  Leading white space is
 *	dropped, text from an unquoted '#' on is discarded, upper-case
 *	ASCII letters are lower-cased, and double-quoted sections are
 *	copied without their quotes (\" inside quotes stands for a
 *	quote).  Trailing white space is trimmed, but never back into
 *	a quoted section.  Returns the length of the result.
 */
static int
extract(char *cp)
{
	int c;
	char *p, *q, *r;

	p = q = r = cp;
	while(whitespace(*p))
		p++;
	while(c = *p++){
		if (c == '#')
			break;
		if(c == '"'){
			while(*p && *p != '"'){
				if(*p == '\\' && p[1] == '"')
					p++;
				if('A' <= *p && *p <= 'Z')
					*q++ = *p++ + ('a'-'A');
				else
					*q++ = *p++;
			}
			if(*p)
				p++;
			r = q;		/* never back up over a quoted string */
		} else {
			if('A' <= c && c <= 'Z')
				c += ('a'-'A');
			*q++ = c;
		}
	}
	while(q > r && whitespace(q[-1]))
		q--;
	*q = 0;
	return q-cp;
}

/*
 *	The matching engine: compare canonical input to pattern structures
 */

/*
 *	Return the first alternate in the list whose string occurs in
 *	cmd (when cmd is non-empty and is not the message itself), in
 *	header+1 (when that is not the message itself), or in message.
 *	Returns 0 when no alternate is found.
 */
static Spat*
isalt(char *message, Spat *alt)
{
	while(alt) {
		if(*cmd)
		if(message != cmd && strstr(cmd, alt->string))
			break;
		if(message != header+1 && strstr(header+1, alt->string))
			break;
		if(strstr(message, alt->string))
			break;
		alt = alt->next;
	}
	return alt;
}

/*
 *	Match pattern p against message.  Returns 1 on a match that is
 *	not cancelled by one of the pattern's alternates, else 0.
 *
 *	For a string pattern every position of message is tried: the
 *	chain for the hash of the current character is walked, the
 *	next character is compared with c1 as a quick filter, and the
 *	candidate is confirmed with memcmp.  On success m->sp and
 *	m->ep bracket the match.  For a regular expression m is filled
 *	in by regexec.
 */
int
matchpat(Pattern *p, char *message, Resub *m)
{
	Spat *spat;
	char *s;
	int c, c1;

	if(p->type == string){
		c1 = *message;
		for(s=message; c=c1; s++){
			c1 = s[1];
			for(spat=p->spat[hash(c)]; spat; spat=spat->next){
				if(c1 == spat->c1)
				if(memcmp(s, spat->string, spat->len) == 0)
				if(!isalt(message, spat->alt)){
					m->sp = s;
					m->ep = s + spat->len;
					return 1;
				}
			}
		}
		return 0;
	}
	m->sp = m->ep = 0;
	if(regexec(p->pat, message, m, 1) == 0)
		return 0;
	if(isalt(message, p->alt))
		return 0;
	return 1;
}

/*
 *	Print the match described by m on fd, prefixed by type, as
 *	"before~match~after" with roughly 30 characters of context on
 *	each side, extended to the nearest blank.  Does nothing when
 *	m holds no match.
 *
 *	NOTE(review): the backward scan stops only at a NUL byte, so
 *	it appears to rely on a NUL stored in front of the text being
 *	matched - confirm against the callers.
 */
void
xprint(int fd, char *type, Resub *m)
{
	char *p, *q;
	int i;

	if(m->sp == 0 || m->ep == 0)
		return;

		/* back up approx 30 characters to whitespace */
	for(p = m->sp, i = 0; *p && i < 30; i++, p--)
			;
	while(*p && *p != ' ')
		p--;
	p++;

		/* grab about 30 more chars beyond the end of the match */
	for(q = m->ep, i = 0; *q && i < 30; i++, q++)
			;
	while(*q && *q != ' ')
		q++;

	fprint(fd, "%s %.*s~%.*s~%.*s\n", type, (int)(m->sp-p), p, (int)(m->ep-m->sp), m->sp, (int)(q-m->ep), m->ep);
}

enum
{
	INVAL=	255
};

/*
 *	Base 64 decoding table: maps a byte to its 6-bit value
 *	('A'-'Z' 0-25, 'a'-'z' 26-51, '0'-'9' 52-61, '+' 62, '/' 63),
 *	or INVAL for bytes outside the alphabet.
 */
static uchar t64d[256] = {
/*00 */	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*10*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*20*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL,    62, INVAL, INVAL, INVAL,    63,
/*30*/	   52,	  53,	 54,	55,    56,    57,    58,    59,
	   60,	  61, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*40*/	INVAL,    0,      1,     2,     3,     4,     5,     6,
	    7,    8,      9,    10,    11,    12,    13,    14,
/*50*/	   15,   16,     17,    18,    19,    20,    21,    22,
	   23,   24,     25, INVAL, INVAL, INVAL, INVAL, INVAL,
/*60*/	INVAL,   26,     27,    28,    29,    30,    31,    32,
	   33,   34,     35,    36,    37,    38,    39,    40,
/*70*/	   41,   42,     43,    44,    45,    46,    47,    48,
	   49,   50,     51, INVAL, INVAL, INVAL, INVAL, INVAL,
/*80*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*90*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*A0*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*B0*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*C0*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*D0*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*E0*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
/*F0*/	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
	INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
};
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -