⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parse.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
字号:
#include <u.h>#include <libc.h>#include <bio.h>#include <String.h>#include <ctype.h>#include <thread.h>#include "wiki.h"static Wpage*mkwtxt(int type, char *text){	Wpage *w;	w = emalloc(sizeof(*w));	w->type = type;	w->text = text;	setmalloctag(w, getcallerpc(&type));	return w;}/* * turn runs of whitespace into single spaces, * eliminate whitespace at beginning and end. */char*strcondense(char *s, int cutbegin){	char *r, *w, *es;	int inspace;	es = s+strlen(s);	inspace = cutbegin;	for(r=w=s; *r; r++){		if(isspace(*r)){			if(!inspace){				inspace=1;				*w++ = ' ';			}		}else{			inspace=0;			*w++ = *r;		}	}	assert(w <= es);	if(inspace && w>s){		--w;		*w = '\0';	}	else		*w = '\0';	return s;}/* * turn runs of Wplain into single Wplain. */static Wpage*wcondense(Wpage *wtxt){	Wpage *ow, *w;	for(w=wtxt; w; ){		if(w->type == Wplain)			strcondense(w->text, 1);		if(w->type != Wplain || w->next==nil		|| w->next->type != Wplain){			w=w->next;			continue;		}		w->text = erealloc(w->text, strlen(w->text)+1+strlen(w->next->text)+1);		strcat(w->text, " ");		strcat(w->text, w->next->text);				ow = w->next;		w->next = ow->next;		ow->next = nil;		freepage(ow);	}	return wtxt;}/* * Parse a link, without the brackets. */static Wpage*mklink(char *s){	char *q;	Wpage *w;	for(q=s; *q && *q != '|'; q++)		;	if(*q == '\0'){		w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));		w->url = nil;	}else{		*q = '\0';		w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));		w->url = estrdup(strcondense(q+1, 1));	}	setmalloctag(w, getcallerpc(&s));	return w;}/* * Parse Wplains, inserting Wlink nodes where appropriate. */static Wpage*wlink(Wpage *wtxt){	char *p, *q, *r, *s;	Wpage *w, *nw;	for(w=wtxt; w; w=nw){		nw = w->next;		if(w->type != Wplain)			continue;		while(w->text[0]){			p = w->text;			for(q=p; *q && *q != '['; q++)				;			if(*q == '\0')				break;			for(r=q; *r && *r != ']'; r++)				;			if(*r == '\0')				break;			*q = '\0';			*r = '\0';			s = w->text;			w->text = estrdup(w->text);			w->next = mklink(q+1);			w = w->next;			w->next = mkwtxt(Wplain, estrdup(r+1));			free(s);			w = w->next;			w->next = nw;		}		assert(w->next == nw);	}	return wtxt;	}static intismanchar(int c){	return ('a' <= c && c <= 'z')		|| ('A' <= c && c <= 'Z')		|| ('0' <= c && c <= '9')		|| c=='_' || c=='-' || c=='.' || c=='/'		|| (c < 0);	/* UTF */}static Wpage*findmanref(char *p, char **beginp, char **endp){	char *q, *r;	Wpage *w;	q=p;	for(;;){		for(; q[0] && (q[0] != '(' || !isdigit(q[1]) || q[2] != ')'); q++)			;		if(*q == '\0')			break;		for(r=q; r>p && ismanchar(r[-1]); r--)			;		if(r==q){			q += 3;			continue;		}		*q = '\0';		w = mkwtxt(Wman, estrdup(r));		*beginp = r;		*q = '(';		w->section = q[1]-'0';		*endp = q+3;		setmalloctag(w, getcallerpc(&p));		return w;	}	return nil;}/* * Parse Wplains, looking for man page references. * This should be done by using a plumb(6)-style  * control file rather than hard-coding things here. */static Wpage*wman(Wpage *wtxt){	char *q, *r;	Wpage *w, *mw, *nw;	for(w=wtxt; w; w=nw){		nw = w->next;		if(w->type != Wplain)			continue;		while(w->text[0]){			if((mw = findmanref(w->text, &q, &r)) == nil)				break;			*q = '\0';			w->next = mw;			w = w->next;			w->next = mkwtxt(Wplain, estrdup(r));			w = w->next;			w->next = nw;		}		assert(w->next == nw);	}	return wtxt;	}static int isheading(char *p) {	Rune r;	int hasupper=0;	while(*p) {		p+=chartorune(&r,p);		if(isupperrune(r))			hasupper=1;		else if(islowerrune(r))			return 0;	}	return hasupper;}Wpage*Brdpage(char *(*rdline)(void*,int), void *b){	char *p, *c;	int waspara;	Wpage *w, **pw;	w = nil;	pw = &w;	waspara = 1;	while((p = rdline(b, '\n')) != nil){		if(p[0] != '!')			p = strcondense(p, 1);		if(p[0] == '\0'){			if(waspara==0){				waspara=1;				*pw = mkwtxt(Wpara, nil);				pw = &(*pw)->next;			}			continue;		}		waspara = 0;		switch(p[0]){		case '*':			*pw = mkwtxt(Wbullet, nil);			pw = &(*pw)->next;			*pw = mkwtxt(Wplain, estrdup(p+1));			pw = &(*pw)->next;			break;		case '!':			*pw = mkwtxt(Wpre, estrdup(p[1]==' '?p+2:p+1));			pw = &(*pw)->next;			break;		case '-':			for(c = p; *c != '\0'; c++) {				if(*c != '-') {					c = p;					break;				}			}			if( (c-p) > 4) {				*pw = mkwtxt(Whr, nil);				pw = &(*pw)->next;				break;			}			/* else fall thru */		default:			if(isheading(p)){				*pw = mkwtxt(Wheading, estrdup(p));				pw = &(*pw)->next;				continue;			}			*pw = mkwtxt(Wplain, estrdup(p));			pw = &(*pw)->next;			break;		}	}	if(w == nil)		werrstr("empty page");		*pw = nil;	w = wcondense(w);	w = wlink(w);	w = wman(w);	setmalloctag(w, getcallerpc(&rdline));	return w;		}voidprintpage(Wpage *w){	for(; w; w=w->next){		switch(w->type){		case Wpara:			print("para\n");			break;		case Wheading:			print("heading '%s'\n", w->text);			break;		case Wbullet:			print("bullet\n");			break;		case Wlink:			print("link '%s' '%s'\n", w->text, w->url);			break;		case Wman:			print("man %d %s\n", w->section, w->text);			break;		case Wplain:			print("plain '%s'\n", w->text);			break;		case Whr:			print("hr\n");			break;		case Wpre:			print("pre '%s'\n", w->text);			break;		}	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -