⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 n8.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
字号:
#include "tdef.h"
#include "fns.h"
#include "ext.h"

#define	HY_BIT	0200	/* stuff in here only works for 7-bit ascii */
			/* this value is used (as a literal) in suftab.c */
			/* to encode possible hyphenation points in suffixes. */
			/* it could be changed, by widening the tables */
			/* to be shorts instead of chars. */

/*
 * troff8.c
 *
 * hyphenation
 */

int	hexsize = 0;		/* hyphenation exception list size */
char	*hbufp = NULL;		/* base of list */
char	*nexth = NULL;		/* first free slot in list */
Tchar	*hyend;

#define THRESH 160 		/* digram goodness threshold */
int	thresh = THRESH;

int	texhyphen(void);
static	int	alpha(Tchar);
static	void	addhyp(Tchar *);

/*
 * Record w as a hyphenation point in the hyptr table.
 * hyp is clamped to the last slot so that the table cannot be overrun
 * and the terminating 0 written by hyphen() always fits; this is the
 * same clamp exword() has always applied.
 */
static void addhyp(Tchar *w)
{
	*hyp++ = w;
	if (hyp > hyptr + NHYP - 1)
		hyp = hyptr + NHYP - 1;
}

/*
 * Find hyphenation points in the word at wp.
 * The word may be surrounded by punctuation but must otherwise consist
 * of alphabetic characters up to the terminating 0; anything else is
 * left unhyphenated.  On return hyptr holds the points found, sorted
 * into increasing order and terminated by 0.
 */
void hyphen(Tchar *wp)
{
	int j;
	Tchar *i;

	i = wp;
	while (punct((*i++)))		/* skip leading punctuation */
		;
	if (!alpha(*--i))
		return;
	wdstart = i++;
	while (alpha(*i++))
		;
	hyend = wdend = --i - 1;	/* last letter of the word */
	while (punct((*i++)))		/* skip trailing punctuation */
		;
	if (*--i)			/* something other than the end follows */
		return;
	if (wdend - wdstart < 4)	/* 4 chars is too short to hyphenate */
		return;
	hyp = hyptr;
	*hyp = 0;
	hyoff = 2;

	/* for now, try exceptions first, then tex (if hyphalg is non-zero),
	   then suffix and digram if tex didn't hyphenate it at all.
	*/
	if (!exword() && !texhyphen() && !suffix())
		digram();

	/* sort the hyphenation points into increasing order (bubble sort) */
	*hyp++ = 0;
	if (*hyptr)
		for (j = 1; j; ) {
			j = 0;
			for (hyp = hyptr + 1; *hyp != 0; hyp++) {
				if (*(hyp - 1) > *hyp) {
					j++;
					i = *hyp;	/* i doubles as the swap temporary */
					*hyp = *(hyp - 1);
					*(hyp - 1) = i;
				}
			}
		}
}

static int alpha(Tchar i)	/* non-zero if really alphabetic */
{
	if (ismot(i))
		return 0;
	else if (cbits(i) >= ALPHABET)	/* this isn't very elegant, but there's */
		return 0;		/* no good way to make sure i is in range for */
	else				/* the call of isalpha */
		return isalpha(cbits(i));
}

int punct(Tchar i)	/* non-zero if i is neither the terminator nor alphabetic */
{
	if (!i || alpha(i))
		return(0);
	else
		return(1);
}

void caseha(void)	/* set hyphenation algorithm */
{
	hyphalg = HYPHALG;
	if (skip())
		return;
	noscale++;
	hyphalg = atoi0();
	noscale = 0;
}

void caseht(void)	/* set hyphenation threshold;  not in manual! */
{
	thresh = THRESH;
	if (skip())
		return;
	noscale++;
	thresh = atoi0();
	noscale = 0;
}

/*
 * Enlarge the exception list by NHEX bytes.
 * where is a pointer into the current list; the return value is the
 * corresponding pointer into the (possibly moved) new list, or NULL if
 * the list could not be grown.  hexsize, hbufp and nexth are updated
 * only on success, so after a failure they still describe the old list.
 * NOTE(review): this assumes grow() leaves the old buffer intact when it
 * fails -- confirm against its definition.
 */
char *growh(char *where)
{
	char *new;
	int newsize;
	long woff, noff;

	/* take the offsets before the old buffer can be released */
	woff = where - hbufp;
	noff = (nexth != NULL) ? nexth - hbufp : 0;
	newsize = hexsize + NHEX;
	if ((new = grow(hbufp, newsize, sizeof(char))) == NULL)
		return NULL;
	hexsize = newsize;
	hbufp = new;
	if (nexth != NULL)
		nexth = new + noff;
	return new + woff;
}

/*
 * Read hyphenation exception words from the input and append them to
 * the exception list.  Each word is stored in lower case, terminated by
 * 0; a '-' in the input sets HY_BIT on the letter that follows it.  The
 * list as a whole ends with an extra 0.
 */
void casehw(void)
{
	int i, k;
	char *j;
	Tchar t;

	if (nexth == NULL) {
		if ((nexth = hbufp = grow(hbufp, NHEX, sizeof(char))) == NULL) {
			ERROR "No space for exception word list." WARN;
			return;
		}
		hexsize = NHEX;
		*nexth = 0;	/* a new list starts out empty */
	}
	while (!skip()) {
		k = 0;		/* a trailing '-' must not leak into the next word */
		j = nexth;
		if (j >= hbufp + hexsize - 2)
			if ((j = growh(j)) == NULL)	/* nexth is left intact on failure */
				goto full;
		for (;;) {
			if (ismot(t = getch()))
				continue;
			i = cbits(t);
			if (i == ' ' || i == '\n') {
				*j++ = 0;
				nexth = j;
				*j = 0;
				if (i == ' ')
					break;
				else
					return;
			}
			if (i == '-') {
				k = HY_BIT;
				continue;
			}
			*j++ = maplow(i) | k;
			k = 0;
			if (j >= hbufp + hexsize - 2)
				if ((j = growh(j)) == NULL)
					goto full;
		}
	}
	return;
full:
	ERROR "Cannot grow exception word list." WARN;
	*nexth = 0;	/* drop the partially stored word */
}

/*
 * Look the current word (wdstart..hyend) up in the exception list.
 * A match is also accepted when the word is the exception followed by a
 * final 's'.  On a match the marked hyphenation points are recorded and
 * 1 is returned; otherwise 0.
 */
int exword(void)
{
	Tchar *w;
	char *e, *save;

	e = hbufp;
	while (1) {
		save = e;
		if (e == NULL || *e == 0)
			return(0);
		w = wdstart;
		while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) {
			e++;
			w++;
		}
		if (!*e) {
			if (w-1 == hyend || (w == wdend && maplow(cbits(*w)) == 's')) {
				w = wdstart;
				for (e = save; *e; e++) {
					if (*e & HY_BIT)
						addhyp(w);
					w++;
				}
				return(1);
			} else {
				e++;
				continue;
			}
		} else
			while (*e++)	/* skip the rest of this exception */
				;
	}
}

/*
 * Try to strip a known suffix (from suftab) off the end of the word,
 * recording the hyphenation points encoded in the matching table entry.
 * Returns 1 if the remaining stem was found in the exception list,
 * 0 otherwise.  hyend is moved back past each suffix that is removed.
 */
int suffix(void)
{
	Tchar *w;
	char *s, *s0;
	Tchar i;
	extern char *suftab[];

again:
	i = cbits(*hyend);
	if (!alpha(i))
		return(0);
	if (i < 'a')
		i -= 'A' - 'a';
	if ((s0 = suftab[i-'a']) == 0)
		return(0);
	for (;;) {
		if ((i = *s0 & 017) == 0)	/* low 4 bits: entry length; 0 ends the table */
			return(0);
		s = s0 + i - 1;
		w = hyend - 1;
		while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) {
			s--;
			w--;
		}
		if (s == s0)
			break;
		s0 += i;
	}
	s = s0 + i - 1;
	w = hyend;
	if (*s0 & HY_BIT)
		goto mark;
	while (s > s0) {
		w--;
		if (*s-- & HY_BIT) {
mark:
			hyend = w - 1;
			if (*s0 & 0100)	/* 0100 used in suftab to encode something too */
				continue;
			if (!chkvow(w))
				return(0);
			addhyp(w);
		}
	}
	if (*s0 & 040)
		return(0);
	if (exword())
		return(1);
	goto again;
}

/*
 * Map an upper-case letter to lower case; anything else is returned
 * unchanged.  Values outside 0..ALPHABET-1 are passed through without
 * consulting <ctype.h>, whose functions are undefined for them (the same
 * guard alpha() uses).
 */
int maplow(int i)
{
	if (i >= 0 && i < ALPHABET && isupper(i))
		i = tolower(i);
	return(i);
}

int vowel(int i)	/* non-zero if i is a vowel (y counts) */
{
	switch (i) {
	case 'a': case 'A':
	case 'e': case 'E':
	case 'i': case 'I':
	case 'o': case 'O':
	case 'u': case 'U':
	case 'y': case 'Y':
		return(1);
	default:
		return(0);
	}
}

/* return a pointer to the last vowel before w in the word, or 0 if none */
Tchar *chkvow(Tchar *w)
{
	while (--w >= wdstart)
		if (vowel(cbits(*w)))
			return(w);
	return(0);
}

/*
 * Digram hyphenation: working backwards from hyend, look between
 * successive vowels for the position whose product of table weights is
 * largest, and record it if that product exceeds thresh.
 */
void digram(void)
{
	Tchar *w;
	int val;
	Tchar *nhyend, *maxw;
	int maxval;
	extern char bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13];

again:
	if (!(w = chkvow(hyend + 1)))
		return;
	hyend = w;
	if (!(w = chkvow(hyend)))
		return;
	nhyend = w;
	maxval = 0;
	maxw = NULL;	/* nothing found yet on this pass */
	w--;
	while (++w < hyend && w < wdend - 1) {
		val = 1;
		if (w == wdstart)
			val *= dilook('a', cbits(*w), bxh);
		else if (w == wdstart + 1)
			val *= dilook(cbits(*(w-1)), cbits(*w), bxxh);
		else
			val *= dilook(cbits(*(w-1)), cbits(*w), xxh);
		val *= dilook(cbits(*w), cbits(*(w+1)), xhx);
		val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx);
		if (val > maxval) {
			maxval = val;
			maxw = w + 1;
		}
	}
	hyend = nhyend;
	/* maxw is only set when some val beat 0; check it in case thresh is negative */
	if (maxval > thresh && maxw != NULL)
		addhyp(maxw);
	goto again;
}

/*
 * Fetch the 4-bit weight for the letter pair (a, b) from table t.
 * Two weights are packed per byte: even columns in the high nibble,
 * odd columns in the low nibble.
 */
int dilook(int a, int b, char t[26][13])
{
	int i, j;

	i = t[maplow(a)-'a'][(j = maplow(b)-'a')/2];
	if (!(j & 01))
		i >>= 4;
	return(i & 017);
}


/* here beginneth the tex hyphenation code, as interpreted freely */
/* the main difference is that there is no attempt to squeeze space */
/* as tightly at tex does. */

static int	texit(Tchar *, Tchar *);
static int	readpats(void);
static void	install(char *);
static void	fixup(void);
static int	trieindex(int, int);

static char	pats[50000];	/* size ought to be computed dynamically */
static char	*nextpat = pats;
static char	*trie[27*27];	/* english-specific sizes */

/*
 * Hyphenate the current word with the tex patterns, loading them on
 * first use.  Returns 0 if hyphalg is 0 or the pattern file could not
 * be opened; otherwise whatever texit() returns.
 */
int texhyphen(void)
{
	static int loaded = 0;		/* -1: couldn't find tex file */

	if (hyphalg == 0 || loaded == -1)	/* non-zero => tex for now */
		return 0;
	if (loaded == 0) {
		if (readpats())
			loaded = 1;
		else
			loaded = -1;
	}
	return texit(wdstart, wdend);
}

static int texit(Tchar *start, Tchar *end)	/* hyphenate as in tex, return # found */
{
	/* nw can reach 500 and cnt[nw] is written, hence 500+1 entries */
	int nw, i, k, n, equal, cnt[500+1];
	char w[500+1], *np, *pp, *wp, *xpp, *xwp;

	w[0] = '.';
	for (nw = 1; start <= end && nw < 500-1; nw++, start++)
		w[nw] = maplow(tolower(cbits(*start)));
	start -= (nw - 1);
	w[nw++] = '.';
	w[nw] = 0;
/*
 * printf("try %s\n", w);
*/
	for (i = 0; i <= nw; i++)
		cnt[i] = '0';

	/* the trie is keyed on a pair of characters, so stop one short of the end */
	for (wp = w; wp + 1 < w + nw; wp++) {
		n = trieindex(*wp, *(wp+1));
		if (n < 0)		/* pair cannot start any pattern */
			continue;
		for (pp = trie[n]; pp != NULL && pp < nextpat; ) {
			if (*pp != *wp		/* no match on 1st letter */
			 || *(pp+1) != *(wp+1))	/* no match on 2nd letter */
				break;		/*   so move to next letter of word */
			equal = 1;
			for (xpp = pp+2, xwp = wp+2; *xpp; )
				if (*xpp++ != *xwp++) {
					equal = 0;
					break;
				}
			if (equal) {
				np = xpp+1;	/* numpat */
				for (k = wp-w; *np; k++, np++)
					if (*np > cnt[k])
						cnt[k] = *np;
/*
 * printf("match: %s  %s\n", pp, xpp+1);
*/
			}
			pp += *(pp-1);	/* skip over pattern and numbers to next */
		}
	}
/*
 * for (i = 0; i < nw; i++) printf("%c", w[i]);
 * printf("  ");
 * for (i = 0; i <= nw; i++) printf("%c", cnt[i]);
 * printf("\n");
*/
/*
 * 	for (i = 1; i < nw - 1; i++) {
 * 		if (i > 2 && i < nw - 3 && cnt[i] % 2)
 * 			printf("-");
 * 		if (cbits(start[i-1]) != '.')
 * 			printf("%c", cbits(start[i-1]));
 * 	}
 * 	printf("\n");
*/
	/* an odd digit marks a permitted break; none near either end of the word */
	for (i = 1; i < nw -1; i++)
		if (i > 2 && i < nw - 3 && cnt[i] % 2)
			addhyp(start + i - 1);
	return hyp - hyptr;	/* non-zero if a hyphen was found */
}

/*
	This code assumes that hyphen.tex looks like
		% some comments
		\patterns{ % more comments
		pat5ter4ns, 1 per line, SORTED, nothing else
		}
		more goo
		\hyphenation{ % more comments
		ex-cep-tions, one per line; i ignore this part for now
		}

	this code is NOT robust against variations.  unfortunately,
	it looks like every local language version of this file has
	a different format.  i have also made no provision for weird
	characters.  sigh.
*/

static int readpats(void)	/* load the pattern file; 0 if it can't be opened */
{
	FILE *fp;
	char buf[200], buf1[200];

	if ((fp = fopen(TEXHYPHENS, "r")) == NULL
	 && (fp = fopen(DWBalthyphens, "r")) == NULL) {
		ERROR "warning: can't find hyphen.tex" WARN;
		return 0;
	}

	while (fgets(buf, sizeof buf, fp) != NULL) {
		/* a blank line leaves buf1 unset, so look at it only after a successful scan */
		if (sscanf(buf, "%199s", buf1) == 1
		 && strcmp(buf1, "\\patterns{") == 0)
			break;
	}
	while (fgets(buf, sizeof buf, fp) != NULL) {
		if (buf[0] == '}')
			break;
		install(buf);
	}
	fclose(fp);
	fixup();
	return 1;
}

/*
 * Append one pattern line to pats.
 * map ab4c5de to: 12 abcde \0 0045 \0
 * where the leading byte is the length of the whole record (itself
 * included), so that adding it to a pattern's address gives the next
 * pattern.  Trailing zeros of the number string are dropped.  The record
 * is assembled locally and copied in only if it is valid and fits;
 * otherwise pats is left untouched.
 */
static void install(char *s)
{
	enum { MAXREC = 127 };	/* a record length must fit in a (possibly signed) char */
	char pat[MAXREC], num[MAXREC + 1];
	int npat, lastpat, nnum, need;

	num[0] = '0';
	for (npat = lastpat = 0; *s != '\n' && *s != '\0'; s++) {
		if (isdigit((unsigned char)*s)) {
			num[npat] = *s;
			lastpat = npat;
		} else {
			if (npat >= MAXREC - 1) {
				ERROR "tex hyphenation pattern too long, ignored" WARN;
				return;
			}
			pat[npat++] = *s;
			num[npat] = '0';
		}
	}
	if (npat == 0)		/* blank line: nothing to store */
		return;
	pat[npat] = 0;
	nnum = lastpat + 1;
	num[nnum] = 0;

	need = 1 + (npat + 1) + (nnum + 1);	/* count byte, pattern, numbers */
	if (need > MAXREC) {
		ERROR "tex hyphenation pattern too long, ignored" WARN;
		return;
	}
	if (need > pats + sizeof(pats) - nextpat) {
		ERROR "tex hyphenation table overflow, tail end ignored" WARN;
		return;
	}
	*nextpat++ = need;
	strcpy(nextpat, pat);
	nextpat += npat + 1;
	strcpy(nextpat, num);
	nextpat += nnum + 1;
}

/*
 * build indexes of where . a b c ... start:
 * for each pair of leading characters, trie[] gets the first pattern
 * that begins with that pair.  Patterns whose leading pair is not made
 * of '.' and a-z are not indexed.
 */
static void fixup(void)
{
	char *p;
	int n;

	for (p = pats+1; p < nextpat; p += p[-1]) {
		n = trieindex(p[0], p[1]);
		if (n >= 0 && trie[n] == 0)
			trie[n] = p;
	}
	/* printf("pats = %d\n", nextpat - pats); */
}

/*
 * Index into trie[] for the character pair (d1, d2): '.' counts as 0
 * and 'a'..'z' as 1..26.  Returns -1 if either character is anything
 * else, so that callers never index outside trie[].
 */
static int trieindex(int d1, int d2)
{
	int i1, i2;

	if (d1 == '.')
		i1 = 0;
	else if (d1 >= 'a' && d1 <= 'z')
		i1 = d1 - 'a' + 1;
	else
		return -1;
	if (d2 == '.')
		i2 = 0;
	else if (d2 >= 'a' && d2 <= 'z')
		i2 = d2 - 'a' + 1;
	else
		return -1;
	return 27 * i1 + i2;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -