⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 diffreg.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
字号:
#include <u.h>#include <libc.h>#include <bio.h>#include "diff.h"/*	diff - differential file comparison**	Uses an algorithm due to Harold Stone, which finds*	a pair of longest identical subsequences in the two*	files.**	The major goal is to generate the match vector J.*	J[i] is the index of the line in file1 corresponding*	to line i file0. J[i] = 0 if there is no*	such line in file1.**	Lines are hashed so as to work in core. All potential*	matches are located by sorting the lines of each file*	on the hash (called value). In particular, this*	collects the equivalence classes in file1 together.*	Subroutine equiv replaces the value of each line in*	file0 by the index of the first element of its *	matching equivalence in (the reordered) file1.*	To save space equiv squeezes file1 into a single*	array member in which the equivalence classes*	are simply concatenated, except that their first*	members are flagged by changing sign.**	Next the indices that point into member are unsorted into*	array class according to the original order of file0.**	The cleverness lies in routine stone. This marches*	through the lines of file0, developing a vector klist*	of "k-candidates". At step i a k-candidate is a matched*	pair of lines x,y (x in file0 y in file1) such that*	there is a common subsequence of lenght k*	between the first i lines of file0 and the first y *	lines of file1, but there is no such subsequence for*	any smaller y. x is the earliest possible mate to y*	that occurs in such a subsequence.**	Whenever any of the members of the equivalence class of*	lines in file1 matable to a line in file0 has serial number *	less than the y of some k-candidate, that k-candidate *	with the smallest such y is replaced. The new *	k-candidate is chained (via pred) to the current*	k-1 candidate so that the actual subsequence can*	be recovered. When a member has serial number greater*	that the y of all k-candidates, the klist is extended.*	At the end, the longest subsequence is pulled out*	and placed in the array J by unravel.**	With J in hand, the matches there recorded are*	check'ed against reality to assure that no spurious*	matches have crept in due to hashing. If they have,*	they are broken, and "jackpot " is recorded--a harmless*	matter except that a true match for a spuriously*	mated line may now be unnecessarily reported as a change.**	Much of the complexity of the program comes simply*	from trying to minimize core utilization and*	maximize the range of doable problems by dynamically*	allocating what is needed and reusing what is not.*	The core requirements for problems larger than somewhat*	are (in words) 2*length(file0) + length(file1) +*	3*(number of k-candidates installed),  typically about*	6n words for files of length n. *//* TIDY THIS UP */struct cand {	int x;	int y;	int pred;} cand;struct line {	int serial;	int value;} *file[2], line;int len[2];int binary;struct line *sfile[2];	/*shortened by pruning common prefix and suffix*/int slen[2];int pref, suff;	/*length of prefix and suffix*/int *class;	/*will be overlaid on file[0]*/int *member;	/*will be overlaid on file[1]*/int *klist;		/*will be overlaid on file[0] after class*/struct cand *clist;	/* merely a free storage pot for candidates */int clen;int *J;		/*will be overlaid on class*/long *ixold;	/*will be overlaid on klist*/long *ixnew;	/*will be overlaid on file[1]*//* END OF SOME TIDYING */static void	sort(struct line *a, int n)	/*shellsort CACM #201*/{	int m;	struct line *ai, *aim, *j, *k;	struct line w;	int i;	m = 0;	for (i = 1; i <= n; i *= 2)		m = 2*i - 1;	for (m /= 2; m != 0; m /= 2) {		k = a+(n-m);		for (j = a+1; j <= k; j++) {			ai = j;			aim = ai+m;			do {				if (aim->value > ai->value ||				   aim->value == ai->value &&				   aim->serial > ai->serial)					break;				w = *ai;				*ai = *aim;				*aim = w;				aim = ai;				ai -= m;			} while (ai > a && aim >= ai);		}	}}static voidunsort(struct line *f, int l, int *b){	int *a;	int i;	a = MALLOC(int, (l+1));	for(i=1;i<=l;i++)		a[f[i].serial] = f[i].value;	for(i=1;i<=l;i++)		b[i] = a[i];	FREE(a);}static voidprune(void){	int i,j;	for(pref=0;pref<len[0]&&pref<len[1]&&		file[0][pref+1].value==file[1][pref+1].value;		pref++ ) ;	for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&		file[0][len[0]-suff].value==file[1][len[1]-suff].value;		suff++) ;	for(j=0;j<2;j++) {		sfile[j] = file[j]+pref;		slen[j] = len[j]-pref-suff;		for(i=0;i<=slen[j];i++)			sfile[j][i].serial = i;	}}static voidequiv(struct line *a, int n, struct line *b, int m, int *c){	int i, j;	i = j = 1;	while(i<=n && j<=m) {		if(a[i].value < b[j].value)			a[i++].value = 0;		else if(a[i].value == b[j].value)			a[i++].value = j;		else			j++;	}	while(i <= n)		a[i++].value = 0;	b[m+1].value = 0;	j = 0;	while(++j <= m) {		c[j] = -b[j].serial;		while(b[j+1].value == b[j].value) {			j++;			c[j] = b[j].serial;		}	}	c[j] = -1;}static intnewcand(int x, int  y, int pred){	struct cand *q;	clist = REALLOC(clist, struct cand, (clen+1));	q = clist + clen;	q->x = x;	q->y = y;	q->pred = pred;	return clen++;}static intsearch(int *c, int k, int y){	int i, j, l;	int t;	if(clist[c[k]].y < y)	/*quick look for typical case*/		return k+1;	i = 0;	j = k+1;	while((l=(i+j)/2) > i) {		t = clist[c[l]].y;		if(t > y)			j = l;		else if(t < y)			i = l;		else			return l;	}	return l+1;}static intstone(int *a, int n, int *b, int *c){	int i, k,y;	int j, l;	int oldc, tc;	int oldl;	k = 0;	c[0] = newcand(0,0,0);	for(i=1; i<=n; i++) {		j = a[i];		if(j==0)			continue;		y = -b[j];		oldl = 0;		oldc = c[0];		do {			if(y <= clist[oldc].y)				continue;			l = search(c, k, y);			if(l!=oldl+1)				oldc = c[l-1];			if(l<=k) {				if(clist[c[l]].y <= y)					continue;				tc = c[l];				c[l] = newcand(i,y,oldc);				oldc = tc;				oldl = l;			} else {				c[l] = newcand(i,y,oldc);				k++;				break;			}		} while((y=b[++j]) > 0);	}	return k;}static voidunravel(int p){	int i;	struct cand *q;	for(i=0; i<=len[0]; i++) {		if (i <= pref)			J[i] = i;		else if (i > len[0]-suff)			J[i] = i+len[1]-len[0];		else			J[i] = 0;	}	for(q=clist+p;q->y!=0;q=clist+q->pred)		J[q->x+pref] = q->y+pref;}static voidoutput(void){	int m, i0, i1, j0, j1;	m = len[0];	J[0] = 0;	J[m+1] = len[1]+1;	if (mode != 'e') {		for (i0 = 1; i0 <= m; i0 = i1+1) {			while (i0 <= m && J[i0] == J[i0-1]+1)				i0++;			j0 = J[i0-1]+1;			i1 = i0-1;			while (i1 < m && J[i1+1] == 0)				i1++;			j1 = J[i1+1]-1;			J[i1] = j1;			change(i0, i1, j0, j1);		}	}	else {		for (i0 = m; i0 >= 1; i0 = i1-1) {			while (i0 >= 1 && J[i0] == J[i0+1]-1 && J[i0])				i0--;			j0 = J[i0+1]-1;			i1 = i0+1;			while (i1 > 1 && J[i1-1] == 0)				i1--;			j1 = J[i1-1]+1;			J[i1] = j1;			change(i1 , i0, j1, j0);		}	}	if (m == 0)		change(1, 0, 1, len[1]);	flushchanges();}#define BUF 4096static intcmp(Biobuf* b1, Biobuf* b2){	int n;	uchar buf1[BUF], buf2[BUF];	int f1, f2;	vlong nc = 1;	uchar *b1s, *b1e, *b2s, *b2e;	f1 = Bfildes(b1);	f2 = Bfildes(b2);	seek(f1, 0, 0);	seek(f2, 0, 0);	b1s = b1e = buf1;	b2s = b2e = buf2;	for(;;){		if(b1s >= b1e){			if(b1s >= &buf1[BUF])				b1s = buf1;			n = read(f1, b1s,  &buf1[BUF] - b1s);			b1e = b1s + n;		}		if(b2s >= b2e){			if(b2s >= &buf2[BUF])				b2s = buf2;			n = read(f2, b2s,  &buf2[BUF] - b2s);			b2e = b2s + n;		}		n = b2e - b2s;		if(n > b1e - b1s)			n = b1e - b1s;		if(n <= 0)			break;		if(memcmp((void *)b1s, (void *)b2s, n) != 0){			return 1;		}				nc += n;		b1s += n;		b2s += n;	}	if(b1e - b1s == b2e - b2s)		return 0;	return 1;	}voiddiffreg(char *f, char *t){	Biobuf *b0, *b1;	int k;	binary = 0;	b0 = prepare(0, f);	if (!b0)		return;	b1 = prepare(1, t);	if (!b1) {		Bterm(b0);		return;	}	if (binary){		// could use b0 and b1 but this is simpler.		if (cmp(b0, b1))			print("binary files %s %s differ\n", f, t);		Bterm(b0);		Bterm(b1);		return;	}	clen = 0;	prune();	sort(sfile[0], slen[0]);	sort(sfile[1], slen[1]);	member = (int *)file[1];	equiv(sfile[0], slen[0], sfile[1], slen[1], member);	member = REALLOC(member, int, slen[1]+2);	class = (int *)file[0];	unsort(sfile[0], slen[0], class);	class = REALLOC(class, int, slen[0]+2);	klist = MALLOC(int, slen[0]+2);	clist = MALLOC(struct cand, 1);	k = stone(class, slen[0], member, klist);	FREE(member);	FREE(class);	J = MALLOC(int, len[0]+2);	unravel(klist[k]);	FREE(clist);	FREE(klist);	ixold = MALLOC(long, len[0]+2);	ixnew = MALLOC(long, len[1]+2);	Bseek(b0, 0, 0); Bseek(b1, 0, 0);	check(b0, b1);	output();	FREE(J); FREE(ixold); FREE(ixnew);	Bterm(b0); Bterm(b1);			/* ++++ */}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -