⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 diff.c

📁 unix v7是最后一个广泛发布的研究型UNIX版本
💻 C
字号:
#/*	diff - differential file comparison**	Uses an algorithm due to Harold Stone, which finds*	a pair of longest identical subsequences in the two*	files.**	The major goal is to generate the match vector J.*	J[i] is the index of the line in file1 corresponding*	to line i file0. J[i] = 0 if there is no*	such line in file1.**	Lines are hashed so as to work in core. All potential*	matches are located by sorting the lines of each file*	on the hash (called value_____). In particular, this*	collects the equivalence classes in file1 together.*	Subroutine equiv____  replaces the value of each line in*	file0 by the index of the first element of its *	matching equivalence in (the reordered) file1.*	To save space equiv_____ squeezes file1 into a single*	array member______ in which the equivalence classes*	are simply concatenated, except that their first*	members are flagged by changing sign.**	Next the indices that point into member______ are unsorted_______   into*	array class_____ according to the original order of file0.**	The cleverness lies in routine stone______. This marches*	through the lines of file0, developing a vector klist_____*	of "k-candidates". At step i a k-candidate is a matched*	pair of lines x,y (x in file0 y in file1) such that*	there is a common subsequence of lenght k*	between the first i lines of file0 and the first y *	lines of file1, but there is no such subsequence for*	any smaller y. x is the earliest possible mate to y*	that occurs in such a subsequence.**	Whenever any of the members of the equivalence class of*	lines in file1 matable to a line in file0 has serial number *	less than the y of some k-candidate, that k-candidate *	with the smallest such y is replaced. The new *	k-candidate is chained (via pred____) to the current*	k-1 candidate so that the actual subsequence can*	be recovered. When a member has serial number greater*	that the y of all k-candidates, the klist is extended.*	At the end, the longest subsequence is pulled out*	and placed in the array J by unravel_______.**	With J in hand, the matches there recorded are*	check_____ed against reality to assure that no spurious*	matches have crept in due to hashing. If they have,*	they are broken, and "jackpot " is recorded--a harmless*	matter except that a true match for a spuriously*	mated line may now be unnecessarily reported as a change.**	Much of the complexity of the program comes simply*	from trying to minimize core utilization and*	maximize the range of doable problems by dynamically*	allocating what is needed and reusing what is not.*	The core requirements for problems larger than somewhat*	are (in words) 2*length(file0) + length(file1) +*	3*(number of k-candidates installed),  typically about*	6n words for files of length n. */#include <stdio.h>#include <ctype.h>#include <sys/types.h>#include <sys/stat.h>#include <signal.h>#define	prints(s)	fputs(s,stdout)#define HALFLONG 16#define low(x)	(x&((1L<<HALFLONG)-1))#define high(x)	(x>>HALFLONG)FILE *input[2];FILE *fopen();struct cand {	int x;	int y;	int pred;} cand;struct line {	int serial;	int value;} *file[2], line;int len[2];struct line *sfile[2];	/*shortened by pruning common prefix and suffix*/int slen[2];int pref, suff;	/*length of prefix and suffix*/int *class;	/*will be overlaid on file[0]*/int *member;	/*will be overlaid on file[1]*/int *klist;		/*will be overlaid on file[0] after class*/struct cand *clist;	/* merely a free storage pot for candidates */int clen = 0;int *J;		/*will be overlaid on class*/long *ixold;	/*will be overlaid on klist*/long *ixnew;	/*will be overlaid on file[1]*/int opt;	/* -1,0,1 = -e,normal,-f */int status = 2;int anychange = 0;char *empty = "";int bflag;char *tempfile;	/*used when comparing against std input*/char *mktemp();char *dummy;	/*used in resetting storage search ptr*/done(){	unlink(tempfile);	exit(status);}char *talloc(n){	extern char *malloc();	register char *p;	p = malloc((unsigned)n);	if(p!=NULL)		return(p);	noroom();}char *ralloc(p,n)	/*compacting reallocation */char *p;{	register char *q;	char *realloc();	free(p);	free(dummy);	dummy = malloc(1);	q = realloc(p, (unsigned)n);	if(q==NULL)		noroom();	return(q);}noroom(){	mesg("files too big, try -h\n",empty);	done();}sort(a,n)	/*shellsort CACM #201*/struct line *a;{	struct line w;	register int j,m;	struct line *ai;	register struct line *aim;	int k;	for(j=1;j<=n;j*= 2)		m = 2*j - 1;	for(m/=2;m!=0;m/=2) {		k = n-m;		for(j=1;j<=k;j++) {			for(ai = &a[j]; ai > a; ai -= m) {				aim = &ai[m];				if(aim < ai)					break;	/*wraparound*/				if(aim->value > ai[0].value ||				   aim->value == ai[0].value &&				   aim->serial > ai[0].serial)					break;				w.value = ai[0].value;				ai[0].value = aim->value;				aim->value = w.value;				w.serial = ai[0].serial;				ai[0].serial = aim->serial;				aim->serial = w.serial;			}		}	}}unsort(f, l, b)struct line *f;int *b;{	register int *a;	register int i;	a = (int *)talloc((l+1)*sizeof(int));	for(i=1;i<=l;i++)		a[f[i].serial] = f[i].value;	for(i=1;i<=l;i++)		b[i] = a[i];	free((char *)a);}filename(pa1, pa2)char **pa1, **pa2;{	register char *a1, *b1, *a2;	char buf[512];	struct stat stbuf;	int i, f;	a1 = *pa1;	a2 = *pa2;	if(stat(a1,&stbuf)!=-1 && ((stbuf.st_mode&S_IFMT)==S_IFDIR)) {		b1 = *pa1 = malloc(100);		while(*b1++ = *a1++) ;		b1[-1] = '/';		a1 = b1;		while(*a1++ = *a2++)			if(*a2 && *a2!='/' && a2[-1]=='/')				a1 = b1;	}	else if(a1[0]=='-'&&a1[1]==0&&tempfile==0) {		signal(SIGHUP,done);		signal(SIGINT,done);		signal(SIGPIPE,done);		signal(SIGTERM,done);		*pa1 = tempfile = mktemp("/tmp/dXXXXX");		if((f=creat(tempfile,0600)) < 0) {			mesg("cannot create ",tempfile);			done();		}		while((i=read(0,buf,512))>0)			write(f,buf,i);		close(f);	}}prepare(i, arg)char *arg;{	register struct line *p;	register j,h;	if((input[i] = fopen(arg,"r")) == NULL){		mesg("cannot open ", arg);		done();	}	p = (struct line *)talloc(3*sizeof(line));	for(j=0; h=readhash(input[i]);) {		p = (struct line *)ralloc((char *)p,(++j+3)*sizeof(line));		p[j].value = h;	}	len[i] = j;	file[i] = p;	fclose(input[i]);}prune(){	register i,j;	for(pref=0;pref<len[0]&&pref<len[1]&&		file[0][pref+1].value==file[1][pref+1].value;		pref++ ) ;	for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&		file[0][len[0]-suff].value==file[1][len[1]-suff].value;		suff++) ;	for(j=0;j<2;j++) {		sfile[j] = file[j]+pref;		slen[j] = len[j]-pref-suff;		for(i=0;i<=slen[j];i++)			sfile[j][i].serial = i;	}}equiv(a,n,b,m,c)struct line *a, *b;int *c;{	register int i, j;	i = j = 1;	while(i<=n && j<=m) {		if(a[i].value <b[j].value)			a[i++].value = 0;		else if(a[i].value == b[j].value)			a[i++].value = j;		else			j++;	}	while(i <= n)		a[i++].value = 0;	b[m+1].value = 0;	j = 0;	while(++j <= m) {		c[j] = -b[j].serial;		while(b[j+1].value == b[j].value) {			j++;			c[j] = b[j].serial;		}	}	c[j] = -1;}main(argc, argv)char **argv;{	register int k;	char **args;	args = argv;	if(argc>3 && *argv[1]=='-') {		argc--;		argv++;		for(k=1;argv[0][k];k++) {			switch(argv[0][k]) {			case 'e':				opt = -1;				break;			case 'f':				opt = 1;				break;			case 'b':				bflag = 1;				break;			case 'h':				execv("/usr/lib/diffh",args);				mesg("cannot find diffh",empty);				done();			}		}	}	if(argc!=3) {		mesg("arg count",empty);		done();	}	dummy = malloc(1);	filename(&argv[1], &argv[2]);	filename(&argv[2], &argv[1]);	prepare(0, argv[1]);	prepare(1, argv[2]);	prune();	sort(sfile[0],slen[0]);	sort(sfile[1],slen[1]);	member = (int *)file[1];	equiv(sfile[0], slen[0], sfile[1], slen[1], member);	member = (int *)ralloc((char *)member,(slen[1]+2)*sizeof(int));	class = (int *)file[0];	unsort(sfile[0], slen[0], class);	class = (int *)ralloc((char *)class,(slen[0]+2)*sizeof(int));	klist = (int *)talloc((slen[0]+2)*sizeof(int));	clist = (struct cand *)talloc(sizeof(cand));	k = stone(class, slen[0], member, klist);	free((char *)member);	free((char *)class);	J = (int *)talloc((len[0]+2)*sizeof(int));	unravel(klist[k]);	free((char *)clist);	free((char *)klist);	ixold = (long *)talloc((len[0]+2)*sizeof(long));	ixnew = (long *)talloc((len[1]+2)*sizeof(long));	check(argv);	output(argv);	status = anychange;	done();}stone(a,n,b,c)int *a;int *b;int *c;{	register int i, k,y;	int j, l;	int oldc, tc;	int oldl;	k = 0;	c[0] = newcand(0,0,0);	for(i=1; i<=n; i++) {		j = a[i];		if(j==0)			continue;		y = -b[j];		oldl = 0;		oldc = c[0];		do {			if(y <= clist[oldc].y)				continue;			l = search(c, k, y);			if(l!=oldl+1)				oldc = c[l-1];			if(l<=k) {				if(clist[c[l]].y <= y)					continue;				tc = c[l];				c[l] = newcand(i,y,oldc);				oldc = tc;				oldl = l;			} else {				c[l] = newcand(i,y,oldc);				k++;				break;			}		} while((y=b[++j]) > 0);	}	return(k);}newcand(x,y,pred){	register struct cand *q;	clist = (struct cand *)ralloc((char *)clist,++clen*sizeof(cand));	q = clist + clen -1;	q->x = x;	q->y = y;	q->pred = pred;	return(clen-1);}search(c, k, y)int *c;{	register int i, j, l;	int t;	if(clist[c[k]].y<y)	/*quick look for typical case*/		return(k+1);	i = 0;	j = k+1;	while((l=(i+j)/2) > i) {		t = clist[c[l]].y;		if(t > y)			j = l;		else if(t < y)			i = l;		else			return(l);	}	return(l+1);}unravel(p){	register int i;	register struct cand *q;	for(i=0; i<=len[0]; i++)		J[i] =	i<=pref ? i:			i>len[0]-suff ? i+len[1]-len[0]:			0;	for(q=clist+p;q->y!=0;q=clist+q->pred)		J[q->x+pref] = q->y+pref;}/* check does double duty:1.  ferret out any fortuitous correspondences dueto confounding by hashing (which result in "jackpot")2.  collect random access indexes to the two files */check(argv)char **argv;{	register int i, j;	int jackpot;	long ctold, ctnew;	char c,d;	input[0] = fopen(argv[1],"r");	input[1] = fopen(argv[2],"r");	j = 1;	ixold[0] = ixnew[0] = 0;	jackpot = 0;	ctold = ctnew = 0;	for(i=1;i<=len[0];i++) {		if(J[i]==0) {			ixold[i] = ctold += skipline(0);			continue;		}		while(j<J[i]) {			ixnew[j] = ctnew += skipline(1);			j++;		}		for(;;) {			c = getc(input[0]);			d = getc(input[1]);			ctold++;			ctnew++;			if(bflag && isspace(c) && isspace(d)) {				do {					if(c=='\n') break;					ctold++;				} while(isspace(c=getc(input[0])));				do {					if(d=='\n') break;					ctnew++;				} while(isspace(d=getc(input[1])));			}			if(c!=d) {				jackpot++;				J[i] = 0;				if(c!='\n')					ctold += skipline(0);				if(d!='\n')					ctnew += skipline(1);				break;			}			if(c=='\n')				break;		}		ixold[i] = ctold;		ixnew[j] = ctnew;		j++;	}	for(;j<=len[1];j++) {		ixnew[j] = ctnew += skipline(1);	}	fclose(input[0]);	fclose(input[1]);/*	if(jackpot)		mesg("jackpot",empty);*/}skipline(f){	register i;	for(i=1;getc(input[f])!='\n';i++) ;	return(i);}output(argv)char **argv;{	int m;	register int i0, i1, j1;	int j0;	input[0] = fopen(argv[1],"r");	input[1] = fopen(argv[2],"r");	m = len[0];	J[0] = 0;	J[m+1] = len[1]+1;	if(opt!=-1) for(i0=1;i0<=m;i0=i1+1) {		while(i0<=m&&J[i0]==J[i0-1]+1) i0++;		j0 = J[i0-1]+1;		i1 = i0-1;		while(i1<m&&J[i1+1]==0) i1++;		j1 = J[i1+1]-1;		J[i1] = j1;		change(i0,i1,j0,j1);	} else for(i0=m;i0>=1;i0=i1-1) {		while(i0>=1&&J[i0]==J[i0+1]-1&&J[i0]!=0) i0--;		j0 = J[i0+1]-1;		i1 = i0+1;		while(i1>1&&J[i1-1]==0) i1--;		j1 = J[i1-1]+1;		J[i1] = j1;		change(i1,i0,j1,j0);	}	if(m==0)		change(1,0,1,len[1]);}change(a,b,c,d){	if(a>b&&c>d) return;	anychange = 1;	if(opt!=1) {		range(a,b,",");		putchar(a>b?'a':c>d?'d':'c');		if(opt!=-1) range(c,d,",");	} else {		putchar(a>b?'a':c>d?'d':'c');		range(a,b," ");	}	putchar('\n');	if(opt==0) {		fetch(ixold,a,b,input[0],"< ");		if(a<=b&&c<=d) prints("---\n");	}	fetch(ixnew,c,d,input[1],opt==0?"> ":empty);	if(opt!=0&&c<=d) prints(".\n");}range(a,b,separator)char *separator;{	printf("%d", a>b?b:a);	if(a<b) {		printf("%s%d", separator, b);	}}fetch(f,a,b,lb,s)long *f;FILE *lb;char *s;{	register int i, j;	register int nc;	for(i=a;i<=b;i++) {		fseek(lb,f[i-1],0);		nc = f[i]-f[i-1];		prints(s);		for(j=0;j<nc;j++)			putchar(getc(lb));	}}/* hashing has the effect of * arranging line in 7-bit bytes and then * summing 1-s complement in 16-bit hunks */readhash(f)FILE *f;{	long sum;	register unsigned shift;	register space;	register t;	sum = 1;	space = 0;	if(!bflag) for(shift=0;(t=getc(f))!='\n';shift+=7) {		if(t==-1)			return(0);		sum += (long)t << (shift%=HALFLONG);	}	else for(shift=0;;) {		switch(t=getc(f)) {		case -1:			return(0);		case '\t':		case ' ':			space++;			continue;		default:			if(space) {				shift += 7;				space = 0;			}			sum += (long)t << (shift%=HALFLONG);			shift += 7;			continue;		case '\n':			break;		}		break;	}	sum = low(sum) + high(sum);	return((short)low(sum) + (short)high(sum));}mesg(s,t)char *s, *t;{	fprintf(stderr,"diff: %s%s\n",s,t);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -