diffreg.c

来自「<B>Digital的Unix操作系统VAX 4.2源码</B>」· C语言 代码 · 共 1,111 行 · 第 1/2 页

C
1,111
字号
#ifndef lintstatic char *sccsid = "@(#)diffreg.c	4.1	ULTRIX	7/17/90";#endif lint/************************************************************************ *									* *			Copyright (c) 1988 by				* *		Digital Equipment Corporation, Maynard, MA		* *			All rights reserved.				* *									* *   This software is furnished under a license and may be used and	* *   copied  only  in accordance with the terms of such license and	* *   with the  inclusion  of  the  above  copyright  notice.   This	* *   software  or  any  other copies thereof may not be provided or	* *   otherwise made available to any other person.  No title to and	* *   ownership of the software is hereby transferred.			* *									* *   This software is  derived  from  software  received  from  the	* *   University    of   California,   Berkeley,   and   from   Bell	* *   Laboratories.  Use, duplication, or disclosure is  subject  to	* *   restrictions  under  license  agreements  with  University  of	* *   California and with AT&T.						* *									* *   The information in this software is subject to change  without	* *   notice  and should not be construed as a commitment by Digital	* *   Equipment Corporation.						* *									* *   Digital assumes no responsibility for the use  or  reliability	* *   of its software on equipment which is not supplied by Digital.	* *									* ************************************************************************//************************************************************************ *	Modification History *      -------------------- * *	10-Nov-88		Tim N *			Took 4.3-5 sources and added changes made to *			previous version's sources.  Includes DL's 8 *			bit clean changes. *			This 4.3-5 source had id: *				"@(#)diffreg.c 4.17 10/22/87"; ************************************************************************/#include "diff.h"/* * diff - compare two files. *//* *	Uses an algorithm due to Harold Stone, which finds *	a pair of longest identical subsequences in the two *	files. * *	The major goal is to generate the match vector J. *	J[i] is the index of the line in file1 corresponding *	to line i file0. J[i] = 0 if there is no *	such line in file1. * *	Lines are hashed so as to work in core. All potential *	matches are located by sorting the lines of each file *	on the hash (called ``value''). In particular, this *	collects the equivalence classes in file1 together. *	Subroutine equiv replaces the value of each line in *	file0 by the index of the first element of its  *	matching equivalence in (the reordered) file1. *	To save space equiv squeezes file1 into a single *	array member in which the equivalence classes *	are simply concatenated, except that their first *	members are flagged by changing sign. * *	Next the indices that point into member are unsorted into *	array class according to the original order of file0. * *	The cleverness lies in routine stone. This marches *	through the lines of file0, developing a vector klist *	of "k-candidates". At step i a k-candidate is a matched *	pair of lines x,y (x in file0 y in file1) such that *	there is a common subsequence of length k *	between the first i lines of file0 and the first y  *	lines of file1, but there is no such subsequence for *	any smaller y. x is the earliest possible mate to y *	that occurs in such a subsequence. * *	Whenever any of the members of the equivalence class of *	lines in file1 matable to a line in file0 has serial number  *	less than the y of some k-candidate, that k-candidate  *	with the smallest such y is replaced. The new  *	k-candidate is chained (via pred) to the current *	k-1 candidate so that the actual subsequence can *	be recovered. When a member has serial number greater *	that the y of all k-candidates, the klist is extended. *	At the end, the longest subsequence is pulled out *	and placed in the array J by unravel * *	With J in hand, the matches there recorded are *	check'ed against reality to assure that no spurious *	matches have crept in due to hashing. If they have, *	they are broken, and "jackpot" is recorded--a harmless *	matter except that a true match for a spuriously *	mated line may now be unnecessarily reported as a change. * *	Much of the complexity of the program comes simply *	from trying to minimize core utilization and *	maximize the range of doable problems by dynamically *	allocating what is needed and reusing what is not. *	The core requirements for problems larger than somewhat *	are (in words) 2*length(file0) + length(file1) + *	3*(number of k-candidates installed),  typically about *	6n words for files of length n.  */#define	prints(s)	fputs(s,stdout)#define	ismulti(c)	((c & 0xff) >= 0xa0 && (c & 0xff) <= 0xff)FILE	*input[2];FILE	*fopen();struct cand {	int	x;	int	y;	int	pred;} cand;struct line {	int	serial;	int	value;} *file[2], line;int	len[2];struct	line *sfile[2];	/* shortened by pruning common prefix and suffix */int	slen[2];int	pref, suff;	/* length of prefix and suffix */int	*class;		/* will be overlaid on file[0] */int	*member;	/* will be overlaid on file[1] */int	*klist;		/* will be overlaid on file[0] after class */struct	cand *clist;	/* merely a free storage pot for candidates */int	clen = 0;int	*J;		/* will be overlaid on class */long	*ixold;		/* will be overlaid on klist */long	*ixnew;		/* will be overlaid on file[1] */char	*chrtran;	/* translation table for case-folding *//* chrtran points to one of 2 translation tables: *	cup2low if folding upper to lower case *	clow2low if not folding case */char	clow2low[256] = {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff};char	cup2low[256] = {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff};diffreg(){	register int i, j;	FILE *f1, *f2;	char buf1[BUFSIZ], buf2[BUFSIZ];	if (hflag) {		diffargv[0] = "diffh";		execv(diffh, diffargv);		fprintf(stderr, "diff: ");		perror(diffh);		done();	}	chrtran = (iflag? cup2low : clow2low);	if ((stb1.st_mode & S_IFMT) == S_IFDIR) {		file1 = splice(file1, file2);		if (stat(file1, &stb1) < 0) {			fprintf(stderr, "diff: ");			perror(file1);			done();		}	} else if ((stb2.st_mode & S_IFMT) == S_IFDIR) {		file2 = splice(file2, file1);		if (stat(file2, &stb2) < 0) {			fprintf(stderr, "diff: ");			perror(file2);			done();		}	} else if (!strcmp(file1, "-")) {		if (!strcmp(file2, "-")) {			fprintf(stderr, "diff: can't specify - -\n");			done();		}		file1 = copytemp();		if (stat(file1, &stb1) < 0) {			fprintf(stderr, "diff: ");			perror(file1);			done();		}	} else if (!strcmp(file2, "-")) {		file2 = copytemp();		if (stat(file2, &stb2) < 0) {			fprintf(stderr, "diff: ");			perror(file2);			done();		}	}	if ((f1 = fopen(file1, "r")) == NULL) {		fprintf(stderr, "diff: ");		perror(file1);		done();	}	if ((f2 = fopen(file2, "r")) == NULL) {		fprintf(stderr, "diff: ");		perror(file2);		fclose(f1);		done();	}	if (stb1.st_size != stb2.st_size)		goto notsame;	for (;;) {		i = fread(buf1, 1, BUFSIZ, f1);		j = fread(buf2, 1, BUFSIZ, f2);		if (i < 0 || j < 0 || i != j)			goto notsame;		if (i == 0 && j == 0) {			fclose(f1);			fclose(f2);			status = 0;		/* files don't differ */			goto same;		}		for (j = 0; j < i; j++)			if (buf1[j] != buf2[j])				goto notsame;	}notsame:	/*	 *	Files certainly differ at this point; set status accordingly	 */	status = 1;	if (!asciifile(f1) || !asciifile(f2)) {		printf("Binary files %s and %s differ\n", file1, file2);		fclose(f1);		fclose(f2);		done();	}	prepare(0, f1);	prepare(1, f2);	fclose(f1);	fclose(f2);	prune();	sort(sfile[0],slen[0]);	sort(sfile[1],slen[1]);	member = (int *)file[1];	equiv(sfile[0], slen[0], sfile[1], slen[1], member);	member = (int *)ralloc((char *)member,(slen[1]+2)*sizeof(int));	class = (int *)file[0];	unsort(sfile[0], slen[0], class);	class = (int *)ralloc((char *)class,(slen[0]+2)*sizeof(int));	klist = (int *)talloc((slen[0]+2)*sizeof(int));	clist = (struct cand *)talloc(sizeof(cand));	i = stone(class, slen[0], member, klist);	free((char *)member);	free((char *)class);	J = (int *)talloc((len[0]+2)*sizeof(int));	unravel(klist[i]);	free((char *)clist);	free((char *)klist);	ixold = (long *)talloc((len[0]+2)*sizeof(long));	ixnew = (long *)talloc((len[1]+2)*sizeof(long));	check();	output();	status = anychange;same:	if (opt == D_CONTEXT && anychange == 0)		printf("No differences encountered\n");	done();}#include <sys/param.h>char *copytemp(){	char buf[MAXBSIZE];	register int i, f;	signal(SIGHUP,done);	signal(SIGINT,done);	signal(SIGPIPE,done);	signal(SIGTERM,done);	tempfile = mktemp("/tmp/dXXXXX");	f = creat(tempfile,0600);	if (f < 0) {		fprintf(stderr, "diff: ");		perror(tempfile);		done();	}	while ((i = read(0,buf,sizeof(buf))) > 0)		if (write(f,buf,i) != i) {			fprintf(stderr, "diff: ");			perror(tempfile);			done();		}	close(f);	return (tempfile);}char *splice(dir, file)	char *dir, *file;{	char *tail;	char buf[BUFSIZ];	if (!strcmp(file, "-")) {		fprintf(stderr, "diff: can't specify - with other arg directory\n");		done();	}	tail = rindex(file, '/');	if (tail == 0)		tail = file;	else		tail++;	(void)sprintf(buf, "%s/%s", dir, tail);	return (savestr(buf));}prepare(i, fd)	int i;	FILE *fd;{	register struct line *p;	register j,h;	fseek(fd, (long)0, 0);	p = (struct line *)talloc(3*sizeof(line));	for(j=0; h=readhash(fd);) {		p = (struct line *)ralloc((char *)p,(++j+3)*sizeof(line));		p[j].value = h;	}	len[i] = j;	file[i] = p;}prune(){	register i,j;	for(pref=0;pref<len[0]&&pref<len[1]&&		file[0][pref+1].value==file[1][pref+1].value;		pref++ ) ;	for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&		file[0][len[0]-suff].value==file[1][len[1]-suff].value;		suff++) ;	for(j=0;j<2;j++) {		sfile[j] = file[j]+pref;		slen[j] = len[j]-pref-suff;		for(i=0;i<=slen[j];i++)			sfile[j][i].serial = i;	}}equiv(a,n,b,m,c)struct line *a, *b;int *c;{	register int i, j;	i = j = 1;	while(i<=n && j<=m) {		if(a[i].value <b[j].value)			a[i++].value = 0;		else if(a[i].value == b[j].value)			a[i++].value = j;		else			j++;	}	while(i <= n)		a[i++].value = 0;	b[m+1].value = 0;	j = 0;	while(++j <= m) {		c[j] = -b[j].serial;		while(b[j+1].value == b[j].value) {			j++;			c[j] = b[j].serial;		}	}	c[j] = -1;}stone(a,n,b,c)int *a;int *b;register int *c;{	register int i, k,y;	int j, l;	int oldc, tc;	int oldl;	k = 0;	c[0] = newcand(0,0,0);	for(i=1; i<=n; i++) {		j = a[i];		if(j==0)			continue;		y = -b[j];		oldl = 0;		oldc = c[0];		do {			if(y <= clist[oldc].y)				continue;			l = search(c, k, y);			if(l!=oldl+1)				oldc = c[l-1];			if(l<=k) {				if(clist[c[l]].y <= y)					continue;				tc = c[l];				c[l] = newcand(i,y,oldc);				oldc = tc;				oldl = l;			} else {				c[l] = newcand(i,y,oldc);				k++;				break;			}		} while((y=b[++j]) > 0);	}	return(k);}newcand(x,y,pred){	register struct cand *q;	clist = (struct cand *)ralloc((char *)clist,++clen*sizeof(cand));	q = clist + clen -1;	q->x = x;	q->y = y;	q->pred = pred;	return(clen-1);}search(c, k, y)int *c;{	register int i, j, l;	int t;	if(clist[c[k]].y<y)	/*quick look for typical case*/		return(k+1);	i = 0;	j = k+1;	while (1) {		l = i + j;		if ((l >>= 1) <= i) 			break;		t = clist[c[l]].y;		if(t > y)			j = l;		else if(t < y)			i = l;		else			return(l);	}	return(l+1);}unravel(p){	register int i;	register struct cand *q;	for(i=0; i<=len[0]; i++)		J[i] =	i<=pref ? i:			i>len[0]-suff ? i+len[1]-len[0]:			0;	for(q=clist+p;q->y!=0;q=clist+q->pred)		J[q->x+pref] = q->y+pref;}/* check does double duty:1.  ferret out any fortuitous correspondences dueto confounding by hashing (which result in "jackpot")2.  collect random access indexes to the two files */check(){	register int i, j;	int jackpot;	register long ctold, ctnew;	register int c,d;	if ((input[0] = fopen(file1,"r")) == NULL) {		perror(file1);		done();	}	if ((input[1] = fopen(file2,"r")) == NULL) {		perror(file2);		done();	}	j = 1;	ixold[0] = ixnew[0] = 0;	jackpot = 0;	ctold = ctnew = 0;	for(i=1;i<=len[0];i++) {		if(J[i]==0) {			ixold[i] = ctold += skipline(0);			continue;		}		while(j<J[i]) {			ixnew[j] = ctnew += skipline(1);			j++;		}		if(bflag || wflag || iflag) {			for(;;) {				c = getc(input[0]);				d = getc(input[1]);				ctold++;				ctnew++;				if(bflag && isspace(c) && isspace(d)) {					do {						if(c=='\n')							break;						ctold++;					} while(isspace(c=getc(input[0])));					do {						if(d=='\n')							break;						ctnew++;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?