diff.c

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 1,393 行 · 第 1/3 页

C
1,393
字号
/****************************************************************************
*
*                            Open Watcom Project
*
*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
*  ========================================================================
*
*    This file contains Original Code and/or Modifications of Original
*    Code as defined in and that are subject to the Sybase Open Watcom
*    Public License version 1.0 (the 'License'). You may not use this file
*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
*    provided with the Original Code and Modifications, and is also
*    available at www.sybase.com/developer/opensource.
*
*    The Original Code and all software distributed under the License are
*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
*    NON-INFRINGEMENT. Please see the License for the specific language
*    governing rights and limitations under the License.
*
*  ========================================================================
*
* Description:  diff.c - public domain context diff program
*
****************************************************************************/


#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <ctype.h>
#include <stdarg.h>
#include <limits.h>
#include <sys/stat.h>
#include "diff.h"

#ifdef UNIX
    #include "clibext.h"
#endif

typedef unsigned long ULONG;
typedef signed long SLONG;
typedef unsigned short USHORT;
typedef int INT;

#define  EOS    0
#define  TEMPFILE  "diff.tmp"
#define  TRUE   1
#define  FALSE      0

/*
 * One entry of the candidate storage pool (see clist).
 */
typedef struct candidate {
    SLONG       b;          /* Line in fileB */
    SLONG       a;          /* Line in fileA */
    SLONG       link;       /* Previous candidate (presumably an index into clist -- confirm) */
}               CANDIDATE;

/*
 * Per-line record: input() fills in the hash of the line text and squish()
 * assigns the serial number.
 */
typedef struct line {
    USHORT      hash;       /* Hash value of the line text (from hash()) */
    short       serial;     /* Line number; note this is only a short */
}               LINE;

LINE            *file[2];       /* Hash/line records for each whole file; set by input() */
#define  fileA  file[0]
#define  fileB  file[1]

LINE            *sfile[2];      /* file[k] + prefix, i.e. records after the common prefix */
#define  sfileA  sfile[0]
#define  sfileB  sfile[1]

SLONG           len[2];         /* Actual number of lines read from each file */
#define  lenA  len[0]
#define  lenB  len[1]

SLONG           slen[2];        /* Squished lengths: len - prefix - suffix */
#define  slenA  slen[0]
#define  slenB  slen[1]

SLONG           prefix;         /* Identical (by hash) lines at start */
SLONG           suffix;         /* Identical (by hash) lines at end */

FILE            *infd[2] = { NULL, NULL}; /* Input streams for file A and file B */
FILE            *tempfd;        /* Work file (TEMPFILE) used when an input is stdin */

/*
 * Forward declarations.
 *
 * fgetss, hash, myalloc and compact are declared here without parameter
 * lists; full prototypes for all six functions follow further down in this
 * file, so these lines only serve as early declarations.
 */
void            error( char *, ... );
void            fatal( char *, ... );
char            *fgetss();
USHORT          hash();
extern char     *myalloc();     /* Storage allocator */
extern char     *compact();     /* Storage compactor (used to resize a block) */

/*
 * The following vectors are described as overlaying the area defined by
 * fileA.  In main() only `class` is actually assigned from fileA; klist,
 * clist, match and oldseek each get their own myalloc() block there.
 */

short           *class;         /* Unsorted line numbers */
SLONG           *klist;         /* Index of element in clist */
CANDIDATE       *clist;         /* Storage pool for candidates */
SLONG           clength = 0;    /* Number of active candidates */
#define CSIZE_INC 50            /* Pool growth step, in candidates */
SLONG           csize = CSIZE_INC;      /* Current size of storage pool */

SLONG           *match;         /* Longest subsequence */
long            *oldseek;       /* Seek position in file A */

/*
 * The following vectors are described as overlaying the area defined by
 * fileB.  In main() only `member` is assigned from fileB; newseek gets its
 * own myalloc() block.
 */

short           *member;        /* Concatenated equiv. classes */
long            *newseek;       /* Seek position in file B */

/*
 * Global variables
 *
 * The option flags are set by the switch parser in main(); see cmdusage
 * for the user-facing description of each switch.
 */

char            *Dflag = NULL;  /* -d: output #ifdef code; points at the text following -d */
INT             Hflag = FALSE;  /* -H: half hearted algorithm */
INT             nflag = FALSE;  /* -n: edit script for RCS requested */
INT             eflag = FALSE;  /* -e: edit script requested */
INT             bflag = FALSE;  /* -b: blank suppression requested */
INT             cflag = FALSE;  /* -c[n]: context printout; holds the context line count */
INT             iflag = FALSE;  /* -i: ignore case requested */
INT             tflag = FALSE;  /* -t: test for enough memory (quiet mode) */
INT             xflag = 0;      /* -x: offset added to every exit code (DIFF_RETURN_ADD) */
INT             havediffs = FALSE;  /* Selects DIFF_HAVE_DIFFS vs DIFF_NO_DIFFS as exit code */
char            text[1025];      /* Input line buffer (used by input() for both files) */
char            textb[1025];     /* Input from file2 for check */

/*
 * Usage text printed by main() when the argument count is wrong.
 * NOTE(review): main() also accepts a -d switch (stored in Dflag) that is
 * not listed here -- confirm whether it is meant to be documented.
 */
char            *cmdusage =
"usage:\n"
"        diff [options] file1 file2\n"
"\n"
"options:\n"
"        -H           Use half hearted algorithm if memory runs out\n"
"        -n           generate an edit script for RCS\n"
"        -e           generate an edit script\n"
"        -b           ignore blanks\n"
"        -c[n]        print n context line (defaults to 3)\n"
"        -i           ignore case\n"
"        -t           quiet mode. return 3 if not enough memory\n"
"        -x           shift return codes by 100\n"
;

/*
 * Prototypes for every function defined in this file.  error, fatal,
 * fgetss, hash, myalloc and compact were already declared above; these
 * prototypes supply (or repeat) their parameter lists.
 *
 * NOTE(review): POSIX <stdio.h> also declares a getline() with a different
 * signature -- confirm this local getline() does not clash on the UNIX
 * build.
 */
INT     main( INT, char ** );
void    input( SLONG );
void    squish( void );
void    sort( LINE *, SLONG );
void    equiv( void );
void    unsort( void );
SLONG   subseq( void );
SLONG   newcand( SLONG, SLONG, SLONG );
SLONG   search( ULONG, ULONG, SLONG );
void    unravel( SLONG );
INT     check( char *, char * );
void    output( char *, char * );
void    change( SLONG, SLONG, SLONG, SLONG );
void    range( SLONG, SLONG, SLONG );
void    fetch( long *, SLONG, SLONG, SLONG, FILE *, char * );
INT     getline( FILE *, char * );
USHORT  hash( char * );
char    *myalloc( ULONG, char * );
void    myfree( void * );
char    *compact( char *, ULONG, char * );
void    noroom( char * );
INT     streq( char *, char * );
void    cant( char *, char *, SLONG );
void    fatal( char *, ... );
void    error( char *, ... );
void    fputss( char *, FILE * );
char    *fgetss( char *, SLONG, FILE * );

/*
 * Diff main program
 *
 * Parses the option switches, opens the two inputs, then runs the
 * comparison pipeline in order: input() for each file, squish(), sort(),
 * equiv(), unsort(), subseq(), unravel(), check() and output().
 *
 * argc, argv   command line in the form: diff [options] file1 file2
 *              Either file may be "-" to read standard input (but not both).
 *              If file2 names a directory, the file name and extension of
 *              file1 are appended to it.
 *
 * Returns xflag + DIFF_HAVE_DIFFS when havediffs is set, otherwise
 * xflag + DIFF_NO_DIFFS.  Command-line errors exit with
 * xflag + DIFF_NOT_COMPARED.
 */

// #include "diff.def"

INT main( INT argc, char **argv )
{
    SLONG               i;
    char                *ap;
    struct stat         st;
    char                path[_MAX_PATH];
    char                fname[_MAX_FNAME];
    char                ext[_MAX_EXT];

    /*
     * Consume leading "-xyz" switch arguments.  A lone "-" is not a switch
     * (it means stdin) and stops the scan.
     */
    while( argc > 1 && *( ap = argv[1] ) == '-' && *++ap != EOS ) {
        while( *ap != EOS ) {
            switch( ( *ap++ ) ) {
            case 'b':
                bflag++;
                break;

            case 'c':
                /* An optional single digit 1..9 gives the context size. */
                if( *ap > '0' && *ap <= '9' )
                    cflag = *ap++ -'0';
                else
                    cflag = 3;
                break;

            case 'e':
                eflag++;
                break;

            case 'd':
                /* The rest of this argument is the -d operand. */
                Dflag = ap;
                while( *ap != EOS )
                    ++ap;
                break;

            case 'H':
                Hflag++;
                break;

            case 'n':
                nflag++;
                break;

            case 'i':
                iflag++;
                break;

            case 't':
                tflag++;
                break;

            case 'x':
                xflag = DIFF_RETURN_ADD;
                break;

            default:
                error( "bad option '-%c'\n", ap[ -1] );
                exit( xflag + DIFF_NOT_COMPARED );
            }
        }
        argc--;
        argv++;
    }

    if( argc != 3 ) {
        /* Pass the usage text as data, never as a format string. */
        error( "%s", cmdusage );
        exit( xflag + DIFF_NOT_COMPARED );
    }
    if( nflag + ( cflag != 0 ) + eflag > 1 ) {
        error( " -c, -n and -e are incompatible.\n" );
        exit( xflag + DIFF_NOT_COMPARED );
    }
    argv++;     /* argv[0] and argv[1] are now file1 and file2 */
    for( i = 0; i <= 1; i++ ) {
        if( argv[i][0] == '-' && argv[i][1] == EOS ) {
            infd[i] = stdin;
            if( ( tempfd = fopen( TEMPFILE, "w" ) ) == NULL ) {
                cant( TEMPFILE, "work", 1 );
            }
        } else {
            /* path[] is a fixed-size buffer: refuse names that do not fit. */
            if( strlen( argv[i] ) >= sizeof( path ) ) {
                error( "file name too long: %s\n", argv[i] );
                exit( xflag + DIFF_NOT_COMPARED );
            }
            strcpy( path, argv[i] );
            if( i == 1 && stat( argv[i], &st ) == 0 && S_ISDIR( st.st_mode ) ) {
                /* file2 is a directory: compare against dir/<name of file1>. */
                _splitpath( argv[i - 1], NULL, NULL, fname, ext );
                /*
                 * Leave room for a path separator, a '.' before the
                 * extension and the terminating NUL that _makepath may add.
                 */
                if( strlen( argv[i] ) + strlen( fname ) + strlen( ext ) + 3
                    > sizeof( path ) ) {
                    error( "file name too long: %s\n", argv[i] );
                    exit( xflag + DIFF_NOT_COMPARED );
                }
                _makepath( path, NULL, argv[i], fname, ext );
            }
            infd[i] = fopen( path, "r" );
            if( !infd[i] ) {
                cant( path, "input", 2 );      /* Fatal error */
            }
        }
    }

    if( infd[0] == stdin && infd[1] == stdin ) {
        error( "Can't diff two things both on standard input." );
        exit( xflag + DIFF_NOT_COMPARED );
    }
    if( infd[0] == NULL && infd[1] == NULL ) {
        cant( argv[0], "input", 0 );
        cant( argv[1], "input", 1 );
    }

    /*
     * Read input, building hash tables.
     */
    input( 0 );
    input( 1 );
    squish();
#ifdef DEBUG
    printf( "before sort\n" );
    for( i = 1; i <= slenA; i++ ) {
        printf( "sfileA[%ld] = %6d %06o\n",
                i, sfileA[i].serial, sfileA[i].hash );
    }
    for( i = 1; i <= slenB; i++ ) {
        printf( "sfileB[%ld] = %6d %06o\n",
                i, sfileB[i].serial, sfileB[i].hash );
    }
#endif
    sort( sfileA, slenA );
    sort( sfileB, slenB );
#ifdef DEBUG
    printf( "after sort\n" );
    for( i = 1; i <= slenA; i++ ) {
        printf( "sfileA[%ld] = %6d %06o\n",
                i, sfileA[i].serial, sfileA[i].hash );
    }
    for( i = 1; i <= slenB; i++ ) {
        printf( "sfileB[%ld] = %6d %06o\n",
                i, sfileB[i].serial, sfileB[i].hash );
    }
#endif

    /*
     * Build equivalence classes.  member[] reuses the storage of fileB.
     */
    member = ( short * ) fileB;
    equiv();
    member = ( short * ) compact( ( char *) member, ( slenB + 2 ) * sizeof( SLONG ),
                                 "squeezing member vector" );
    fileB = ( LINE * ) member;

    /*
     * Reorder equivalence classes into array class[], which reuses the
     * storage of fileA.
     */
    class = ( short * ) fileA;
    unsort();
    class = ( short * ) compact( ( char *) class, ( slenA + 2 ) * sizeof( SLONG ),
                                "compacting class vector" );
    fileA = ( LINE * ) class;
    /*
     * Find longest subsequences
     */
    klist = ( SLONG * ) myalloc( ( slenA + 2 ) * sizeof( SLONG ), "klist" );
    clist = ( CANDIDATE * ) myalloc( csize * sizeof( CANDIDATE ), "clist" );
    i = subseq();
    /*
     * member/class alias fileB/fileA, so the aliases are cleared by hand
     * once the shared blocks have been released.
     */
    myfree( &member );
    fileB = NULL;
    myfree( &class );
    fileA = NULL;
    match = ( SLONG * ) myalloc( ( lenA + 2 ) * sizeof( SLONG ), "match" );
    unravel( klist[i] );
    myfree( &clist );
    myfree( &klist );

    /*
     * Check for fortuitous matches and output differences
     */
    oldseek = ( long * ) myalloc( ( lenA + 2 ) * sizeof( *oldseek ), "oldseek" );
    newseek = ( long * ) myalloc( ( lenB + 2 ) * sizeof( *newseek ), "newseek" );
    if( check( argv[0], argv[1] ) ) {
#ifdef DEBUG
        fprintf( stderr, "Spurious match, output is not optimal\n" );
#else
        ;
#endif
    }
    output( argv[0], argv[1] );
    if( tempfd != NULL ) {
        fclose( tempfd );
        unlink( TEMPFILE );
    }
    myfree( &oldseek );
    myfree( &newseek );
    myfree( &fileA );
    myfree( &fileB );

    return( xflag + ( havediffs ? DIFF_HAVE_DIFFS : DIFF_NO_DIFFS ) );
}

/*
 * Read the file, building hash table
 *
 * Reads infd[which] line by line through getline() into the global text[]
 * buffer and stores hash( text ) for each line.  Line records are stored
 * from index 1 upward; element 0 is left unused by this function.
 *
 * which    0 or 1; selects the stream infd[which] and the slots
 *          file[which] / len[which] that receive the result.
 *
 * On return len[which] holds the number of lines read and file[which] the
 * LINE vector.  When the stream was stdin, the work file TEMPFILE is
 * closed and reopened for reading, and both tempfd and infd[which] are
 * pointed at it.
 */

void input( SLONG which )
{
    LINE                *lentry;
    SLONG               linect = 0;
    FILE                *fd;
#define LSIZE_INC 200           /* # of line entries to alloc at once */
    SLONG               lsize = LSIZE_INC;

    /* The vector always carries 3 spare entries beyond lsize. */
    lentry = ( LINE * ) myalloc( sizeof( LINE )* ( lsize + 3 ), "line" );
    fd = infd[which];
    while( !getline( fd, text ) ) {
        if( ++linect >= lsize ) {
            /* Grow by the same step used for the initial allocation. */
            lsize += LSIZE_INC;
            lentry = ( LINE * ) compact( ( char *) lentry,
                                         ( lsize + 3 ) * sizeof( LINE ),
                                         "extending line vector" );
        }
        lentry[linect].hash = hash( text );
    }

    /*
     * If input was from stdin ("-" command), finish off the temp file.
     */
    if( fd == stdin ) {
        fclose( tempfd );
        tempfd = infd[which] = fopen( TEMPFILE, "r" );
        if( tempfd == NULL ) {
            /*
             * Later passes read from infd[which]; report the failure the
             * same way main() does when the work file cannot be created.
             */
            cant( TEMPFILE, "work", 1 );
        }
    }

    /*
     * If we wanted to be stingy with memory, we could realloc lentry down to
     * its exact size (+3 for some odd reason) here.  No need?
     */
    len[which] = linect;
    file[which] = lentry;
}

/*
 * Look for initial and trailing sequences that have identical hash values.
 * Don't bother building them into the candidate vector.
 */

void squish()
{
    register SLONG      i;
    register LINE       *ap;
    register LINE       *bp;
    SLONG               j;
    SLONG               k;

    /*
     * prefix -> first line (from start) that doesn't hash identically
     */
    i = 0;
    ap = &fileA[1];
    bp = &fileB[1];
    while( i < lenA && i < lenB && ap->hash == bp->hash ) {
        i++;
        ap++;
        bp++;
    }
    prefix = i;

    /*
     * suffix -> first line (from end) that doesn't hash identically
     */
    j = lenA - i;
    k = lenB - i;
    ap = &fileA[lenA];
    bp = &fileB[lenB];
    i = 0;
    while( i < j && i < k && ap->hash == bp->hash ) {
        i++;
        ap--;
        bp--;
    }
    suffix = i;

    /*
     * Tuck the counts away
     */
    for( k = 0; k <= 1; k++ ) {
        sfile[k] = file[k] + prefix;
        j = slen[k] = len[k] - prefix - suffix;

        for( i = 0, ap = sfile[k]; i <= slen[k]; i++, ap++ ) {
            ap->serial = i;
        }
    }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?