uniq.c

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 271 行

C
271
字号
/****************************************************************************
*
*                            Open Watcom Project
*
*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
*  ========================================================================
*
*    This file contains Original Code and/or Modifications of Original
*    Code as defined in and that are subject to the Sybase Open Watcom
*    Public License version 1.0 (the 'License'). You may not use this file
*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
*    provided with the Original Code and Modifications, and is also
*    available at www.sybase.com/developer/opensource.
*
*    The Original Code and all software distributed under the License are
*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
*    NON-INFRINGEMENT. Please see the License for the specific language
*    governing rights and limitations under the License.
*
*  ========================================================================
*
* Description:  POSIX uniq utility
*               Finds unique lines in input
*
****************************************************************************/


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#include "misc.h"
#include "getopt.h"
#include "argvrx.h"
#include "argvenv.h"

#define  MIN_LINE_LEN   80

#define  MODE_UNIQUE    0x01
#define  MODE_REPEAT    0x02
#define  MODE_COUNT     0x04

char *OptEnvVar="uniq";

static const char *usageMsg[] = {
    "Usage: uniq [-?ud] [-c] [-number] [+number] [@env] [input [output]]",
    "\tenv                : environment variable to expand",
    "\tinput              : name of input file",
    "\toutput             : name of output file",
    "\tOptions: -?        : display this message",
    "\t\t -u        : display lines which are not repeated",
    "\t\t -d        : display only one copy of lines which are repeated",
    "\t\t -c        : display all lines, without repetition, along with",
    "\t\t           : the number of times each line occurred",
    "\t\t -number   : skip number fields before comparison",
    "\t\t +number   : skip number characters in field before comparison",
    NULL
};

typedef struct line {                       // Line of a file.
    char           *buff;
    unsigned        size;
} line;

/*
 * Local functions.
 */

static int getNextLine( FILE *fp, line *l )
{
    int         ch = 0;
    unsigned    os = 0;

    if( l->size == 0 ) {
        l->size = MIN_LINE_LEN * sizeof( char );
        l->buff = (char *) malloc( l->size );
    }

    while( 1 ) {
        if( os >= l->size - 1 ) {                   // Buffer getting small.
            l->size += MIN_LINE_LEN * sizeof( char );
            l->buff  = (char *) realloc( l->buff, l->size );
        }
        ch = fgetc( fp );

        if( ch == EOF ) {
            break;
        } else if( (char) ch == '\n' ) {
            break;
        } else {
            *(l->buff + os) = (char) ch;
            os++;
        }
    }
    *(l->buff + os) = '\0';

    return( (ch == EOF)  &&  (os == 0) );
}

static int  compareLines( line *ln1, line *ln2, int fld, int chr )
{
    char        *p1;
    char        *p2;
    int          fcnt;
    int          cnt;

    for( fcnt = 0, p1 = ln1->buff; *p1 != '\0'  &&  fcnt < fld; p1++ ) {
        if( *p1 == ' '  ||  *p1 == '\t' ) {
            fcnt++;
            for( ; *p1 == ' '  ||  *p1 == '\t' ; p1++ );
            p1--;
        }
    }
    for( fcnt = 0, p2 = ln2->buff; *p2 != '\0'  &&  fcnt < fld; p2++ ) {
        if( *p2 == ' '  ||  *p2 == '\t' ) {
            fcnt++;
            for( ; *p2 == ' '  ||  *p2 == '\t' ; p2++ );
            p2--;
        }
    }

    for( cnt = 0; *p1 != '\0'  &&  cnt < chr; p1++, cnt++ );
    for( cnt = 0; *p2 != '\0'  &&  cnt < chr; p2++, cnt++ );

    if( *p1 == '\0'  &&  *p2 == '\0' ) {
        return( 0 );
    } else if( *p1 == '\0'  ||  *p2 == '\0' ) {
        return( 1 );
    } else {
        return( strcmp( p1, p2 ) );
    }
}

static void copyLines( line *dest, line *src )
{
    if( src->size > dest->size ) {
        dest->size = src->size;
        dest->buff = (char *) realloc( dest->buff, dest->size * sizeof(char) );
    }
    strcpy( dest->buff, src->buff );
}

static void displayUniq( FILE *in, FILE *out, int mode, int fldos, int chros )
{
    line        ln1 = { NULL, 0 }, ln2 = { NULL, 0 };
    int         count  = 0;
    int         done   = 0;
    int         cmp    = 0;

    done = getNextLine( in, &ln1 );

    while( !done ) {
        done = getNextLine( in, &ln2 );

        if( done ) {
            cmp = 1;
        } else {
            cmp = compareLines( &ln1, &ln2, fldos, chros );
        }

        if( !cmp ) {
            count++;
        } else {
            if( mode & MODE_UNIQUE  &&  count == 0 ) {
                fprintf( out, "%s\n", ln1.buff );
            }
            if( mode & MODE_REPEAT  &&  count != 0 ) {
                fprintf( out, "%s\n", ln1.buff );
            }
            if( mode & MODE_COUNT ) {
                fprintf( out, "    %d %s\n", count + 1, ln1.buff );
            }
            count = 0;
            copyLines( &ln1, &ln2 );
        }
    }

    free( ln1.buff );
    free( ln2.buff );
}

static int isNumber( char *s )
{
    if( *s == '\0' ) {
        return( 0 );
    }

    for( ; *s != '\0'; s++ ) {
        if( !isdigit( *s ) ) {
            return( 0 );
        }
    }
    return( 1 );
}

void main( int argc, char **argv )
{
    FILE       *in, *out;
    int         ch;

    int         mode   = 0;
    int         fld_os = 0;
    int         chr_os = 0;

    argv = ExpandEnv( &argc, argv );

    while( 1 ) {
        ch = GetOpt( &argc, argv, "#udc", usageMsg );
        if( ch == -1 ) {
            break;
        }
        switch( ch ) {
            case 'u':
                mode &= ~MODE_COUNT;            // turn off counting mode
                mode |= MODE_UNIQUE;
                break;
            case 'd':
                mode &= ~MODE_COUNT;            // turn off counting mode
                mode |= MODE_REPEAT;
                break;
            case 'c':
                mode  = MODE_COUNT;
                break;
            case '#':
                if( !isNumber( OptArg ) ) {
                    Die( "uniq: invalid field offset\n" );
                }
                fld_os = atoi( OptArg );
                break;
        }
    }
    if( mode == 0 ) {
        mode = MODE_UNIQUE | MODE_REPEAT;
    }

    argv++;
    argc--;
    if( *argv != NULL  &&  **argv == '+' ) {
        if( !isNumber( *argv + 1 ) ) {
            Die( "uniq: invalid character offset\n" );
        }
        chr_os = atoi( *argv + 1 );
        argv++;
        argc--;
    }
    if( *argv == NULL ) {
        displayUniq( stdin, stdout, mode, fld_os, chr_os );
    } else {
        if( (in = fopen( *argv, "r" )) == NULL ) {
            Die( "uniq: cannot open input file \"%s\"\n", *argv );
        }
        argv++;
        if( *argv != NULL ) {
            if( (out = fopen( *argv, "w" )) == NULL ) {
                Die( "uniq: cannot open output file \"%s\"\n", *argv );
            }
            displayUniq( in, out, mode, fld_os, chr_os );
            fclose( out );
        } else {
            displayUniq( in, stdout, mode, fld_os, chr_os );
        }
        fclose( in );
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?