compress.c

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 996 行 · 第 1/3 页

C
996
字号
/****************************************************************************
*
*                            Open Watcom Project
*
*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
*  ========================================================================
*
*    This file contains Original Code and/or Modifications of Original
*    Code as defined in and that are subject to the Sybase Open Watcom
*    Public License version 1.0 (the 'License'). You may not use this file
*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
*    provided with the Original Code and Modifications, and is also
*    available at www.sybase.com/developer/opensource.
*
*    The Original Code and all software distributed under the License are
*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
*    NON-INFRINGEMENT. Please see the License for the specific language
*    governing rights and limitations under the License.
*
*  ========================================================================
*
* Description:  LZW file compression ala IEEE Computer, June 1984.
*
****************************************************************************/


/*
 * Authors: Spencer W. Thomas, Jim McKie, Steve Davies, Ken Turkowski
 *          James A. Woods, Joe Orost, Dave Mack        
 */


#include <ctype.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <utime.h>
#include <fcntl.h>
#include <libgen.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "util.h"


/*
 * Usage/help text for the command line, stored as a list of strings
 * terminated by a NULL pointer.  The entries carry no trailing newline;
 * presumably the consumer prints one entry per line (the printing code
 * is not visible here -- TODO confirm).
 */
static const char *usage_text[] = {
    "Usage:\tcompress [-?cdfv] [-b bits] [file ...]",
    "\tfile        : files to compress or uncompress, stdin",
    "\t\t      if none specified.",
    "\tOptions:",
    "\t\t -? : print this list",
    "\t\t -b : limit max number of bits/code to `bits'",
    "\t\t -c : write output on stdout, don't remove original",
    "\t\t -d : decompress input (compresses by default)",
    "\t\t -f : force output file to be generated, even if one",
    "\t\t      already exists, or if no space is saved",
    "\t\t -v : write verbose compression statistics to stderr",
    NULL        /* end-of-list sentinel */
};

/* Drop any min() a system header may already have supplied. */
#ifdef min
#undef min
#endif

/*
 * min(a,b) - yields the smaller of a and b (b when they compare equal).
 *
 * Every argument and the whole expansion are parenthesised so that
 * operands containing lower-precedence operators (e.g. `x & 7') are
 * compared and returned as written.  The selected argument is evaluated
 * twice, so do not pass expressions with side effects.
 */
#define min(a,b)    (((a) > (b)) ? (b) : (a))

/* MAXPATHLEN - maximum length of a pathname we allow */
#define MAXPATHLEN 1024

/*
 * Setup for 16-bit machines: when INT_MAX does not exceed 65536 the
 * code width is limited to 12 bits and IO_BUF_SIZE is set to 4096.
 */
#if !(INT_MAX > 65536L)
    #define BITS            12
    #define IO_BUF_SIZE     4096
#endif

/* Everywhere else the default is 16-bit codes ... */
#ifndef BITS
    #define BITS 16
#endif

/* ... and an IO_BUF_SIZE of 65536. */
#ifndef IO_BUF_SIZE
    #define IO_BUF_SIZE     65536
#endif

/*
 * HSIZE is the number of entries in the htab[] and codetab[] arrays.
 * It is selected from BITS; the trailing comments give the resulting
 * table occupancy.
 * NOTE(review): no HSIZE is defined when BITS is greater than 16.
 */
#if BITS == 16
  #define HSIZE 69001       /* 95% occupancy */
#endif
#if BITS == 15
  #define HSIZE 35023       /* 94% occupancy */
#endif
#if BITS == 14
  #define HSIZE 18013       /* 91% occupancy */
#endif
#if BITS == 13
  #define HSIZE 9001        /* 91% occupancy */
#endif
#if BITS <= 12
  #define HSIZE 5003        /* 80% occupancy */
#endif

/*
 * a code_int must be able to hold 2**BITS values of type int, and also -1
 * (long is chosen once BITS exceeds 15, plain int otherwise)
 */
#if BITS > 15
typedef long int        code_int;
#else
typedef int             code_int;
#endif

/* count_int is the element type of htab[] and the type of fsize/checkpoint */
typedef long int        count_int;
/* char_type is an unsigned byte; used for the header, masks and byte tables */
typedef unsigned char   char_type;

/* Defines for third byte of header */
#define BIT_MASK    0x1f                /* low five bits of that byte */
#define BLOCK_MASK  0x80                /* high bit; also initial value of block_compress */

/* Masks 0x40 and 0x20 are free.  0x20 could mean that there is
 * a fourth header byte (for expansion).
 */

#define INIT_BITS   9                   /* initial number of bits/code */

/*
 * Initialised from a string literal, so the array also holds the
 * terminating NUL after the two magic bytes.
 */
static char_type    magic_header[] = { "\037\235" };    /* 1F 9D */
static int n_bits;                      /* number of bits/code */
static int maxbits = BITS;              /* user settable max # bits/code */
static code_int maxcode;                /* maximum code, given n_bits */
static code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */
static code_int free_ent = 0;           /* first unused entry */
static int exit_stat = 0;   /* presumably the process exit status -- TODO confirm */
static int nomagic = 0;     /* use a 3-byte magic number header, unless old file */
static int zcat_flg = 0;    /* write output on stdout, suppress messages */
static int quiet = 1;       /* don't tell me about compression */
static int do_decomp = 0;   /* oops() reports corrupt input only when this is 1 */
static int overwrite = 0;   /* do not overwrite unless given -f flag */

/* Largest value representable in n_bits bits: 2**n_bits - 1 */
#define MAXCODE(n_bits) ((1 << (n_bits)) - 1)

/* The two HSIZE-entry tables; decompression reuses the same storage. */
static count_int        htab[HSIZE];
static unsigned short   codetab[HSIZE];

/* Element accessors for the two tables above */
#define htabof(i)       htab[i]
#define codetabof(i)    codetab[i]

static code_int         hsize = HSIZE;  /* for dynamic table sizing */
static count_int        fsize;          /* NOTE(review): presumably the input file size -- confirm */

/*
 * To save much memory, we overlay the table used by compress() with those
 * used by decompress().  The tab_prefix table is the same size and type
 * as the codetab.  The tab_suffix table needs 2**BITS characters.  We
 * get this from the beginning of htab.  The output stack uses the rest
 * of htab, and contains characters.  There is plenty of room for any
 * possible stack (stack used to be 8000 characters).
 */

/* prefix entry i: the same storage as codetab[i] */
#define tab_prefixof(i) codetabof(i)
/* suffix entry i: byte i of htab, viewed as an array of char_type */
#define tab_suffixof(i) ((char_type *)(htab))[i]
/* base of the output stack: the byte of htab just past the 2**BITS suffix bytes */
#define de_stack        ((char_type *)&tab_suffixof(1<<BITS))

/*
 * block compression parameters -- after all codes are used up,
 * and compression rate changes, start over.
 */
static int          block_compress = BLOCK_MASK;
static int          clear_flg = 0;  /* output() tests this together with free_ent > maxcode */
static long int     ratio = 0;

/* ratio check interval: 50000 for 16-bit codes, 10000 otherwise */
#if BITS == 16
    #define CHECK_GAP 50000 
#else
    #define CHECK_GAP 10000 /* ratio check interval */
#endif
static count_int checkpoint = CHECK_GAP;    /* starts at one check interval */

/*
 * the next two codes should not be changed lightly, as they must not
 * lie within the contiguous general code space.
 */
#define FIRST   257 /* first free entry */
#define CLEAR   256 /* table clear output code */

static int force = 0;       /* NOTE(review): presumably set by -f; not used in the visible code */
static int valid = 0;       /* set when signal can remove ofname */
static char ofname[MAXPATHLEN]; /* output file name; removed on error/signal when valid is set */
static void (*bgnd_flag)(); /* non-null makes foreground() report "background" */
static struct stat  statbuf, insbuf;

static int      offset;             /* bit offset into buf at which output() stores the next code */
static long     in_count = 1;       /* length of input */
static long     bytes_out;          /* length of compressed output */
static long     out_count = 0;      /* # of codes output (for debugging) */

/* Code buffer filled by output(); it is written out once it holds n_bits bytes */
static char buf[BITS];

/* lmask[i] has the low i bits clear; rmask[i] has only the low i bits set */
static char_type lmask[9] = { 0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00 };
static char_type rmask[9] = { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };

/*
 * Tell the caller whether we are an interactive foreground job.
 * Yields 1 only when bgnd_flag is not set and file descriptor 2
 * (stderr) refers to a tty; yields 0 in every other case.
 */
static int foreground( void )
/***************************/
{
    /* A set bgnd_flag means we run in the background. */
    if( bgnd_flag )
        return( 0 );

    /* Foreground: the answer now depends only on stderr being a tty. */
    return( isatty( 2 ) ? 1 : 0 );
}

/*
 * Interrupt handler (takes the signal number, which it ignores):
 * removes the output file when `valid' permits it, then terminates
 * the process with exit status 1.
 */
static void onintr( int sig )
/***************************/
{
    (void)sig;                  /* signal number is not needed */

    if( valid != 0 ) {
        unlink( ofname );       /* do not leave a partial output file behind */
    }
    exit( 1 );
}

/*
 * Handler for a wild pointer, which is taken to mean bad input.
 * When decompressing (do_decomp == 1) a "corrupt input" message is
 * written to stderr.  The output file is then removed if `valid' is
 * set, and the process terminates with exit status 1.
 */
static void oops( int sig )
/*************************/
{
    (void)sig;                  /* signal number is not needed */

    if( do_decomp == 1 ) {
        fputs( "uncompress: corrupt input\n", stderr );
    }
    if( valid != 0 ) {
        unlink( ofname );
    }
    exit( 1 );
}

/*
 * Fatal write-error exit: reports the current errno against the output
 * file name, removes that file when `valid' is set, and terminates the
 * process with exit status 1.
 */
static void writeerr( void )
/**************************/
{
    /* Report first: unlink() below could overwrite errno. */
    perror( ofname );

    if( valid != 0 ) {
        unlink( ofname );
    }
    exit( 1 );
}

/*
 * Print the ratio num/den as a percentage with two decimals, for
 * example "25.00%".  A negative ratio is printed with a leading '-'.
 *
 *  stream: stdio stream the text is written to.
 *  num:    numerator.
 *  den:    denominator.  A zero denominator prints "0.00%" instead of
 *          dividing by zero.
 *
 * The ratio is computed in hundredths of a percent (10000 * num / den).
 * When num is large the denominator is scaled down instead, so that
 * 10000 * num does not overflow a 32-bit long.  That shortcut is only
 * taken when the scaled denominator is non-zero; otherwise it would
 * itself divide by zero.
 */
static void print_ratio( FILE *stream, long num, long den )
/*********************************************************/
{
    int     q;          /* hundredths of a percent; doesn't need to be long */

    if( den == 0 ) {
        q = 0;                      /* ratio is undefined: avoid a divide trap */
    } else if( num > 214748L && den / 10000L != 0 ) {   /* 2147483647/10000 */
        q = num / (den / 10000L);
    } else {
        /*
         * Exact form.  Also used for a large num with |den| < 10000,
         * where scaling the denominator would reduce it to zero.
         */
        q = 10000L * num / den;     /* Long calculations, though */
    }
    if( q < 0 ) {
        putc( '-', stream );
        q = -q;
    }
    fprintf( stream, "%d.%02d%%", q / 100, q % 100 );
}

/*****************************************************************
 * Output the given code.
 * Inputs:
 *  code:   An n_bits-bit integer.  If == -1, then EOF.  This assumes
 *          that n_bits <= (long)wordsize - 1.
 * Outputs:
 *  Outputs code to the file.
 * Assumptions:
 *  Chars are 8 bits long.
 * Algorithm:
 *  Maintain a BITS-character-long buffer (so that 8 codes will
 *  fit in it exactly).  When the buffer fills up, empty it and start over.
 */

static void output( code_int code )
/*********************************/
{
    int     r_off = offset, bits = n_bits;
    char    *bp = buf;

    if( code >= 0 ) {
        /*
         * Get to the first byte.
         */
        bp += (r_off >> 3);
        r_off &= 7;
        /*
         * Since code is always >= 8 bits, only need to mask the first
         * hunk on the left.
         */
        *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
        bp++;
        bits -= (8 - r_off);
        code >>= 8 - r_off;
        /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
        if( bits >= 8 ) {
            *bp++ = code;
            code >>= 8;
            bits -= 8;
        }
        /* Last bits. */
        if( bits )
            *bp = code;

        offset += n_bits;
        if( offset == (n_bits << 3) ) {
            bp = buf;
            bits = n_bits;
            bytes_out += bits;
            do {
                putchar( *bp++ );
            } while( --bits );
            offset = 0;
        }

        /*
         * If the next entry is going to be too big for the code size,
         * then increase it, if possible.
         */
        if( free_ent > maxcode || (clear_flg > 0) ) {
            /*
             * Write the whole buffer, because the input side won't
             * discover the size increase until after it has read it.
             */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?