📄 compress.c
字号:
#ifdef SCCSIDstatic char *SccsId = "@(#)compress.c 1.14 9/24/87";#endif /* SCCSID */static char rcs_ident[] = "Based on compress.c,v 4.0 85/07/30 12:50:00 joe Release";/* * Compress - data compression program */#define min(a,b) ((a>b) ? b : a)/* * machine variants which require cc -Dmachine: pdp11, z8000, pcxt *//* * Set USERMEM to the maximum amount of physical user memory available * in bytes. USERMEM is used to determine the maximum BITS that can be used * for compression. * * SACREDMEM is the amount of physical memory saved for others; compress * will hog the rest. */#ifndef SACREDMEM#define SACREDMEM 0#endif#ifndef USERMEM# define USERMEM 450000 /* default user memory */#endif#ifdef interdata /* (Perkin-Elmer) */#define SIGNED_COMPARE_SLOW /* signed compare is slower than unsigned */#endif#ifdef pdp11# define BITS 12 /* max bits/code for 16-bit machine */# define NO_UCHAR /* also if "unsigned char" functions as signed char */# undef USERMEM #endif /* pdp11 */ /* don't forget to compile with -i */#ifdef z8000# define BITS 12# undef vax /* weird preprocessor */# undef USERMEM #endif /* z8000 */#ifdef pcxt# define BITS 12# undef USERMEM#endif /* pcxt */#ifdef USERMEM# if USERMEM >= (433484+SACREDMEM)# define PBITS 16# else# if USERMEM >= (229600+SACREDMEM)# define PBITS 15# else# if USERMEM >= (127536+SACREDMEM)# define PBITS 14# else# if USERMEM >= (73464+SACREDMEM)# define PBITS 13# else# define PBITS 12# endif# endif# endif# endif# undef USERMEM#endif /* USERMEM */#ifdef PBITS /* Preferred BITS for this memory size */# ifndef BITS# define BITS PBITS# endif /* BITS */#endif /* PBITS */#if BITS == 16# define HSIZE 69001 /* 95% occupancy */#endif#if BITS == 15# define HSIZE 35023 /* 94% occupancy */#endif#if BITS == 14# define HSIZE 18013 /* 91% occupancy */#endif#if BITS == 13# define HSIZE 9001 /* 91% occupancy */#endif#if BITS <= 12# define HSIZE 5003 /* 80% occupancy */#endif#ifdef M_XENIX /* Stupid compiler can't handle arrays with */# if BITS == 16 /* more than 65535 bytes - so we fake it */# define XENIX_16# else# if BITS > 13 /* Code only handles BITS = 12, 13, or 16 */# define BITS 13# endif# endif#endif/* * a code_int must be able to hold 2**BITS values of type int, and also -1 */#if BITS > 15typedef long int code_int;#elsetypedef int code_int;#endif#ifdef SIGNED_COMPARE_SLOWtypedef unsigned long int count_int;typedef unsigned short int count_short;#elsetypedef long int count_int;#endif#ifdef NO_UCHAR typedef char char_type;#else typedef unsigned char char_type;#endif /* UCHAR */char_type magic_header[] = { "\037\235" }; /* 1F 9D *//* Defines for third byte of header */#define BIT_MASK 0x1f#define BLOCK_MASK 0x80/* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is a fourth header byte (for expansion).*/#define INIT_BITS 9 /* initial number of bits/code *//* * compress.c - File compression ala IEEE Computer, June 1984. * * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) * Jim McKie (decvax!mcvax!jim) * Steve Davies (decvax!vax135!petsd!peora!srd) * Ken Turkowski (decvax!decwrl!turtlevax!ken) * James A. Woods (decvax!ihnp4!ames!jaw) * Joe Orost (decvax!vax135!petsd!joe) * */#include <stdio.h>#include <ctype.h>#include <signal.h>#include <sys/types.h>#include <sys/stat.h>#define ARGVAL() (*++(*argv) || (--argc && *++argv))int n_bits; /* number of bits/code */int maxbits = BITS; /* user settable max # bits/code */code_int maxcode; /* maximum code, given n_bits */code_int maxmaxcode = 1L << BITS; /* should NEVER generate this code */#ifdef COMPATIBLE /* But wrong! */# define MAXCODE(n_bits) (1L << (n_bits) - 1)#else# define MAXCODE(n_bits) ((1L << (n_bits)) - 1)#endif /* COMPATIBLE */#ifdef XENIX_16count_int htab0[8192];count_int htab1[8192];count_int htab2[8192];count_int htab3[8192];count_int htab4[8192];count_int htab5[8192];count_int htab6[8192];count_int htab7[8192];count_int htab8[HSIZE-65536];count_int * htab[9] = { htab0, htab1, htab2, htab3, htab4, htab5, htab6, htab7, htab8 };#define htabof(i) (htab[(i) >> 13][(i) & 0x1fff])unsigned short code0tab[16384];unsigned short code1tab[16384];unsigned short code2tab[16384];unsigned short code3tab[16384];unsigned short code4tab[16384];unsigned short * codetab[5] = { code0tab, code1tab, code2tab, code3tab, code4tab };#define codetabof(i) (codetab[(i) >> 14][(i) & 0x3fff])#else /* Normal machine */# ifdef sel/* support gould base register problems *//*NOBASE*/count_int htab [HSIZE];unsigned short codetab [HSIZE];/*NOBASE*/# else /* !gould */count_int htab [HSIZE];unsigned short codetab [HSIZE];# endif /* !gould */#define htabof(i) htab[i]#define codetabof(i) codetab[i]#endif /* !XENIX_16 */code_int hsize = HSIZE; /* for dynamic table sizing */count_int fsize;/* * To save much memory, we overlay the table used by compress() with those * used by decompress(). The tab_prefix table is the same size and type * as the codetab. The tab_suffix table needs 2**BITS characters. We * get this from the beginning of htab. The output stack uses the rest * of htab, and contains characters. There is plenty of room for any * possible stack (stack used to be 8000 characters). */#define tab_prefixof(i) codetabof(i)#ifdef XENIX_16# define tab_suffixof(i) ((char_type *)htab[(i)>>15])[(i) & 0x7fff]# define de_stack ((char_type *)(htab2))#else /* Normal machine */# define tab_suffixof(i) ((char_type *)(htab))[i]# define de_stack ((char_type *)&tab_suffixof(1<<BITS))#endif /* XENIX_16 */code_int free_ent = 0; /* first unused entry */int exit_stat = 0;code_int getcode();Usage() {#ifdef DEBUGfprintf(stderr,"Usage: compress [-dDVfc] [-b maxbits] [file ...]\n");}int debug = 0;#elsefprintf(stderr,"Usage: compress [-dfvcV] [-b maxbits] [file ...]\n");}#endif /* DEBUG */int nomagic = 0; /* Use a 3-byte magic number header, unless old file */int zcat_flg = 0; /* Write output on stdout, suppress messages */int quiet = 1; /* don't tell me about compression *//* * block compression parameters -- after all codes are used up, * and compression rate changes, start over. */int block_compress = BLOCK_MASK;int clear_flg = 0;long int ratio = 0;#define CHECK_GAP 10000 /* ratio check interval */count_int checkpoint = CHECK_GAP;/* * the next two codes should not be changed lightly, as they must not * lie within the contiguous general code space. */ #define FIRST 257 /* first free entry */#define CLEAR 256 /* table clear output code */int force = 0;char ofname [100];#ifdef DEBUGint verbose = 0;#endif /* DEBUG */int (*bgnd_flag)();int do_decomp = 0;/***************************************************************** * TAG( main ) * * Algorithm from "A Technique for High Performance Data Compression", * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. * * Usage: compress [-dfvc] [-b bits] [file ...] * Inputs: * -d: If given, decompression is done instead. * * -c: Write output on stdout, don't remove original. * * -b: Parameter limits the max number of bits/code. * * -f: Forces output file to be generated, even if one already * exists, and even if no space is saved by compressing. * If -f is not used, the user will be prompted if stdin is * a tty, otherwise, the output file will not be overwritten. * * -v: Write compression statistics * * file ...: Files to be compressed. If none specified, stdin * is used. * Outputs: * file.Z: Compressed form of file with same mode, owner, and utimes * or stdout (if stdin used as input) * * Assumptions: * When filenames are given, replaces with the compressed version * (.Z suffix) only if the file decreases in size. * Algorithm: * Modified Lempel-Ziv method (LZW). Basically finds common * substrings and replaces them with a variable size code. This is * deterministic, and can be done on the fly. Thus, the decompression * procedure needs no input table, but tracks the way the table was built. */main( argc, argv )register int argc; char **argv;{ int overwrite = 0; /* Do not overwrite unless given -f flag */ char tempname[100]; char **filelist, **fileptr; char *cp, *rindex(), *malloc(); struct stat statbuf; extern onintr(), oops(); if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) { signal ( SIGINT, onintr ); signal ( SIGSEGV, oops ); }#ifdef COMPATIBLE nomagic = 1; /* Original didn't have a magic number */#endif /* COMPATIBLE */ filelist = fileptr = (char **)(malloc(argc * sizeof(*argv))); *filelist = NULL; if((cp = rindex(argv[0], '/')) != 0) { cp++; } else { cp = argv[0]; } if(strcmp(cp, "uncompress") == 0) { do_decomp = 1; } else if(strcmp(cp, "zcat") == 0) { do_decomp = 1; zcat_flg = 1; }#ifdef BSD4_2 /* 4.2BSD dependent - take it out if not */ setlinebuf( stderr );#endif /* BSD4_2 */ /* Argument Processing * All flags are optional. * -D => debug * -V => print Version; debug verbose * -d => do_decomp * -v => unquiet * -f => force overwrite of output file * -n => no header: useful to uncompress old files * -b maxbits => maxbits. If -b is specified, then maxbits MUST be * given also. * -c => cat all output to stdout * -C => generate output compatible with compress 2.0. * if a string is left, must be an input filename. */ for (argc--, argv++; argc > 0; argc--, argv++) { if (**argv == '-') { /* A flag argument */ while (*++(*argv)) { /* Process all flags in this arg */ switch (**argv) {#ifdef DEBUG case 'D': debug = 1; break; case 'V': verbose = 1; version(); break;#else case 'V': version(); break;#endif /* DEBUG */ case 'v': quiet = 0; break; case 'd': do_decomp = 1; break; case 'f': case 'F': overwrite = 1; force = 1; break; case 'n': nomagic = 1; break; case 'C': block_compress = 0; break; case 'b': if (!ARGVAL()) { fprintf(stderr, "Missing maxbits\n"); Usage(); exit(1); } maxbits = atoi(*argv); goto nextarg; case 'c': zcat_flg = 1; break; case 'q': quiet = 1; break; default: fprintf(stderr, "Unknown flag: '%c'; ", **argv); Usage(); exit(1); } } } else { /* Input file name */ *fileptr++ = *argv; /* Build input file list */ *fileptr = NULL; /* process nextarg; */ } nextarg: continue; } if(maxbits < INIT_BITS) maxbits = INIT_BITS; if (maxbits > BITS) maxbits = BITS; maxmaxcode = 1L << maxbits; if (*filelist != NULL) { for (fileptr = filelist; *fileptr; fileptr++) { exit_stat = 0; if (do_decomp != 0) { /* DECOMPRESSION */ /* Check for .Z suffix */ if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) { /* No .Z: tack one on */ strcpy(tempname, *fileptr); strcat(tempname, ".Z"); *fileptr = tempname; } /* Open input file */ if ((freopen(*fileptr, "r", stdin)) == NULL) { perror(*fileptr); continue; } /* Check the magic number */ if (nomagic == 0) { if ((getchar() != (magic_header[0] & 0xFF)) || (getchar() != (magic_header[1] & 0xFF))) { fprintf(stderr, "%s: not in compressed format\n", *fileptr); continue; } maxbits = getchar(); /* set -b from file */ block_compress = maxbits & BLOCK_MASK; maxbits &= BIT_MASK; maxmaxcode = 1L << maxbits; if(maxbits > BITS) { fprintf(stderr, "%s: compressed with %d bits, can only handle %d bits\n", *fileptr, maxbits, BITS); continue; } } /* Generate output filename */ strcpy(ofname, *fileptr); ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */ } else { /* COMPRESSION */ if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) { fprintf(stderr, "%s: already has .Z suffix -- no change\n", *fileptr); continue; } /* Open input file */ if ((freopen(*fileptr, "r", stdin)) == NULL) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -