📄 compress.c
字号:
/* compress - Reduce file size using Modified Lempel-Ziv encoding */

/*
 * compress.c - File compression ala IEEE Computer, June 1984.
 *
 * Authors:	Spencer W. Thomas	(decvax!harpo!utah-cs!utah-gr!thomas)
 *		Jim McKie		(decvax!mcvax!jim)
 *		Steve Davies		(decvax!vax135!petsd!peora!srd)
 *		Ken Turkowski		(decvax!decwrl!turtlevax!ken)
 *		James A. Woods		(decvax!ihnp4!ames!jaw)
 *		Joe Orost		(decvax!vax135!petsd!joe)
 *
 *		Richard Todd		Port to MINIX
 *		Andy Tanenbaum		Cleanup
 *
 *
 * Algorithm from "A Technique for High Performance Data Compression",
 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
 *
 * Usage: compress [-dfvc] [-b bits] [file ...]
 * Inputs:
 *	-d:	    If given, decompression is done instead.
 *
 *	-c:	    Write output on stdout.
 *
 *	-b:	    Parameter limits the max number of bits/code.
 *
 *	-f:	    Forces output file to be generated, even if one already
 *		    exists, and even if no space is saved by compressing.
 *		    If -f is not used, the user will be prompted if stdin is
 *		    a tty, otherwise, the output file will not be overwritten.
 *
 *	-v:	    Write compression statistics
 *
 *	file ...:   Files to be compressed.  If none specified, stdin
 *		    is used.
 * Outputs:
 *	file.Z:	    Compressed form of file with same mode, owner, and utimes
 *	or stdout   (if stdin used as input)
 *
 * Assumptions:
 *	When filenames are given, replaces with the compressed version
 *	(.Z suffix) only if the file decreases in size.
 * Algorithm:
 *	Modified Lempel-Ziv method (LZW).  Basically finds common
 *	substrings and replaces them with a variable size code.  This is
 *	deterministic, and can be done on the fly.  Thus, the decompression
 *	procedure needs no input table, but tracks the way the table was built.
 */

/*
 * Defined unconditionally, so wherever this file tests AZTEC86 with
 * #ifdef/#ifndef, the AZTEC86 side is the one that gets compiled.
 */
#define AZTEC86 1

/*
 * min(a,b) - the smaller of a and b (b when they compare equal).
 *
 * Each argument and the whole expansion are parenthesized so that operands
 * containing lower-precedence operators (e.g. min(x|y, z)) expand correctly.
 * An argument may still be evaluated twice, so do not pass expressions with
 * side effects.
 */
#define min(a,b)	(((a) > (b)) ? (b) : (a))

/*
 * Set USERMEM to the maximum amount of physical user memory available
 * in bytes.  USERMEM is used to determine the maximum BITS that can be used
 * for compression.
 *
 * SACREDMEM is the amount of physical memory saved for others; compress
 * will hog the rest.
*/#ifndef SACREDMEM#define SACREDMEM 0#endif#ifndef USERMEM# define USERMEM 450000 /* default user memory */#endif#define REGISTER register#define DOTZ ".Z"#include <limits.h>/* The default for Minix is -b13, but we can do -b16 if the machine can. */#define DEFAULTBITS 13#if INT_MAX == 32767# define BITS 13#else# define BITS 16#endif#ifdef USERMEM# if USERMEM >= (433484+SACREDMEM)# define PBITS 16# else# if USERMEM >= (229600+SACREDMEM)# define PBITS 15# else# if USERMEM >= (127536+SACREDMEM)# define PBITS 14# else# if USERMEM >= (73464+SACREDMEM)# define PBITS 13# else# define PBITS 12# endif# endif# endif# endif# undef USERMEM#endif /* USERMEM */#ifdef PBITS /* Preferred BITS for this memory size */# ifndef BITS# define BITS PBITS# endif#endif /* PBITS */#if BITS == 16# define HSIZE 69001 /* 95% occupancy */#endif#if BITS == 15# define HSIZE 35023 /* 94% occupancy */#endif#if BITS == 14# define HSIZE 18013 /* 91% occupancy */#endif#if BITS == 13# define HSIZE 9001 /* 91% occupancy */#endif#if BITS <= 12# define HSIZE 5003 /* 80% occupancy */#endif/* * a code_int must be able to hold 2**BITS values of type int, and also -1 */#if BITS > 15typedef long int code_int;#elsetypedef int code_int;#endif#ifdef SIGNED_COMPARE_SLOWtypedef unsigned long int count_int;typedef unsigned short int count_short;#elsetypedef long int count_int;#endif#ifdef NO_UCHAR typedef char char_type;#else typedef unsigned char char_type;#endif /* UCHAR */char_type magic_header[] = "\037\235"; /* 1F 9D *//* Defines for third byte of header */#define BIT_MASK 0x1f#define BLOCK_MASK 0x80/* Masks 0x40 and 0x20 are free. 
I think 0x20 should mean that there is a fourth header byte (for expansion).*/#define INIT_BITS 9 /* initial number of bits/code */#include <sys/types.h>#include <sys/stat.h>#include <fcntl.h>#include <ctype.h>#include <signal.h>#include <stdlib.h>#include <string.h>#include <unistd.h>#include <utime.h>#include <stdio.h>#define ARGVAL() (*++(*argv) || (--argc && *++argv))int n_bits; /* number of bits/code */int maxbits = DEFAULTBITS; /* user settable max # bits/code */code_int maxcode; /* maximum code, given n_bits */code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */#ifdef COMPATIBLE /* But wrong! */# define MAXCODE(n_bits) (1 << (n_bits) - 1)#else# define MAXCODE(n_bits) ((1 << (n_bits)) - 1)#endif /* COMPATIBLE */#ifndef AZTEC86 count_int htab [HSIZE]; unsigned short codetab [HSIZE];#else count_int *htab; unsigned short *codetab;# define HTABSIZE ((size_t)(HSIZE*sizeof(count_int)))# define CODETABSIZE ((size_t)(HSIZE*sizeof(unsigned short)))#define htabof(i) htab[i]#define codetabof(i) codetab[i]#endif /* XENIX_16 */code_int hsize = HSIZE; /* for dynamic table sizing */count_int fsize;/* * To save much memory, we overlay the table used by compress() with those * used by decompress(). The tab_prefix table is the same size and type * as the codetab. The tab_suffix table needs 2**BITS characters. We * get this from the beginning of htab. The output stack uses the rest * of htab, and contains characters. There is plenty of room for any * possible stack (stack used to be 8000 characters). 
*/#define tab_prefixof(i) codetabof(i)#ifdef XENIX_16# define tab_suffixof(i) ((char_type *)htab[(i)>>15])[(i) & 0x7fff]# define de_stack ((char_type *)(htab2))#else /* Normal machine */# define tab_suffixof(i) ((char_type *)(htab))[i]# define de_stack ((char_type *)&tab_suffixof(1<<BITS))#endif /* XENIX_16 */code_int free_ent = 0; /* first unused entry */int exit_stat = 0;_PROTOTYPE(int main, (int argc, char **argv));_PROTOTYPE(void Usage, (void));_PROTOTYPE(void compress, (void));_PROTOTYPE(void onintr, (int dummy));_PROTOTYPE(void oops, (int dummy));_PROTOTYPE(void output, (code_int code));_PROTOTYPE(int foreground, (void));_PROTOTYPE(void decompress, (void));_PROTOTYPE(code_int getcode, (void)); _PROTOTYPE(void writeerr, (void));_PROTOTYPE(void copystat, (char *ifname, char *ofname));_PROTOTYPE(int foreground, (void));_PROTOTYPE(void cl_block , (void));_PROTOTYPE(void cl_hash, (count_int hsize));_PROTOTYPE(void prratio, (FILE *stream, long int num, long int den));_PROTOTYPE(void version, (void));void Usage() {#ifdef DEBUGfprintf(stderr,"Usage: compress [-dDVfc] [-b maxbits] [file ...]\n");}int debug = 0;#elsefprintf(stderr,"Usage: compress [-dfvcV] [-b maxbits] [file ...]\n");}#endif /* DEBUG */int nomagic = 0; /* Use a 3-byte magic number header, unless old file */int zcat_flg = 0; /* Write output on stdout, suppress messages */int quiet = 0; /* don't tell me about compression *//* * block compression parameters -- after all codes are used up, * and compression rate changes, start over. */int block_compress = BLOCK_MASK;int clear_flg = 0;long int ratio = 0;#define CHECK_GAP 10000 /* ratio check interval */count_int checkpoint = CHECK_GAP;/* * the next two codes should not be changed lightly, as they must not * lie within the contiguous general code space. 
*/ #define FIRST 257 /* first free entry */#define CLEAR 256 /* table clear output code */int force = 0;char ofname [100];#ifdef DEBUGint verbose = 0;#endif /* DEBUG */#ifndef METAWARE#ifdef AZTEC86void#elseint#endif#ifndef __STDC__(*bgnd_flag)();#else(*bgnd_flag)(int);#endif#endifint do_decomp = 0;int main(argc, argv)int argc;char **argv;{ int overwrite = 0; /* Do not overwrite unless given -f flag */ char tempname[100]; char **filelist, **fileptr; char *cp; struct stat statbuf;#ifndef METAWARE if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) { signal ( SIGINT, onintr ); signal ( SIGSEGV, oops ); }#endif#ifdef AZTEC86#ifdef METAWARE _setmode(NULL,_ALL_FILES_BINARY); _setmode(stdin,_BINARY); _setmode(stdout,_BINARY); _setmode(stderr,_TEXT);#endif if (NULL == (htab = (count_int *)malloc(HTABSIZE))) { fprintf(stderr,"Can't allocate htab\n"); exit(1); } if (NULL == (codetab = (unsigned short *)malloc(CODETABSIZE))) { fprintf(stderr,"Can't allocate codetab\n"); exit(1); }#endif#ifdef COMPATIBLE nomagic = 1; /* Original didn't have a magic number */#endif /* COMPATIBLE */ filelist = fileptr = (char **)(malloc((size_t)(argc * sizeof(*argv)))); *filelist = NULL; if((cp = strrchr(argv[0], '/')) != 0) { cp++; } else { cp = argv[0]; } if(strcmp(cp, "uncompress") == 0) { do_decomp = 1; } else if(strcmp(cp, "zcat") == 0) { do_decomp = 1; zcat_flg = 1; }#ifdef BSD4_2 /* 4.2BSD dependent - take it out if not */ setlinebuf( stderr );#endif /* BSD4_2 */ /* Argument Processing * All flags are optional. * -D => debug * -V => print Version; debug verbose * -d => do_decomp * -v => unquiet * -f => force overwrite of output file * -n => no header: useful to uncompress old files * -b maxbits => maxbits. If -b is specified, then maxbits MUST be * given also. * -c => cat all output to stdout * -C => generate output compatible with compress 2.0. * if a string is left, must be an input filename. 
*/ for (argc--, argv++; argc > 0; argc--, argv++) { if (**argv == '-') { /* A flag argument */ while (*++(*argv)) { /* Process all flags in this arg */ switch (**argv) {#ifdef DEBUG case 'D': debug = 1; break; case 'V': verbose = 1; version(); break;#else case 'V': version(); break;#endif /* DEBUG */ case 'v': quiet = 0; break; case 'd': do_decomp = 1; break; case 'f': case 'F': overwrite = 1; force = 1; break; case 'n': nomagic = 1; break; case 'C': block_compress = 0; break; case 'b': if (!ARGVAL()) { fprintf(stderr, "Missing maxbits\n"); Usage(); exit(1); } maxbits = atoi(*argv); goto nextarg; case 'c': zcat_flg = 1; break; case 'q': quiet = 1; break; default: fprintf(stderr, "Unknown flag: '%c'; ", **argv); Usage(); exit(1); } } } else { /* Input file name */ *fileptr++ = *argv; /* Build input file list */ *fileptr = NULL; /* process nextarg; */ } nextarg: continue; } if(maxbits < INIT_BITS) maxbits = INIT_BITS; if (maxbits > BITS) maxbits = BITS; maxmaxcode = 1 << maxbits; if (*filelist != NULL) { for (fileptr = filelist; *fileptr; fileptr++) { exit_stat = 0; if (do_decomp != 0) { /* DECOMPRESSION */ /* Check for .Z suffix */#ifndef PCDOS if (strcmp(*fileptr + strlen(*fileptr) - 2, DOTZ) != 0) #else if (strcmp(*fileptr + strlen(*fileptr) - 1, DOTZ) != 0) #endif { /* No .Z: tack one on */ strcpy(tempname, *fileptr);#ifndef PCDOS strcat(tempname, DOTZ);#else /* either tack one on or replace last character */ { char *dot; if (NULL == (dot = strchr(tempname,'.'))) { strcat(tempname,".Z"); } else /* if there is a dot then either tack a z on or replace last character */ { if (strlen(dot) < 4) strcat(tempname,DOTZ); else dot[3] = 'Z'; } }#endif *fileptr = tempname; } /* Open input file */ if ((freopen(*fileptr, "r", stdin)) == NULL) { perror(*fileptr); continue; } /* Check the magic number */ if (nomagic == 0) { unsigned magic1, magic2; if (((magic1 = getc(stdin)) != (magic_header[0] & 0xFF)) || ((magic2 = getc(stdin)) != (magic_header[1] & 0xFF))) { fprintf(stderr, 
"%s: not in compressed format %x %x\n", *fileptr,magic1,magic2); continue; } maxbits = getc(stdin); /* set -b from file */ block_compress = maxbits & BLOCK_MASK; maxbits &= BIT_MASK; maxmaxcode = 1 << maxbits; if(maxbits > BITS) { fprintf(stderr, "%s: compressed with %d bits, can only handle %d bits\n", *fileptr, maxbits, BITS); continue; } } /* Generate output filename */ strcpy(ofname, *fileptr);#ifndef PCDOS ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */#else /* kludge to handle various common three character extension */ { char *dot; char fixup = '\0'; /* first off, map name to upper case */ for (dot = ofname; *dot; dot++) *dot = toupper(*dot); if (NULL == (dot = strchr(ofname,'.'))) { fprintf(stderr,"Bad filename %s\n",ofname); exit(1); } if (strlen(dot) == 4) /* we got three letter extensions */ { if (strcmp(dot,".EXZ") == 0) fixup = 'E'; else if (strcmp(dot,".COZ") == 0) fixup = 'M'; else if (strcmp(dot,".BAZ") == 0) fixup = 'S'; else if (strcmp(dot,".OBZ") == 0) fixup = 'J'; else if (strcmp(dot,".SYZ") == 0) fixup = 'S'; else if (strcmp(dot,".DOZ") == 0) fixup = 'C'; } /* replace the Z */ ofname[strlen(*fileptr) - 1] = fixup; }#endif } else { /* COMPRESSION */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -