📄 compress.c
字号:
/* * compress - File compression ala IEEE Computer, June 1984. * * Algorithm from "A Technique for High Performance Data Compression", * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. * * Usage: compress [-dfvc] [-b bits] [file ...] * Inputs: * -b: limit the max number of bits/code. * -c: write output on stdout, don't remove original. * -d: decompress instead. * -f: Forces output file to be generated, even if one already * exists, and even if no space is saved by compressing. * If -f is not used, the user will be prompted if stdin is * a tty, otherwise, the output file will not be overwritten. * -v: Write compression statistics * * file ...: Files to be compressed. If none specified, stdin is used. * Outputs: * file.Z: Compressed form of file with same mode, owner, and utimes * or stdout (if stdin used as input) * * Assumptions: * When filenames are given, replaces with the compressed version * (.Z suffix) only if the file decreases in size. * Algorithm: * Modified Lempel-Ziv method (LZW). Basically finds common * substrings and replaces them with a variable size code. This is * deterministic, and can be done on the fly. Thus, the decompression * procedure needs no input table, but tracks the way the table was built. * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) * Jim McKie (decvax!mcvax!jim) * Steve Davies (decvax!vax135!petsd!peora!srd) * Ken Turkowski (decvax!decwrl!turtlevax!ken) * James A. Woods (decvax!ihnp4!ames!jaw) * Joe Orost (decvax!vax135!petsd!joe) */#define _PLAN9_SOURCE#include <u.h>#include <stdio.h>#include <ctype.h>#include <stdlib.h>#include <string.h>#include <signal.h>#include <sys/types.h>#include <sys/stat.h>#define min(a,b) ((a>b) ? b : a)#define BITS 16#define HSIZE 69001 /* 95% occupancy *//* * a code_int must be able to hold 2**BITS values of type int, and also -1 */typedef long code_int;typedef long count_int;static char rcs_ident[] = "$Header: compress.c,v 4.0 85/07/30 12:50:00 joe Release $";uchar magic_header[] = { 0x1F, 0x9D }; /* 1F 9D *//* Defines for third byte of header */#define BIT_MASK 0x1f#define BLOCK_MASK 0x80/* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is a fourth header byte (for expansion).*/#define INIT_BITS 9 /* initial number of bits/code */void onintr(int);void oops(int);#define ARGVAL() (*++(*argv) || (--argc && *++argv))int n_bits; /* number of bits/code */int maxbits = BITS; /* user settable max # bits/code */code_int maxcode; /* maximum code, given n_bits */code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */#define MAXCODE(n_bits) ((1 << (n_bits)) - 1)count_int htab[HSIZE];ushort codetab[HSIZE];#define htabof(i) htab[i]#define codetabof(i) codetab[i]code_int hsize = HSIZE; /* for dynamic table sizing */count_int fsize;/* * To save much memory, we overlay the table used by compress() with those * used by decompress(). The tab_prefix table is the same size and type * as the codetab. The tab_suffix table needs 2**BITS characters. We * get this from the beginning of htab. The output stack uses the rest * of htab, and contains characters. There is plenty of room for any * possible stack (stack used to be 8000 characters). */#define tab_prefixof(i) codetabof(i)#define tab_suffixof(i) ((uchar *)(htab))[i]#define de_stack ((uchar *)&tab_suffixof(1<<BITS))code_int free_ent = 0; /* first unused entry */int exit_stat = 0;code_int getcode();Usage(){#ifdef DEBUG fprintf(stderr,"Usage: compress [-cdfDV] [-b maxbits] [file ...]\n");#else fprintf(stderr,"Usage: compress [-cdfvV] [-b maxbits] [file ...]\n");#endif /* DEBUG */}int debug = 0;int nomagic = 0; /* Use a 3-byte magic number header, unless old file */int zcat_flg = 0; /* Write output on stdout, suppress messages */int quiet = 1; /* don't tell me about compression *//* * block compression parameters -- after all codes are used up, * and compression rate changes, start over. */int block_compress = BLOCK_MASK;int clear_flg = 0;long ratio = 0;#define CHECK_GAP 10000 /* ratio check interval */count_int checkpoint = CHECK_GAP;/* * the next two codes should not be changed lightly, as they must not * lie within the contiguous general code space. */#define FIRST 257 /* first free entry */#define CLEAR 256 /* table clear output code */int force = 0;char ofname [100];#ifdef DEBUGint verbose = 0;#endif /* DEBUG */void (*bgnd_flag)(int);int do_decomp = 0;main(argc, argv)int argc;char **argv;{ int overwrite = 0; /* Do not overwrite unless given -f flag */ char tempname[512]; char **filelist, **fileptr; char *cp; struct stat statbuf; if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) { signal(SIGINT, onintr); signal(SIGSEGV, oops); } filelist = fileptr = (char **)(malloc(argc * sizeof(*argv))); *filelist = NULL; if((cp = strrchr(argv[0], '/')) != 0) cp++; else cp = argv[0]; if(strcmp(cp, "uncompress") == 0) do_decomp = 1; else if(strcmp(cp, "zcat") == 0) { do_decomp = 1; zcat_flg = 1; } /* * Argument Processing * All flags are optional. * -C generate output compatible with compress 2.0. * -D debug * -V print Version; debug verbose * -b maxbits maxbits. If -b is specified, then maxbits MUST be * given also. * -c cat all output to stdout * -d do_decomp * -f force overwrite of output file * -n no header: useful to uncompress old files * -v unquiet * if a string is left, must be an input filename. */ for (argc--, argv++; argc > 0; argc--, argv++) { if (**argv == '-') { /* A flag argument */ while (*++(*argv)) { /* Process all flags in this arg */ switch (**argv) { case 'C': block_compress = 0; break;#ifdef DEBUG case 'D': debug = 1; break; case 'V': verbose = 1; version(); break;#else case 'V': version(); break;#endif case 'b': if (!ARGVAL()) { fprintf(stderr, "Missing maxbits\n"); Usage(); exit(1); } maxbits = atoi(*argv); goto nextarg; case 'c': zcat_flg = 1; break; case 'd': do_decomp = 1; break; case 'f': case 'F': overwrite = 1; force = 1; break; case 'n': nomagic = 1; break; case 'q': quiet = 1; break; case 'v': quiet = 0; break; default: fprintf(stderr, "Unknown flag: '%c'; ", **argv); Usage(); exit(1); } } } else { /* Input file name */ *fileptr++ = *argv; /* Build input file list */ *fileptr = NULL; /* process nextarg; */ }nextarg: continue; } if(maxbits < INIT_BITS) maxbits = INIT_BITS; if (maxbits > BITS) maxbits = BITS; maxmaxcode = 1 << maxbits; if (*filelist != NULL) { for (fileptr = filelist; *fileptr; fileptr++) { exit_stat = 0; if (do_decomp != 0) { /* DECOMPRESSION */ /* Check for .Z suffix */ if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) { /* No .Z: tack one on */ strcpy(tempname, *fileptr); strcat(tempname, ".Z"); *fileptr = tempname; } /* Open input file */ if ((freopen(*fileptr, "r", stdin)) == NULL) { perror(*fileptr); continue; } /* Check the magic number */ if (nomagic == 0) { if ((getchar() != (magic_header[0] & 0xFF)) || (getchar() != (magic_header[1] & 0xFF))) { fprintf(stderr, "%s: not in compressed format\n", *fileptr); continue; } maxbits = getchar(); /* set -b from file */ block_compress = maxbits & BLOCK_MASK; maxbits &= BIT_MASK; maxmaxcode = 1 << maxbits; if(maxbits > BITS) { fprintf(stderr, "%s: compressed with %d bits, can only handle %d bits\n", *fileptr, maxbits, BITS); continue; } } /* Generate output filename */ strcpy(ofname, *fileptr); ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */ } else { /* COMPRESSION */ if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) { fprintf(stderr, "%s: already has .Z suffix -- no change\n", *fileptr); continue; } /* Open input file */ if ((freopen(*fileptr, "r", stdin)) == NULL) { perror(*fileptr); continue; } (void) stat(*fileptr, &statbuf); fsize = (long) statbuf.st_size; /* * tune hash table size for small files -- ad hoc, * but the sizes match earlier #defines, which * serve as upper bounds on the number of output codes. */ hsize = HSIZE; if (fsize < (1 << 12)) hsize = min(5003, HSIZE); else if (fsize < (1 << 13)) hsize = min(9001, HSIZE); else if (fsize < (1 << 14)) hsize = min (18013, HSIZE); else if (fsize < (1 << 15)) hsize = min (35023, HSIZE); else if (fsize < 47000) hsize = min (50021, HSIZE); /* Generate output filename */ strcpy(ofname, *fileptr);#ifndef BSD4_2 if ((cp=strrchr(ofname,'/')) != NULL) cp++; else cp = ofname; /* *** changed 12 to 25; should be NAMELEN-3, but I don't want * to fight the headers. ehg 5 Nov 92 ** */ if (strlen(cp) > 25) { fprintf(stderr, "%s: filename too long to tack on .Z\n", cp); continue; }#endif strcat(ofname, ".Z"); } /* Check for overwrite of existing file */ if (overwrite == 0 && zcat_flg == 0 && stat(ofname, &statbuf) == 0) { char response[2]; response[0] = 'n'; fprintf(stderr, "%s already exists;", ofname); if (foreground()) { fprintf(stderr, " do you wish to overwrite %s (y or n)? ", ofname); fflush(stderr); (void) read(2, response, 2); while (response[1] != '\n') if (read(2, response+1, 1) < 0) { /* Ack! */ perror("stderr"); break; } } if (response[0] != 'y') { fprintf(stderr, "\tnot overwritten\n"); continue; } } if(zcat_flg == 0) { /* Open output file */ if (freopen(ofname, "w", stdout) == NULL) { perror(ofname); continue; } if(!quiet) fprintf(stderr, "%s: ", *fileptr); } /* Actually do the compression/decompression */ if (do_decomp == 0) compress();#ifndef DEBUG else decompress();#else else if (debug == 0) decompress(); else printcodes(); if (verbose) dump_tab();#endif /* DEBUG */ if(zcat_flg == 0) { copystat(*fileptr, ofname); /* Copy stats */ if (exit_stat == 1 || !quiet) putc('\n', stderr); } } } else { /* Standard input */ if (do_decomp == 0) { compress();#ifdef DEBUG if(verbose) dump_tab();#endif if(!quiet) putc('\n', stderr); } else { /* Check the magic number */ if (nomagic == 0) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -