📄 compress.c
字号:
nextarg: continue;
}
if(maxbits < INIT_BITS) maxbits = INIT_BITS;
if (maxbits > BITS) maxbits = BITS;
maxmaxcode = 1 << maxbits;
if (*filelist != NULL) {
for (fileptr = filelist; *fileptr; fileptr++) {
comprexx(fileptr);
}
} else { /* Standard input */
setmode(fileno(stdin), O_BINARY);
setmode(fileno(stdout), O_BINARY);
setvbuf(stdin, IOinbuf, _IOFBF, sizeof(IOinbuf));
setvbuf(stdout, IOoutbuf, _IOFBF, sizeof(IOoutbuf));
if (do_decomp == 0) {
compress();
if(!quiet)
putc('\n', stderr);
} else {
/* Check the magic number */
if (nomagic == 0) {
if ((getchar()!=(magic_header[0] & 0xFF))
|| (getchar()!=(magic_header[1] & 0xFF))) {
fprintf(stderr, "stdin: not in compressed format\n");
exit(1);
}
maxbits = getchar(); /* set -b from file */
block_compress = maxbits & BLOCK_MASK;
maxbits &= BIT_MASK;
maxmaxcode = 1 << maxbits;
fsize = 100000; /* assume stdin large for USERMEM */
if(maxbits > BITS) {
fprintf(stderr,
"stdin: compressed with %d bits, can only handle %d bits\n",
maxbits, BITS);
exit(1);
}
}
#ifndef DEBUG
decompress();
#else /* DEBUG */
if (debug == 0) decompress();
else printcodes();
if (verbose) dump_tab();
#endif /* DEBUG */
}
}
exit(exit_stat);
}
/*
 * Return nonzero when name ends in ".Z" and zero otherwise.  The
 * comparison uses stricmp, so ".z" matches as well.  Names shorter than
 * two characters never match; checking the length first keeps the
 * comparison from starting before the beginning of the string.
 */
static int
has_z_suffix(name)
char *name;
{
    int len = strlen(name);

    if (len < 2)
        return 0;
    return stricmp(name + len - 2, ".Z") == 0;
}

/*
 * comprexx -- compress or decompress (depending on do_decomp) the single
 * path named by *fileptr.
 *
 * The path is copied into a local buffer and examined with lstat (BSD4)
 * or stat; the result is left in the file-scope insbuf.
 *   - When decompressing a name that does not exist and does not already
 *     end in ".Z", the lookup is retried with ".Z" appended.
 *   - A directory is handed to compdir() when recursive is set, otherwise
 *     it is reported (unless quiet) and skipped.
 *   - A regular file resets exit_stat to 0, has stdin reopened onto it,
 *     and, unless zcat_flg is set, has stdout reopened onto the output
 *     name built in ofname.  compress() or decompress() then does the
 *     work and copystat() is called afterwards.
 *   - Anything else is reported and skipped.
 *
 * Every failure is reported on stderr and the function simply returns;
 * no value is returned to the caller.
 *
 * NOTE(review): ofname is declared outside this function and its
 * capacity is not visible here; the copies into it below assume it can
 * hold tempname plus ".Z" -- confirm against its declaration.
 */
comprexx(fileptr)
char **fileptr;
{
    char tempname[MAXPATHLEN], *cp;

    /* A longer name would overrun tempname in the strcpy below. */
    if (strlen(*fileptr) >= sizeof(tempname)) {
        fprintf(stderr, "%s: filename too long\n", *fileptr);
        return;
    }
    strcpy(tempname,*fileptr);
    errno = 0;
#ifdef BSD4
    if (lstat(tempname,&insbuf) == -1) {
#else
    if (stat(tempname,&insbuf) == -1) {
#endif
        if ( do_decomp ) {
            switch (errno) {
            case ENOENT:    /* file doesn't exist */
                /*
                ** if the given name doesn't end with .Z, try appending one
                ** This is obviously the wrong thing to do if it's a
                ** directory, but it shouldn't do any harm.
                */
                if (!has_z_suffix(tempname)) {
#ifdef SHORTNAMES
                    if ((cp=rindex(tempname,'/')) != NULL) cp++;
                    else cp = tempname;
                    if (strlen(cp) > 12) {
                        fprintf(stderr,"%s.Z: No such file or directory\n",tempname);
                        return;
                    }
#endif /* SHORTNAMES */
                    /* Need room for the two suffix bytes plus the NUL. */
                    if (strlen(tempname) + 2 >= sizeof(tempname)) {
                        fprintf(stderr,"%s: filename too long to tack on .Z\n",tempname);
                        return;
                    }
                    strcat(tempname,".Z");
                    errno = 0;
#ifdef BSD4
                    if (lstat(tempname,&insbuf) == -1) {
#else
                    if (stat(tempname,&insbuf) == -1) {
#endif
                        perror(tempname);
                        return;
                    }
                }
                else {
                    perror(tempname);
                    return;
                }
                break;
            default:
                perror(tempname);
                return;
            } /* end switch */
        } /* endif */
        else {
            /* we can't stat the file, ignore it */
            perror(tempname);
            return;
        }
    } /* endif */

    switch (insbuf.st_mode & S_IFMT) {
    case S_IFDIR:   /* directory */
        if (recursive)
            compdir(tempname);
        else if ( ! quiet )
            fprintf(stderr,"%s is a directory -- ignored\n",
                    tempname);
        break;

    case S_IFREG:   /* regular file */
        exit_stat = 0;
        if (do_decomp != 0) {
            /* DECOMPRESSION */
            if ( ! zcat_flg ) {
                if (!has_z_suffix(tempname)) {
                    if ( ! quiet ) {
                        fprintf(stderr,"%s - no .Z suffix\n",tempname);
                    }
                    return;
                }
            }
            /* Open input file */
            if ((freopen(tempname, "rb", stdin)) == NULL) {
                perror(tempname); return;
            }
            /* Check the magic number */
            if (nomagic == 0) {
                if ((getchar() != (magic_header[0] & 0xFF))
                 || (getchar() != (magic_header[1] & 0xFF))) {
                    fprintf(stderr, "%s: not in compressed format\n",
                            tempname);
                    return;
                }
                maxbits = getchar();    /* set -b from file */
                block_compress = maxbits & BLOCK_MASK;
                maxbits &= BIT_MASK;
                maxmaxcode = 1 << maxbits;
                if(maxbits > BITS) {
                    fprintf(stderr,
                            "%s: compressed with %d bits, can only handle %d bits\n",
                            tempname, maxbits, BITS);
                    return;
                }
            }
            /* we have to ignore SIGINT for a while, otherwise
               a ^C can nuke an existing file with ofname */
            signal(SIGINT,SIG_IGN);
            /* Generate output filename */
            strcpy(ofname, tempname);
            /* Check for .Z suffix */
            if (has_z_suffix(tempname)) {
                ofname[strlen(tempname) - 2] = '\0';    /* Strip off .Z */
            }
        }
        else {
            /* COMPRESSION */
            if (has_z_suffix(tempname)) {
                fprintf(stderr, "%s: already has .Z suffix -- no change\n",
                        tempname);
                return;
            }
            if (insbuf.st_nlink > 1 && (! force) ) {
                /* st_nlink need not be an int; cast so it matches %d */
                fprintf(stderr, "%s has %d other links: unchanged\n",
                        tempname, (int) (insbuf.st_nlink - 1));
                return;
            }
            /* Open input file */
            if ((freopen(tempname, "rb", stdin)) == NULL) {
                perror(tempname); return;
            }
            fsize = (long) insbuf.st_size;
            /*
             * tune hash table size for small files -- ad hoc,
             * but the sizes match earlier #defines, which
             * serve as upper bounds on the number of output codes.
             */
            hsize = HSIZE;
            if ( fsize < (1 << 12) )
                hsize = min ( 5003, HSIZE );
            else if ( fsize < (1 << 13) )
                hsize = min ( 9001, HSIZE );
            else if ( fsize < (1 << 14) )
                hsize = min ( 18013, HSIZE );
            else if ( fsize < (1 << 15) )
                hsize = min ( 35023, HSIZE );
            else if ( fsize < 47000 )
                hsize = min ( 50021, HSIZE );
            /* we have to ignore SIGINT for a while, otherwise
               a ^C can nuke an existing file with ofname */
            signal(SIGINT,SIG_IGN);
            /* Generate output filename */
            strcpy(ofname, tempname);
#ifdef SHORTNAMES   /* Short filenames */
            if ((cp=rindex(ofname,'/')) != NULL) cp++;
            else cp = ofname;
            if (strlen(cp) > 12) {
                fprintf(stderr,"%s: filename too long to tack on .Z\n",ofname);
                signal(SIGINT,onintr);
                return;
            }
#endif /* SHORTNAMES */
            strcat(ofname, ".Z");
        }
        /* Check for overwrite of existing file */
        if (overwrite == 0 && zcat_flg == 0) {
            if (stat(ofname, &statbuf) == 0) {
                char response[2];
                int nread;

                /*
                 * Default to "no", with a terminated line, so that a
                 * failed or empty read can never be taken as consent
                 * and never leaves response[1] holding garbage.
                 */
                response[0] = 'n';
                response[1] = '\n';
                fprintf(stderr, "%s already exists;", ofname);
                if (foreground()) {
                    fprintf(stderr, " OK to overwrite (y or n)? ");
                    fflush(stderr);
                    nread = read(2, response, 2);
                    if (nread < 0) {
                        perror("stderr");
                    } else if (nread > 0) {
                        /*
                         * A one-byte read either was the bare newline
                         * (line finished) or leaves the rest of the
                         * line still to be discarded.
                         */
                        if (nread == 1)
                            response[1] = (response[0] == '\n') ? '\n' : '\0';
                        /* Swallow the remainder of the answer line. */
                        while (response[1] != '\n') {
                            nread = read(2, response+1, 1);
                            if (nread <= 0) {   /* error or end of input */
                                if (nread < 0)
                                    perror("stderr");
                                break;
                            }
                        }
                    }
                }
                if (response[0] != 'y') {
                    fprintf(stderr, "\tnot overwritten\n");
                    signal(SIGINT,onintr);
                    return;
                }
            }
        }
        signal(SIGINT,onintr);
        if(zcat_flg == 0) {     /* Open output file */
            valid = 1;
            if (freopen(ofname, "wb", stdout) == NULL) {
                perror(ofname);
                return;
            }
            if(!quiet)
                fprintf(stderr, "%s: ", tempname);
        }
        else
            setmode(fileno(stdout), O_BINARY);
        setvbuf(stdin, IOinbuf, _IOFBF, sizeof(IOinbuf));
        setvbuf(stdout, IOoutbuf, _IOFBF, sizeof(IOoutbuf));
        /* Actually do the compression/decompression */
        if (do_decomp == 0) compress();
#ifndef DEBUG
        else decompress();
#else
        else if (debug == 0) decompress();
        else printcodes();
        if (verbose) dump_tab();
#endif /* DEBUG */
        if(zcat_flg == 0) {
            copystat(tempname, ofname); /* Copy stats */
            if((exit_stat == 1) || (!quiet))
                putc('\n', stderr);
        }
        break;

    default:
        fprintf(stderr,"%s is not a directory or a regular file - ignored\n",
                tempname);
        break;
    } /* end switch */
    return;
} /* end comprexx */
/*
 * compdir -- run comprexx() on every entry of the directory dir.
 *
 * Each entry other than "." and ".." (and other than slots whose d_ino
 * is 0) is joined to dir with a '/' and passed to comprexx().  Entries
 * whose joined path would not fit the MAXPATHLEN buffer are reported on
 * stderr and skipped.  Nothing is returned.
 */
compdir(dir)
char *dir;
{
#ifndef DIRENT
    register struct direct *entry;
#else
    register struct dirent *entry;
#endif
    DIR *handle;
    char path[MAXPATHLEN];
    char *pathp = path;     /* comprexx() wants a pointer to the name pointer */

    handle = opendir(dir);
    if (handle == NULL) {
        /*
         * NOTE(review): this diagnostic goes to stdout, unlike every
         * other message in this function; the original author marked
         * the line "not stderr!".  Confirm whether that is intended.
         */
        printf("%s unreadable\n", dir);
        return;
    }

    /*
     * Known wart: a freshly written .Z file can show up later in the
     * same readdir scan, which makes comprexx() print a harmless
     * "already has .Z suffix" message for it.  Avoiding that would
     * mean holding the whole listing in memory at every recursion
     * level, which was judged not worth the cost.
     */
    for (;;) {
        entry = readdir(handle);
        if (entry == NULL)
            break;

        if (entry->d_ino == 0)
            continue;
        if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
            continue;

        if ((strlen(dir) + strlen(entry->d_name) + 1) >= (MAXPATHLEN - 1)) {
            fprintf(stderr,"Pathname too long: %s/%s\n",dir,entry->d_name);
            continue;
        }

        strcpy(path, dir);
        strcat(path, "/");
        strcat(path, entry->d_name);
        comprexx(&pathp);
    }

    closedir(handle);
    return;
} /* end compdir */
/* bit position within the output buffer at which output() places the next code */
static int offset;
long int in_count = 1; /* length of input */
long int bytes_out; /* length of compressed output */
long int out_count = 0; /* # of codes output (for debugging) */
/*
* compress stdin to stdout
*
* Algorithm: use open addressing double hashing (no chaining) on the
* prefix code / next character combination. We do a variant of Knuth's
* algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
* secondary probe. Here, the modular division first probe gives way
* to a faster exclusive-or manipulation. Also do block compression with
* an adaptive reset, whereby the code table is cleared when the compression
* ratio decreases, but after the table fills. The variable-length output
* codes are re-sized at this point, and a special CLEAR code is generated
* for the decompressor. Late addition: construct the table according to
* file size for noticeable speed improvement on small files. Please direct
* questions about this implementation to ames!jaw.
*/
/*
 * Interface: takes no arguments and returns no value.  Input bytes come
 * from getchar() and codes leave through output().  The file-scope state
 * offset, bytes_out, out_count, clear_flg, ratio, in_count, checkpoint,
 * n_bits, maxcode and free_ent is reset on entry; exit_stat is set to 2
 * on exit when bytes_out is larger than in_count.
 */
compress() {
register long fcode;
register code_int i = 0;
register int c;
register code_int ent;
#ifdef XENIX_16
register code_int disp;
#else /* Normal machine */
register int disp;
#endif
register code_int hsize_reg;
register int hshift;
#ifndef COMPATIBLE
/* Unless nomagic is set, write the header: two magic bytes, then one
 * byte combining maxbits with the block_compress flag. */
if (nomagic == 0) {
putchar(magic_header[0]); putchar(magic_header[1]);
putchar((char)(maxbits | block_compress));
if(ferror(stdout))
writeerr();
}
#endif /* COMPATIBLE */
offset = 0;
bytes_out = 3; /* includes 3-byte header mojo */
out_count = 0;
clear_flg = 0;
ratio = 0;
in_count = 1;
checkpoint = CHECK_GAP;
/* n_bits is reset to INIT_BITS by the assignment inside the macro argument */
maxcode = MAXCODE(n_bits = INIT_BITS);
/* first code handed out to new table entries: FIRST when block_compress
 * is set, 256 otherwise */
free_ent = ((block_compress) ? FIRST : 256 );
/* the first input byte becomes the initial prefix code */
ent = getchar ();
/* count how many doublings take hsize up to 65536, then derive the
 * shift applied to the input byte in the xor hash below */
hshift = 0;
for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L )
hshift++;
hshift = 8 - hshift; /* set hash code range bound */
hsize_reg = hsize;
cl_hash( (count_int) hsize_reg); /* clear hash table */
#ifdef SIGNED_COMPARE_SLOW
while ( (c = getchar()) != (unsigned) EOF ) {
#else
while ( (c = getchar()) != EOF ) {
#endif
in_count++;
/* fcode is the table key: the new byte placed above the prefix code */
fcode = (long) (((long) c << maxbits) + ent);
i = ((c << hshift) ^ ent); /* xor hashing */
if ( htabof (i) == fcode ) {
/* hit on the first probe: the stored code becomes the new prefix */
ent = codetabof (i);
continue;
} else if ( (long)htabof (i) < 0 ) /* empty slot */
goto nomatch;
disp = hsize_reg - i; /* secondary hash (after G. Knott) */
if ( i == 0 )
disp = 1;
probe:
/* step back by disp, wrapping around when the index goes negative */
if ( (i -= disp) < 0 )
i += hsize_reg;
if ( htabof (i) == fcode ) {
ent = codetabof (i);
continue;
}
/* slot holds some other key: keep probing; otherwise fall into nomatch */
if ( (long)htabof (i) > 0 )
goto probe;
nomatch:
/* emit the code for the current prefix and restart the prefix at c */
output ( (code_int) ent );
out_count++;
ent = c;
#ifdef SIGNED_COMPARE_SLOW
if ( (unsigned) free_ent < (unsigned) maxmaxcode) {
#else
if ( free_ent < maxmaxcode ) {
#endif
/* codes still available: record key fcode with the next free code in slot i */
codetabof (i) = free_ent++; /* code -> hashtable */
htabof (i) = fcode;
}
/* no codes left: once in_count has reached checkpoint, and only in
 * block_compress mode, call cl_block() (defined elsewhere; presumably
 * the adaptive reset described in the algorithm comment) */
else if ( (count_int)in_count >= checkpoint && block_compress )
cl_block ();
}
/*
 * Put out the final code.
 */
output( (code_int)ent );
out_count++;
/* a code of -1 is the EOF marker for output() */
output( (code_int)-1 );
/*
 * Print out stats on stderr
 */
if(zcat_flg == 0 && !quiet) {
#ifdef DEBUG
fprintf( stderr,
"%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
in_count, out_count, bytes_out );
prratio( stderr, in_count, bytes_out );
fprintf( stderr, "\n");
fprintf( stderr, "\tCompression as in compact: " );
prratio( stderr, in_count-bytes_out, in_count );
fprintf( stderr, "\n");
fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n",
free_ent - 1, n_bits );
#else /* !DEBUG */
fprintf( stderr, "Compression: " );
prratio( stderr, in_count-bytes_out, in_count );
#endif /* DEBUG */
}
if(bytes_out > in_count) /* exit(2) if no savings */
exit_stat = 2;
return;
}
/*****************************************************************
* TAG( output )
*
* Output the given code.
* Inputs:
* code: An n_bits-bit integer. If == -1, then EOF. This assumes
* that n_bits <= (long)wordsize - 1.
* Outputs:
* Outputs code to the file.
* Assumptions:
* Chars are 8 bits long.
* Algorithm:
* Maintain a BITS character long buffer (so that 8 codes will
* fit in it exactly). Use the VAX insv instruction to insert each
* code in turn. When the buffer fills up empty it and start over.
*/
/* staging buffer that output() packs codes into before writing them with putchar() */
static char buf[BITS];
#ifndef vax
/* Bit masks for the non-VAX packing code in output(), indexed by a bit
 * offset 0..8: lmask[n] has bits n..7 set (the low n bits cleared),
 * rmask[n] has only the low n bits set. */
char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
#endif /* vax */
output( code )
code_int code;
{
#ifdef DEBUG
static int col = 0;
#endif /* DEBUG */
/*
* On the VAX, it is important to have the register declarations
* in exactly the order given, or the asm will break.
*/
register int r_off = offset, bits= n_bits;
register char * bp = buf;
#ifdef DEBUG
if ( verbose )
fprintf( stderr, "%5d%c", code,
(col+=6) >= 74 ? (col = 0, '\n') : ' ' );
#endif /* DEBUG */
if ( code >= 0 ) {
#ifdef vax
/* VAX DEPENDENT!! Implementation on other machines is below.
*
* Translation: Insert BITS bits from the argument starting at
* offset bits from the beginning of buf.
*/
0; /* Work around for pcc -O bug with asm and if stmt */
asm( "insv 4(ap),r11,r10,(r9)" );
#else /* not a vax */
/*
* byte/bit numbering on the VAX is simulated by the following code
*/
/*
* Get to the first byte.
*/
bp += (r_off >> 3);
r_off &= 7;
/*
* Since code is always >= 8 bits, only need to mask the first
* hunk on the left.
*/
*bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
bp++;
bits -= (8 - r_off);
code >>= 8 - r_off;
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if ( bits >= 8 ) {
*bp++ = code;
code >>= 8;
bits -= 8;
}
/* Last bits. */
if(bits)
*bp = code;
#endif /* vax */
offset += n_bits;
if ( offset == (n_bits << 3) ) {
bp = buf;
bits = n_bits;
bytes_out += bits;
do
putchar(*bp++);
while(--bits);
offset = 0;
}
/*
* If the next entry is going to be too big for the code size,
* then increase it, if possible.
*/
if ( free_ent > maxcode || (clear_flg > 0))
{
/*
* Write the whole buffer, because the input side won't
* discover the size increase until after it has read it.
*/
if ( offset > 0 ) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -