⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 compress.c

📁 用c语言写的相当简单的文件压缩小程序非常适合初学者的使用
💻 C
📖 第 1 页 / 共 3 页
字号:
	nextarg: continue;
    }

    if(maxbits < INIT_BITS) maxbits = INIT_BITS;
    if (maxbits > BITS) maxbits = BITS;
    maxmaxcode = 1 << maxbits;

    if (*filelist != NULL) {
      for (fileptr = filelist; *fileptr; fileptr++) {
	comprexx(fileptr);
      }
    } else {		/* Standard input */
        setmode(fileno(stdin), O_BINARY);
        setmode(fileno(stdout), O_BINARY);
	setvbuf(stdin, IOinbuf, _IOFBF, sizeof(IOinbuf));
	setvbuf(stdout, IOoutbuf, _IOFBF, sizeof(IOoutbuf));
	if (do_decomp == 0) {
		compress();
		if(!quiet)
			putc('\n', stderr);
	} else {
		/* Check the magic number */
		if (nomagic == 0) {
		if ((getchar()!=(magic_header[0] & 0xFF))
		 || (getchar()!=(magic_header[1] & 0xFF))) {
			fprintf(stderr, "stdin: not in compressed format\n");
			exit(1);
		}
		maxbits = getchar();	/* set -b from file */
		block_compress = maxbits & BLOCK_MASK;
		maxbits &= BIT_MASK;
		maxmaxcode = 1 << maxbits;
		fsize = 100000;		/* assume stdin large for USERMEM */
		if(maxbits > BITS) {
			fprintf(stderr,
			"stdin: compressed with %d bits, can only handle %d bits\n",
			maxbits, BITS);
			exit(1);
		}
		}
#ifndef DEBUG
		decompress();
#else   /* DEBUG */
		if (debug == 0)	decompress();
		else		printcodes();
		if (verbose)	dump_tab();
#endif /* DEBUG */
	}
	}
	exit(exit_stat);
}

comprexx(fileptr)
char **fileptr;
{
	char tempname[MAXPATHLEN], *cp;

	strcpy(tempname,*fileptr);
	errno = 0;
#ifdef	BSD4
	if (lstat(tempname,&insbuf) == -1) {
#else
	if (stat(tempname,&insbuf) == -1) {
#endif
	  if ( do_decomp ) {
	    switch (errno) {
	    case ENOENT:	/* file doesn't exist */
	      /*
	      ** if the given name doesn't end with .Z, try appending one
	      ** This is obviously the wrong thing to do if it's a 
	      ** directory, but it shouldn't do any harm.
	      */
	      if (stricmp(tempname + strlen(tempname) - 2, ".Z") != 0) {
#ifdef SHORTNAMES
		    if ((cp=rindex(tempname,'/')) != NULL)	cp++;
		    else					cp = tempname;
		    if (strlen(cp) > 12) {
			fprintf(stderr,"%s.Z: No such file or directory\n",tempname);
			return;
		    }
#endif  /* SHORTNAMES */
		strcat(tempname,".Z");
		errno = 0;
#ifdef	BSD4
		if (lstat(tempname,&insbuf) == -1) {
#else
		if (stat(tempname,&insbuf) == -1) {
#endif
		  perror(tempname);
		  return;
		}
	      }
	      else {
		perror(tempname);
		return;
	      }
	      break;
	    default:
	      perror(tempname);
	      return;
	    } /* end switch */
	  } /* endif */
	  else {
	    /* we can't stat the file, ignore it */
	      perror(tempname);
	      return;
	  }
	} /* endif */

	switch (insbuf.st_mode & S_IFMT) {
	case S_IFDIR:	/* directory */
	  if (recursive)
	    compdir(tempname);
	  else if ( ! quiet )
	    fprintf(stderr,"%s is a directory -- ignored\n",
		    tempname);
	  break;

	case S_IFREG:	/* regular file */
	  exit_stat = 0;
	  if (do_decomp != 0) {
	/* DECOMPRESSION */
	    if ( ! zcat_flg ) {
	      if (stricmp(tempname + strlen(tempname) - 2, ".Z") != 0) {
		if ( ! quiet ) {
		  fprintf(stderr,"%s - no .Z suffix\n",tempname);
		}
		return;
	      }
	    }
	    /* Open input file */
	    if ((freopen(tempname, "rb", stdin)) == NULL) {
	      perror(tempname); return;
	    }
		/* Check the magic number */
		if (nomagic == 0) {
		    if ((getchar() != (magic_header[0] & 0xFF))
		     || (getchar() != (magic_header[1] & 0xFF))) {
			fprintf(stderr, "%s: not in compressed format\n",
			    tempname);
					return;
		    }
		    maxbits = getchar();	/* set -b from file */
		    block_compress = maxbits & BLOCK_MASK;
		    maxbits &= BIT_MASK;
		    maxmaxcode = 1 << maxbits;
		    if(maxbits > BITS) {
			fprintf(stderr,
			"%s: compressed with %d bits, can only handle %d bits\n",
			tempname, maxbits, BITS);
					return;
		    }
		}
	        /* we have to ignore SIGINT for a while, otherwise
		   a ^C can nuke an existing file with ofname */
	        signal(SIGINT,SIG_IGN);
		/* Generate output filename */
		strcpy(ofname, tempname);
		/* Check for .Z suffix */
		if (stricmp(tempname + strlen(tempname) - 2, ".Z") == 0) {
		  ofname[strlen(tempname) - 2] = '\0';  /* Strip off .Z */
		}
	    }
	    else {
	/* COMPRESSION */
		if (stricmp(tempname + strlen(tempname) - 2, ".Z") == 0) {
		  fprintf(stderr, "%s: already has .Z suffix -- no change\n",
			  tempname);
		  return;
		}
		if (insbuf.st_nlink > 1 && (! force) ) {
		  fprintf(stderr, "%s has %d other links: unchanged\n",
			  tempname,insbuf.st_nlink - 1);
		  return;
		}
		/* Open input file */
		if ((freopen(tempname, "rb", stdin)) == NULL) {
		    perror(tempname); return;
		}
		fsize = (long) insbuf.st_size;
		/*
		 * tune hash table size for small files -- ad hoc,
		 * but the sizes match earlier #defines, which
		 * serve as upper bounds on the number of output codes. 
		 */
		hsize = HSIZE;
		if ( fsize < (1 << 12) )
		    hsize = min ( 5003, HSIZE );
		else if ( fsize < (1 << 13) )
		    hsize = min ( 9001, HSIZE );
		else if ( fsize < (1 << 14) )
		    hsize = min ( 18013, HSIZE );
		else if ( fsize < (1 << 15) )
		    hsize = min ( 35023, HSIZE );
		else if ( fsize < 47000 )
		    hsize = min ( 50021, HSIZE );

	        /* we have to ignore SIGINT for a while, otherwise
		   a ^C can nuke an existing file with ofname */
	        signal(SIGINT,SIG_IGN);
		/* Generate output filename */
		strcpy(ofname, tempname);
#ifdef SHORTNAMES	/* Short filenames */
		if ((cp=rindex(ofname,'/')) != NULL)	cp++;
		else					cp = ofname;
		if (strlen(cp) > 12) {
		    fprintf(stderr,"%s: filename too long to tack on .Z\n",ofname);
		    signal(SIGINT,onintr);
		    return;
		}
#endif  /* SHORTNAMES */
		strcat(ofname, ".Z");
	    }
	    /* Check for overwrite of existing file */
	    if (overwrite == 0 && zcat_flg == 0) {
		if (stat(ofname, &statbuf) == 0) {
		    char response[2];
		    response[0] = 'n';
		    fprintf(stderr, "%s already exists;", ofname);
		    if (foreground()) {
			fprintf(stderr, " OK to overwrite (y or n)? ");
			fflush(stderr);
			read(2, response, 2);
			while (response[1] != '\n') {
			    if (read(2, response+1, 1) < 0) {	/* Ack! */
				perror("stderr"); break;
			    }
			}
		    }
		    if (response[0] != 'y') {
			fprintf(stderr, "\tnot overwritten\n");
			signal(SIGINT,onintr);
			return;
		    }
		}
	    }
	    signal(SIGINT,onintr);
	    if(zcat_flg == 0) {		/* Open output file */
	        valid = 1;
		if (freopen(ofname, "wb", stdout) == NULL) {
		    perror(ofname);
			return;
		}
		if(!quiet)
			fprintf(stderr, "%s: ", tempname);
	    }
	    else
	        setmode(fileno(stdout), O_BINARY);

	    setvbuf(stdin, IOinbuf, _IOFBF, sizeof(IOinbuf));
	    setvbuf(stdout, IOoutbuf, _IOFBF, sizeof(IOoutbuf));

	    /* Actually do the compression/decompression */
	    if (do_decomp == 0)	compress();
#ifndef DEBUG
	    else			decompress();
#else
	    else if (debug == 0)	decompress();
	    else			printcodes();
	    if (verbose)		dump_tab();
#endif /* DEBUG */
	    if(zcat_flg == 0) {
		copystat(tempname, ofname);	/* Copy stats */
		if((exit_stat == 1) || (!quiet))
			putc('\n', stderr);
	    }
	  break;
	default:
	  fprintf(stderr,"%s is not a directory or a regular file - ignored\n",
		  tempname);
	  break;
	} /* end switch */
	return;
} /* end comprexx */

compdir(dir)
char *dir;
{
	DIR *dirp;
#ifndef	DIRENT
	register struct direct *dp;
#else
	register struct dirent *dp;
#endif
	char nbuf[MAXPATHLEN];
	char *nptr = nbuf;
	dirp = opendir(dir);
	if (dirp == NULL) {
		printf("%s unreadable\n", dir);		/* not stderr! */
		return ;
	}
	/*
	** WARNING: the following algorithm will occasionally cause
	** compress to produce error warnings of the form "<filename>.Z
	** already has .Z suffix - ignored". This occurs when the
	** .Z output file is inserted into the directory below
	** readdir's current pointer.
	** These warnings are harmless but annoying. The alternative
	** to allowing this would be to store the entire directory
	** list in memory, then compress the entries in the stored
	** list. Given the depth-first recursive algorithm used here,
	** this could use up a tremendous amount of memory. I don't
	** think it's worth it. -- Dave Mack
	*/
	while (dp = readdir(dirp)) {
		if (dp->d_ino == 0)
			continue;
		if (strcmp(dp->d_name,".") == 0 ||
		    strcmp(dp->d_name,"..") == 0)
			continue;
		if ( (strlen(dir)+strlen(dp->d_name)+1) < (MAXPATHLEN - 1)){
		  strcpy(nbuf,dir);
		  strcat(nbuf,"/");
		  strcat(nbuf,dp->d_name);
		  comprexx(&nptr);
		}
		else {
		  fprintf(stderr,"Pathname too long: %s/%s\n",dir,dp->d_name);
		}
  	}
	closedir(dirp);
	return;
} /* end compdir */

static int offset;
long int in_count = 1;			/* length of input */
long int bytes_out;			/* length of compressed output */
long int out_count = 0;			/* # of codes output (for debugging) */

/*
 * compress stdin to stdout
 *
 * Algorithm:  use open addressing double hashing (no chaining) on the 
 * prefix code / next character combination.  We do a variant of Knuth's
 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
 * secondary probe.  Here, the modular division first probe is gives way
 * to a faster exclusive-or manipulation.  Also do block compression with
 * an adaptive reset, whereby the code table is cleared when the compression
 * ratio decreases, but after the table fills.  The variable-length output
 * codes are re-sized at this point, and a special CLEAR code is generated
 * for the decompressor.  Late addition:  construct the table according to
 * file size for noticeable speed improvement on small files.  Please direct
 * questions about this implementation to ames!jaw.
 */

compress() {
    register long fcode;
    register code_int i = 0;
    register int c;
    register code_int ent;
#ifdef XENIX_16
    register code_int disp;
#else	/* Normal machine */
    register int disp;
#endif
    register code_int hsize_reg;
    register int hshift;

#ifndef COMPATIBLE
    if (nomagic == 0) {
	putchar(magic_header[0]); putchar(magic_header[1]);
	putchar((char)(maxbits | block_compress));
	if(ferror(stdout))
		writeerr();
    }
#endif /* COMPATIBLE */

    offset = 0;
    bytes_out = 3;		/* includes 3-byte header mojo */
    out_count = 0;
    clear_flg = 0;
    ratio = 0;
    in_count = 1;
    checkpoint = CHECK_GAP;
    maxcode = MAXCODE(n_bits = INIT_BITS);
    free_ent = ((block_compress) ? FIRST : 256 );

    ent = getchar ();

    hshift = 0;
    for ( fcode = (long) hsize;  fcode < 65536L; fcode *= 2L )
    	hshift++;
    hshift = 8 - hshift;		/* set hash code range bound */

    hsize_reg = hsize;
    cl_hash( (count_int) hsize_reg);		/* clear hash table */

#ifdef SIGNED_COMPARE_SLOW
    while ( (c = getchar()) != (unsigned) EOF ) {
#else
    while ( (c = getchar()) != EOF ) {
#endif
	in_count++;
	fcode = (long) (((long) c << maxbits) + ent);
 	i = ((c << hshift) ^ ent);	/* xor hashing */

	if ( htabof (i) == fcode ) {
	    ent = codetabof (i);
	    continue;
	} else if ( (long)htabof (i) < 0 )	/* empty slot */
	    goto nomatch;
 	disp = hsize_reg - i;		/* secondary hash (after G. Knott) */
	if ( i == 0 )
	    disp = 1;
probe:
	if ( (i -= disp) < 0 )
	    i += hsize_reg;

	if ( htabof (i) == fcode ) {
	    ent = codetabof (i);
	    continue;
	}
	if ( (long)htabof (i) > 0 ) 
	    goto probe;
nomatch:
	output ( (code_int) ent );
	out_count++;
 	ent = c;
#ifdef SIGNED_COMPARE_SLOW
	if ( (unsigned) free_ent < (unsigned) maxmaxcode) {
#else
	if ( free_ent < maxmaxcode ) {
#endif
 	    codetabof (i) = free_ent++;	/* code -> hashtable */
	    htabof (i) = fcode;
	}
	else if ( (count_int)in_count >= checkpoint && block_compress )
	    cl_block ();
    }
    /*
     * Put out the final code.
     */
    output( (code_int)ent );
    out_count++;
    output( (code_int)-1 );

    /*
     * Print out stats on stderr
     */
    if(zcat_flg == 0 && !quiet) {
#ifdef DEBUG
	fprintf( stderr,
		"%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
		in_count, out_count, bytes_out );
	prratio( stderr, in_count, bytes_out );
	fprintf( stderr, "\n");
	fprintf( stderr, "\tCompression as in compact: " );
	prratio( stderr, in_count-bytes_out, in_count );
	fprintf( stderr, "\n");
	fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n",
		free_ent - 1, n_bits );
#else /* !DEBUG */
	fprintf( stderr, "Compression: " );
	prratio( stderr, in_count-bytes_out, in_count );
#endif /* DEBUG */
    }
    if(bytes_out > in_count)	/* exit(2) if no savings */
	exit_stat = 2;
    return;
}

/*****************************************************************
 * TAG( output )
 *
 * Output the given code.
 * Inputs:
 * 	code:	A n_bits-bit integer.  If == -1, then EOF.  This assumes
 *		that n_bits =< (long)wordsize - 1.
 * Outputs:
 * 	Outputs code to the file.
 * Assumptions:
 *	Chars are 8 bits long.
 * Algorithm:
 * 	Maintain a BITS character long buffer (so that 8 codes will
 * fit in it exactly).  Use the VAX insv instruction to insert each
 * code in turn.  When the buffer fills up empty it and start over.
 */

static char buf[BITS];

#ifndef vax
char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
#endif /* vax */

output( code )
code_int  code;
{
#ifdef DEBUG
    static int col = 0;
#endif /* DEBUG */

    /*
     * On the VAX, it is important to have the register declarations
     * in exactly the order given, or the asm will break.
     */
    register int r_off = offset, bits= n_bits;
    register char * bp = buf;

#ifdef DEBUG
	if ( verbose )
	    fprintf( stderr, "%5d%c", code,
		    (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
#endif /* DEBUG */
    if ( code >= 0 ) {
#ifdef vax
	/* VAX DEPENDENT!! Implementation on other machines is below.
	 *
	 * Translation: Insert BITS bits from the argument starting at
	 * offset bits from the beginning of buf.
	 */
	0;	/* Work around for pcc -O bug with asm and if stmt */
	asm( "insv	4(ap),r11,r10,(r9)" );
#else /* not a vax */
/* 
 * byte/bit numbering on the VAX is simulated by the following code
 */
	/*
	 * Get to the first byte.
	 */
	bp += (r_off >> 3);
	r_off &= 7;
	/*
	 * Since code is always >= 8 bits, only need to mask the first
	 * hunk on the left.
	 */
	*bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
	bp++;
	bits -= (8 - r_off);
	code >>= 8 - r_off;
	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
	if ( bits >= 8 ) {
	    *bp++ = code;
	    code >>= 8;
	    bits -= 8;
	}
	/* Last bits. */
	if(bits)
	    *bp = code;
#endif /* vax */
	offset += n_bits;
	if ( offset == (n_bits << 3) ) {
	    bp = buf;
	    bits = n_bits;
	    bytes_out += bits;
	    do
		putchar(*bp++);
	    while(--bits);
	    offset = 0;
	}

	/*
	 * If the next entry is going to be too big for the code size,
	 * then increase it, if possible.
	 */
	if ( free_ent > maxcode || (clear_flg > 0))
	{
	    /*
	     * Write the whole buffer, because the input side won't
	     * discover the size increase until after it has read it.
	     */
	    if ( offset > 0 ) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -