⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 block.c

📁 zzip-zzlib-src.zip. A new archiver that uses a BWT algorithm to achieve superior compression. The
💻 C
📖 第 1 页 / 共 3 页
字号:
/*---------------------------------------------*/
/* Zzip/Zzlib compressor               block.c */
/* (un)compress/archive managing functions     */
/*---------------------------------------------*/

/*
  This file is a part of zzip and/or zzlib, a program and
  library for lossless, block-sorting data compression.
  Copyright (C) 1999-2001 Damien Debin. All Rights Reserved.

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the 
  Free Software Foundation, Inc., 
  59 Temple Place, Suite 330, 
  Boston, MA 02111-1307 USA

  Damien Debin
  <damien@debin.net>

  This program is based on (at least) the work of: Mike Burrows, 
  David Wheeler, Peter Fenwick, Alistair Moffat, Ian H. Witten, 
  Robert Sedgewick, Jon Bentley, Brenton Chapin, Stephen R. Tate, 
  Szymon Grabowski, Bernhard Balkenhol, Stefan Kurtz
*/

#include <sys/stat.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <time.h>

#ifdef WIN32
# include <io.h>
# include <sys/utime.h>
# include <direct.h>
#else  /* WIN32 */
# include <utime.h>
# include <unistd.h>
# include <dirent.h>
#endif /* WIN32 */

#include "zzip.h"

#ifdef SFX
# include "sfx_code.h"
#endif

/*---------------------------------------------*/

/*
 * Platform-dependent path handling.
 *   SEP_PATH      - directory separator character for the target platform.
 *   MKDIR_OPTIONS - expands to nothing on WIN32; elsewhere expands to a
 *                   leading comma followed by permission bits (rwx for the
 *                   owner, r-x for group and others). Because of the leading
 *                   comma it is presumably meant to be appended right after
 *                   the path argument of a mkdir() call -- the use site is
 *                   not visible in this part of the file.
 */
#ifdef WIN32
# define SEP_PATH '\\'
# define MKDIR_OPTIONS
#else  /* WIN32 */
# define SEP_PATH '/'
# define MKDIR_OPTIONS ,S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH
#endif /* WIN32 */

/*
 * START_OFFSET is SFX_CODE_SIZE in self-extracting builds and 0 otherwise.
 * NOTE(review): presumably the file offset at which archive data begins
 * (after the embedded extractor code) -- confirm against the use sites.
 */
#ifdef SFX
# define START_OFFSET SFX_CODE_SIZE
#else  /* SFX */
# define START_OFFSET 0L
#endif /* SFX */

/*
 * I/O helper macros. All of them rely on names being in scope at the use
 * site: the file-level 'session' pointer and 'last_error', and (for the _M
 * variants) a local pointer called 'buffer_in'.
 *
 * WRITE_F / READ_F
 *   fwrite()/fread() of (b) bytes on the session's output/input stream.
 *   The element size is 1, so the value of the call is a byte count.
 *   The expansion already ends with ';', so these can only appear as the
 *   last thing in a statement (e.g. "n = READ_F(p, len);"), never inside
 *   a condition or a larger expression.
 *
 * WRITE_M / READ_M
 *   Memory counterparts: copy (b) bytes into / out of 'buffer_in' and
 *   advance 'buffer_in' by (b).
 *
 * IO_ERROR
 *   Stores errno in last_error and returns; usable only in void functions.
 *
 * CHECK_IO_R / CHECK_IO_W
 *   Translate the stream state into last_error. For reads, a stream error
 *   stores errno first and an end-of-file condition then stores
 *   UNEXPECTED_EOF, so EOF wins when both are set.
 *
 * FTELL_I / FTELL_O
 *   Store the current stream position in (a); on failure (-1) last_error
 *   receives errno.
 *
 * FSEEK_I / FSEEK_O
 *   fseek() with offset (a) and origin (b); on failure last_error receives
 *   FSEEK_INPUT_FILE / FSEEK_OUTPUT_FILE.
 *
 * The multi-statement macros expand to a bare { } block, so "MACRO();"
 * yields the block plus an empty statement -- take care when using them as
 * the unbraced body of an if that has an else.
 */
#define WRITE_F(a,b)	fwrite((a),1,(b),session->output_file);
#define READ_F(a,b)		fread((a),1,(b),session->input_file);
#define WRITE_M(a,b)	{ memcpy(buffer_in, (a), (b)); buffer_in += (b); }
#define READ_M(a,b)		{ memcpy((a), buffer_in, (b)); buffer_in += (b); }
#define IO_ERROR()		{ last_error = errno; return; }
#define CHECK_IO_R()	{ if (ferror(session->input_file) != 0) last_error = errno; if (feof(session->input_file) != 0) last_error = UNEXPECTED_EOF; }
#define CHECK_IO_W()	{ if (ferror(session->output_file) != 0) last_error = errno; }
#define FTELL_I(a)		{ if ((a = ftell(session->input_file )) == -1) last_error = errno; }
#define FTELL_O(a)		{ if ((a = ftell(session->output_file)) == -1) last_error = errno; }
#define FSEEK_I(a,b)	{ if (fseek(session->input_file, (a), (b)) != 0)  last_error = FSEEK_INPUT_FILE; }
#define FSEEK_O(a,b)	{ if (fseek(session->output_file, (a), (b)) != 0) last_error = FSEEK_OUTPUT_FILE; }

/* Global status code. The helpers and I/O macros in this file store an
   error code (or an errno value) here; OK means no error is recorded. */
int last_error = OK;

/* Parameters of the block currently being processed. The field order of
   this initialiser follows block_param_s, which is declared in zzip.h
   (not visible here). */
block_param_s block = { 0, false, false, false, 0, 0, 0, 0, NO_TYPE, NULL, NULL };

/* Session the I/O macros operate on (input_file, output_file, headers). */
static session_param_s *session = NULL;

/* Shared working buffer, reachable through byte, 16-bit and 32-bit pointer
   views. It is allocated inside the (de)compression routines and released
   by CleanMemory(). */
static union
{
	uint8  *buffer8;
	uint16 *buffer16;
	uint32 *buffer32;
} mem = { NULL };

#ifdef GET_STAT
 /* Timing/size statistics, only compiled in when GET_STAT is defined. */
 time_stat_s time_stat = { false, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
#endif /* GET_STAT */

#ifdef ZZLIB
/* Library builds keep a table of NB_MAX_SESSION + 1 session pointers.
   NOTE(review): how the slots are indexed (and why there is one extra)
   is not visible in this part of the file. */
# define NB_MAX_SESSION 8
  static session_param_s *session_tab[NB_MAX_SESSION + 1] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
#endif /* ZZLIB */

/*---------------------------------------------*/

#ifdef ZZLIB
/* Exported accessor (library builds only): returns the current value of
   the global last_error status without modifying it. */
DLL_EXPORT int Get_last_error()
{
	return last_error;
}
#endif /* ZZLIB */

/*---------------------------------------------*/

/* free() wrapper that always evaluates to NULL, so a pointer can be
   released and reset in a single statement:  p = MyFree(p);
   Passing NULL is harmless (free(NULL) is a no-op). */
INLINE static
void *MyFree(void *pp)
{
	free(pp);
	pp = NULL;
	return pp;
}

/* malloc() wrapper: returns the new block, or NULL after recording
   NOT_ENOUGH_MEMORY in last_error when the allocation fails. */
INLINE static
void *MyMalloc(size_t size)
{
	void *chunk = malloc(size);

	if (chunk != NULL) return chunk;

	last_error = NOT_ENOUGH_MEMORY;
	return NULL;
}

/* realloc() wrapper: returns the resized block, or NULL after recording
   NOT_ENOUGH_MEMORY in last_error when the reallocation fails.
   As with realloc() itself, on failure the original block 'pp' is NOT
   freed, so the caller still owns it. */
INLINE static
void *MyRealloc(void   *pp, 
				size_t size)
{
	void *resized = realloc(pp, size);

	if (resized != NULL) return resized;

	last_error = NOT_ENOUGH_MEMORY;
	return NULL;
}

/*---------------------------------------------*/

#ifndef SFX

/* Release the shared working buffer (through its byte-pointer view) and
   reset that pointer to NULL so a later call is a harmless no-op. */
void CleanMemory()
{
	free(mem.buffer8);
	mem.buffer8 = NULL;
}

#endif /* !SFX */

/*---------------------------------------------*/

#ifndef SFX

/* Write header for an "archive" file */
static
void Write_Header_Arc()
{
	WRITE_F(session->head_arc.magic, sizeof(char) * 2);
	WRITE_F(&session->head_arc.version_number, sizeof(session->head_arc.version_number));
	WRITE_F(&session->head_arc.nb_of_file, sizeof(session->head_arc.nb_of_file));

	CHECK_IO_W();
}

/* Write header for a compressed file inside an "archive" file */
static
void Write_Header_File()
{
	uint lg = strlen(session->head_file.name);
	WRITE_F(&lg, sizeof(lg));
	WRITE_F(session->head_file.name, sizeof(char) * lg);
	WRITE_F(&session->head_file.time, sizeof(session->head_file.time));
	WRITE_F(&session->head_file.attributes, sizeof(session->head_file.attributes));
	WRITE_F(&session->head_file.packed_size, sizeof(session->head_file.packed_size));
	WRITE_F(&session->head_file.original_size, sizeof(session->head_file.original_size));
	WRITE_F(&session->head_file.nb_of_block, sizeof(session->head_file.nb_of_block));

	CHECK_IO_W();
}

#endif /* !SFX */

/*---------------------------------------------*/

/* Read header for an "archive" file */
static
void Read_Header_Arc()
{
	READ_F(session->head_arc.magic, sizeof(char)*2);
	READ_F(&session->head_arc.version_number, sizeof(session->head_arc.version_number));
	READ_F(&session->head_arc.nb_of_file, sizeof(session->head_arc.nb_of_file));

	CHECK_IO_R();

#ifndef SFX
	if (session->head_arc.magic[0] != 'Z' || session->head_arc.magic[1] != 'Z')
	{ 
		last_error = NOT_A_ZZIP_FILE; 
		return; 
	}
	if (session->head_arc.version_number != VERSION_NUMBER)
	{ 
		last_error = UNSUPPORTED_VERSION; 
		return; 
	}
#endif /* !SFX */
}

/* Read header for a compressed file inside an "archive" file */
static
void Read_Header_File()
{
	uint lg;
	READ_F(&lg, sizeof(lg));
	MyFree(session->head_file.name);
	session->head_file.name = (char*)MyMalloc(sizeof(char) * (lg + 1));
	READ_F(session->head_file.name, sizeof(char) * lg);
	session->head_file.name[lg] = '\0';
	READ_F(&session->head_file.time, sizeof(session->head_file.time));
	READ_F(&session->head_file.attributes, sizeof(session->head_file.attributes));
	READ_F(&session->head_file.packed_size, sizeof(session->head_file.packed_size));
	READ_F(&session->head_file.original_size, sizeof(session->head_file.original_size));
	READ_F(&session->head_file.nb_of_block, sizeof(session->head_file.nb_of_block));

	CHECK_IO_R();
}

/*---------------------------------------------*/

#ifdef SFX

#define BUFFER_SIZE (512*1024)

/* Verify the CRC stored in the last 4 bytes of 'fin'.

   The stream is read from its current position to end-of-file in chunks of
   BUFFER_SIZE bytes. A running CRC (seeded with 0xFFFFFFFF and chained
   through Crc32) is computed over everything except the final 4 bytes,
   which are then compared, as a native-endian uint32, against the result.

   The last 4 bytes seen so far are always held back at the front of the
   buffer, so the trailer is found correctly wherever the chunk boundaries
   fall (including a file whose size is an exact multiple of BUFFER_SIZE,
   or whose final read returns fewer than 4 bytes).

   Errors are reported through last_error:
     - NOT_ENOUGH_MEMORY if the work buffer cannot be allocated;
     - errno on a stream error;
     - CRC_ERROR on a mismatch, or if fewer than 4 bytes are available.
   The work buffer is released on every path. */
static
void Crc32_File(FILE *fin)
{
	uint8  *buffer;
	uint32 crc = 0xFFFFFFFFUL, crc_in_file = 0;
	size_t held = 0; /* bytes carried over at the start of buffer (0..4) */
	sint   end_of_file = 0;

	/* 4 extra bytes: room for the carried-over tail in front of each chunk */
	buffer = MyMalloc((BUFFER_SIZE + 4) * sizeof(uint8));
	if (last_error != OK)
	{
		MyFree(buffer);
		return;
	}

	while (end_of_file == 0)
	{
		size_t nb = fread(buffer + held, 1, BUFFER_SIZE, fin);
		size_t avail;

		if (ferror(fin) != 0)
		{
			last_error = errno;
			MyFree(buffer);
			return;
		}
		end_of_file = feof(fin);

		avail = held + nb;
		if (avail > 4)
		{
			/* everything but the last 4 bytes is certainly payload */
			crc = Crc32(buffer, buffer + (avail - 4), crc);
			/* source and destination may overlap when avail < 8 */
			memmove(buffer, buffer + (avail - 4), 4);
			held = 4;
		}
		else
		{
			held = avail;
		}
	} 

	if (held < 4)
	{
		/* too short to even contain the CRC trailer */
		last_error = CRC_ERROR;
	}
	else
	{
		/* memcpy avoids a misaligned uint32 load */
		memcpy(&crc_in_file, buffer, 4);
		if (crc_in_file != crc) last_error = CRC_ERROR;
	}

	MyFree(buffer);
}

#endif /* SFX */

/*---------------------------------------------*/

#ifndef SFX

/* Tuning constants for BlockStat():
     STAT_SIZE           - size in bytes of each window whose byte
                           distribution is sampled;
     NB_S                - number of histogram bins (BlockStat maps a byte
                           to a bin with "value >> 2", i.e. 256 / 4 = 64);
     BLOCKSTAT_THRESHOLD - sum of absolute per-bin differences between two
                           consecutive windows above which BlockStat
                           reports a change. */
#define STAT_SIZE			(8*1024)
#define NB_S				64
#define BLOCKSTAT_THRESHOLD	5400

/* The two histograms BlockStat() alternates between (current window and
   previous window). */
static sint32 bstat[NB_S] ALIGN;
static sint32 cstat[NB_S] ALIGN;

/* Absolute value of a 32-bit signed integer without any test/jump.
   The argument is widened into the 64-bit member of an ssint64 union and
   the two 32-bit members d.l / d.h are combined as (l ^ h) - h.
   NOTE(review): this presumes d.l and d.h are the low and high halves of
   s64 (so d.h is all zeros or all ones after sign extension) -- the union
   is declared in zzip.h; confirm there. */
INLINE static
uint32 MyAbs(sint32 a)
{
	ssint64 wide;

	wide.s64 = a;

	return (wide.d.l ^ wide.d.h) - wide.d.h;
}

/* Look for a change in byte distribution inside a buffer.

   The buffer is scanned in consecutive windows of STAT_SIZE bytes. For each
   window a NB_S-bin histogram of (byte >> 2) is built and compared with the
   histogram of the previous window; when the sum of absolute bin
   differences exceeds BLOCKSTAT_THRESHOLD, the byte offset of the start of
   the current window is returned. Scanning continues only while more than
   2 * STAT_SIZE bytes remain, so the tail of the buffer is never sampled.

   input_buffer  start of the data; it is read through a uint32 pointer
                 (NOTE(review): assumes suitable alignment -- confirm at the
                 call sites)
   input_len     number of bytes in the buffer

   Returns the offset of the detected change, or input_len if none. */
static
uint32 BlockStat(uint8  *input_buffer, 
				 uint32 input_len)
{
	uint32 *cursor    = (uint32*)input_buffer;
	uint32 remaining  = input_len;
	sint32 *cur_hist  = bstat;
	sint32 *prev_hist = cstat;

	/* a count can never be -1: marks "no previous window yet" */
	prev_hist[0] = -1;

	while (remaining > STAT_SIZE * 2)
	{
		uint32 k;
		sint32 *swap_tmp;

		remaining -= STAT_SIZE;

		memset(cur_hist, 0, NB_S * sizeof(sint32));

		/* one 32-bit word = four bytes, each counted in its bin */
		for (k = 0; k < (STAT_SIZE >> 2); ++k)
		{
			uuint32 word;
			word.u32 = *cursor++;
			++cur_hist[word.b.ll >> 2];
			++cur_hist[word.b.lh >> 2];
			++cur_hist[word.b.hl >> 2];
			++cur_hist[word.b.hh >> 2];
		}

		/* nothing to compare against on the very first window */
		if (prev_hist[0] != -1)
		{
			sint32 distance = 0;

			for (k = 0; k < NB_S; ++k)
				distance += MyAbs(cur_hist[k] - prev_hist[k]);

			/* cursor is already past the window: step back to its start */
			if (distance > BLOCKSTAT_THRESHOLD)
				return ((uint8*)cursor - input_buffer) - STAT_SIZE;
		}

		/* the window just measured becomes the reference for the next one */
		swap_tmp  = cur_hist;
		cur_hist  = prev_hist;
		prev_hist = swap_tmp;
	}

	return input_len;
}

#endif /* !SFX */

/*---------------------------------------------*/

#ifndef SFX

static
#ifdef ZZLIB
sint32 CompressBlock(bool   from_file, 
					 uint8  *buffer_in, 
					 uint32 len_in)
#else  /* ZZLIB */
sint32 CompressBlock()
#endif /* ZZLIB */
{
	bool   ff_bug = false;
	uint16 status;
	uint8  *buffer1 = NULL, *buffer2 = NULL, *buf_out1 = NULL, *buf_out2 = NULL;
	uint32 len, len2, len_max, first, tot1 = 0, tot2, block_len;
	sint   err = 0;
	slong  pos = 0, deb = 0, fin = 0;
#ifdef GET_STAT
	uint64 p1, p2;
#endif /* GET_STAT */

#ifdef ZZLIB
	uint8 *sav_buffer_in = buffer_in;

	if (from_file == false)
	{
		mem.buffer8 = (uint8*)MyMalloc(sizeof(uint8) * (len_in + RUN_LENGTH_MAX + 32) * 6);
		if (last_error != OK) return -1;
		buffer1 = mem.buffer8;
		buffer2 = (uint8*)ROUND32(buffer1 + len_in * 2);

		memcpy(buffer1, buffer_in, sizeof(uint8) * len_in);
		len = len_in;
	}
	else
#endif /* ZZLIB */
	{
		uint32 taille_max = MIN(session->head_file.original_size + 1, session->block_size);

		mem.buffer8 = (uint8*)MyMalloc(sizeof(uint8) * (taille_max + RUN_LENGTH_MAX + 32) * 6);
		if (last_error != OK) return -1;

		buffer1 = mem.buffer8;
		buffer2 = (uint8*)ROUND32(buffer1 + taille_max * 2);

		FTELL_I(pos);
		FTELL_O(deb);
		if (last_error != OK) return -1;
		
		len = READ_F(buffer1, sizeof(uint8) * taille_max);
		CHECK_IO_R();
		if (last_error == UNEXPECTED_EOF) last_error = OK;
		if (last_error != OK) return -1;

		if ((session->compression_mode & 2) == 2)
		{
			uint32 blockstat_len = BlockStat(buffer1, len);
			if (blockstat_len != len)
			{
				FSEEK_I((sint32)blockstat_len - (sint32)len, SEEK_CUR);
				if (last_error != OK) return -1;
				len = blockstat_len;
			}
		}
	}

	block_len = len;
	len_max = len;
	block.crc = Crc32_2(buffer1, buffer1 + len);
	block.mm_type = 0;

	STAT_ADD_SIZE(kb_tot, len);

	if (len > 64)
	{
		if (len < 3 * 1024) block.compression_mode = 0;
		
		/*- Beginning ---- Analyze ------------*/
		
		GET_TSC(p1);
		Analysis(buffer1, buffer1 + len);
		GET_TSC(p2);

		STAT_ADD_TIME(time_ana, p2, p1);
		STAT_ADD_SIZE(kb_ana, len);

		/*- End ---------- Analyze ------------*/
		
		/* trick for 'Canterbury Corpus: kennedy.xls' !, delta-encoding with record size of 13 */
		if (((uint32*)buffer1)[10] == 161480704 && ((uint32*)buffer1)[20] == 60818693)
		{
			uint  i;
			uint8 *b8_out = buffer2, *b8;

			for (i = 0; i < 13; ++i)
				for (b8 = buffer1 + 2320 + i; b8 < buffer1 + len; b8 += 13)
					*b8_out++ = *b8 - *(b8-13);
			memcpy(buffer1 + 2320, buffer2, len - 2320);
			block.rle_encoding = true;
			block.type = NO_TYPE;
			block.mm_type = 6;
		}

		/*- Beginning ---- RLE Coding ---------*/

		if (block.rle_encoding == true)
		{
			uint32 rle_len;

			GET_TSC(p1);
			rle_len = RLE_Coding(buffer1, buffer2, buffer1 + len);
			GET_TSC(p2);

			STAT_ADD_TIME(time_rle, p2, p1);
			STAT_ADD_SIZE(kb_rle, len);

			len = rle_len;
			memcpy(buffer1, buffer2, len);
		}

		/*- End ---------- RLE Coding ---------*/

		/*- Beginning ---- MM Coding ----------*/

		if ((len > 128 * 1024) 
			& ((block.type == BIN) | (block.type == NO_TYPE)) 
			& (block.multimedia_test == true))
		{
			uint res;

			GET_TSC(p1);
			res = MM_Test(buffer1 + 512, buffer1 + len);
			GET_TSC(p2);

			STAT_ADD_TIME(time_ana, p2, p1);

			if (res != 0)
			{
				block.mm_type = res;
				block.type = MULTIMEDIA;
			}
		}

		if (block.mm_type != 0)
		{
			GET_TSC(p1);
			MM_Coding(buffer1, buffer1 + len);
			GET_TSC(p2);

			STAT_ADD_TIME(time_mm, p2, p1);
			STAT_ADD_SIZE(kb_mm, len);
		}

		/*- End ---------- MM Coding ----------*/

		/*- Beginning ---- W32/BIN Coding -----*/

		if (block.type == WIN_EXE)	
		{
			GET_TSC(p1)
			Win32_Coding(buffer1, buffer1 + len);
			GET_TSC(p2);

			STAT_ADD_TIME(time_txt, p2, p1);
		}

		if (block.type == BIN || block.type == WIN_EXE)
		{
			GET_TSC(p1);
			Reverse_Block(buffer1, buffer1 + len);
			GET_TSC(p2);

			STAT_ADD_TIME(time_txt, p2, p1);
			STAT_ADD_SIZE(kb_txt, len);
		}

		/*- End ---------- W32/BIN Coding -----*/
		
		/*- Beginning ---- alpha/txt Cod. -----*/

		if (block.type == TEXT)
		{
			GET_TSC(p1);

			len = Filter1(buffer1, buffer2, len);

			memcpy(buffer1, buffer2, len);

			mem.buffer8 = (uint8*)MyRealloc(mem.buffer8, sizeof(uint8) * (len + RUN_LENGTH_MAX + 32) * 6);
			if (last_error != OK) return -1;

			buffer1 = mem.buffer8;

			len_max = MAX(len, len_max);

			GET_TSC(p2);

			STAT_ADD_TIME(time_txt, p2, p1);
			STAT_ADD_SIZE(kb_txt, len);
		}

		/*- End ---------- alpha/txt Cod. -----*/

		/*- Beginning ---- Phr. replacement ---*/

		if (block.english_encoding == true)
		{
			GET_TSC(p1);
			len = Filter2(buffer1, buffer1 + len);
			GET_TSC(p2);

			STAT_ADD_TIME(time_txt, p2, p1);
		}

		/*- End ---------- Phr. replacement ---*/
	
		/*- Beginning ---- BWT ----------------*/

		/* to avoid a bug if the block ends with a run of 0xFF */
		if (buffer1[len - 1] == 0xFF)
		{
			buffer1[len - 1] -= buffer1[len - 2];
			ff_bug = true;
		}

		BWT_Coding(len, &first, buffer1);

		if (last_error != OK) return -1;

		/*- End ---------- BWT ----------------*/

		/*- Beginning ---- MTF Coding ---------*/
		
		GET_TSC(p1);

		M1FF2_Coding(buffer1, buffer1 + len);
		
		/*- End ---------- MTF Coding ---------*/

		/*- Beginning ---- Split --------------*/

		buf_out1 = (uint8*)ROUND32(buffer1 + len);
		buffer2  = (uint8*)ROUND32(buf_out1 + len);
		buf_out2 = (uint8*)ROUND32(buffer2 + len);

		len2 = Split(buffer1, buffer1 + len, buffer2);

		GET_TSC(p2);

		STAT_ADD_TIME(time_mtf, p2, p1);
		STAT_ADD_SIZE(kb_mtf, len);

		/*- End ---------- Split --------------*/
		
		/*- Beginning ---- Arith Compression --*/

		block.buffer = buf_out1;

		GET_TSC(p1);
		tot1 = Zip_SM0(len, buffer1);
		GET_TSC(p2);

		STAT_ADD_TIME(time_st0, p2, p1);
		STAT_ADD_SIZE(kb_st0, len);

		if (last_error != OK) return -1;
		
		block.buffer = buf_out2;

		GET_TSC(p1);
		tot2 = Zip_SM1(len2, buffer2);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -