📄 coding.c

📁 zzip-zzlib-src.zip. A new archiver that uses a BWT algorithm to achieve superior compression. The
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*---------------------------------------------*/
/* Zzip/Zzlib compressor              coding.c */
/* (de)coding functions (RLE,WIN32,MM,unBWT...)*/
/*---------------------------------------------*/

/*
  This file is a part of zzip and/or zzlib, a program and
  library for lossless, block-sorting data compression.
  Copyright (C) 1999-2001 Damien Debin. All Rights Reserved.

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the 
  Free Software Foundation, Inc., 
  59 Temple Place, Suite 330, 
  Boston, MA 02111-1307 USA

  Damien Debin
  <damien@debin.net>

  This program is based on (at least) the work of: Mike Burrows, 
  David Wheeler, Peter Fenwick, Alistair Moffat, Ian H. Witten, 
  Robert Sedgewick, Jon Bentley, Brenton Chapin, Stephen R. Tate, 
  Szymon Grabowski, Bernhard Balkenhol, Stefan Kurtz
*/

#include "zzip.h"

/*---------------------------------------------*/

static uint32 cc[256] ALIGN;

/*
 * unBWT for a given block (from bufinout to bufinout)
 * uses 4*len bytes + bufinout
 * len must be < (1<<24) (16Mb)
 */
void BWT_Decoding(uint32 len, 
				  uint32 first, 
				  uint8  *bufinout, 
				  uint32 *offset)
{
	memset(cc, 0, sizeof(uint32) * 256);

	/* bufinout is spread over the high bytes (0xFF000000) of offset */
	{
		uint32 i = 0;

		for (; i < len; ++i)
			offset[i] = ((uint32)bufinout[ i ]) << 24;
	}

	/* we use only the lower bytes (0x00FFFFFF) of offset to store the counts */
	{
		uint32 *olen, *o = offset;

		/* we skip 'first' */
		olen = offset + first;
		for (; o < olen; ++o)
			*o |= cc[*o >> 24]++;

		o++;
		olen = offset + len;
		for (; o < olen; ++o)
			*o |= cc[*o >> 24]++;

		offset[first] |= cc[offset[first] >> 24]++;
	}

	
	{
		uint32 k = 255, j = len;

		do cc[k] = (j -= cc[k]);
		while (--k != 0);
		cc[0] = 0;
	}
	

	{
		uint8  *b = bufinout + len - 1; 
		uint32 j = first;

		while (b >= bufinout) 
			j = cc[*(b--) = (offset[j] >> 24)] + (offset[j] & 0x00FFFFFFUL);
	}
}

/*---------------------------------------------*/

/* reverse a block, improve compression with some binary files */
void Reverse_Block(uint8 *bufin, 
				   uint8 *bufin_end)
{

	bufin_end--;
	for (; bufin < bufin_end; ++bufin, --bufin_end)
	{	
		uint t = *bufin;
		*bufin = *bufin_end;
		*bufin_end = t;
	}
}

/*---------------------------------------------*/

#ifndef SFX

/* 
 * 32 bits asm call trick : We transform relative adresses (following the CALL 
 * opcode) into absolute ones to improve compression. With most of files, it 
 * seems to be useless to do the same with JMP opcode.
 */
void Win32_Coding(uint8 *bufin, 
				  uint8 *bufin_end)
{
	uint8 *bufin_start = bufin;

	bufin_end -= 6;
	for (; bufin < bufin_end; ++bufin)
	{
		if (*bufin == WIN32_ASM_CALL)
		{
#ifdef WIN32
			bufin++;
			*(uint32*)bufin += (uint32)(bufin - bufin_start);
			bufin += 3;
#else  /* WIN32 */ 
			/* the piece of code above doesn't seem to work under UNIX ! */
			uint32 offset = (uint32)(bufin - bufin_start);
			bufin++;
			offset += ((uint32)*(bufin+0)) << 0;
			offset += ((uint32)*(bufin+1)) << 8;
			offset += ((uint32)*(bufin+2)) << 16;
			offset += ((uint32)*(bufin+3)) << 24;
			*(bufin+0) = (uint8)(offset >> 0);
			*(bufin+1) = (uint8)(offset >> 8);
			*(bufin+2) = (uint8)(offset >> 16);
			*(bufin+3) = (uint8)(offset >> 24);
			bufin += 3;
#endif /* WIN32 */
		}
	}
}

#endif /* !SFX */

/*---------------------------------------------*/

/* The reverse operation */
void Win32_Decoding(uint8 *bufin, 
					uint8 *bufin_end)
{
	uint8 *bufin_start = bufin;

	bufin_end -= 6;
	for (; bufin < bufin_end; ++bufin)
	{
		if (*bufin == WIN32_ASM_CALL)
		{
#ifdef WIN32
			bufin++;
			*(uint32*)bufin -= (uint32)(bufin - bufin_start);
			bufin += 3;
#else  /* WIN32 */
			uint32 offset = 0;
			bufin++;
			offset += ((uint32)*(bufin+0)) << 0;
			offset += ((uint32)*(bufin+1)) << 8;
			offset += ((uint32)*(bufin+2)) << 16;
			offset += ((uint32)*(bufin+3)) << 24;
			offset -=  (uint32)(bufin - 1 - bufin_start);
			*(bufin+0) = (uint8)(offset >> 0);
			*(bufin+1) = (uint8)(offset >> 8);
			*(bufin+2) = (uint8)(offset >> 16);
			*(bufin+3) = (uint8)(offset >> 24);
			bufin += 3;
#endif /* WIN32 */
		}
	}
}

/*---------------------------------------------*/

#ifndef SFX

/* trick to compute an absolute value without any test/jump */
INLINE static
uint32 MyAbs(sint32 a)
{
	ssint64 s;
	s.s64 = a;
	return (s.d.l^s.d.h)-s.d.h;
}

/* 
 * We try to find out if it's a "multimedia" (image, audio) block and what is 
 * its interlacing, because raw "multimedia" files (24 bits images, 8/16 bits 
 * mono/stereo audio files) are usually interlaced we try different interlacing
 * and calculate the mean distance.
 */
uint MM_Test(uint8 *bufin, 
			 uint8 *bufin_end)
{
	union
	{
		uint16 *b16;
		uint8  *b8;
	} b;
	uint32 len32 = bufin_end - bufin;
	uint8  *b8_end = bufin_end - 1;
	uint32 s1 = 0, s2 = 0;
	uint32 t1 = 0, t2 = 0, t3 = 0;

	for (b.b8 = bufin + 4; b.b8 < b8_end; b.b8 += 2)
	{
		s1 += MyAbs(*(b.b16) - *(b.b16-1)) >> 8;
		s2 += MyAbs(*(b.b16) - *(b.b16-2)) >> 8;
		t1 += MyAbs(*(b.b8+1) - *(b.b8-0));
		t1 += MyAbs(*(b.b8+0) - *(b.b8-1));
		t2 += MyAbs(*(b.b8+1) - *(b.b8-1));
		t2 += MyAbs(*(b.b8+0) - *(b.b8-2));
		t3 += MyAbs(*(b.b8+1) - *(b.b8-2));
		t3 += MyAbs(*(b.b8+0) - *(b.b8-3));
	}

/*	printf("\n|Wav16:%.2f,%.2f|", (double)s1/len32, (double)s2/len32);
	printf("Wav8:%.2f,%.2f,%.2f|", (double)t1/len32, (double)t2/len32, (double)t3/len32);*/

	/* these thresholds seem to work ;) */
	if (t1 < 13*len32) return 1;
	else if (t2 < 13*len32) return 2;
	else if (t3 < 14*len32) return 3;
	else if (s1 < 13*len32) return 4;
	else if (s2 < 17*len32) return 5;
	else return 0;
}

#endif /* !SFX */

/*---------------------------------------------*/

#ifndef SFX

/*
 * We do a delta-encoding on the block (according to the interlacing), it 
 * improves compression with "multimedia" files.
 */
void MM_Coding(uint8 *bufin, 
			   uint8 *bufin_end)
{
	switch (block.mm_type)
	{
	case 1: /* interlacing 1:1 (byte) */
		{
			uint32  *b32 = (uint32*)bufin + 1;
			uint32  *b32_end = (uint32*)bufin_end + 1;
			uuint32 u1, u2;
			u1.u32 = *(uint32*)bufin;
			for (; b32 < b32_end; ++b32)
			{
				u2.u32 = *b32;
				u1.b.ll = u2.b.ll - u1.b.hh;
				u1.b.lh = u2.b.lh - u2.b.ll;
				u1.b.hl = u2.b.hl - u2.b.lh;
				u1.b.hh = u2.b.hh - u2.b.hl;
				*b32 = u1.u32;
				u1.u32 = u2.u32;
			}
		}
		break;
	case 2: /* interlacing 1:2 (byte) */
		{
			uint32  *b32 = (uint32*)bufin + 1;
			uint32  *b32_end = (uint32*)bufin_end + 1;
			uuint32 u1, u2;
			u1.u32 = *(uint32*)bufin;
			for (; b32 < b32_end; ++b32)
			{
				u2.u32 = *b32;
				u1.b.ll = u2.b.ll - u1.b.hl;
				u1.b.lh = u2.b.lh - u1.b.hh;
				u1.b.hl = u2.b.hl - u2.b.ll;
				u1.b.hh = u2.b.hh - u2.b.lh;
				*b32 = u1.u32;
				u1.u32 = u2.u32;
			}
		}
		break;
	case 3: /* interlacing 1:3 (byte) */
		{
			uint32  *b32 = (uint32*)bufin + 1;
			uint32  *b32_end = (uint32*)bufin_end + 1;
			uuint32 u1, u2;
			u1.u32 = *(uint32*)bufin;
			for (; b32 < b32_end; ++b32)
			{
				u2.u32 = *b32;
				u1.b.ll = u2.b.ll - u1.b.lh;
				u1.b.lh = u2.b.lh - u1.b.hl;
				u1.b.hl = u2.b.hl - u1.b.hh;
				u1.b.hh = u2.b.hh - u2.b.ll;
				*b32 = u1.u32;
				u1.u32 = u2.u32;
			}
		}
		break;
	case 4: /* interlacing 1:1 (word) */
		{
			uint32  *b32 = (uint32*)bufin + 1;
			uint32  *b32_end = (uint32*)bufin_end + 1;
			uuint32 u1, u2;
			u1.u32 = *(uint32*)bufin;
			for (; b32 < b32_end; ++b32)
			{
				u2.u32 = *b32;
				u1.w.l = u2.w.l - u1.w.h;
				u1.w.h = u2.w.h - u2.w.l;
				*b32 = u1.u32;
				u1.u32 = u2.u32;
			}
		}
		break;
	case 5: /* interlacing 1:2 (word) */
		{
			uint32  *b32 = (uint32*)bufin + 1;
			uint32  *b32_end = (uint32*)bufin_end + 1;
			uuint32 u1, u2;
			u1.u32 = *(uint32*)bufin;
			for (; b32 < b32_end; ++b32)
			{
				u2.u32 = *b32;
				u1.w.l = u2.w.l - u1.w.l;
				u1.w.h = u2.w.h - u1.w.h;
				*b32 = u1.u32;
				u1.u32 = u2.u32;
			}
		}
		break;
	}
}

#endif

/*---------------------------------------------*/

/* The reverse operation */
void MM_Decoding(uint8 *bufin, 
				 uint8 *bufin_end)
{
	switch (block.mm_type)
	{
	case 1:
		{
			uint32  *b32 = (uint32*)bufin + 1;
			uint32  *b32_end = (uint32*)bufin_end + 1;
			uuint32 u1, u2;
			u1.u32 = *(uint32*)bufin;
			for (; b32 < b32_end; ++b32)
			{
				u2.u32 = *b32;
				u2.b.ll += u1.b.hh;
				u2.b.lh += u2.b.ll;
				u2.b.hl += u2.b.lh;
				u2.b.hh += u2.b.hl;
				*b32 = u2.u32;
				u1.u32 = u2.u32;
			}
		}
		break;
	case 2:
		{
			uint32  *b32 = (uint32*)bufin + 1;
			uint32  *b32_end = (uint32*)bufin_end + 1;
			uuint32 u1, u2;
			u1.u32 = *(uint32*)bufin;
			for (; b32 < b32_end; ++b32)
			{
				u2.u32 = *b32;
				u2.b.ll += u1.b.hl;
				u2.b.lh += u1.b.hh;
				u2.b.hl += u2.b.ll;
				u2.b.hh += u2.b.lh;
				*b32 = u2.u32;
				u1.u32 = u2.u32;
			}
		}
		break;
	case 3:
		{
			uint32  *b32 = (uint32*)bufin + 1;
			uint32  *b32_end = (uint32*)bufin_end + 1;
			uuint32 u1, u2;
			u1.u32 = *(uint32*)bufin;
			for (; b32 < b32_end; ++b32)
			{
				u2.u32 = *b32;
				u2.b.ll += u1.b.lh;
				u2.b.lh += u1.b.hl;
				u2.b.hl += u1.b.hh;
				u2.b.hh += u2.b.ll;
				*b32 = u2.u32;
				u1.u32 = u2.u32;
			}
		}
		break;
	case 4:
		{
			uint32  *b32 = (uint32*)bufin + 1;
			uint32  *b32_end = (uint32*)bufin_end + 1;
			uuint32 u1, u2;
			u1.u32 = *(uint32*)bufin;
			for (; b32 < b32_end; ++b32)
			{
				u2.u32 = *b32;
				u2.w.l += u1.w.h;
				u2.w.h += u2.w.l;
				*b32 = u2.u32;
				u1.u32 = u2.u32;
			}
		}
		break;
	case 5:
		{
			uint32  *b32 = (uint32*)bufin + 1;
			uint32  *b32_end = (uint32*)bufin_end + 1;
			uuint32 u1, u2;
			u1.u32 = *(uint32*)bufin;
			for (; b32 < b32_end; ++b32)
			{
				u2.u32 = *b32;
				u2.w.l += u1.w.l;
				u2.w.h += u1.w.h;
				*b32 = u2.u32;
				u1.u32 = u2.u32;
			}
		}
		break;
	}
}

/*---------------------------------------------*/

#ifndef SFX

static const uint move[256] ALIGN = 
{ 0,1,2,3,4,5,6,7,8,9,31,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,
96,32,10,33,34,44,59,58,63,39,40,41,42,43,45,35,46,47,48,49,50,51,52,53,54,55,56,
57,37,36,60,62,61,38,64,65,70,71,72,66,74,73,75,67,83,84,77,79,80,68,81,82,76,78,
85,69,87,86,88,89,90,91,92,93,94,95,30,97,102,103,104,98,106,105,107,99,115,116,
109,111,112,100,113,114,108,110,117,101,119,118,120,121,122,
130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,
150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,
171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,
213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,
234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,
255,123,124,125,126,127,128,129 };

/*
 * some ASCII transformations: alphabet reordering, carriage-return tagging,
 * upper-case letter tagging
 */
uint32 Filter1(uint8  *bufin, 
			   uint8  *bufout, 
			   uint32 len)
{
	uint  b0, b1;
	uint8 *bout = bufout, *blen = bufin + len;

	b1 = *bufin++;
	while (bufin < blen)
	{
		b0 = b1;
		b1 = *bufin++;
		/* upper-case letter tagging */
		if (((b0 - 65) < 26) | (b0 == TAG_CAPS))
		{
			*bout++ = 96;/* move[TAG_CAPS]; */
			b0 += 32;
		}

		*bout++ = move[b0];
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -