📄 lzw.cpp

📁 lzw算法的实现
💻 CPP
字号:
#include <stdio.h>
#include <stdlib.h>

#define MAX 65535				// 字典项的最大数量, 默认采用两个字节编码, 且预留最大值作为空值.
								// 这是因为0表示了'\0'而不能表示空. 此值不能随意修改! default: 65535

#define TIRED 256				// 疲劳忍受值, 当Hash冲突造成的顺延量超过此值时, 清空Hash表. default: 256


/* --------------------- Structure Declarations ----------------------- */

struct _DICT {							// 字典结构
	unsigned short pre[MAX];	// 字典项的前缀, 即前面字符的编码
	char ch[MAX];				// 字典项的后缀, 即最后的字符
};

typedef _DICT DICT;


/* ------------------- Global Function Declarations --------------------- */

void dicInit();													// 初始化或重置字典

int enCode();													// 编码
int deCode();													// 解码
void test();													// 调试用, 对用户无意义

/* ------------------- Global Variable Declarations --------------------- */

DICT _dic; DICT* dic=&_dic;						// 字典
FILE* pf, * nf;									// pf 原文件, nf 压缩后的文件


/* -------------------------- Source Begins ----------------------------- */

int main()
{
	char cmd; char s[99], t[99];
	printf("===== LZW Text Compression =====\n\n1.Compression\n2.Decompression\n0.Exit\n: ");
	scanf("%c", &cmd); fflush(stdin);
	switch (cmd) {
		case '1':
			printf("\nOrigin File: ");
			scanf("%s", s); fflush(stdin);
			pf=fopen(s, "rb");
			if (!pf) {
				printf("\nCannot find file %s. Please check the filename correct.\n", s);
				system("PAUSE"); return -1;
			}
			printf("Target File: ");
			scanf("%s", t); fflush(stdin);
			nf=fopen(t, "wb");
			printf("\nCompressing...\n"); enCode();
			fclose(pf); fclose(nf);
			break;
		case '2':
			printf("\nOrigin File: ");
			scanf("%s", t); fflush(stdin);
			nf=fopen(t, "rb");
			if (!nf) {
				printf("\nCannot find file %s. Please check the filename correct.\n", t);
				system("PAUSE"); return -1;
			}
			printf("Target File: ");
			scanf("%s", s); fflush(stdin);
			pf=fopen(s, "wb"); 
			printf("\nDecompressing...\n"); deCode();
			fclose(pf); fclose(nf);
			break;
		default:
			break;
	}
	return 0;
}

// 重置字典, 将127以后的项全部清空
void dicInit()
{
	int i;
	for (i=0; i<256; i++) {										// 建立基底, 即0-255的ASCII码表
		dic->pre[i]=MAX; dic->ch[i]=(char)i;					// 基底的pre是无用的
	}
	for (i=256; i<MAX; i++) dic->pre[i]=MAX;					// 只需清空pre一项即可
}

// 编码
// 若成功编码, 返回0, 否则返回-1
int enCode()
{
	char buf[MAX];												// buf 读取缓冲区
	unsigned short size=0;										// size 缓冲区有效长度
	int forecode, thecode, tmpcode=0;							// forecode 前一编码, thecode 当前编码, tmpcode 临时编码
	unsigned short tir;											// tir 疲劳度, 即顺延的程度
	unsigned short n;											// n 状态判断, 1为正常, 0为文件尾, 2为字典已满

	if (fread(&buf[0], sizeof(char), 1, pf)==0) {				// 空文件, 报错
		printf("\n原文件为空, 压缩失败!\n"); return -1;
	}

	dicInit();													// 初始化字典
	rewind(pf);													// 文件复位, 准备开始
	fread(&buf[0], sizeof(char), 1, pf); size=1;				// 预先读一个字符
	forecode=tmpcode=(int)buf[0];								// 前一编码就是ASCII码

	while (1) {

		/* ---------------- 搜索编码 ---------------- */

		while (n=fread(&buf[size], sizeof(char), 1, pf)) {		// fread()返回成功读取的数量, 为0时即文件尾, 跳出循环
			if (size>=MAX-1) {n=2; break;}						// 缓冲区满, 强制跳出循环并清空字典
			tir=0;												// 疲劳度归零
			thecode=(tmpcode+256+buf[size])%MAX; if (thecode<256) thecode=256;
			tmpcode=thecode;									// 暂存原始位置, 以备下一循环上面式子使用
			// 编码的算法是: (长度-1)*256 + 最后一位ASCII + 前面字符串的编码, 并对MAX取模, 采用顺延解决Hash冲突
		
			while (dic->pre[thecode]!=forecode || dic->ch[thecode]!=buf[size]) {
				if (dic->pre[thecode]==MAX) break;				// 编码不存在, 跳出循环并准备新建
				if (++tir>TIRED) { n=2; break;}					// 疲劳度超过忍受值时, 设置状态值为2, 跳出循环并准备写入文件后清空字典
				thecode=(thecode+1)%MAX; if (thecode<256) thecode=256;
			}
			if (dic->pre[thecode]==MAX || n==2) break;			// 编码为空或字典已满时, 跳出搜索循环, 准备新建
			forecode=thecode; size++;							// 找到编码, 储存前一编码, 继续循环
		}

		/* ---------------- 写入文件 ---------------- */

		tir=(unsigned short)forecode;							// 找一个地方把forecode转成unsigned short
		fwrite(&tir, sizeof(unsigned short), 1, nf);
		if (n==0) break;										// 文件结束, 跳出所有循环

		/* --------------- 新建字典项 --------------- */

		if (n==2) dicInit();									// 字典满时清空字典
		else { dic->pre[thecode]=forecode; dic->ch[thecode]=buf[size];}
		buf[0]=buf[size]; size=1; forecode=tmpcode=(int)buf[0];	// 保留查找失败的字符, 进入搜索循环

	}
	return 0;
}

// 解码
// 若成功解码, 返回0, 否则返回-1
int deCode()
{
	char buf1[MAX], buf2[MAX];									// buf1, buf2 写入缓冲区, 缓冲区内的字符串都是逆序排列的, 方便操作
	unsigned short size1=0, size2=0;							// size1, size2 各缓冲区尾部位置
	unsigned short forecode, thecode=MAX;						// forecode 前一编码, thecode 当前编码
	int tmpcode;												// tmpcode 临时编码
	unsigned short tir;											// tir 疲劳度, 即顺延的程度
	unsigned short n;											// n 状态判断, 1为正常, 0为文件尾, 2为字典已满, 3为读到未知编码
	int i;														// 临时变量

	if (fread(&thecode, sizeof(unsigned short), 1, nf)==0) {	// 空文件, 报错
		printf("\n压缩文件为空, 解压失败!\n"); return -1;
	}

	dicInit(); rewind(nf);										// 初始化字典并将文件复位, 准备开始

	while (1) {

		/* ---------------- 读入编码 ---------------- */

		forecode=thecode;										// 将上一编码保存
		n=fread(&thecode, sizeof(unsigned short), 1, nf);

		/* ------------ 译码并写入缓冲区 ------------ */
		
		if (dic->pre[thecode]==MAX && thecode>255) {			// 字典项不存在的情况
			// 这是因为在压缩时刚生成的字典项马上被使用, 而解压生成的速度比压缩时慢一步造成的
			// 这只有在一种情况下发生, 就是当前项加上当前项的首字的搭配, 由此可给出此解决方法
			dic->pre[thecode]=forecode; dic->ch[thecode]=buf1[size1-1]; n=3;
		}

		tmpcode=thecode;										// 迭代初值
		do {													// 迭代开始, 将字符依次读出, 得到逆序字符串
			buf2[size2++]=dic->ch[tmpcode];						// 读取时写入缓冲区2
			tmpcode=dic->pre[tmpcode];
		} while (tmpcode<MAX);

		/* ---------- 写入文件和新建字典项 ---------- */

		if (size1>0 && n) {										// 不是文件头或文件尾时, 写入文件并新建字典项

			for (i=size2-1; i>=0; i--) fwrite(&buf2[i], sizeof(char), 1, pf);
																// 将buf2中的字符串写入文件
			
			if (size1>=MAX-2) n=2;								// buf1缓冲区满, 强制清空字典
			if (n<2) {											// n==3时字典项已经新建过了, 故不再新建
				tmpcode=size1*256;
				for (i=size1-1; i>=0; i--) tmpcode+=(int)buf1[i];
				tmpcode=(tmpcode+(int)buf2[size2-1])%MAX;		// 这三句利用算法求得编码的公式位置
				// 新的编码为前一字符串buf1的所有字符加当前字符串buf2的第一个字符, 注意字符串的存储是逆序的
			
				tir=0;											// 疲劳度归零
				while (dic->pre[tmpcode]<MAX) {					// 字典项冲突, 顺延解决冲突
					if (++tir>TIRED) { n=2; break;}				// 字典满了
					tmpcode=(tmpcode+1)%MAX; if (tmpcode<256) tmpcode=256;
				}
				if (n!=2) {	dic->pre[tmpcode]=forecode; dic->ch[tmpcode]=buf2[size2-1];}
			}													// 本循环按照上面的规则新建字典项 
			if (n==2) {
				dicInit(); buf1[0]=buf2[0]; size1=1; size2=0;
			}													// 字典满时, 清空字典并更新缓冲区
			else {
				for (i=0; i<size2; i++) buf1[i]=buf2[i];
				size1=size2; size2=0;							// 这两句将缓冲区更新, 把当前字符串buf2复制到buf1, 并清空buf2
			}

		}
		else if (n) {
			fwrite(&buf2[0], sizeof(char), 1, pf);				// 文件头只有一个字符, 写入即可
			buf1[0]=buf2[0]; size1=1; size2=0;					// 更新缓冲区和size
		}
		else break;												// 文件结束, 退出循环

	}
	return 0;
}

void test()														// 调试用, 对用户无意义
{
	FILE* tf1=fopen("result.bin", "rb");
	FILE* tf2=fopen("debug.txt", "w");
	unsigned short t;
	while (fread(&t, sizeof(unsigned short), 1, tf1)) fprintf(tf2, "%d ", t);
	fclose(tf1); fclose(tf2);
}
💿 文件大小 4 K
👤 上传用户 sgly2002
📂 所属分类压缩解压
📄 代码行数 228 行
💻 语言类型 C++
🏷️ 相关标签

#lzw #算法
更多lzw资源 →
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -