📄 huffmancoding.java
字号:
package gilyou;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Date;
import javax.swing.JOptionPane;
/**
*
* @author Gilyou
* @param ASCII 字符数组大小
* @param bits 位数
* @param HuffmanTree Huffman树根
* @param symbolCounter 超符号的个数
* @param symbols 节点数组
* @param dataArrayList 数组存储字节的信息
*
*/
public class HuffmanCoding {
static int ASCII = 256;
private int bits = 8;
private HuffmanNode HuffmanTree;
private HuffmanNode[] symbols = new HuffmanNode[ASCII + 1];
private java.util.ArrayList<DataRecord> dataArrayList = new java.util.ArrayList<DataRecord>();
private long symbolCounter;
public HuffmanCoding() {
}
private void error(String s){
JOptionPane.showMessageDialog(null,s);
}
@SuppressWarnings("unchecked")
private void initDataArrayList(RandomAccessFile reader) throws IOException {
int firstSymbol, nextSymbol, symbolLen, index;
for (firstSymbol = reader.read(); firstSymbol != -1; firstSymbol = nextSymbol) {
for (symbolLen = 1, nextSymbol = reader.read(); nextSymbol != -1 && nextSymbol == firstSymbol; symbolLen++)
nextSymbol = reader.read();
DataRecord r = new DataRecord((byte)firstSymbol,symbolLen);
if ((index = dataArrayList.indexOf(r)) == -1)
dataArrayList.add(r);
else ((DataRecord)dataArrayList.get(index)).freq++;
}
java.util.Collections.sort(dataArrayList);
}
private void outputHeadInfo(RandomAccessFile reader, RandomAccessFile writer) throws IOException {
writer.writeInt(dataArrayList.size());
writer.writeLong(reader.getFilePointer());
int size = dataArrayList.size();
for (int j = 0; j < size; j++) {
DataRecord r = (DataRecord)dataArrayList.get(j);
writer.write(r.symbol);
writer.writeInt(r.symbolLen);
writer.writeInt(r.freq);
}
}
private void inputHeadInfo(RandomAccessFile reader) throws IOException {
int dataIndex = reader.readInt();
symbolCounter = reader.readLong();
dataArrayList.ensureCapacity(dataIndex);
for (int j = 0; j < dataIndex; j++) {
DataRecord r = new DataRecord();
r.symbol = (byte) reader.read();
r.symbolLen = reader.readInt();
r.freq = reader.readInt();
dataArrayList.add(r);
}
}
/**
* @param tree 指针
* @param newNode 产生的新节点
* @param head 头节点
* @param tail 尾节点
* @param newFreq 产生的新权值
* @function 将数组转换成HuffamnTree
*/
private void createHuffmanTree() {
ListNode pointer, newNode, head, tail;
head = tail = new ListNode();
DataRecord r = (DataRecord)dataArrayList.get(0);
head.tree = new HuffmanNode(r.symbol,r.freq,r.symbolLen);
int size = dataArrayList.size();
for (int j = 0; j < size; j++) {
tail.next = new ListNode(tail,null);
tail = tail.next;
r = (DataRecord)dataArrayList.get(j);
tail.tree = new HuffmanNode(r.symbol,r.freq,r.symbolLen);
}
while (head != tail) {
int newFreq = head.tree.freq + head.next.tree.freq;
for (pointer = tail; pointer != null && pointer.tree.freq > newFreq; pointer = pointer.prev);
newNode = new ListNode(pointer,pointer.next);
pointer.next = newNode;
if (pointer == tail)
tail = newNode;
else newNode.next.prev = newNode;
newNode.tree =
new HuffmanNode((byte)0,newFreq,0,head.tree,head.next.tree);
head = head.next.next;
head.prev = null;
}
HuffmanTree = head.tree;
}
/**
* @param codeWord 字符编码后的0/1串对应的整型十进制数
* @param codeWordLen 字符编码后的0/1串的长度,用于解决最末字符不够8位的问题
*/
private void createCodeWords(HuffmanNode tree, int codeword, int codewordLen) {
if (tree.left == null && tree.right == null) {
tree.codeWord = codeword;
tree.codeWordLen = codewordLen;
}
else {
createCodeWords(tree.left, codeword << 1, codewordLen + 1);//左孩子接0
createCodeWords(tree.right,(codeword << 1) + 1,codewordLen + 1);//左孩子接1
}
}
private void transformToArray(HuffmanNode tree) {
if (tree.left == null && tree.right == null) { //叶子
tree.right = symbols[tree.symbol + 128];
symbols[tree.symbol + 128] = tree;
}
else {
transformToArray(tree.left);
transformToArray(tree.right);
}
}
/**
* @param symbolCounter buffer中已存储的0/1串的长度
* @param holdBuffer 当buffer中剩余的长度小于下一个字符的0/1串的长度时,保存截取该字符,以便存入buffer中
* @param maxBits 32,表示buffer中能存储的0/1串的长度
* @param bitsLeft buffer中还能存0/1串的长度
* @param symbolLen 超字符的字符长度。如:AAA表示一个超字符,symbolLen = 3
* @param firstSymbol, nextSymbol 从源文件(*.cod)读出的0/1串对应的整型十进制数
*/
private void encode(RandomAccessFile reader, RandomAccessFile writer) throws IOException {
int symbolCounter = 0, holdBuffer, maxBits = 4 * bits, buffer = 0;
int firstSymbol, nextSymbol, bitsLeft, symbolLen;
HuffmanNode pointer;
for (firstSymbol = reader.read(); firstSymbol != -1; ) {
for (symbolLen = 1, nextSymbol = reader.read(); nextSymbol != -1 && nextSymbol == firstSymbol; symbolLen++)
nextSymbol = reader.read();
for (pointer = symbols[(byte)firstSymbol+128]; pointer != null && symbolLen != pointer.symbolLen; pointer = pointer.right);
if (pointer == null)
error("出现内部转化错误");
if (pointer.codeWordLen < maxBits - symbolCounter) {//剩余位足够
buffer = (buffer << pointer.codeWordLen) | pointer.codeWord;
symbolCounter += pointer.codeWordLen;
}
else {
bitsLeft = maxBits - symbolCounter;
buffer <<= bitsLeft;
if (bitsLeft != pointer.codeWordLen) { //剩余位不足
holdBuffer = pointer.codeWord;
holdBuffer >>>= pointer.codeWordLen - bitsLeft;
buffer |= holdBuffer;
}
else buffer |= pointer.codeWord; //剩余位刚好
writer.writeInt(buffer);
if (bitsLeft != pointer.codeWordLen) {
buffer = pointer.codeWord;
symbolCounter = pointer.codeWordLen - bitsLeft;
}
else symbolCounter = 0;
}
firstSymbol = nextSymbol;
}
if (symbolCounter != 0) {//处理最后字符
buffer <<= maxBits - symbolCounter;
writer.writeInt(buffer);
}
}
public String compressFile(String fileName, RandomAccessFile reader,boolean outBin) throws IOException {
String outFileName = new String(fileName+".cod");
RandomAccessFile writer = new RandomAccessFile(outFileName,"rw");
Date start = new Date();
initDataArrayList(reader);
outputHeadInfo(reader,writer);
createHuffmanTree();
createCodeWords(HuffmanTree,0,0);
for (int i = 0; i <= ASCII; i++)
symbols[i] = null;
transformToArray(HuffmanTree);
if(outBin){
BinThread binThread = new BinThread(fileName,symbols);
binThread.start();
}
reader.seek(0);//重新开始扫描
encode(reader,writer);
Date end = new Date();
int tableSize = 4 + 8 + dataArrayList.size() * (1 + 4 + 4);
double fReader = reader.getFilePointer(), fWriter = writer.getFilePointer();
String message = "\n文件大小:" + reader.getFilePointer()
+ "\n" + "超符号大小:" + dataArrayList.size() + "\n" + "历时: "
+ (end.getTime() - start.getTime()) + "毫秒" + "\n" + "压缩率:"
+ ((long)(1000.0*(fReader-fWriter)/fReader)/10.0) + "%\n"
+ "无表压缩率:"
+ ((long)(1000.0*(fReader-(fWriter-tableSize))/fReader)/10.0) + "%";
writer.close();
reader.close();
return message;
}
/**
* @param symbols 已处理超符号个数
* @param symbolCounter 总超符号个数
* @param bitCount 已用位数
* @param label 用以表示二进制10000000判断ch的高位为0[(symbol & label) == 0]/[(ch & mask) != 0]1
*/
private void decode(RandomAccessFile reader, RandomAccessFile writer) throws IOException {
int chars, index, symbol, bitCount = 1, label = 1;
label <<= bits - 1;
for (chars = 0, symbol = reader.read(); symbol != -1 && chars < symbolCounter; ) {
for (HuffmanNode pointer = HuffmanTree; ; ) {
if (pointer.left == null && pointer.right == null) {
for (index = 0; index < pointer.symbolLen; index++)//写超符号
writer.write(pointer.symbol);
chars += pointer.symbolLen;
break;
}
else if ((symbol & label) == 0)//向左孩子
pointer = pointer.left;
else pointer = pointer.right;//向右孩子
if (bitCount++ == bits) {//位满,读入下一字节
symbol = reader.read();
bitCount = 1;
}
else symbol <<= 1;
}
}
}
@SuppressWarnings("unused")
public HuffmanNode[] getSymbols(){
return symbols;
}
public String decompressFile(String inFileName, RandomAccessFile reader) throws IOException {
String outFileName = new String(inFileName+".txt");
RandomAccessFile writer = new RandomAccessFile(outFileName,"rw");
Date start = new Date();
inputHeadInfo(reader);
createHuffmanTree();
createCodeWords(HuffmanTree,0,0);
for (int i = 0; i <= ASCII; i++)
symbols[i] = null;
decode(reader,writer);
Date end = new Date();
int tableSize = 4 + 8 + dataArrayList.size() * (1 + 4 + 4);
double fReader = reader.getFilePointer(), fWriter = writer.getFilePointer();
String message = "\n文件大小:" + writer.getFilePointer()
+ "\n" + "超符号大小:" + dataArrayList.size() + "\n" + "历时: "
+ (end.getTime() - start.getTime()) + "毫秒 " + "\n" + "转换表空间大小"
+ tableSize + "字节" + "\n" + "压缩率:"
+ ((long)(1000.0*(fWriter-fReader)/fWriter)/10.0) + "%\n"
+ "无表压缩率:"
+ ((long)(1000.0*(fWriter-(fReader-tableSize))/fWriter)/10.0) + "%";
return message;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -