📄 huffman.c
字号:
numbytes = numbytes_from_numbits(p->numbits); if(fwrite(p->bits, 1, numbytes, out) != numbytes) return 1; } } return 0;}/* * Allocates memory and sets *pbufout to point to it. The memory * contains the code table. */static intwrite_code_table_to_memory(buf_cache *pc, SymbolEncoder *se, unsigned int symbol_count){ unsigned long i, count = 0; /* Determine the number of entries in se. */ for(i = 0; i < MAX_SYMBOLS; ++i) { if((*se)[i]) ++count; } /* Write the number of entries in network byte order. */ i = htonl(count); if(write_cache(pc, &i, sizeof(i))) return 1; /* Write the number of bytes that will be encoded. */ symbol_count = htonl(symbol_count); if(write_cache(pc, &symbol_count, sizeof(symbol_count))) return 1; /* Write the entries. */ for(i = 0; i < MAX_SYMBOLS; ++i) { huffman_code *p = (*se)[i]; if(p) { unsigned int numbytes; /* The value of i is < MAX_SYMBOLS (256), so it can be stored in an unsigned char. */ unsigned char uc = (unsigned char)i; /* Write the 1 byte symbol. */ if(write_cache(pc, &uc, sizeof(uc))) return 1; /* Write the 1 byte code bit length. */ uc = (unsigned char)p->numbits; if(write_cache(pc, &uc, sizeof(uc))) return 1; /* Write the code bytes. */ numbytes = numbytes_from_numbits(p->numbits); if(write_cache(pc, p->bits, numbytes)) return 1; } } return 0;}/* * read_code_table builds a Huffman tree from the code * in the in file. This function returns NULL on error. * The returned value should be freed with free_huffman_tree. */static huffman_node*read_code_table(FILE* in, unsigned int *pDataBytes){ huffman_node *root = new_nonleaf_node(0, NULL, NULL); unsigned int count; /* Read the number of entries. (it is stored in network byte order). */ if(fread(&count, sizeof(count), 1, in) != 1) { free_huffman_tree(root); return NULL; } count = ntohl(count); /* Read the number of data bytes this encoding represents. */ if(fread(pDataBytes, sizeof(*pDataBytes), 1, in) != 1) { free_huffman_tree(root); return NULL; } *pDataBytes = ntohl(*pDataBytes); /* Read the entries. */ while(count-- > 0) { int c; unsigned int curbit; unsigned char symbol; unsigned char numbits; unsigned char numbytes; unsigned char *bytes; huffman_node *p = root; if((c = fgetc(in)) == EOF) { free_huffman_tree(root); return NULL; } symbol = (unsigned char)c; if((c = fgetc(in)) == EOF) { free_huffman_tree(root); return NULL; } numbits = (unsigned char)c; numbytes = (unsigned char)numbytes_from_numbits(numbits); bytes = (unsigned char*)malloc(numbytes); if(fread(bytes, 1, numbytes, in) != numbytes) { free(bytes); free_huffman_tree(root); return NULL; } /* * Add the entry to the Huffman tree. The value * of the current bit is used switch between * zero and one child nodes in the tree. New nodes * are added as needed in the tree. */ for(curbit = 0; curbit < numbits; ++curbit) { if(get_bit(bytes, curbit)) { if(p->one == NULL) { p->one = curbit == (unsigned char)(numbits - 1) ? new_leaf_node(symbol) : new_nonleaf_node(0, NULL, NULL); p->one->parent = p; } p = p->one; } else { if(p->zero == NULL) { p->zero = curbit == (unsigned char)(numbits - 1) ? new_leaf_node(symbol) : new_nonleaf_node(0, NULL, NULL); p->zero->parent = p; } p = p->zero; } } free(bytes); } return root;}static intmemread(const unsigned char* buf, unsigned int buflen, unsigned int *pindex, void* bufout, unsigned int readlen){ assert(buf && pindex && bufout); assert(buflen >= *pindex); if(buflen < *pindex) return 1; if(readlen + *pindex >= buflen) return 1; memcpy(bufout, buf + *pindex, readlen); *pindex += readlen; return 0;}static huffman_node*read_code_table_from_memory(const unsigned char* bufin, unsigned int bufinlen, unsigned int *pindex, unsigned int *pDataBytes){ huffman_node *root = new_nonleaf_node(0, NULL, NULL); unsigned int count; /* Read the number of entries. (it is stored in network byte order). */ if(memread(bufin, bufinlen, pindex, &count, sizeof(count))) { free_huffman_tree(root); return NULL; } count = ntohl(count); /* Read the number of data bytes this encoding represents. */ if(memread(bufin, bufinlen, pindex, pDataBytes, sizeof(*pDataBytes))) { free_huffman_tree(root); return NULL; } *pDataBytes = ntohl(*pDataBytes); /* Read the entries. */ while(count-- > 0) { unsigned int curbit; unsigned char symbol; unsigned char numbits; unsigned char numbytes; unsigned char *bytes; huffman_node *p = root; if(memread(bufin, bufinlen, pindex, &symbol, sizeof(symbol))) { free_huffman_tree(root); return NULL; } if(memread(bufin, bufinlen, pindex, &numbits, sizeof(numbits))) { free_huffman_tree(root); return NULL; } numbytes = (unsigned char)numbytes_from_numbits(numbits); bytes = (unsigned char*)malloc(numbytes); if(memread(bufin, bufinlen, pindex, bytes, numbytes)) { free(bytes); free_huffman_tree(root); return NULL; } /* * Add the entry to the Huffman tree. The value * of the current bit is used switch between * zero and one child nodes in the tree. New nodes * are added as needed in the tree. */ for(curbit = 0; curbit < numbits; ++curbit) { if(get_bit(bytes, curbit)) { if(p->one == NULL) { p->one = curbit == (unsigned char)(numbits - 1) ? new_leaf_node(symbol) : new_nonleaf_node(0, NULL, NULL); p->one->parent = p; } p = p->one; } else { if(p->zero == NULL) { p->zero = curbit == (unsigned char)(numbits - 1) ? new_leaf_node(symbol) : new_nonleaf_node(0, NULL, NULL); p->zero->parent = p; } p = p->zero; } } free(bytes); } return root;}static intdo_file_encode(FILE* in, FILE* out, SymbolEncoder *se){ unsigned char curbyte = 0; unsigned char curbit = 0; int c; while((c = fgetc(in)) != EOF) { unsigned char uc = (unsigned char)c; huffman_code *code = (*se)[uc]; unsigned long i; for(i = 0; i < code->numbits; ++i) { /* Add the current bit to curbyte. */ curbyte |= get_bit(code->bits, i) << curbit; /* If this byte is filled up then write it * out and reset the curbit and curbyte. */ if(++curbit == 8) { fputc(curbyte, out); curbyte = 0; curbit = 0; } } } /* * If there is data in curbyte that has not been * output yet, which means that the last encoded * character did not fall on a byte boundary, * then output it. */ if(curbit > 0) fputc(curbyte, out); return 0;}static intdo_memory_encode(buf_cache *pc, const unsigned char* bufin, unsigned int bufinlen, SymbolEncoder *se){ unsigned char curbyte = 0; unsigned char curbit = 0; unsigned int i; for(i = 0; i < bufinlen; ++i) { unsigned char uc = bufin[i]; huffman_code *code = (*se)[uc]; unsigned long i; for(i = 0; i < code->numbits; ++i) { /* Add the current bit to curbyte. */ curbyte |= get_bit(code->bits, i) << curbit; /* If this byte is filled up then write it * out and reset the curbit and curbyte. */ if(++curbit == 8) { if(write_cache(pc, &curbyte, sizeof(curbyte))) return 1; curbyte = 0; curbit = 0; } } } /* * If there is data in curbyte that has not been * output yet, which means that the last encoded * character did not fall on a byte boundary, * then output it. */ return curbit > 0 ? write_cache(pc, &curbyte, sizeof(curbyte)) : 0;}/* * huffman_encode_file huffman encodes in to out. */inthuffman_encode_file(FILE *in, FILE *out){ SymbolFrequencies sf; SymbolEncoder *se; huffman_node *root = NULL; int rc; unsigned int symbol_count; /* Get the frequency of each symbol in the input file. */ symbol_count = get_symbol_frequencies(&sf, in); /* Build an optimal table from the symbolCount. */ se = calculate_huffman_codes(&sf); root = sf[0]; /* Scan the file again and, using the table previously built, encode it into the output file. */ rewind(in); rc = write_code_table(out, se, symbol_count); if(rc == 0) rc = do_file_encode(in, out, se); /* Free the Huffman tree. */ free_huffman_tree(root); free_encoder(se); return rc;}inthuffman_decode_file(FILE *in, FILE *out){ huffman_node *root, *p; int c; unsigned int data_count; /* Read the Huffman code table. */ root = read_code_table(in, &data_count); if(!root) return 1; /* Decode the file. */ p = root; while(data_count > 0 && (c = fgetc(in)) != EOF) { unsigned char byte = (unsigned char)c; unsigned char mask = 1; while(data_count > 0 && mask) { p = byte & mask ? p->one : p->zero; mask <<= 1; if(p->isLeaf) { fputc(p->symbol, out); p = root; --data_count; } } } free_huffman_tree(root); return 0;}#define CACHE_SIZE 1024int huffman_encode_memory(const unsigned char *bufin, unsigned int bufinlen, unsigned char **pbufout, unsigned int *pbufoutlen){ SymbolFrequencies sf; SymbolEncoder *se; huffman_node *root = NULL; int rc; unsigned int symbol_count; buf_cache cache; /* Ensure the arguments are valid. */ if(!pbufout || !pbufoutlen) return 1; if(init_cache(&cache, CACHE_SIZE, pbufout, pbufoutlen)) return 1; /* Get the frequency of each symbol in the input memory. */ symbol_count = get_symbol_frequencies_from_memory(&sf, bufin, bufinlen); /* Build an optimal table from the symbolCount. */ se = calculate_huffman_codes(&sf); root = sf[0]; /* Scan the memory again and, using the table previously built, encode it into the output memory. */ rc = write_code_table_to_memory(&cache, se, symbol_count); if(rc == 0) rc = do_memory_encode(&cache, bufin, bufinlen, se); /* Flush the cache. */ flush_cache(&cache); /* Free the Huffman tree. */ free_huffman_tree(root); free_encoder(se); free_cache(&cache); return rc;}int huffman_decode_memory(const unsigned char *bufin, unsigned int bufinlen, unsigned char **pbufout, unsigned int *pbufoutlen){ huffman_node *root, *p; unsigned int data_count; unsigned int i = 0; unsigned char *buf; unsigned int bufcur = 0; /* Ensure the arguments are valid. */ if(!pbufout || !pbufoutlen) return 1; /* Read the Huffman code table. */ root = read_code_table_from_memory(bufin, bufinlen, &i, &data_count); if(!root) return 1; buf = (unsigned char*)malloc(data_count); /* Decode the memory. */ p = root; for(; i < bufinlen && data_count > 0; ++i) { unsigned char byte = bufin[i]; unsigned char mask = 1; while(data_count > 0 && mask) { p = byte & mask ? p->one : p->zero; mask <<= 1; if(p->isLeaf) { buf[bufcur++] = p->symbol; p = root; --data_count; } } } free_huffman_tree(root); *pbufout = buf; *pbufoutlen = bufcur; return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -