📄 huffmannodes.hpp
字号:
/// @file huffmannodes.hpp
/// Huffman node classes
///
/// @remarks
/// The HuffNode_Leaf is the base class for all symbols, and is (despite the
/// elaborate interface), a very lightweight data structure, containing
/// only a symbol id and a numeric weight.
///
/// The techinique to use a priority queue to generate the huffman tree
/// is described in [@ref nelson].
///
/// @author mouser
/// @date 2003.07.29
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// recursive header protection
#ifndef Coder_HuffmanNodesH
#define Coder_HuffmanNodesH
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// application includes
#include "../../bitio/bitio.hpp"
// system includes
#include <string>
#include <iostream>
#include <iomanip>
#include <vector>
#include <queue>
#include <set>
#include <bitset>
using namespace std;
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// forward declarations
class HuffNode;
class HuffNode_Middle;
class HuffNode_Leaf;
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// jibz modified huffman pq weight comparisn,
#define OCTANE_USEJIBZHUFFMANPQGREATER
// <mouser> so if left is a leaf and right is not a leaf, then you treat right as if it has lower weight
// <mouser> (on ties)
// <Jibz> yes .. that was what I was going for .. making internal nodes seem slightly smaller than leafs with the same weight
// <mouser> now why is this good?
// <Jibz> I don't know .. it may even be bad ;)
// <mouser> hehehehe
// <Jibz> but it seems like it might be better to make a choice, than to pick at random (or whatever the pq gives you)
// COMPARISON SHOWS -> no perceivable difference.
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// TypeDefs and Defines
//
// what kind of precision should we use for the weights?
// note that because of the way trees are built, this values needs to store
// the SUM of all weights in the tree, not just the maximum frequency of
// any leaf(!).
// We could use a typedef here but it can cause warnings on type conversions
// //typedef unsigned long THuffmanNodeWeight;
// #define THuffmanNodeWeight unsigned long
// #define THuffmanNodeWeight_MAX ULONGMAX
// typedef unsigned long THuffmanNodeWeight;
/// Precision for representing a huffman weight
#define THuffmanNodeWeight float
/// Largest value a weight can hold
#define THuffmanNodeWeight_MAX FLOATMAX
/// Maximum depth that will be seen in the priority queue (huffman tree).
/// Which is equivalent to the maximum bitlength for any code.
/// Setting this is a little tricky because it directly affects the memory
/// used by every dictionary entry. an entry needs to reserver a number of
/// bits equal to the maximum depth of any node in the huffman tree.
/// so this value should be a multiple of 8, and can be estimated from the
/// number of dictionary entries that will be in the final huffman tree.
/// The code will detect if it is not large enough and tell you during
/// building of the codebook.
/// A reasonable value for a dictionary of several thousand symbols would be
/// something like 104.
#define DEF_HUFFMANPQ_MAXIMUM_CODEBITDEPTH 160
/// simple define which should represent more bits than we could ever see in a symbol (depth of tree)
/// note that this size does not effect the per-symbol memory used, its just a one time thing,
/// so we can make it as large as we want.
#define DEF_HUFFMANPQ_MAXFUNCBITS 200
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
/// The base HuffNode class is used to represent both leaf and internal HuffNodes. leaf HuffNodes have 0s in the
/// child pointers, and their value member corresponds to the character they encode. internal HuffNodes
/// don't have anything meaningful in their value member, but their child pointers point to other HuffNodes.
///
/// The huffman node class is used by the HuffmanCoder.
class HuffNode {
protected:
/// the weight of a node is just the frequency count of the node if it is a leaf, or the *sum* of frequencies for all children if it is not
THuffmanNodeWeight weight;
public:
/// constructor
HuffNode() { ; }
/// destructor
virtual ~HuffNode() { ; }
public:
/// The comparison operator used to order the priority queue.
bool operator>( const HuffNode* &a ) const {return weight > (a->get_weight());}
public:
/// recursive traversal of huffman tree for building bitcodes
virtual void traverse_buildbitcodes(std::bitset<DEF_HUFFMANPQ_MAXFUNCBITS> ¤t_bitset, int bitsetlength);
public:
/// freeing memory occupied by this node and any children
virtual void recursive_freechildren() { ; }
public:
/// get weight of node
THuffmanNodeWeight get_weight() const {return weight;}
/// increment node weight
void increment_weight(int increment) {weight+=increment;}
/// set node weight
void set_weight(THuffmanNodeWeight val) {weight=val;}
/// get memory used by the node and its children
virtual unsigned int get_memoryused() const {return sizeof(this);}
/// get pointer to left child
virtual HuffNode *get_child0() {return 0;}
/// get pointer to right child
virtual HuffNode *get_child1() {return 0;}
public:
/// is this a leaf node (this is an RTTI type function)
virtual bool isleaf() {return false;}
};
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
/// A derived huffman node for a node in the middle of the tree.
/// It holds pointers to other children nodes.
class HuffNode_Middle : public HuffNode {
protected:
/// pointer to left child
HuffNode *child0;
/// pointer to right child
HuffNode *child1;
public:
/// constructor
HuffNode_Middle(HuffNode* c0, HuffNode *c1 ) {weight=c0->get_weight()+c1->get_weight(); child0=c0; child1=c1;}
/// destructor
/// @note this does not free children
virtual ~HuffNode_Middle() { ; }
public:
void recursive_freechildren() {child0->recursive_freechildren(); delete child0; child1->recursive_freechildren(); delete child1;}
public:
virtual void traverse_buildbitcodes(std::bitset<DEF_HUFFMANPQ_MAXFUNCBITS> ¤t_bitset, int bitsetlength);
public:
virtual unsigned int get_memoryused() const {return sizeof(this)+(child0->get_memoryused())+(child1->get_memoryused());}
virtual HuffNode *get_child0() {return child0;}
virtual HuffNode *get_child1() {return child1;}
public:
bool isleaf() {return false;}
};
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
/// The leaves of the huffman tree contains actual symbol information.
/// Each node holds a bitvector representing its bitcode for output/input,
/// which is calculated by traversing the final huffman tree. We store it
/// explicitly for fast access during coding.
/// A node also contains the symbol id# that it corresponds to in the parser.
class HuffNode_Leaf : public HuffNode {
protected:
// length of the bitcode for this symbol
unsigned char code_bitsetlength;
/// the bits used to code this symbol in the huffman tree
std::bitset<DEF_HUFFMANPQ_MAXIMUM_CODEBITDEPTH> code_bitset;
/// the parser symbol id# that this node refers to
int symbolid;
public:
/// constructor
HuffNode_Leaf(int insymbolid, THuffmanNodeWeight inweight) {symbolid=insymbolid; weight=inweight;}
/// destructor
virtual ~HuffNode_Leaf() { ; }
public:
void recursive_freechildren() { ; }
public:
virtual void traverse_buildbitcodes(std::bitset<DEF_HUFFMANPQ_MAXFUNCBITS> ¤t_bitset, int bitsetlength);
public:
/// get the symbol id# corresponding to this symbol
int get_symbolid() {return symbolid;}
/// set the symbol id#
void set_symbolid(int val) {symbolid=val;}
/// get length of bitcode
unsigned char get_bitcode_length() {return code_bitsetlength;};
/// get a string representation of the bitcode (useful for debugging)
std::string get_bitcode_string();
public:
/// space used by the node
/// @todo do we really need to add sizeof(code_bitset) as we are doing?
virtual unsigned int get_memoryused() const {return (unsigned int)(sizeof(this)+sizeof(code_bitset));}
public:
virtual bool isleaf() {return true;}
public:
/// add the bits from this leaf symbol to the output bitwriter, will depend on derived class
virtual bool AddBitsToBitWriter(bitwriter &bw);
};
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
/// We need to make a special Greater<> classes used during sorting, since
/// our collection stl elements are built from pointers, and the default sort
/// will be done on pointer addresses instead of weights if we don't.
template< class T >
class PQWeightGreater
{
public:
bool operator() (const T& pLeft, const T& pRight) const
{
#ifdef OCTANE_USEJIBZHUFFMANPQGREATER
if (pLeft->get_weight()==pRight->get_weight()) return (pLeft->isleaf() && !pRight->isleaf());
//if (pLeft->get_weight()==pRight->get_weight()) return (!pLeft->isleaf() && pRight->isleaf());
#endif
return pLeft->get_weight()>pRight->get_weight();
}
};
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// typedef for priority queue of HuffNodes, which we use in huffman coding and in DictionarySet
typedef std::priority_queue< HuffNode*, std::vector< HuffNode* >, PQWeightGreater<HuffNode*> > THuffmanNodePriorityQueue;
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
#endif
//---------------------------------------------------------------------------
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -