📄 huffman.m

📁 Generates a static minimum-variance Huffman code tree.
💻 M
字号:
function C = huffman(D,W,varargin)
% HUFFMAN   Huffman encoder.
%    C = HUFFMAN(D,W) generates a static minimum-variance Huffman tree and
%    corresponding codebook C for the source symbols with nonnegative
%    weights given by vector W, using a D-ary output code alphabet,
%    e.g. D = 2 for a binary output code alphabet.
%    W is an N-vector of nonnegative source symbol weights, where N is the
%    number of source symbols.
%    The generated Huffman code C minimizes the weighted codeword length,
%    e.g. if the weight is the probability of the source symbol, then C
%    minimizes the expected codeword length.
%    C is an N-vector of strings (i.e. cell array) giving the
%    codewords for each of the N source symbols in W.
%
%    C = HUFFMAN(D,W,L,'screen') additionally displays a simple text
%    rendition of the resulting Huffman code tree, with the source symbol
%    labels given by L. L is an N-vector of strings (i.e. cell array)
%    containing the labels for each of the N source symbols.
%
%    C = HUFFMAN(D,W,L,'dot') additionally creates a DOT file describing the
%    resulting Huffman code tree, and runs Graphviz DOT to create the
%    corresponding PNG image. The output files are automatically
%    time-stamped. The source symbol labels are given by L, which is an
%    N-vector of strings (i.e. cell array) containing the labels for each
%    of the N source symbols. Please ensure that Graphviz DOT is accessible
%    from the current directory (e.g. by adding it to the path).
%
%    Multiple output can be requested using a single command, e.g.
%    C = HUFFMAN(D,W,L,'dot','screen')


% VALIDATE INPUT ARGUMENTS

% check vector W describing the nonnegative source symbol weights
if ~isvector(W)
	error('Input argument W must be a vector.');
end

if min(W) < 0
	error('Input argument W must contain nonnegative entries.');
end

% number of source symbols
N = length(W);

% check code alphabet size D
if ~isscalar(D)
	error('Input argument D must be a scalar.');
end

codeAlphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ';
if D > length(codeAlphabet)
	error(['Not enough literal characters to accommodate requested code alphabet; '...
		'please modify the source code manually to add more characters.']);
end

% source symbol labels
if ~isempty(varargin)
	L = varargin{1};

	if ~isvector(L)
		error('Input argument L must be a vector.');
	end

	if  length(L) ~= N
		error('Input arguments W and L must be vectors of the same length.');
	end
end

% output types
outputScreen = false;
outputDot = false;

for i = 2:length(varargin)
	output = varargin{i};

	if strcmp(output,'screen')
		outputScreen = true;
	elseif strcmp(output,'dot')
		outputDot = true;
	else
		error(['Input argument ''' output ''' is not a recognized output type.']);
	end
end


% BUILD HUFFMAN CODE TREE

% create a tree node for each of the N source symbols
for i = 1:N
	H(i).weight = W(i);  % weight
	H(i).children = [];  % this node has no children
	H(i).height = 0;     % this subtree has height 0
	H(i).exposed = true; % this node is exposed
	H(i).dummy = false;  % this node is not a dummy
end

% check if dummy nodes are needed
k = floor((N-1) / (D-1));
nDummyNodes = 0;
if (N-1) - k*(D-1) ~= 0
	nDummyNodes = (k+1) * (D-1) - (N-1);

	for i = 1:nDummyNodes
		H(N+i).weight = 0;     % weight
		H(N+i).children = [];  % this node has no children
		H(N+i).height = 0;     % this subtree has height 0
		H(N+i).exposed = true; % this node is exposed
		H(N+i).dummy = true;   % this node is a dummy
	end
end

% iteratively merge D subtrees at a time, until only 1 subtree is left
while sum([H.exposed] == true) > 1

	% sort exposed nodes in ascending order of weight
	exposedIndices = find([H.exposed] == true);
	allWeights = [H.weight];
	allHeights = [H.height];
	[Y,I] = sortrows([allWeights(exposedIndices)' allHeights(exposedIndices)'], [1 2]);
	sortedIndices = exposedIndices(I);

	% combine first D nodes into a new node
	combinedIndices = sortedIndices(1:D);
	for i = 1:D
		H(combinedIndices(i)).exposed = false;
	end

	ii = length(H) + 1;
	H(ii).weight = sum(allWeights(combinedIndices));  % sum of weights
	H(ii).children = sortedIndices(1:D);  % this node has D children
	H(ii).height = max(allHeights(combinedIndices)) + 1; % height of new subtree
	H(ii).exposed = true; % this node is exposed
	H(ii).dummy = false;  % this node is not a dummy
end

% PARSE CODE TREE TO GENERATE CODEWORDS

rootIndex = find([H.exposed] == true);
if isscalar(rootIndex) == false
	error('Internal error building Huffman tree.');
end

% initialize return value, i.e. codewords
for i = 1:N
	C{i} = '';
end

if outputDot
	% open DOT output file for writing
	filename = ['dot.' datestr(now,'yyyymmdd.HHMMSS')];
	fid = fopen([filename '.txt'],'w');

	fprintf(fid,'digraph Huffman {\nrankdir=TB');
end

% parse the code tree, beginning from the root
parseCodeTree(rootIndex,'');

if outputScreen
	% display statistics
	fprintf('\n\nSTATISTICS:');
	fprintf('\n   Number of source symbols N = %d', N);

	if nDummyNodes > 0
		fprintf('\n   Number of dummy nodes added = %d', nDummyNodes);
	end

	tw = 0;
	for i = 1:N
		tw = tw + W(i);
	end
	fprintf('\n   Total weight = sum{i=1:N} W_i = %g', tw);

	wcl = 0;
	for i = 1:N
		wcl = wcl + length(C{i}) * W(i);
	end
	fprintf('\n   (A) Weighted codeword length = sum{i=1:N} length(C_i) * W_i = %g', wcl);

	wll = 0;
	for i = 1:N
		wll = wll + length(L{i}) * W(i);
	end
	fprintf('\n   (B) Weighted label length    = sum{i=1:N} length(L_i) * W_i = %g', wll);

	fprintf('\n   Ratio (A) / (B) = %g', wcl / wll);

	fprintf('\n\n');
end

% call Graphviz dot if requested
if outputDot
	fprintf(fid,'\n}\n');
	fclose(fid);
	eval(['!dot -Tpng ' filename '.txt -o ' filename '.png']);

	% display image using default external program
	eval(['!' filename '.png']);

	% display image in MATLAB
	%[X, map] = imread([filename '.png']);
	%figure(1);
	%image(X);
	%colormap(map);
	%axis off;
	%axis image;
end


% NESTED FUNCTIONS

	function parseCodeTree(nodeIndex,nodeCodeword)
		% Parse nodes in the code tree recursively.

		node = H(nodeIndex);

		if outputScreen
			% create padding string
			padding = [''];
			for l = 1:length(nodeCodeword);
				padding = [padding '   '];
			end
		end

		if outputScreen
			if nodeIndex == rootIndex
				rootIndicator = ' (ROOT)';
			else
				rootIndicator = '';
			end
		end

		if isempty(node.children)
			if node.dummy == true
				% this is a dummy leaf node
				if outputScreen
					fprintf('\n%s[''%s'',%g]%s (DUMMY)',...
						padding, nodeCodeword, node.weight,rootIndicator);
				end

				if outputDot
					fprintf(fid,'\nNODEID%s [shape=record, label="''%s''\\n[%g]\\n(DUMMY)%s", style=filled, fillcolor=lightgrey]',...
						nodeCodeword, nodeCodeword, node.weight, rootIndicator);
				end
			else
				% this is a source symbol (non-dummy leaf node)
				C{nodeIndex} = nodeCodeword;

				if outputScreen
					fprintf('\n%s[''%s'',%g] ''%s''%s (SOURCE)',...
						padding, nodeCodeword, node.weight, L{nodeIndex}, rootIndicator);
				end

				if outputDot
					fprintf(fid,'\nNODEID%s [shape=record, label="''%s''\\n[%g]\\n''%s''\\n(SOURCE)%s", style=filled, fillcolor=salmon]',...
						nodeCodeword, nodeCodeword, node.weight, L{nodeIndex}, rootIndicator );
				end
			end
		else
			% this node is not a leaf
			if outputScreen
				fprintf('\n%s[''%s'',%g]%s',...
					padding, nodeCodeword, node.weight, rootIndicator);
			end

			if outputDot
				fprintf(fid,'\nNODEID%s [shape=record, label="''%s''\\n[%g]\\n%s", style=filled, fillcolor=white]',...
					nodeCodeword, nodeCodeword, node.weight, rootIndicator);
			end

			% recurse into child nodes
			for l = 1:length(node.children)
				childCodeword = [nodeCodeword, codeAlphabet(l)];

				if outputDot
					if H(node.children(l)).dummy == true
						% child is a dummy node
						fprintf(fid,'\nNODEID%s -> NODEID%s [sametail, label="%s", style=dotted]',...
							nodeCodeword, childCodeword, codeAlphabet(l));
					else
						% child is not a dummy node
						fprintf(fid,'\nNODEID%s -> NODEID%s [sametail, label="%s"]',...
							nodeCodeword, childCodeword, codeAlphabet(l));
					end
				end

				parseCodeTree(node.children(l), childCodeword);
			end
		end
	end

end
💿 文件大小 37 K
👤 上传用户 villyc
📂 所属分类压缩解压
📄 代码行数 294 行
💻 语言类型 M
🏷️ 相关标签

#minimum-variance #Generates #Huffman #static
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -