📄 testbin.m
字号:
figure(1);
plot(log2(NN),B2-B1);
title(['Bits saved by adapting probability, N=',int2str(N)]);
xlabel('Number of ones');
ylabel('Bits');
grid on;
end
p1=0.5;
if 1
NN=[10:10:100,200:100:1000];
B1=zeros(size(NN));
B2=zeros(size(NN));
for i=1:length(NN)
N=NN(i);
N1=floor(NN(i)*p1);
n=0:(N1-1);
B1(i)=log2(N+1)+sum(log2(N-n))-sum(log2(N1-n)); % bits
B2(i)=log2(N+1)-N1*log2(N1)-(N-N1)*log2(N-N1)+N*log2(N); % bits
end
figure(1);
plot(log2(NN),B2-B1);
title(['Bits saved by adapting probability, p1=',num2str(p1)]);
xlabel('log2(N)');
ylabel('Bits');
grid on;
end
% test of approximation
for N=[50,100,500,1000,5000,10000];
p=1/2;
N1=floor(p*N);
% first the correct answer
B1=log2(N+1)+sum(log2(N-(0:(N1-1))))-sum(log2(N1-(0:(N1-1)))); % bits
% then the approximation
N0=N-N1;
% The teoretic approximation
% b1est=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)+s2+...
% (1/(N+1/2)+(N+1/2)/(N+1/4)-(N0+1/2)/(N0+1/4)-(N1+1/2)/(N1+1/4)-10)*log2(exp(1));
disp(['N=',int2str(N),', N1=',int2str(N1)]);
b1est=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)-0.010631*log2(log2(N))-1.28817;
disp([' 1000*Difference 1 is ',num2str(1000*(B1-b1est))]);
b2est=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)-1.3226;
disp([' 1000*Difference 2 is ',num2str(1000*(B1-b2est))]);
end
% both approximations are well useful
% try to find "optimal coefficients
A=zeros(0,1);b=zeros(0,1);
for N=[50,100,250,500,1000,5000,10000];
for p=[0.10,0.2,0.3,0.4,0.45,0.5];
N1=floor(p*N);
b1=log2(N+1)+sum(log2(N-(0:(N1-1))))-sum(log2(N1-(0:(N1-1)))); % bits
b2=N*log2(N)-(N-N1)*log2(N-N1)-N1*log2(N1);
b3=1.5*log2(N)-0.5*log2(N-N1)-0.5*log2(N1);
% A=[A;[log2(N),log2(N-N1),log2(N1),1]];
A=[A;1];
b=[b;b1-b2-b3];
end
end
x=A\b;
% log2(N), log2(N-N1), log2(N1) 1
% x= 1.48892478527407 -0.49461589408971 -0.49616311290196 -1.29382354586074
% norm(b-A*x) = 0.02512
% log2(N), log2(N-N1), log2(N1) 1
% x= [ 1.5; -0.5; -0.5; -1.32113849612329 ];
% norm(b-A*x) = 0.04150
% log2(N), log2(N-N1), log2(N1), log2(log2(N)), 1
% x= 1.498973 -0.494827 -0.49623245 -0.0616557 -1.190184
% norm(b-A*x) = 0.01417
% we should not count the smallest N1
A=zeros(0,2);b=zeros(0,1);
for N=[50,70,100,250,500,750,1000];
% for p=[0.1,0.2,0.3,0.4,0.45,0.5];
for p=0.05:0.05:0.5;
N1=floor(p*N);
if N1>20
b1=log2(N+1)+sum(log2(N-(0:(N1-1))))-sum(log2(N1-(0:(N1-1)))); % bits
b2=N*log2(N)-(N-N1)*log2(N-N1)-N1*log2(N1);
b3=1.5*log2(N)-0.5*log2(N-N1)-0.5*log2(N1);
A=[A;[log2(log2(N)),1]];
b=[b;b1-b2-b3];
end
end
end
x=A\b;
% log2(N), log2(N-N1), log2(N1) 1
% x= [ 1.5; -0.5; -0.5; -1.32234151562252 ];
% norm(b-A*x) = 0.03216
% x= [ 1.5; -0.5; -0.5; -1.32298392950550 ];
% norm(b-A*x) = 0.03547 % using more p-values
% x= [ 1.50283174633830 -0.504191846468 -0.49998398295 -1.31180783192693 ];
% norm(b-A*x) = 0.02252 % using more p-values
% log2(N), log2(N-N1), log2(N1), log2(log2(N)), 1
% x= 1.5; -0.5; -0.5; -0.010631 -1.28817076478349
% norm(b-A*x) = 0.02134 % using more p-values
% x= 1.509169 -0.50062 -0.49959 -0.069397 -1.18453873231815
% norm(b-A*x) = 0.00757 % using more p-values
% we should not count the smallest N1
A=zeros(0,5);b=zeros(0,1);
for N=[50,100,250,500,1000,5000,10000];
% for p=[0.1,0.2,0.3,0.4,0.45,0.5];
for p=0.05:0.05:0.5;
N1=floor(p*N);
if N1>20
b1=log2(N+1)+sum(log2(N-(0:(N1-1))))-sum(log2(N1-(0:(N1-1)))); % bits
b2=N*log2(N)-(N-N1)*log2(N-N1)-N1*log2(N1);
b3=1.5*log2(N)-0.5*log2(N-N1)-0.5*log2(N1);
A=[A;[log2(N),log2(N-N1),log2(N1),log2(log2(N)),1]];
% A=[A;[log2(log2(N)),1]];
b=[b;b1-b2];
end
end
end
x=A\b;
% log2(N), log2(N-N1), log2(N1) 1
% x= [ 1.5; -0.5; -0.5; -1.32234151562252 ];
% norm(b-A*x) = 0.03216
% x= [ 1.5; -0.5; -0.5; -1.32298392950550 ];
% norm(b-A*x) = 0.03547 % using more p-values
% x= [ 1.50283174633830 -0.504191846468 -0.49998398295 -1.31180783192693 ];
% norm(b-A*x) = 0.02252 % using more p-values
% log2(N), log2(N-N1), log2(N1), log2(log2(N)), 1
% x= 1.5; -0.5; -0.5; -0.010631 -1.28817076478349
% norm(b-A*x) = 0.02134 % using more p-values
% x= 1.509169 -0.50062 -0.49959 -0.069397 -1.18453873231815
% norm(b-A*x) = 0.00757 % using more p-values
% the very good approximation function could then be
function b=BitEst(N,N1);
if (N1>(N/2)); N1=N-N1; end;
N0=N-N1;
if (N>1000)
b=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)-1.3256;
elseif (N1>20)
b=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)-0.020984*log2(log2(N))-1.25708;
else
b=log2(N+1)+sum(log2(N-(0:(N1-1))))-sum(log2(N1-(0:(N1-1))));
end
return;
% test this
for N=[50,100,500,1000,5000,10000];
p=1/2;
N1=floor(p*N);
% first the correct answer
B1=log2(N+1)+sum(log2(N-(0:(N1-1))))-sum(log2(N1-(0:(N1-1)))); % bits
% then the approximation
if (N1>(N/2)); N1=N-N1; end;
N0=N-N1;
if (N>1000)
b=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)-1.3256;
elseif (N1>20)
b=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)-0.020984*log2(log2(N))-1.25708;
else
b=log2(N+1)+sum(log2(N-(0:(N1-1))))-sum(log2(N1-(0:(N1-1))));
end
% display results
disp(['N=',int2str(N),', N1=',int2str(N1),' bits=',num2str(B1),...
' approx.bits=',num2str(b)]);
disp([' 1000*Difference is ',num2str(1000*(B1-b))]);
end
% --------------------------------------------
% Test how a sequence could be split to make more compact (and slower) compression
clear all;
p1=0.42;
p0=1-p1;
ent=-(p0*log2(p0)+p1*log2(p1));
N=300;
% All sequences are possible. N1 is the number of ones, N0 the number of zeros,
% in the sequence of length N. We have N1+N0=N
% we make an example sequence
if 0
rand('state',301);
x=floor(rand(N,1)+p1);
% x=[x';x';x']; % make this not so much random sequence
x=x(:);
else
x1=abs(floor(filter(1,[1,-0.97],randn(1,N))+0.5)); % an AR-1 signal
x=[bitget(x1,4);bitget(x1,3);bitget(x1,2);bitget(x1,1)]; % split has no effect
x=[bitget(x1,4),bitget(x1,3),bitget(x1,2),bitget(x1,1)]; % split has effect
x=x(:);
end
% a sequence, x, of length N should be coded
N=length(x);
I=find(x);
N1=length(I);
if (N1>(N/2)); N1=N-N1; end; % then N1 is number of zeros
N0=N-N1;
b=N1*log2(N/N1)+N0*log2(N/N0);
disp(['N=',int2str(N),', N1=',int2str(N1),', (e)bits=',num2str(b)]);
% alternative 1, direct
if (N>1000)
b=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)-1.3256;
elseif (N1>20)
b=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)-0.020984*log2(log2(N))-1.25708;
else
b=log2(N+1)+sum(log2(N-(0:(N1-1))))-sum(log2(N1-(0:(N1-1))));
end
disp(['N=',int2str(N),', N1=',int2str(N1),' N1/N=',num2str(N1/N),' bits=',num2str(b)]);
bits1=b;
% alternativ 2, splitt in 2 sequences
I=find(x(1:(N-1)));
J=find(x(1:(N-1))==0);
N11=length(I);
% store N11, 0 <= N11 <= (N-1)
bits2=log2(N); % bits needed to store N11
if N11<(N/2)
x1=x(I+1);
x2=[x(1);x(J+1)];
else
x1=[x(1);x(I+1)];
x2=x(J+1);
end
Ntemp=N;N1temp=N1;N=length(x1);N1=length(find(x1));N1=min([N1,N-N1]);N0=N-N1;
if (N>1000)
b=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)-1.3256;
elseif (N1>20)
b=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)-0.020984*log2(log2(N))-1.25708;
else
b=log2(N+1)+sum(log2(N-(0:(N1-1))))-sum(log2(N1-(0:(N1-1))));
end
disp(['N=',int2str(N),', N1=',int2str(N1),' N1/N=',num2str(N1/N),' bits=',num2str(b)]);
N=Ntemp;N1=N1temp;
bits21=b;
Ntemp=N;N1temp=N1;N=length(x2);N1=length(find(x2));N1=min([N1,N-N1]);N0=N-N1;
if (N>1000)
b=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)-1.3256;
elseif (N1>20)
b=(N+3/2)*log2(N)-(N0+1/2)*log2(N0)-(N1+1/2)*log2(N1)-0.020984*log2(log2(N))-1.25708;
else
b=log2(N+1)+sum(log2(N-(0:(N1-1))))-sum(log2(N1-(0:(N1-1))));
end
disp(['N=',int2str(N),', N1=',int2str(N1),' N1/N=',num2str(N1/N),' bits=',num2str(b)]);
N=Ntemp;N1=N1temp;
bits22=b;
bits2=bits2+bits21+bits22;
% we must also indicate which method used
bits1=bits1+0.415; % probability 3/4
bits2=bits2+2; % probability 1/4
disp(['N=',int2str(N),', N1=',int2str(N1),', bits1=',num2str(bits1),...
', bits2=',num2str(bits2)]);
% This demonstrate some of the features of this coding technique
% it is only effective if the dependencies are to the previous symbol
% and not to the symbol 2 or more positions before.
% Also all possible dependicies where position has information will
% not be resolved.
% It is an effective way of coding completly random binary sequences (where p is
% the same for all symbols and no depenencies).
% Also if the dependency is to the preceeding bit this is effective coding.
% Note that if we force a split in several levels this is not the same as
% splitting for previous symbols '00', '01', '10' and '11'.
% For effective coding, it will always be important to resolve as much as possible
% of the dependencies early in the coding process, i.e. when
% we have most (general) knowledge of the symbols (and any dependencies).
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -