📄 cnt1s.c
字号:
#include "header.h"typedef enum {A,B,C,D,E} letters;/*convert a byte to a letter*/letters b2l(int byte){ register int cnt1s=0, tmp, j; for(j=0; j<8; ++j){ tmp=(byte>>j) & 1; if( tmp==1 ) ++cnt1s; } if(cnt1s<3) return A; switch( cnt1s ){ case 3: return B; case 4: return C; case 5: return D; default: break; } return E;}/* get a byte from a stream of bytes */int getb(char *filename, int rt){ static short rest=0; static uniform wd; if(rest==0){ wd=uni(filename); rest=4; } --rest; return ( (wd >> rest*8) & 255 ); /*255=pow(2,8)-1*/}/* get a specific byte of a random integer */ int getsb(char *filename, int rt){ return ( (uni(filename) >> rt) & 255 );}/* count the 1's in the sequence of bytes */real cnt_stat(char *fn, counter no_wds, char *test, int rshft){ const real mean=2500, std=sqrt(5000); const real prob[]={37/256.,56/256.,70/256.,56/256.,37/256.}; int (*byte)(char*, int); register int wd; register counter i; short j, s; counter *f, f4[625], f5[3125]; real Ef, chsq=0, z; if( test[1]=='p' ){ byte=getsb; printf("\t%d to %d ", 25-rshft,32-rshft); } else{ byte=getb; printf("\t\t"); } for(i=0; i<625; ++i){ f4[i]=0; f5[i]=0; } for(i=625; i<3125; ++i){ f5[i]=0; }#define BYTE ( byte(fn, rshft) ) wd=(625*b2l(BYTE)+125*b2l(BYTE)+25*b2l(BYTE)+5*b2l(BYTE)+b2l(BYTE)); for(i=1; i<no_wds; ++i){ wd %= 625; /*Erase leftmost letter of w*/ ++f4[wd]; wd=5*wd+b2l(BYTE); /*Shift wd left, add new letter*/ ++f5[wd]; }/* compute Q5-Q4, where Q4,Q5=sum(obs-exp)**2/exp */ for(s=4; s<=5; ++s){ int ltrspwd, wdspos; letters ltr; switch(s){ case 4: wdspos=625; f=f4; ltrspwd=4; break; case 5: wdspos=3125; f=f5; chsq=-chsq; ltrspwd=5; break; } for(i=0; i<wdspos; ++i){ Ef=no_wds; wd=i; for(j=0; j<ltrspwd; ++j){ ltr=wd%5; Ef*=prob[ltr]; wd/=5; } chsq+=( *(f+i)-Ef )*( *(f+i)-Ef )/Ef; } } uni("close"); z=(chsq-mean)/std; printf("\t%.2f\t\t% .3f\t\t%f\n", chsq, z, 1-Phi(z)); return chsq;}/*print the title*/void cnt_ttl(char *fn, char *test){ if( strncmp(test, "specific", 2 )==0){ puts("\n\t|-------------------------------------------------------------|"); puts("\t| This is the COUNT-THE-1''s TEST for specific bytes. |"); puts("\t|Consider the file under test as a stream of 32-bit integers. |"); puts("\t|From each integer, a specific byte is chosen , say the left- |"); puts("\t|most: bits 1 to 8. Each byte can contain from 0 to 8 1''s, |"); puts("\t|with probabilitie 1,8,28,56,70,56,28,8,1 over 256. Now let |"); puts("\t|the specified bytes from successive integers provide a string|"); puts("\t|of (overlapping) 5-letter words, each \"letter\" taking values |"); puts("\t|A,B,C,D,E. The letters are determined by the number of 1''s,|"); puts("\t|in that byte: 0,1,or 2 ---> A, 3 ---> B, 4 ---> C, 5 ---> D, |"); puts("\t|and 6,7 or 8 ---> E. Thus we have a monkey at a typewriter |"); puts("\t|hitting five keys with with various probabilities: 37,56,70, |"); puts("\t|56,37 over 256. There are 5^5 possible 5-letter words, and |"); puts("\t|from a string of 256,000 (overlapping) 5-letter words, counts|"); puts("\t|are made on the frequencies for each word. The quadratic form|"); puts("\t|in the weak inverse of the covariance matrix of the cell |"); puts("\t|counts provides a chisquare test: Q5-Q4, the difference of |"); puts("\t|the naive Pearson sums of (OBS-EXP)^2/EXP on counts for 5- |"); puts("\t|and 4-letter cell counts. |"); puts("\t|-------------------------------------------------------------|\n"); printf("\t\tTest results for specific bytes from %s\n", fn); } else{ puts("\n\t|-------------------------------------------------------------|"); puts("\t| This is the COUNT-THE-1''s TEST on a stream of bytes. |"); puts("\t|Consider the file under test as a stream of bytes (four per |"); puts("\t|32 bit integer). Each byte can contain from 0 to 8 1''s, |"); puts("\t|with probabilities 1,8,28,56,70,56,28,8,1 over 256. Now let |"); puts("\t|the stream of bytes provide a string of overlapping 5-letter|"); puts("\t|words, each \"letter\" taking values A,B,C,D,E. The letters are|"); puts("\t|determined by the number of 1''s in a byte: 0,1,or 2 yield A,|"); puts("\t|3 yields B, 4 yields C, 5 yields D and 6,7 or 8 yield E. Thus|"); puts("\t|we have a monkey at a typewriter hitting five keys with vari-|"); puts("\t|ous probabilities (37,56,70,56,37 over 256). There are 5^5 |"); puts("\t|possible 5-letter words, and from a string of 256,000 (over- |"); puts("\t|lapping) 5-letter words, counts are made on the frequencies |"); puts("\t|for each word. The quadratic form in the weak inverse of |"); puts("\t|the covariance matrix of the cell counts provides a chisquare|"); puts("\t|test: Q5-Q4, the difference of the naive Pearson sums of |"); puts("\t|(OBS-EXP)^2/EXP on counts for 5- and 4-letter cell counts. |"); puts("\t|-------------------------------------------------------------|\n"); printf("\t\tTest result for the byte stream from %s\n", fn); } return;}/* type "stream" (use small case and don't forget the quotation mark) to test a stream of bytes. type "specific" to test specific bytes */void cnt1s(char *filename, char *test){ int rt=24; counter no_wds=256000; char *s="bits used"; if(strncmp(test, "stream", 2)==0){ rt=-1; s="\t"; no_wds=2560000; } cnt_ttl(filename, test); printf("\t (Degrees of freedom: 5^4-5^3=2500; sample size: %d)\n\n", no_wds); printf("\t%s\tchisquare\tz-score\t\tp-value\n", s); do{ cnt_stat(filename, no_wds, test, rt); --rt; }while(rt>=0); return;}/*main(){ cnt1s("binc", "stream"); cnt1s("binc", "specific"); return;}*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -