📄 longrun.c
字号:
/**************************************************** * * * Longest run of ones. Same math as in section * * 3.4 of NIST docs. * * * ***************************************************/#include "randtest.h"#include "bigfloat.h"extern FLOAT P2;/* return the combination of n things taken r at a time. */double combination( int n, int r){ double combo; int i; if (!r) return 1.0; combo = n; for( i=r; i>1; i--) combo *= (double)(n-i+1)/i; return combo;}/* Use bigfloat to compute combination terms to more bit precision. With 256 bit mantissa, this gets us up to 77 decimal digits. Expand bigfloat if necessary!*/void bigcombo( int top, int bot, FLOAT *com){ FLOAT k, j; one( com); if( (!bot) || (top == bot) ) return; while ( bot>0 ) { int_to_float( top, &k); int_to_float( bot, &j); multiply( &k, com, com); divide( com, &j, com); bot--; top--; }}/* Compute probability for a run of <= m bits in an M bit block (blocksize). Straight combinatorial computation fails for small m and large blocksize. So let's try again with the use of bigfloat package.*/double probOfRun( int m, int blocksize){ int j, u, r, l1, l2; int l3, l4, l5, count; FLOAT c1, c2, Pnmr, Pnm; null( &Pnm); for( r=0; r<=blocksize; r++) { null(&Pnmr); l1 = blocksize - r + 1; l2 = r/(m+1); u = (l1 < l2) ? l1 : l2;/* Attempt to get more accurate answers. It appears that for small m, the probabilities explode to >> 1. Either the formula is wrong or the difference between terms is too large to track. Compute difference explicitly, if enough terms exist. First thing to notice is that sum(r=0..M)C(M,r) = 2^M (I got this empiricly, the mathematicians can prove it.) This is what happens for the j=0 term. So we can just ignore this term, and deal with all terms of j > 0. This leaves us with Pnm - 1 = 1/2^M*sum(Pnmr) for those r which have u > 0. [Put back in for combination compuation]*/ l4 = blocksize - r; for(j=0; j<=u; j++) { bigcombo( l1, j, &c1); l3 = blocksize - j*(m+1); bigcombo(l3, l4, &c2); multiply( &c1, &c2, &c1); if(j&1) subtract( &Pnmr, &c1, &Pnmr); else add( &Pnmr, &c1, &Pnmr); } add( &Pnm, &Pnmr, &Pnm); }/* divide by 2^blocksize */ Pnm.expnt -= blocksize; return bigdown( &Pnm);}/* Compute polynomial correction term for n!. See 6.1.37 in Handbook of Mathematical functions.*/double nfactPoly( int n){ double coef[4] = {-571.0/2488320.0, -139.0/5180.0, 1.0/288.0, 1.0/12.0}; double f, sum; if (!n) return 1.0; f = 1.0/n; sum = (((coef[0]*f + coef[1])*f + coef[2])*f + coef[3])*f + 1.0; return sum;}/* for large M, use Stirling's approximation for n! n! ~ sqrt(2 pi n) * n^n * exp(-n) * [ 1 + 1/12n + ...](see F. Reif, "Fundamentals of Statistical and Thermal Physics"Appendix A for a derivation. Or other places, I'm sure.)*//* bug in emacs??????See associated docs to this file for derivation of formulasused here. Note that r = 0 and r = M are ignored since weare assuming 1/2^M is too small to see in a 64 bit number. In fact, we can ignore most terms when M > 1000. It's onlynear r ~ M/2 that any real values are found. So work outfrom the middle and quit when Pnmr < Pnm*1e-20*/double CalcPnmr( int r, int m, int blocksize){ int j, u, l1, l2; int l3, l4, l5, l6, flaga, flagb; double lnc1, lnc2, Pnmr; double te, term, pi2, j0; pi2 = log(8.0*atan(1.0))/2.0; te = blocksize * log(2.0); j0 = (blocksize + 0.5) * log( blocksize); l1 = blocksize - r + 1; l2 = r/(m + 1); u = (l1 < l2)? l1 : l2; Pnmr = 0.0;/* do the j = 0 term separately since log(0) = oo. */ if( (r < 10) || ((blocksize - r) < 10)) Pnmr = exp(log(combination( blocksize, blocksize - r)) - te); else { lnc1 = j0 - (blocksize - r + 0.5)*log( blocksize - r) - (r + 0.5)*log(r) - pi2; term = nfactPoly(blocksize)/nfactPoly(blocksize - r)/nfactPoly(r); Pnmr = term*exp(lnc1 - te); } if(u) { for( j=1; j<=u; j++) {/* first combination term */ if( j<10) { lnc1 = log(combination( l1, j)); flaga = 0; } else { flaga = 1; l3 = l1 - j; if(l3) lnc1 = (l1 + 0.5)*log(l1) - (j + 0.5)*log(j) - (l3 + 0.5)*log(l3) - pi2; else lnc1 = 0.0; }/* second combination term */ l4 = blocksize - j*(m + 1); l5 = l1 - 1; l6 = r - j*(m + 1); if( blocksize - r < 10) { lnc2 = log(combination( l4, blocksize - r)); flagb = 0; } else { flagb = 1; if(l6) lnc2 = (l4 + 0.5)*log(l4) - (l5 + 0.5)*log(l5) - (l6 + 0.5)*log(l6) - pi2; else lnc2 = 0.0; } if( lnc1 + lnc2 - te < -691) continue; /* too small to see */ term = exp(lnc1 + lnc2 - te); if( flaga) term *= nfactPoly(l1)/nfactPoly(j)/nfactPoly(l3); if( flagb) term *= nfactPoly(l4)/nfactPoly(l5)/nfactPoly(l6); if( j&1) Pnmr -= term; else Pnmr += term; } } //printf("r= %d Pnmr = %e\n", r, Pnmr); return Pnmr;}double probRunBig( int m, int blocksize){ int r, mid; double Pnmr1, Pnmr2, Pnm; mid = blocksize/2; if( m<6) { printf(" probRunBig fails for m < 6.\n"); printf(" rewrite code to get better answers.\n"); return (0.0); } Pnm = CalcPnmr( mid, m, blocksize); for( r=1; r<mid; r++) { Pnmr1 = CalcPnmr( mid+r, m, blocksize); Pnmr2 = CalcPnmr( mid-r, m, blocksize); if( Pnmr1 + Pnmr2 > Pnm*1e-20) Pnm += Pnmr1 + Pnmr2; else break; } return (Pnm);}/* bigfloat version of Stirling's expansion for n! Uses pure n! for n<30 so calling routines can work for any case.NOTE: this sucks. Replace with full Chebyshev expansion, orignore forever!*/void big_nfact( int n, FLOAT *nfact){ FLOAT twopin, nflt, npoly, nexp, overn, top, bot, term; char msg[128]; int ck = n; if( n<2) { one( nfact); return; } int_to_float( n, &nflt); if( n<30) { copy( &nflt, nfact); n--; while( n>1) { int_to_float( n, &nflt); multiply( &nflt, nfact, nfact); n--; } sprintf(msg, "%d factorial is",ck); printfloat(msg, nfact);// return; } /* get (2*PI*n)^.5 */ copy( &P2, &twopin); twopin.expnt += 2; /* gives me two pi */ multiply( &nflt, &twopin, &twopin); square_root( &twopin, &twopin);/* next compute exponential factor exp( n*ln(n) - n) */ big_ln( &nflt, &nexp); multiply( &nflt, &nexp, &nexp); subtract( &nexp, &nflt, &nexp); big_exp( &nexp, &nexp); multiply( &nexp, &twopin, &nexp); printfloat("zeroth order approx =", &nexp);/* finally do polynomial expansion. See 6.1.37 in "Handbook of Mathematical Functions" Abramowitz and Stegun*/ reciprical( &nflt, &overn); int_to_float( 571, &top); int_to_float( 2488320, &bot); divide( &top, &bot, &npoly); multiply( &overn, &npoly, &npoly); int_to_float( 139, &top); int_to_float( 5180, &bot); divide( &top, &bot, &term); add( &term, &npoly, &npoly); multiply( &overn, &npoly, &npoly); int_to_float( 288, &bot); reciprical( &bot, &term); subtract( &term, &npoly, &npoly); multiply( &overn, &npoly, &npoly); int_to_float( 12, &bot); reciprical( &bot, &term); add( &term, &npoly, &npoly); multiply( &overn, &npoly, &npoly); one( &term); add( &term, &npoly, &npoly); multiply( &nexp, &npoly, nfact); sprintf(msg, "%d factorial is", n); printfloat(msg, nfact);}/* compute two adjacent terms in Pnmr. Main idea here is to get as much accuracy in the difference between two similar terms as possible. Enter with parameters M (blocksize), m, r and j Returns FLOAT resutl of (M-r+1) (M-(j+1)*(m+1))! T_j - T_(j+1) = ---------------------------- { j! (M-r-j)! (r-(j+1)*(m+1))! (M-(m+1)j)(M-(m+1)j-1) ... (M-(m+1)j-m) 1 ------------------------------------------------ - ----- } (r-(m+1)j)(r-(m+1)j-1) ... (r-(m+1)j-m)(M-r-j+1) j+1 caller needs to track correct sign of difference.*/void calcdiff( int M, int m, int r, int j, FLOAT *Tj){ FLOAT t1, t2, t3, t4, t5; int l1, l2, k; int_to_float( M-r-j+1, &t5); reciprical(&t5, &t5); l1 = M - j*(m+1); l2 = r - j*(m+1); for( k=0; k<=m; k++) { int_to_float( l1, &t1); int_to_float( l2, &t2); multiply( &t1, &t5, &t5); divide( &t5, &t2, &t5); l1--; l2--; } int_to_float( j+1, &t1); reciprical( &t1, &t2); subtract( &t5, &t2, &t5); int_to_float( M-r+1, &t1); big_nfact( M-(j+1)*(m+1), &t1); big_nfact( M-r-j, &t2); big_nfact( j, &t3); big_nfact( r-(j+1)*(m+1), &t4); multiply( &t1, &t5, &t5); multiply( &t2, &t3, &t1); multiply( &t4, &t1, &t1); divide( &t5, &t1, Tj);}/* compute probability of long runs using bigfloat package. Enter with blocksize and length of run. Returns double (standard float) result.*/double probOfRunBig( int m, int blocksize){ FLOAT Mfact, Pnm, Pnmr, rfact, Mrfact; int j, u, r, l1, l2, k;/* first create M! because we'll need it a lot */ null( &Pnm); big_nfact( blocksize, &Mfact); for( r=0; r<=blocksize; r++) { null( &Pnmr); l1 = blocksize - r + 1; l2 = r/(m+1); u = (l1 < l2) ? l1 : l2; /* if u is even, do j=0 term separatly. Otherwise, compute two terms of nearly same magnitude to get more accuracy.*/ if( !u&1) { big_nfact( r, &rfact); big_nfact( blocksize - r, &Mrfact); multiply( &rfact, &Mrfact, &Pnmr); divide( &Mfact, &Pnmr, &Pnmr); for( j=1; j<=u; j+=2) { calcdiff( blocksize, m, r, j, &rfact); subtract( &Pnmr, &rfact, &Pnmr); } } else { for( j=0; j<u; j+=2) { calcdiff( blocksize, m, r, j, &rfact); add( &Pnmr, &rfact, &Pnmr); } } add( &Pnmr, &Pnm, &Pnm); }/* divide by 2^blocksize */ Pnm.expnt -= blocksize; return( bigdown( &Pnm));}/* This routine computes tables of probabilities for use with chi^2 testing of longest runs. Enter with blocksize being scanned and pointer to probability structure. Returns structure filled base on the following: if blocksize < 21, then classes n <= 1 to n = blocksize are computed exactly if 21 <= blocksize <= 1000, then classes n <= 1 to n = 20 and n >= 21 are computed exactly if blocksize > 1000, then classes n <= 8 to n = 27 and n >= 28 are computed approximatly Probability structure includes an int for the class and a double for the associated probability of that class. It is the caller's responsibility to make sure there are enough spaces to hold the maximum number of classes!*/void BuildChiTable( int blocksize, CHIENTRY *ctbl){ int i; double problist[21]; if( blocksize < 21) { for( i=0; i<blocksize; i++) problist[i] = probOfRun( i+1, blocksize); ctbl[0].class = 1; ctbl[0].prob = problist[0]; for( i=1; i<blocksize; i++) { ctbl[i].class = i+1; ctbl[i].prob = problist[i] - problist[i-1]; } return; }/* if( blocksize <= 100) */ { for( i=0; i<21; i++) problist[i] = probOfRun( i+1, blocksize); ctbl[0].class = 1; ctbl[0].prob = problist[0]; for( i=1; i<20; i++) { ctbl[i].class = i+1; ctbl[i].prob = problist[i] - problist[i-1]; } ctbl[20].class = 21; ctbl[20].prob = 1.0 - problist[20]; return; }/* blocksize is > 256, use approximations *//* for(i=0; i<21; i++) { problist[i] = probOfRunBig( i+1, blocksize); printf("problist[%d] = %e\n", i, problist[i]); } ctbl[0].class = 1; ctbl[0].prob = problist[0]; for( i=1; i<20; i++) { ctbl[i].class = i+1; ctbl[i].prob = problist[i] - problist[i-1]; } ctbl[20].class = 21; ctbl[20].prob = 1.0 - problist[20];*/}/* compute chi^2 statistic for longest runs. Requires table of probabilities and classes to be pre-computed. (Note that for large block size this is a hard thing to do.) Enter with pointer to data block, endianness, blocksize and number of blocks as well as number of entries to probability table. returns p-value as described in section 3.4 of NIST docs.*/double longrun( SRCPTR *src, int endian, int blocksize, int numblocks, CHIENTRY *chitable, int tblsz){ unsigned char mask, bitblock[100000]; int i, j, k, run, maxrun, start, end; int nu[32]; double chi, temp;/* count runs and increment bins */ for( i=0; i<32; i++) nu[i] = 0; for( i=0; i<numblocks; i++) { getbits( src, bitblock, endian, blocksize); maxrun = 0; run = 1; if( endian) mask = 0x80; else mask = 1; k = 0; start = 0; for( j=0; j<blocksize; j++) { if( bitblock[k] & mask) { if( start) run++; /* incremnt run count on bits set */ else start++; /* otherwise start new run */ } else { if( maxrun < run) maxrun = run; start = 0; run = 1; /* otherwise reset run counter */ } if( endian) { mask >>= 1; if( !mask) { mask = 0x80; k++; } } else { mask <<= 1; if( !mask) { mask = 1; k++; } } } if( maxrun > 31) nu[31]++; else nu[maxrun]++; }/* compute chi^2 and p-value. Need to align chitable classes with bins collected.*/ start = chitable[0].class; if( start > 1) { for( j=0; j<start; j++) nu[start] += nu[j]; } end = chitable[ tblsz - 1].class; if( end<32) { for( j=31; j>end; j--) nu[end] += nu[j]; } chi = 0.0; for( j=start; j<=end; j++) { temp = nu[j] - numblocks*chitable[j-1].prob; chi += temp*temp/(numblocks*chitable[j-1].prob); } temp = chisqr( chi, end - start); return temp;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -