📄 longrun.c

📁 随机数算法
💻 C
字号:
/**************************************************** *                                                  * *  Longest run of ones.  Same math as in section   * *  3.4 of NIST docs.                               * *                                                  * ***************************************************/#include "randtest.h"#include "bigfloat.h"extern FLOAT P2;/*  return the combination of n things taken r at a time.  */double combination( int n, int r){  double combo;  int i;    if (!r) return 1.0;  combo = n;  for( i=r; i>1; i--)    combo *= (double)(n-i+1)/i;  return combo;}/*  Use bigfloat to compute combination terms to more bit    precision.  With 256 bit mantissa, this gets us up to    77 decimal digits.  Expand bigfloat if necessary!*/void bigcombo( int top, int bot, FLOAT *com){  FLOAT k, j;  one( com);  if( (!bot) || (top == bot) ) return;  while ( bot>0 )  {    int_to_float( top, &k);    int_to_float( bot, &j);    multiply( &k, com, com);    divide( com, &j, com);    bot--;    top--;  }}/*  Compute probability for a run of <= m bits in    an M bit block (blocksize).    Straight combinatorial computation fails for small m    and large blocksize.  So let's try again with the use    of bigfloat package.*/double probOfRun( int m, int blocksize){  int j, u, r, l1, l2;  int l3, l4, l5, count;  FLOAT c1, c2, Pnmr, Pnm;  null( &Pnm);  for( r=0; r<=blocksize; r++)  {    null(&Pnmr);    l1 = blocksize - r + 1;    l2 = r/(m+1);    u = (l1 < l2) ? l1 : l2;/*  Attempt to get more accurate answers.    It appears that for small m, the probabilities    explode to >> 1.  Either the formula is wrong    or the difference between terms is too large    to track.  Compute difference explicitly, if    enough terms exist.    First thing to notice is that sum(r=0..M)C(M,r) = 2^M    (I got this empiricly, the mathematicians can prove it.)    This is what happens for the j=0 term.  So we can    just ignore this term, and deal with all terms of j > 0.    This leaves us with Pnm - 1 = 1/2^M*sum(Pnmr) for those r    which have u > 0.  [Put back in for combination compuation]*/    l4 = blocksize - r;    for(j=0; j<=u; j++)    {            bigcombo( l1, j, &c1);      l3 = blocksize - j*(m+1);      bigcombo(l3, l4, &c2);      multiply( &c1, &c2, &c1);      if(j&1) subtract( &Pnmr, &c1, &Pnmr);      else add( &Pnmr, &c1, &Pnmr);    }    add( &Pnm, &Pnmr, &Pnm);  }/*  divide by 2^blocksize  */  Pnm.expnt -= blocksize;  return bigdown( &Pnm);}/*  Compute polynomial correction term for n!.  See    6.1.37 in Handbook of Mathematical functions.*/double nfactPoly( int n){  double coef[4] = {-571.0/2488320.0, -139.0/5180.0, 1.0/288.0, 1.0/12.0};  double f, sum;  if (!n) return 1.0;  f = 1.0/n;  sum = (((coef[0]*f + coef[1])*f + coef[2])*f + coef[3])*f + 1.0;  return sum;}/*  for large M, use Stirling's approximation for n!    n! ~ sqrt(2 pi n) * n^n * exp(-n) * [ 1 + 1/12n + ...](see F. Reif, "Fundamentals of Statistical and Thermal Physics"Appendix A for a derivation.  Or other places, I'm sure.)*//*  bug in emacs??????See associated docs to this file for derivation of formulasused here.  Note that r = 0 and r = M are ignored since weare assuming 1/2^M is too small to see in a 64 bit number.   In fact, we can ignore most terms when M > 1000.  It's onlynear r ~ M/2 that any real values are found.  So work outfrom the middle and quit when Pnmr < Pnm*1e-20*/double CalcPnmr( int r, int m, int blocksize){  int j, u, l1, l2;  int l3, l4, l5, l6, flaga, flagb;  double lnc1, lnc2, Pnmr;  double te, term, pi2, j0;  pi2 = log(8.0*atan(1.0))/2.0;  te = blocksize * log(2.0);  j0 = (blocksize + 0.5) * log( blocksize);  l1 = blocksize - r + 1;  l2 = r/(m + 1);  u = (l1 < l2)? l1 : l2;  Pnmr = 0.0;/* do the j = 0 term separately since log(0) = oo.  */  if( (r < 10) || ((blocksize - r) < 10))    Pnmr = exp(log(combination( blocksize, blocksize - r)) - te);  else  {    lnc1 = j0 - (blocksize - r + 0.5)*log( blocksize - r)                 - (r + 0.5)*log(r) - pi2;    term = nfactPoly(blocksize)/nfactPoly(blocksize - r)/nfactPoly(r);    Pnmr = term*exp(lnc1 - te);  }  if(u)  {    for( j=1; j<=u; j++)    {/*  first combination term  */      if( j<10)       {	lnc1 = log(combination( l1, j));	flaga = 0;      }      else      {	flaga = 1;	l3 = l1 - j;	if(l3)	  lnc1 = (l1 + 0.5)*log(l1) - (j + 0.5)*log(j)                    - (l3 + 0.5)*log(l3) - pi2;	else	  lnc1 = 0.0;      }/*  second combination term  */      l4 = blocksize - j*(m + 1);      l5 = l1 - 1;      l6 = r - j*(m + 1);      if( blocksize - r < 10)       {	lnc2 = log(combination( l4, blocksize - r));	flagb = 0;      }      else      {	flagb = 1;	if(l6)	  lnc2 = (l4 + 0.5)*log(l4) - (l5 + 0.5)*log(l5)                    - (l6 + 0.5)*log(l6) - pi2;	else	  lnc2 = 0.0;      }      if( lnc1 + lnc2 - te < -691) continue; /* too small to see */      term = exp(lnc1 + lnc2 - te);      if( flaga) term *= nfactPoly(l1)/nfactPoly(j)/nfactPoly(l3);      if( flagb) term *= nfactPoly(l4)/nfactPoly(l5)/nfactPoly(l6);      if( j&1) Pnmr -= term;      else Pnmr += term;    }  }  //printf("r= %d Pnmr = %e\n", r, Pnmr);  return Pnmr;}double probRunBig( int m, int blocksize){  int r, mid;  double Pnmr1, Pnmr2, Pnm;    mid = blocksize/2;  if( m<6)  {    printf(" probRunBig fails for m < 6.\n");    printf("  rewrite code to get better answers.\n");    return (0.0);  }    Pnm = CalcPnmr( mid, m, blocksize);  for( r=1; r<mid; r++)  {    Pnmr1 = CalcPnmr( mid+r, m, blocksize);    Pnmr2 = CalcPnmr( mid-r, m, blocksize);    if( Pnmr1 + Pnmr2 > Pnm*1e-20) Pnm += Pnmr1 + Pnmr2;    else break;  }  return (Pnm);}/*  bigfloat version of Stirling's expansion for n!    Uses pure n! for n<30 so calling routines can work    for any case.NOTE: this sucks.  Replace with full Chebyshev expansion, orignore forever!*/void big_nfact( int n, FLOAT *nfact){  FLOAT twopin, nflt, npoly, nexp, overn, top, bot, term;  char msg[128];  int ck = n;  if( n<2)  {    one( nfact);    return;  }    int_to_float( n, &nflt);  if( n<30)  {    copy( &nflt, nfact);    n--;    while( n>1)    {      int_to_float( n, &nflt);      multiply( &nflt, nfact, nfact);      n--;    }  sprintf(msg, "%d factorial is",ck);  printfloat(msg, nfact);//    return;  }  /*  get (2*PI*n)^.5  */    copy( &P2, &twopin);  twopin.expnt += 2;  /* gives me two pi */  multiply( &nflt, &twopin, &twopin);  square_root( &twopin, &twopin);/*  next compute exponential factor exp( n*ln(n) - n) */  big_ln( &nflt, &nexp);  multiply( &nflt, &nexp, &nexp);  subtract( &nexp, &nflt, &nexp);  big_exp( &nexp, &nexp);  multiply( &nexp, &twopin, &nexp);  printfloat("zeroth order approx =", &nexp);/*  finally do polynomial expansion.  See 6.1.37 in    "Handbook of Mathematical Functions" Abramowitz and Stegun*/  reciprical( &nflt, &overn);  int_to_float( 571, &top);  int_to_float( 2488320, &bot);  divide( &top, &bot, &npoly);  multiply( &overn, &npoly, &npoly);  int_to_float( 139, &top);  int_to_float( 5180, &bot);  divide( &top, &bot, &term);  add( &term, &npoly, &npoly);  multiply( &overn, &npoly, &npoly);  int_to_float( 288, &bot);  reciprical( &bot, &term);  subtract( &term, &npoly, &npoly);  multiply( &overn, &npoly, &npoly);  int_to_float( 12, &bot);  reciprical( &bot, &term);  add( &term, &npoly, &npoly);  multiply( &overn, &npoly, &npoly);  one( &term);  add( &term, &npoly, &npoly);  multiply( &nexp, &npoly, nfact);  sprintf(msg, "%d factorial is", n);  printfloat(msg, nfact);}/*  compute two adjacent terms in Pnmr.    Main idea here is to get as much accuracy in the    difference between two similar terms as possible.    Enter with parameters M (blocksize), m, r and j    Returns FLOAT resutl of                     (M-r+1) (M-(j+1)*(m+1))!    T_j - T_(j+1) = ---------------------------- {                    j! (M-r-j)! (r-(j+1)*(m+1))!  (M-(m+1)j)(M-(m+1)j-1) ... (M-(m+1)j-m)                1  ------------------------------------------------  -  ----- }  (r-(m+1)j)(r-(m+1)j-1) ... (r-(m+1)j-m)(M-r-j+1)      j+1 caller needs to track correct sign of difference.*/void calcdiff( int M, int m, int r, int j, FLOAT *Tj){  FLOAT t1, t2, t3, t4, t5;  int l1, l2, k;  int_to_float( M-r-j+1, &t5);  reciprical(&t5, &t5);  l1 = M - j*(m+1);  l2 = r - j*(m+1);  for( k=0; k<=m; k++)  {    int_to_float( l1, &t1);    int_to_float( l2, &t2);    multiply( &t1, &t5, &t5);    divide( &t5, &t2, &t5);    l1--;    l2--;  }  int_to_float( j+1, &t1);  reciprical( &t1, &t2);  subtract( &t5, &t2, &t5);  int_to_float( M-r+1, &t1);  big_nfact( M-(j+1)*(m+1), &t1);  big_nfact( M-r-j, &t2);  big_nfact( j, &t3);  big_nfact( r-(j+1)*(m+1), &t4);  multiply( &t1, &t5, &t5);  multiply( &t2, &t3, &t1);  multiply( &t4, &t1, &t1);  divide( &t5, &t1, Tj);}/*  compute probability of long runs using bigfloat package.    Enter with blocksize and length of run.    Returns double (standard float) result.*/double probOfRunBig( int m, int blocksize){  FLOAT Mfact, Pnm, Pnmr, rfact, Mrfact;  int j, u, r, l1, l2, k;/*  first create M! because we'll need it a lot  */  null( &Pnm);  big_nfact( blocksize, &Mfact);  for( r=0; r<=blocksize; r++)  {    null( &Pnmr);    l1 = blocksize - r + 1;    l2 = r/(m+1);    u = (l1 < l2) ? l1 : l2;   /*  if u is even, do j=0 term separatly.  Otherwise,    compute two terms of nearly same magnitude to get     more accuracy.*/    if( !u&1)    {      big_nfact( r, &rfact);      big_nfact( blocksize - r, &Mrfact);      multiply( &rfact, &Mrfact, &Pnmr);      divide( &Mfact, &Pnmr, &Pnmr);      for( j=1; j<=u; j+=2)      {	calcdiff( blocksize, m, r, j, &rfact);	subtract( &Pnmr, &rfact, &Pnmr);      }    }    else    {      for( j=0; j<u; j+=2)      {	calcdiff( blocksize, m, r, j, &rfact);	add( &Pnmr, &rfact, &Pnmr);      }    }    add( &Pnmr, &Pnm, &Pnm);  }/*  divide by 2^blocksize  */  Pnm.expnt -= blocksize;  return( bigdown( &Pnm));}/*  This routine computes tables of probabilities for    use with chi^2 testing of longest runs.  Enter with    blocksize being scanned and pointer to probability    structure.  Returns structure filled base on the     following:    if blocksize < 21, then classes n <= 1 to n = blocksize            are computed exactly    if 21 <= blocksize <= 1000, then classes n <= 1 to            n = 20 and n >= 21 are computed exactly    if blocksize > 1000, then classes n <= 8 to n = 27 and            n >= 28 are computed approximatly    Probability structure includes an int for the class and    a double for the associated probability of that class.    It is the caller's responsibility to make sure there are    enough spaces to hold the maximum number of classes!*/void BuildChiTable( int blocksize, CHIENTRY *ctbl){  int i;  double problist[21];  if( blocksize < 21)  {    for( i=0; i<blocksize; i++)      problist[i] = probOfRun( i+1, blocksize);    ctbl[0].class = 1;    ctbl[0].prob = problist[0];    for( i=1; i<blocksize; i++)    {      ctbl[i].class = i+1;      ctbl[i].prob = problist[i] - problist[i-1];    }    return;  }/*  if( blocksize <= 100) */  {    for( i=0; i<21; i++)      problist[i] = probOfRun( i+1, blocksize);    ctbl[0].class = 1;    ctbl[0].prob = problist[0];    for( i=1; i<20; i++)    {      ctbl[i].class = i+1;      ctbl[i].prob = problist[i] - problist[i-1];    }    ctbl[20].class = 21;    ctbl[20].prob = 1.0 - problist[20];    return;  }/*  blocksize is > 256, use approximations *//*  for(i=0; i<21; i++)    {    problist[i] = probOfRunBig( i+1, blocksize);    printf("problist[%d] = %e\n", i, problist[i]);    }  ctbl[0].class = 1;  ctbl[0].prob = problist[0];  for( i=1; i<20; i++)  {    ctbl[i].class = i+1;    ctbl[i].prob = problist[i] - problist[i-1];  }  ctbl[20].class = 21;  ctbl[20].prob = 1.0 - problist[20];*/}/*  compute chi^2 statistic for longest runs.    Requires table of probabilities and classes to    be pre-computed.  (Note that for large block size    this is a hard thing to do.)    Enter with pointer to data block, endianness,    blocksize and number of blocks as well as number    of entries to probability table.    returns p-value as described in section 3.4 of    NIST docs.*/double longrun( SRCPTR *src, int endian, 		int blocksize, int numblocks, 		CHIENTRY *chitable, int tblsz){  unsigned char mask, bitblock[100000];  int i, j, k, run, maxrun, start, end;  int nu[32];  double chi, temp;/*  count runs and increment bins  */  for( i=0; i<32; i++) nu[i] = 0;  for( i=0; i<numblocks; i++)  {    getbits( src, bitblock, endian, blocksize);    maxrun = 0;    run = 1;    if( endian) mask = 0x80;    else mask = 1;    k = 0;    start = 0;    for( j=0; j<blocksize; j++)    {      if( bitblock[k] & mask)      {	if( start) run++;  /*  incremnt run count on bits set  */	else start++;      /*  otherwise start new run */      }      else      {	if( maxrun < run) maxrun = run;	start = 0;	run = 1;           /* otherwise reset run counter */      }      if( endian)      {	mask >>= 1;	if( !mask)	{	  mask = 0x80;	  k++;	}      }      else      {	mask <<= 1;	if( !mask)	{	  mask = 1;	  k++;	}      }    }    if( maxrun > 31) nu[31]++;    else nu[maxrun]++;  }/*  compute chi^2 and p-value.    Need to align chitable classes with bins collected.*/  start = chitable[0].class;  if( start > 1)  {    for( j=0; j<start; j++) nu[start] += nu[j];  }  end = chitable[ tblsz - 1].class;  if( end<32)  {    for( j=31; j>end; j--) nu[end] += nu[j];  }  chi = 0.0;  for( j=start; j<=end; j++)  {    temp = nu[j] - numblocks*chitable[j-1].prob;    chi += temp*temp/(numblocks*chitable[j-1].prob);  }  temp = chisqr( chi, end - start);  return temp;}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -