📄 nrcx.java

📁 n algorithm for domain independent linear text segmentation This the Windows version of the C99 al
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
package uk.ac.man.cs.choif.extend;import java.io.*;import java.util.*;import uk.ac.man.cs.choif.extend.io.*;import uk.ac.man.cs.choif.extend.sort.*;/** * Numerical recipes in C : 2nd Edition * Creation date: (02/04/00 15:35:06) * @author: Freddy Choi */public class NRCx {	/** float Reference */	public static class floatR {		public float value = 0;		public String toString() { return Float.toString(value); }	}	/* Table of precomputed values for factln */	private static float[] factln_p = new float[101];/** * P. 617 Computes the mean and variance of a dataset * Note, the first element data[0] does not count  * Creation date: (02/07/00 02:08:31) * @param data float[] * @param ave floatR  *				Mean * @param var floatR *				Variance */public final static void avevar(final float[] data, floatR ave, floatR var) {	int n = data.length-1;	int j;	float s;	ave.value=(var.value=0);	for (j=1;j<=n;j++) ave.value += data[j];	ave.value /= n;	for (j=1;j<=n;j++) {		s=data[j]-ave.value;		var.value += s*s;	}	var.value /= (n-1);}/** * P. 227 : Continued fraction evaluation * Creation date: (02/07/00 02:32:51) * @return float * @param a float * @param b float * @param x float */public final static float betacf(float a, float b, float x) {	final int ITMAX = 100;	final float EPS = 3e-7f;	float qap,qam,qab,em,tem,d;	float bz,bm=1,bp,bpp;	float az=1,am=1,ap,app,aold;	int m;	qab=a+b;	qap=a+1;	qam=a-1;	bz=1-qab*x/qap;	for (m=1;m<=ITMAX;m++) {		em=(float) m;		tem=em+em;		d=em*(b-em)*x/((qam+tem)*(a+tem));		ap=az+d*am;		bp=bz+d*bm;		d = -(a+em)*(qab+em)*x/((qap+tem)*(a+tem));		app=ap+d*az;		bpp=bp+d*bz;		aold=az;		am=ap/bpp;		bm=bp/bpp;		az=app/bpp;		bz=1;		if (Math.abs(az-aold) < (EPS*Math.abs(az))) return az;	}	Debugx.handle(new Error("betacf : a or b too big, or ITMAX too small"));	return 0;}/** * P. 227 : Incomplete beta function * Creation date: (02/07/00 02:25:16) * @return float * @param a float * @param b float * @param x float */public final static float betai(final float a, final float b, final float x) {	float bt;	//float gammln(),betacf();	if (x < 0.0 || x > 1.0) Debugx.handle(new Error("betai : x < 0 || x > 1"));	if (x == 0.0 || x == 1.0) bt=0;	else		bt=(float) Math.exp(gammln(a+b)-gammln(a)-gammln(b)+a*Math.log(x)+b*Math.log(1.0-x));	if (x < (a+1.0)/(a+b+2.0))		return bt*betacf(a,b,x)/a;	else		return 1-bt*betacf(b,a,1-x)/b;}/** * Given a directory of sample files, compute a table * of inter-file differences and significance * Creation date: (02/07/00 03:06:19) * @param dir java.io.File * @param out java.io.File */public final static void compareData(final File dir, final File outFile) {	LineOutput out = new LineOutput(outFile);	File[] file = IOx.fileList(dir);	Arrayx.sort(file, new FileNameAsc());	float[] data1;	float[] data2;	floatR m = new floatR();	floatR v = new floatR();	floatR d = new floatR();	floatR prob = new floatR();	for (int i=0; i<file.length; i++) {		for (int j=0; j<file.length; j++) {			out.println("====================");			out.println("data1 : " + file[i].getName());			out.println("data2 : " + file[j].getName());			data1 = readData(file[i]);			data2 = readData(file[j]);			avevar(data1, m, v);			out.println("data1 (m,v)    : " + m + " " + v);			avevar(data2, m, v);			out.println("data2 (m,v)    : " + m + " " + v);			ftest(data1, data2, d, prob);			out.println("F-test (f,p)   : " + d + " " + prob);			tutest(data1, data2, d, prob);			out.println("T-test (t,p)   : " + d + " " + prob);			kstwo(data1, data2, d, prob);			out.println("KS-test (ks,p) : " + d + " " + prob);		}	}}/** * Given a matrix M, compute its eigenvalues and eigenvectors * @param M double[][] Matrix * @param eigenvalues double[] A vector for holding the calculated eigenvalues * @param eigenvectors double[][] Matrix A matrix for holding the calculated eigenvectors */public static void eigen(double[][] M, double[] eigenvalue, double[][] eigenvector){	// Matrix access indices	int i,j;	// Create array for P (accessed from 1 rather than 0)	double[][] P = new double[M.length+1][M[0].length+1];	for (i=1; i<=P.length; i++) {		for (j=1; j<=P[0].length; j++) {			P[i][j] = M[i-1][j-1];		}	}	// Store for holding diagonals of tridiagonalised P	double[] d = new double[P.length];	// Store for holding off diagonals of tridiagonalised P	double[] od = new double[P.length];	// Tridiagonalise P	tred2(P, P.length, d, od);	// Calculate the eigenvalues and eigenvectors	tqli(d, od, P.length, P);	// d now contains the n eigenvalues of Kx	// P now contains the n eigenvectors of Kx	// Normalise d (sum of eigenvalues = 1) and copy d into L	double sumOfd = 0.0;	for (i=1; i<=P.length; i++) {		/** DEVELOPER'S NOTE - IMPROVE		Some eigenvalues are reported as being negative. This is caused		by inaccuracy in the algorithm. The negative values seems to be all		very small (in the order of -1e-16). Improve with the LAPACK algorithm.		For now it is forced to be positive, should be ok for now. */		d[i] = Math.abs(d[i]); // Force all eigenvalues to be positive and non-zero		if (d[i] == 0) d[i] = 0.0000000001; // NUDGING eigenvalues to ensure they are not zero which might be a cause of the NaN problem		sumOfd += d[i]; // Compute sum	}	for (i=1; i<=P.length; i++) eigenvalue[i-1] = d[i] / sumOfd; // Scale and copy		// Copy P into e	for (i=1; i<=P.length; i++) {		for (j=1; j<=P[0].length; j++) {			eigenvector[i-1][j-1] = P[i][j];		}	}}/** * Given a matrix M, compute its eigenvalues and eigenvectors * @param M double[][] Matrix * @param eigenvalues double[] A vector for holding the calculated eigenvalues * @param eigenvectors double[][] Matrix A matrix for holding the calculated eigenvectors */public static void eigen(float[][] M, float[] eigenvalue, float[][] eigenvector){	// Matrix access indices	int i,j;	// Create array for P (accessed from 1 rather than 0)	double[][] P = new double[M.length+1][M[0].length+1];	for (i=1; i<P.length; i++) {		for (j=1; j<P[0].length; j++) {			P[i][j] = M[i-1][j-1];		}	}	// Store for holding diagonals of tridiagonalised P	double[] d = new double[P.length];	// Store for holding off diagonals of tridiagonalised P	double[] od = new double[P.length];	// Tridiagonalise P	tred2(P, M.length, d, od);	// Calculate the eigenvalues and eigenvectors	tqli(d, od, M.length, P);	// d now contains the n eigenvalues of Kx	// P now contains the n eigenvectors of Kx	// Normalise d (sum of eigenvalues = 1) and copy d into L	double sumOfd = 0.0;	for (i=1; i<P.length; i++) {		/** DEVELOPER'S NOTE - IMPROVE		Some eigenvalues are reported as being negative. This is caused		by inaccuracy in the algorithm. The negative values seems to be all		very small (in the order of -1e-16). Improve with the LAPACK algorithm.		For now it is forced to be positive, should be ok for now. */		d[i] = Math.abs(d[i]); // Force all eigenvalues to be positive and non-zero		if (d[i] == 0) d[i] = 0.0000000001; // NUDGING eigenvalues to ensure they are not zero which might be a cause of the NaN problem		sumOfd += d[i]; // Compute sum	}	for (i=1; i<P.length; i++) eigenvalue[i-1] = (float) (d[i] / sumOfd); // Scale and copy		// Copy P into e	for (i=1; i<P.length; i++) {		for (j=1; j<P[0].length; j++) {			eigenvector[i-1][j-1] = (float) P[i][j];		}	}}/** * Compute the natural log of n!. Short cut code converted from * that on p.215 of Numerical recipes in C * @return float * @param n int */public static float factln(final int n) {	if (n<=1) return 0;	// In range of precomputed table	if (n<=100) return (factln_p[n]!=0 ? factln_p[n] : (factln_p[n]=gammln(n+1)));	// Out of range of table	else return gammln(n+1);}/** * P. 619 : F-test for significantly different variances * Creation date: (02/07/00 02:17:53) * @param data1 float[] * @param data2 float[] * @param f floatR *			Ratio of one variance to the other, so values *			either >> 1 or << 1 will indicate very significant *			differences. * @param prob floatR *			The significance of f, small values indicate that *			the two arrays have significantly different variances. */public final static void ftest(final float[] data1, final float[] data2, floatR f, floatR prob) { 	int n1=data1.length-1;	int n2=data2.length-1;	floatR var1=new floatR();	floatR var2=new floatR();	floatR ave1=new floatR();	floatR ave2=new floatR();	float df1,df2;	avevar(data1,ave1,var1);	avevar(data2,ave2,var2);	if (var1.value > var2.value) {		f.value=var1.value/var2.value;		df1=n1-1;		df2=n2-1;	} else {		f.value=var2.value/var1.value;		df1=n2-1;		df2=n1-1;	}	prob.value = (float) 2.0*betai(0.5f*df2, 0.5f*df1, df2/(df2+df1*(f.value)));	if (prob.value > 1.0) prob.value=2-prob.value;}/** * P. 214 : Compute the natural log of the gamma function. * Creation date: (02/07/00 00:14:24) * @return float * @param xx float
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -