📄 vectorstats.java

📁 常用机器学习算法,java编写源代码,内含常用分类算法,包括说明文档
💻 JAVA
字号:
package edu.umass.cs.mallet.base.util;/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept.   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).   http://www.cs.umass.edu/~mccallum/mallet   This software is provided under the terms of the Common Public License,   version 1.0, as published by http://www.opensource.org.  For further   information, see the file `LICENSE' included with this distribution. *//**  * Class of static methods for calculating  statistics of a SparseVector sample  * packaged in an InstanceList. * *  @author Jerod Weinman <A HREF="mailto:weinman@cs.umass.edu">weinman@cs.umass.edu</A>*/import edu.umass.cs.mallet.base.types.*;import gnu.trove.TIntHashSet;public class VectorStats {    /**      * Returns a <CODE>SparseVector</CODE> whose entries (taken from the union of     * those in the instances) are the expected values of those in the      * <CODE>InstanceList</CODE>. This implies the returned vector will not have      * binary values.      */    public static SparseVector mean (InstanceList instances )    {	if (instances==null || instances.size()==0)	    return null;	InstanceList.Iterator instanceItr = instances.iterator();		SparseVector v;	Instance instance;	int indices[];	int maxSparseIndex=-1;	int maxDenseIndex=-1;	// First, we find the union of all the indices used in the instances	TIntHashSet hIndices = new TIntHashSet(instances.getDataAlphabet().size());	while (instanceItr.hasNext())	{ 	    instance = (Instance)instanceItr.next();	    v = (SparseVector)(instance.getData());	    indices = v.getIndices ();	    if (indices!=null)	    {		hIndices.addAll (indices);		if (indices[indices.length-1]>maxSparseIndex)		    maxSparseIndex = indices[indices.length-1];	    }	    else // dense		if (v.numLocations()>maxDenseIndex)		    maxDenseIndex = v.numLocations()-1;	}	if (maxDenseIndex>-1) // dense vectors were present	{	    if (maxSparseIndex>maxDenseIndex) 	    // sparse vectors were present and they had greater indices than	    // the dense vectors	    { 		// therefore, we create sparse vectors and 		// add all the dense indices		 for (int i=0 ; i<=maxDenseIndex ; i++)		     hIndices.add (i); 	    }	    else		// sparse indices may have been present, but we don't care		// since they never had indices that exceeded those of the 		// dense vectors	    {		return mean(instances, maxDenseIndex+1);	    }	}		// reaching this statement implies we can create a sparse vector	return mean (instances, hIndices.toArray ());    }    /**      * Returns a <CODE>SparseVector</CODE> whose entries (dense with the given      * number of indices) are the expected values of those in the      * <CODE>InstanceList</CODE>. This implies the returned vector will not have      * binary values.      */    public static SparseVector mean ( InstanceList instances, int numIndices )    {	SparseVector mv = new SparseVector (new double[numIndices], false);	return mean (instances, mv);    }    /**      * Returns a <CODE>SparseVector</CODE> whose entries (the given indices) are      * the expected values of those in the <CODE>InstanceList</CODE>.      * This implies the returned vector will not have binary values.      */    public static SparseVector mean ( InstanceList instances, int[] indices )    {	// Create the mean vector with the indices having all zeros, 	// nothing copied, sorted, and no checks for duplicates.	SparseVector mv = new SparseVector (indices, 					    new double[indices.length],					    false, true, false);	return mean (instances, mv);    }    private static SparseVector mean (InstanceList instances, 				      SparseVector meanVector )    {	if (instances==null || instances.size()==0)	    return null;		Instance instance;	SparseVector v;	InstanceList.Iterator instanceItr = instances.iterator();		double factor = 1.0/(double)instances.size();	while (instanceItr.hasNext())	{    	    instance = (Instance)instanceItr.next();	    v = (SparseVector)(instance.getData());	    	    meanVector.plusEqualsSparse (v, factor);	}		return meanVector;    }    /**      * Returns a <CODE>SparseVector</CODE> whose entries (taken from the union of     * those in the instances) are the variance of those in the      * <CODE>InstanceList</CODE>. This implies the returned vector will not have      * binary values.      *      * @param unbiased Normalizes by N-1 when true, and by N otherwise.     */    public static SparseVector variance( InstanceList instances, boolean unbiased )    {	return variance( instances, mean (instances), unbiased );    }    /**      * Returns a <CODE>SparseVector</CODE> whose entries (taken from the mean      * argument) are the variance of those in the <CODE>InstanceList</CODE>. This     * implies the returned vector will not have      * binary values.      *      * @param unbiased Normalizes by N-1 when true, and by N otherwise.     */        public static SparseVector variance ( InstanceList instances, 					 SparseVector mean,					 boolean unbiased )    {	if (instances==null || instances.size()==0)	    return null;		double factor = 1.0/(double)(instances.size() - 				     (unbiased ? 1.0 : 0.0));	System.out.println("factor = "+factor);	SparseVector v;	// var = (x^2 - n*mu^2)/(n-1)	SparseVector vv = (SparseVector)mean.cloneMatrix();		vv.timesEqualsSparse(vv, -(double)instances.size()*factor);	InstanceList.Iterator instanceItr = instances.iterator();	Instance instance;		while (instanceItr.hasNext())	{    	    instance = (Instance)instanceItr.next();	    v = (SparseVector)((SparseVector)(instance.getData())).cloneMatrix();	    v.timesEqualsSparse (v);	    	    vv.plusEqualsSparse (v, factor);	}	System.out.println("Var:\n"+vv);	return vv;    }			              /** Returns unbiased variance */    public static SparseVector variance ( InstanceList instances )    {	return variance( instances, true );    }    /** Returns unbiased variance of instances having the given mean. */    public static SparseVector variance ( InstanceList instances,					 SparseVector mean)    {	return variance( instances, mean, true );    }    /**      * Square root of variance.     *       * @param mean Mean of the given instances.     * @param unbiased Normalizes variance by N-1 when true, and by N      *                 otherwise.     * @see variance      */    public static SparseVector stddev ( InstanceList instances, 				       SparseVector mean,				       boolean unbiased )    {	    	if (instances.size()==0)	    return null;			SparseVector sv = variance (instances, mean, unbiased);	int dim = sv.numLocations();	double val;	for (int i=0 ; i<dim ; i++)	{	    val = sv.valueAtLocation (i);	    sv.setValueAtLocation (i, Math.sqrt (val));	}	return sv;    }    /** Square root of unbiased variance. */    public static SparseVector stddev ( InstanceList instances )    {	return stddev( instances, true );    }    /**      * Square root of variance.     *       * @param unbiased Normalizes variance by N-1 when true, and by N      *                 otherwise.     * @see variance      */    public static SparseVector stddev ( InstanceList instances, 				       boolean unbiased )    {	return stddev (instances, mean (instances), unbiased);    }	    /** Square root of unbiased variance of instances having the given mean */    public static SparseVector stddev ( InstanceList instances,				       SparseVector mean)    {	return stddev( instances, mean, true );    }}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -