📄 vectorstats.java
字号:
package edu.umass.cs.mallet.base.util;/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. *//** * Class of static methods for calculating statistics of a SparseVector sample * packaged in an InstanceList. * * @author Jerod Weinman <A HREF="mailto:weinman@cs.umass.edu">weinman@cs.umass.edu</A>*/import edu.umass.cs.mallet.base.types.*;import gnu.trove.TIntHashSet;public class VectorStats { /** * Returns a <CODE>SparseVector</CODE> whose entries (taken from the union of * those in the instances) are the expected values of those in the * <CODE>InstanceList</CODE>. This implies the returned vector will not have * binary values. */ public static SparseVector mean (InstanceList instances ) { if (instances==null || instances.size()==0) return null; InstanceList.Iterator instanceItr = instances.iterator(); SparseVector v; Instance instance; int indices[]; int maxSparseIndex=-1; int maxDenseIndex=-1; // First, we find the union of all the indices used in the instances TIntHashSet hIndices = new TIntHashSet(instances.getDataAlphabet().size()); while (instanceItr.hasNext()) { instance = (Instance)instanceItr.next(); v = (SparseVector)(instance.getData()); indices = v.getIndices (); if (indices!=null) { hIndices.addAll (indices); if (indices[indices.length-1]>maxSparseIndex) maxSparseIndex = indices[indices.length-1]; } else // dense if (v.numLocations()>maxDenseIndex) maxDenseIndex = v.numLocations()-1; } if (maxDenseIndex>-1) // dense vectors were present { if (maxSparseIndex>maxDenseIndex) // sparse vectors were present and they had greater indices than // the dense vectors { // therefore, we create sparse vectors and // add all the dense indices for (int i=0 ; i<=maxDenseIndex ; i++) hIndices.add (i); } else // sparse indices may have been present, but we don't care // since they never had indices that exceeded those of the // dense vectors { return mean(instances, maxDenseIndex+1); } } // reaching this statement implies we can create a sparse vector return mean (instances, hIndices.toArray ()); } /** * Returns a <CODE>SparseVector</CODE> whose entries (dense with the given * number of indices) are the expected values of those in the * <CODE>InstanceList</CODE>. This implies the returned vector will not have * binary values. */ public static SparseVector mean ( InstanceList instances, int numIndices ) { SparseVector mv = new SparseVector (new double[numIndices], false); return mean (instances, mv); } /** * Returns a <CODE>SparseVector</CODE> whose entries (the given indices) are * the expected values of those in the <CODE>InstanceList</CODE>. * This implies the returned vector will not have binary values. */ public static SparseVector mean ( InstanceList instances, int[] indices ) { // Create the mean vector with the indices having all zeros, // nothing copied, sorted, and no checks for duplicates. SparseVector mv = new SparseVector (indices, new double[indices.length], false, true, false); return mean (instances, mv); } private static SparseVector mean (InstanceList instances, SparseVector meanVector ) { if (instances==null || instances.size()==0) return null; Instance instance; SparseVector v; InstanceList.Iterator instanceItr = instances.iterator(); double factor = 1.0/(double)instances.size(); while (instanceItr.hasNext()) { instance = (Instance)instanceItr.next(); v = (SparseVector)(instance.getData()); meanVector.plusEqualsSparse (v, factor); } return meanVector; } /** * Returns a <CODE>SparseVector</CODE> whose entries (taken from the union of * those in the instances) are the variance of those in the * <CODE>InstanceList</CODE>. This implies the returned vector will not have * binary values. * * @param unbiased Normalizes by N-1 when true, and by N otherwise. */ public static SparseVector variance( InstanceList instances, boolean unbiased ) { return variance( instances, mean (instances), unbiased ); } /** * Returns a <CODE>SparseVector</CODE> whose entries (taken from the mean * argument) are the variance of those in the <CODE>InstanceList</CODE>. This * implies the returned vector will not have * binary values. * * @param unbiased Normalizes by N-1 when true, and by N otherwise. */ public static SparseVector variance ( InstanceList instances, SparseVector mean, boolean unbiased ) { if (instances==null || instances.size()==0) return null; double factor = 1.0/(double)(instances.size() - (unbiased ? 1.0 : 0.0)); System.out.println("factor = "+factor); SparseVector v; // var = (x^2 - n*mu^2)/(n-1) SparseVector vv = (SparseVector)mean.cloneMatrix(); vv.timesEqualsSparse(vv, -(double)instances.size()*factor); InstanceList.Iterator instanceItr = instances.iterator(); Instance instance; while (instanceItr.hasNext()) { instance = (Instance)instanceItr.next(); v = (SparseVector)((SparseVector)(instance.getData())).cloneMatrix(); v.timesEqualsSparse (v); vv.plusEqualsSparse (v, factor); } System.out.println("Var:\n"+vv); return vv; } /** Returns unbiased variance */ public static SparseVector variance ( InstanceList instances ) { return variance( instances, true ); } /** Returns unbiased variance of instances having the given mean. */ public static SparseVector variance ( InstanceList instances, SparseVector mean) { return variance( instances, mean, true ); } /** * Square root of variance. * * @param mean Mean of the given instances. * @param unbiased Normalizes variance by N-1 when true, and by N * otherwise. * @see variance */ public static SparseVector stddev ( InstanceList instances, SparseVector mean, boolean unbiased ) { if (instances.size()==0) return null; SparseVector sv = variance (instances, mean, unbiased); int dim = sv.numLocations(); double val; for (int i=0 ; i<dim ; i++) { val = sv.valueAtLocation (i); sv.setValueAtLocation (i, Math.sqrt (val)); } return sv; } /** Square root of unbiased variance. */ public static SparseVector stddev ( InstanceList instances ) { return stddev( instances, true ); } /** * Square root of variance. * * @param unbiased Normalizes variance by N-1 when true, and by N * otherwise. * @see variance */ public static SparseVector stddev ( InstanceList instances, boolean unbiased ) { return stddev (instances, mean (instances), unbiased); } /** Square root of unbiased variance of instances having the given mean */ public static SparseVector stddev ( InstanceList instances, SparseVector mean) { return stddev( instances, mean, true ); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -