featureselection.java

来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Java 代码 · 共 162 行

JAVA
162
字号
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).   http://www.cs.umass.edu/~mccallum/mallet   This software is provided under the terms of the Common Public License,   version 1.0, as published by http://www.opensource.org.  For further   information, see the file `LICENSE' included with this distribution. *//**	 A subset of features.	    @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */package edu.umass.cs.mallet.base.types;import edu.umass.cs.mallet.base.types.*;import java.util.BitSet;import java.util.regex.Pattern;import java.io.*;public class FeatureSelection implements Serializable{	Alphabet dictionary;	BitSet selectedFeatures;	// boolean defaultValue;  //Implement this by using it to reverse all the exterior interfaces	public FeatureSelection (Alphabet dictionary,													 BitSet selectedFeatures)	{		this.dictionary = dictionary;		this.selectedFeatures = selectedFeatures;	}	public FeatureSelection (Alphabet dictionary)	{		this.dictionary = dictionary;		this.selectedFeatures = new BitSet();	}	public FeatureSelection (RankedFeatureVector rsv, int numFeatures)	{		this.dictionary = rsv.getAlphabet();		this.selectedFeatures = new BitSet (dictionary.size());		int numSelections = Math.min (numFeatures, dictionary.size());		for (int i = 0; i < numSelections; i++)			selectedFeatures.set (rsv.getIndexAtRank(i));	}  /** Creates a FeatureSelection that includes only those features whose names match a given regex.   *   A static factory method.   * @param dictionary  A dictionary of fetaure names.  Entries must be string.   * @param regex Features whose names match this pattern will be included.   * @return A new FeatureSelection.   * */  public static FeatureSelection createFromRegex (Alphabet dictionary, Pattern regex)  {    BitSet included = new BitSet (dictionary.size());    for (int i = 0; i < dictionary.size(); i++) {      String feature = (String) dictionary.lookupObject (i);      if (regex.matcher (feature).matches()) {        included.set (i);      }    }    return new FeatureSelection (dictionary, included);  }	public Object clone ()	{		return new FeatureSelection (dictionary, (BitSet)selectedFeatures.clone());	}	public Alphabet getAlphabet ()	{		return dictionary;	}	public int cardinality ()	{		return selectedFeatures.cardinality();	}	public BitSet getBitSet ()	{		return selectedFeatures;	}	public void add (Object o)	{		add (dictionary.lookupIndex(o));	}	public void add (int index)	{		assert (index >= 0);		selectedFeatures.set (index);	}	public void remove (Object o)	{		remove (dictionary.lookupIndex(o));	}	public void remove (int index)	{		selectedFeatures.set (index, false);	}		public boolean contains (Object o)	{		int index = dictionary.lookupIndex (o, false);		if (index == -1)			return false;		return contains (index);	}	public boolean contains (int index)	{		return selectedFeatures.get (index);	}	public void or (FeatureSelection fs)	{		selectedFeatures.or (fs.selectedFeatures);	}	public int nextSelectedIndex (int index)	{		return selectedFeatures.nextSetBit (index);	}	public int nextDeselectedIndex (int index)	{		return selectedFeatures.nextClearBit (index);	}	// Serialization		private static final long serialVersionUID = 1;	private static final int CURRENT_SERIAL_VERSION = 0;	static final int NULL_INTEGER = -1;		/* Need to check for null pointers. */	private void writeObject (ObjectOutputStream out) throws IOException {		int i, size;		out.writeInt(CURRENT_SERIAL_VERSION);		out.writeObject(dictionary);		out.writeObject(selectedFeatures);	}		private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {		int size, i;		int version = in.readInt ();		dictionary = (Alphabet) in.readObject();		selectedFeatures = (BitSet) in.readObject();	}	}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?