📄 alphabet.java

📁 常用机器学习算法,java编写源代码,内含常用分类算法,包括说明文档
💻 JAVA
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).   http://www.cs.umass.edu/~mccallum/mallet   This software is provided under the terms of the Common Public License,   version 1.0, as published by http://www.opensource.org.  For further   information, see the file `LICENSE' included with this distribution. *//**   @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */package edu.umass.cs.mallet.base.types;import java.util.ArrayList;import java.io.*;import java.util.Iterator;import java.util.HashMap;import java.rmi.dgc.VMID;/** *  A mapping between integers and objects where the mapping in each * direction is efficient.  Integers are assigned consecutively, starting * at zero, as objects are added to the Alphabet.  Objects can not be * deleted from the Alphabet and thus the integers are never reused. * <p> * The most common use of an alphabet is as a dictionary of feature names * associated with a {@link edu.umass.cs.mallet.base.types.FeatureVector} in an * {@link edu.umass.cs.mallet.base.types.Instance}. In a simple document * classification usage, * each unique word in a document would be a unique entry in the Alphabet * with a unique integer associated with it.   FeatureVectors rely on * the integer part of the mapping to efficiently represent the subset of * the Alphabet present in the FeatureVector. * @see FeatureVector * @see Instance * @see edu.umass.cs.mallet.base.pipe.Pipe */public class Alphabet implements Serializable{	gnu.trove.TObjectIntHashMap map;	ArrayList entries;	boolean growthStopped = false;	Class entryClass = null;    VMID instanceId = new VMID();  //used in readResolve to identify persitent instances	public Alphabet (int capacity, Class entryClass)	{		this.map = new gnu.trove.TObjectIntHashMap (capacity);		this.entries = new ArrayList (capacity);		this.entryClass = entryClass;	}	public Alphabet (Class entryClass)	{		this (8, entryClass);	}	public Alphabet (int capacity)	{		this (capacity, null);	}	public Alphabet ()	{		this (8, null);	}	public Object clone ()	{		//try {		// Wastes effort, because we over-write ivars we create		Alphabet ret = new Alphabet ();		ret.map = (gnu.trove.TObjectIntHashMap) map.clone();		ret.entries = (ArrayList) entries.clone();		ret.growthStopped = growthStopped;		ret.entryClass = entryClass;		return ret;		//} catch (CloneNotSupportedException e) {		//e.printStackTrace();		//throw new IllegalStateException ("Couldn't clone InstanceList Vocabuary");		//}	}	/** Return -1 if entry isn't present. */	public int lookupIndex (Object entry, boolean addIfNotPresent)	{		if (entry == null)			throw new IllegalArgumentException ("Can't lookup \"null\" in an Alphabet.");		if (entryClass == null)			entryClass = entry.getClass();		else			// Insist that all entries in the Alphabet are of the same			// class.  This may not be strictly necessary, but will catch a			// bunch of easily-made errors.			if (entry.getClass() != entryClass)				throw new IllegalArgumentException ("Non-matching entry class, "+entry.getClass()+", was "+entryClass);    int retIndex = -1;    if (map.containsKey( entry )) {      retIndex = map.get( entry );    }    else if (!growthStopped && addIfNotPresent) {      retIndex = entries.size();			map.put (entry, retIndex);			entries.add (entry);	  }		return retIndex;	}	public int lookupIndex (Object entry)	{		return lookupIndex (entry, true);	}	public Object lookupObject (int index)	{		return entries.get(index);	}	public Object[] toArray () {		return entries.toArray();	}	// xxx This should disable the iterator's remove method...	public Iterator iterator () {		return entries.iterator();	}	public Object[] lookupObjects (int[] indices)	{		Object[] ret = new Object[indices.length];		for (int i = 0; i < indices.length; i++)			ret[i] = entries.get(indices[i]);		return ret;	}	public int[] lookupIndices (Object[] objects, boolean addIfNotPresent)	{		int[] ret = new int[objects.length];		for (int i = 0; i < objects.length; i++)			ret[i] = lookupIndex (objects[i], addIfNotPresent);		return ret;	}	public boolean contains (Object entry)	{		return map.contains (entry);	}	public int size ()	{		return entries.size();	}	public void stopGrowth ()	{		growthStopped = true;	}	public boolean growthStopped ()	{		return growthStopped;	}	public Class entryClass ()	{		return entryClass;	}	/** Return String representation of all Alphabet entries, each			separated by a newline. */	public String toString()	{		StringBuffer sb = new StringBuffer();		for (int i = 0; i < entries.size(); i++) {			sb.append (entries.get(i).toString());			sb.append ('\n');		}		return sb.toString();	}	public void dump () { dump (System.out); }	public void dump (PrintStream out)	{		for (int i = 0; i < entries.size(); i++) {			out.println (i+" => "+entries.get (i));		}	}    public VMID getInstanceId() { return instanceId;} // for debugging	// Serialization	private static final long serialVersionUID = 1;	private static final int CURRENT_SERIAL_VERSION = 1;	private void writeObject (ObjectOutputStream out) throws IOException {		out.writeInt (CURRENT_SERIAL_VERSION);		out.writeInt (entries.size());		for (int i = 0; i < entries.size(); i++)			out.writeObject (entries.get(i));		out.writeBoolean (growthStopped);		out.writeObject (entryClass);        out.writeObject(instanceId);	}	private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {		int version = in.readInt ();		int size = in.readInt();		entries = new ArrayList (size);		map = new gnu.trove.TObjectIntHashMap (size);		for (int i = 0; i < size; i++) {			Object o = in.readObject();			map.put (o, i);			entries. add (o);		}		growthStopped = in.readBoolean();		entryClass = (Class) in.readObject();        if (version >0 ){ // instanced id added in version 1S            instanceId = (VMID) in.readObject();        }	}    private transient static HashMap deserializedEntries = new HashMap();    /**    * This gets called after readObject; it lets the object decide whether    * to return itself or return a previously read in version.    * We use a hashMap of instanceIds to determine if we have already read    * in this object.    * @return    * @throws ObjectStreamException    */    public Object readResolve() throws ObjectStreamException {       Object previous = deserializedEntries.get(instanceId);       if (previous != null){           //System.out.println(" ***Alphabet ReadResolve:Resolving to previous instance. instance id= " + instanceId);           return previous;       }       if (instanceId != null){           deserializedEntries.put(instanceId, this);       }       //System.out.println(" *** Alphabet ReadResolve: new instance. instance id= " + instanceId);       return this;    }}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -