📄 alphabet.java
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. *//** @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */package edu.umass.cs.mallet.base.types;import java.util.ArrayList;import java.io.*;import java.util.Iterator;import java.util.HashMap;import java.rmi.dgc.VMID;/** * A mapping between integers and objects where the mapping in each * direction is efficient. Integers are assigned consecutively, starting * at zero, as objects are added to the Alphabet. Objects can not be * deleted from the Alphabet and thus the integers are never reused. * <p> * The most common use of an alphabet is as a dictionary of feature names * associated with a {@link edu.umass.cs.mallet.base.types.FeatureVector} in an * {@link edu.umass.cs.mallet.base.types.Instance}. In a simple document * classification usage, * each unique word in a document would be a unique entry in the Alphabet * with a unique integer associated with it. FeatureVectors rely on * the integer part of the mapping to efficiently represent the subset of * the Alphabet present in the FeatureVector. * @see FeatureVector * @see Instance * @see edu.umass.cs.mallet.base.pipe.Pipe */public class Alphabet implements Serializable{ gnu.trove.TObjectIntHashMap map; ArrayList entries; boolean growthStopped = false; Class entryClass = null; VMID instanceId = new VMID(); //used in readResolve to identify persitent instances public Alphabet (int capacity, Class entryClass) { this.map = new gnu.trove.TObjectIntHashMap (capacity); this.entries = new ArrayList (capacity); this.entryClass = entryClass; } public Alphabet (Class entryClass) { this (8, entryClass); } public Alphabet (int capacity) { this (capacity, null); } public Alphabet () { this (8, null); } public Object clone () { //try { // Wastes effort, because we over-write ivars we create Alphabet ret = new Alphabet (); ret.map = (gnu.trove.TObjectIntHashMap) map.clone(); ret.entries = (ArrayList) entries.clone(); ret.growthStopped = growthStopped; ret.entryClass = entryClass; return ret; //} catch (CloneNotSupportedException e) { //e.printStackTrace(); //throw new IllegalStateException ("Couldn't clone InstanceList Vocabuary"); //} } /** Return -1 if entry isn't present. */ public int lookupIndex (Object entry, boolean addIfNotPresent) { if (entry == null) throw new IllegalArgumentException ("Can't lookup \"null\" in an Alphabet."); if (entryClass == null) entryClass = entry.getClass(); else // Insist that all entries in the Alphabet are of the same // class. This may not be strictly necessary, but will catch a // bunch of easily-made errors. if (entry.getClass() != entryClass) throw new IllegalArgumentException ("Non-matching entry class, "+entry.getClass()+", was "+entryClass); int retIndex = -1; if (map.containsKey( entry )) { retIndex = map.get( entry ); } else if (!growthStopped && addIfNotPresent) { retIndex = entries.size(); map.put (entry, retIndex); entries.add (entry); } return retIndex; } public int lookupIndex (Object entry) { return lookupIndex (entry, true); } public Object lookupObject (int index) { return entries.get(index); } public Object[] toArray () { return entries.toArray(); } // xxx This should disable the iterator's remove method... public Iterator iterator () { return entries.iterator(); } public Object[] lookupObjects (int[] indices) { Object[] ret = new Object[indices.length]; for (int i = 0; i < indices.length; i++) ret[i] = entries.get(indices[i]); return ret; } public int[] lookupIndices (Object[] objects, boolean addIfNotPresent) { int[] ret = new int[objects.length]; for (int i = 0; i < objects.length; i++) ret[i] = lookupIndex (objects[i], addIfNotPresent); return ret; } public boolean contains (Object entry) { return map.contains (entry); } public int size () { return entries.size(); } public void stopGrowth () { growthStopped = true; } public boolean growthStopped () { return growthStopped; } public Class entryClass () { return entryClass; } /** Return String representation of all Alphabet entries, each separated by a newline. */ public String toString() { StringBuffer sb = new StringBuffer(); for (int i = 0; i < entries.size(); i++) { sb.append (entries.get(i).toString()); sb.append ('\n'); } return sb.toString(); } public void dump () { dump (System.out); } public void dump (PrintStream out) { for (int i = 0; i < entries.size(); i++) { out.println (i+" => "+entries.get (i)); } } public VMID getInstanceId() { return instanceId;} // for debugging // Serialization private static final long serialVersionUID = 1; private static final int CURRENT_SERIAL_VERSION = 1; private void writeObject (ObjectOutputStream out) throws IOException { out.writeInt (CURRENT_SERIAL_VERSION); out.writeInt (entries.size()); for (int i = 0; i < entries.size(); i++) out.writeObject (entries.get(i)); out.writeBoolean (growthStopped); out.writeObject (entryClass); out.writeObject(instanceId); } private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { int version = in.readInt (); int size = in.readInt(); entries = new ArrayList (size); map = new gnu.trove.TObjectIntHashMap (size); for (int i = 0; i < size; i++) { Object o = in.readObject(); map.put (o, i); entries. add (o); } growthStopped = in.readBoolean(); entryClass = (Class) in.readObject(); if (version >0 ){ // instanced id added in version 1S instanceId = (VMID) in.readObject(); } } private transient static HashMap deserializedEntries = new HashMap(); /** * This gets called after readObject; it lets the object decide whether * to return itself or return a previously read in version. * We use a hashMap of instanceIds to determine if we have already read * in this object. * @return * @throws ObjectStreamException */ public Object readResolve() throws ObjectStreamException { Object previous = deserializedEntries.get(instanceId); if (previous != null){ //System.out.println(" ***Alphabet ReadResolve:Resolving to previous instance. instance id= " + instanceId); return previous; } if (instanceId != null){ deserializedEntries.put(instanceId, this); } //System.out.println(" *** Alphabet ReadResolve: new instance. instance id= " + instanceId); return this; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -