tokensequence.java

来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Java 代码 · 共 198 行

JAVA
198
字号
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).   http://www.cs.umass.edu/~mccallum/mallet   This software is provided under the terms of the Common Public License,   version 1.0, as published by http://www.opensource.org.  For further   information, see the file `LICENSE' included with this distribution. *//** @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */package edu.umass.cs.mallet.base.types;import edu.umass.cs.mallet.base.pipe.Pipe;import edu.umass.cs.mallet.base.pipe.PipeOutputAccumulator;import edu.umass.cs.mallet.base.util.PropertyList;import java.io.IOException;import java.io.ObjectInputStream;import java.io.ObjectOutputStream;import java.io.Serializable;import java.util.ArrayList;import java.util.Collection;import java.util.Iterator;/** * A representation of a piece of text, usually a single word, to which we can attach properties. */public class TokenSequence implements PipeOutputAccumulator, Sequence, Serializable {	ArrayList tokens;	PropertyList properties = null;				// for arbitrary properties	public TokenSequence(Collection tokens) {		this.tokens = new ArrayList( tokens );	}	public TokenSequence() {		this.tokens = new ArrayList();	}	public TokenSequence(int capacity) {		this.tokens = new ArrayList( capacity );	}	public TokenSequence(Token[] tokens) {		this( tokens.length );		for (int i = 0; i < tokens.length; i++)			this.add( tokens[i] );	}	public TokenSequence(Object[] tokens) {		this( tokens.length );		for (int i = 0; i < tokens.length; i++)			this.add( new Token( tokens[i].toString() ) );	}	public int size() {		return this.tokens.size();	}	public String toString() {		StringBuffer sb = new StringBuffer();		sb.append( "TokenSequence " + super.toString() + "\n" );		for (int i = 0; i < tokens.size(); i++) {			String tt = getToken( i ).toString();			sb.append( "Token#" + i + ":" );			sb.append( tt );			if (!tt.endsWith( "\n" ))				sb.append( "\n" );		}		return sb.toString();	}	public Token getToken(int i) {		return (Token)tokens.get( i );	}	public Object get(int i) {		return tokens.get( i );	}	public void add(Object o) {		if (o instanceof Token)			add( (Token)o );		else if (o instanceof TokenSequence)			add( (TokenSequence)o );		else			add( new Token( o.toString() ) );	}	public void add(Token t) {		tokens.add( t );	}	//added by Fuchun Peng, Oct. 24, 2003	public Object remove(int index) {		return tokens.remove( index );	}	// added by Fuchun Peng, Oct. 24, 2003	public Object removeLastToken() {		if (tokens.size() > 0) {			return tokens.remove( tokens.size() - 1 );		}		else			return null;	}	public void addAll(TokenSequence ts) {		for (int i = 0; i < ts.size(); i++)			add( ts.getToken( i ) );	}	public void addAll(Token[] tokens) {		for (int i = 0; i < tokens.length; i++)			add( tokens[i] );	}	public void addAll(Object[] tokens) {		for (int i = 0; i < tokens.length; i++) {			if (tokens[i] instanceof Token)				add( (Token)tokens[i] );			else				add( new Token( tokens[i].toString() ) );		}	}	public Iterator iterator() {		return tokens.iterator();	}	public void pipeOutputAccumulate(Instance carrier, Pipe iteratedPipe) {		Object data = carrier.getData();		if (! (data instanceof Token))			throw new IllegalArgumentException( "TokenSequence can only accumulator Token's" );		add( (Token)data );	}	public PipeOutputAccumulator clonePipeOutputAccumulator() {		TokenSequence ret = new TokenSequence( tokens );		ret.properties = this.properties;		return ret;	}	public FeatureSequence toFeatureSequence(Alphabet dict) {		FeatureSequence fs = new FeatureSequence( dict, tokens.size() );		for (int i = 0; i < tokens.size(); i++)			fs.add( dict.lookupIndex( ((Token)tokens.get( i )).getText() ) );		return fs;	}	public FeatureVector toFeatureVector(Alphabet dict) {		return new FeatureVector( toFeatureSequence( dict ) );	}	public void setNumericProperty(String key, double value) {		properties = PropertyList.add( key, value, properties );	}	public void setProperty(String key, Object value) {		properties = PropertyList.add( key, value, properties );	}	public double getNumericProperty(String key) {		return properties.lookupNumber( key );	}	public Object getProperty(String key) {		return properties.lookupObject( key );	}	public boolean hasProperty(String key) {		return properties.hasProperty( key );	}	// Serialization	private static final long serialVersionUID = 1;	private static final int CURRENT_SERIAL_VERSION = 0;	private void writeObject(ObjectOutputStream out) throws IOException {		out.writeInt( CURRENT_SERIAL_VERSION );		out.defaultWriteObject();	}	private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {		int version = in.readInt();		in.defaultReadObject();	}}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?