📄 transducer.java
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. *//** @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */package edu.umass.cs.mallet.base.fst;// Analogous to base.types.classify.Classifierimport java.util.logging.*;import java.util.*;import java.io.*;import edu.umass.cs.mallet.base.pipe.Pipe;//import edu.umass.cs.mallet.base.pipe.SerialPipe;import edu.umass.cs.mallet.base.types.InstanceList;import edu.umass.cs.mallet.base.types.Instance;import edu.umass.cs.mallet.base.types.Sequence;import edu.umass.cs.mallet.base.types.ArraySequence;import edu.umass.cs.mallet.base.types.SequencePair;import edu.umass.cs.mallet.base.types.SequencePairAlignment;import edu.umass.cs.mallet.base.types.Label;import edu.umass.cs.mallet.base.types.LabelAlphabet;import edu.umass.cs.mallet.base.types.LabelVector;import edu.umass.cs.mallet.base.types.DenseVector;import edu.umass.cs.mallet.base.types.Alphabet;import edu.umass.cs.mallet.base.types.MatrixOps;import edu.umass.cs.mallet.base.util.MalletLogger;import edu.umass.cs.mallet.base.util.search.*;// Variable name key:// "ip" = "input position"// "op" = "output position"public abstract class Transducer implements Serializable{ private static Logger logger = MalletLogger.getLogger(Transducer.class.getName()); { // xxx Why isn't this resulting in printing the log messages? //logger.setLevel (Level.FINE); //logger.addHandler (new StreamHandler (System.out, new SimpleFormatter ())); //System.out.println ("Setting level to finer"); //System.out.println ("level = " + logger.getLevel()); //logger.warning ("Foooooo"); } public static final double ZERO_COST = 0; public static final double INFINITE_COST = Double.POSITIVE_INFINITY; //private Stack availableTransitionIterators = new Stack (); // Serialization private static final long serialVersionUID = 1; // Version history: // 0: Initial // 1: Add pipes // 3: Add beam width private static final int CURRENT_SERIAL_VERSION = 3; private static final int NO_PIPE_VERSION = 0; private void writeObject (ObjectOutputStream out) throws IOException { int i, size; out.writeInt (CURRENT_SERIAL_VERSION); out.writeObject(inputPipe); out.writeObject(outputPipe); out.writeInt (beamWidth); } private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { int size, i; int version = in.readInt (); if (version == NO_PIPE_VERSION) { inputPipe = null; outputPipe = null; } else { inputPipe = (Pipe) in.readObject(); outputPipe = (Pipe) in.readObject(); } if (version < 3) { beamWidth = 50; } else { beamWidth = in.readInt (); } } public abstract static class State implements Serializable { protected double initialCost = 0; protected double finalCost = 0; public abstract String getName(); public abstract int getIndex (); public double getInitialCost () { return initialCost; } public void setInitialCost (double c) { initialCost = c; } public double getFinalCost () { return finalCost; } public void setFinalCost (double c) { finalCost = c; } //public Transducer getTransducer () { return (Transducer)this; } //public abstract TransitionIterator transitionIterator (Object input); // Pass negative positions for a sequence to request "epsilon // transitions" for either input or output. (-position-1) should be // the position in the sequence after which we are trying to insert // the espilon transition. public abstract TransitionIterator transitionIterator (Sequence input, int inputPosition, Sequence output, int outputPosition); /* public abstract TransitionIterator transitionIterator { if (availableTransitionIterators.size() > 0) return ((TransitionIterator)availableTransitionIterators.pop()).initialize (State source, Sequence input, int inputPosition, Sequence output, int outputPosition); else return newTransitionIterator (Sequence input, int inputPosition, Sequence output, int outputPosition); } */ // Pass negative input position for a sequence to request "epsilon // transitions". (-position-1) should be the position in the // sequence after which we are trying to insert the espilon // transition. public TransitionIterator transitionIterator (Sequence input, int inputPosition) { return transitionIterator (input, inputPosition, null, 0); } // For generative transducers: // Return all possible transitions, independent of input public TransitionIterator transitionIterator () { return transitionIterator (null, 0, null, 0); } // For trainable transducers: public void incrementInitialCount (double count) { throw new UnsupportedOperationException (); } public void incrementFinalCount (double count) { throw new UnsupportedOperationException (); } // Serialization private static final long serialVersionUID = 1; private static final int CURRENT_SERIAL_VERSION = 0; private void writeObject (ObjectOutputStream out) throws IOException { int i, size; out.writeInt (CURRENT_SERIAL_VERSION); out.writeDouble(initialCost); out.writeDouble(finalCost); } private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { int size, i; int version = in.readInt (); initialCost = in.readDouble(); finalCost = in.readDouble(); } } public abstract static class TransitionIterator implements Iterator, Serializable { //public abstract void initialize (Sequence input, int inputPosition, //Sequence output, int outputPosition); public abstract boolean hasNext (); public int numberNext(){ return -1;} public abstract State nextState (); // returns the destination state public Object next () { return nextState(); } public void remove () { throw new UnsupportedOperationException (); } public abstract Object getInput (); public abstract Object getOutput (); public abstract double getCost (); public abstract State getSourceState (); public abstract State getDestinationState (); // In future these will allow for transition that consume variable amounts of the sequences public int getInputPositionIncrement () { return 1; } public int getOutputPositionIncrement () { return 1; } //public abstract Transducer getTransducer () {return getSourceState().getTransducer();} // For trainable transducers: public void incrementCount (double count) { throw new UnsupportedOperationException (); } // Serialization private static final long serialVersionUID = 1; private static final int CURRENT_SERIAL_VERSION = 0; private void writeObject (ObjectOutputStream out) throws IOException { int i, size; out.writeInt (CURRENT_SERIAL_VERSION); } private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { int size, i; int version = in.readInt (); } // I hate that I need this; there's really no other way public String describeTransition (double cutoff) { return ""; } } // CPAL - these worked well for nettalk //private int beamWidth = 10; //private double KLeps = .005; boolean UseForwardBackwardBeam = false; private int beamWidth = 0; private double KLeps = 0; private double Rmin = 0.1; private double nstatesExpl[]; private int curIter = 0; int tctIter = 0; // The number of times we have been called this iteration private double curAvgNstatesExpl; /** A pipe that should produce a Sequence in the "data" slot, (and possibly one in the "target" slot also */ protected Pipe inputPipe; /** A pipe that should expect a ViterbiPath in the "target" slot, and should produce something printable in the "source" slot that indicates the results of transduction. */ protected Pipe outputPipe; public Pipe getInputPipe () { return inputPipe; } public Pipe getOutputPipe () { return outputPipe; } public int getBeamWidth () { return beamWidth; } public void setBeamWidth (int beamWidth) { this.beamWidth = beamWidth; } public void setCurIter (int curIter) { this.curIter = curIter; this.tctIter = 0; } public void incIter () { this.tctIter++; } public void setKLeps (double KLeps) { this.KLeps = KLeps; } public void setRmin (double Rmin) { this.Rmin = Rmin; } public double[] getNstatesExpl() { return nstatesExpl; } public void setUseForwardBackwardBeam (boolean state) { this.UseForwardBackwardBeam = state; } /** We aren't really a Pipe subclass, but this method works like Pipes' do. */ public Instance pipe (Instance carrier) { carrier.setTarget(viterbiPath ((Sequence)carrier.getData())); return carrier; } /** * Converts the given sequence into another sequence according to this transducer. * For exmaple, robabilistic transducer may do something like Viterbi here. * Subclasses of transducer may specify that they only accept special kinds of sequence. * @param input Input sequence * @return Sequence output by this transudcer */ public Sequence transduce (Sequence input) { ViterbiPath lattice = viterbiPath (input); return lattice.output (); } public abstract int numStates (); public abstract State getState (int index); // Note that this method is allowed to return states with infinite initialCost. public abstract Iterator initialStateIterator (); // Some transducers are "generative", meaning that you can get a // sequence out of them without giving them an input sequence. In // this case State.transitionIterator() should return all available // transitions, but attempts to obtain the input and cost fields may // throw an exception. // xxx Why could obtaining "cost" be a problem??? public boolean canIterateAllTransitions () { return false; } // If true, this is a "generative transducer". In this case // State.transitionIterator() should return transitions that have // valid input and cost fields. True returned here should imply // that canIterateAllTransitions() is true. public boolean isGenerative () { return false; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -