📄 clusterunitselector.java
字号:
/**
 * Portions Copyright 2001-2003 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute,
 * Carnegie Mellon University.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */
package com.sun.speech.freetts.clunits;

import java.util.List;
import java.util.Iterator;
import java.net.URL;
import java.io.IOException;
import com.sun.speech.freetts.relp.Sample;
import com.sun.speech.freetts.relp.SampleInfo;
import com.sun.speech.freetts.relp.SampleSet;
import com.sun.speech.freetts.UtteranceProcessor;
import com.sun.speech.freetts.cart.CART;
import com.sun.speech.freetts.Utterance;
import com.sun.speech.freetts.ProcessException;
import com.sun.speech.freetts.Relation;
import com.sun.speech.freetts.Item;
import com.sun.speech.freetts.FeatureSet;
import com.sun.speech.freetts.FeatureSetImpl;
import com.sun.speech.freetts.PathExtractor;
import com.sun.speech.freetts.PathExtractorImpl;
import de.dfki.lt.freetts.ClusterUnitNamer;

/**
 * Generates the Unit Relation of an Utterance from the
 * Segment Relation.
 *
 * Unit names are assigned to the segments, a Viterbi search over the
 * cluster unit database selects one unit per segment, and the selected
 * units are written into a new Unit relation.
 */
public class ClusterUnitSelector implements UtteranceProcessor {

    /** Compile-time switch guarding the debug output of this class. */
    final static boolean DEBUG = false;

    /**
     * Feature path evaluated on a segment by the default naming scheme;
     * its (lower-cased, quote-stripped) value is appended to the segment
     * name to form the cluster unit name.
     */
    private final static PathExtractor DNAME = new PathExtractorImpl(
            "R:SylStructure.parent.parent.name", true);

    /** The cluster unit database created from the URL given at construction. */
    private ClusterUnitDatabase clunitDB;

    /** Optional unit namer; when null the default naming scheme is used. */
    private ClusterUnitNamer unitNamer;

    /**
     * Constructs a ClusterUnitSelector that uses the default unit naming
     * scheme (delegates to the two-argument constructor with a null
     * unit namer).
     *
     * @param url the URL for the unit database. If the URL path ends
     * with '.bin' it is assumed that the DB is a binary database,
     * otherwise it is assumed to be a text database.
     *
     * @throws IOException if an error occurs while loading the
     * database
     */
    public ClusterUnitSelector(URL url) throws IOException {
        this(url, null);
    }

    /**
     * Constructs a ClusterUnitSelector.
     *
     * @param url the URL for the unit database.
If the URL path ends * with a '.bin' it is assumed that the DB is a binary database, * otherwise, its assumed that its a text database1 * @param unitNamer an optional unit namer, specifying how the cluster * units are called in the voice database referenced by url. If this is null, * an ldom unit naming scheme will be used (e.g., 'ae_afternoon' for the * phoneme 'ae' in the word 'afternoon'. * * @throws IOException if an error occurs while loading the * database * */ public ClusterUnitSelector(URL url, ClusterUnitNamer unitNamer) throws IOException { if (url == null) { throw new IOException("Can't load cluster unit database"); } boolean binary = url.getPath().endsWith(".bin"); clunitDB = new ClusterUnitDatabase(url, binary); this.unitNamer = unitNamer; } /** * Generates the Unit Relation from the Segment Relation. * <br><b>Implementation note:</b><br> * Populates the segment relation with segment names of the form: * XX_YY where XX is the segment name (typically a phoneme) * and YY is the word that the segment is in (stripped and * lower case). * * The first step in cluster unit selection is to determine the unit * type for each unit in the utterance. The unit type for * selection in the simple talking clock example (cmu_time_awb) is * done per phone. The unit type consists of the phone * name followed by the word the phone comes from (e.g., n_now for * the phone 'n' in the word 'now'). * * Invoke the Viterbi algorithm (via a viterbi class) that * selects the proper units for the segment and adds that to * each segment item. * * For each segment, create a unit and attach features based * upon the selected units. 
* * @param utterance the utterance to generate the Unit Relation * * @throws ProcessException if an IOException is thrown during the * processing of the utterance * */ public void processUtterance(Utterance utterance) throws ProcessException { Viterbi vd; Relation segs = utterance.getRelation(Relation.SEGMENT); utterance.setObject(SampleInfo.UTT_NAME, clunitDB.getSampleInfo()); utterance.setObject("sts_list", clunitDB.getSts()); vd = new Viterbi(segs, clunitDB); for (Item s = segs.getHead(); s != null; s = s.getNext()) { setUnitName(s); } // Carry out the CART lookup for the target costs, and the viterbi // search for finding the best path (join costs) through the candidates. vd.decode(); // Now associate the candidate units in the best path // with the items in the segment relation. if (!vd.result("selected_unit")) { utterance.getVoice().error("clunits: can't find path"); } // If optimal coupling was used, the join points must now be copied // from the path elements to the actual items in the segment relation. vd.copyFeature("unit_prev_move"); vd.copyFeature("unit_this_move"); // Based on this data, create a Unit relation giving the details of the // units to concatenate. 
Relation unitRelation = utterance.createRelation(Relation.UNIT); for (Item s = segs.getHead(); s != null; s = s.getNext()) { Item unit = unitRelation.appendItem(); FeatureSet unitFeatureSet = unit.getFeatures(); int unitEntry = s.getFeatures().getInt("selected_unit"); // The item name is the segment name unitFeatureSet.setString("name", s.getFeatures().getString("name")); int unitStart; int unitEnd; String clunitName = s.getFeatures().getString("clunit_name"); if (s.getFeatures().isPresent("unit_this_move")) { unitStart = s.getFeatures().getInt("unit_this_move"); } else { unitStart = clunitDB.getStart(unitEntry); } if (s.getNext() != null && s.getNext().getFeatures().isPresent("unit_prev_move")) { unitEnd = s.getNext().getFeatures().getInt("unit_prev_move"); } else { unitEnd = clunitDB.getEnd(unitEntry); } unitFeatureSet.setInt("unit_entry", unitEntry); ClusterUnit clunit = new ClusterUnit(clunitDB, clunitName, unitStart, unitEnd); unitFeatureSet.setObject("unit", clunit); if (true) { unitFeatureSet.setInt("unit_start", clunit.getStart()); unitFeatureSet.setInt("unit_end", clunit.getEnd()); unitFeatureSet.setInt("instance", unitEntry - clunitDB.getUnitIndex(clunitName, 0)); } // add the rest of these things for debugging. if (DEBUG) { debug(" sr " + clunitDB.getSampleInfo().getSampleRate() + " " + s.getFeatures().getFloat("end") + " " + (int) (s.getFeatures().getFloat("end") * clunitDB.getSampleInfo().getSampleRate())); } unitFeatureSet.setInt("target_end", (int) (s.getFeatures().getFloat("end") * clunitDB.getSampleInfo().getSampleRate())); } } /** * Sets the cluster unit name given the segment. 
* * @param seg the segment item that gets the name */ protected void setUnitName(Item seg) { if (unitNamer != null) { unitNamer.setUnitName(seg); return; } // default to LDOM naming scheme 'ae_afternoon': String cname = null; String segName = seg.getFeatures().getString("name"); if (segName.equals("pau")) { cname = "pau_" + seg.findFeature("p.name"); } else { // remove single quotes from name String dname = ((String) DNAME.findFeature(seg)).toLowerCase(); cname = segName + "_" + stripQuotes(dname); } seg.getFeatures().setString("clunit_name", cname); } /** * Strips quotes from the given string. * * @param s the string to strip quotes from * * @return a string with all single quotes removed */ private String stripQuotes(String s) { StringBuffer sb = new StringBuffer(s.length()); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (c != '\'') { sb.append(c); } } return sb.toString(); } /** * Retrieves the string representation of this object. * * @return the string representation of this object */ public String toString() { return "ClusterUnitSelector"; } /** * Provides support for the Viterbi Algorithm. * * Implementation Notes * <p> * For each candidate for the current unit, calculate the cost * between it and the first candidate in the next unit. Save * only the path that has the least cost. By default, if two * candidates come from units that are adjacent in the * database, the cost is 0 (i.e., they were spoken together, * so they are a perfect match). * <p> * * Repeat the previous process for each candidate in the next * unit, creating a list of least cost paths between the * candidates between the current unit and the unit following * it. * <p> * * Toss out all candidates in the current unit that are not * included in a path. * <p> * * Move to the next unit and repeat the process. 
*/ static class Viterbi { private int numStates = -1; private boolean bigIsGood = false; private ViterbiPoint timeline = null; private ViterbiPoint lastPoint = null; private FeatureSet f = null; private ClusterUnitDatabase clunitDB; /** * Creates a Viterbi class to process the given utterance. * A queue of ViterbiPoints corresponding to the Items in the Relation segs * is built up. * */ public Viterbi(Relation segs, ClusterUnitDatabase db) { ViterbiPoint last = null; clunitDB = db; f = new FeatureSetImpl(); for (Item s = segs.getHead(); true; s = s.getNext()) { ViterbiPoint n = new ViterbiPoint(s); // The number of ViterbiPaths associated with each ViterbiPoint // is determined using the variable numStates. // TODO: Where can numStates be set? if (numStates > 0) { n.initPathArray(numStates); } if (last != null) { // continue to build up the queue last.next = n; } else { // timeline is the start of the queue timeline = n; } last = n; if (s == null) { // no further segments, leave loop lastPoint = n; break; } } if (DEBUG) { debug("num states " + numStates); } if (numStates == 0) { // its a general beam search timeline.paths = new ViterbiPath(); } if (numStates == -1) { // dynamic number of states (# cands) timeline.initPathArray(1); } } /** * Sets the given feature to the given value. * * @param name the name of the feature * @param obj the new value. */ public void setFeature(String name, Object obj) { f.setObject(name, obj); } /** * Gets the value for the given feature. * * @param name the name of the feature * * @return the value of the feature */ public Object getFeature(String name) { return f.getObject(name); } /** * Carry out a Viterbi search in for a prepared queue of ViterbiPoints. * In a nutshell, each Point represents a target item (a target segment); * for each target Point, a number of Candidate units in the voice database * are determined; a Path structure is built up, based on local best transitions. 
* Concretely, a Path consists of a (possibly empty) previous Path, a current Candidate, * and a Score. This Score is a quality measure of the Path; it is calculated as the * sum of the previous Path's score, the Candidate's score, and the Cost of joining * the Candidate to the previous Path's Candidate. At each step, only one Path * leading to each Candidate is retained, viz. the Path with the best Score. * All that is left to do is to call result() to get the best-rated * path from among the paths associated with the last Point, and to associate * the resulting Candidates with the segment items they will realise. */ void decode() { for (ViterbiPoint p = timeline; p.next != null; p = p.next) { // The candidates for the current item: p.cands = getCandidate(p.item); if (DEBUG) { debug("decode " + p.cands); } if (numStates != 0) { if (numStates == -1) { // put as many (empty) path elements into p.next as there are candidates in p p.next.initDynamicPathArray(p.cands); } // Now go through all existing paths and all candidates for the current item; // tentatively extend each existing path to each of the candidates, // but only retain the // Attention: p.numStates is not numStates! // numStates = a general flag indicating which type of viterbi search // to use (only -1 seems to be implemented); // p.numStates = the number of paths in p.statePaths, i.e. p.numStates==p.statePaths.length for (int i = 0; i < p.numStates; i++) { if ((p == timeline && i == 0) || (p.statePaths[i] != null)) { // We are at the very beginning of the search, or have a usable path to extend // debug(" dc p " + p); for (ViterbiCandidate c = p.cands; c != null; c = c.next) { // For the candidate c, create a path extending the previous path // p.statePaths[i] to that candidate: ViterbiPath np = getPath(p.statePaths[i], c); // Compare this path to the existing best path (if any) leading to // candidate c; only retain the one with the better score. 
// TODO: why should the paths leading to the candidates realising p
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -