// lattice-tool.cc
* if specified
*/
// Select the word-distance measure used for aligning the lattice into a
// word mesh (confusion network), depending on command-line options.
if (hiddenVocabFile) {
// distance sensitive to membership in the hidden subvocabulary
wordDistance = new SubVocabDistance(vocab, hiddenVocab);
assert(wordDistance!= 0);
} else if (dictFile && dictAlign) {
// distance derived from dictionary (pronunciation) information
wordDistance = new DictionaryAbsDistance(vocab, dictionary);
assert(wordDistance != 0);
}
// Build the confusion network for this lattice; a null wordDistance
// presumably selects the default distance measure -- TODO confirm against
// the initialization earlier in this function (not visible in fragment).
WordMesh mesh(vocab, lat.getName(), wordDistance);
/*
* Preserve acoustic information in word mesh if requested,
* or if needed for CTM generation.
*/
lat.alignLattice(mesh, noiseWords, posteriorScale,
acousticMesh || outputCTM);
if (posteriorDecode) {
/*
* Recover best hyp from lattice
*/
unsigned maxLength = mesh.length();
double subs, inss, dels;
if (outputCTM) {
// CTM output needs per-word timing, hence NBestWordInfo entries
NBestWordInfo *bestWords = new NBestWordInfo[maxLength + 1];
assert(bestWords != 0);
// consensus decoding: minimize expected word error over the mesh
// NOTE(review): `errors' (and subs/inss/dels) are unused afterwards
double errors = mesh.minimizeWordError(bestWords, maxLength + 1,
subs, inss, dels);
printCTM(vocab, bestWords, lat.getName());
delete [] bestWords;
} else {
makeArray(VocabIndex, bestWords, maxLength + 1);
double errors = mesh.minimizeWordError(bestWords, maxLength + 1,
subs, inss, dels);
// SRILM idiom: vocab.use() (comma operator, result discarded) selects
// the vocabulary used by operator<< to print the word-index array
cout << lat.getName() << " "
<< (mesh.vocab.use(), bestWords) << endl;
}
}
// Optionally align the reference word string into the mesh before writing
if (refIndices && (writeMesh || writeMeshDir)) {
mesh.alignReference(refIndices);
}
if (writeMesh) {
File file(writeMesh, "w");
mesh.write(file);
}
if (writeMeshDir) {
// Compose "<dir>/<latname><GZIP_SUFFIX>"; sizeof(GZIP_SUFFIX) includes
// the terminating NUL, the extra +1 accounts for the '/' separator
makeArray(char, outfile,
strlen(writeMeshDir) + 1 +
strlen(lat.getName()) + sizeof(GZIP_SUFFIX));
sprintf(outfile, "%s/%s%s", writeMeshDir,
lat.getName(), GZIP_SUFFIX);
File file(outfile, "w");
mesh.write(file);
}
// free the distance object (delete on a null pointer is a no-op)
delete wordDistance;
}
// Accumulate posterior-weighted N-gram counts from the lattice
// (presumably for the -write-ngrams option -- name suggests so)
if (writeNgrams) {
lat.countNgrams(order, ngramCounts, posteriorScale);
}
// Compute node/link posteriors over the lattice
if (computePosteriors) {
lat.computePosteriors(posteriorScale, true);
}
if (density) {
    /*
     * Report lattice density; HUGE_VAL signals that the lattice
     * duration (needed as the denominator) is unknown.
     */
    double d = lat.computeDensity();
    if (d == HUGE_VAL) {
	cerr << "WARNING: duration for lattice " << inLat << " unknown\n";
    } else {
	// reuse the value computed above instead of recomputing it
	cout << lat.getName() << " " << d << endl;
    }
}
if (connectivity) {
// verify the final node is reachable from the initial node
if (!lat.areConnected(lat.getInitial(), lat.getFinal())) {
cerr << "WARNING: lattice " << inLat << " is not connected\n";
#ifndef NO_TIMEOUT
// cancel the per-lattice watchdog alarm before abandoning this lattice
alarm(0);
#endif
return;
}
}
if (nodeEntropy) {
lat.computeNodeEntropy();
}
if (indexName) {
// dump (node index, name) pairs to the requested index file
File indexFile(indexName, "w");
lat.printNodeIndexNamePair(indexFile);
}
if (refFile || refList) {
if (refIndices) {
unsigned numWords = vocab.length(refIndices);
if (!numWords) {
cerr << "WARNING: reference has 0 length\n";
}
// Word error of the lattice against the reference word string,
// ignoring noise words (presumably the lowest achievable WER --
// the usual meaning of "lattice WER"; confirm in Lattice API)
unsigned total, sub, ins, del;
total = lat.latticeWER(refIndices, sub, ins, del, noiseWords);
cout << "sub " << sub
<< " ins " << ins
<< " del " << del
<< " wer " << total
<< " words " << numWords
<< endl;
} else {
cerr << "FATAL ERROR: reference is missing for lattice "
<< inLat << endl;
}
}
if (addRefsProb != 0.0) {
// Merge the reference words into the lattice with the given
// probability; pauses are included unless noPause is set
if (refIndices) {
lat.addWords(refIndices, addRefsProb, !noPause);
} else if (!(refFile || refList)) {
cerr << "FATAL ERROR: reference is missing for lattice "
<< inLat << endl;
}
}
if (noNulls) {
// remove NULL (epsilon) nodes; Vocab_None marks the null word
lat.removeAllXNodes(Vocab_None);
}
// Lattice reduction: exact packing when overlapRatio == 0, otherwise an
// approximate reduction driven by the node-overlap ratio
if (simpleReduction || simpleReductionIter) {
cerr << "reducing input lattices (overlap ratio = "
<< overlapRatio << ")\n";
if (overlapRatio == 0.0) {
// if reduceBeforePruning is specified then merged transitions probs
// should be addded, not maxed
lat.simplePackBigramLattice(simpleReductionIter, reduceBeforePruning);
} else {
lat.approxRedBigramLattice(simpleReductionIter, overlapBase,
overlapRatio);
}
}
// Posterior-based pruning, done here (before LM expansion) only when
// reduceBeforePruning is set
if (posteriorPruneThreshold > 0 && reduceBeforePruning) {
if (!lat.prunePosteriors(posteriorPruneThreshold, posteriorScale,
densityPruneThreshold, nodesPruneThreshold,
fastPrune))
{
cerr << "WARNING: posterior pruning of lattice " << inLat
<< " failed\n";
#ifndef NO_TIMEOUT
// cancel watchdog before abandoning this lattice
alarm(0);
#endif
return;
}
}
// Set to true below when pauses are stripped for LM application, so they
// can be re-inserted afterwards
Boolean recoverPauses = false;
if (noPause) {
lat.removeAllXNodes(vocab.pauseIndex());
}
// Give up on lattices that are still too large after reduction
if (maxNodes > 0 && lat.getNumNodes() > maxNodes) {
cerr << "WARNING: processing lattice " << inLat
<< " aborted -- too many nodes after reduction: "
<< lat.getNumNodes() << endl;
#ifndef NO_TIMEOUT
alarm(0);
#endif
return;
}
// by default we leave HTK lattices scores alone
HTKScoreMapping htkScoreMapping = mapHTKnone;
if (!readHTK) {
// preserve PFSG weights as HTK acoustic scores
htkScoreMapping = mapHTKacoustic;
}
// Apply / expand the language model over the lattice
if (lmFile) {
// remove pause and NULL nodes prior to LM application,
// so each word has a proper predecessor
// (This can be skipped for unigram LMs, unless we're explicitly
// asked to eliminate pauses. It also not necessary for the
// new general LM expansion algorithms.
if (noPause || compactPause || (oldExpansion && order >= 2)) {
if (!noPause) {
lat.removeAllXNodes(vocab.pauseIndex());
}
if (!noNulls) {
lat.removeAllXNodes(Vocab_None);
}
// remember to re-insert pauses after expansion (see below)
recoverPauses = true;
/*
* attempt further reduction on pause-less lattices
*/
if (preReductionIter) {
cerr << "reducing pause-less lattices (overlap ratio = "
<< overlapRatio << ")\n";
// NOTE(review): `f' appears unused in the visible code -- confirm
File f(stderr);
if (overlapRatio == 0.0) {
lat.simplePackBigramLattice(preReductionIter);
} else {
lat.approxRedBigramLattice(preReductionIter, overlapBase,
overlapRatio);
}
}
}
// assigned on every path below before being read
Boolean status;
if (oldExpansion) {
switch (order) {
case 1:
case 2:
// unigram/bigram weight replacement
status = lat.replaceWeights(lm);
break;
default:
// trigram expansion
if (compactExpansion) {
// NOTE(review): C-style downcast assumes lm is actually an
// Ngram here -- presumably guaranteed by option handling; confirm
status = lat.expandToCompactTrigram(*(Ngram *)&lm, maxNodes);
} else {
status = lat.expandToTrigram(lm, maxNodes);
}
}
} else {
if (noBackoffWeights) {
// hack to ignore backoff weights in LM expansion
lat.noBackoffWeights = true;
}
// general LM expansion
status = lat.expandToLM(lm, maxNodes, compactExpansion);
}
if (!status) {
cerr << "WARNING: expansion of lattice " << inLat << " failed\n";
#ifndef NO_TIMEOUT
// cancel watchdog before abandoning this lattice
alarm(0);
#endif
return;
}
/*
* after LM application need to make sure that probs will fit in
* bytelog range
*/
lat.limitIntlogs = true;
/*
* Replace old HTK language scores in output with new LM scores
*/
htkScoreMapping = mapHTKlanguage;
}
// Re-insert pauses that were removed for LM application, and/or insert
// new optional pauses.
// Precedence note: this parses as (!noPause && recoverPauses) || insertPause.
if (!noPause && recoverPauses || insertPause) {
if (compactPause) {
lat.recoverCompactPauses(loopPause, insertPause);
} else {
lat.recoverPauses(loopPause, insertPause);
}
}
/*
* attempt further reduction on output lattices after LM expansion
*/
if (postReductionIter) {
cerr << "reducing output lattices (overlap ratio = "
<< overlapRatio << ")\n";
if (overlapRatio == 0.0) {
lat.simplePackBigramLattice(postReductionIter);
} else {
lat.approxRedBigramLattice(postReductionIter, overlapBase,
overlapRatio);
}
}
if (splitMultiwordsAfterLM) {
/*
* Split multiwords after LM application
* We create an empty LM so that none of the multiwords appear with
* non-zero probability
*/
Ngram dummy(vocab);
lat.splitMultiwordNodes((MultiwordVocab &)vocab, dummy);
}
if (collapseSameWords) {
// merge nodes carrying the same word, ignoring noise words
lat.collapseSameWordNodes(noiseWords);
}
/*
 * perform lattice algebra
 */
Lattice *finalLat,
	resultLat(vocab, idFromFilename(outLat), ignoreWords);
if (operation && lattice2 != 0) {
    resultLat.debugme(debug);

    // Combine this lattice with the second operand lattice (union or
    // concatenation); any other operation name is a usage error.
    if (!strcmp(operation, LATTICE_OR)) {
	resultLat.latticeOr(lat, *lattice2);
    } else if (!strcmp(operation, LATTICE_CONCATE)) {
	resultLat.latticeCat(lat, *lattice2, interSegmentTime);
    } else {
	cerr << "unknown operation (" << operation << ")\n";
	cerr << "allowed operations are " << LATTICE_OR
	     << " and " << LATTICE_CONCATE << endl;
	exit(2);
    }
    finalLat = &resultLat;
} else {
    // No binary operation requested: the processed input lattice itself is
    // the final result.  (Restored from "finalLat = ⪫", an HTML-entity
    // mangling of "&lat;" introduced by the web scrape.)
    finalLat = &lat;
}
#ifndef NO_TIMEOUT
// kill alarm timer -- we're done
alarm(0);
#endif
// Viterbi (single best path) decoding of the final lattice
if (viterbiDecode) {
if (outputCTM) {
// CTM output needs per-word timing, hence NBestWordInfo entries
NBestWordInfo *bestwords = new NBestWordInfo[maxWordsPerLine + 1];
assert(bestwords != 0);
LogP prob =
finalLat->bestWords(bestwords, maxWordsPerLine, noiseWords);
// print only if a best path was found or the hyp is non-empty
if (prob != LogP_Zero || bestwords[0].word != Vocab_None) {
printCTM(vocab, bestwords, finalLat->getName());
}
delete [] bestwords;
} else {
// NOTE(review): variable-length array unless maxWordsPerLine is a
// compile-time constant -- a gcc extension in C++; confirm intent
VocabIndex bestwords[maxWordsPerLine + 1];
LogP prob =
finalLat->bestWords(bestwords, maxWordsPerLine, noiseWords);
if (prob != LogP_Zero || bestwords[0] != Vocab_None) {
// vocab.use() (comma operator) selects the vocabulary used by
// operator<< to print the word-index array
cout << finalLat->getName() << " "
<< (finalLat->vocab.use(), bestwords) << endl;
}
}
}
if (nbestDecode > 0) {
// output top N hyps
nbestOut.openFiles(finalLat->getName());
if (nbestViterbi) {
finalLat->computeNBestViterbi(nbestDecode, nbestOut, noiseWords,
useMultiwordLM ? multiChar : 0);
} else {
finalLat->computeNBest(nbestDecode, nbestOut, noiseWords,
useMultiwordLM ? multiChar : 0,
nbestMaxHyps, nbestDuplicates);
}
nbestOut.closeFiles();
}
// Use the lattice as a language model to compute perplexity of a text file
if (pplFile) {
if (!noPause) {
// treat pauses as regular words for LatticeLM computation
finalLat->ignoreVocab.remove(finalLat->vocab.pauseIndex());
}
LatticeLM latlm(*finalLat);
latlm.debugme(debug);
File file(pplFile, "r");
TextStats stats;
/*
* Send perplexity info to stdout
*/
latlm.dout(cout);
latlm.pplFile(file, stats);
latlm.dout(cerr);
cout << "file " << pplFile << ": " << stats;
}
// Finally, write the processed lattice in the requested format
if (outLattice || outLatticeDir) {
File file(outLat, "w");
if (writeHTK) {
finalLat->writeHTK(file, htkScoreMapping, computePosteriors);
} else if (writeInternal) {
finalLat->writePFSG(file);
} else {
finalLat->writeCompactPFSG(file);
}
}
}
/*
 * makeMixLM --
 *	Create one component LM read from `filename' -- a factored,
 *	class-based, or plain ngram model depending on global options.
 *	Presumably interpolated with oldLM using lambda1/lambda2 -- TODO
 *	confirm; the function body is truncated in this fragment.
 */
LM *
makeMixLM(const char *filename, Vocab &vocab, SubVocab *classVocab,
unsigned order, LM *oldLM, double lambda1, double lambda2)
{
File file(filename, "r");
/*
* create factored LM or class-LM if specified, otherwise a regular ngram
*/
Ngram *lm = factored ?
new ProductNgram((ProductVocab &)vocab, order) :
(classVocab != 0) ?
(simpleClasses ?
new SimpleClassNgram(vocab, *classVocab, order) :
new ClassNgram(vocab, *classVocab, order)) :
new Ngram(vocab, order);
assert(lm != 0);
// (end of fragment -- non-source website help text removed here)