📄 wordlattice.cc
字号:
/*
* WordLattice.cc --
* Word lattices
*
*/
#ifndef lint
static char Copyright[] = "Copyright (c) 1995-2006 SRI International. All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lm/src/RCS/WordLattice.cc,v 1.32 2006/01/05 08:44:25 stolcke Exp $";
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "WordLattice.h"
#include "WordAlign.h" /* for *_COST constants */
#include "Array.cc"
#ifdef INSTANTIATE_TEMPLATES
INSTANTIATE_ARRAY(WordLatticeNode);
#endif
/*
* LHash over lattice edges (pairs of nodes)
*/
class NodePair
{
public:
NodePair(unsigned n1 = 0, unsigned n2 = 0) : node1(n1), node2(n2) {} ;
Boolean operator==(NodePair &np)
{ return node1 == np.node1 && node2 == np.node2; };
unsigned node1, node2;
};
#include "LHash.cc"
static ostream &
operator<<(ostream &str, NodePair &key)
{
return str << "(" << key.node1 << "->" << key.node2 << ")";
}
static inline unsigned
LHash_hashKey(NodePair &key, unsigned maxBits)
{
return LHash_hashKey(key.node1 + 10 * key.node2, maxBits);
}
static inline void
Map_noKey(NodePair &key)
{
key.node1 = key.node2 = (unsigned)(-1);
}
inline Boolean
Map_noKeyP(const NodePair &key)
{
return key.node1 == (unsigned)(-1) &&
key.node2 == (unsigned)(-1);
}
#ifdef INSTANTIATE_TEMPLATES
INSTANTIATE_LHASH(NodePair, Boolean);
#endif
/*
* Generic array reversal
*/
template <class T>
void reverseArray(T *array, unsigned length)
{
int i, j; /* j can get negative ! */
for (i = 0, j = length - 1; i < j; i++, j--) {
T x = array[i];
array[i] = array[j];
array[j] = x;
}
}
WordLattice::WordLattice(Vocab &vocab, const char *myname)
: MultiAlign(vocab, myname), numNodes(0), numAligns(0)
{
initial = numNodes ++;
final = numNodes ++;
nodes[initial].numSuccs = 0;
nodes[final].numSuccs = 0;
/*
* Initialize alignment classes so that 0 and 1 correspond to initial and
* final. This makes the job easier in sortAlignedNodes().
*/
nodes[initial].align = numAligns ++;
nodes[final].align = numAligns ++;
}
WordLattice::~WordLattice()
{
if (name != 0) {
free(name);
}
}
WordLatticeNode::WordLatticeNode()
: word(Vocab_None), score(0.0), numSuccs(0), align(NO_ALIGN)
{
}
Boolean
WordLattice::isEmpty()
{
return numNodes == 2 && nodes[initial].numSuccs == 0;
}
Boolean
WordLattice::hasArc(unsigned from, unsigned to, Prob &prob)
{
if (from < numNodes) {
WordLatticeNode &lnode = nodes[from];
for (unsigned i = 0; i < lnode.numSuccs; i++) {
if (lnode.succs[i] == to) {
prob = lnode.probs[i];
return true;
}
}
}
return false;
}
void
WordLattice::addArc(unsigned from, unsigned to, Prob prob)
{
if (from >= numNodes) {
numNodes = from + 1;
}
WordLatticeNode &lnode = nodes[from];
/*
* See if the arc is already there. If so, do nothing.
*/
for (unsigned i = 0; i < lnode.numSuccs; i++) {
if (lnode.succs[i] == to) {
lnode.probs[i] += prob;
return;
}
}
/*
* Add the arc
*/
lnode.succs[lnode.numSuccs] = to;
lnode.probs[lnode.numSuccs] = prob;
lnode.numSuccs += 1;
return;
}
Boolean
WordLattice::write1(File &file)
{
fprintf(file, "initial %u\n", initial);
fprintf(file, "final %u\n", final);
for (unsigned i = 0; i < numNodes; i ++) {
fprintf(file, "node %u %s %g", i,
nodes[i].word == Vocab_None ?
"NULL" : vocab.getWord(nodes[i].word),
nodes[i].score);
for (unsigned j = 0; j < nodes[i].numSuccs; j ++) {
fprintf(file, " %u", nodes[i].succs[j]);
}
fprintf(file, "\n");
}
return true;
}
Boolean
WordLattice::write(File &file)
{
fprintf(file, "version 2\n");
if (name != 0) {
fprintf(file, "name %s\n", name);
}
fprintf(file, "initial %u\n", initial);
fprintf(file, "final %u\n", final);
for (unsigned i = 0; i < numNodes; i ++) {
fprintf(file, "node %u %s %u %g", i,
nodes[i].word == Vocab_None ?
"NULL" : vocab.getWord(nodes[i].word),
nodes[i].align,
nodes[i].score);
for (unsigned j = 0; j < nodes[i].numSuccs; j ++) {
fprintf(file, " %u %lg", nodes[i].succs[j],
(double)nodes[i].probs[j]);
}
fprintf(file, "\n");
}
return true;
}
Boolean
WordLattice::read1(File &file)
{
char *line;
while (line = file.getline()) {
unsigned arg1;
char arg2[100];
double arg3;
unsigned parsed;
unsigned version;
if (sscanf(line, "version %u", &version) == 1) {
if (version == 1) {
continue;
} else if (version == 2) {
return read(file);
} else {
file.position() << "unknown version\n";
return false;
}
} else if (sscanf(line, "initial %u", &initial) == 1) {
continue;
} else if (sscanf(line, "final %u", &final) == 1) {
continue;
} else if (sscanf(line, "node %u %100s %lg%n",
&arg1, arg2, &arg3, &parsed) == 3)
{
WordLatticeNode &node = nodes[arg1];
if (arg1 >= numNodes) {
numNodes = arg1 + 1;
}
node.word = (strcmp(arg2, "NULL") == 0) ?
Vocab_None : vocab.addWord(arg2);
node.score = arg3;
node.align = NO_ALIGN;
node.numSuccs = 0;
char *cp = line + parsed;
while (sscanf(cp, "%u%n", &arg1, &parsed) == 1) {
node.succs[node.numSuccs++] = arg1;
cp += parsed;
}
} else {
file.position() << "unknown keyword\n";
return false;
}
}
return true;
}
Boolean
WordLattice::read(File &file)
{
char *line;
while (line = file.getline()) {
unsigned arg1;
char arg2[100];
double arg3;
unsigned arg4;
unsigned parsed;
unsigned version;
if (sscanf(line, "version %u", &version) == 1) {
if (version == 1) {
return read1(file);
} else if (version == 2) {
continue;
} else {
file.position() << "unknown version\n";
return false;
}
} else if (sscanf(line, "name %100s", arg2) == 1) {
if (name != 0) {
free(name);
}
name = strdup(arg2);
assert(name != 0);
} else if (sscanf(line, "initial %u", &initial) == 1) {
continue;
} else if (sscanf(line, "final %u", &final) == 1) {
continue;
} else if (sscanf(line, "node %u %100s %u %lg%n",
&arg1, arg2, &arg4, &arg3, &parsed) == 4)
{
WordLatticeNode &node = nodes[arg1];
if (arg1 >= numNodes) {
numNodes = arg1 + 1;
}
node.word = (strcmp(arg2, "NULL") == 0) ?
Vocab_None : vocab.addWord(arg2);
node.score = arg3;
node.align = arg4;
node.numSuccs = 0;
char *cp = line + parsed;
double prob;
while (sscanf(cp, "%u %lg%n", &arg1, &prob, &parsed) == 2) {
node.succs[node.numSuccs] = arg1;
node.probs[node.numSuccs] = prob;
node.numSuccs += 1;
cp += parsed;
}
} else {
file.position() << "unknown keyword\n";
return false;
}
}
return true;
}
/*
* sortNodes --
* Sort node indices topologically
*
* Result:
* The number of reachable nodes.
*
* Side effects:
* sortedNodes is filled with the sorted node indices.
*/
unsigned
WordLattice::sortNodes(unsigned *sortedNodes)
{
makeArray(Boolean, visitedNodes, numNodes);
for (unsigned i = 0; i < numNodes; i ++) {
visitedNodes[i] = false;
}
unsigned numVisited = 0;
sortNodesRecursive(initial, numVisited, sortedNodes, visitedNodes);
/*
* reverse the node order from the way we generated it
*/
reverseArray(sortedNodes, numVisited);
return numVisited;
}
void
WordLattice::sortNodesRecursive(unsigned index, unsigned &numVisited,
unsigned *sortedNodes, Boolean *visitedNodes)
{
if (visitedNodes[index]) {
return;
}
visitedNodes[index] = true;
WordLatticeNode &lNode = nodes[index];
for (unsigned i = 0; i < lNode.numSuccs; i ++) {
sortNodesRecursive(lNode.succs[i], numVisited,
sortedNodes, visitedNodes);
}
sortedNodes[numVisited++] = index;
}
/*
* sortAlignedNodes --
* Sort node indices topologically, keeping nodes with same alignment
* class adjacent.
*
* Result:
* The number of reachable nodes.
*
* Side effects:
* sortedNodes is filled with the sorted node indices.
* Adjecent alignment classes that are ordered (have no arcs between
* them) are merged.
*/
unsigned
WordLattice::sortAlignedNodes(unsigned *sortedNodes)
{
/*
* Create a lattice of alignment classes and build a
* a homomorphic image of the word lattice
*/
WordLattice alat(vocab);
unsigned i;
assert(nodes[initial].align == initial);
assert(nodes[final].align == final);
for (i = 0; i < numNodes; i ++) {
WordLatticeNode &from = nodes[i];
for (unsigned j = 0; j < from.numSuccs; j ++) {
assert(from.align != NO_ALIGN);
assert(nodes[from.succs[j]].align != NO_ALIGN);
alat.addArc(from.align,
nodes[from.succs[j]].align,
from.probs[j]);
}
}
/*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -