⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wordlattice.cc

📁 这是一款很好用的工具包
💻 CC
📖 第 1 页 / 共 3 页
字号:
/*
 * WordLattice.cc --
 *	Word lattices
 *
 */

#ifndef lint
static char Copyright[] = "Copyright (c) 1995-2006 SRI International.  All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lm/src/RCS/WordLattice.cc,v 1.32 2006/01/05 08:44:25 stolcke Exp $";
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "WordLattice.h"

#include "WordAlign.h"			/* for *_COST constants */

#include "Array.cc"
#ifdef INSTANTIATE_TEMPLATES
INSTANTIATE_ARRAY(WordLatticeNode);
#endif

/*
 * LHash over lattice edges (pairs of nodes)
 */

class NodePair
{
public:
    NodePair(unsigned n1 = 0, unsigned n2 = 0) : node1(n1), node2(n2) {} ;

    Boolean operator==(NodePair &np)
	{ return node1 == np.node1 && node2 == np.node2; };
    unsigned node1, node2;
};

#include "LHash.cc"

static ostream &
operator<<(ostream &str, NodePair &key)
{
    return str << "(" << key.node1 << "->" << key.node2 << ")";
}

static inline unsigned
LHash_hashKey(NodePair &key, unsigned maxBits)
{
    return LHash_hashKey(key.node1 + 10 * key.node2, maxBits);
}

static inline void
Map_noKey(NodePair &key)
{
    key.node1 = key.node2 = (unsigned)(-1);
}

inline Boolean
Map_noKeyP(const NodePair &key)
{
    return key.node1 == (unsigned)(-1) &&
           key.node2 == (unsigned)(-1);
}

#ifdef INSTANTIATE_TEMPLATES
INSTANTIATE_LHASH(NodePair, Boolean);
#endif

/*
 * Generic array reversal
 */
template <class T>
void reverseArray(T *array, unsigned length)
{
    int i, j;	/* j can get negative ! */

    for (i = 0, j = length - 1; i < j; i++, j--) {
	T x = array[i];
	array[i] = array[j];
	array[j] = x;
    }
}

WordLattice::WordLattice(Vocab &vocab, const char *myname)
    : MultiAlign(vocab, myname), numNodes(0), numAligns(0)
{
    initial = numNodes ++;
    final = numNodes ++;

    nodes[initial].numSuccs = 0;
    nodes[final].numSuccs = 0;

    /*
     * Initialize alignment classes so that 0 and 1 correspond to initial and
     * final. This makes the job easier in sortAlignedNodes().
     */
    nodes[initial].align = numAligns ++;
    nodes[final].align = numAligns ++;
}

WordLattice::~WordLattice()
{
    if (name != 0) {
	free(name);
    }
}

WordLatticeNode::WordLatticeNode()
    : word(Vocab_None), score(0.0), numSuccs(0), align(NO_ALIGN)
{
}

Boolean
WordLattice::isEmpty()
{
    return numNodes == 2 && nodes[initial].numSuccs == 0;
}

Boolean
WordLattice::hasArc(unsigned from, unsigned to, Prob &prob)
{
    if (from < numNodes) {
	WordLatticeNode &lnode = nodes[from];

	for (unsigned i = 0; i < lnode.numSuccs; i++) {
	    if (lnode.succs[i] == to) {
		prob = lnode.probs[i];
		return true;
	    }
	}
    }
    return false;
}

void
WordLattice::addArc(unsigned from, unsigned to, Prob prob)
{
    if (from >= numNodes) {
	numNodes = from + 1;
    }

    WordLatticeNode &lnode = nodes[from];

    /*
     * See if the arc is already there.  If so, do nothing.
     */
    for (unsigned i = 0; i < lnode.numSuccs; i++) {
	if (lnode.succs[i] == to) {
	    lnode.probs[i] += prob;
	    return;
	}
    }

    /*
     * Add the arc
     */
    lnode.succs[lnode.numSuccs] = to;
    lnode.probs[lnode.numSuccs] = prob;
    lnode.numSuccs += 1;
    return;
}

Boolean
WordLattice::write1(File &file)
{
    fprintf(file, "initial %u\n", initial);
    fprintf(file, "final %u\n", final);

    for (unsigned i = 0; i < numNodes; i ++) {
	fprintf(file, "node %u %s %g", i,
		nodes[i].word == Vocab_None ?
			"NULL" : vocab.getWord(nodes[i].word),
		nodes[i].score);

	for (unsigned j = 0; j < nodes[i].numSuccs; j ++) {
	    fprintf(file, " %u", nodes[i].succs[j]);
	}
	fprintf(file, "\n");
    }
    return true;
}

Boolean
WordLattice::write(File &file)
{
    fprintf(file, "version 2\n");
    if (name != 0) {
	fprintf(file, "name %s\n", name);
    }
    fprintf(file, "initial %u\n", initial);
    fprintf(file, "final %u\n", final);

    for (unsigned i = 0; i < numNodes; i ++) {
	fprintf(file, "node %u %s %u %g", i,
		nodes[i].word == Vocab_None ?
			"NULL" : vocab.getWord(nodes[i].word),
		nodes[i].align,
		nodes[i].score);

	for (unsigned j = 0; j < nodes[i].numSuccs; j ++) {
	    fprintf(file, " %u %lg", nodes[i].succs[j],
				(double)nodes[i].probs[j]);
	}
	fprintf(file, "\n");
    }
    return true;
}

Boolean
WordLattice::read1(File &file)
{
    char *line;

    while (line = file.getline()) {
	unsigned arg1;
	char arg2[100];
	double arg3;
	unsigned parsed;
	unsigned version;

	if (sscanf(line, "version %u", &version) == 1) {
	    if (version == 1) {
		continue;
	    } else if (version == 2) {
		return read(file);
	    } else {
		file.position() << "unknown version\n";
		return false;
	    }
	} else if (sscanf(line, "initial %u", &initial) == 1) {
	    continue;
	} else if (sscanf(line, "final %u", &final) == 1) {
	    continue;
	} else if (sscanf(line, "node %u %100s %lg%n",
				&arg1, arg2, &arg3, &parsed) == 3)
	{
	    
	    WordLatticeNode &node = nodes[arg1];
	    if (arg1 >= numNodes) {
		numNodes = arg1 + 1;
	    }

	    node.word = (strcmp(arg2, "NULL") == 0) ?
				Vocab_None : vocab.addWord(arg2);
	    node.score = arg3;
	    node.align = NO_ALIGN;
	    node.numSuccs = 0;

	    char *cp = line + parsed;
	    while (sscanf(cp, "%u%n", &arg1, &parsed) == 1) {
		node.succs[node.numSuccs++] = arg1;
		cp += parsed;
	    }
	} else {
	    file.position() << "unknown keyword\n";
	    return false;
	}
    }
    return true;
}

Boolean
WordLattice::read(File &file)
{
    char *line;

    while (line = file.getline()) {
	unsigned arg1;
	char arg2[100];
	double arg3;
	unsigned arg4;
	unsigned parsed;
	unsigned version;

	if (sscanf(line, "version %u", &version) == 1) {
	    if (version == 1) {
		return read1(file);
	    } else if (version == 2) {
		continue;
	    } else {
		file.position() << "unknown version\n";
		return false;
	    }
	} else if (sscanf(line, "name %100s", arg2) == 1) {
	    if (name != 0) {
		free(name);
	    }
	    name = strdup(arg2);
	    assert(name != 0);
	} else if (sscanf(line, "initial %u", &initial) == 1) {
	    continue;
	} else if (sscanf(line, "final %u", &final) == 1) {
	    continue;
	} else if (sscanf(line, "node %u %100s %u %lg%n",
				&arg1, arg2, &arg4, &arg3, &parsed) == 4)
	{
	    WordLatticeNode &node = nodes[arg1];
	    if (arg1 >= numNodes) {
		numNodes = arg1 + 1;
	    }

	    node.word = (strcmp(arg2, "NULL") == 0) ?
				Vocab_None : vocab.addWord(arg2);
	    node.score = arg3;
	    node.align = arg4;
	    node.numSuccs = 0;

	    char *cp = line + parsed;
	    double prob;
	    while (sscanf(cp, "%u %lg%n", &arg1, &prob, &parsed) == 2) {
		node.succs[node.numSuccs] = arg1;
		node.probs[node.numSuccs] = prob;
		node.numSuccs += 1;
		cp += parsed;
	    }
	} else {
	    file.position() << "unknown keyword\n";
	    return false;
	}
    }
    return true;
}

/*
 * sortNodes --
 *	Sort node indices topologically
 *
 * Result:
 *	The number of reachable nodes.
 *
 * Side effects:
 *	sortedNodes is filled with the sorted node indices.
 */
unsigned
WordLattice::sortNodes(unsigned *sortedNodes)
{
    makeArray(Boolean, visitedNodes, numNodes);

    for (unsigned i = 0; i < numNodes; i ++) {
	visitedNodes[i] = false;
    }

    unsigned numVisited = 0;

    sortNodesRecursive(initial, numVisited, sortedNodes, visitedNodes);
    
    /*
     * reverse the node order from the way we generated it
     */
    reverseArray(sortedNodes, numVisited);

    return numVisited;
}

void
WordLattice::sortNodesRecursive(unsigned index, unsigned &numVisited,
			unsigned *sortedNodes, Boolean *visitedNodes)
{
    if (visitedNodes[index]) {
	return;
    }

    visitedNodes[index] = true;

    WordLatticeNode &lNode = nodes[index];

    for (unsigned i = 0; i < lNode.numSuccs; i ++) {
	sortNodesRecursive(lNode.succs[i], numVisited, 
						sortedNodes, visitedNodes);
    }

    sortedNodes[numVisited++] = index;
}

/*
 * sortAlignedNodes --
 *	Sort node indices topologically, keeping nodes with same alignment
 *	class adjacent.
 *
 * Result:
 *	The number of reachable nodes.
 *
 * Side effects:
 *	sortedNodes is filled with the sorted node indices.
 *	Adjecent alignment classes that are ordered (have no arcs between
 *	them) are merged.
 */
unsigned
WordLattice::sortAlignedNodes(unsigned *sortedNodes)
{
    /*
     * Create a lattice of alignment classes and build a
     * a homomorphic image of the word lattice
     */
    WordLattice alat(vocab);
    unsigned i;

    assert(nodes[initial].align == initial);
    assert(nodes[final].align == final);

    for (i = 0; i < numNodes; i ++) {
	WordLatticeNode &from = nodes[i];

	for (unsigned j = 0; j < from.numSuccs; j ++) {
	    assert(from.align != NO_ALIGN);
	    assert(nodes[from.succs[j]].align != NO_ALIGN);

	    alat.addArc(from.align,
			nodes[from.succs[j]].align,
			from.probs[j]);
	}
    }

    /*

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -