⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wordlattice.cc

📁 这是一款很好用的工具包
💻 CC
📖 第 1 页 / 共 3 页
字号:
	cerr << "WARNING: bestPath2 called with unreachable nodes\n";
    }
    assert(sortedNodes[numReachable - 1] == final);

    /*
     * Create arrays to keep track of error counts by node and by edge
     */
    Array<AlignErrors> nodeErrors(0, numNodes);
    Array< Array<AlignErrors> > edgeErrors(0, numNodes);

    /*
     * Create the set of pending edges, initially empty
     */
    LHash<NodePair,Boolean> pendingEdges;

    unsigned numWords = 0;	/* result word count */
    double totalSubs = 0.0;
    double totalInss = 0.0;
    double totalDels = 0.0;

    for (unsigned i = 0; i < numReachable; ) {
	/*
	 * Find the end of the current alignment class.
	 * Also total the posterior probs for this class.
	 */
	Prob totalAlignPost = nodes[sortedNodes[i]].score;
	unsigned endOfAlign = i + 1;

	while (endOfAlign < numReachable &&
	       nodes[sortedNodes[i]].align ==
			nodes[sortedNodes[endOfAlign]].align)
	{
	    totalAlignPost += nodes[sortedNodes[endOfAlign]].score;
	    endOfAlign ++;
	}

	/*
	 * Compute insertion/deletion errors:
	 *	assign deletion errors to edges straddling the alignment class
	 *	assign insertion errors to nodes in the alignment class
	 *	compute total posterior for all unfinished edges
	 */
	Prob totalEdgePost = 0.0;
	LHashIter<NodePair,Boolean> edgeIter(pendingEdges);
        NodePair currEdge;
	while (edgeIter.next(currEdge)) {
	    /*
	     * Check if edge terminates with current alignment class
	     */
	    Boolean edgeEnds = false;

	    for (unsigned k = i; k < endOfAlign; k ++) {
		if (currEdge.node2 == sortedNodes[k]) {
		    edgeEnds = true;
		    break;
		}
	    }

	    if (edgeEnds) {
		//cerr << "finishing edge " << currEdge
		//     << " dels = " 
		//     << edgeErrors[currEdge.node1][currEdge.node2].dels
		//     << endl;
		pendingEdges.remove(currEdge);
	    } else {
		Prob edgePost;
		hasArc(currEdge.node1, currEdge.node2, edgePost);

		totalEdgePost += edgePost;

		edgeErrors[currEdge.node1][currEdge.node2].dels +=
								totalAlignPost;

		for (unsigned k = i; k < endOfAlign; k ++) {
		    nodeErrors[sortedNodes[k]].inss += edgePost;
		}
	    }
	}

	/*
	 * Compute substitution errors
	 * and start edges emanating from this alignment class.
	 * Also compute word with highest posterior.
	 */
	VocabIndex bestWord = Vocab_None;
	Prob bestPosterior = totalEdgePost;

	for (unsigned k = i; k < endOfAlign; k ++) {
	    unsigned currNode = sortedNodes[k];

	    if (nodes[currNode].score > bestPosterior) {
		bestWord = nodes[currNode].word;
		bestPosterior = nodes[currNode].score;
	    }

	    nodeErrors[currNode].subs += totalAlignPost - nodes[currNode].score;
	    //cerr << "node " << currNode 
	    //	 << " subs = " << nodeErrors[currNode].subs
	    //   << " inss = " << nodeErrors[currNode].inss << endl;

	    for (unsigned j = 0; j < nodes[currNode].numSuccs; j ++) {
		NodePair newEdge(currNode, nodes[currNode].succs[j]);
		pendingEdges.insert(newEdge);

		//cerr << "starting edge " << newEdge << endl;
	    }
	}

	/*
	 * Save best word for this position
	 */
	if (sortedNodes[i] == initial || sortedNodes[i] == final) {
	    ;
	} else if (bestWord == Vocab_None) {
	    totalDels += totalAlignPost;
	} else {
	    if (numWords < length) {
		words[numWords ++] = bestWord;
	    }

	    totalInss += totalEdgePost;
	    totalSubs += totalAlignPost - bestPosterior;
	}

	/*
	 * Advance to next alignment class
	 */
	i = endOfAlign;
    }

    /*
     * With the "no viterbi" option we return the sequence of most probable
     * words.
     */
    if (flags|WORDLATTICE_NOVITERBI) {
	if (numWords < length) {
	    words[numWords] = Vocab_None;
	}

	sub = totalSubs;
	ins = totalInss;
	del = totalDels;
	return totalSubs + totalInss + totalDels;
    }
	    
    /*
     * Viterbi search for path with lowest total error
     */
    Array<ChartEntry> chart(0, numNodes);

    unsigned j;
    for (j = 0; j < numNodes; j ++) {
	chart[j].errs = 0.0;
	chart[j].pred = NO_PRED;
	chart[j].nnodes = 0;
	chart[j].subs = chart[j].inss = chart[j].dels = 0.0;
    }
    
    for (j = 0; j < numReachable; j ++) {
	unsigned curr = sortedNodes[j];
	WordLatticeNode &node = nodes[curr];

	if (curr != initial && chart[curr].pred == NO_PRED) continue;

	for (unsigned s = 0; s < node.numSuccs; s ++) {
	    unsigned next = node.succs[s];

	    double newSubs = chart[curr].subs +
				  edgeErrors[curr][next].subs +
				  nodeErrors[next].subs;
	    double newInss = chart[curr].inss +
				  edgeErrors[curr][next].inss +
				  nodeErrors[next].inss;
	    double newDels = chart[curr].dels +
				  edgeErrors[curr][next].dels +
				  nodeErrors[next].dels;
	    double newErrs = newSubs + newInss + newDels;

	    if (chart[next].pred == NO_PRED || newErrs < chart[next].errs) {
		chart[next].pred = curr;
		chart[next].errs = newErrs;
		chart[next].subs = newSubs;
		chart[next].inss = newInss;
		chart[next].dels = newDels;
		chart[next].nnodes = chart[curr].nnodes + 1;
	    }
	}
    }

    /*
     * Trace back the best path
     */

    if (chart[final].pred == NO_PRED) {
	cerr << "WARNING: minimizeWordError: final node not reachable\n";

	if (length > 0) {
	    words[0] = Vocab_None;
	}

	expectedError = 0.0;
    } else {

	sub = chart[final].subs;
	ins = chart[final].inss;
	del = chart[final].dels;

	unsigned curr = final;
	numWords = 0;
	while (numWords < length && curr != NO_PRED) {
	    if (nodes[curr].word != Vocab_None) {
		words[numWords ++] = nodes[curr].word;
	    }
	    curr = chart[curr].pred;
	}
	if (numWords < length) {
	    words[numWords] = Vocab_None;
	}

	if (curr != NO_PRED) {
	    cerr << "WARNING: minimizeWordError: word buffer too short\n";
	}

	Vocab::reverse(words);

	expectedError = chart[final].errs;
    }
    return expectedError;
}

/*
 * wordError --
 *	compute minimal word error of path through lattice
 */
unsigned
WordLattice::wordError(const VocabIndex *words,
				    unsigned &sub, unsigned &ins, unsigned &del)
{
    unsigned numWords = Vocab::length(words);

    /*
     * The states indexing the DP chart correspond to lattice nodes.
     */
    const unsigned NO_PRED = (unsigned)(-1);	// default for pred link

    makeArray(unsigned, sortedNodes, numNodes);

    unsigned numReachable = sortNodes(sortedNodes);
    if (numReachable != numNodes) {
	cerr << "WARNING: alignWords called with unreachable nodes\n";
    }

    /*
     * Allocate the DP chart.
     * chartEntries are indexed by [word_position][lattice_node],
     * where word_position = 0 is the  left string margin,
     * word_position = numWords + 1 is the right string margin.
     */
    typedef struct {
	unsigned cost;		// minimal path cost to this state
	unsigned ins, del, sub; // error counts by type
	unsigned predNode;	// predecessor state used in getting there
	ErrorType errType;	// error type
    } ChartEntry;

    ChartEntry **chart = new ChartEntry *[numWords + 2];
    assert(chart != 0);

    unsigned i;
    for (i = 0; i <= numWords + 1; i ++) {
	chart[i] = new ChartEntry[numNodes];
	assert(chart[i] != 0);
	for (unsigned j = 0; j < numNodes; j ++) {
	    chart[i][j].cost = MAX_COST;
	    chart[i][j].sub = chart[i][j].ins = chart[i][j].del = 0;
	    chart[i][j].predNode = NO_PRED;
	    chart[i][j].errType = ERR_NONE;
	}
    }

    /*
     * Prime the chart by anchoring the alignment at the left edge
     */
    chart[0][initial].cost = 0;
    chart[0][initial].ins = chart[0][initial].del = chart[0][initial].sub = 0;
    chart[0][initial].predNode = initial;
    chart[0][initial].errType = ERR_NONE;

    /*
     * Insertions before the first word
     * NOTE: since we process nodes in topological order this
     * will allow chains of multiple insertions.
     */
    for (unsigned j = 0; j < numReachable; j ++) {
	unsigned curr = sortedNodes[j];
	WordLatticeNode &node = nodes[curr];
	unsigned insCost = chart[0][curr].cost + INS_COST;

	if (insCost >= MAX_COST) continue;

	for (unsigned s = 0; s < node.numSuccs; s ++) {
	    unsigned next = node.succs[s];

	    if (insCost < chart[0][next].cost) {
		chart[0][next].cost = insCost;
		chart[0][next].ins = chart[0][curr].ins + 1;
		chart[0][next].del = chart[0][curr].del;
		chart[0][next].sub = chart[0][curr].sub;
		chart[0][next].predNode = curr;
		chart[0][next].errType = ERR_INS;
	    }
	}
    }

    /*
     * For all word positions, compute minimal cost alignment for each
     * state.
     */
    for (i = 1; i <= numWords + 1; i ++) {
	/*
	 * Compute partial alignment cost for all lattice nodes
	 */
	unsigned j;
	for (j = 0; j < numReachable; j ++) {
	    unsigned curr = sortedNodes[j];
	    WordLatticeNode &node = nodes[curr];
	    unsigned cost = chart[i - 1][curr].cost;

	    if (cost >= MAX_COST) continue;

	    /*
	     * Deletion error: current word not matched by lattice
	     */
	    {
		unsigned delCost = cost + DEL_COST;

		if (delCost < chart[i][curr].cost) {
		    chart[i][curr].cost = delCost;
		    chart[i][curr].del = chart[i - 1][curr].del + 1;
		    chart[i][curr].ins = chart[i - 1][curr].ins;
		    chart[i][curr].sub = chart[i - 1][curr].sub;
		    chart[i][curr].predNode = curr;
		    chart[i][curr].errType = ERR_DEL;
		}
	    }

	    /*
	     * Substitution errors
	     */
	    for (unsigned s = 0; s < node.numSuccs; s ++) {
		unsigned next = node.succs[s];
		unsigned haveSub =
			(nodes[next].word == words[i - 1]) ? 0 : 1;
		unsigned subCost = cost + haveSub * SUB_COST;

		if (subCost < chart[i][next].cost) {
		    chart[i][next].cost = subCost;
		    chart[i][next].sub = chart[i - 1][curr].sub + haveSub;
		    chart[i][next].ins = chart[i - 1][curr].ins;
		    chart[i][next].del = chart[i - 1][curr].del;
		    chart[i][next].predNode = curr;
		    chart[i][next].errType = haveSub ? ERR_SUB : ERR_NONE;
		}
	    }
	}

	for (j = 0; j < numReachable; j ++) {
	    unsigned curr = sortedNodes[j];
	    WordLatticeNode &node = nodes[curr];
	    unsigned insCost = chart[i][curr].cost + INS_COST;

	    if (insCost >= MAX_COST) continue;

	    /*
	     * Insertion errors: lattice node not matched by word
	     * NOTE: since we process nodes in topological order this
	     * will allow chains of multiple insertions.
	     */
	    for (unsigned s = 0; s < node.numSuccs; s ++) {
		unsigned next = node.succs[s];

		if (insCost < chart[i][next].cost) {
		    chart[i][next].cost = insCost;
		    chart[i][next].ins = chart[i][curr].ins + 1;
		    chart[i][next].del = chart[i][curr].del;
		    chart[i][next].sub = chart[i][curr].sub;
		    chart[i][next].predNode = curr;
		    chart[i][next].errType = ERR_INS;
		}
	    }
	}
    }

    if (chart[numWords + 1][final].predNode == NO_PRED) {
	/*
	 * Final node is unreachable
	 */
	sub = ins = 0;
	del = numWords;
    } else {
	sub = chart[numWords + 1][final].sub;
	ins = chart[numWords + 1][final].ins;
	del = chart[numWords + 1][final].del;
    }

    for (i = 0; i <= numWords + 1; i ++) {
	delete [] chart[i];
    }
    delete [] chart;

    return sub + ins + del;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -