⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wordalign.cc

📁 这是一款很好用的工具包
💻 CC
字号:
/*
 * WordAlign.cc --
 *	Word alignment and error computation
 *
 */

#ifndef lint
static char Copyright[] = "Copyright (c) 1995,1997,2000 SRI International.  All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lm/src/RCS/WordAlign.cc,v 1.10 2002/05/24 19:03:49 stolcke Exp $";
#endif

#include "WordAlign.h"

/*
 * wordError --
 *	Compute word error counts between hyp and a ref word string
 *
 * Result:
 *	Total word error count
 *
 * Side effect:
 *	sub, ins, del are set to the number of substitution, insertion and
 *	deletion errors, respectively.
 *	If alignment != 0 the array is filled in with the error/alignment
 *	type of each hyp word (CORR_ALIGN, SUB_ALIGN, INS_ALIGN).  Deleted
 *	words are indicated by DEL_ALIGN.  The alignment is terminated by
 *	END_ALIGN.
 */
unsigned
wordError(const VocabIndex *ref, const VocabIndex *hyp,
			unsigned &sub, unsigned &ins, unsigned &del,
			WordAlignType *alignment)
{
    unsigned hypLength = Vocab::length(hyp);
    unsigned refLength = Vocab::length(ref);

    typedef struct {
	unsigned cost;			// minimal cost of partial alignment
	WordAlignType error;		// best predecessor
    } ChartEntry;

    /* 
     * Allocate chart statically, enlarging on demand
     */
    static unsigned maxHypLength = 0;
    static unsigned maxRefLength = 0;
    static ChartEntry **chart = 0;

    if (chart == 0 || hypLength > maxHypLength || refLength > maxRefLength) {
	/*
	 * Free old chart
	 */
	if (chart != 0) {
	    for (unsigned i = 0; i <= maxRefLength; i ++) {
		delete [] chart[i];
	    }
	    delete [] chart;
	}

	/*
	 * Allocate new chart
	 */
	maxHypLength = hypLength;
	maxRefLength = refLength;
    
	chart = new ChartEntry*[maxRefLength + 1];
	assert(chart != 0);

	unsigned i, j;

	for (i = 0; i <= maxRefLength; i ++) {
	    chart[i] = new ChartEntry[maxHypLength + 1];
	    assert(chart[i] != 0);
	}

	/*
	 * Initialize the 0'th row and column, which never change
	 */
	chart[0][0].cost = 0;
	chart[0][0].error = CORR_ALIGN;

	/*
	 * Initialize the top-most row in the alignment chart
	 * (all words inserted).
	 */
	for (j = 1; j <= maxHypLength; j ++) {
	    chart[0][j].cost = chart[0][j-1].cost + INS_COST;
	    chart[0][j].error = INS_ALIGN;
	}

	for (i = 1; i <= maxRefLength; i ++) {
	    chart[i][0].cost = chart[i-1][0].cost + DEL_COST;
	    chart[i][0].error = DEL_ALIGN;
	}
    }

    /*
     * Fill in the rest of the chart, row by row.
     */
    for (unsigned i = 1; i <= refLength; i ++) {

	for (unsigned j = 1; j <= hypLength; j ++) {
	    unsigned minCost;
	    WordAlignType minError;

	    if (hyp[j-1] == ref[i-1]) {
		minCost = chart[i-1][j-1].cost;
		minError = CORR_ALIGN;
	    } else {
		minCost = chart[i-1][j-1].cost + SUB_COST;
		minError = SUB_ALIGN;
	    }

	    unsigned delCost = chart[i-1][j].cost + DEL_COST;
	    if (delCost < minCost) {
		minCost = delCost;
		minError = DEL_ALIGN;
	    }

	    unsigned insCost = chart[i][j-1].cost + INS_COST;
	    if (insCost < minCost) {
		minCost = insCost;
		minError = INS_ALIGN;
	    }

	    chart[i][j].cost = minCost;
	    chart[i][j].error = minError;
	}
    }

    /*
     * Backtrace
     */
    unsigned totalErrors;

    {
	unsigned i = refLength;
	unsigned j = hypLength;
	unsigned k = 0;

	sub = del = ins = 0;

	while (i > 0 || j > 0) {

	    switch (chart[i][j].error) {
	    case CORR_ALIGN:
		i --; j --;
		if (alignment != 0) {
		    alignment[k] = CORR_ALIGN;
		}
		break;
	    case SUB_ALIGN:
		i --; j --;
		sub ++;
		if (alignment != 0) {
		    alignment[k] = SUB_ALIGN;
		}
		break;
	    case DEL_ALIGN:
		i --;
		del ++;
		if (alignment != 0) {
		    alignment[k] = DEL_ALIGN;
		}
		break;
	    case INS_ALIGN:
		j --;
		ins ++;
		if (alignment != 0) {
		    alignment[k] = INS_ALIGN;
		}
		break;
	    }

	    k ++;
	}

	/*
	 * Now reverse the alignment to make the order correspond to words
	 */
	if (alignment) {
	    int k1, k2;	/* k2 can get negative ! */

	    for (k1 = 0, k2 = k - 1; k1 < k2; k1++, k2--) {
		WordAlignType x = alignment[k1];
		alignment[k1] = alignment[k2];
		alignment[k2] = x;
	    }

	    alignment[k] = END_ALIGN;
	}
	
	totalErrors = sub + del + ins;
    }

    return totalErrors;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -