📄 wordmesh.cc

📁 这是一款很好用的工具包
💻 CC
📖 第 1 页 / 共 3 页
字号:
 * Alignment positions are integers corresponding to word equivalence classes.
 * They are assigned in increasing order; hence numerical order does not
 * correspond to topological (temporal) order.
 *
 *	- 'from' specifies the alignment position just BEFORE the first word.
 *	  A value of numAligns means the first word should start the alignment.
 *	- 'to' specifies the alignment position just AFTER the last word.
 *	  A value of numAligns means the last word should end the alignment.
 *
 * Returns false if the 'from' position does not strictly precede the 'to'.
 */
Boolean
WordMesh::alignWords(const NBestWordInfo *winfo, Prob score,
		    	Prob *wordScores, const HypID *hypID,
			unsigned from, unsigned to, unsigned *wordAlignment) 
{
    /*
     * Compute word string length
     */
    unsigned hypLength = 0;
    for (unsigned i = 0; winfo[i].word != Vocab_None; i ++) hypLength ++;

    /*
     * Locate start and end positions to align to
     */
    unsigned fromPos = 0;
    unsigned refLength = 0;

    if (numAligns > 0) {
	unsigned toPos = numAligns - 1;

	for (unsigned p = 0; p < numAligns; p ++) {
	    if (sortedAligns[p] == from) fromPos = p + 1;
	    if (sortedAligns[p] == to) toPos = p - 1;
	}

    	refLength = toPos - fromPos + 1;

        if (toPos + 1 < fromPos) {
	    
	    return false;
	}
    }
    Boolean fullAlignment = (refLength == numAligns);


    typedef struct {
	double cost;			// minimal cost of partial alignment
	WordAlignType error;		// best predecessor
    } ChartEntry;

    /* 
     * Allocate chart statically, enlarging on demand
     */
    static unsigned maxHypLength = 0;
    static unsigned maxRefLength = 0;
    static ChartEntry **chart = 0;

    if (chart == 0 || hypLength > maxHypLength || refLength > maxRefLength) {
	/*
	 * Free old chart
	 */
	if (chart != 0) {
	    for (unsigned i = 0; i <= maxRefLength; i ++) {
		delete [] chart[i];
	    }
	    delete [] chart;
	}

	/*
	 * Allocate new chart
	 */
	maxHypLength = hypLength;
	maxRefLength = refLength;
    
	chart = new ChartEntry*[maxRefLength + 1];
	assert(chart != 0);

	for (unsigned i = 0; i <= maxRefLength; i ++) {
	    chart[i] = new ChartEntry[maxHypLength + 1];
	    assert(chart[i] != 0);
	}
    }

    /*
     * Initialize the 0'th row
     */
    chart[0][0].cost = 0.0;
    chart[0][0].error = CORR_ALIGN;

    for (unsigned j = 1; j <= hypLength; j ++) {
	chart[0][j].cost = chart[0][j-1].cost +
				alignError(0, totalPosterior, winfo[j-1].word);
	chart[0][j].error = INS_ALIGN;
    }

    /*
     * Fill in the rest of the chart, row by row.
     */
    for (unsigned i = 1; i <= refLength; i ++) {
	double deletePenalty =
		    alignError(aligns[sortedAligns[fromPos+i-1]],
			       columnPosteriors[sortedAligns[fromPos+i-1]],
			       deleteIndex);

	chart[i][0].cost = chart[i-1][0].cost + deletePenalty;
	chart[i][0].error = DEL_ALIGN;

	for (unsigned j = 1; j <= hypLength; j ++) {
	    double minCost = chart[i-1][j-1].cost +
			alignError(aligns[sortedAligns[fromPos+i-1]],
				   columnPosteriors[sortedAligns[fromPos+i-1]],
				   winfo[j-1].word);
	    WordAlignType minError = SUB_ALIGN;

	    double delCost = chart[i-1][j].cost + deletePenalty;
	    if (delCost < minCost) {
		minCost = delCost;
		minError = DEL_ALIGN;
	    }

	    double insCost = chart[i][j-1].cost +
			alignError(0,
			           transPosteriors[sortedAligns[fromPos+i-1]],
				   winfo[j-1].word);
	    if (insCost < minCost) {
		minCost = insCost;
		minError = INS_ALIGN;
	    }

	    chart[i][j].cost = minCost;
	    chart[i][j].error = minError;
	}
    }

    /*
     * Backtrace and add new words to alignment columns.
     * Store word posteriors if requested.
     */
    {
	unsigned i = refLength;
	unsigned j = hypLength;

	while (i > 0 || j > 0) {

	    Prob wordPosterior = score;
	    Prob transPosterior = score;

	    /*
	     * Use word- and transition-specific posteriors if supplied.
	     * Note: the transition posterior INTO the first word is
	     * given on the Vocab_None item at winfo[hypLength].
	     */
	    if (j > 0 && winfo[j-1].wordPosterior != 0.0) {
		wordPosterior = winfo[j-1].wordPosterior;
	    }
	    if (j > 0 && winfo[j-1].transPosterior != 0.0) {
		transPosterior = winfo[j-1].transPosterior;
	    } else if (j == 0 && winfo[hypLength].transPosterior != 0.0) {
		transPosterior = winfo[hypLength].transPosterior;
	    }

	    switch (chart[i][j].error) {

	    case CORR_ALIGN:
	    case SUB_ALIGN:
		*aligns[sortedAligns[fromPos+i-1]]->insert(winfo[j-1].word) +=
								wordPosterior;
		columnPosteriors[sortedAligns[fromPos+i-1]] += wordPosterior;
		transPosteriors[sortedAligns[fromPos+i-1]] += transPosterior;

		/*
		 * Check for preexisting word info and merge if necesssary
		 */
		if (winfo[j-1].valid()) {
		    Boolean foundP;
		    NBestWordInfo *oldInfo =
		    		wordInfo[sortedAligns[fromPos+i-1]]->
						insert(winfo[j-1].word, foundP);
		    if (foundP) {
			oldInfo->merge(winfo[j-1]);
		    } else {
			*oldInfo = winfo[j-1];
		    }
		}

		if (hypID) {
		    /*
		     * Add hyp ID to the hyp list for this word 
		     */
		    Array<HypID> &hypList = 
		    *hypMap[sortedAligns[fromPos+i-1]]->insert(winfo[j-1].word);
		    hypList[hypList.size()] = *hypID;
		}

		if (wordAlignment) {
		    wordAlignment[j-1] = sortedAligns[fromPos+i-1];
		}
		if (wordScores) {
		    wordScores[j-1] =
		    *aligns[sortedAligns[fromPos+i-1]]->find(winfo[j-1].word);
		}

		i --; j --;
		break;
	    case DEL_ALIGN:
		*aligns[sortedAligns[fromPos+i-1]]->insert(deleteIndex) +=
								transPosterior;
		columnPosteriors[sortedAligns[fromPos+i-1]] += transPosterior;
		transPosteriors[sortedAligns[fromPos+i-1]] += transPosterior;

		if (hypID) {
		    /*
		     * Add hyp ID to the hyp list for this word 
		     */
		    Array<HypID> &hypList = 
			*hypMap[sortedAligns[fromPos+i-1]]->insert(deleteIndex);
		    hypList[hypList.size()] = *hypID;
		}

		i --;
		break;
	    case INS_ALIGN:
		/*
		 * Make room for new alignment column in sortedAligns
		 * and position new column
		 */
		for (unsigned k = numAligns; k > fromPos + i; k --) {
		    sortedAligns[k] = sortedAligns[k-1];
		}
		sortedAligns[fromPos + i] = numAligns;

		aligns[numAligns] = new LHash<VocabIndex,Prob>;
		assert(aligns[numAligns] != 0);

		wordInfo[numAligns] = new LHash<VocabIndex,NBestWordInfo>;
		assert(wordInfo[numAligns] != 0);

		hypMap[numAligns] = new LHash<VocabIndex,Array<HypID> >;
		assert(hypMap[numAligns] != 0);

		/*
		 * The transition posterior from the preceding to the following
		 * position becomes the posterior for *delete* at the new
		 * position (that's why we need transition posteriors!).
		 */
		Prob nullPosterior = totalPosterior;
		if (fromPos+i > 0) {
		    nullPosterior = transPosteriors[sortedAligns[fromPos+i-1]];
		}
		if (nullPosterior != 0.0) {
		    *aligns[numAligns]->insert(deleteIndex) = nullPosterior;
		}
		*aligns[numAligns]->insert(winfo[j-1].word) = wordPosterior;
		columnPosteriors[numAligns] = nullPosterior + wordPosterior;
		transPosteriors[numAligns] = nullPosterior + transPosterior;

		/*
		 * Record word info if given
		 */
		if (winfo[j-1].valid()) {
		    *wordInfo[numAligns]->insert(winfo[j-1].word) = winfo[j-1];
		}

		/*
		 * Add all hypIDs previously recorded to the *DELETE*
		 * hypothesis at the newly created position.
		 */
		{
		    Array<HypID> *hypList = 0;

		    SArrayIter<HypID,HypID> myIter(allHyps);
		    HypID id;
		    while (myIter.next(id)) {
			/*
			 * Avoid inserting *DELETE* in hypMap unless needed
			 */
			if (hypList == 0) {
			    hypList = hypMap[numAligns]->insert(deleteIndex);
			}
			(*hypList)[hypList->size()] = id;
		    }
		}

		if (hypID) {
		    /*
		     * Add hyp ID to the hyp list for this word 
		     */
		    Array<HypID> &hypList = 
				*hypMap[numAligns]->insert(winfo[j-1].word);
		    hypList[hypList.size()] = *hypID;
		}

		if (wordAlignment) {
		    wordAlignment[j-1] = numAligns;
		}
		if (wordScores) {
		    wordScores[j-1] = wordPosterior;
		}

		numAligns ++;
		j --;
		break;
	    }
	}

	/* 
	 * Add the transition posterior INTO the FIRST hyp word to
	 * to the transition posterior into the first alignment position
	 * This only applies when doing a partial alignment that doesn't
	 * start at the 0th position.
	 */
	if (fromPos+i > 0) {
	    Prob transPosterior = score;

	    if (winfo[hypLength].transPosterior != 0.0) {
		transPosterior = winfo[hypLength].transPosterior;
	    }

	    transPosteriors[sortedAligns[fromPos+i-1]] += transPosterior;
	}
    }

    /*
     * Add hyp to global list of IDs
     */
    if (hypID) {
	*allHyps.insert(*hypID) = *hypID;
    }

    /*
     * Only change total posterior if the alignment spanned the whole mesh.
     * This ensure totalPosterior reflects the initia/final node posterior
     * in a lattice.
     */
    if (fullAlignment) {
	totalPosterior += score;
    }
    return true;
}

void
WordMesh::alignAlignment(MultiAlign &alignment, Prob score, Prob *alignScores)
{
    WordMesh &other = (WordMesh &)alignment;

    unsigned refLength = numAligns;
    unsigned hypLength = other.numAligns;

    typedef struct {
	double cost;			// minimal cost of partial alignment
	WordAlignType error;		// best predecessor
    } ChartEntry;

    /* 
     * Allocate chart statically, enlarging on demand
     */
    static unsigned maxHypLength = 0;
    static unsigned maxRefLength = 0;
    static ChartEntry **chart = 0;

    if (chart == 0 || hypLength > maxHypLength || refLength > maxRefLength) {
	/*
	 * Free old chart
	 */
	if (chart != 0) {
	    for (unsigned i = 0; i <= maxRefLength; i ++) {
		delete [] chart[i];
	    }
	    delete [] chart;
	}

	/*
	 * Allocate new chart
	 */
	maxHypLength = hypLength;
	maxRefLength = refLength;
    
	chart = new ChartEntry*[maxRefLength + 1];
	assert(chart != 0);

	for (unsigned i = 0; i <= maxRefLength; i ++) {
	    chart[i] = new ChartEntry[maxHypLength + 1];
	    assert(chart[i] != 0);
	}
    }

    /*
     * Initialize the 0'th row
     */
    chart[0][0].cost = 0.0;
    chart[0][0].error = CORR_ALIGN;

    for (unsigned j = 1; j <= hypLength; j ++) {
	unsigned pos = other.sortedAligns[j-1];
	chart[0][j].cost = chart[0][j-1].cost +
		   alignError(0,
			      totalPosterior,
			      other.aligns[other.sortedAligns[j-1]],
			      other.columnPosteriors[other.sortedAligns[j-1]]);
	chart[0][j].error = INS_ALIGN;
    }

    /*
     * Fill in the rest of the chart, row by row.
     */
    for (unsigned i = 1; i <= refLength; i ++) {
	double deletePenalty =
			    alignError(aligns[sortedAligns[i-1]],
				       columnPosteriors[sortedAligns[i-1]],
				       deleteIndex);

	chart[i][0].cost = chart[i-1][0].cost + deletePenalty;
	chart[i][0].error = DEL_ALIGN;

	for (unsigned j = 1; j <= hypLength; j ++) {
	    double minCost = chart[i-1][j-1].cost +
		     alignError(aligns[sortedAligns[i-1]],
				columnPosteriors[sortedAligns[i-1]],
				other.aligns[other.sortedAligns[j-1]],
				other.columnPosteriors[other.sortedAligns[j-1]]);
	    WordAlignType minError = SUB_ALIGN;

	    double delCost = chart[i-1][j].cost + deletePenalty;
	    if (delCost < minCost) {
		minCost = delCost;
		minError = DEL_ALIGN;
	    }

	    double insCost = chart[i][j-1].cost +
		     alignError(0,
				transPosteriors[sortedAligns[i-1]],
				other.aligns[other.sortedAligns[j-1]],
				other.columnPosteriors[other.sortedAligns[j-1]]);
	    if (insCost < minCost) {
		minCost = insCost;
		minError = INS_ALIGN;
	    }

	    chart[i][j].cost = minCost;
	    chart[i][j].error = minError;
	}
    }

    /*
     * Backtrace and add new words to alignment columns.
     * Store word posteriors if requested.
     */
    {
	unsigned i = refLength;
	unsigned j = hypLength;

	while (i > 0 || j > 0) {

	    switch (chart[i][j].error) {
	    case CORR_ALIGN:
	    case SUB_ALIGN:
		/*
		 * merge all words in "other" alignment column into our own
		 */
		{
		    double totalScore = 0.0;

		    LHashIter<VocabIndex,Prob>
				iter(*other.aligns[other.sortedAligns[j-1]]);
		    Prob *otherProb;
		    VocabIndex otherWord;
		    while (otherProb = iter.next(otherWord)) {
			totalScore +=
			    (*aligns[sortedAligns[i-1]]->insert(otherWord) +=
							    score * *otherProb);
			/*
			 * Check for preexisting word info and merge if
			 * necesssary
			 */
			NBestWordInfo *otherInfo =
				    other.wordInfo[other.sortedAligns[j-1]]->
								find(otherWord);
			if (otherInfo) {
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -