📄 wordmesh.cc
字号:
* Alignment positions are integers corresponding to word equivalence classes.
* They are assigned in increasing order; hence numerical order does not
* correspond to topological (temporal) order.
*
* - 'from' specifies the alignment position just BEFORE the first word.
* A value of numAligns means the first word should start the alignment.
* - 'to' specifies the alignment position just AFTER the last word.
* A value of numAligns means the last word should end the alignment.
*
* Returns false if the 'from' position does not strictly precede the 'to'.
*/
Boolean
WordMesh::alignWords(const NBestWordInfo *winfo, Prob score,
Prob *wordScores, const HypID *hypID,
unsigned from, unsigned to, unsigned *wordAlignment)
{
/*
* Compute word string length
*/
unsigned hypLength = 0;
for (unsigned i = 0; winfo[i].word != Vocab_None; i ++) hypLength ++;
/*
* Locate start and end positions to align to
*/
unsigned fromPos = 0;
unsigned refLength = 0;
if (numAligns > 0) {
unsigned toPos = numAligns - 1;
for (unsigned p = 0; p < numAligns; p ++) {
if (sortedAligns[p] == from) fromPos = p + 1;
if (sortedAligns[p] == to) toPos = p - 1;
}
refLength = toPos - fromPos + 1;
if (toPos + 1 < fromPos) {
return false;
}
}
Boolean fullAlignment = (refLength == numAligns);
typedef struct {
double cost; // minimal cost of partial alignment
WordAlignType error; // best predecessor
} ChartEntry;
/*
* Allocate chart statically, enlarging on demand
*/
static unsigned maxHypLength = 0;
static unsigned maxRefLength = 0;
static ChartEntry **chart = 0;
if (chart == 0 || hypLength > maxHypLength || refLength > maxRefLength) {
/*
* Free old chart
*/
if (chart != 0) {
for (unsigned i = 0; i <= maxRefLength; i ++) {
delete [] chart[i];
}
delete [] chart;
}
/*
* Allocate new chart
*/
maxHypLength = hypLength;
maxRefLength = refLength;
chart = new ChartEntry*[maxRefLength + 1];
assert(chart != 0);
for (unsigned i = 0; i <= maxRefLength; i ++) {
chart[i] = new ChartEntry[maxHypLength + 1];
assert(chart[i] != 0);
}
}
/*
* Initialize the 0'th row
*/
chart[0][0].cost = 0.0;
chart[0][0].error = CORR_ALIGN;
for (unsigned j = 1; j <= hypLength; j ++) {
chart[0][j].cost = chart[0][j-1].cost +
alignError(0, totalPosterior, winfo[j-1].word);
chart[0][j].error = INS_ALIGN;
}
/*
* Fill in the rest of the chart, row by row.
*/
for (unsigned i = 1; i <= refLength; i ++) {
double deletePenalty =
alignError(aligns[sortedAligns[fromPos+i-1]],
columnPosteriors[sortedAligns[fromPos+i-1]],
deleteIndex);
chart[i][0].cost = chart[i-1][0].cost + deletePenalty;
chart[i][0].error = DEL_ALIGN;
for (unsigned j = 1; j <= hypLength; j ++) {
double minCost = chart[i-1][j-1].cost +
alignError(aligns[sortedAligns[fromPos+i-1]],
columnPosteriors[sortedAligns[fromPos+i-1]],
winfo[j-1].word);
WordAlignType minError = SUB_ALIGN;
double delCost = chart[i-1][j].cost + deletePenalty;
if (delCost < minCost) {
minCost = delCost;
minError = DEL_ALIGN;
}
double insCost = chart[i][j-1].cost +
alignError(0,
transPosteriors[sortedAligns[fromPos+i-1]],
winfo[j-1].word);
if (insCost < minCost) {
minCost = insCost;
minError = INS_ALIGN;
}
chart[i][j].cost = minCost;
chart[i][j].error = minError;
}
}
/*
* Backtrace and add new words to alignment columns.
* Store word posteriors if requested.
*/
{
unsigned i = refLength;
unsigned j = hypLength;
while (i > 0 || j > 0) {
Prob wordPosterior = score;
Prob transPosterior = score;
/*
* Use word- and transition-specific posteriors if supplied.
* Note: the transition posterior INTO the first word is
* given on the Vocab_None item at winfo[hypLength].
*/
if (j > 0 && winfo[j-1].wordPosterior != 0.0) {
wordPosterior = winfo[j-1].wordPosterior;
}
if (j > 0 && winfo[j-1].transPosterior != 0.0) {
transPosterior = winfo[j-1].transPosterior;
} else if (j == 0 && winfo[hypLength].transPosterior != 0.0) {
transPosterior = winfo[hypLength].transPosterior;
}
switch (chart[i][j].error) {
case CORR_ALIGN:
case SUB_ALIGN:
*aligns[sortedAligns[fromPos+i-1]]->insert(winfo[j-1].word) +=
wordPosterior;
columnPosteriors[sortedAligns[fromPos+i-1]] += wordPosterior;
transPosteriors[sortedAligns[fromPos+i-1]] += transPosterior;
/*
* Check for preexisting word info and merge if necesssary
*/
if (winfo[j-1].valid()) {
Boolean foundP;
NBestWordInfo *oldInfo =
wordInfo[sortedAligns[fromPos+i-1]]->
insert(winfo[j-1].word, foundP);
if (foundP) {
oldInfo->merge(winfo[j-1]);
} else {
*oldInfo = winfo[j-1];
}
}
if (hypID) {
/*
* Add hyp ID to the hyp list for this word
*/
Array<HypID> &hypList =
*hypMap[sortedAligns[fromPos+i-1]]->insert(winfo[j-1].word);
hypList[hypList.size()] = *hypID;
}
if (wordAlignment) {
wordAlignment[j-1] = sortedAligns[fromPos+i-1];
}
if (wordScores) {
wordScores[j-1] =
*aligns[sortedAligns[fromPos+i-1]]->find(winfo[j-1].word);
}
i --; j --;
break;
case DEL_ALIGN:
*aligns[sortedAligns[fromPos+i-1]]->insert(deleteIndex) +=
transPosterior;
columnPosteriors[sortedAligns[fromPos+i-1]] += transPosterior;
transPosteriors[sortedAligns[fromPos+i-1]] += transPosterior;
if (hypID) {
/*
* Add hyp ID to the hyp list for this word
*/
Array<HypID> &hypList =
*hypMap[sortedAligns[fromPos+i-1]]->insert(deleteIndex);
hypList[hypList.size()] = *hypID;
}
i --;
break;
case INS_ALIGN:
/*
* Make room for new alignment column in sortedAligns
* and position new column
*/
for (unsigned k = numAligns; k > fromPos + i; k --) {
sortedAligns[k] = sortedAligns[k-1];
}
sortedAligns[fromPos + i] = numAligns;
aligns[numAligns] = new LHash<VocabIndex,Prob>;
assert(aligns[numAligns] != 0);
wordInfo[numAligns] = new LHash<VocabIndex,NBestWordInfo>;
assert(wordInfo[numAligns] != 0);
hypMap[numAligns] = new LHash<VocabIndex,Array<HypID> >;
assert(hypMap[numAligns] != 0);
/*
* The transition posterior from the preceding to the following
* position becomes the posterior for *delete* at the new
* position (that's why we need transition posteriors!).
*/
Prob nullPosterior = totalPosterior;
if (fromPos+i > 0) {
nullPosterior = transPosteriors[sortedAligns[fromPos+i-1]];
}
if (nullPosterior != 0.0) {
*aligns[numAligns]->insert(deleteIndex) = nullPosterior;
}
*aligns[numAligns]->insert(winfo[j-1].word) = wordPosterior;
columnPosteriors[numAligns] = nullPosterior + wordPosterior;
transPosteriors[numAligns] = nullPosterior + transPosterior;
/*
* Record word info if given
*/
if (winfo[j-1].valid()) {
*wordInfo[numAligns]->insert(winfo[j-1].word) = winfo[j-1];
}
/*
* Add all hypIDs previously recorded to the *DELETE*
* hypothesis at the newly created position.
*/
{
Array<HypID> *hypList = 0;
SArrayIter<HypID,HypID> myIter(allHyps);
HypID id;
while (myIter.next(id)) {
/*
* Avoid inserting *DELETE* in hypMap unless needed
*/
if (hypList == 0) {
hypList = hypMap[numAligns]->insert(deleteIndex);
}
(*hypList)[hypList->size()] = id;
}
}
if (hypID) {
/*
* Add hyp ID to the hyp list for this word
*/
Array<HypID> &hypList =
*hypMap[numAligns]->insert(winfo[j-1].word);
hypList[hypList.size()] = *hypID;
}
if (wordAlignment) {
wordAlignment[j-1] = numAligns;
}
if (wordScores) {
wordScores[j-1] = wordPosterior;
}
numAligns ++;
j --;
break;
}
}
/*
* Add the transition posterior INTO the FIRST hyp word to
* to the transition posterior into the first alignment position
* This only applies when doing a partial alignment that doesn't
* start at the 0th position.
*/
if (fromPos+i > 0) {
Prob transPosterior = score;
if (winfo[hypLength].transPosterior != 0.0) {
transPosterior = winfo[hypLength].transPosterior;
}
transPosteriors[sortedAligns[fromPos+i-1]] += transPosterior;
}
}
/*
* Add hyp to global list of IDs
*/
if (hypID) {
*allHyps.insert(*hypID) = *hypID;
}
/*
* Only change total posterior if the alignment spanned the whole mesh.
* This ensure totalPosterior reflects the initia/final node posterior
* in a lattice.
*/
if (fullAlignment) {
totalPosterior += score;
}
return true;
}
void
WordMesh::alignAlignment(MultiAlign &alignment, Prob score, Prob *alignScores)
{
WordMesh &other = (WordMesh &)alignment;
unsigned refLength = numAligns;
unsigned hypLength = other.numAligns;
typedef struct {
double cost; // minimal cost of partial alignment
WordAlignType error; // best predecessor
} ChartEntry;
/*
* Allocate chart statically, enlarging on demand
*/
static unsigned maxHypLength = 0;
static unsigned maxRefLength = 0;
static ChartEntry **chart = 0;
if (chart == 0 || hypLength > maxHypLength || refLength > maxRefLength) {
/*
* Free old chart
*/
if (chart != 0) {
for (unsigned i = 0; i <= maxRefLength; i ++) {
delete [] chart[i];
}
delete [] chart;
}
/*
* Allocate new chart
*/
maxHypLength = hypLength;
maxRefLength = refLength;
chart = new ChartEntry*[maxRefLength + 1];
assert(chart != 0);
for (unsigned i = 0; i <= maxRefLength; i ++) {
chart[i] = new ChartEntry[maxHypLength + 1];
assert(chart[i] != 0);
}
}
/*
* Initialize the 0'th row
*/
chart[0][0].cost = 0.0;
chart[0][0].error = CORR_ALIGN;
for (unsigned j = 1; j <= hypLength; j ++) {
unsigned pos = other.sortedAligns[j-1];
chart[0][j].cost = chart[0][j-1].cost +
alignError(0,
totalPosterior,
other.aligns[other.sortedAligns[j-1]],
other.columnPosteriors[other.sortedAligns[j-1]]);
chart[0][j].error = INS_ALIGN;
}
/*
* Fill in the rest of the chart, row by row.
*/
for (unsigned i = 1; i <= refLength; i ++) {
double deletePenalty =
alignError(aligns[sortedAligns[i-1]],
columnPosteriors[sortedAligns[i-1]],
deleteIndex);
chart[i][0].cost = chart[i-1][0].cost + deletePenalty;
chart[i][0].error = DEL_ALIGN;
for (unsigned j = 1; j <= hypLength; j ++) {
double minCost = chart[i-1][j-1].cost +
alignError(aligns[sortedAligns[i-1]],
columnPosteriors[sortedAligns[i-1]],
other.aligns[other.sortedAligns[j-1]],
other.columnPosteriors[other.sortedAligns[j-1]]);
WordAlignType minError = SUB_ALIGN;
double delCost = chart[i-1][j].cost + deletePenalty;
if (delCost < minCost) {
minCost = delCost;
minError = DEL_ALIGN;
}
double insCost = chart[i][j-1].cost +
alignError(0,
transPosteriors[sortedAligns[i-1]],
other.aligns[other.sortedAligns[j-1]],
other.columnPosteriors[other.sortedAligns[j-1]]);
if (insCost < minCost) {
minCost = insCost;
minError = INS_ALIGN;
}
chart[i][j].cost = minCost;
chart[i][j].error = minError;
}
}
/*
* Backtrace and add new words to alignment columns.
* Store word posteriors if requested.
*/
{
unsigned i = refLength;
unsigned j = hypLength;
while (i > 0 || j > 0) {
switch (chart[i][j].error) {
case CORR_ALIGN:
case SUB_ALIGN:
/*
* merge all words in "other" alignment column into our own
*/
{
double totalScore = 0.0;
LHashIter<VocabIndex,Prob>
iter(*other.aligns[other.sortedAligns[j-1]]);
Prob *otherProb;
VocabIndex otherWord;
while (otherProb = iter.next(otherWord)) {
totalScore +=
(*aligns[sortedAligns[i-1]]->insert(otherWord) +=
score * *otherProb);
/*
* Check for preexisting word info and merge if
* necesssary
*/
NBestWordInfo *otherInfo =
other.wordInfo[other.sortedAligns[j-1]]->
find(otherWord);
if (otherInfo) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -