📄 wordmesh.cc
字号:
Boolean foundP;
NBestWordInfo *oldInfo =
wordInfo[sortedAligns[i-1]]->
insert(otherWord, foundP);
if (foundP) {
oldInfo->merge(*otherInfo);
} else {
*oldInfo = *otherInfo;
}
}
Array<HypID> *otherHypList =
other.hypMap[other.sortedAligns[j-1]]->
find(otherWord);
if (otherHypList) {
/*
* Add hyp IDs to the hyp list for this word
* XXX: hyp IDs should be disjoint but there is no
* checking of this!
*/
Array<HypID> &hypList =
*hypMap[sortedAligns[i-1]]->
insert(otherWord);
for (unsigned h = 0; h < otherHypList->size(); h ++)
{
hypList[hypList.size()] = (*otherHypList)[h];
}
}
}
if (alignScores) {
alignScores[j-1] = totalScore;
}
}
columnPosteriors[sortedAligns[i-1]] +=
score * other.columnPosteriors[other.sortedAligns[j-1]];
transPosteriors[sortedAligns[i-1]] +=
score * other.transPosteriors[other.sortedAligns[j-1]];
i --; j --;
break;
case DEL_ALIGN:
*aligns[sortedAligns[i-1]]->insert(deleteIndex) +=
score * other.totalPosterior;
columnPosteriors[sortedAligns[i-1]] +=
score * other.totalPosterior;
transPosteriors[sortedAligns[i-1]] +=
score * other.totalPosterior;
/*
* Add all hyp IDs from "other" alignment to our delete hyp
*/
if (other.allHyps.numEntries() > 0) {
Array<HypID> &hypList =
*hypMap[sortedAligns[i-1]]->insert(deleteIndex);
SArrayIter<HypID,HypID> otherHypsIter(other.allHyps);
HypID id;
while (otherHypsIter.next(id)) {
hypList[hypList.size()] = id;
}
}
i --;
break;
case INS_ALIGN:
/*
* Make room for new alignment column in sortedAligns
* and position new column
*/
for (unsigned k = numAligns; k > i; k --) {
sortedAligns[k] = sortedAligns[k-1];
}
sortedAligns[i] = numAligns;
aligns[numAligns] = new LHash<VocabIndex,Prob>;
assert(aligns[numAligns] != 0);
wordInfo[numAligns] = new LHash<VocabIndex,NBestWordInfo>;
assert(wordInfo[numAligns] != 0);
hypMap[numAligns] = new LHash<VocabIndex,Array<HypID> >;
assert(hypMap[numAligns] != 0);
/*
* The transition posterior from the preceding to the following
* position becomes the posterior for *delete* at the new
* position (that's why we need transition posteriors!).
*/
Prob nullPosterior = totalPosterior;
if (i > 0) {
nullPosterior = transPosteriors[sortedAligns[i-1]];
}
if (nullPosterior != 0.0) {
*aligns[numAligns]->insert(deleteIndex) = nullPosterior;
}
columnPosteriors[numAligns] = nullPosterior;
transPosteriors[numAligns] = nullPosterior;
/*
* Add all hypIDs previously recorded to the *DELETE*
* hypothesis at the newly created position.
*/
{
Array<HypID> *hypList = 0;
SArrayIter<HypID,HypID> myIter(allHyps);
HypID id;
while (myIter.next(id)) {
/*
* Avoid inserting *DELETE* in hypMap unless needed
*/
if (hypList == 0) {
hypList = hypMap[numAligns]->insert(deleteIndex);
}
(*hypList)[hypList->size()] = id;
}
}
/*
* insert all words in "other" alignment at current position
*/
{
LHashIter<VocabIndex,Prob>
iter(*other.aligns[other.sortedAligns[j-1]]);
Prob *otherProb;
VocabIndex otherWord;
while (otherProb = iter.next(otherWord)) {
*aligns[numAligns]->insert(otherWord) +=
score * *otherProb;
/*
* Record word info if given
*/
NBestWordInfo *otherInfo =
other.wordInfo[other.sortedAligns[j-1]]->
find(otherWord);
if (otherInfo) {
*wordInfo[numAligns]->insert(otherWord) =
*otherInfo;
}
Array<HypID> *otherHypList =
other.hypMap[other.sortedAligns[j-1]]->
find(otherWord);
if (otherHypList) {
/*
* Add hyp IDs to the hyp list for this word
*/
Array<HypID> &hypList =
*hypMap[numAligns]->insert(otherWord);
for (unsigned h = 0; h < otherHypList->size(); h ++)
{
hypList[hypList.size()] = (*otherHypList)[h];
}
}
}
if (alignScores) {
alignScores[j-1] = score;
}
}
columnPosteriors[numAligns] +=
score * other.columnPosteriors[other.sortedAligns[j-1]];
transPosteriors[numAligns] +=
score * other.transPosteriors[other.sortedAligns[j-1]];
numAligns ++;
j --;
break;
}
}
}
/*
* Add hyps from "other" alignment to global list of our IDs
*/
SArrayIter<HypID,HypID> otherHypsIter(other.allHyps);
HypID id;
while (otherHypsIter.next(id)) {
*allHyps.insert(id) = id;
}
totalPosterior += score * other.totalPosterior;
}
/*
 * Incremental partial alignments using alignWords() can leave the
 * posteriors of null entries defective (because not all transition posteriors
 * were added to the alignment).
 * This function normalizes the null posteriors so that the total column
 * posteriors sum to the totalPosterior.
 */
void
WordMesh::normalizeDeletes()
{
for (unsigned i = 0; i < numAligns; i ++) {
LHashIter<VocabIndex,Prob> alignIter(*aligns[i]);
Prob wordPosteriorSum = 0.0;
Prob *prob;
VocabIndex word;
while (prob = alignIter.next(word)) {
if (word != deleteIndex) {
wordPosteriorSum += *prob;
}
}
Prob deletePosterior;
if (wordPosteriorSum - totalPosterior > Prob_Epsilon) {
cerr << "WordMesh::normalizeDeletes: word posteriors exceed total: "
<< wordPosteriorSum << endl;
deletePosterior = 0.0;
} else {
deletePosterior = totalPosterior - wordPosteriorSum;
}
if (deletePosterior < Prob_Epsilon) {
aligns[i]->remove(deleteIndex);
} else {
*aligns[i]->insert(deleteIndex) = deletePosterior;
}
columnPosteriors[i] = totalPosterior;
transPosteriors[i] = totalPosterior;
}
}
/*
* Compute minimal word error with respect to existing alignment columns
* (derived from WordAlign())
*/
/*
 * Align "words" against the mesh columns with a standard edit-distance DP
 * and return the minimal word error count; sub/ins/del receive the
 * per-category counts.  A hyp word counts as correct if it appears
 * anywhere in the corresponding alignment column; a deletion is free if
 * the column already contains *DELETE* with positive posterior.
 */
unsigned
WordMesh::wordError(const VocabIndex *words,
		    unsigned &sub, unsigned &ins, unsigned &del)
{
    unsigned hypLength = Vocab::length(words);
    unsigned refLength = numAligns;

    /*
     * DP chart cell: minimal cost of aligning the first i columns against
     * the first j hyp words, plus a backpointer encoding the last edit op.
     */
    typedef struct {
	unsigned cost;		// minimal cost of partial alignment
	WordAlignType error;	// best predecessor
    } ChartEntry;

    /*
     * Allocate chart statically, enlarging on demand
     * NOTE(review): the static cache makes this function non-reentrant and
     * not thread-safe; the chart is intentionally never freed.
     */
    static unsigned maxHypLength = 0;
    static unsigned maxRefLength = 0;
    static ChartEntry **chart = 0;

    if (hypLength > maxHypLength || refLength > maxRefLength) {
	/*
	 * Free old chart
	 */
	if (chart != 0) {
	    for (unsigned i = 0; i <= maxRefLength; i ++) {
		delete [] chart[i];
	    }
	    delete [] chart;
	}

	/*
	 * Allocate new chart
	 */
	maxHypLength = hypLength;
	maxRefLength = refLength;

	chart = new ChartEntry*[maxRefLength + 1];
	assert(chart != 0);

	for (unsigned i = 0; i <= maxRefLength; i ++) {
	    chart[i] = new ChartEntry[maxHypLength + 1];
	    assert(chart[i] != 0);
	}

	/*
	 * Initialize the 0'th row, which never changes:
	 * aligning j hyp words against zero columns costs j insertions.
	 */
	chart[0][0].cost = 0;
	chart[0][0].error = CORR_ALIGN;

	for (unsigned j = 1; j <= maxHypLength; j ++) {
	    chart[0][j].cost = chart[0][j-1].cost + INS_COST;
	    chart[0][j].error = INS_ALIGN;
	}
    }

    /*
     * Fill in the rest of the chart, row by row.
     */
    for (unsigned i = 1; i <= refLength; i ++) {
	/*
	 * deletion error only if alignment column doesn't have *DELETE*
	 * (a column that already allows *DELETE* can be skipped for free)
	 */
	Prob *delProb = aligns[sortedAligns[i-1]]->find(deleteIndex);
	unsigned THIS_DEL_COST = delProb && *delProb > 0.0 ? 0 : DEL_COST;

	// 0'th column: deleting all i columns so far
	chart[i][0].cost = chart[i-1][0].cost + THIS_DEL_COST;
	chart[i][0].error = DEL_ALIGN;

	for (unsigned j = 1; j <= hypLength; j ++) {
	    unsigned minCost;
	    WordAlignType minError;

	    // match/substitute: correct if the hyp word occurs in the column
	    if (aligns[sortedAligns[i-1]]->find(words[j-1])) {
		minCost = chart[i-1][j-1].cost;
		minError = CORR_ALIGN;
	    } else {
		minCost = chart[i-1][j-1].cost + SUB_COST;
		minError = SUB_ALIGN;
	    }

	    // delete this column
	    unsigned delCost = chart[i-1][j].cost + THIS_DEL_COST;

	    if (delCost < minCost) {
		minCost = delCost;
		minError = DEL_ALIGN;
	    }

	    // insert the hyp word
	    unsigned insCost = chart[i][j-1].cost + INS_COST;

	    if (insCost < minCost) {
		minCost = insCost;
		minError = INS_ALIGN;
	    }

	    chart[i][j].cost = minCost;
	    chart[i][j].error = minError;
	}
    }

    /*
     * Backtrace and add new words to alignment columns
     * (here the backtrace only tallies sub/ins/del counts)
     */
    {
	unsigned i = refLength;
	unsigned j = hypLength;

	sub = ins = del = 0;

	while (i > 0 || j > 0) {

	    switch (chart[i][j].error) {
	    case CORR_ALIGN:
		i --; j--;
		break;
	    case SUB_ALIGN:
		sub ++;
		i --; j --;
		break;
	    case DEL_ALIGN:
		/*
		 * deletion error only if alignment column doesn't
		 * have *DELETE*
		 */
		{
		    Prob *delProb =
			aligns[sortedAligns[i-1]]->find(deleteIndex);

		    if (!delProb || *delProb == 0.0) {
			del ++;
		    }
		}
		i --;
		break;
	    case INS_ALIGN:
		ins ++;
		j --;
		break;
	    }
	}
    }

    return sub + ins + del;
}
/*
 * Convenience overload: compute the minimum-expected-WER hypothesis and
 * return only the word IDs, discarding the per-word info.
 */
double
WordMesh::minimizeWordError(VocabIndex *words, unsigned length,
			    double &sub, double &ins, double &del,
			    unsigned flags, double delBias)
{
    /*
     * Delegate to the NBestWordInfo variant, then copy the word IDs out
     * up to (and including) the Vocab_None terminator.
     */
    NBestWordInfo *winfo = new NBestWordInfo[length];
    assert(winfo != 0);

    double result =
	minimizeWordError(winfo, length, sub, ins, del, flags, delBias);

    unsigned pos = 0;
    while (pos < length) {
	words[pos] = winfo[pos].word;
	if (words[pos] == Vocab_None) {
	    break;
	}
	pos ++;
    }

    delete [] winfo;

    return result;
}
/*
 * Extract the hypothesis minimizing expected word error by picking, in
 * each alignment column, the entry with the highest (delete-biased)
 * posterior.  Fills winfo (up to "length" entries, Vocab_None-terminated
 * when there is room) and returns the total expected error; sub/ins/del
 * receive the expected substitution/insertion/deletion error mass.
 * The "flags" argument is currently unused here.
 *
 * Fixes vs. previous version: stray double semicolon removed; bestProb
 * initialized (it was only conditionally assigned, though guarded by the
 * bestWord == Vocab_None test); assignment-in-condition parenthesized.
 */
double
WordMesh::minimizeWordError(NBestWordInfo *winfo, unsigned length,
			    double &sub, double &ins, double &del,
			    unsigned flags, double delBias)
{
    unsigned numWords = 0;

    sub = ins = del = 0.0;

    for (unsigned i = 0; i < numAligns; i ++) {
	LHashIter<VocabIndex,Prob> alignIter(*aligns[sortedAligns[i]]);

	Prob deleteProb = 0.0;
	Prob bestProb = 0.0;	// always overwritten on first iteration,
				// initialized to avoid compiler warnings
	VocabIndex bestWord = Vocab_None;

	Prob *prob;
	VocabIndex word;
	while ((prob = alignIter.next(word))) {
	    Prob effectiveProb = *prob;	// prob adjusted for deletion bias

	    /*
	     * The *DELETE* posterior is scaled by delBias before competing
	     * with real words for the best entry.
	     */
	    if (word == deleteIndex) {
		effectiveProb *= delBias;
		deleteProb = effectiveProb;
	    }

	    if (bestWord == Vocab_None || effectiveProb > bestProb) {
		bestWord = word;
		bestProb = effectiveProb;
	    }
	}

	if (bestWord != deleteIndex) {
	    /*
	     * Output the winning word, carrying over recorded word info
	     * when available.
	     */
	    if (numWords < length) {
		NBestWordInfo *thisInfo =
			wordInfo[sortedAligns[i]]->find(bestWord);

		if (thisInfo) {
		    winfo[numWords] = *thisInfo;
		} else {
		    winfo[numWords].word = bestWord;
		    winfo[numWords].invalidate();
		}

		/*
		 * Always return the word posterior into the NBestWordInfo
		 */
		winfo[numWords].wordPosterior = bestProb;
		winfo[numWords].transPosterior = bestProb;

		numWords += 1;
	    }

	    /*
	     * Delete mass becomes expected insertion error (we output a
	     * word where some hypotheses had none); the remaining
	     * competing mass counts as substitutions.
	     */
	    ins += deleteProb;
	    sub += totalPosterior - deleteProb - bestProb;
	} else {
	    /*
	     * We output nothing here; all non-delete mass is expected
	     * deletion error.
	     */
	    del += totalPosterior - deleteProb;
	}
    }

    /*
     * Terminate the output array if there is room.
     */
    if (numWords < length) {
	winfo[numWords].word = Vocab_None;
	winfo[numWords].invalidate();
    }

    return sub + ins + del;
}
/*
* Return confusion set for a given position
*/
/*
 * Return the confusion set (word -> posterior) at the given sorted
 * position, or a null pointer when the position is out of range.
 */
LHash<VocabIndex,Prob> *
WordMesh::wordColumn(unsigned columnNumber) {
    return (columnNumber < numAligns) ?
		aligns[sortedAligns[columnNumber]] : 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -