// nbest-optimize.cc
// N-best list score-weight optimization (amoeba/simplex training and
// best-hypothesis output routines).
// try again
if (ytry >= ysave) {
nfunk += 1;
ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, 0.5);
if (debug >= DEBUG_TRAIN) {
cerr << " Shrunken amoeba by 0.5 returned " << ytry << endl;
}
}
// try again opposite direction
if (ytry >= ysave) {
nfunk += 1;
ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, -0.5);
if (debug >= DEBUG_TRAIN) {
cerr << " Shrunken reflected amoeba by -0.5 returned "
<< ytry << endl;
}
}
if (ytry >= ysave) {
nfunk += 1;
ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, 0.3);
if (debug >= DEBUG_TRAIN) {
cerr << " Shrunken amoeba by 0.3 returned " << ytry << endl;
}
}
// if failed to get rid of high contract everything by 0.7
if (ytry >= ysave) {
for (unsigned i = 0; i < mpts; i++) {
if (i != ilo) {
for (unsigned j = 0; j < ndim; j++)
p[i][j] = psum[j] =
0.7 * p[i][j] + 0.3 * p[ilo][j];
y[i] = (*funk) (nbest, psum);
}
}
nfunk += ndim;
computeSum(ndim, mpts, psum, p);
}
} else {
--nfunk;
}
}
}
/*
* Amoeba optimization with restarts
*/
void
trainAmoeba(NBestSet &nbestSet)
{
    /*
     * Before training reset the lambdas to their unscaled version;
     * they are divided by posteriorScale again on the way out.
     */
    for (unsigned i = 0; i < numScores; i++) {
	lambdas[i] *= posteriorScale;
    }

    /*
     * Initialize amoeba points.
     * There is one search dimension per score dimension, excluding fixed
     * weights, but adding one for the posterior score (which is stored in
     * the vector even if it is kept fixed).
     */
    int numFreeWeights = numScores - numFixedWeights + 1;

    makeArray(double *, points, numFreeWeights + 1);
    for (unsigned i = 0; i <= numFreeWeights; i++) {
	points[i] = new double[numFreeWeights];
	assert(points[i] != 0);
    }
    makeArray(double, errs, numFreeWeights + 1);
    makeArray(double, prevPoints, numFreeWeights);

    // dimension 0 holds the posterior scale; the remaining dimensions hold
    // the non-fixed lambdas, in score order
    prevPoints[0] = points[0][0] = posteriorScale;
    simplex[0] = posteriorScaleStep;
    for (unsigned i = 0, j = 1; i < numScores; i++) {
	if (!fixLambdas[i]) {
	    prevPoints[j] = points[0][j] = lambdas[i];
	    simplex[j] = lambdaSteps[i];
	    j++;
	}
    }

    // search direction vectors used to regenerate the simplex on restarts
    makeArray(double *, dir, numFreeWeights + 1);
    for (unsigned i = 0; i <= numFreeWeights; i++) {
	dir[i] = new double[numFreeWeights];
	assert(dir[i] != 0);
	for (unsigned j = 0; j < numFreeWeights; j++) {
	    dir[i][j] = 0.0;
	}
    }

    unsigned nevals = 0;	// total objective-function evaluations
    unsigned loop = 1;		// 0 terminates; also scales restart steps
    unsigned same = 0;		// consecutive restarts without improvement
    unsigned shift = 0;		// rotates direction assignment across restarts
    unsigned reps = 0;		// restart counter

    /* force an improvement */
    bestError = (unsigned)-1;

    while (loop) {
	reps++;

	/*
	 * Build the simplex displacement vectors.  Which dimension of point i
	 * receives a nonzero step depends on loop and shift, so successive
	 * restarts explore different direction assignments.
	 */
	for (unsigned i = 1; i <= numFreeWeights; i++) {
	    unsigned k = 0;
	    dir[i][k] += (((k + loop + shift - 1) % numFreeWeights) + 1 == i) ?
				loop * simplex[k] : 0.0;
	    k++;
	    for (unsigned j = 0; j < numScores; j++) {
		if (!fixLambdas[j]) {
		    dir[i][k] +=
			(((k + loop + shift - 1) % numFreeWeights) + 1 == i) ?
				loop * simplex[k] : 0.0;
		    k++;
		}
	    }
	}

	if (debug >= DEBUG_TRAIN) {
	    cerr << "Simplex points:" << endl;
	}
	for (unsigned i = 0; i <= numFreeWeights; i++) {
	    for (unsigned j = 0; j < numFreeWeights; j++) {
		points[i][j] = points[0][j] + dir[i][j];
	    }
	    errs[i] = amoebaComputeErrors(nbestSet, points[i]);
	    if (debug >= DEBUG_TRAIN) {
		cerr << "Point " << i << " : ";
		for (unsigned j = 0; j < numFreeWeights; j++) {
		    cerr << points[i][j] << " ";
		}
		cerr << "errors = " << errs[i] << endl;
	    }
	}

	unsigned prevErrors = (int) errs[0];

	/*
	 * The objective function fractional tolerance is
	 * decreasing with each retry.
	 */
	amoeba(nbestSet, points, errs, numFreeWeights, converge / reps,
						amoebaComputeErrors, nevals);

	if ((int) errs[0] < prevErrors) {
	    // improvement: keep restarting
	    loop++;
	    same = 0;
	} else if (same < numFreeWeights) {
	    // no improvement yet, but allow up to numFreeWeights retries
	    loop++;
	    same++;
	} else {
	    loop = 0;
	}

	// after enough restarts, reset directions and rotate the assignment
	if (loop > numFreeWeights / 3) {
	    loop = 1;
	    for (unsigned i = 0; i <= numFreeWeights; i++) {
		for (unsigned j = 0; j < numFreeWeights; j++) {
		    dir[i][j] = 0.0;
		}
	    }
	    shift++;
	}

	// reset step sizes
	posteriorScale = points[0][0];
	if (loop == 1) {
	    simplex[0] = posteriorScaleStep;
	} else if (fabs(prevPoints[0] - points[0][0])
					> 1.3 * fabs(simplex[0]))
	{
	    // large move: use the actual displacement as the new step
	    simplex[0] = points[0][0] - prevPoints[0];
	} else {
	    // small move: grow the step geometrically
	    simplex[0] = simplex[0] * 1.3;
	}
	prevPoints[0] = points[0][0];

	unsigned j = 1;
	for (unsigned i = 0; i < numScores; i++) {
	    if (!fixLambdas[i]) {
		lambdas[i] = points[0][j];
		if (loop == 1) {
		    simplex[j] = lambdaSteps[i];
		} else if (fabs(prevPoints[j] - points[0][j])
					> 1.3 * fabs(simplex[j]))
		{
		    simplex[j] = points[0][j] - prevPoints[j];
		} else {
		    simplex[j] = simplex[j] * 1.3;
		}
		prevPoints[j] = points[0][j];

		if (debug >= DEBUG_TRAIN) {
		    cerr << "lambda_" << i << " " << points[0][j]
			 << " " << simplex[j] << endl;
		}
		j++;
	    }
	}

	if (debug >= DEBUG_TRAIN) {
	    cerr << "scale " << points[0][0] << endl;
	    cerr << "errors " << errs[0] << endl;
	    cerr << "unchanged for " << same << " iterations " << endl;
	}

	if (nevals >= maxIters) {
	    cerr << "maximum number of iterations exceeded" << endl;
	    loop = 0;
	}
	if (reps > maxAmoebaRestarts) {
	    cerr << "maximum number of Amoeba restarts reached" << endl;
	    loop = 0;
	}
	if (abortSearch) {
	    cerr << "search timed out after " << maxTime << " seconds\n";
	    loop = 0;
	}
    }

    /*
     * FIX: points[i] and dir[i] were allocated with new[], so they must be
     * released with delete[]; plain delete on an array is undefined behavior.
     */
    for (unsigned i = 0; i <= numFreeWeights; i++) {
	delete [] points[i];
	delete [] dir[i];
    }

    // Scale the lambdas back
    for (unsigned i = 0; i < numScores; i++) {
	lambdas[i] /= posteriorScale;
    }
}
/*
* output 1-best hyp
*/
/*
 * Write the utterance ID followed by the words of the single best-scoring
 * hypothesis in the N-best list (nothing but the ID if the list is empty).
 */
void
printTop1bestHyp(File &file, RefString id, NBestScore **scores,
						NBestList &nbest)
{
    unsigned total = nbest.numHyps();

    fprintf(file, "%s", id);

    /*
     * Locate the hypothesis with the highest combined score
     */
    unsigned winner = 0;
    LogP winnerScore = 0.0;

    for (unsigned idx = 0; idx < total; idx ++) {
	LogP current = hypScore(idx, scores);
	if (idx == 0 || current > winnerScore) {
	    winnerScore = current;
	    winner = idx;
	}
    }

    if (total > 0) {
	VocabIndex *words = nbest.getHyp(winner).words;
	for (unsigned w = 0; words[w] != Vocab_None; w ++) {
	    fprintf(file, " %s", nbest.vocab.getWord(words[w]));
	}
    }
    fprintf(file, "\n");
}
/*
* output best sausage hypotheses
*/
/*
 * Write the utterance ID followed by the highest-scoring word at each
 * sausage (confusion network) position, skipping deletion placeholders.
 */
void
printTopSausageHyp(File &file, RefString id, NBestScore **scores,
						WordMesh &alignment)
{
    fprintf(file, "%s", id);

    /*
     * walk every position of the multiple alignment
     */
    for (unsigned pos = 0; pos < alignment.length(); pos++) {
	WordMeshIter candidates(alignment, pos);

	VocabIndex winner = Vocab_None;
	Prob winnerScore = 0.0;

	VocabIndex candidate;
	Array<HypID> *idMap;
	while ((idMap = candidates.next(candidate)) != 0) {
	    /*
	     * total score mass behind this candidate word
	     * (the "correct" flag from wordScore is not needed here)
	     */
	    Boolean unused;
	    Prob mass = wordScore(*idMap, scores, unused);

	    if (winner == Vocab_None || mass > winnerScore) {
		winner = candidate;
		winnerScore = mass;
	    }
	}

	// every position must contain at least one word entry
	assert(winner != Vocab_None);

	if (winner != alignment.deleteIndex) {
	    fprintf(file, " %s", alignment.vocab.getWord(winner));
	}
    }
    fprintf(file, "\n");
}
/*
 * Emit the best hypothesis for every utterance in the set, taken either
 * from the raw N-best list (1-best mode) or from the sausage alignment.
 */
void
printTopHyps(File &file, NBestSet &nbestSet)
{
    NBestSetIter setIter(nbestSet);
    RefString uttId;
    NBestList *list;

    while ((list = setIter.next(uttId)) != 0) {
	NBestScore ***uttScores = nbestScores.find(uttId);
	assert(uttScores != 0);

	if (!oneBest) {
	    WordMesh **mesh = nbestAlignments.find(uttId);
	    assert(mesh != 0);
	    printTopSausageHyp(file, uttId, *uttScores, **mesh);
	} else {
	    printTop1bestHyp(file, uttId, *uttScores, *list);
	}
    }
}
/*
* Align N-best lists
*/
typedef struct {
LogP score;		// combined hypothesis score (higher is better)
unsigned rank;		// original position of the hyp in the N-best list
} HypRank; /* used in sorting nbest hyps by score */
static int
compareHyps(const void *h1, const void *h2)
{
LogP score1 = ((HypRank *)h1)->score;
LogP score2 = ((HypRank *)h2)->score;
return score1 > score2 ? -1 :
score1 < score2 ? 1 : 0;
}
/*
 * Build a multiple alignment (word mesh / sausage) for every N-best list
 * in the set, aligning hyps in order of decreasing combined score, with
 * the reference transcript aligned first or last depending on -no-reorder.
 */
void
alignNbest(NBestSet &nbestSet, RefList &refs,
			SubVocabDistance &subvocabDistance)
{
    NBestSetIter iter(nbestSet);
    NBestList *nbest;
    RefString id;
    while (nbest = iter.next(id)) {
	unsigned numWords;
	VocabIndex *ref = refs.findRef(id);
	assert(ref != 0);

	unsigned numHyps = nbest->numHyps();

	/*
	 * Sort hyps by initial scores. (Combined initial scores are
	 * stored in acousticScore from before.)
	 * Keep hyp order outside of N-best lists, since scores must be
	 * kept in sync.
	 */
	makeArray(HypRank, reordering, numHyps);

	NBestScore ***scores = nbestScores.find(id);
	assert(scores != 0);

	/*
	 * Copy combined scores back into N-best list acoustic score for
	 * posterior probability computation (since computePosteriors()
	 * doesn't take additional scores).
	 */
	for (unsigned j = 0; j < numHyps; j ++) {
	    reordering[j].rank = j;
	    reordering[j].score =
		nbest->getHyp(j).acousticScore = hypScore(j, *scores);
	}

	if (!noReorder) {
	    qsort(reordering, numHyps, sizeof(HypRank), compareHyps);
	}

	/*
	 * compute posteriors for passing to alignWords().
	 * Note: these now reflect all scores and initial lambdas.
	 */
	nbest->computePosteriors(0.0, 0.0, 1.0);

	/*
	 * create word-mesh for multiple alignment.
	 * FIX: previously a WordMesh was unconditionally allocated here and
	 * then immediately overwritten by the if/else below, leaking one
	 * WordMesh per utterance.  Allocate exactly once.
	 */
	WordMesh *alignment;
	if (hiddenVocabFile) {
	    alignment = new WordMesh(nbestSet.vocab, 0, &subvocabDistance);
	} else {
	    alignment = new WordMesh(nbestSet.vocab);
	}
	assert(alignment != 0);

	*nbestAlignments.insert(id) = alignment;

	numWords = Vocab::length(ref);

	/*
	 * Default is to start alignment with hyps strings,
	 * or with the reference if -align-refs-first was given.
	 * Note we give reference posterior 1 only to constrain the
	 * alignment. The loss computation in training ignores the
	 * posteriors assigned to hyps at this point.
	 */
	HypID hypID;

	if (noReorder) {
	    hypID = refID;
	    alignment->alignWords(ref, 1.0, 0, &hypID);
	}

	/*
	 * Now align all N-best hyps, in order of decreasing scores
	 */
	for (unsigned j = 0; j < numHyps; j ++) {
	    unsigned hypRank = reordering[j].rank;
	    NBestHyp &hyp = nbest->getHyp(hypRank);
	    hypID = hypRank;

	    /*
	     * Check for overflow in the hypIDs
	     */
	    if ((unsigned)hypID != hypRank || hypID == refID) {
		cerr << "Sorry, too many hypotheses in " << id << endl;
		exit(2);
	    }

	    alignment->alignWords(hyp.words, hyp.posterior, 0, &hypID);
	}

	if (!noReorder) {
	    hypID = refID;
	    alignment->alignWords(ref, 1.0, 0, &hypID);
	}

	if (debug >= DEBUG_ALIGNMENT) {
	    dumpAlignment(cerr, *alignment);
	}
    }
}
/*
* Read a single score file into a column of the score matrix
*/
Boolean
readScoreFile(const char *scoreDir, RefString id, NBestScore *scores,
unsigned numHyps)
{
makeArray(char, fileName,
strlen(scoreDir) + 1 + strlen(id) + strlen(GZIP_SUFFIX) + 1);
sprintf(fileName, "%s/%s", scoreDir, id);
/*
* If plain file doesn't exist try gzipped version
*/
FILE *fp = 0;
if ((fp = fopen(fileName, "r")) == NULL) {
strcat(fileName, GZIP_SUFFIX);
} else {
fclose(fp);
}
File file(fileName, "r", 0);
char *line;
unsigned hypNo = 0;
Boolean decipherScores = false;
while (!file.error() && (line = file.getline())) {
if (strncmp(line, nbest1Magic, sizeof(nbest1Magic)-1) == 0 ||
strncmp(line, nbest2Magic, sizeof(nbest2Magic)-1) == 0)
{
decipherScores = true;
continue;
}
if (hypNo >= numHyps) {
break;
}
/*
* parse the first word as a score
*/
double score;
	// NOTE: source truncated here — the remainder of readScoreFile()
	// (score parsing loop body and function close) was lost in
	// extraction; non-source code-viewer UI text was removed.