/*
 * nbest-optimize.cc --
 *	Optimize score combination weights for N-best rescoring
 */
        WordMeshIter iter(alignment, pos);
        Array<HypID> *hypMap;
        VocabIndex word;

        while ((hypMap = iter.next(word))) {
            /*
             * compute total score for word and check if it's the correct one
             */
            Boolean isCorrect;
            Prob totalScore = wordScore(*hypMap, scores, isCorrect);

            if (isCorrect) {
                corWord = word;
                corScore = totalScore;
                corHyps = hypMap;
            } else if (bicWord == Vocab_None || bicScore < totalScore) {
                bicWord = word;
                bicScore = totalScore;
                bicHyps = hypMap;
            }

            if (corWord != Vocab_None &&
                bicWord != Vocab_None &&
                bicScore > corScore)
            {
                result ++;
                break;
            }
        }
    }
    return result;
}
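/*
 * Note on the loop above: each alignment position is charged at most one
 * error, namely as soon as the best-scoring incorrect word ("bic") outscores
 * the correct one; iteration over the position then stops early.
 */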
/*
 * Compute total word error for a vector of score weights
 */
double
computeErrors(NBestSet &nbestSet, double *weights)
{
    int result = 0;
    Array<double> savedLambdas;
    unsigned i;

    /*
     * Temporarily install the candidate weights as the global lambdas
     */
    for (i = 0; i < numScores; i ++) {
        savedLambdas[i] = lambdas[i];
        lambdas[i] = weights[i];
    }

    NBestSetIter iter(nbestSet);
    NBestList *nbest;
    RefString id;

    while ((nbest = iter.next(id))) {
        NBestScore ***scores = nbestScores.find(id);
        assert(scores != 0);

        if (oneBest) {
            result += (int) compute1bestErrors(id, *scores, *nbest);
        } else {
            WordMesh **alignment = nbestAlignments.find(id);
            assert(alignment != 0);

            result += (int) computeSausageErrors(id, *scores, **alignment);
        }
    }

    /*
     * Restore the previous lambdas
     */
    for (i = 0; i < numScores; i ++) {
        lambdas[i] = savedLambdas[i];
    }

    return result;
}
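/*
 * Because computeErrors() saves and restores the global lambdas around the
 * evaluation, it can serve as a self-contained objective function, e.g.
 * (illustrative call, assuming a filled-in weight array):
 *
 *	double err = computeErrors(nbestSet, candidateWeights.data());
 */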
/*
 * Print lambdas, and optionally write an nbest-rover control file
 */
void
printLambdas(ostream &str, Array<double> &lambdas, const char *controlFile = 0)
{
    unsigned i;
    double normalizer = 0.0;

    str << " weights =";
    for (i = 0; i < numScores; i ++) {
        /*
         * Use the first nonzero weight as the normalizer
         */
        if (normalizer == 0.0 && lambdas[i] != 0.0) {
            normalizer = lambdas[i];
        }
        str << " " << lambdas[i];
    }
    str << endl;

    str << " normed =";
    for (i = 0; i < numScores; i ++) {
        str << " " << lambdas[i]/normalizer;
    }
    str << endl;

    str << " scale = " << 1/normalizer << endl;

    if (controlFile) {
        File file(controlFile, "w");

        /*
         * write additional score dirs and weights
         */
        for (i = 3; i < numScores; i ++) {
            fprintf(file, "%s\t%lg +\n",
                        scoreDirectories[i-3],
                        lambdas[i]/normalizer);
        }

        /*
         * write main score dir and weights
         */
        fprintf(file, "%s\t%lg %lg 1.0 %d %lg\n",
                        nbestDirectory,
                        lambdas[1]/normalizer,
                        lambdas[2]/normalizer,
                        maxNbest,
                        1/normalizer);
    }
}
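/*
 * The control file written above contains one line per additional score
 * directory, followed by a line for the main N-best directory, matching
 * the fprintf formats. Illustrative output (names and values made up):
 *
 *	extra-scores/dir1	0.05 +
 *	nbest/dir	8 0.5 1.0 100 12.5
 */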
/*
 * One step of gradient descent on the loss function
 */
void
updateLambdas()
{
    static Boolean havePrev = false;

    for (unsigned i = 0; i < numScores; i ++) {
        if (!fixLambdas[i]) {
            double delta;

            if (!havePrev || !quickprop ||
                lambdaDerivs[i]/prevLambdaDerivs[i] > 0)
            {
                /*
                 * Plain gradient step, scaled by the number of
                 * reference words
                 */
                delta = - epsilon * lambdaDerivs[i] / numRefWords;
            } else {
                /*
                 * Use QuickProp update rule
                 */
                delta = prevLambdaDeltas[i] * lambdaDerivs[i] /
                            (prevLambdaDerivs[i] - lambdaDerivs[i]);
            }

            lambdas[i] += delta;

            prevLambdaDeltas[i] = delta;
            prevLambdaDerivs[i] = lambdaDerivs[i];
        }
    }
    havePrev = true;
}
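/*
 * The QuickProp step above treats the loss as locally quadratic in each
 * lambda and jumps toward the vertex of the parabola fit through the
 * current and previous derivatives:
 *
 *	delta_t = delta_{t-1} * g_t / (g_{t-1} - g_t)
 *
 * where g denotes the loss derivative. As coded, it is applied only when
 * the derivative has changed sign (or vanished) since the last step;
 * otherwise a plain gradient step of size epsilon is taken.
 */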
/*
 * Iterate gradient descent on the loss function
 */
void
train(NBestSet &nbestSet)
{
    unsigned iter = 0;
    unsigned badIters = 0;
    double oldLoss = 0.0;

    if (debug >= DEBUG_TRAIN) {
        printLambdas(cerr, lambdas);
    }

    while (iter++ < maxIters) {
        computeDerivs(nbestSet);

        /*
         * iter starts at 1 inside the loop, so skip the convergence test
         * on the first iteration, before oldLoss has been set
         */
        if (iter > 1 && fabs(totalLoss - oldLoss)/oldLoss < converge) {
            cerr << "stopping due to convergence\n";
            break;
        }

        if (debug >= DEBUG_TRAIN) {
            cerr << "iteration " << iter << ":"
                 << " errors = " << totalError
                 << " (" << ((double)totalError/numRefWords) << "/word)"
                 << " loss = " << totalLoss
                 << " (" << (totalLoss/numRefWords) << "/word)"
                 << endl;
        }

        if (iter == 1 || (finite(totalLoss) && totalError < bestError)) {
            cerr << "NEW BEST ERROR: " << totalError
                 << " (" << ((double)totalError/numRefWords) << "/word)\n";
            printLambdas(cerr, lambdas, writeRoverControl);

            bestError = totalError;
            bestLambdas = lambdas;
            badIters = 0;

#ifndef NO_TIMEOUT
            if (maxTime) {
                alarm(maxTime);
            }
#endif /* !NO_TIMEOUT */
        } else {
            badIters ++;
        }

        if (abortSearch) {
            cerr << "search timed out after " << maxTime << " seconds\n";
            break;
        }

        if (badIters > maxBadIters || !finite(totalLoss)) {
            if (epsilonStepdown > 0.0) {
                epsilon *= epsilonStepdown;

                if (epsilon < minEpsilon) {
                    cerr << "minimum epsilon reached\n";
                    break;
                }

                cerr << "setting epsilon to " << epsilon
                     << " due to lack of error decrease\n";

                /*
                 * restart descent at last best point, and
                 * disable QuickProp for the next iteration
                 */
                prevLambdaDerivs = lambdaDerivs;
                lambdas = bestLambdas;
                badIters = 0;
            } else {
                cerr << "stopping due to lack of error decrease\n";
                break;
            }
        } else {
            updateLambdas();
        }

        if (debug >= DEBUG_TRAIN) {
            printLambdas(cerr, lambdas);
        }

        oldLoss = totalLoss;
    }
}
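/*
 * The alarm(maxTime) calls above arm a watchdog that is re-armed whenever
 * a new best error is found; a SIGALRM handler elsewhere in the program is
 * assumed to set abortSearch, which ends the search loop the next time it
 * is checked.
 */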
/*
 * Evaluate a single point in the (unconstrained) parameter space
 */
double
amoebaComputeErrors(NBestSet &nbestSet, double *p)
{
    unsigned i, j;
    Array<double> weights;

    if (p[0] < 0.5) {
        /*
         * This prevents posteriors from going through the roof, which
         * leads to numerical problems. Since the scaling of posteriors is
         * a redundant dimension this doesn't constrain the result.
         */
        return numRefWords;
    }

    for (i = 0, j = 1; i < numScores; i++) {
        if (fixLambdas[i]) {
            weights[i] = lambdas[i] / p[0];
        } else {
            weights[i] = p[j] / p[0];
            j++;
        }

        /*
         * Check for negative weights if -non-negative is in effect.
         * Return a large error count for disallowed values.
         */
        if (nonNegative && weights[i] < 0.0) {
            return numRefWords;
        }
    }

    double error = computeErrors(nbestSet, weights.data());

    if (error < bestError) {
        cerr << "NEW BEST ERROR: " << error
             << " (" << ((double)error/numRefWords) << "/word)\n";
        printLambdas(cerr, weights, writeRoverControl);

        bestError = (int) error;
        bestLambdas = weights;

#ifndef NO_TIMEOUT
        if (maxTime) {
            alarm(maxTime);
        }
#endif /* !NO_TIMEOUT */
    }

    return error;
}
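/*
 * Parameterization used by the amoeba search: p[0] is the posterior scale,
 * and p[1..] hold the weights that are free to vary; weights flagged in
 * fixLambdas keep their current values. All weights are divided by p[0],
 * since the overall scaling of the posteriors is a redundant dimension of
 * the search space.
 */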
/*
 * Try moving a single simplex corner
 */
double
amoebaEval(NBestSet &nbest, double **p, double *y, double *psum, unsigned ndim,
           double (*funk)(NBestSet &, double[]), unsigned ihi, double fac)
{
    makeArray(double, ptry, ndim);

    double fac1 = (1.0 - fac) / ndim;
    double fac2 = fac1 - fac;

    for (unsigned j = 0; j < ndim; j++) {
        ptry[j] = psum[j] * fac1 - p[ihi][j] * fac2;
    }

    double ytry = (*funk)(nbest, ptry);

    /*
     * If the trial point improves on the worst corner, replace that
     * corner and update the running column sums
     */
    if (ytry < y[ihi]) {
        y[ihi] = ytry;

        for (unsigned j = 0; j < ndim; j++) {
            psum[j] += ptry[j] - p[ihi][j];
            p[ihi][j] = ptry[j];
        }
    }
    return ytry;
}
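/*
 * The fac1/fac2 algebra above is equivalent to moving the worst corner
 * relative to the centroid c of the other ndim corners:
 *
 *	c = (psum - p[ihi]) / ndim
 *	ptry = c + fac * (p[ihi] - c)
 *
 * so fac = -1 reflects the worst point through the centroid, fac > 1
 * expands away from it, and 0 < fac < 1 contracts toward it (these are
 * the factors -1.0, 1.5, 2.0, 3.0, 0.7, and -0.7 used by amoeba() below).
 */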
inline void
computeSum(unsigned ndim, unsigned mpts, double *psum, double **p)
{
    for (unsigned j = 0; j < ndim; j++) {
        double sum = 0.0;

        for (unsigned i = 0; i < mpts; i++) {
            sum += p[i][j];
        }
        psum[j] = sum;
    }
}

inline void
swap(double &a, double &b)
{
    double h = a;
    a = b;
    b = h;
}
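/*
 * computeSum() establishes the per-dimension sums over all simplex
 * corners; amoebaEval() then keeps psum up to date incrementally whenever
 * a corner moves, so the full sum only needs to be computed once, before
 * the main loop.
 */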
/*
 * Run Amoeba (downhill simplex) optimization
 */
void
amoeba(NBestSet &nbest, double **p, double *y, unsigned ndim, double ftol,
       double (*funk)(NBestSet &, double[]), unsigned &nfunk)
{
    unsigned ihi, inhi, mpts = ndim + 1;
    makeArray(double, psum, ndim);

    if (debug >= DEBUG_TRAIN) {
        cerr << "Starting amoeba with " << ndim << " dimensions" << endl;
    }

    computeSum(ndim, mpts, psum, p);

    double rtol = 10000.0;
    unsigned ilo = 0;
    unsigned unchanged = 0;

    while (true) {
        double ysave, ytry;
        double ylo_pre = y[ilo];

        /*
         * Find the lowest (ilo), highest (ihi), and second-highest (inhi)
         * corners of the simplex
         */
        ilo = 0;
        ihi = y[0] > y[1] ? (inhi = 1, 0) : (inhi = 0, 1);

        for (unsigned i = 0; i < mpts; i++) {
            if (y[i] <= y[ilo]) {
                ilo = i;
            }
            if (y[i] > y[ihi]) {
                inhi = ihi;
                ihi = i;
            } else if (y[i] > y[inhi] && i != ihi) {
                inhi = i;
            }
        }

        if (debug >= DEBUG_TRAIN) {
            cerr << "Current low " << y[ilo] << ": ";
            cerr << "Current high " << y[ihi] << ":";
            /*
             * for (unsigned j = 0; j < ndim; j++)
             *     cerr << " " << p[ihi][j]; cerr << endl;
             */
            cerr << "Current next high " << y[inhi] << endl;
        }

        /*
         * Fractional range between the highest and lowest values
         */
        double denom = fabs(y[ihi]) + fabs(y[ilo]);
        if (denom == 0.0) {
            rtol = 0.0;
        } else {
            rtol = 2.0 * fabs(y[ihi] - y[ilo]) / denom;
        }
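        /*
         * rtol is the standard downhill-simplex stopping measure: twice
         * the spread between the worst and best function values, divided
         * by the sum of their magnitudes. The search below stops once
         * rtol drops to ftol, or once the best value stays unchanged
         * (with rtol below the convergence threshold) for more than
         * maxBadIters rounds.
         */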
        if (ylo_pre == y[ilo] && rtol < converge) {
            unchanged++;
        } else {
            unchanged = 0;

            if (ylo_pre > y[ilo]) {
                /*
                 * The low point improved: dump the current best scale
                 * and free weights
                 */
                if (debug >= DEBUG_TRAIN) {
                    cerr << "scale " << p[ilo][0] << endl;

                    for (unsigned j = 1, k = 0; k < numScores && j < ndim; k++)
                    {
                        if (!fixLambdas[k]) {
                            cerr << "lambda_" << j - 1
                                 << " " << p[ilo][j++] << endl;
                        }
                    }
                }
            }
        }
        if (unchanged > maxBadIters) {
            /*
             * Move the best point into slot 0 before returning
             */
            swap(y[0], y[ilo]);
            for (unsigned i = 0; i < ndim; i++) {
                swap(p[0][i], p[ilo][i]);
            }
            break;
        }

        if (debug >= DEBUG_TRAIN) {
            cerr << " fractional range " << rtol << endl;
            cerr << " limit range " << ftol << endl;
        }

        if (rtol <= ftol) {
            swap(y[0], y[ilo]);
            for (unsigned i = 0; i < ndim; i++) {
                swap(p[0][i], p[ilo][i]);
            }
            break;
        }

        if (abortSearch) {
            break;
        }
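        /*
         * One simplex step: first reflect the worst corner through the
         * centroid (fac = -1); if that beats the current best, expand
         * increasingly aggressively (fac = 1.5, 2.0, 3.0); if instead it
         * is no better than the second-worst corner, contract (fac = 0.7,
         * then fac = -0.7 in the opposite direction).
         */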
        nfunk += 1;

        // Try a reflection
        ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, -1.0);
        if (debug >= DEBUG_TRAIN) {
            cerr << " Reflected amoeba returned " << ytry << endl;
        }

        if (ytry <= y[ilo]) {
            // Reflection beat the best point so far; try expanding
            nfunk += 1;
            ysave = ytry;
            ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, 1.5);
            if (debug >= DEBUG_TRAIN) {
                cerr << " Expanded amoeba by 1.5 returned " << ytry << endl;
            }

            // If successful, expand further
            if (ytry <= ysave) {
                ysave = ytry;
                nfunk += 1;
                ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, 2.0);
                if (debug >= DEBUG_TRAIN) {
                    cerr << " Expanded amoeba by 2.0 returned " << ytry << endl;
                }
            }

            // If still successful, expand once more
            if (ytry <= ysave) {
                nfunk += 1;
                ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, 3.0);
                if (debug >= DEBUG_TRAIN) {
                    cerr << " Expanded amoeba by 3.0 returned " << ytry << endl;
                }
            }
        } else if (ytry >= y[inhi]) {
            // Reflection failed; contract toward the centroid
            ysave = y[ihi];

            nfunk += 1;
            ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, 0.7);
            if (debug >= DEBUG_TRAIN) {
                cerr << " Shrunken amoeba by 0.7 returned " << ytry << endl;
            }

            // If that failed too, contract in the opposite direction
            if (ytry >= ysave) {
                nfunk += 1;
                ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, -0.7);
                if (debug >= DEBUG_TRAIN) {
                    cerr << " Shrunken reflected amoeba by -0.7 returned "
                         << ytry << endl;
                }
            }