/*
 * nbest-optimize.cc --
 *	Optimize score combination weights for N-best rescoring
 */
        WordMeshIter iter(alignment, pos);
        Array<HypID> *hypMap;
        VocabIndex word;

        while ((hypMap = iter.next(word))) {
            /*
             * compute total score for word and check if it's the correct one
             */
            Boolean isCorrect;
            Prob totalScore = wordScore(*hypMap, scores, isCorrect);

            if (isCorrect) {
                corWord = word;
                corScore = totalScore;
                corHyps = hypMap;
            } else if (bicWord == Vocab_None || bicScore < totalScore) {
                bicWord = word;
                bicScore = totalScore;
                bicHyps = hypMap;
            }

            if (corWord != Vocab_None &&
                bicWord != Vocab_None &&
                bicScore > corScore)
            {
                result ++;
                break;
            }
        }
    }
    return result;
}
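/*
 * Note on the loop above: each alignment position is charged at most one
 * error, namely as soon as the best-scoring incorrect word ("bic") outscores
 * the correct one; iteration over the position then stops early.
 */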
/*
 * Compute total word error for a vector of score weights
 */
double
computeErrors(NBestSet &nbestSet, double *weights)
{
    int result = 0;
    Array<double> savedLambdas;
    unsigned i;

    /*
     * Temporarily install the candidate weights as the global lambdas
     */
    for (i = 0; i < numScores; i ++) {
        savedLambdas[i] = lambdas[i];
        lambdas[i] = weights[i];
    }

    NBestSetIter iter(nbestSet);
    NBestList *nbest;
    RefString id;

    while ((nbest = iter.next(id))) {
        NBestScore ***scores = nbestScores.find(id);
        assert(scores != 0);

        if (oneBest) {
            result += (int) compute1bestErrors(id, *scores, *nbest);
        } else {
            WordMesh **alignment = nbestAlignments.find(id);
            assert(alignment != 0);

            result += (int) computeSausageErrors(id, *scores, **alignment);
        }
    }

    /*
     * Restore the previous lambdas
     */
    for (i = 0; i < numScores; i ++) {
        lambdas[i] = savedLambdas[i];
    }

    return result;
}
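/*
 * Because computeErrors() saves and restores the global lambdas around the
 * evaluation, it can serve as a self-contained objective function, e.g.
 * (illustrative call, assuming a filled-in weight array):
 *
 *	double err = computeErrors(nbestSet, candidateWeights.data());
 */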
/*
 * Print lambdas, and optionally write an nbest-rover control file
 */
void
printLambdas(ostream &str, Array<double> &lambdas, const char *controlFile = 0)
{
    unsigned i;
    double normalizer = 0.0;

    str << " weights =";
    for (i = 0; i < numScores; i ++) {
        /*
         * Use the first nonzero weight as the normalizer
         */
        if (normalizer == 0.0 && lambdas[i] != 0.0) {
            normalizer = lambdas[i];
        }
        str << " " << lambdas[i];
    }
    str << endl;

    str << " normed =";
    for (i = 0; i < numScores; i ++) {
        str << " " << lambdas[i]/normalizer;
    }
    str << endl;

    str << " scale = " << 1/normalizer << endl;

    if (controlFile) {
        File file(controlFile, "w");

        /*
         * write additional score dirs and weights
         */
        for (i = 3; i < numScores; i ++) {
            fprintf(file, "%s\t%lg +\n",
                        scoreDirectories[i-3],
                        lambdas[i]/normalizer);
        }

        /*
         * write main score dir and weights
         */
        fprintf(file, "%s\t%lg %lg 1.0 %d %lg\n",
                        nbestDirectory,
                        lambdas[1]/normalizer,
                        lambdas[2]/normalizer,
                        maxNbest,
                        1/normalizer);
    }
}
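/*
 * The control file written above contains one line per additional score
 * directory, followed by a line for the main N-best directory, matching
 * the fprintf formats. Illustrative output (names and values made up):
 *
 *	extra-scores/dir1	0.05 +
 *	nbest/dir	8 0.5 1.0 100 12.5
 */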
/*
 * One step of gradient descent on the loss function
 */
void
updateLambdas()
{
    static Boolean havePrev = false;

    for (unsigned i = 0; i < numScores; i ++) {
        if (!fixLambdas[i]) {
            double delta;

            if (!havePrev || !quickprop ||
                lambdaDerivs[i]/prevLambdaDerivs[i] > 0)
            {
                /*
                 * Plain gradient step, scaled by the number of
                 * reference words
                 */
                delta = - epsilon * lambdaDerivs[i] / numRefWords;
            } else {
                /*
                 * Use QuickProp update rule
                 */
                delta = prevLambdaDeltas[i] * lambdaDerivs[i] /
                            (prevLambdaDerivs[i] - lambdaDerivs[i]);
            }

            lambdas[i] += delta;

            prevLambdaDeltas[i] = delta;
            prevLambdaDerivs[i] = lambdaDerivs[i];
        }
    }
    havePrev = true;
}
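/*
 * The QuickProp step above treats the loss as locally quadratic in each
 * lambda and jumps toward the vertex of the parabola fit through the
 * current and previous derivatives:
 *
 *	delta_t = delta_{t-1} * g_t / (g_{t-1} - g_t)
 *
 * where g denotes the loss derivative. As coded, it is applied only when
 * the derivative has changed sign (or vanished) since the last step;
 * otherwise a plain gradient step of size epsilon is taken.
 */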
/*
 * Iterate gradient descent on the loss function
 */
void
train(NBestSet &nbestSet)
{
    unsigned iter = 0;
    unsigned badIters = 0;
    double oldLoss = 0.0;

    if (debug >= DEBUG_TRAIN) {
        printLambdas(cerr, lambdas);
    }

    while (iter++ < maxIters) {
        computeDerivs(nbestSet);

        /*
         * iter starts at 1 inside the loop, so skip the convergence test
         * on the first iteration, before oldLoss has been set
         */
        if (iter > 1 && fabs(totalLoss - oldLoss)/oldLoss < converge) {
            cerr << "stopping due to convergence\n";
            break;
        }

        if (debug >= DEBUG_TRAIN) {
            cerr << "iteration " << iter << ":"
                 << " errors = " << totalError
                 << " (" << ((double)totalError/numRefWords) << "/word)"
                 << " loss = " << totalLoss
                 << " (" << (totalLoss/numRefWords) << "/word)"
                 << endl;
        }

        if (iter == 1 || (finite(totalLoss) && totalError < bestError)) {
            cerr << "NEW BEST ERROR: " << totalError
                 << " (" << ((double)totalError/numRefWords) << "/word)\n";
            printLambdas(cerr, lambdas, writeRoverControl);

            bestError = totalError;
            bestLambdas = lambdas;
            badIters = 0;

#ifndef NO_TIMEOUT
            if (maxTime) {
                alarm(maxTime);
            }
#endif /* !NO_TIMEOUT */
        } else {
            badIters ++;
        }

        if (abortSearch) {
            cerr << "search timed out after " << maxTime << " seconds\n";
            break;
        }

        if (badIters > maxBadIters || !finite(totalLoss)) {
            if (epsilonStepdown > 0.0) {
                epsilon *= epsilonStepdown;

                if (epsilon < minEpsilon) {
                    cerr << "minimum epsilon reached\n";
                    break;
                }

                cerr << "setting epsilon to " << epsilon
                     << " due to lack of error decrease\n";

                /*
                 * restart descent at last best point, and
                 * disable QuickProp for the next iteration
                 */
                prevLambdaDerivs = lambdaDerivs;
                lambdas = bestLambdas;
                badIters = 0;
            } else {
                cerr << "stopping due to lack of error decrease\n";
                break;
            }
        } else {
            updateLambdas();
        }

        if (debug >= DEBUG_TRAIN) {
            printLambdas(cerr, lambdas);
        }

        oldLoss = totalLoss;
    }
}
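/*
 * The alarm(maxTime) calls above arm a watchdog that is re-armed whenever
 * a new best error is found; a SIGALRM handler elsewhere in the program is
 * assumed to set abortSearch, which ends the search loop the next time it
 * is checked.
 */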
/*
 * Evaluate a single point in the (unconstrained) parameter space
 */
double
amoebaComputeErrors(NBestSet &nbestSet, double *p)
{
    unsigned i, j;
    Array<double> weights;

    if (p[0] < 0.5) {
        /*
         * This prevents posteriors from going through the roof, which
         * leads to numerical problems. Since the scaling of posteriors is
         * a redundant dimension this doesn't constrain the result.
         */
        return numRefWords;
    }

    for (i = 0, j = 1; i < numScores; i++) {
        if (fixLambdas[i]) {
            weights[i] = lambdas[i] / p[0];
        } else {
            weights[i] = p[j] / p[0];
            j++;
        }

        /*
         * Check for negative weights if -non-negative is in effect.
         * Return a large error count for disallowed values.
         */
        if (nonNegative && weights[i] < 0.0) {
            return numRefWords;
        }
    }

    double error = computeErrors(nbestSet, weights.data());

    if (error < bestError) {
        cerr << "NEW BEST ERROR: " << error
             << " (" << ((double)error/numRefWords) << "/word)\n";
        printLambdas(cerr, weights, writeRoverControl);

        bestError = (int) error;
        bestLambdas = weights;

#ifndef NO_TIMEOUT
        if (maxTime) {
            alarm(maxTime);
        }
#endif /* !NO_TIMEOUT */
    }

    return error;
}
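/*
 * Parameterization used by the amoeba search: p[0] is the posterior scale,
 * and p[1..] hold the weights that are free to vary; weights flagged in
 * fixLambdas keep their current values. All weights are divided by p[0],
 * since the overall scaling of the posteriors is a redundant dimension of
 * the search space.
 */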
/*
 * Try moving a single simplex corner
 */
double
amoebaEval(NBestSet &nbest, double **p, double *y, double *psum, unsigned ndim,
           double (*funk)(NBestSet &, double[]), unsigned ihi, double fac)
{
    makeArray(double, ptry, ndim);

    double fac1 = (1.0 - fac) / ndim;
    double fac2 = fac1 - fac;

    for (unsigned j = 0; j < ndim; j++) {
        ptry[j] = psum[j] * fac1 - p[ihi][j] * fac2;
    }

    double ytry = (*funk)(nbest, ptry);

    /*
     * If the trial point improves on the worst corner, replace that
     * corner and update the running column sums
     */
    if (ytry < y[ihi]) {
        y[ihi] = ytry;

        for (unsigned j = 0; j < ndim; j++) {
            psum[j] += ptry[j] - p[ihi][j];
            p[ihi][j] = ptry[j];
        }
    }
    return ytry;
}
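/*
 * The fac1/fac2 algebra above is equivalent to moving the worst corner
 * relative to the centroid c of the other ndim corners:
 *
 *	c = (psum - p[ihi]) / ndim
 *	ptry = c + fac * (p[ihi] - c)
 *
 * so fac = -1 reflects the worst point through the centroid, fac > 1
 * expands away from it, and 0 < fac < 1 contracts toward it (these are
 * the factors -1.0, 1.5, 2.0, 3.0, 0.7, and -0.7 used by amoeba() below).
 */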
inline void
computeSum(unsigned ndim, unsigned mpts, double *psum, double **p)
{
    for (unsigned j = 0; j < ndim; j++) {
        double sum = 0.0;

        for (unsigned i = 0; i < mpts; i++) {
            sum += p[i][j];
        }
        psum[j] = sum;
    }
}

inline void
swap(double &a, double &b)
{
    double h = a;
    a = b;
    b = h;
}
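/*
 * computeSum() establishes the per-dimension sums over all simplex
 * corners; amoebaEval() then keeps psum up to date incrementally whenever
 * a corner moves, so the full sum only needs to be computed once, before
 * the main loop.
 */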
/*
 * Run Amoeba (downhill simplex) optimization
 */
void
amoeba(NBestSet &nbest, double **p, double *y, unsigned ndim, double ftol,
       double (*funk)(NBestSet &, double[]), unsigned &nfunk)
{
    unsigned ihi, inhi, mpts = ndim + 1;
    makeArray(double, psum, ndim);

    if (debug >= DEBUG_TRAIN) {
        cerr << "Starting amoeba with " << ndim << " dimensions" << endl;
    }

    computeSum(ndim, mpts, psum, p);

    double rtol = 10000.0;
    unsigned ilo = 0;
    unsigned unchanged = 0;

    while (true) {
        double ysave, ytry;
        double ylo_pre = y[ilo];

        /*
         * Find the lowest (ilo), highest (ihi), and second-highest (inhi)
         * corners of the simplex
         */
        ilo = 0;
        ihi = y[0] > y[1] ? (inhi = 1, 0) : (inhi = 0, 1);

        for (unsigned i = 0; i < mpts; i++) {
            if (y[i] <= y[ilo]) {
                ilo = i;
            }
            if (y[i] > y[ihi]) {
                inhi = ihi;
                ihi = i;
            } else if (y[i] > y[inhi] && i != ihi) {
                inhi = i;
            }
        }

        if (debug >= DEBUG_TRAIN) {
            cerr << "Current low " << y[ilo] << ": ";
            cerr << "Current high " << y[ihi] << ":";
            /*
             * for (unsigned j = 0; j < ndim; j++)
             *     cerr << " " << p[ihi][j]; cerr << endl;
             */
            cerr << "Current next high " << y[inhi] << endl;
        }

        /*
         * Fractional range between the highest and lowest values
         */
        double denom = fabs(y[ihi]) + fabs(y[ilo]);
        if (denom == 0.0) {
            rtol = 0.0;
        } else {
            rtol = 2.0 * fabs(y[ihi] - y[ilo]) / denom;
        }
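        /*
         * rtol is the standard downhill-simplex stopping measure: twice
         * the spread between the worst and best function values, divided
         * by the sum of their magnitudes. The search below stops once
         * rtol drops to ftol, or once the best value stays unchanged
         * (with rtol below the convergence threshold) for more than
         * maxBadIters rounds.
         */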
        if (ylo_pre == y[ilo] && rtol < converge) {
            unchanged++;
        } else {
            unchanged = 0;

            if (ylo_pre > y[ilo]) {
                /*
                 * The low point improved: dump the current best scale
                 * and free weights
                 */
                if (debug >= DEBUG_TRAIN) {
                    cerr << "scale " << p[ilo][0] << endl;

                    for (unsigned j = 1, k = 0; k < numScores && j < ndim; k++)
                    {
                        if (!fixLambdas[k]) {
                            cerr << "lambda_" << j - 1
                                 << " " << p[ilo][j++] << endl;
                        }
                    }
                }
            }
        }
        if (unchanged > maxBadIters) {
            /*
             * Move the best point into slot 0 before returning
             */
            swap(y[0], y[ilo]);
            for (unsigned i = 0; i < ndim; i++) {
                swap(p[0][i], p[ilo][i]);
            }
            break;
        }

        if (debug >= DEBUG_TRAIN) {
            cerr << " fractional range " << rtol << endl;
            cerr << " limit range " << ftol << endl;
        }

        if (rtol <= ftol) {
            swap(y[0], y[ilo]);
            for (unsigned i = 0; i < ndim; i++) {
                swap(p[0][i], p[ilo][i]);
            }
            break;
        }

        if (abortSearch) {
            break;
        }
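        /*
         * One simplex step: first reflect the worst corner through the
         * centroid (fac = -1); if that beats the current best, expand
         * increasingly aggressively (fac = 1.5, 2.0, 3.0); if instead it
         * is no better than the second-worst corner, contract (fac = 0.7,
         * then fac = -0.7 in the opposite direction).
         */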
        nfunk += 1;

        // Try a reflection
        ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, -1.0);
        if (debug >= DEBUG_TRAIN) {
            cerr << " Reflected amoeba returned " << ytry << endl;
        }

        if (ytry <= y[ilo]) {
            // Reflection beat the best point so far; try expanding
            nfunk += 1;
            ysave = ytry;
            ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, 1.5);
            if (debug >= DEBUG_TRAIN) {
                cerr << " Expanded amoeba by 1.5 returned " << ytry << endl;
            }

            // If successful, expand further
            if (ytry <= ysave) {
                ysave = ytry;
                nfunk += 1;
                ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, 2.0);
                if (debug >= DEBUG_TRAIN) {
                    cerr << " Expanded amoeba by 2.0 returned " << ytry << endl;
                }
            }

            // If still successful, expand once more
            if (ytry <= ysave) {
                nfunk += 1;
                ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, 3.0);
                if (debug >= DEBUG_TRAIN) {
                    cerr << " Expanded amoeba by 3.0 returned " << ytry << endl;
                }
            }
        } else if (ytry >= y[inhi]) {
            // Reflection failed; contract toward the centroid
            ysave = y[ihi];

            nfunk += 1;
            ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, 0.7);
            if (debug >= DEBUG_TRAIN) {
                cerr << " Shrunken amoeba by 0.7 returned " << ytry << endl;
            }

            // If that failed too, contract in the opposite direction
            if (ytry >= ysave) {
                nfunk += 1;
                ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, -0.7);
                if (debug >= DEBUG_TRAIN) {
                    cerr << " Shrunken reflected amoeba by -0.7 returned "
                         << ytry << endl;
                }
            }