// nbest-optimize.cc
// N-best list score-weight optimization (amoeba/simplex training and
// best-hypothesis output routines).
// try again
if (ytry >= ysave) {
nfunk += 1;
ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, 0.5);
if (debug >= DEBUG_TRAIN) {
cerr << " Shrunken amoeba by 0.5 returned " << ytry << endl;
}
}
// try again opposite direction
if (ytry >= ysave) {
nfunk += 1;
ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, -0.5);
if (debug >= DEBUG_TRAIN) {
cerr << " Shrunken reflected amoeba by -0.5 returned "
<< ytry << endl;
}
}
if (ytry >= ysave) {
nfunk += 1;
ytry = amoebaEval(nbest, p, y, psum, ndim, funk, ihi, 0.3);
if (debug >= DEBUG_TRAIN) {
cerr << " Shrunken amoeba by 0.3 returned " << ytry << endl;
}
}
// if failed to get rid of high contract everything by 0.7
if (ytry >= ysave) {
for (unsigned i = 0; i < mpts; i++) {
if (i != ilo) {
for (unsigned j = 0; j < ndim; j++)
p[i][j] = psum[j] =
0.7 * p[i][j] + 0.3 * p[ilo][j];
y[i] = (*funk) (nbest, psum);
}
}
nfunk += ndim;
computeSum(ndim, mpts, psum, p);
}
} else {
--nfunk;
}
}
}
/*
* Amoeba optimization with restarts
*/
void
trainAmoeba(NBestSet &nbestSet)
{
    /*
     * Before training reset the lambdas to their unscaled version;
     * they are divided by posteriorScale again on the way out.
     */
    for (unsigned i = 0; i < numScores; i++) {
	lambdas[i] *= posteriorScale;
    }

    /*
     * Initialize amoeba points.
     * There is one search dimension per score dimension, excluding fixed
     * weights, but adding one for the posterior score (which is stored in
     * the vector even if it is kept fixed).
     */
    int numFreeWeights = numScores - numFixedWeights + 1;

    makeArray(double *, points, numFreeWeights + 1);
    for (unsigned i = 0; i <= numFreeWeights; i++) {
	points[i] = new double[numFreeWeights];
	assert(points[i] != 0);
    }
    makeArray(double, errs, numFreeWeights + 1);
    makeArray(double, prevPoints, numFreeWeights);

    // dimension 0 holds the posterior scale; the remaining dimensions hold
    // the non-fixed lambdas, in score order
    prevPoints[0] = points[0][0] = posteriorScale;
    simplex[0] = posteriorScaleStep;
    for (unsigned i = 0, j = 1; i < numScores; i++) {
	if (!fixLambdas[i]) {
	    prevPoints[j] = points[0][j] = lambdas[i];
	    simplex[j] = lambdaSteps[i];
	    j++;
	}
    }

    // search direction vectors used to regenerate the simplex on restarts
    makeArray(double *, dir, numFreeWeights + 1);
    for (unsigned i = 0; i <= numFreeWeights; i++) {
	dir[i] = new double[numFreeWeights];
	assert(dir[i] != 0);
	for (unsigned j = 0; j < numFreeWeights; j++) {
	    dir[i][j] = 0.0;
	}
    }

    unsigned nevals = 0;	// total objective-function evaluations
    unsigned loop = 1;		// 0 terminates; also scales restart steps
    unsigned same = 0;		// consecutive restarts without improvement
    unsigned shift = 0;		// rotates direction assignment across restarts
    unsigned reps = 0;		// restart counter

    /* force an improvement */
    bestError = (unsigned)-1;

    while (loop) {
	reps++;

	/*
	 * Build the simplex displacement vectors.  Which dimension of point i
	 * receives a nonzero step depends on loop and shift, so successive
	 * restarts explore different direction assignments.
	 */
	for (unsigned i = 1; i <= numFreeWeights; i++) {
	    unsigned k = 0;
	    dir[i][k] += (((k + loop + shift - 1) % numFreeWeights) + 1 == i) ?
				loop * simplex[k] : 0.0;
	    k++;
	    for (unsigned j = 0; j < numScores; j++) {
		if (!fixLambdas[j]) {
		    dir[i][k] +=
			(((k + loop + shift - 1) % numFreeWeights) + 1 == i) ?
				loop * simplex[k] : 0.0;
		    k++;
		}
	    }
	}

	if (debug >= DEBUG_TRAIN) {
	    cerr << "Simplex points:" << endl;
	}
	for (unsigned i = 0; i <= numFreeWeights; i++) {
	    for (unsigned j = 0; j < numFreeWeights; j++) {
		points[i][j] = points[0][j] + dir[i][j];
	    }
	    errs[i] = amoebaComputeErrors(nbestSet, points[i]);
	    if (debug >= DEBUG_TRAIN) {
		cerr << "Point " << i << " : ";
		for (unsigned j = 0; j < numFreeWeights; j++) {
		    cerr << points[i][j] << " ";
		}
		cerr << "errors = " << errs[i] << endl;
	    }
	}

	unsigned prevErrors = (int) errs[0];

	/*
	 * The objective function fractional tolerance is
	 * decreasing with each retry.
	 */
	amoeba(nbestSet, points, errs, numFreeWeights, converge / reps,
						amoebaComputeErrors, nevals);

	if ((int) errs[0] < prevErrors) {
	    // improvement: keep restarting
	    loop++;
	    same = 0;
	} else if (same < numFreeWeights) {
	    // no improvement yet, but allow up to numFreeWeights retries
	    loop++;
	    same++;
	} else {
	    loop = 0;
	}

	// after enough restarts, reset directions and rotate the assignment
	if (loop > numFreeWeights / 3) {
	    loop = 1;
	    for (unsigned i = 0; i <= numFreeWeights; i++) {
		for (unsigned j = 0; j < numFreeWeights; j++) {
		    dir[i][j] = 0.0;
		}
	    }
	    shift++;
	}

	// reset step sizes
	posteriorScale = points[0][0];
	if (loop == 1) {
	    simplex[0] = posteriorScaleStep;
	} else if (fabs(prevPoints[0] - points[0][0])
					> 1.3 * fabs(simplex[0]))
	{
	    // large move: use the actual displacement as the new step
	    simplex[0] = points[0][0] - prevPoints[0];
	} else {
	    // small move: grow the step geometrically
	    simplex[0] = simplex[0] * 1.3;
	}
	prevPoints[0] = points[0][0];

	unsigned j = 1;
	for (unsigned i = 0; i < numScores; i++) {
	    if (!fixLambdas[i]) {
		lambdas[i] = points[0][j];
		if (loop == 1) {
		    simplex[j] = lambdaSteps[i];
		} else if (fabs(prevPoints[j] - points[0][j])
					> 1.3 * fabs(simplex[j]))
		{
		    simplex[j] = points[0][j] - prevPoints[j];
		} else {
		    simplex[j] = simplex[j] * 1.3;
		}
		prevPoints[j] = points[0][j];

		if (debug >= DEBUG_TRAIN) {
		    cerr << "lambda_" << i << " " << points[0][j]
			 << " " << simplex[j] << endl;
		}
		j++;
	    }
	}

	if (debug >= DEBUG_TRAIN) {
	    cerr << "scale " << points[0][0] << endl;
	    cerr << "errors " << errs[0] << endl;
	    cerr << "unchanged for " << same << " iterations " << endl;
	}

	if (nevals >= maxIters) {
	    cerr << "maximum number of iterations exceeded" << endl;
	    loop = 0;
	}
	if (reps > maxAmoebaRestarts) {
	    cerr << "maximum number of Amoeba restarts reached" << endl;
	    loop = 0;
	}
	if (abortSearch) {
	    cerr << "search timed out after " << maxTime << " seconds\n";
	    loop = 0;
	}
    }

    /*
     * FIX: points[i] and dir[i] were allocated with new[], so they must be
     * released with delete[]; plain delete on an array is undefined behavior.
     */
    for (unsigned i = 0; i <= numFreeWeights; i++) {
	delete [] points[i];
	delete [] dir[i];
    }

    // Scale the lambdas back
    for (unsigned i = 0; i < numScores; i++) {
	lambdas[i] /= posteriorScale;
    }
}
/*
* output 1-best hyp
*/
/*
 * Write the utterance ID followed by the words of the single best-scoring
 * hypothesis in the N-best list (nothing but the ID if the list is empty).
 */
void
printTop1bestHyp(File &file, RefString id, NBestScore **scores,
						NBestList &nbest)
{
    unsigned total = nbest.numHyps();

    fprintf(file, "%s", id);

    /*
     * Locate the hypothesis with the highest combined score
     */
    unsigned winner = 0;
    LogP winnerScore = 0.0;

    for (unsigned idx = 0; idx < total; idx ++) {
	LogP current = hypScore(idx, scores);
	if (idx == 0 || current > winnerScore) {
	    winnerScore = current;
	    winner = idx;
	}
    }

    if (total > 0) {
	VocabIndex *words = nbest.getHyp(winner).words;
	for (unsigned w = 0; words[w] != Vocab_None; w ++) {
	    fprintf(file, " %s", nbest.vocab.getWord(words[w]));
	}
    }
    fprintf(file, "\n");
}
/*
* output best sausage hypotheses
*/
/*
 * Write the utterance ID followed by the highest-scoring word at each
 * sausage (confusion network) position, skipping deletion placeholders.
 */
void
printTopSausageHyp(File &file, RefString id, NBestScore **scores,
						WordMesh &alignment)
{
    fprintf(file, "%s", id);

    /*
     * walk every position of the multiple alignment
     */
    for (unsigned pos = 0; pos < alignment.length(); pos++) {
	WordMeshIter candidates(alignment, pos);

	VocabIndex winner = Vocab_None;
	Prob winnerScore = 0.0;

	VocabIndex candidate;
	Array<HypID> *idMap;
	while ((idMap = candidates.next(candidate)) != 0) {
	    /*
	     * total score mass behind this candidate word
	     * (the "correct" flag from wordScore is not needed here)
	     */
	    Boolean unused;
	    Prob mass = wordScore(*idMap, scores, unused);

	    if (winner == Vocab_None || mass > winnerScore) {
		winner = candidate;
		winnerScore = mass;
	    }
	}

	// every position must contain at least one word entry
	assert(winner != Vocab_None);

	if (winner != alignment.deleteIndex) {
	    fprintf(file, " %s", alignment.vocab.getWord(winner));
	}
    }
    fprintf(file, "\n");
}
/*
 * Emit the best hypothesis for every utterance in the set, taken either
 * from the raw N-best list (1-best mode) or from the sausage alignment.
 */
void
printTopHyps(File &file, NBestSet &nbestSet)
{
    NBestSetIter setIter(nbestSet);
    RefString uttId;
    NBestList *list;

    while ((list = setIter.next(uttId)) != 0) {
	NBestScore ***uttScores = nbestScores.find(uttId);
	assert(uttScores != 0);

	if (!oneBest) {
	    WordMesh **mesh = nbestAlignments.find(uttId);
	    assert(mesh != 0);
	    printTopSausageHyp(file, uttId, *uttScores, **mesh);
	} else {
	    printTop1bestHyp(file, uttId, *uttScores, *list);
	}
    }
}
/*
* Align N-best lists
*/
typedef struct {
LogP score;		// combined hypothesis score (higher is better)
unsigned rank;		// original position of the hyp in the N-best list
} HypRank; /* used in sorting nbest hyps by score */
static int
compareHyps(const void *h1, const void *h2)
{
LogP score1 = ((HypRank *)h1)->score;
LogP score2 = ((HypRank *)h2)->score;
return score1 > score2 ? -1 :
score1 < score2 ? 1 : 0;
}
/*
 * Build a multiple alignment (word mesh / sausage) for every N-best list
 * in the set, aligning hyps in order of decreasing combined score, with
 * the reference transcript aligned first or last depending on -no-reorder.
 */
void
alignNbest(NBestSet &nbestSet, RefList &refs,
			SubVocabDistance &subvocabDistance)
{
    NBestSetIter iter(nbestSet);
    NBestList *nbest;
    RefString id;
    while (nbest = iter.next(id)) {
	unsigned numWords;
	VocabIndex *ref = refs.findRef(id);
	assert(ref != 0);

	unsigned numHyps = nbest->numHyps();

	/*
	 * Sort hyps by initial scores. (Combined initial scores are
	 * stored in acousticScore from before.)
	 * Keep hyp order outside of N-best lists, since scores must be
	 * kept in sync.
	 */
	makeArray(HypRank, reordering, numHyps);

	NBestScore ***scores = nbestScores.find(id);
	assert(scores != 0);

	/*
	 * Copy combined scores back into N-best list acoustic score for
	 * posterior probability computation (since computePosteriors()
	 * doesn't take additional scores).
	 */
	for (unsigned j = 0; j < numHyps; j ++) {
	    reordering[j].rank = j;
	    reordering[j].score =
		nbest->getHyp(j).acousticScore = hypScore(j, *scores);
	}

	if (!noReorder) {
	    qsort(reordering, numHyps, sizeof(HypRank), compareHyps);
	}

	/*
	 * compute posteriors for passing to alignWords().
	 * Note: these now reflect all scores and initial lambdas.
	 */
	nbest->computePosteriors(0.0, 0.0, 1.0);

	/*
	 * create word-mesh for multiple alignment.
	 * FIX: previously a WordMesh was unconditionally allocated here and
	 * then immediately overwritten by the if/else below, leaking one
	 * WordMesh per utterance.  Allocate exactly once.
	 */
	WordMesh *alignment;
	if (hiddenVocabFile) {
	    alignment = new WordMesh(nbestSet.vocab, 0, &subvocabDistance);
	} else {
	    alignment = new WordMesh(nbestSet.vocab);
	}
	assert(alignment != 0);

	*nbestAlignments.insert(id) = alignment;

	numWords = Vocab::length(ref);

	/*
	 * Default is to start alignment with hyps strings,
	 * or with the reference if -align-refs-first was given.
	 * Note we give reference posterior 1 only to constrain the
	 * alignment. The loss computation in training ignores the
	 * posteriors assigned to hyps at this point.
	 */
	HypID hypID;

	if (noReorder) {
	    hypID = refID;
	    alignment->alignWords(ref, 1.0, 0, &hypID);
	}

	/*
	 * Now align all N-best hyps, in order of decreasing scores
	 */
	for (unsigned j = 0; j < numHyps; j ++) {
	    unsigned hypRank = reordering[j].rank;
	    NBestHyp &hyp = nbest->getHyp(hypRank);
	    hypID = hypRank;

	    /*
	     * Check for overflow in the hypIDs
	     */
	    if ((unsigned)hypID != hypRank || hypID == refID) {
		cerr << "Sorry, too many hypotheses in " << id << endl;
		exit(2);
	    }

	    alignment->alignWords(hyp.words, hyp.posterior, 0, &hypID);
	}

	if (!noReorder) {
	    hypID = refID;
	    alignment->alignWords(ref, 1.0, 0, &hypID);
	}

	if (debug >= DEBUG_ALIGNMENT) {
	    dumpAlignment(cerr, *alignment);
	}
    }
}
/*
* Read a single score file into a column of the score matrix
*/
Boolean
readScoreFile(const char *scoreDir, RefString id, NBestScore *scores,
unsigned numHyps)
{
makeArray(char, fileName,
strlen(scoreDir) + 1 + strlen(id) + strlen(GZIP_SUFFIX) + 1);
sprintf(fileName, "%s/%s", scoreDir, id);
/*
* If plain file doesn't exist try gzipped version
*/
FILE *fp = 0;
if ((fp = fopen(fileName, "r")) == NULL) {
strcat(fileName, GZIP_SUFFIX);
} else {
fclose(fp);
}
File file(fileName, "r", 0);
char *line;
unsigned hypNo = 0;
Boolean decipherScores = false;
while (!file.error() && (line = file.getline())) {
if (strncmp(line, nbest1Magic, sizeof(nbest1Magic)-1) == 0 ||
strncmp(line, nbest2Magic, sizeof(nbest2Magic)-1) == 0)
{
decipherScores = true;
continue;
}
if (hypNo >= numHyps) {
break;
}
/*
* parse the first word as a score
*/
double score;
	// NOTE: source truncated here — the remainder of readScoreFile()
	// (score parsing loop body and function close) was lost in
	// extraction; non-source code-viewer UI text was removed.