⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vocabdistance.cc

📁 这是一款很好用的工具包
💻 CC
字号:
/*
 * VocabDistance.cc --
 *	Distance metrics over vocabularies
 *
 */

#ifndef lint
static char Copyright[] = "Copyright (c) 2000 SRI International.  All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lm/src/RCS/VocabDistance.cc,v 1.3 2001/07/31 02:38:32 stolcke Exp $";
#endif

#include "VocabDistance.h"
#include "WordAlign.h"

#include "Map2.cc"
#ifdef INSTANTIATE_TEMPLATES
//INSTANTIATE_MAP2(VocabIndex,VocabIndex,double);
#endif

/*
 * Relative phonetic distance
 * (Levenshtein distance of pronunciations normalized by pronunciation length)
 */

/*
 * Build a relative-distance metric on top of a pronunciation dictionary.
 * The dictionary is stored in the member of the same name; vocab is only
 * consulted for the consistency check in the body.
 */
DictionaryDistance::DictionaryDistance(Vocab &vocab, VocabMultiMap &dictionary)
    : dictionary(dictionary)
{
    /*
     * The dictionary's first vocabulary has to be the very same object
     * as the word vocabulary handed in by the caller
     */
    assert(&dictionary.vocab1 == &vocab);
}

/*
 * Relative phonetic distance between two words.
 *
 * Every pronunciation of one word is aligned against every pronunciation
 * of the other with wordError(); each error count is divided by the length
 * of the longer pronunciation (or by 1 if both are empty), and the smallest
 * quotient is returned.  Identical indices yield 0; if no pair of
 * pronunciations could be compared the result is 1.  Results are kept in
 * the cache member, keyed on the ordered index pair.
 */
double
DictionaryDistance::distance(VocabIndex w1, VocabIndex w2)
{
    /*
     * A word is at distance zero from itself; nothing is cached for this
     */
    if (w1 == w2) {
	return 0.0;
    }

    /*
     * Put the smaller index first so that (a,b) and (b,a) share one
     * cache entry
     */
    if (w2 < w1) {
	VocabIndex tmp = w1;
	w1 = w2;
	w2 = tmp;
    }

    Boolean cached;
    double *slot = cache.insert(w1, w2, cached);

    if (cached) {
	return *slot;
    }

    /*
     * A negative value marks "no pronunciation pair scored so far"
     */
    double best = -1.0;

    VocabMultiMapIter firstProns(dictionary, w1);
    const VocabIndex *pronA;
    Prob probA;

    while ((pronA = firstProns.next(probA)) != 0) {
	unsigned lenA = Vocab::length(pronA);

	/*
	 * Start over on the second word's pronunciations for each
	 * pronunciation of the first word
	 */
	VocabMultiMapIter secondProns(dictionary, w2);
	const VocabIndex *pronB;
	Prob probB;

	while ((pronB = secondProns.next(probB)) != 0) {
	    unsigned lenB = Vocab::length(pronB);

	    /*
	     * Normalize by the longer pronunciation, never by zero
	     */
	    unsigned normalizer = (lenB > lenA) ? lenB : lenA;
	    if (normalizer == 0) {
		normalizer = 1;
	    }

	    /* only the total error count is used; the breakdown is ignored */
	    unsigned sub, ins, del;
	    double candidate =
		    (double)wordError(pronA, pronB, sub, ins, del) /
							normalizer;

	    if (best < 0.0 || candidate < best) {
		best = candidate;
	    }
	}
    }

    /*
     * Nothing could be compared: fall back to a distance of 1
     */
    if (best < 0.0) {
	best = 1.0;
    }

    *slot = best;
    return best;
}

/*
 * Absolute phonetic distance
 */

/*
 * Fallback result used by DictionaryAbsDistance::penalty() and
 * DictionaryAbsDistance::distance() when the dictionary iteration
 * produced no value.
 */
const double defaultDistance = 1.0; /* for word without dictionary entries */
/*
 * Spelling of the placeholder word that the DictionaryAbsDistance
 * constructor adds to the vocabulary; the resulting index is kept in
 * emptyIndex.
 */
const char *emptyWord = "*EMPTY*WORD*";

/*
 * Build an absolute-distance metric on top of a pronunciation dictionary.
 * The dictionary is stored in the member of the same name, and the
 * placeholder word emptyWord is added to vocab.
 */
DictionaryAbsDistance::DictionaryAbsDistance(Vocab &vocab,
						VocabMultiMap &dictionary)
    : dictionary(dictionary)
{
    /*
     * The dictionary's first vocabulary has to be the very same object
     * as the word vocabulary handed in by the caller
     */
    assert(&dictionary.vocab1 == &vocab);

    /*
     * Register the placeholder word and remember its index; penalty()
     * uses that index as the second half of its cache key
     */
    emptyIndex = vocab.addWord(emptyWord);
}

/*
 * Penalty associated with a single word: the length of its shortest
 * pronunciation in the dictionary, or defaultDistance if the dictionary
 * yields no pronunciation for it.  The value is kept in the cache member
 * under the key (w, emptyIndex).
 */
double
DictionaryAbsDistance::penalty(VocabIndex w)
{
    Boolean cached;
    double *slot = cache.insert(w, emptyIndex, cached);

    if (cached) {
	return *slot;
    }

    /*
     * A negative value marks "no pronunciation seen so far"
     */
    double shortest = -1.0;

    VocabMultiMapIter prons(dictionary, w);
    Prob prob;

    for (const VocabIndex *pron = prons.next(prob);
	 pron != 0;
	 pron = prons.next(prob))
    {
	unsigned len = Vocab::length(pron);

	if (shortest < 0.0 || len < shortest) {
	    shortest = len;
	}
    }

    /*
     * No pronunciation at all: fall back to the default distance
     */
    if (shortest < 0.0) {
	shortest = defaultDistance;
    }

    *slot = shortest;
    return shortest;
}

/*
 * Absolute phonetic distance between two words.
 *
 * Every pronunciation of one word is aligned against every pronunciation
 * of the other with wordError(), and the smallest raw error count is
 * returned (no length normalization).  Identical indices yield 0; if no
 * pair of pronunciations could be compared the result is defaultDistance.
 * Results are kept in the cache member, keyed on the ordered index pair.
 */
double
DictionaryAbsDistance::distance(VocabIndex w1, VocabIndex w2)
{
    /*
     * A word is at distance zero from itself; nothing is cached for this
     */
    if (w1 == w2) {
	return 0.0;
    }

    /*
     * Put the smaller index first so that (a,b) and (b,a) share one
     * cache entry
     */
    if (w2 < w1) {
	VocabIndex tmp = w1;
	w1 = w2;
	w2 = tmp;
    }

    Boolean cached;
    double *slot = cache.insert(w1, w2, cached);

    if (cached) {
	return *slot;
    }

    /*
     * A negative value marks "no pronunciation pair scored so far"
     */
    double best = -1.0;

    VocabMultiMapIter firstProns(dictionary, w1);
    Prob probA;

    for (const VocabIndex *pronA = firstProns.next(probA);
	 pronA != 0;
	 pronA = firstProns.next(probA))
    {
	/*
	 * Start over on the second word's pronunciations for each
	 * pronunciation of the first word
	 */
	VocabMultiMapIter secondProns(dictionary, w2);
	Prob probB;

	for (const VocabIndex *pronB = secondProns.next(probB);
	     pronB != 0;
	     pronB = secondProns.next(probB))
	{
	    /* only the total error count is used; the breakdown is ignored */
	    unsigned sub, ins, del;
	    double candidate =
		    (double)wordError(pronA, pronB, sub, ins, del);

	    if (best < 0.0 || candidate < best) {
		best = candidate;
	    }
	}
    }

    /*
     * Nothing could be compared: fall back to the default distance
     */
    if (best < 0.0) {
	best = defaultDistance;
    }

    *slot = best;
    return best;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -