⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 taggedngramstats.cc

📁 这是一款很好用的工具包
💻 CC
字号:
/*
 * TaggedNgramStats.cc --
 *	N-gram counting for word/tag pairs
 *
 */

#ifndef lint
static char TaggedNgramStats_Copyright[] = "Copyright (c) 1995,2002 SRI International.  All Rights Reserved.";
static char TaggedNgramStats_RcsId[] = "@(#)$Header: /home/srilm/devel/lm/src/RCS/TaggedNgramStats.cc,v 1.5 2006/01/05 20:21:27 stolcke Exp $";
#endif

#include <iostream>
using namespace std;
#include <string.h>

#include "TaggedNgramStats.h"

/*
 * Constructor: hands the vocabulary and maximum order on to the NgramStats
 * base class and initializes this class's own `vocab' member from the same
 * TaggedVocab argument.
 */
TaggedNgramStats::TaggedNgramStats(TaggedVocab &vocab, unsigned int maxOrder)
    : NgramStats(vocab, maxOrder),
      vocab(vocab)
{
    /* all setup happens in the initializer list */
}

void
TaggedNgramStats::incrementTaggedCounts(const VocabIndex *words,
							NgramCount factor)
{
    VocabIndex wbuffer[maxWordsPerLine + 1];

    unsigned i;
    for (i = 0; i < order && words[i] != Vocab_None; i++) {
	wbuffer[i] = TaggedVocab::unTag(words[i]);
    }
    wbuffer[i] = Vocab_None;

    incrementCounts(wbuffer, 1, factor);

    for (i = 0; i < order && words[i] != Vocab_None; i++) {
	VocabIndex tag = TaggedVocab::getTag(words[i]);

	if (tag != Tag_None) {
	    wbuffer[i] = TaggedVocab::tagWord(Tagged_None, tag);
	    incrementCounts(wbuffer, i + 1, factor);
	}
    }
}

unsigned
TaggedNgramStats::countSentence(const VocabIndex *words, NgramCount factor)
{
    unsigned int start;

    for (start = 0; words[start] != Vocab_None; start++) {
        incrementTaggedCounts(words + start, factor);
    }

    /*
     * keep track of word and sentence counts
     */
    stats.numWords += start;
    if (words[0] == vocab.ssIndex()) {
	stats.numWords --;
    }
    if (start > 0 && words[start-1] == vocab.seIndex()) {
	stats.numWords --;
    }

    stats.numSentences ++;

    return start;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -