⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lmstats.cc

📁 这是一款很好用的工具包
💻 CC
字号:
/*
 * LMStats.cc --
 *	Generic methods for LM statistics
 *
 */

#ifndef lint
static char LMStats_Copyright[] = "Copyright (c) 1995, SRI International.  All Rights Reserved.";
static char LMStats_RcsId[] = "@(#)$Header: /home/srilm/devel/lm/src/RCS/LMStats.cc,v 1.11 2006/01/05 20:21:27 stolcke Exp $";
#endif

#include <iostream>
using namespace std;
#include <string.h>

#include "File.h"
#include "LMStats.h"

#include "LHash.cc"
#include "Trie.cc"

/*
 * When INSTANTIATE_TEMPLATES is defined, invoke INSTANTIATE_LHASH for the
 * (VocabIndex, unsigned int) pair.
 * NOTE(review): presumably this forces an explicit instantiation of the
 * LHash template pulled in via LHash.cc above -- confirm against the
 * macro's definition.
 */
#ifdef INSTANTIATE_TEMPLATES
INSTANTIATE_LHASH(VocabIndex, unsigned int);
#endif

/*
 * Debug levels used
 */

/* Level passed to debug() by countFile() before it prints this->stats. */
#define DEBUG_PRINT_TEXTSTATS	1

/*
 * Constructor: initializes the vocab member from the vocabulary passed in
 * and sets openVocab to true.  The body itself does no further work.
 */
LMStats::LMStats(Vocab &vocab)
    : vocab(vocab),
      openVocab(true)
{
}

/*
 * Destructor: the body is empty; no explicit cleanup is performed here.
 */
LMStats::~LMStats()
{
    /* nothing to do */
}

// parse strings into words and update stats
unsigned int
LMStats::countString(char *sentence)
{
    static VocabString words[maxWordsPerLine + 1];
    unsigned int howmany;
    
    howmany = vocab.parseWords(sentence, words, maxWordsPerLine + 1);

    if (howmany == maxWordsPerLine + 1) {
	return 0;
    } else {
	return countSentence(words);
    }
}

/*
 * countFile --
 *	Read the file line by line, pass each line to countString(), and
 *	accumulate the word counts it returns.
 *
 * Results:
 *	The sum of all non-zero countString() results for this file.
 *
 * Side effects:
 *	For every line on which countString() returns 0, writes
 *	"line too long?" to the stream returned by file.position().
 *	If debug(DEBUG_PRINT_TEXTSTATS) is true, prints this->stats to
 *	file.position(dout()) once the whole file has been read.
 */
unsigned int
LMStats::countFile(File &file)
{
    /*
     * Unsigned to match both the return type and the countString() results
     * being summed; a signed accumulator would force a signed/unsigned
     * conversion on every addition and again on return.
     */
    unsigned int numWords = 0;
    char *line;

    /* Explicit comparison makes clear the assignment is intentional. */
    while ((line = file.getline()) != 0) {
	unsigned int howmany = countString(line);

	/*
	 * Since getline() returns only non-empty lines,
	 * a return value of 0 indicates some sort of problem.
	 */
	if (howmany == 0) {
	    file.position() << "line too long?\n";
	} else {
	    numWords += howmany;
	}
    }

    if (debug(DEBUG_PRINT_TEXTSTATS)) {
	file.position(dout()) << this -> stats;
    }

    return numWords;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -