⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 irmodel.h

📁 一个信息检索模型
💻 H
字号:
#include <iostream>
#include <string>
#include <fstream>

#include <vector>
#include <map>
#include <set>
using namespace std;

// IRModel declares the data structures and operations of an information
// retrieval model built around an inverted index.
//
// Only declarations are visible in this header; the member functions are
// defined elsewhere, so the notes on them below are derived from their names
// and signatures and should be confirmed against the implementation.
class IRModel
{
public:
	IRModel() {}
	~IRModel() {}

	// ---- Document table ------------------------------------------------
	// <file sequence, file path>
	typedef map<short, string> Sequence_Doc;
	typedef Sequence_Doc::value_type valT_Seq_Doc;

	// ---- Per-document postings -----------------------------------------
	// Lexicon sequence in the file.
	typedef vector<short> Locs;
	// Pairs a short with the location list; the name suggests the short is
	// a term frequency (TF) -- confirm against the implementation.
	typedef pair<short, Locs> TF_Locs;
	// <file sequence, locations>
	typedef map<short, TF_Locs> Doc_locs;
	typedef Doc_locs::value_type ValT_Doc_locs;

	// ---- Inverted index ------------------------------------------------
	typedef vector<Doc_locs> IndexTermLocs;
	// <DF, indexTerm>
	typedef pair<short, IndexTermLocs> IDF_IndexTerm;
	// Keyed by string; presumably the index term (word) -- confirm.
	typedef map<string, IDF_IndexTerm> Index;
	typedef Index::value_type ValT_Word_IndexTerm;

	// ---- Operations (defined outside this header) ----------------------
	// Takes a string named fFileList and returns a string; presumably runs
	// word segmentation over the listed files -- confirm.
	string WordSegment(string fFileList);
	// Takes a string named WSFileList; presumably builds invertedIndex from
	// the word-segmented files -- confirm.
	void EstablishIndex(string WSFileList);
	// Takes one line of text and an int word sequence; presumably indexes
	// the words of that line -- confirm.
	void index_line(string line, int wordSequence);
	// Presumably fills stopWord_set -- confirm.
	void initStopWordSet();
	// Presumably removes stop words from the index -- confirm.
	void deleteStopWords();
	// Presumably prints the contents of the index -- confirm.
	void displayIndex();
	// Disabled declaration kept from the original source:
	//int query_document_similarity(string query, short documentSequence);

private:
	Index invertedIndex;
	Sequence_Doc sequence_doc;
	set<string> stopWord_set; // set of stop words
	// Declared only; must be defined in exactly one translation unit.
	static int docSequence;
};

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -