⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 useelus.cpp

📁 一个信息检索模型
💻 CPP
字号:
#include "stdafx.h"

#include <iostream>
#include <tchar.h>
#include <string>
#include <time.h>
#include <fstream>
using namespace std;

// Using DLL
#include "ELUS_Use.h" // Dynamic Link Lib: ELUSCORE.dll Import Lib: ELUSCORE.lib
// Training Dll
#include "JELUSStatInterface.h" //Dynamic Link Lib: ELUSTRAIN.dll  Import Lib :ELUSTRAIN.lib
// Dict Manager Dll
#include "JELUSDictInterface.h" // Dynamic Link Lib: ELUSDICT.dll  Import Lib :ELUSDICT.lib

// Encode Convert Dll
#include "InsunEncodeToolkit.h" // Dynamic Lib: InsunEncode.dll  Import Lib: InsunEncode.lib

int _tmain(int argc, _TCHAR* argv[])
{

	string LibPath = ".\\ELUS.ini";//ConfigFile Path

	/*****************SEG&POS Train Demo*****************/

	//SegTrainDemo
	//Adding corpus
	AppendStatFileName("..\\corpus\\test_seg.txt");

	//Train Seg
	SegStat( LibPath.c_str() );
	//Clear Corpus List
	ClearStatFileNameList();

	//Pos Train Demo
	// Adding corpus
	AppendStatFileName("..\\corpus\\test_pos.txt");
	//Train Pos
	PosStat( LibPath.c_str() );
	//Clear Corpus List
	ClearStatFileNameList();
	/*****************Train Demo End*****************/

	/******************Seg & Pos Taggin Demo*****************/
	/****************** Singal Sentence *****************/
	//Allocate OBJ
	HObject m_Obj = NewELUSObject( LibPath.c_str() );//申请ELUS对象,所有资源挂载于此对象

	// Start testing
	wstring testOrg = GbToUni( "统计语言模型: 统计语言模型是自然语言处理的主流技术之一。我们研究的主要内容包括各种语言模型的构建、改进以及应用,包括N元文法模型、隐马尔科夫模型、最大熵模型等。" );
	// test Seg
	wstring ViterbiRslt = ViterbiSeg( testOrg.c_str() , m_Obj ); 

	cout << UniToGb( ViterbiRslt.c_str() ) << endl;

	// test Pos , PosTag is presented as "/n" , etc
	wstring PosRslt = HMMPos( ViterbiRslt.c_str() , false , m_Obj ); 
	
	cout << UniToGb( PosRslt.c_str() ) << endl;
	/******************Singal Test End*****************/

	/******************File Demoe*****************/
	//Test File Path
	string FileName="..\\corpus\\test2_nature.txt";

	//
	clock_t start , end;
	
	start = clock();
	cout<<"Now tagging at : "<<FileName<<endl;
	wstring SegRlt;
	wstring PosRlt;
	string tmp="";
	int count=0;
	int totleByte = 0;
    ifstream inFile(FileName.c_str());
	ofstream outFile( (FileName + ".pos").c_str() );

	while( getline( inFile,tmp ) )
	{
		totleByte += tmp.length();
		//Performs Seg & Pos to every line in the file
		SegRlt = ViterbiSeg( GbToUni( tmp.c_str() ) , m_Obj );
		PosRlt = HMMPos( SegRlt.c_str() , false , m_Obj );
		outFile << UniToGb( PosRlt.c_str() ) << endl;
		if(++count%10000==0)
			cout<<count/10000<<"lines Already Done!\r";
	}
	inFile.close();
	outFile.close();
	end = clock();
	double time = ( end - start ) / CLOCKS_PER_SEC;
	cout << "\nTime cost:" << time << "S" << endl;
	FreeAllELUSObject();
	/******************File Demo End*****************/
	/*****************Seg & Pos Tagging Demo End*****************/

	/*****************Dict Manager Demo****************/
	//Load Dict
	PELUSDICT InsunDict = LoadDict("..\\ELUSLib\\ELUSDict.src");
	
	//LookUp Words in the dict!
	int testID = LookupWordID(GbToUni("社会") , InsunDict);
	cout << "Word: 社会 's ID is :" << testID << endl;
	
	// LookUpWord by ID
	cout << UniToGb( LookupWord_ByID( testID , InsunDict ) ) << endl;

	// LookUp Pos by ID
	string testPos = LookupPOS_ByID( testID , InsunDict ) ;
	cout << "Word: 社会 's Pos List is :" <<testPos<< endl;

	// Add Word into the dict!
	string AddWord = "戴维斯加利福尼亚";
	int NewID = NewWord( GbToUni(AddWord.c_str()) , InsunDict );

	// Set Pos Tag By ID
	SetPOS_ByID( testPos.c_str() , NewID , InsunDict );
	// All The Operator above is completed in the momory.

	// Save Dict back to disk!
	SaveDict(".\\INSUNELUSDICT.src" ,InsunDict );

	// Free memory
	FreeDict(InsunDict);

	/*****************Dict Manager Demo End****************/


	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -