// useelus.cpp
#include "stdafx.h"
#include <iostream>
#include <tchar.h>
#include <string>
#include <time.h>
#include <fstream>
using namespace std;
// Using DLL
#include "ELUS_Use.h" // Dynamic Link Lib: ELUSCORE.dll Import Lib: ELUSCORE.lib
// Training Dll
#include "JELUSStatInterface.h" //Dynamic Link Lib: ELUSTRAIN.dll Import Lib :ELUSTRAIN.lib
// Dict Manager Dll
#include "JELUSDictInterface.h" // Dynamic Link Lib: ELUSDICT.dll Import Lib :ELUSDICT.lib
// Encode Convert Dll
#include "InsunEncodeToolkit.h" // Dynamic Lib: InsunEncode.dll Import Lib: InsunEncode.lib
int _tmain(int argc, _TCHAR* argv[])
{
string LibPath = ".\\ELUS.ini";//ConfigFile Path
/*****************SEG&POS Train Demo*****************/
//SegTrainDemo
//Adding corpus
AppendStatFileName("..\\corpus\\test_seg.txt");
//Train Seg
SegStat( LibPath.c_str() );
//Clear Corpus List
ClearStatFileNameList();
//Pos Train Demo
// Adding corpus
AppendStatFileName("..\\corpus\\test_pos.txt");
//Train Pos
PosStat( LibPath.c_str() );
//Clear Corpus List
ClearStatFileNameList();
/*****************Train Demo End*****************/
/******************Seg & Pos Taggin Demo*****************/
/****************** Singal Sentence *****************/
//Allocate OBJ
HObject m_Obj = NewELUSObject( LibPath.c_str() );//申请ELUS对象,所有资源挂载于此对象
// Start testing
wstring testOrg = GbToUni( "统计语言模型: 统计语言模型是自然语言处理的主流技术之一。我们研究的主要内容包括各种语言模型的构建、改进以及应用,包括N元文法模型、隐马尔科夫模型、最大熵模型等。" );
// test Seg
wstring ViterbiRslt = ViterbiSeg( testOrg.c_str() , m_Obj );
cout << UniToGb( ViterbiRslt.c_str() ) << endl;
// test Pos , PosTag is presented as "/n" , etc
wstring PosRslt = HMMPos( ViterbiRslt.c_str() , false , m_Obj );
cout << UniToGb( PosRslt.c_str() ) << endl;
/******************Singal Test End*****************/
/******************File Demoe*****************/
//Test File Path
string FileName="..\\corpus\\test2_nature.txt";
//
clock_t start , end;
start = clock();
cout<<"Now tagging at : "<<FileName<<endl;
wstring SegRlt;
wstring PosRlt;
string tmp="";
int count=0;
int totleByte = 0;
ifstream inFile(FileName.c_str());
ofstream outFile( (FileName + ".pos").c_str() );
while( getline( inFile,tmp ) )
{
totleByte += tmp.length();
//Performs Seg & Pos to every line in the file
SegRlt = ViterbiSeg( GbToUni( tmp.c_str() ) , m_Obj );
PosRlt = HMMPos( SegRlt.c_str() , false , m_Obj );
outFile << UniToGb( PosRlt.c_str() ) << endl;
if(++count%10000==0)
cout<<count/10000<<"lines Already Done!\r";
}
inFile.close();
outFile.close();
end = clock();
double time = ( end - start ) / CLOCKS_PER_SEC;
cout << "\nTime cost:" << time << "S" << endl;
FreeAllELUSObject();
/******************File Demo End*****************/
/*****************Seg & Pos Tagging Demo End*****************/
/*****************Dict Manager Demo****************/
//Load Dict
PELUSDICT InsunDict = LoadDict("..\\ELUSLib\\ELUSDict.src");
//LookUp Words in the dict!
int testID = LookupWordID(GbToUni("社会") , InsunDict);
cout << "Word: 社会 's ID is :" << testID << endl;
// LookUpWord by ID
cout << UniToGb( LookupWord_ByID( testID , InsunDict ) ) << endl;
// LookUp Pos by ID
string testPos = LookupPOS_ByID( testID , InsunDict ) ;
cout << "Word: 社会 's Pos List is :" <<testPos<< endl;
// Add Word into the dict!
string AddWord = "戴维斯加利福尼亚";
int NewID = NewWord( GbToUni(AddWord.c_str()) , InsunDict );
// Set Pos Tag By ID
SetPOS_ByID( testPos.c_str() , NewID , InsunDict );
// All The Operator above is completed in the momory.
// Save Dict back to disk!
SaveDict(".\\INSUNELUSDICT.src" ,InsunDict );
// Free memory
FreeDict(InsunDict);
/*****************Dict Manager Demo End****************/
return 0;
}
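/*
 * Code-viewer keyboard shortcuts (web page residue, not part of the program):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Fullscreen mode      F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */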