📄 test.cpp
字号:
#include "CNaiveBayes.h"
#include "CStatDir.h"

#include <direct.h>

#include <cstdio>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#ifndef PATH_SEPARATOR
# define PATH_SEPARATOR '\\'
#endif
void example1()
{
    // Example 1: classify the test documents of every class and print the
    // raw result recorded for each document. No correctness statistics are
    // collected here.
    CNaiveBayes mybayes;
    mybayes.InitPara();
    const int nClassCount = mybayes.Train();
    mybayes.InitTestRes();

    int nClass = 0;
    while (nClass < nClassCount)
    {
        // The test file of a class is named after the class, with a ".tmi" suffix.
        string sTestFile = "..\\Dic\\DF\\" + mybayes.m_psClassName[nClass];
        sTestFile += ".tmi";
        const int nDocs = mybayes.Test( sTestFile );

        cout << "******************************************" << endl;
        cout << "类别 : " << mybayes.m_psClassName[nClass] << endl;
        cout << "总文档数: " << nDocs << endl;

        // One output line per tested document.
        for (int nDoc = 0; nDoc < nDocs; ++nDoc)
        {
            cout << mybayes.m_vTestRes[nDoc] << endl;
        }
        ++nClass;
    }
    // End of example 1.
}
void example2()
{
    // Example 2: the class of every test document is known in advance.
    // Each class's test file is classified, the elapsed time is measured,
    // and the per-class result is printed through OutputRes().
    CNaiveBayes mybayes;
    mybayes.InitPara(true,"TrainCorpus");
    const int class_num = mybayes.Train();
    mybayes.InitTestRes();

    // Nothing to test or report without at least one class; this also keeps
    // a negative count from being used as a container size.
    if (class_num <= 0)
    {
        return;
    }

    // Per-class bookkeeping. Vectors release their storage automatically,
    // even if a call below throws.
    std::vector<int> pnTestDocNum(class_num, 0);   // value returned by Test() for each class
    std::vector<double> pTime(class_num, 0.0);     // seconds spent in Test() for each class

    for ( int i = 0 ; i < class_num ; i++ )
    {
        const clock_t start_time = clock();
        string sTestFile = "..\\Dic\\DF\\"+ mybayes.m_psClassName[i];
        sTestFile += ".tmi";
        // NOTE(review): the extra arguments (true, i) presumably enable
        // correctness counting against expected class i; confirm in CNaiveBayes.
        pnTestDocNum[i] = mybayes.Test( sTestFile,true,i );
        pTime[i] = (clock() - start_time) / (double)CLOCKS_PER_SEC;
    }

    // The loop variable is declared here again: the previous loop's `i` is
    // out of scope in standard C++ (only pre-standard compilers accepted it).
    for ( int i = 0 ; i < class_num ; i++ )
    {
        cout << "******************************************" << endl;
        mybayes.OutputRes(i,pnTestDocNum[i]);
        cout << "time = " << pTime[i] << "s" << endl;
        cout << endl;
    }
    // End of example 2.
}
void example4(string sTrainFilesPath,string sTestFilesPath)
{
    // Example 4: train the classifier from the given training folder, then
    // classify every document under the given test folder and print how
    // many documents were assigned to each class.
    CNaiveBayes mybayes;
    mybayes.InitPara(true,sTrainFilesPath);
    const int nClassCount = mybayes.Train();
    mybayes.InitTestRes();

    // Classify the documents found under the test folder.
    mybayes.TestFiles(sTestFilesPath);

    int nClass = 0;
    while (nClass < nClassCount)
    {
        cout << "******************************************" << endl;
        cout << "类别 : " << mybayes.m_psClassName[nClass] << endl;
        cout << "划分为该类的文档数 = " << mybayes.m_pnResNum[nClass] << endl;
        ++nClass;
    }
    // End of example 4.
}
void TrainExample(string sTrainFilesPath)
{
    // Train the classifier from the given training folder (the variant the
    // original author labels "with preprocessing"), then report completion
    // on the console and in Res\Res.txt.
    CNaiveBayes mybayes;
    mybayes.InitPara(true,sTrainFilesPath);
    mybayes.Train();

    static const char* const kFinishedMsg = "分类模型训练结束!";
    cout << kFinishedMsg << endl;

    // The result file is opened only after training has finished.
    ofstream resultFile("Res\\Res.txt", ios::out);
    resultFile << kFinishedMsg << endl;
    resultFile.close();
}
void TrainExample()
{
    // Train the classifier with the default parameters (the variant the
    // original author labels "without preprocessing"), then report
    // completion on the console and in Res\Res.txt.
    CNaiveBayes mybayes;
    mybayes.InitPara();
    mybayes.Train();

    static const char* const kFinishedMsg = "分类模型训练结束!";
    cout << kFinishedMsg << endl;

    // The result file is opened only after training has finished.
    ofstream resultFile("Res\\Res.txt", ios::out);
    resultFile << kFinishedMsg << endl;
    resultFile.close();
}
void TestSingleFileExample(string sTestFilePath)
{
    // Classify a single document and report the result.
    //
    // The class chosen by TestAFile() is written to the console and to
    // Res\Res.txt, followed by every line of Res\C<classId>.txt that differs
    // from sTestFilePath (reported as "similar documents"). If the path is
    // not yet listed in that per-class file, it is appended to it.
    CNaiveBayes mybayes;
    mybayes.InitPara();
    mybayes.InitTestRes();
    mybayes.PrwFRead();
    // NOTE(review): nClassID is used unchecked as an index into
    // m_psClassName; confirm TestAFile() can never return an invalid id.
    const int nClassID = mybayes.TestAFile(sTestFilePath);

    ofstream Fou;
    Fou.open("Res\\Res.txt",ios::out);
    Fou << "该文本划分为\"" << mybayes.m_psClassName[nClassID] << "\"类别"<< endl << endl;
    Fou << "相似文本如下:" << endl << endl;
    cout << "该文本划分为\"" << mybayes.m_psClassName[nClassID] << "\"类别"<< endl << endl;
    cout << "相似文本如下:" << endl << endl;

    // Name of the per-class list file. The worst case is 5 + 11 + 4 + 1 = 21
    // bytes for an extreme int value, so the buffer is sized with headroom.
    char szResFile[32] = {0};
    sprintf(szResFile,"Res\\C%d.txt",nClassID);

    int nSimilar = 0;          // number of listed documents other than the tested one
    bool bAlreadyListed = false;
    {
        ifstream tfile(szResFile);
        string strLine;
        // Test the stream state directly: comparing a stream with NULL is
        // rejected by C++11 and later compilers.
        while (getline(tfile,strLine,'\n'))
        {
            if (strLine != sTestFilePath)
            {
                cout << strLine << endl;
                Fou << strLine << endl;
                nSimilar++;
            }
            else
            {
                bAlreadyListed = true;
            }
        }
    }   // the reader is closed here, before the same file may be reopened for append

    cout << endl << "相似文本总数:" << nSimilar << "篇"<< endl;
    Fou << endl << "相似文本总数:" << nSimilar << "篇"<< endl;

    if (!bAlreadyListed)
    {
        // Record the tested document in the list of its class.
        ofstream FouResFile;
        FouResFile.open(szResFile,ios::out|ios::app);
        FouResFile << sTestFilePath << endl;
        FouResFile.close();
    }

    cout << endl << "单篇文本测试结束!" << endl;
    Fou << endl << "单篇文本测试结束!" << endl;
    Fou.close();
}
void TestFilesExample(string sTestFilesPath)
{
    // Classify every document under the given test folder and report, on
    // the console and in Res\Res.txt, how many documents went to each class.
    CNaiveBayes mybayes;
    const int nClassCount = mybayes.InitPara();
    mybayes.InitTestRes();
    mybayes.PrwFRead();
    mybayes.TestFiles(sTestFilesPath);

    ofstream resultFile("Res\\Res.txt", ios::out);

    int nClass = 0;
    while (nClass < nClassCount)
    {
        // Console report for this class.
        cout << "类别 : " << mybayes.m_psClassName[nClass] << endl;
        cout << "划分为该类的文档数 = " << mybayes.m_pnResNum[nClass] << endl;
        cout << "*************************************" << endl;

        // The same report, mirrored into the result file.
        resultFile << "类别 : " << mybayes.m_psClassName[nClass] << endl;
        resultFile << "划分为该类的文档数 = " << mybayes.m_pnResNum[nClass] << endl;
        resultFile << "*************************************" << endl;

        ++nClass;
    }

    cout << "文档集合测试结束!" << endl;
    resultFile << "文档集合测试结束!" << endl;
    resultFile.close();
}
string StandardizePath(string sSrcPath)
{
    // Returns a copy of sSrcPath in which every backslash is doubled;
    // all other characters are copied through unchanged.
    string sEscaped;
    for (string::size_type idx = 0; idx < sSrcPath.size(); ++idx)
    {
        const char ch = sSrcPath[idx];
        if (ch == '\\')
        {
            sEscaped += "\\\\";
        }
        else
        {
            sEscaped += ch;
        }
    }
    return sEscaped;
}
/*
void main()
{
cout << "*************************************" << endl;
cout << "**欢迎使用Naive Bayes文本分类系统!**" << endl;
cout << "*************************************" << endl;
cout << "本次任务是:" << endl;
cout << "训练分类模型请按“1”,单篇文档测试请按“2”,文档集合测试请按“3”,退出请按q:" << endl;
char cSletctNumber;
string sInput;
string sTemp;
while( cin >> cSletctNumber )
{
switch ( cSletctNumber )
{
case '1' :
cout << "是否需要预处理训练语料,“需要”请按Y,否则请按N" << endl;
cin >> sInput;
if(sInput == "Y" )
{
cout << "请输入训练文件夹,按回车结束" << endl;
cin >> sInput;
sTemp = StandardizePath(sInput);
cout << "*************************************" << endl;
TrainExample(sTemp);
cout << "*************************************" << endl;
}
if(sInput == "N" )
{
cout << "*************************************" << endl;
TrainExample();
cout << "*************************************" << endl;
}
cout << "训练分类模型请按“1”,单篇文档测试请按“2”,文档集合测试请按“3”,退出请按q:" << endl;
break;
case '2' :
cout << "请输入单篇测试文档路径,按回车结束,返回请按b,退出请按q" << endl;
while(cin >> sInput)
{
if(sInput == "q" )
{
return;
}
if(sInput == "b" )
{
cout << "训练分类模型请按“1”,单篇文档测试请按“2”,文档集合测试请按“3”,退出请按q:" << endl;
break;
}
cout << "*************************************" << endl;
sTemp = StandardizePath(sInput);
TestSingleFileExample(sTemp);
cout << "*************************************" << endl;
cout << "请输入单篇测试文档路径,按回车结束,返回请按b,退出请按q" << endl;
}
break;
case '3':
cout << "请输入测试文档集合路径,按回车结束,返回请按b,退出请按q" << endl;
while(cin >> sInput)
{
if(sInput == "q" )
{
return;
}
if(sInput == "b" )
{
cout << "训练分类模型请按“1”,单篇文档测试请按“2”,文档集合测试请按“3”,退出请按q:" << endl;
break;
}
cout << "*************************************" << endl;
sTemp = StandardizePath(sInput);
TestFilesExample(sTemp);
cout << "*************************************" << endl;
cout << "请输入测试文档集合路径,按回车结束,返回请按b,退出请按q" << endl;
}
break;
case 'q' :
return;
break;
}
}
}*/
/*
 * Print the usage message: a title line, the invocation synopsis built from
 * exec_name, and one line for each supported option.
 */
static void PrintHelp (char* exec_name)
{
    // One entry per supported command-line option, in display order.
    static const char* const kOptionLines[] = {
        "-h\t系统使用帮助;",
        "-t\tsTrainFilesPath\t指定训练文本所在文件夹路径,训练分类器;",
        "-a\tsTestFilePath\t指定单篇文本(含路径),进行测试;",
        "-p\tsTestFilesPath\t指定测试文本所在文件夹路径,进行批量测试;"
    };
    const int nOptionLines = sizeof(kOptionLines) / sizeof(kOptionLines[0]);

    cout << "系统使用帮助:" << endl;
    cout << exec_name << " [OPTION]... [PARAMETER]..." << endl;
    cout << "OPTION : " << endl;
    for (int k = 0; k < nOptionLines; ++k)
    {
        cout << kOptionLines[k] << endl;
    }
}
/*
 * Command-line entry point.
 *
 *   <exe> -h                  print the usage message
 *   <exe> -t <train folder>   train the classifier
 *   <exe> -a <test file>      classify a single document
 *   <exe> -p <test folder>    classify every document in a folder
 *
 * Arguments after the first parameter are ignored. Returns 0 when a
 * recognised command was dispatched (or help was requested), and 1 when the
 * run directory cannot be set or the command line is incomplete or invalid.
 */
int main(int argc,char* argv[])
{
    /* Construct the name of the executable, without the directory part. */
    char* exec_name = strrchr (argv[0], PATH_SEPARATOR);
    if (!exec_name)
    {
        exec_name = argv[0];
    }
    else
    {
        ++exec_name;
    }

    // Length of the directory prefix of argv[0], trailing separator included.
    const int nPathLen = static_cast<int>(exec_name - argv[0]);
    if (nPathLen>0)
    {
        // Copy the directory part without its trailing separator; the
        // zero-filled buffer guarantees NUL termination.
        char* exec_path = new char[nPathLen];
        memset(exec_path,0,nPathLen);
        strncpy(exec_path,argv[0],nPathLen-1);

        // Directory helper object; set the directory to switch to.
        CStatDir statdir;
        const bool bDirOk = statdir.SetInitDir(exec_path) ? true : false;
        // Release the buffer on every path; the early return below used to leak it.
        delete[] exec_path;
        if (!bDirOk)
        {
            cout << "运行目录不存在。" << endl;
            return 1;
        }
    }

    cout << "*************************************" << endl;
    cout << "**欢迎使用Naive Bayes文本分类系统!**" << endl;
    cout << "*************************************" << endl;

    // No option at all.
    if (argc <= 1)
    {
        cout << "请输入选项和参数!" << endl;
        PrintHelp (exec_name);
        return 1;
    }

    const char* option = argv[1];

    // An option without a parameter: only -h is complete on its own.
    if (argc == 2)
    {
        if(strcmp(option,"-h")==0)
        {
            PrintHelp (exec_name);
            return 0;
        }
        if(strcmp(option,"-t")==0 || strcmp(option,"-a")==0 || strcmp(option,"-p")==0)
        {
            cout << "输入选项缺少参数!" << endl;
        }
        else
        {
            cout << "输入选项有误!" << endl;
        }
        PrintHelp (exec_name);
        return 1;
    }

    // An option followed by at least one parameter.
    if(strcmp(option,"-t")==0)
    {
        TrainExample(argv[2]);
    }
    else if(strcmp(option,"-a")==0)
    {
        TestSingleFileExample(argv[2]);
    }
    else if(strcmp(option,"-p")==0)
    {
        TestFilesExample(argv[2]);
    }
    else
    {
        cout << "输入选项有误!" << endl;
        PrintHelp (exec_name);
        return 1;
    }
    return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -