⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 test.cpp

📁 贝叶斯公式
💻 CPP
字号:
#include "CNaiveBayes.h"
#include "CStatDir.h"
#include <cstdio>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <direct.h>

// Character that separates directory components in a path. main() searches
// argv[0] for its last occurrence to split the executable name from the
// directory part. Defaults to a backslash unless the build already defines
// PATH_SEPARATOR.
#ifndef PATH_SEPARATOR
# define PATH_SEPARATOR '\\'
#endif

void example1()
{
	//示例1:只需要对测试文本进行分类,不需要统计正确与否
	int class_num , a_num;
	int iFileNum;     //记录每类中的测试文档个数
	CNaiveBayes mybayes;
	int clnum = 0;
	clock_t _start_time;
	class_num = mybayes.InitPara();
 	class_num = mybayes.Train();
	mybayes.InitTestRes();

// 	mybayes.PrwFRead();
	for ( int i = 0 ; i < class_num ; i++ )
	{
 		string sTestFile = "..\\Dic\\DF\\"+ mybayes.m_psClassName[i];
		sTestFile += ".tmi";
		int nDocNum = mybayes.Test( sTestFile);
		cout << "******************************************" << endl;
		cout << "类别 : " << mybayes.m_psClassName[i] << endl;
		cout << "总文档数: " << nDocNum << endl;
		int nCorrect = 0;
		for(int j=0;j<nDocNum;j++)
		{
 			cout << mybayes.m_vTestRes[j] << endl;
//			if (mybayes.m_vTestRes[j]==i)
//			{
//				nCorrect++;
//			}
//			else
//			{
//				cout << "第" << j << "篇文档分类错误:从第" << i << "类分到了第" << mybayes.m_vTestRes[j] << "类!" << endl;
// 			}
		}
// 		cout << "正确归档数 = " << nCorrect << endl;
	}
	//示例1结束!
}

/**
 * Example 2: the true class of every test document is known in advance.
 * Each class's test file is classified, the elapsed CPU time is measured,
 * and CNaiveBayes::OutputRes() is asked to report the result per class.
 *
 * The classifier is initialised from the "TrainCorpus" folder, trained,
 * and then run once per class on ..\Dic\DF\<class name>.tmi.
 */
void example2()
{
	CNaiveBayes mybayes;
	mybayes.InitPara(true,"TrainCorpus");
	const int class_num = mybayes.Train();
	mybayes.InitTestRes();

	// Nothing to test or report without at least one class. This also keeps
	// a non-positive count from being used as a container size.
	if (class_num <= 0)
	{
		return;
	}

	// Per-class bookkeeping. Vectors release their storage automatically,
	// including when one of the classifier calls below throws.
	std::vector<int> vTestDocNum(class_num, 0);     // value returned by Test() for each class
	std::vector<double> vTime(class_num, 0.0);      // seconds spent in Test() for each class

	// Declared once at function scope so both loops compile under standard
	// for-scope rules as well as under compilers that leak the loop variable
	// into the enclosing scope.
	int i = 0;

	for ( i = 0 ; i < class_num ; i++ )
	{
		const clock_t start_time = clock();
		string sTestFile = "..\\Dic\\DF\\"+ mybayes.m_psClassName[i];
		sTestFile += ".tmi";
		vTestDocNum[i] = mybayes.Test( sTestFile,true,i );
		vTime[i] = (clock() - start_time) / static_cast<double>(CLOCKS_PER_SEC);
	}

	for ( i = 0 ; i < class_num ; i++ )
	{
		cout << "******************************************" << endl;

		// NOTE(review): OutputRes presumably prints the per-class statistics
		// for class i - confirm in CNaiveBayes.
		mybayes.OutputRes(i,vTestDocNum[i]);
		cout << "time = " << vTime[i] << "s" << endl;
		cout << endl;
	}
}


void example4(string sTrainFilesPath,string sTestFilesPath)
{
	//示例4:指定训练文本所在文件夹路径,训练分类器
	int class_num , a_num;
	int iFileNum;     //记录每类中的测试文档个数
	CNaiveBayes mybayes;
	int clnum = 0;
	clock_t _start_time;
	class_num = mybayes.InitPara(true,sTrainFilesPath);
//	class_num = mybayes.InitPara();
	class_num = mybayes.Train();
	mybayes.InitTestRes();
// 	mybayes.PrwFRead();

	//指定测试文本所在文件夹路径,对测试文本进行分类
	mybayes.TestFiles(sTestFilesPath);
	
	for (int i = 0 ; i < class_num ; i++ )
	{
		cout << "******************************************" << endl;
        cout << "类别 : " << mybayes.m_psClassName[i] << endl;
        cout << "划分为该类的文档数 = " << mybayes.m_pnResNum[i] << endl;
	}
	//示例4结束!
}


void TrainExample(string sTrainFilesPath)
{
	//示例:指定训练文本所在文件夹路径,训练分类器(带预处理)
	CNaiveBayes mybayes;
	mybayes.InitPara(true,sTrainFilesPath);
	mybayes.Train();
	cout << "分类模型训练结束!" << endl;
	ofstream Fou;
	Fou.open("Res\\Res.txt",ios::out);
	Fou << "分类模型训练结束!" << endl;
	Fou.close();
}

void TrainExample()
{
	//示例:指定训练文本所在文件夹路径,训练分类器(不带预处理)
	CNaiveBayes mybayes;
	mybayes.InitPara();
	mybayes.Train();
	cout << "分类模型训练结束!" << endl;
	ofstream Fou;
	Fou.open("Res\\Res.txt",ios::out);
	Fou << "分类模型训练结束!" << endl;
	Fou.close();
}

/**
 * Classify a single document and list the documents recorded for the same
 * class.
 *
 * The predicted class name is written to the console and to Res\Res.txt.
 * The per-class list file Res\C<id>.txt is then read line by line: every
 * line that differs from sTestFilePath is echoed as a "similar document",
 * and if sTestFilePath is not yet listed it is appended to that file.
 *
 * @param sTestFilePath  path of the document to classify
 */
void TestSingleFileExample(string sTestFilePath)
{
	CNaiveBayes mybayes;
	mybayes.InitPara();
	mybayes.InitTestRes();
	mybayes.PrwFRead();
	const int nClassID = mybayes.TestAFile(sTestFilePath);

	ofstream Fou;
	Fou.open("Res\\Res.txt",ios::out);

	// A negative ID can never be a valid index into m_psClassName, so stop
	// before it is used as one.
	// NOTE(review): whether TestAFile reports failure this way is an
	// assumption - confirm in CNaiveBayes. An upper-bound check would need
	// the class count, which this function does not keep.
	if (nClassID < 0)
	{
		cout << "文本分类失败!" << endl;
		Fou << "文本分类失败!" << endl;
		Fou.close();
		return;
	}

	Fou << "该文本划分为\"" << mybayes.m_psClassName[nClassID] << "\"类别"<< endl << endl;
	Fou << "相似文本如下:" <<  endl << endl;

	cout << "该文本划分为\"" << mybayes.m_psClassName[nClassID] << "\"类别"<< endl << endl;
	cout << "相似文本如下:" <<  endl << endl;

	// Build the per-class list file name Res\C<id>.txt. The buffer is sized
	// with headroom: "Res\C" + up to 11 characters of int + ".txt" + NUL
	// needs 21 bytes in the worst case.
	char szResFile[32];
	memset(szResFile,0,sizeof(szResFile));
	sprintf(szResFile,"Res\\C%d.txt",nClassID);

	ifstream tfile(szResFile);
	string strLine;          // one line of the list file
	int nSimilar = 0;        // number of listed documents other than the input
	bool bFlag = false;      // true once the input path is found in the list

	// Testing the stream itself works with both the old void* conversion and
	// the explicit bool conversion; comparing it against NULL does not.
	while (getline(tfile,strLine,'\n'))
	{
		if (strLine != sTestFilePath)
		{
			cout << strLine << endl;
			Fou << strLine << endl;
			nSimilar++;
		}
		else
		{
			bFlag = true;
		}
	}
	// Release the read handle before the same file is reopened for append.
	tfile.close();

	cout << endl << "相似文本总数:" << nSimilar << "篇"<< endl;
	Fou << endl << "相似文本总数:" << nSimilar << "篇"<< endl;

	// Record the input document in its class list if it was not there yet.
	if (!bFlag)
	{
		ofstream FouResFile;
		FouResFile.open(szResFile,ios::out|ios::app);
		FouResFile << sTestFilePath << endl;
		FouResFile.close();
	}

	cout << endl << "单篇文本测试结束!" << endl;
	Fou << endl << "单篇文本测试结束!" << endl;
	Fou.close();
}

void TestFilesExample(string sTestFilesPath)
{	
	//指定测试文本所在文件夹路径,对测试文本进行分类
	int class_num , a_num;
	int iFileNum;     //记录每类中的测试文档个数
	CNaiveBayes mybayes;
	int clnum = 0;
	clock_t _start_time;
    class_num = mybayes.InitPara();
	mybayes.InitTestRes();
	mybayes.PrwFRead();

 	mybayes.TestFiles(sTestFilesPath);
	ofstream Fou;
	Fou.open("Res\\Res.txt",ios::out);

	for (int i = 0 ; i < class_num ; i++ )
	{
        cout << "类别 : " << mybayes.m_psClassName[i] << endl;
        cout << "划分为该类的文档数 = " << mybayes.m_pnResNum[i] << endl;
		cout << "*************************************" << endl;

		Fou << "类别 : " << mybayes.m_psClassName[i] << endl;
        Fou << "划分为该类的文档数 = " << mybayes.m_pnResNum[i] << endl;
		Fou << "*************************************" << endl;
		
	}
	cout << "文档集合测试结束!" << endl;
	Fou << "文档集合测试结束!" << endl;
	Fou.close();

}

/**
 * Return a copy of sSrcPath in which every backslash is doubled.
 * All other characters are copied through unchanged, so a path without
 * backslashes (or an empty path) comes back as-is.
 *
 * @param sSrcPath  path to convert
 * @return the path with each '\' replaced by two backslashes
 */
string StandardizePath(string sSrcPath)
{
	string sDoubled;
	for (string::size_type idx = 0; idx < sSrcPath.size(); ++idx)
	{
		const char ch = sSrcPath[idx];
		if (ch == '\\')
		{
			// One backslash in, two backslashes out.
			sDoubled.append(2, '\\');
		}
		else
		{
			sDoubled += ch;
		}
	}
	return sDoubled;
}
/*

void main()
{
	
	cout << "*************************************" << endl;
	cout << "**欢迎使用Naive Bayes文本分类系统!**" << endl;
	cout << "*************************************" << endl;
	cout << "本次任务是:" << endl;
	cout << "训练分类模型请按“1”,单篇文档测试请按“2”,文档集合测试请按“3”,退出请按q:" << endl;
	char cSletctNumber;
	string sInput;
	string sTemp;
	while( cin >> cSletctNumber )
	{
		switch ( cSletctNumber )
		{
		case '1' :
			cout << "是否需要预处理训练语料,“需要”请按Y,否则请按N" << endl;	
			cin >> sInput;
			if(sInput == "Y" ) 
			{
				cout << "请输入训练文件夹,按回车结束" << endl;
				cin >> sInput;
				sTemp = StandardizePath(sInput);
				cout << "*************************************" << endl;
				TrainExample(sTemp);
				cout << "*************************************" << endl;
			}
			if(sInput == "N" )
			{
				cout << "*************************************" << endl;
				TrainExample();
				cout << "*************************************" << endl;
			}
			cout << "训练分类模型请按“1”,单篇文档测试请按“2”,文档集合测试请按“3”,退出请按q:" << endl;
			break;
		case '2' :
			cout << "请输入单篇测试文档路径,按回车结束,返回请按b,退出请按q" << endl;
			while(cin >> sInput)
			{
				if(sInput == "q" ) 
				{
					return;
				}
				if(sInput == "b" )
				{
					cout << "训练分类模型请按“1”,单篇文档测试请按“2”,文档集合测试请按“3”,退出请按q:" << endl;
					break;
				}
				cout << "*************************************" << endl;
				sTemp = StandardizePath(sInput);
				TestSingleFileExample(sTemp);
				cout << "*************************************" << endl;
				cout << "请输入单篇测试文档路径,按回车结束,返回请按b,退出请按q" << endl;
			}
			break;
		case '3':
			cout << "请输入测试文档集合路径,按回车结束,返回请按b,退出请按q" << endl;
			while(cin >> sInput)
			{
				if(sInput == "q" ) 
				{
					return;
				}
				if(sInput == "b" )
				{
					cout << "训练分类模型请按“1”,单篇文档测试请按“2”,文档集合测试请按“3”,退出请按q:" << endl;
					break;
				}
				cout << "*************************************" << endl;
				sTemp = StandardizePath(sInput);
				TestFilesExample(sTemp);
				cout << "*************************************" << endl;
				cout << "请输入测试文档集合路径,按回车结束,返回请按b,退出请按q" << endl;
			}
			break;
		case 'q' :
			return;
			break;
		}
	}	
}*/

/* Print the usage message: a synopsis line built from exec_name followed by
   one line for each supported option (-h, -t, -a, -p).  */
static void PrintHelp (char* exec_name)
{
	cout << "系统使用帮助:" << endl
	     << exec_name << " [OPTION]... [PARAMETER]..." << endl
	     << "OPTION : " << endl
	     << "-h\t系统使用帮助;" << endl
	     << "-t\tsTrainFilesPath\t指定训练文本所在文件夹路径,训练分类器;" << endl
	     << "-a\tsTestFilePath\t指定单篇文本(含路径),进行测试;" << endl
	     << "-p\tsTestFilesPath\t指定测试文本所在文件夹路径,进行批量测试;" << endl;
}

/**
 * Command-line entry point of the Naive Bayes text classification tool.
 *
 * Usage: <exe> -h | -t <train folder> | -a <test file> | -p <test folder>
 *
 * When argv[0] carries a directory part, that directory is first handed to
 * CStatDir::SetInitDir(); the program stops if that call fails. Arguments
 * beyond the option's parameter are ignored.
 *
 * @return 0 after printing help for -h or after running the requested
 *         action; 1 when the run directory is rejected or the option or its
 *         parameter is missing or unknown.
 */
int main(int argc,char* argv[])
{
	/* Construct the name of the executable, without the directory part.  */
	char* exec_name = strrchr (argv[0], PATH_SEPARATOR);
	if (!exec_name)
	{
		exec_name = argv[0];
	}
	else
	{
		++exec_name;
	}

	// Length of the directory prefix of argv[0], trailing separator included.
	const int nPathLen = static_cast<int>(exec_name - argv[0]);
	if (nPathLen>0)
	{
		// Zero-filled buffer that receives the prefix without its trailing
		// separator. The vector frees itself on every exit path, including
		// the early return below.
		std::vector<char> exec_path(nPathLen, '\0');
		strncpy(&exec_path[0],argv[0],nPathLen-1);

		// Hand the executable's directory to the directory helper.
		// NOTE(review): per the original comment this switches to that
		// directory - confirm in CStatDir.
		CStatDir statdir;
		if (!statdir.SetInitDir(&exec_path[0]))
		{
			cout << "运行目录不存在。" << endl;
			return 1;
		}
	}

	cout << "*************************************" << endl;
	cout << "**欢迎使用Naive Bayes文本分类系统!**" << endl;
	cout << "*************************************" << endl;

	// No option at all.
	if (argc < 2)
	{
		cout << "请输入选项和参数!" << endl;
		PrintHelp (exec_name);
		return 1;
	}

	const char* option = argv[1];

	// Exactly one argument: only -h is complete on its own.
	if (argc == 2)
	{
		if (strcmp(option,"-h")==0)
		{
			PrintHelp (exec_name);
			return 0;
		}

		const bool bNeedsParameter = strcmp(option,"-t")==0
			|| strcmp(option,"-a")==0
			|| strcmp(option,"-p")==0;
		if (bNeedsParameter)
		{
			cout << "输入选项缺少参数!" << endl;
		}
		else
		{
			cout << "输入选项有误!" << endl;
		}
		PrintHelp (exec_name);
		return 1;
	}

	// Option plus parameter (argc >= 3).
	if (strcmp(option,"-t")==0)
	{
		TrainExample(argv[2]);
	}
	else if (strcmp(option,"-a")==0)
	{
		TestSingleFileExample(argv[2]);
	}
	else if (strcmp(option,"-p")==0)
	{
		TestFilesExample(argv[2]);
	}
	else
	{
		cout << "输入选项有误!" << endl;
		PrintHelp (exec_name);
		return 1;
	}

	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -