⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nbc.c

📁 C语言的朴素贝叶斯分类器代码
💻 C
字号:
#include "nbc.h"
#include "util.h"

// Builds the classifier tables from the training data read through `src`:
// the supervised data table, the class table and its summary, and one
// attribute-info table per attribute column of the training data.
NBC_Table::NBC_Table(Source &src)
{
	trainData = new SupervisedDataTable(src);
#ifdef DBG1
	trainData->print();
#endif
	classTable = new ClassInfoTable(*trainData);
	cSTable    = new ClassSummary(*classTable);

	int col = trainData->numOfAttributes();
	attrTable = new AttributeInfoTable* [col];
#ifdef NBC_DBG1
	cout << "col = "<<col<<endl;
#endif
	for(int c=0; c<col; c++)
		attrTable[c] = new AttributeInfoTable(*trainData, c, *classTable);
}

// Computes the score of class index `nc` for the row `inst`.
//
//   nc    - index of the class being scored
//   inst  - the row's field values, indexed by column
//   n_Col - number of columns in the row; the last column is skipped
//           (presumably it is the class field -- confirm against RawDataTable)
//
// The score starts as count(nc) / numOfInstances and is multiplied by
// count(value, nc) / count(nc) for every column whose value is found in the
// matching attribute table.
//
// Side effect: sets the member `error` to 1 on entry and clears it to 0 as
// soon as any column's value is found, so it stays 1 only when no column
// could be matched.
float NBC_Table::NBC_calculateProb(int nc, Description
		*inst, int n_Col)
{
	float classCount = cSTable->getCount(nc);
	float p = classCount /
		trainData->numOfInstances();
#ifdef DBG
	printf("p = %d/%d=%f; ", (int)classCount,
		trainData->numOfInstances(), p);
#endif
	n_Col--;
	error = 1; // assume an error
	for(int c=0; c<n_Col; c++)
	{
		AttributeInfo *tmp = attrTable[c]->searchInfo(inst[c]);
		if(tmp)
		{
			p*= (float)tmp->getCount(nc) / classCount;
			error = 0;
		}
		else {
			// NOTE(review): this resets the whole product (prior and all
			// earlier columns) to 1 rather than multiplying by 1. Left as in
			// the original because it changes classification results --
			// confirm which behaviour is intended.
			p=1; // assume never happen, then probability 1
			error &= 1; // no-op: keeps whatever `error` already holds
		}
#ifdef DBG
		printf("p (%d)= %f; ", c, p);
#endif
	}
	return p;
}

// Returns the index of the largest value in p[0..n-1].
// Reads p[0] unconditionally, so callers must pass n >= 1.
int NBC_Table::maxIndex(float *p, int n)
{
	float max   = p[0];
	int   index = 0;
	for(int i=1; i<n; i++)
		if(max<p[i]) { max = p[i]; index = i; }
	// It is possible that some probabilities are the same.
	// For such case, we simply choose one arbitrarily.
	// (The strict '<' means the lowest index among equal maxima is kept.)
	return index;
}

// Classifies row `r` of `rDTbl`: scores every class with NBC_calculateProb
// and returns the description of the highest-scoring one.
// The parameter `c` is not used; the column count is read from `rDTbl`.
// The member `error` is left as set by the last NBC_calculateProb call.
Description NBC_Table::classify(int r, int c, RawDataTable &rDTbl)
{
	int col = rDTbl.numOfColumns();
	Description *inst = rDTbl.getRow(r);
	int n_Class = classTable -> numOfClasses();
	float *p = new float [n_Class];
	for(int nc=0; nc<n_Class;nc++)
	{
		p[nc]=NBC_calculateProb(nc, inst, col);
#ifdef DBG1
		printf("p[%d]=%f, ", nc, p[nc]);
#endif
	}
#ifdef DBG1
	printf("\n");
#endif
	int id = maxIndex(p, n_Class);
#ifdef DBG1
	printf("p[%d]=%f, -> ClassResult=%d, \n", id, p[id],
		cSTable -> getDescription(id));
#endif
	// Release the per-row score buffer; it was previously leaked on every call.
	delete [] p;
	return cSTable -> getDescription(id);
}

// Classifies every row of `rDTbl` and stores the result in the row's class
// field. Rows for which `error` is set after classification get ERRORVALUE.
void NBC_Table::NBC_classify( RawDataTable &rDTbl)
{
	int n = rDTbl.numOfRows();
	int c = rDTbl.numOfColumns();
#ifdef DBG
	cout << "NBC_classify: n="<<n<<" c="<<c<<endl;
#endif
	for(int r=0; r<n; r++)
	{
		Description res = classify(r, c, rDTbl);
		if(!error) rDTbl.setClassField(r, res);
		else rDTbl.setClassField(r, ERRORVALUE);
	}
//		else  rDTbl.setClassField(r, -1);
}

// Builds a RawDataTable from the training data, classifies it, and asks the
// table to report its error rate.
void NBC_Table::reportTrainingDataErrorRate()
{
	RawDataTable TTbl(*getSupervisedDataTable());
	NBC_classify(TTbl);
	TTbl.reportErrorRate();
}

// Prints the class summary followed by each column's attribute table.
void NBC_Table::printProbTable()
{
	int col = trainData->numOfAttributes();
//	classTable -> print();
	cSTable -> printClassSummary();
	for(int c=0; c<col; c++)
	{
		cout << "Column: "<<c<<endl;
		attrTable[c] -> print(cSTable);
	}
}

// Prints the probability table size:
// classes x (sum of distinct values over all attribute columns + 1).
void NBC_Table::showProbSize()
{
	int n_Classes = cSTable -> numOfClasses();
	int col = trainData->numOfAttributes();
	int n_AllAttrs = 0;
	for(int c=0; c<col; c++)
		n_AllAttrs += attrTable[c] -> numOfValues();
	cout << "Probability table size: " << n_Classes <<"(class) X {"<<n_AllAttrs;
	cout <<"(all attrs)+1} = "<<n_Classes*(n_AllAttrs+1)<<endl;
}

// NOTE(review): the tables allocated in the constructor (trainData,
// classTable, cSTable, attrTable and its entries) are not released here.
// Whether the member tables share ownership of one another is not visible in
// this file, so no deletes were added -- confirm against nbc.h.
NBC_Table::~NBC_Table()
{
#ifdef DBG
	cout << "NBC_Table::destructor"<<endl;
#endif
}

// Command-line driver: trains on the first source, reports the training-set
// error rate, then classifies the test source and reports its error rate.
// Options: -h help, -s table size, -p probability table, -d detailed results.
int main(int argc, char **argv)
{
	// set up training data table
	Source trainFile(",");
//	Source trainFile(",", argv[1]);
	NBC_Table probTable(trainFile);

	// Option -h
	// NOTE(review): help is printed and the run then continues as normal.
	if(hasOptionInArg("-h", argc, argv))
	{
		cout << "NBC - Naive Bayesian Classifier\n";
		cout << "Options: \n  -s Show the size\n";
		cout << "  -p Show the probabiblity table\n";
		cout << "  -d Show the detailed classification\n";
		cout << "  -h Show the help\n";
	}

	// Option -s
	if(hasOptionInArg("-s", argc, argv))
		probTable.showProbSize();

	// print the number of errors
//	probTable.getSupervisedDataTable() -> print();
	probTable.reportTrainingDataErrorRate();

	// print the probability table
	// Option -p
	if(hasOptionInArg("-p", argc, argv))
		probTable.printProbTable();

	// set up testing data table
	Source testFile(",");
//	Source testFile(",", argv[2]);
	RawDataTable rDTbl(testFile, *probTable.getSupervisedDataTable());

	// classify
	probTable.NBC_classify(rDTbl);

	// Option -d, show detailed classified results
	// (previously tested "-p", so -d had no effect)
	if(hasOptionInArg("-d", argc, argv))
		rDTbl.print();

	// print the error rate
	rDTbl.reportErrorRate();
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -