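// nbc.c - Naive Bayesian Classifier (NBC): probability-table construction,
// classification, and the command-line driver.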
#include "nbc.h"
#include "util.h"

// Builds the classifier's tables from the training source: the supervised
// training data, the class table, the per-class summary, and one attribute
// table per attribute column.
NBC_Table::NBC_Table(Source &src)
{
  trainData = new SupervisedDataTable(src);
#ifdef DBG1
  trainData->print();
#endif
  classTable = new ClassInfoTable(*trainData);
  cSTable = new ClassSummary(*classTable);

  int col = trainData->numOfAttributes();
  attrTable = new AttributeInfoTable* [col];
#ifdef NBC_DBG1
  cout << "col = "<<col<<endl;
#endif
  for(int c=0; c<col; c++)
    attrTable[c] = new AttributeInfoTable(*trainData, c, *classTable);
}

// Computes the (unnormalised) score of class `nc` for the instance `inst`.
//
//   nc    - index of the class being scored
//   inst  - the instance's row; the first n_Col-1 entries are looked up in
//           attrTable (the last column is skipped)
//   n_Col - number of columns in the row, including the skipped last one
//
// Starts from count(nc) / numOfInstances and multiplies in
// count(attribute value, nc) / count(nc) for every attribute value that is
// found in the training tables.
//
// Side effect: sets the member `error` to 1 when none of the attribute
// values was found (or there are no attributes to look at), 0 otherwise.
float NBC_Table::NBC_calculateProb(int nc, Description *inst, int n_Col)
{
  float classCount = cSTable->getCount(nc);
  float p = classCount / trainData->numOfInstances();
#ifdef DBG
  printf("p = %d/%d=%f; ", (int)classCount, trainData->numOfInstances(), p);
#endif

  n_Col--;    // the last column is not used as an attribute
  error = 1;  // assume an error until at least one attribute value is found
  for(int c=0; c<n_Col; c++)
  {
    AttributeInfo *tmp = attrTable[c]->searchInfo(inst[c]);
    if(tmp)
    {
      p *= static_cast<float>(tmp->getCount(nc)) / classCount;
      error = 0;
    }
    else
    {
      // Value not found in the training tables: "assume never happen, then
      // probability 1". `error` is deliberately left untouched here.
      // NOTE(review): this resets p to 1, discarding the prior and every
      // factor accumulated so far, rather than merely skipping this
      // attribute. Kept as-is to preserve classification results; confirm
      // whether skipping the attribute was intended.
      p = 1;
    }
#ifdef DBG
    printf("p (%d)= %f; ", c, p);
#endif
  }
  return p;
}

// Returns the index of the largest value in p[0..n-1].
// When several entries tie for the maximum, the first one is chosen.
// Precondition: n >= 1 (p[0] is read unconditionally).
int NBC_Table::maxIndex(float *p, int n)
{
  float max = p[0];
  int index = 0;
  for(int i=1; i<n; i++)
  {
    if(max<p[i])
    {
      max = p[i];
      index = i;
    }
  }
  return index;
}

// Classifies row `r` of `rDTbl`: scores every class with NBC_calculateProb
// and returns the description of the highest-scoring class.
// The parameter `c` is not used; the column count is read from the table.
// The member `error` is left as set by the last NBC_calculateProb call.
Description NBC_Table::classify(int r, int c, RawDataTable &rDTbl)
{
  int col = rDTbl.numOfColumns();
  Description *inst = rDTbl.getRow(r);
  int n_Class = classTable -> numOfClasses();

  float *p = new float [n_Class];
  for(int nc=0; nc<n_Class; nc++)
  {
    p[nc] = NBC_calculateProb(nc, inst, col);
#ifdef DBG1
    printf("p[%d]=%f, ", nc, p[nc]);
#endif
  }
#ifdef DBG1
  printf("\n");
#endif

  int id = maxIndex(p, n_Class);
#ifdef DBG1
  printf("p[%d]=%f, -> ClassResult=%d, \n", id, p[id], cSTable -> getDescription(id));
#endif

  // The score buffer is only needed to pick the winner; release it before
  // returning (it was previously leaked on every classified row).
  delete[] p;
  return cSTable -> getDescription(id);
}

// Classifies every row of `rDTbl` and stores the result in the row's class
// field; rows for which NBC_calculateProb flagged an error get ERRORVALUE.
void NBC_Table::NBC_classify( RawDataTable &rDTbl)
{
  int n = rDTbl.numOfRows();
  int c = rDTbl.numOfColumns();
#ifdef DBG
  cout << "NBC_classify: n="<<n<<" c="<<c<<endl;
#endif
  for(int r=0; r<n; r++)
  {
    Description res = classify(r, c, rDTbl);
    if(!error)
      rDTbl.setClassField(r, res);
    else
      rDTbl.setClassField(r, ERRORVALUE);
  }
}

// Classifies the training data itself and reports the resulting error rate.
void NBC_Table::reportTrainingDataErrorRate()
{
  RawDataTable TTbl(*getSupervisedDataTable());
  NBC_classify(TTbl);
  TTbl.reportErrorRate();
}

// Prints the class summary followed by the attribute table of each column.
void NBC_Table::printProbTable()
{
  int col = trainData->numOfAttributes();
  cSTable -> printClassSummary();
  for(int c=0; c<col; c++)
  {
    cout << "Column: "<<c<<endl;
    attrTable[c] -> print(cSTable);
  }
}

// Prints the size of the probability table:
// classes x (total number of attribute values over all columns + 1).
void NBC_Table::showProbSize()
{
  int n_Classes = cSTable -> numOfClasses();
  int col = trainData->numOfAttributes();
  int n_AllAttrs = 0;
  for(int c=0; c<col; c++)
    n_AllAttrs += attrTable[c] -> numOfValues();
  cout << "Probability table size: " << n_Classes <<"(class) X {"<<n_AllAttrs;
  cout <<"(all attrs)+1} = "<<n_Classes*(n_AllAttrs+1)<<endl;
}

// Releases everything the constructor allocated, in reverse order of
// creation (the attribute, summary and class tables were all built from
// trainData, so trainData goes last).
// NOTE(review): this makes NBC_Table an owning type; it must not be copied
// by value unless the class declaration provides a deep copy.
NBC_Table::~NBC_Table()
{
#ifdef DBG
  cout << "NBC_Table::destructor"<<endl;
#endif
  int col = trainData->numOfAttributes();
  for(int c=0; c<col; c++)
    delete attrTable[c];
  delete[] attrTable;
  delete cSTable;
  delete classTable;
  delete trainData;
}

// Command-line driver.
//   -h  show the help
//   -s  show the probability table size
//   -p  show the probability table
//   -d  show the detailed classification of the test data
// Trains on the first source, reports the training-data error rate,
// classifies the second source, and reports its error rate.
int main(int argc, char **argv)
{
  // set up training data table
  Source trainFile(",");
  // Source trainFile(",", argv[1]);
  NBC_Table probTable(trainFile);

  // Option -h
  if(hasOptionInArg("-h", argc, argv))
  {
    cout << "NBC - Naive Bayesian Classifier\n";
    cout << "Options: \n -s Show the size\n";
    cout << " -p Show the probability table\n";
    cout << " -d Show the detailed classification\n";
    cout << " -h Show the help\n";
  }

  // Option -s
  if(hasOptionInArg("-s", argc, argv))
    probTable.showProbSize();

  // print the number of errors
  // probTable.getSupervisedDataTable() -> print();
  probTable.reportTrainingDataErrorRate();

  // Option -p, print the probability table
  if(hasOptionInArg("-p", argc, argv))
    probTable.printProbTable();

  // set up testing data table
  Source testFile(",");
  // Source testFile(",", argv[2]);
  RawDataTable rDTbl(testFile, *probTable.getSupervisedDataTable());

  // classify
  probTable.NBC_classify(rDTbl);

  // Option -d, show detailed classified results
  // (this previously tested "-p", so -d had no effect)
  if(hasOptionInArg("-d", argc, argv))
    rDTbl.print();

  // print the error rate
  rDTbl.reportErrorRate();

  return 0;
}
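/*
 * Code-viewer keyboard shortcuts (page residue, not part of the program):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */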