📄 classifier.cpp

📁 MultiBoost 是 C++ 实现的多类 AdaBoost 算法。与传统的 AdaBoost 算法主要解决二类分类问题不同
💻 CPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
   cout << "\nMatrix Key:\n";      // Print the legend   for (int l = 0; l < numClasses; ++l)      cout << setw(5) << nor_utils::getAlphanumeric(l) << ": " << ClassMappings::getClassNameFromIdx(l) << "\n";   // delete the input data file   if (pData)       delete pData;   vector<ExampleResults*>::iterator it;   for (it = results.begin(); it != results.end(); ++it)      delete (*it);}// -------------------------------------------------------------------------void Classifier::saveConfusionMatrix(const string& dataFileName, const string& shypFileName,                                     const string& outFileName){   InputData* pData = loadInputData(dataFileName, shypFileName);   if (_verbose > 0)      cout << "Loading strong hypothesis..." << flush;   // The class that loads the weak hypotheses   UnSerialization us;   // Where to put the weak hypotheses   vector<BaseLearner*> weakHypotheses;   // loads them   us.loadHypotheses(shypFileName, weakHypotheses);   // where the results go   vector< ExampleResults* > results;   if (_verbose > 0)      cout << "Classifying..." 
<< flush;   // get the results   computeResults( pData, weakHypotheses, results );   const int numClasses = ClassMappings::getNumClasses();   const int numExamples = pData->getNumExamples();   ofstream outFile(outFileName.c_str());   //////////////////////////////////////////////////////////////////////////   for (int l = 0; l < numClasses; ++l)      outFile << '\t' << ClassMappings::getClassNameFromIdx(l);   outFile << endl;   for (int l = 0; l < numClasses; ++l)   {      vector<int> winnerCount(numClasses, 0);      for (int i = 0; i < numExamples; ++i)      {         if ( pData->getClass(i) == l )            ++winnerCount[ results[i]->getWinner().first ];      }      // class name      outFile << ClassMappings::getClassNameFromIdx(l);      for (int j = 0; j < numClasses; ++j)         outFile << '\t' << winnerCount[j];      outFile << endl;   }   //////////////////////////////////////////////////////////////////////////   if (_verbose > 0)      cout << "Done!" << endl;   // delete the input data file   if (pData)       delete pData;   vector<ExampleResults*>::iterator it;   for (it = results.begin(); it != results.end(); ++it)      delete (*it);}// -------------------------------------------------------------------------void Classifier::saveSingleStumpFeatureData(const string& dataFileName, const string& shypFileName,                                            const string& outFileName, int numIterations){   InputData* pData = loadInputData(dataFileName, shypFileName);   if (_verbose > 0)      cout << "Loading strong hypothesis..." << flush;   // The class that loads the weak hypotheses   UnSerialization us;   // Where to put the weak hypotheses   vector<BaseLearner*> weakHypotheses;   // loads them   us.loadHypotheses(shypFileName, weakHypotheses);   if (weakHypotheses.empty())      return;   if (numIterations == 0)      numIterations = static_cast<int>(weakHypotheses.size());   if (_verbose > 0)      cout << "Creating file..." 
<< flush;   const int numClasses = ClassMappings::getNumClasses();   const int numExamples = pData->getNumExamples();   vector<BaseLearner*>::const_iterator whyIt;   vector< vector<double> > data( numIterations );   vector< string > classColumn;   int pos = 0;   int t;   // create the first column   int numRows = numClasses+numExamples;   classColumn.resize(numRows);   for (int l = 0; l < numClasses; ++l)      classColumn[pos++] = "0";   for (int i = 0; i < numExamples; ++i)      classColumn[pos++] = ClassMappings::getClassNameFromIdx( pData->getClass(i) );   // get the data for the other columns   for (t = 0, whyIt = weakHypotheses.begin();        t < numIterations && whyIt != weakHypotheses.end();       ++whyIt, ++t)   {      BaseLearner* currWeakHyp = *whyIt;      vector<double>& tmpCol = data[t];      currWeakHyp->getStateData(tmpCol, "ssfeatures", pData);   }   if ( data[1].empty() )   {      cerr << "ERROR: option -ssfeatures works only with SingleStump feature types!" << endl;      exit(1);   }   //////////////////////////////////////////////////////////////////////////   ofstream outFile(outFileName.c_str());   // Print alpha!   // for every feature: 1..numIterations   outFile << "0\t";   for (t = 0, whyIt = weakHypotheses.begin();         t < numIterations && whyIt != weakHypotheses.end();         ++whyIt, ++t)   {      BaseLearner* currWeakHyp = *whyIt;      outFile << currWeakHyp->getAlpha() << '\t';   }   outFile << endl;   // now print all the data   for (int r = 0; r < numRows; ++r)   {      outFile << classColumn[r];      for (t = 0; t < numIterations; ++t)         outFile << '\t' << data[t][r];      outFile << '\n';   }   //////////////////////////////////////////////////////////////////////////   if (_verbose > 0)      cout << "Done!" 
<< endl;   // delete the input data file   if (pData)       delete pData;}// -------------------------------------------------------------------------InputData* Classifier::loadInputData(const string& dataFileName, const string& shypFileName){   // open file   ifstream inFile(shypFileName.c_str());   if (!inFile.is_open())   {      cerr << "ERROR: Cannot open strong hypothesis file <" << shypFileName << ">!" << endl;      exit(1);   }   // Declares the stream tokenizer   nor_utils::StreamTokenizer st(inFile, "<>\n\r\t");   // Move until it finds the multiboost tag   if ( !UnSerialization::seekSimpleTag(st, "multiboost") )   {      // no multiboost tag found: this is not the correct file!      cerr << "ERROR: Not a valid MultiBoost Strong Hypothesis file!!" << endl;      exit(1);   }   // Move until it finds the algo tag   string basicLearnerName = UnSerialization::seekAndParseEnclosedValue<string>(st, "algo");   // Check if the weak learner exists   if ( !BaseLearner::RegisteredLearners().hasLearner(basicLearnerName) )   {      cerr << "ERROR: Weak learner <" << basicLearnerName << "> not registered!!" 
<< endl;      exit(1);   }   // get the training input data, and load it   InputData* pData = BaseLearner::RegisteredLearners().getLearner(basicLearnerName)->createInputData();   // set the non-default arguments of the input data   pData->initOptions(_args);   // load the data   pData->load(dataFileName, IT_TEST, _verbose);   return pData;}// -------------------------------------------------------------------------// Returns the results into ptResvoid Classifier::computeResults(InputData* pData, vector<BaseLearner*>& weakHypotheses,                                 vector< ExampleResults* >& results){   assert( !weakHypotheses.empty() );   const int numClasses = ClassMappings::getNumClasses();   const int numExamples = pData->getNumExamples();   // Initialize the output info   OutputInfo* pOutInfo = NULL;   if ( !_outputInfoFile.empty() )      pOutInfo = new OutputInfo(_outputInfoFile);   // Creating the results structures. See file Structures.h for the   // PointResults structure   results.clear();   results.reserve(numExamples);   for (int i = 0; i < numExamples; ++i)      results.push_back( new ExampleResults(i, numClasses) );   // iterator over all the weak hypotheses   vector<BaseLearner*>::const_iterator whyIt;   int t;   // for every feature: 1..T   for (whyIt = weakHypotheses.begin(), t = 0;         whyIt != weakHypotheses.end(); ++whyIt, ++t)   {      BaseLearner* currWeakHyp = *whyIt;      double alpha = currWeakHyp->getAlpha();      // for every point      for (int i = 0; i < numExamples; ++i)      {         // a reference for clarity and speed         vector<double>& currVotesVector = results[i]->votesVector;         // for every class         for (int l = 0; l < numClasses; ++l)            currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);      }      // if needed output the step-by-step information      if ( pOutInfo )      {         pOutInfo->outputIteration(t);         pOutInfo->outputError(pData, currWeakHyp);         // Margins and 
edge requires an update of the weight,         // therefore I keep them out for the moment         //outInfo.outputMargins(pData, currWeakHyp);         //outInfo.outputEdge(pData, currWeakHyp);         pOutInfo->endLine();      }   }   if (pOutInfo)      delete pOutInfo;}// -------------------------------------------------------------------------double Classifier::getOverallError( InputData* pData, const vector<ExampleResults*>& results,                                     int atLeastRank ){   const int numExamples = pData->getNumExamples();   int numErrors = 0;   assert(atLeastRank >= 0);   for (int i = 0; i < numExamples; ++i)   {      // if the actual class is not the one with the highest vote in the      // vote vector, then it is an error!      if ( !results[i]->isWinner( pData->getClass(i), atLeastRank ) )         ++numErrors;   }     // makes the error between 0 and 1   return (double)numErrors / (double)numExamples;}// -------------------------------------------------------------------------void Classifier::getClassError( InputData* pData, const vector<ExampleResults*>& results,                                 vector<double>& classError, int atLeastRank ){   const int numExamples = pData->getNumExamples();   const int numClasses = ClassMappings::getNumClasses();   classError.resize( numClasses, 0 );   assert(atLeastRank >= 0);   for (int i = 0; i < numExamples; ++i)   {      // if the actual class is not the one with the highest vote in the      // vote vector, then it is an error!      if ( !results[i]->isWinner( pData->getClass(i), atLeastRank ) )         ++classError[ pData->getClass(i) ];   }   // makes the error between 0 and 1   for (int l = 0; l < numClasses; ++l)      classError[l] /= (double)pData->getNumExamplesPerClass(l);}// -------------------------------------------------------------------------} // end of namespace MultiBoost
上一页 12
💿 文件大小 78 K
👤 上传用户 susanxuwenjun
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#adaboost #MultiBoost #算法 #分类
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -