⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 snow.cpp

📁 基于稀疏网络的精选机器学习模型
💻 CPP
📖 第 1 页 / 共 3 页
字号:
// -*-c++-*-//===========================================================//=     University of Illinois at Urbana-Champaign          =//=     Department of Computer Science                      =//=     Dr. Dan Roth - Cognitive Computation Group          =//=                                                         =//=  Project: SNoW                                          =//=                                                         =//=   Module: Snow.cpp                                      =//=  Version: 3.2.0                                         =//=  Authors: Jeff Rosen, Andrew Carlson, Nick Rizzolo      =//=     Date: xx/xx/99                                      = //=                                                         =//= Comments:                                               =//===========================================================#include <vector>#include <string>#include <numeric>#include <iomanip>#include "TargetIdSet.h"#include "GlobalParams.h"#include "Network.h"#include "Example.h"#include "Snow.h"#include <stdlib.h>#ifndef WIN32#include <unistd.h>#endif#include <errno.h>#include <stdio.h>#include <ctype.h>#include <string.h>#ifndef WIN32#include <strings.h>#endif#include <algorithm>#include <iostream>#include <fstream>#include <sstream>#include <iomanip>const char* Snow::logo[] ={  "SNoW+ - Sparse Network of Winnows Plus\n",  "Cognitive Computations Group - University of Illinois at Urbana-Champaign",  "\nVersion 3.2.0\n",  "\0"}; Snow::Snow( GlobalParams & gp_ )    : globalParams(gp_){  DEBUG_ME = false;  //assumes GlobalParams object already initialized and validated  network = 0;  holdRank = 0;    mistakes = 0;  // open the output file if specified  // if none was given, output to console  if (globalParams.outputFile.length() != 0)   {    globalParams.pResultsOutput =      new ofstream(globalParams.outputFile.c_str());    if (!(*globalParams.pResultsOutput))    {      cerr << "Fatal Error:\n";      cerr << "Failed to open output file '"	   << globalParams.outputFile.c_str() << "' for output\n\n";      exit(1);    }    cout << "Directing output to file '"	 << globalParams.outputFile.c_str() << "'\n";  }  else  {    globalParams.pResultsOutput = &cout;  }        //Train -n +, interactive: look for existing network file to read  //if not, create default    //Train -n -: create new network    //Test, Server, Evaluate/TestSingleExample: must have existing network    //presently, allows for object mode by default  //user is responsible for correct initialization of   //GlobalParams object (e.g. specifies network file)    if(globalParams.runMode != MODE_TRAIN) {    network = new Network(globalParams);        if (globalParams.runMode == MODE_INTERACTIVE) {      if(DEBUG_ME)      {	cerr << "##in interactive mode.  looking for network file named "	     << globalParams.networkFile << "..." << endl;      }      ifstream netStream(globalParams.networkFile.c_str());      if (!netStream) // Failed to open network file for input      {	if(DEBUG_ME)	{	  cerr << "##in interactive mode.  didn't find network file named " 	       << globalParams.networkFile << ", so creating new network."  << endl;	}	network->CreateStructure();      }      else      {	if(DEBUG_ME)	{	  cerr << "##in interactive mode.  found network file and reading it..." << endl;	}	network->Read(netStream);      }            if (netStream.is_open())	netStream.close();    }    else {      ifstream netStream(globalParams.networkFile.c_str());      if (!netStream)      {	cerr << "Fatal Error:\n";	cerr << "Failed to open network file '"	     << globalParams.networkFile.c_str() << "'\n\n";	Pause();	exit(1);      }      network->Read(netStream);    }    //Used (presently) for single example test mode    holdRank = new TargetRanking(globalParams,				 network->SingleTarget(), 				 network->FirstThreshold());  }}Snow::~Snow() {  if (globalParams.outputFile.length() != 0)  {    ((ofstream*)(globalParams.pResultsOutput))->close();    delete globalParams.pResultsOutput;  }        delete network;  delete holdRank;}void Snow::Train(){  ifstream trainStream(globalParams.inputFile.c_str());  ofstream outputConjunctionStream;  if (globalParams.writeConjunctions)  {    string outputConjunctionFile = globalParams.inputFile + ".conjunctions";    outputConjunctionStream.open(outputConjunctionFile.c_str());    if (globalParams.verbosity >= VERBOSE_MIN)      cout << "Writing training examples with conjunctions to file: '"         << outputConjunctionFile << "'\n";  }  if (!trainStream)  {    cerr << "Fatal Error:\n";    cerr << "Failed to open training input file: '"       << globalParams.inputFile.c_str() << "'\n\n";    return;  }  // examine training data to set default weight and decide if we should  // perform blow-up on the feature space  int examples = 0;  FeatureID max_id = 0;  Example example(globalParams);  vector<Example> training_set;#if defined(FEATURE_HASH) && !defined(WIN32)  hash_set<FeatureID> featureSet;#else  set<FeatureID> featureSet;#endif  if (globalParams.generateConjunctions != CONJUNCTIONS_OFF      || globalParams.calculateExampleSize)  {    // We might as well store all examples in memory now, since we're reading    // the train stream anyway.    if (globalParams.examplesInMemory)    {      while (!trainStream.eof())      {        if (example.ReadLabeled(trainStream))        {          example.ReadFeatureSet(featureSet, max_id);          training_set.push_back(example);          training_set[training_set.size() - 1].features.free_unused_space();        }        else        {          if (!trainStream.eof())            cerr << "Failed reading example " << (training_set.size() + 1)                 << " from " << globalParams.inputFile.c_str() << endl;        }      }      examples = training_set.size();    }    else    {      while (!trainStream.eof() && featureSet.size() < 1000)      {        if (example.Read(trainStream))        {          example.ReadFeatureSet(featureSet, max_id);          ++examples;        }        else        {          if (!trainStream.eof())            cerr << "Failed reading example " << (examples + 1)                 << " from " << globalParams.inputFile.c_str() << endl;        }      }      // rewind the input file      trainStream.clear();      trainStream.seekg(0L);    }#ifdef AVERAGE_EXAMPLE_SIZE    if (globalParams.calculateExampleSize)      globalParams.averageExampleSize /= examples;#endif  }  // do error / warning checking if user turned conjunctions on  if (globalParams.generateConjunctions == CONJUNCTIONS_ON)  {    if (featureSet.size() > 999)      cerr << "WARNING: -g generate conjunctions flag specified with more "           << "than 1000 features\n";    if (max_id > 9999)      {      cerr << "ERROR: -g generate conjunctions flag specified with "           << "featureIDs over 10000\n";      cerr << "       conjunction generation will be turned off.\n";      globalParams.generateConjunctions = CONJUNCTIONS_OFF;    }  }  // if the user didn't set conjunctions, decide if we should turn them on  if (globalParams.generateConjunctions == CONJUNCTIONS_UNSET)  {    if ((featureSet.size() < 100) && (max_id < 10000))      {      globalParams.generateConjunctions = CONJUNCTIONS_ON;      if (globalParams.verbosity >= VERBOSE_MIN)        cout << "Less than 100 features used: auto-generating conjunctions\n";    }    else globalParams.generateConjunctions = CONJUNCTIONS_OFF;  }  // create the network    this->network = new Network(globalParams);    if(globalParams.updateExistingNetwork) {    ifstream netStream(globalParams.networkFile.c_str());    if (!netStream) // Failed to open network file for input      network->CreateStructure();    else      network->Read(netStream);    if (netStream.is_open())      netStream.close();  }  else    network->CreateStructure();    Example* example_pointer;  if (globalParams.examplesInMemory)  {    if (globalParams.generateConjunctions != CONJUNCTIONS_OFF        || globalParams.calculateExampleSize)      // This means examples have already been read into memory.    {      int i;      if (globalParams.generateConjunctions == CONJUNCTIONS_ON          && globalParams.writeConjunctions)      {        for (i = 0; i < training_set.size(); ++i)        {          training_set[i].GenerateConjunctions();          training_set[i].Write(outputConjunctionStream);        }      }      else if (globalParams.generateConjunctions == CONJUNCTIONS_ON)      {        for (i = 0; i < training_set.size(); ++i)          training_set[i].GenerateConjunctions();      }    }    else // Otherwise, examples haven't been read into memory yet.    {      while (!trainStream.eof())      {        if (example.ReadLabeled(trainStream))        {          training_set.push_back(example);          training_set[training_set.size() - 1].features.free_unused_space();        }        else if (!trainStream.eof())        {          cerr << "Failed reading example " << (training_set.size() + 1)               << " from " << globalParams.inputFile.c_str() << endl;        }      }    }  }   //end if(examples_in_memory)  else example_pointer = &example;  this->mistakes = 1;  for (globalParams.currentCycle = 1;       globalParams.currentCycle <= globalParams.cycles       && (mistakes || (globalParams.currentCycle == 2                        && globalParams.noFirstCycleUpdate));      ++globalParams.currentCycle)  {    if (globalParams.verbosity >= VERBOSE_MED)      *globalParams.pResultsOutput << "Starting training cycle "                                   << globalParams.currentCycle << endl;    examples = 0;    this->mistakes = 0;    while (examples < training_set.size() || !trainStream.eof())    {      if (!(examples % 1000) && globalParams.currentCycle == 1)        network->Discard();      if (globalParams.examplesInMemory)        example_pointer = &training_set[examples];      else      {        if (!example.ReadLabeled(trainStream))        {          if (!trainStream.eof())          {            cerr << "Failed reading example " << (examples + 1) << " from "                 << globalParams.inputFile.c_str() << endl;          }          continue;        }	presentTrainExample(example_pointer);	if (globalParams.verbosity == VERBOSE_MAX)	{	  *globalParams.pResultsOutput << "Ex " << examples << " : ";	  example_pointer->Show(globalParams.pResultsOutput);	}      }      ++examples;      if ((globalParams.verbosity == VERBOSE_MED) && !(examples % 100))        *globalParams.pResultsOutput << examples << " examples presented\n";

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -