📄 snow.cpp
字号:
// -*-c++-*-//===========================================================//= University of Illinois at Urbana-Champaign =//= Department of Computer Science =//= Dr. Dan Roth - Cognitive Computation Group =//= =//= Project: SNoW =//= =//= Module: Snow.cpp =//= Version: 3.2.0 =//= Authors: Jeff Rosen, Andrew Carlson, Nick Rizzolo =//= Date: xx/xx/99 = //= =//= Comments: =//===========================================================#include <vector>#include <string>#include <numeric>#include <iomanip>#include "TargetIdSet.h"#include "GlobalParams.h"#include "Network.h"#include "Example.h"#include "Snow.h"#include <stdlib.h>#ifndef WIN32#include <unistd.h>#endif#include <errno.h>#include <stdio.h>#include <ctype.h>#include <string.h>#ifndef WIN32#include <strings.h>#endif#include <algorithm>#include <iostream>#include <fstream>#include <sstream>#include <iomanip>const char* Snow::logo[] ={ "SNoW+ - Sparse Network of Winnows Plus\n", "Cognitive Computations Group - University of Illinois at Urbana-Champaign", "\nVersion 3.2.0\n", "\0"}; Snow::Snow( GlobalParams & gp_ ) : globalParams(gp_){ DEBUG_ME = false; //assumes GlobalParams object already initialized and validated network = 0; holdRank = 0; mistakes = 0; // open the output file if specified // if none was given, output to console if (globalParams.outputFile.length() != 0) { globalParams.pResultsOutput = new ofstream(globalParams.outputFile.c_str()); if (!(*globalParams.pResultsOutput)) { cerr << "Fatal Error:\n"; cerr << "Failed to open output file '" << globalParams.outputFile.c_str() << "' for output\n\n"; exit(1); } cout << "Directing output to file '" << globalParams.outputFile.c_str() << "'\n"; } else { globalParams.pResultsOutput = &cout; } //Train -n +, interactive: look for existing network file to read //if not, create default //Train -n -: create new network //Test, Server, Evaluate/TestSingleExample: must have existing network //presently, allows for object mode by default //user is responsible for correct initialization of //GlobalParams object (e.g. specifies network file) if(globalParams.runMode != MODE_TRAIN) { network = new Network(globalParams); if (globalParams.runMode == MODE_INTERACTIVE) { if(DEBUG_ME) { cerr << "##in interactive mode. looking for network file named " << globalParams.networkFile << "..." << endl; } ifstream netStream(globalParams.networkFile.c_str()); if (!netStream) // Failed to open network file for input { if(DEBUG_ME) { cerr << "##in interactive mode. didn't find network file named " << globalParams.networkFile << ", so creating new network." << endl; } network->CreateStructure(); } else { if(DEBUG_ME) { cerr << "##in interactive mode. found network file and reading it..." << endl; } network->Read(netStream); } if (netStream.is_open()) netStream.close(); } else { ifstream netStream(globalParams.networkFile.c_str()); if (!netStream) { cerr << "Fatal Error:\n"; cerr << "Failed to open network file '" << globalParams.networkFile.c_str() << "'\n\n"; Pause(); exit(1); } network->Read(netStream); } //Used (presently) for single example test mode holdRank = new TargetRanking(globalParams, network->SingleTarget(), network->FirstThreshold()); }}Snow::~Snow() { if (globalParams.outputFile.length() != 0) { ((ofstream*)(globalParams.pResultsOutput))->close(); delete globalParams.pResultsOutput; } delete network; delete holdRank;}void Snow::Train(){ ifstream trainStream(globalParams.inputFile.c_str()); ofstream outputConjunctionStream; if (globalParams.writeConjunctions) { string outputConjunctionFile = globalParams.inputFile + ".conjunctions"; outputConjunctionStream.open(outputConjunctionFile.c_str()); if (globalParams.verbosity >= VERBOSE_MIN) cout << "Writing training examples with conjunctions to file: '" << outputConjunctionFile << "'\n"; } if (!trainStream) { cerr << "Fatal Error:\n"; cerr << "Failed to open training input file: '" << globalParams.inputFile.c_str() << "'\n\n"; return; } // examine training data to set default weight and decide if we should // perform blow-up on the feature space int examples = 0; FeatureID max_id = 0; Example example(globalParams); vector<Example> training_set;#if defined(FEATURE_HASH) && !defined(WIN32) hash_set<FeatureID> featureSet;#else set<FeatureID> featureSet;#endif if (globalParams.generateConjunctions != CONJUNCTIONS_OFF || globalParams.calculateExampleSize) { // We might as well store all examples in memory now, since we're reading // the train stream anyway. if (globalParams.examplesInMemory) { while (!trainStream.eof()) { if (example.ReadLabeled(trainStream)) { example.ReadFeatureSet(featureSet, max_id); training_set.push_back(example); training_set[training_set.size() - 1].features.free_unused_space(); } else { if (!trainStream.eof()) cerr << "Failed reading example " << (training_set.size() + 1) << " from " << globalParams.inputFile.c_str() << endl; } } examples = training_set.size(); } else { while (!trainStream.eof() && featureSet.size() < 1000) { if (example.Read(trainStream)) { example.ReadFeatureSet(featureSet, max_id); ++examples; } else { if (!trainStream.eof()) cerr << "Failed reading example " << (examples + 1) << " from " << globalParams.inputFile.c_str() << endl; } } // rewind the input file trainStream.clear(); trainStream.seekg(0L); }#ifdef AVERAGE_EXAMPLE_SIZE if (globalParams.calculateExampleSize) globalParams.averageExampleSize /= examples;#endif } // do error / warning checking if user turned conjunctions on if (globalParams.generateConjunctions == CONJUNCTIONS_ON) { if (featureSet.size() > 999) cerr << "WARNING: -g generate conjunctions flag specified with more " << "than 1000 features\n"; if (max_id > 9999) { cerr << "ERROR: -g generate conjunctions flag specified with " << "featureIDs over 10000\n"; cerr << " conjunction generation will be turned off.\n"; globalParams.generateConjunctions = CONJUNCTIONS_OFF; } } // if the user didn't set conjunctions, decide if we should turn them on if (globalParams.generateConjunctions == CONJUNCTIONS_UNSET) { if ((featureSet.size() < 100) && (max_id < 10000)) { globalParams.generateConjunctions = CONJUNCTIONS_ON; if (globalParams.verbosity >= VERBOSE_MIN) cout << "Less than 100 features used: auto-generating conjunctions\n"; } else globalParams.generateConjunctions = CONJUNCTIONS_OFF; } // create the network this->network = new Network(globalParams); if(globalParams.updateExistingNetwork) { ifstream netStream(globalParams.networkFile.c_str()); if (!netStream) // Failed to open network file for input network->CreateStructure(); else network->Read(netStream); if (netStream.is_open()) netStream.close(); } else network->CreateStructure(); Example* example_pointer; if (globalParams.examplesInMemory) { if (globalParams.generateConjunctions != CONJUNCTIONS_OFF || globalParams.calculateExampleSize) // This means examples have already been read into memory. { int i; if (globalParams.generateConjunctions == CONJUNCTIONS_ON && globalParams.writeConjunctions) { for (i = 0; i < training_set.size(); ++i) { training_set[i].GenerateConjunctions(); training_set[i].Write(outputConjunctionStream); } } else if (globalParams.generateConjunctions == CONJUNCTIONS_ON) { for (i = 0; i < training_set.size(); ++i) training_set[i].GenerateConjunctions(); } } else // Otherwise, examples haven't been read into memory yet. { while (!trainStream.eof()) { if (example.ReadLabeled(trainStream)) { training_set.push_back(example); training_set[training_set.size() - 1].features.free_unused_space(); } else if (!trainStream.eof()) { cerr << "Failed reading example " << (training_set.size() + 1) << " from " << globalParams.inputFile.c_str() << endl; } } } } //end if(examples_in_memory) else example_pointer = &example; this->mistakes = 1; for (globalParams.currentCycle = 1; globalParams.currentCycle <= globalParams.cycles && (mistakes || (globalParams.currentCycle == 2 && globalParams.noFirstCycleUpdate)); ++globalParams.currentCycle) { if (globalParams.verbosity >= VERBOSE_MED) *globalParams.pResultsOutput << "Starting training cycle " << globalParams.currentCycle << endl; examples = 0; this->mistakes = 0; while (examples < training_set.size() || !trainStream.eof()) { if (!(examples % 1000) && globalParams.currentCycle == 1) network->Discard(); if (globalParams.examplesInMemory) example_pointer = &training_set[examples]; else { if (!example.ReadLabeled(trainStream)) { if (!trainStream.eof()) { cerr << "Failed reading example " << (examples + 1) << " from " << globalParams.inputFile.c_str() << endl; } continue; } presentTrainExample(example_pointer); if (globalParams.verbosity == VERBOSE_MAX) { *globalParams.pResultsOutput << "Ex " << examples << " : "; example_pointer->Show(globalParams.pResultsOutput); } } ++examples; if ((globalParams.verbosity == VERBOSE_MED) && !(examples % 100)) *globalParams.pResultsOutput << examples << " examples presented\n";
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -