📄 hoeffding.cpp
字号:
#include <stdlib.h>
#include <math.h>
#include <fstream.h>
#include <string.h>
#include <iostream.h>
#include <string.h>
#include "hoeffding.h"
/* Dimension.C
*
* Each example that we want to place in the decision tree has a value for
* each attribute. An attribute together with all the possible values for
* the attribute we call a dimension. The dimension class stores an
* attribute and its possible values, which is handy for various reasons.
*/
// Build a dimension
Dimension::Dimension()
{
next = NULL; // (LBH 3/96)
}
// Destroy a dimension
Dimension::~Dimension()
{
}
// Given a name, lookup the index of the corresponding value
int Dimension::lookup(char temp[])
{
for(int x = 0; x < num_values; x++)
if(strcmp(temp, values[x]) == 0)
return x;
cout<<temp<<endl;
cout<<"there's no such value in that dimension!"<<endl;
return -1; //there's no such value in that dimension
}
// Return the name of the attribute for this dimension
char *Dimension::get_attribute()
{
return attribute;
}
// Read in a dimension from a file
istream& operator>>(istream &i, Dimension &d)
{
i >> d.attribute;//first read in the name of that attribute
cout<<d.attribute<<": ";
i >> d.num_values; //then the num of the values
for(int x = 0; x < d.num_values; x++) //then the values in one by one
{
i >> d.values[x];
cout<<d.values[x]<<"\t";
}
cout<<endl;
return i;
}
/* Example.C
*
* An example is an assignment of values to each possible attribute.
* An example has a classification value.
*/
// Construct an example, from an input stream.
// This input stream is the input from the examples file(dataset).
Example::Example(istream& in)
{
char word[MAX_STRING];
values = new int[Decision::num_dimensions];
for(int x = 0; x < Decision::num_dimensions; x++) { //read the values one by one from file
in >> word;
values[x] = Decision::lookup_attribute(x, word);
}
in >> word; // the last should be the class value
my_class = Decision::lookup_class(word);
}
// Destroy this example. That means destroying the dynamically allocated memory
Example::~Example()
{
delete [] values;
}
// Get the index of the class this example goes with
int Example::get_class()
{
return my_class;
}
// Get the value(index) for a certain dimension.
int Example::get_value(int dim)
{
return values[dim];
}
// Print out the example
//ostream& operator<<(ostream &o, Example &e)
//{
// o <<" [" << Decision::classes[e.my_class] << "] ";
// for(int x = 0; x < Decision::num_dimensions; x++) {
// o << "(";
// o << Decision::dimensions[x]->get_attribute() << ", ";
// o << Decision::dimensions[x]->values[e.values[x]] << ") ";
// o << " ";
// }
// return o;
//}
/* Decision.C
*
* This class is a container class for some globally available values,
* such as the valid dimensions and classes for this problem.
*/
int Decision::num_classes; // The number of classes for this run
char Decision::classes[MAX_CLASSES][MAX_STRING]; // Names for all classes
int Decision::num_dimensions; // The number of dimensions for this run
Dimension **Decision::dimensions; // Array of dimensions
int Decision::majority_class; //among all the sample set--------???????how to predict if
//the stream can't be seen at the start time
// Construct a decision given the filename for the info file
Decision::Decision(char *filename)
{
int x, majority_class_count;
int class_count[MAX_CLASSES];
ifstream in(filename);
if(in.fail()) {
cerr << "Could not open file " << filename << endl;
exit(1);
}
char word[MAX_STRING]; // Some scratch space for reading in words
in >> word; // Should be "classes"
if(strcmp(word, "classes") != 0) {
cerr << "Expected keyword \"classes\" at beginning of example file"
<< endl;
exit(1);
}
cout<<"Classlabel:"<<" ";
in >> num_classes; // Read in the number of classes
for(x = 0; x < num_classes; x++) { // Read in each class name
in >> classes[x];
cout<<classes[x]<<"\t";
class_count[x] = 0; // (LBH 3/96)
}
cout<<endl;
in >> word; // Should be "dimensions"
if(strcmp(word, "dimensions") != 0) {
cerr << "Expected keyword \"dimensions\", got " << word << endl;
exit(1);
}
cout<<"Attributes:"<<" "<<endl;
in >> num_dimensions; // Read in the number of dimensions for this run
// Dynamically allocate an array of pointers to Dimension
dimensions = new Dimension *[num_dimensions];
for(x = 0; x < num_dimensions; x++) {
dimensions[x] = new Dimension();
in >> *dimensions[x]; // Read in each dimension
}
root = new Node; // Create the top of the decision tree
// Add all attributes as possible split attributes in root node
for (x = 0; x < num_dimensions; x++) root->add_attribute(x);
// Construct a linked list of all examples.
in >> word; // Should be "examples"
if(strcmp(word, "examples") != 0) {
cerr << "Expected keyword \"examples\", got " << word << endl;
exit(1);
}
// Read in all the examples and give them to the root node
// char cword[MAX_STRING];
int num=0; //count examples readb
while(!in.eof()) {
if(in.eof() || in.fail()) // End of file?
break;
Example *example = new Example(in);
class_count[example->get_class()]++;
root->add_example(example);
num++;
}
// cout<<num<<endl;
// Find majority class
majority_class = 0;
majority_class_count = class_count[0];
for (x = 1; x < num_classes; x++)
if (class_count[x] > majority_class_count) {
majority_class = x;
majority_class_count = class_count[x];
}
}
// Destroy the decision, which means destroying all the dynamically
// allocated data
Decision::~Decision()
{
for(int x = 0; x < num_dimensions; x++)
delete dimensions[x];//each is a dimension pointer
delete [] dimensions;
delete root; // Deleting the root will delete the entire tree
}
// Given a name, look up which attribute this is, and return the index
int Decision::lookup_attribute(int x, char temp[])
{
return dimensions[x]->lookup(temp);//return the position this value is in that dimension
}
// Given a name, look up which class this is, and return the index
int Decision::lookup_class(char temp[])
{
for(int x = 0; x < num_classes; x++)
if(strcmp(classes[x], temp) == 0)
return x;
cout<<"There's no such class!"<<endl;
return -1; //there's no such class
}
// Run the build_tree() algorithm to create the decision tree
void Decision::build_tree()
{
root->build_tree(majority_class);
}
// Print out the tree
ostream& operator<<(ostream &o, Decision &d)
{
for(int i = 0;i < 80;i++)
o<<"*";
cout<<endl;
for(i = 0;i < 30;i++)
cout<<" ";
cout<<"DECISION TREE"<<endl;
for(i = 0;i < 80;i++)
o<<"*";
cout<<endl;
d.root->print(0);
return o;
}
/* Node.C
*
* The decision tree is built from nodes.
*
* To classify an example, we start at the node of the decision tree
* and work our way down until we reach a yes or no answer. Each
* node has some question to ask, and depending on the answer to that
* question, we choose one of the children. This "question" is called
* an attribute. We store a dimension with the node, because this lets
* us know the name of the attribute as well as all the possible values.
* Each different value as an "answer" to the attribute question would
* direct the search to a different child.
*
* Each node is also associated with one or more examples, if it is
* a leaf node. These examples are previous data that helped to build
* the tree.
*/
// Construct a node. This means setting up some initial values
Node::Node()
{
num_children = 0;
num_examples = 0;
examples = NULL;
num_attrs = 0;
for (int a = 0; a < MAX_ATTRS; a++)
attributes[a] = -1;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -