⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hoeffding.cpp

📁 C4.5的简化版本是一个决策树的算法
💻 CPP
📖 第 1 页 / 共 2 页
字号:

#include <stdlib.h>
#include <math.h>
#include <fstream.h>
#include <string.h>
#include <iostream.h>
#include <string.h>
#include "hoeffding.h"

/* Dimension.C
 *
 * Each example that we want to place in the decision tree has a value for
 * each attribute. An attribute together with all the possible values for
 * the attribute we call a dimension. The dimension class stores an
 * attribute and its possible values, which is handy for various reasons.
 */

// Build a dimension
Dimension::Dimension()
{
  next = NULL;  // (LBH 3/96)
}

// Destroy a dimension
Dimension::~Dimension()
{
}

// Given a name, lookup the index of the corresponding value
int Dimension::lookup(char temp[])
{
   for(int x = 0; x < num_values; x++)
      if(strcmp(temp, values[x]) == 0)
         return x;
   cout<<temp<<endl;
   cout<<"there's no such value in that dimension!"<<endl;
   return -1; //there's no such value in that dimension
}

// Return the name of the attribute for this dimension
char *Dimension::get_attribute()
{
   return attribute;
}

// Read in a dimension from a file
istream& operator>>(istream &i, Dimension &d)
{
   i >> d.attribute;//first read in the name of that attribute
   cout<<d.attribute<<":  ";
   i >> d.num_values;  //then the num of the values
   for(int x = 0; x < d.num_values; x++)  //then the values in one by one
   {
   	   i >> d.values[x];
	   cout<<d.values[x]<<"\t";
   }
   cout<<endl;
   return i;
}
/* Example.C
 *
 * An example is an assignment of values to each possible attribute.
 * An example has a classification value.
 */

// Construct an example, from an input stream.
// This input stream is the input from the examples file(dataset).
Example::Example(istream& in)
{
  char word[MAX_STRING];

  values = new int[Decision::num_dimensions];
  for(int x = 0; x < Decision::num_dimensions; x++) { //read the values one by one from file
    in >> word;
    values[x] = Decision::lookup_attribute(x, word);
  }
 
  in >> word;   // the last should be the class value
  my_class = Decision::lookup_class(word);
}

// Destroy this example. That means destroying the dynamically allocated memory
Example::~Example()
{
   delete [] values;
}

// Get the index of the class this example goes with
int Example::get_class()
{
   return my_class;
}

// Get the value(index) for a certain dimension.
int Example::get_value(int dim)
{
   return values[dim];
}

// Print out the example
//ostream& operator<<(ostream &o, Example &e)
//{
//   o <<" [" << Decision::classes[e.my_class] << "] ";

//   for(int x = 0; x < Decision::num_dimensions; x++) {
//      o << "(";
//      o << Decision::dimensions[x]->get_attribute() << ", ";
//      o << Decision::dimensions[x]->values[e.values[x]] << ") ";
//	  o << " ";
//   }

//   return o;
//}
/* Decision.C
 *
 * This class is a container class for some globally available values,
 * such as the valid dimensions and classes for this problem.
 */

int Decision::num_classes;             // The number of classes for this run
char Decision::classes[MAX_CLASSES][MAX_STRING];   // Names for all classes

int Decision::num_dimensions;          // The number of dimensions for this run
Dimension **Decision::dimensions;      // Array of dimensions
int Decision::majority_class;          //among all the sample set--------???????how to predict if
                                   //the stream can't be seen at the start time

// Construct a decision given the filename for the info file
Decision::Decision(char *filename)
{
   int x, majority_class_count;  
   int class_count[MAX_CLASSES];

   ifstream in(filename);

   if(in.fail()) {
      cerr << "Could not open file " << filename << endl;
      exit(1);
   }

   char word[MAX_STRING];    // Some scratch space for reading in words
   in >> word;   // Should be "classes"
   if(strcmp(word, "classes") != 0) {
      cerr << "Expected keyword \"classes\" at beginning of example file"
           << endl;
      exit(1);
   }
   cout<<"Classlabel:"<<"  ";
   in >> num_classes;   // Read in the number of classes
   for(x = 0; x < num_classes; x++) { // Read in each class name
      in >> classes[x];
	  cout<<classes[x]<<"\t";
      class_count[x] = 0;   // (LBH 3/96)
   }
   cout<<endl;
   in >> word;   // Should be "dimensions"
   if(strcmp(word, "dimensions") != 0) {
      cerr << "Expected keyword \"dimensions\", got " << word << endl;
      exit(1);
   }
   cout<<"Attributes:"<<"  "<<endl;
   in >> num_dimensions;  // Read in the number of dimensions for this run
   // Dynamically allocate an array of pointers to Dimension
   dimensions = new Dimension *[num_dimensions];
   for(x = 0; x < num_dimensions; x++) {
     dimensions[x] = new Dimension();
     in >> *dimensions[x];  // Read in each dimension
   }

   root = new Node;    // Create the top of the decision tree

   // Add all attributes as possible split attributes in root node 
   for (x = 0; x < num_dimensions; x++) root->add_attribute(x);

   // Construct a linked list of all examples.
   in >> word;   // Should be "examples"
   if(strcmp(word, "examples") != 0) {
      cerr << "Expected keyword \"examples\", got " << word << endl;
      exit(1);
   }

   // Read in all the examples and give them to the root node
  // char cword[MAX_STRING];
   int num=0;  //count examples readb
   while(!in.eof()) {
	 if(in.eof() || in.fail())   // End of file?
         break;
	   
	    Example *example = new Example(in);
       class_count[example->get_class()]++;
       root->add_example(example);
	   num++;
   }
  // cout<<num<<endl;

   // Find majority class 
   majority_class = 0;
   majority_class_count = class_count[0];
   for (x = 1; x < num_classes; x++)
     if (class_count[x] > majority_class_count) {
        majority_class = x;
        majority_class_count = class_count[x];
     }
}

// Destroy the decision, which means destroying all the dynamically
// allocated data
Decision::~Decision()
{
   for(int x = 0; x < num_dimensions; x++)
     delete dimensions[x];//each is a dimension pointer
   delete [] dimensions;
   delete root; // Deleting the root will delete the entire tree
}

// Given a name, look up which attribute this is, and return the index
int Decision::lookup_attribute(int x, char temp[])
{
   return dimensions[x]->lookup(temp);//return the position this value is in that dimension
}

// Given a name, look up which class this is, and return the index
int Decision::lookup_class(char temp[])
{
   for(int x = 0; x < num_classes; x++)
      if(strcmp(classes[x], temp) == 0)
         return x;
   cout<<"There's no such class!"<<endl;
   return -1; //there's no such class
}

// Run the build_tree() algorithm to create the decision tree
void Decision::build_tree()
{
   root->build_tree(majority_class);
}

// Print out the tree
ostream& operator<<(ostream &o, Decision &d)
{
   for(int i = 0;i < 80;i++)
		o<<"*";
   cout<<endl;
   for(i = 0;i < 30;i++)
	   cout<<" ";
   cout<<"DECISION    TREE"<<endl;
   for(i = 0;i < 80;i++)
		o<<"*";
   cout<<endl;
   d.root->print(0);

   return o;
}
/* Node.C
 *
 * The decision tree is built from nodes.
 *
 * To classify an example, we start at the node of the decision tree
 * and work our way down until we reach a yes or no answer. Each
 * node has some question to ask, and depending on the answer to that
 * question, we choose one of the children. This "question" is called
 * an attribute. We store a dimension with the node, because this lets
 * us know the name of the attribute as well as all the possible values.
 * Each different value as an "answer" to the attribute question would
 * direct the search to a different child.
 *
 * Each node is also associated with one or more examples, if it is
 * a leaf node. These examples are previous data that helped to build
 * the tree.
 */

// Construct a node. This means setting up some initial values
Node::Node()
{
   num_children = 0;
   num_examples = 0;
   examples = NULL;

   num_attrs = 0;     
   for (int a = 0; a < MAX_ATTRS; a++)
      attributes[a] = -1;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -