⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 c4.5.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*
    This file is part of Orange.


    Orange is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Orange is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Orange; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Authors: Janez Demsar, Blaz Zupan, 1996--2002
    Contact: janez.demsar@fri.uni-lj.si
*/


#include "vars.hpp"
#include "examples.hpp"
#include "examplegen.hpp"
#include "table.hpp"
#include "classify.hpp"
#include "learn.hpp"
#include "getarg.hpp"

#include "c4.5.ppp"

DEFINE_TOrangeVector_classDescription(PC45TreeNode, "TC45TreeNodeList", true, ORANGE_API)

// Set to true by loadC45() once the C4.5 shared library has been loaded.
bool c45Loaded = false;

// Signature of the "learn" entry point looked up in the C4.5 library.
typedef void *learnFunc(char gainRatio, char subset, char batch, char probThresh,
                       int trials, int minObjs, int window, int increment, float cf, char prune);
// Signature of the "guarded_collect" entry point looked up in the C4.5 library.
typedef void garbageFunc();

// Filled in by dynloadC45(): the two entry points and the address of the
// library's "c45Data" symbol.
learnFunc *c45learn;
garbageFunc *c45garbage;
void *pc45data;

// Defined elsewhere; loadC45() reads its "__file__" entry to find the library.
extern PyObject *orangeModule;


// A single attribute value of a converted example: either a discrete value
// or a float. Description points to an array of these (one converted
// example, see convertExample()).
typedef  union  _attribute_value {
  DiscrValue _discr_val;
  float _cont_val;
} AttValue, *Description;


// Value stored in place of a missing continuous attribute value.
#define Unknown  (-999)

// NOTE(review): presumably the node/test kinds of the C4.5 tree; they are
// not used in the part of the file visible here -- confirm against the users.
#define BrDiscr 1
#define ThreshContin 2
#define BrSubset 3

// Bit(b): mask with only bit b set.
// In(b,s): non-zero iff bit b is set in the byte array s (8 bits per byte;
// b>>3 selects the byte, b&07 the bit inside it). Both arguments are
// parenthesized so that expressions such as `set + 1` can be passed as s.
#define Bit(b) (1 << (b))
#define In(b,s) (((s)[(b) >> 3]) & Bit((b) & 07))


// Local copy of the pointer table that the C4.5 library exports as "c45Data"
// (loadC45() fills it with memcpy). Every member is a pointer to one of the
// library's own variables; the field order and types therefore have to match
// the library's definition exactly.
struct {
  short		*rMaxAtt, *rMaxClass, *rMaxDiscrVal;
  int *rMaxItem;
  Description	**rItem;
  DiscrValue	**rMaxAttVal;
  char **rSpecialStatus, ***rClassName, ***rAttName, ****rAttValName; 
} c45data;

// Base name of the C4.5 shared library; debug builds use a separate "_d" binary.
#ifdef _DEBUG
#define C45STEM "c45_d"
#else
#define C45STEM "c45"
#endif

// PATHSEP: directory separator used when stripping the file name in loadC45().
// C45NAME: library file name with a leading separator, so that it can be
// appended directly to a directory path.
#ifdef _MSC_VER
#define PATHSEP '\\'
#define C45NAME "\\" C45STEM ".dll"
#else
#define PATHSEP '/'
#define C45NAME "/" C45STEM ".so"
#endif

#if defined _MSC_VER

#include <direct.h>
#define getcwd _getcwd

#define WIN32_LEAN_AND_MEAN		// Exclude rarely-used stuff from Windows headers
#include <windows.h>

// Returns the address of the exported symbol `name` in the loaded DLL.
// A missing symbol is reported through raiseErrorWho.
void *getsym(HINSTANCE handle, const char *name)
{
  void *address = GetProcAddress(handle, name);
  if (address)
    return address;

  raiseErrorWho("C45Loader", "invalid %s, cannot find symbol %s", C45NAME, name);
  return NULL;
}

// Loads the C4.5 DLL from `pathname` and resolves the three symbols this
// file needs: the "c45Data" pointer table and the "learn" and
// "guarded_collect" entry points. Failures are reported via raiseErrorWho.
void dynloadC45(const char *pathname)
{
  // pathname is a narrow (char) string, so the ANSI entry point is called
  // explicitly; the generic LoadLibrary maps to LoadLibraryW when UNICODE
  // is defined and would then not accept a const char *.
  HINSTANCE c45Dll = LoadLibraryA(pathname);
  if (!c45Dll)
    raiseErrorWho("C45Loader", "cannot load %s", C45NAME);

  pc45data = getsym(c45Dll, "c45Data");
  c45learn = (learnFunc *)(getsym(c45Dll, "learn"));
  c45garbage = (garbageFunc *)(getsym(c45Dll, "guarded_collect"));
}

#elif defined LINUX || defined FREEBSD || defined DARWIN

#include <dlfcn.h>
#include <unistd.h>

void *getsym(void *handle, const char *name)
{
  void *sym = dlsym(handle, name);
  if (!sym)
    raiseErrorWho("C45Loader", "invalid %s, cannot find symbol %s", C45NAME, name);
  return sym;
}

void dynloadC45(char pathname[])
{ 
  void *handle = dlopen(pathname, 0 /*dlopenflags*/);
  if (handle == NULL)
    raiseErrorWho("C45Loader", dlerror());
  
  pc45data = getsym(handle, "c45Data");
  c45learn = (learnFunc *)getsym(handle, "learn");
  c45garbage = (garbageFunc *)getsym(handle, "guarded_collect");
}
   
#else

// Fallback for platforms with no dynamic-loading support in this file:
// always reports an error.
void dynloadC45(char [])
{
  raiseErrorWho("C45Loader", "c45 is not supported on this platform");
}

#endif

// Remove any IGNORE macro left over from the headers included above.
// NOTE(review): presumably the one defined by <windows.h>, clashing with an
// identifier used later in the project -- confirm.
#ifdef IGNORE
#undef IGNORE
#endif

void loadC45()
{
  char *buf = NULL, *bp;

  PyObject *orangeDirName = PyDict_GetItemString(PyModule_GetDict(orangeModule), "__file__");
  if (orangeDirName) {
    char *odn = PyString_AsString(orangeDirName);
    buf = (char *)malloc(strlen(odn) + strlen(C45NAME) + 1);
    strcpy(buf, odn);
    bp = buf + strlen(buf);
    while ((bp!=buf) && (*bp!=PATHSEP))
      bp--;
    *bp = 0;
  }
    
  // If path is empty, orange.so was probably loaded from the working directory
  if (!buf || !*buf) {
    buf = (char *)realloc(buf, 512);
    if (!getcwd(buf, 511))
      raiseErrorWho("C45Loader", C45NAME " cannot be found");
    bp = buf + strlen(buf);
  }
  
  strcpy(bp, C45NAME);

  dynloadC45(buf);
  memcpy(&c45data, pc45data, sizeof(c45data));
  c45Loaded = true;
}

// Shorthands that dereference the pointers stored in c45data, so the code
// below can read and assign the C4.5 library's variables by their plain
// names. They are usable only after loadC45() has filled c45data.
#define MaxAtt (*c45data.rMaxAtt)
#define MaxClass (*c45data.rMaxClass)
#define MaxDiscrVal (*c45data.rMaxDiscrVal)
#define MaxItem (*c45data.rMaxItem)
#define Item (*c45data.rItem)
#define MaxAttVal (*c45data.rMaxAttVal)
#define SpecialStatus (*c45data.rSpecialStatus)
#define ClassName (*c45data.rClassName)
#define AttName (*c45data.rAttName)
#define AttValName (*c45data.rAttValName)

// Sets the learner's options to their initial values and makes sure the
// C4.5 shared library is loaded (only the first constructed learner
// actually triggers loadC45()).
TC45Learner::TC45Learner()
 : gainRatio(true),
   subset(false),
   batch(true),
   probThresh(false),
   minObjs(2),
   window(0),
   increment(0),
   cf(0.25),
   trials(10),
   prune(true),
   convertToOrange(false),
   storeContingencies(false),
   storeExamples(false)
{
    // c45Loaded is a file-level flag set at the end of loadC45()
    if (!c45Loaded)
      loadC45();
 }


bool TC45Learner::clearDomain()
{ if (ClassName) {
    String *ClassNamei=ClassName;
    MaxClass++;
    while(MaxClass--)
      mldelete *(ClassNamei++);
    mldelete ClassName;
    ClassName=NULL;
  }

  if (AttName) {
    String *AttNamei=AttName;
    int atts=MaxAtt+1;
    while(atts--)
      mldelete *(AttNamei++);
    mldelete AttName;
    AttName=NULL;
  }

  if (AttValName && MaxAttVal) {
    String **AttValNamei=AttValName; 
    DiscrValue *MaxAttVali=MaxAttVal; 
    for(int atts=MaxAtt; atts--; MaxAttVali++) {
      String *AttValNameii = *AttValNamei+1; // the first one is NULL...
      while((*MaxAttVali)--)
        mldelete *(AttValNameii++);
      mldelete *(AttValNamei++);
    }
    mldelete AttValName;
    mldelete MaxAttVal;
    AttValNamei=NULL;
    MaxAttVal=NULL;
  }

  if (SpecialStatus) {
    mldelete SpecialStatus;
    SpecialStatus=NULL;
  }

  return true;
}      


/*  Describes the domain `dom` to the C4.5 library by filling the library's
    variables: MaxAtt, MaxClass and MaxDiscrVal, the class and attribute
    names, and for every attribute its number of values and value names
    (NULL / 0 for attributes that are not discrete).

    All strings and arrays are freshly allocated copies; clearDomain()
    releases them. Raises an error if the class attribute is not discrete.
    Always returns true. */
bool TC45Learner::convertDomain(PDomain dom)
{ 
  TEnumVariable *classVar=dom->classVar.AS(TEnumVariable);
  if (!classVar)
    raiseError("domain with discrete class attribute expected");

  // Both are indices of the last element, not counts
  MaxAtt = dom->attributes->size()-1;
  MaxClass = classVar->noOfValues()-1;
  MaxDiscrVal=2; // increased below

  ClassName = mlnew String[MaxClass+1];
  String *ClassNamei=ClassName;
  PITERATE(TStringList, ni, classVar->values) {
    *ClassNamei = mlnew char[(*ni).length()+1];
    strcpy(*(ClassNamei++), (*ni).c_str());
  }
    
  AttName = mlnew String[MaxAtt+1];
  String *AttNamei = AttName;

  AttValName = mlnew String *[MaxAtt+1];
  String **AttValNamei = AttValName;

  MaxAttVal = mlnew DiscrValue[MaxAtt+1];
  DiscrValue *MaxAttVali = MaxAttVal;

  SpecialStatus = mlnew char [MaxAtt+1];
  char *SpecialStatusi = SpecialStatus;

  PITERATE(TVarList, vi, dom->attributes) {
    // SpecialStatus holds chars, so clear it with 0 rather than the
    // pointer constant NULL
    *(SpecialStatusi++) = 0;

    *AttNamei = mlnew char[(*vi)->name.length()+1];
    strcpy(*(AttNamei++), (*vi)->name.c_str());

    if ((*vi)->varType==TValue::INTVAR) {
      int noOfValues = (*vi).AS(TEnumVariable)->noOfValues();
      if (noOfValues>MaxDiscrVal)  
        MaxDiscrVal=noOfValues;
      *(MaxAttVali++) = noOfValues;

      // One extra slot: index 0 stays NULL, the value names start at index 1
      *AttValNamei = mlnew String[noOfValues+1];
      String *AttValNameii = *(AttValNamei++);
      *(AttValNameii++)=NULL;
      PITERATE(TStringList, ni, (*vi).AS(TEnumVariable)->values) {
        *AttValNameii = mlnew char[(*ni).length()+1];
        strcpy(*(AttValNameii++), (*ni).c_str());
      }
    }
    else {
      // Not discrete: no value names and a value count of 0
      *(AttValNamei++) = NULL;
      *(MaxAttVali++) = 0;
    }
  }

  return true;
}


/*  Converts one example into a newly allocated array of MaxAtt+2 AttValue
    slots, one per value of the example in iteration order.

    Discrete values are stored shifted by one, with 0 standing for a special
    (missing) value; continuous values are stored as floats, with Unknown
    standing for a special value. The last converted slot is then decreased
    by one (see the comment at the end). Any other value type releases the
    array and raises an error. The caller owns the returned array. */
Description convertExample(const TExample &example)
{
  Description converted = mlnew AttValue[MaxAtt+2];
  Description slot = converted;

  const_ITERATE(TExample, value, example) {
    if ((*value).varType == TValue::INTVAR) {
      if ((*value).isSpecial())
        slot->_discr_val = 0;
      else
        slot->_discr_val = int(*value)+1;
      slot++;
    }
    else if ((*value).varType == TValue::FLOATVAR) {
      if ((*value).isSpecial())
        slot->_cont_val = Unknown;
      else
        slot->_cont_val = float(*value);
      slot++;
    }
    else {
      mldelete converted;
      converted = NULL;
      raiseError("invalid attribute type");
    }
  }

  // The last slot written holds the class: undo the +1 shift applied above
  (slot - 1)->_discr_val -= 1;
  return converted;
}


/*  Converts the examples of `table` into the C4.5 library's Item array.
    Examples whose class value is special are left out. On return MaxItem is
    the index of the last stored example (-1 if none was stored). Always
    returns true. */
bool TC45Learner::convertExamples(PExampleGenerator table)
{
  // Sized for all examples, although those without a class are skipped
  Item = mlnew Description[table->numberOfExamples()];
  MaxItem = 0;

  Description *next = Item;
  PEITERATE(ei, table) {
    if (!(*ei).getClass().isSpecial()) {
      *next = convertExample(*ei);
      next++;
      MaxItem++;
    }
  }

  // Turn the count into the index of the last item
  MaxItem--;
  return true;
}


bool TC45Learner::clearExamples()
{ if (Item) {
    Description *Itemi = Item;
    MaxItem++;
    while(MaxItem--)
      mldelete *(Itemi++);
    mldelete Item;
    Item=NULL;
  }
  return true;
}


// Converts the generator's domain and, if that succeeded, its examples.
// Returns true only when both conversions returned true.
bool TC45Learner::convertGenerator(PExampleGenerator gen)
{
  if (!convertDomain(gen->domain))
    return false;
  return convertExamples(gen);
}


// Releases the converted examples first and, if that succeeded, the domain
// description. Returns true only when both steps returned true.
bool TC45Learner::clearGenerator()
{
  if (!clearExamples())
    return false;
  return clearDomain();
}


bool TC45Learner::parseCommandLine(const string &line)
{
  TProgArguments args("f: b u p v: t: w: i: g s m: c:", line);
  if (args.direct.size())
    raiseError("parseCommandLine: invalid parameter %s", args.direct.front().c_str());

  ITERATE(TMultiStringParameters, oi, args.options)
    switch ((*oi).first[0]) {
      case 'f':
      case 'u':
      case 'v':
      raiseError("parseCommandLine: option -%s not accepted", (*oi).first.c_str());

      case 'b':
        batch = true;
		    break;

      case 'p':   
        probThresh = true;
        break;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -