📄 c4.5.cpp
字号:
/*
This file is part of Orange.
Orange is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Orange is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Orange; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Authors: Janez Demsar, Blaz Zupan, 1996--2002
Contact: janez.demsar@fri.uni-lj.si
*/
#include "vars.hpp"
#include "examples.hpp"
#include "examplegen.hpp"
#include "table.hpp"
#include "classify.hpp"
#include "learn.hpp"
#include "getarg.hpp"
#include "c4.5.ppp"
// Class description for the vector type holding PC45TreeNode elements.
// NOTE(review): the macro is defined elsewhere; its exact expansion is not visible here.
DEFINE_TOrangeVector_classDescription(PC45TreeNode, "TC45TreeNodeList", true, ORANGE_API)
// Set to true by loadC45() once the C4.5 library has been loaded.
bool c45Loaded = false;
// Signature of the library's "learn" entry point (resolved in dynloadC45).
typedef void *learnFunc(char gainRatio, char subset, char batch, char probThresh,
int trials, int minObjs, int window, int increment, float cf, char prune);
// Signature of the library's "guarded_collect" entry point (resolved in dynloadC45).
typedef void garbageFunc();
// Entry points filled in by dynloadC45().
learnFunc *c45learn;
garbageFunc *c45garbage;
// Address of the library's "c45Data" symbol; loadC45() copies what it points to into c45data.
void *pc45data;
// Python module object defined elsewhere; loadC45() reads its "__file__" entry.
extern PyObject *orangeModule;
// A single attribute value of a converted example: either a discrete value
// index or a continuous value. A Description is an array of such values
// (one converted example, see convertExample).
typedef union _attribute_value {
DiscrValue _discr_val;
float _cont_val;
} AttValue, *Description;
// Marker stored in a continuous slot when the value is undefined (see
// convertExample). Parenthesized so it stays one operand inside expressions.
#define Unknown (-999)
// Numeric tags. NOTE(review): presumably the C4.5 tree node types; they are
// not used in the visible part of this file.
#define BrDiscr 1
#define ThreshContin 2
#define BrSubset 3
// Mask with only bit number b set.
#define Bit(b) (1 << (b))
// Nonzero iff bit number b is set in the byte array s (8 bits per element,
// lowest bit first). Both arguments are parenthesized so that expressions
// such as `In(i, base + offset)` index the intended array.
#define In(b,s) (((s)[(b) >> 3]) & Bit((b) & 07))
// Table of pointers that loadC45() fills by copying from pc45data (the
// library's "c45Data" symbol). Every member is the address of a variable; the
// MaxAtt, MaxClass, ... macros further down dereference these members.
struct {
short *rMaxAtt, *rMaxClass, *rMaxDiscrVal;
int *rMaxItem;
Description **rItem;
DiscrValue **rMaxAttVal;
char **rSpecialStatus, ***rClassName, ***rAttName, ****rAttValName;
} c45data;
// Base name of the C4.5 library file; builds with _DEBUG defined use the "_d" variant.
#ifdef _DEBUG
#define C45STEM "c45_d"
#else
#define C45STEM "c45"
#endif
// PATHSEP: directory separator used when loadC45() strips the file name.
// C45NAME: library file name with a leading separator, ready to be appended
// to a directory path.
#ifdef _MSC_VER
#define PATHSEP '\\'
#define C45NAME "\\" C45STEM ".dll"
#else
#define PATHSEP '/'
#define C45NAME "/" C45STEM ".so"
#endif
#if defined _MSC_VER
#include <direct.h>
#define getcwd _getcwd
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
#include <windows.h>
/* Resolves the exported symbol `name` in the DLL `handle`.
   Reports a C45Loader error when the symbol is missing; otherwise returns
   the symbol's address. */
void *getsym(HINSTANCE handle, const char *name)
{
  void *address = GetProcAddress(handle, name);
  if (address)
    return address;

  raiseErrorWho("C45Loader", "invalid %s, cannot find symbol %s", C45NAME, name);
  return address;
}
/* Loads the C4.5 DLL from `pathname` and resolves the three symbols this file
   uses: "c45Data" (into pc45data), "learn" (into c45learn) and
   "guarded_collect" (into c45garbage).
   Reports a C45Loader error if the DLL cannot be loaded; getsym reports one
   if a symbol is missing. */
void dynloadC45(const char *pathname)
{
  // pathname is a narrow string, so the ANSI entry point is named explicitly:
  // the LoadLibrary macro selects the wide-character variant when UNICODE is
  // defined, which does not accept a const char *.
  HINSTANCE c45Dll = LoadLibraryA(pathname);
  if (!c45Dll)
    // Report the full path that was tried, not just the library file name.
    raiseErrorWho("C45Loader", "cannot load %s", pathname);

  pc45data = getsym(c45Dll, "c45Data");
  c45learn = (learnFunc *)(getsym(c45Dll, "learn"));
  c45garbage = (garbageFunc *)(getsym(c45Dll, "guarded_collect"));
}
#elif defined LINUX || defined FREEBSD || defined DARWIN
#include <dlfcn.h>
#include <unistd.h>
/* Resolves the symbol `name` in the shared object `handle`.
   Reports a C45Loader error when the symbol is missing; otherwise returns
   the symbol's address. */
void *getsym(void *handle, const char *name)
{
  void *address = dlsym(handle, name);
  if (address)
    return address;

  raiseErrorWho("C45Loader", "invalid %s, cannot find symbol %s", C45NAME, name);
  return address;
}
void dynloadC45(char pathname[])
{
void *handle = dlopen(pathname, 0 /*dlopenflags*/);
if (handle == NULL)
raiseErrorWho("C45Loader", dlerror());
pc45data = getsym(handle, "c45Data");
c45learn = (learnFunc *)getsym(handle, "learn");
c45garbage = (garbageFunc *)getsym(handle, "guarded_collect");
}
#else
/* Variant compiled when none of the platform branches above applies: it only
   reports that C4.5 cannot be loaded here. The path argument is ignored. */
void dynloadC45(char [])
{
  raiseErrorWho("C45Loader", "c45 is not supported on this platform");
}
#endif
// Drop any IGNORE macro defined by an earlier header.
// NOTE(review): presumably guards against a definition pulled in by
// <windows.h> clashing with an identifier used later - confirm.
#ifdef IGNORE
#undef IGNORE
#endif
void loadC45()
{
char *buf = NULL, *bp;
PyObject *orangeDirName = PyDict_GetItemString(PyModule_GetDict(orangeModule), "__file__");
if (orangeDirName) {
char *odn = PyString_AsString(orangeDirName);
buf = (char *)malloc(strlen(odn) + strlen(C45NAME) + 1);
strcpy(buf, odn);
bp = buf + strlen(buf);
while ((bp!=buf) && (*bp!=PATHSEP))
bp--;
*bp = 0;
}
// If path is empty, orange.so was probably loaded from the working directory
if (!buf || !*buf) {
buf = (char *)realloc(buf, 512);
if (!getcwd(buf, 511))
raiseErrorWho("C45Loader", C45NAME " cannot be found");
bp = buf + strlen(buf);
}
strcpy(bp, C45NAME);
dynloadC45(buf);
memcpy(&c45data, pc45data, sizeof(c45data));
c45Loaded = true;
}
// Shorthands that dereference the pointers stored in c45data, so the code
// below can read and assign the pointed-to variables by these short names.
// They are only meaningful after loadC45() has filled c45data.
#define MaxAtt (*c45data.rMaxAtt)
#define MaxClass (*c45data.rMaxClass)
#define MaxDiscrVal (*c45data.rMaxDiscrVal)
#define MaxItem (*c45data.rMaxItem)
#define Item (*c45data.rItem)
#define MaxAttVal (*c45data.rMaxAttVal)
#define SpecialStatus (*c45data.rSpecialStatus)
#define ClassName (*c45data.rClassName)
#define AttName (*c45data.rAttName)
#define AttValName (*c45data.rAttValName)
/* Creates a learner with the default settings given in the initializer list
   and makes sure the C4.5 library is available: loadC45() is run unless
   c45Loaded is already set. */
TC45Learner::TC45Learner()
  : gainRatio(true), subset(false), batch(true), probThresh(false),
    minObjs(2), window(0), increment(0),
    cf(0.25), trials(10), prune(true),
    convertToOrange(false), storeContingencies(false), storeExamples(false)
{
  if (c45Loaded)
    return;

  loadC45();
}
/* Releases the domain description built by convertDomain: class names,
   attribute names, per-attribute value names with their counts, and the
   SpecialStatus array. Every released pointer is reset to NULL so that a
   repeated call is harmless. Always returns true.

   Side effects worth knowing: MaxClass is left at -1, and the counts in
   MaxAttVal are consumed while the value names are being freed.

   NOTE(review): the arrays were allocated with the array form of mlnew but
   are released with plain mldelete, as in the original code; mldelete is
   defined elsewhere - confirm it is appropriate for arrays. */
bool TC45Learner::clearDomain()
{
  if (ClassName) {
    String *ClassNamei = ClassName;
    // MaxClass is the index of the last class, so there are MaxClass+1 names.
    MaxClass++;
    while(MaxClass--)
      mldelete *(ClassNamei++);
    mldelete ClassName;
    ClassName = NULL;
  }

  if (AttName) {
    String *AttNamei = AttName;
    int atts = MaxAtt+1;
    while(atts--)
      mldelete *(AttNamei++);
    mldelete AttName;
    AttName = NULL;
  }

  if (AttValName && MaxAttVal) {
    String **AttValNamei = AttValName;
    DiscrValue *MaxAttVali = MaxAttVal;
    // There are MaxAtt+1 attributes (MaxAtt is the last index), matching the
    // AttName loop above; stopping at MaxAtt would leak the last attribute's
    // value names.
    for(int atts = MaxAtt+1; atts--; AttValNamei++, MaxAttVali++) {
      // Attributes without value names (stored as NULL by convertDomain)
      // have nothing to free.
      if (*AttValNamei) {
        String *AttValNameii = *AttValNamei + 1; // the first one is NULL...
        while((*MaxAttVali)--)
          mldelete *(AttValNameii++);
        mldelete *AttValNamei;
      }
    }
    mldelete AttValName;
    mldelete MaxAttVal;
    // Reset the shared pointers themselves (not the local iterators) so no
    // dangling pointer is left behind.
    AttValName = NULL;
    MaxAttVal = NULL;
  }

  if (SpecialStatus) {
    mldelete SpecialStatus;
    SpecialStatus = NULL;
  }

  return true;
}
/* Fills the shared domain description from `dom`.

   Sets MaxAtt and MaxClass to the index of the last attribute / class value,
   copies the class value names into ClassName and, per attribute, stores:
     - a copy of its name in AttName,
     - 0 in SpecialStatus,
     - for attributes whose varType is TValue::INTVAR: the number of values in
       MaxAttVal and copies of the value names in AttValName (slot 0 of each
       name array is NULL, names start at slot 1),
     - for all other attributes: 0 in MaxAttVal and NULL in AttValName.
   MaxDiscrVal ends up as the largest number of values of any such attribute,
   but at least 2.

   Reports an error if the class variable is not a TEnumVariable. Returns true.
   Anything previously stored in these variables is overwritten, not freed. */
bool TC45Learner::convertDomain(PDomain dom)
{
  TEnumVariable *classVar = dom->classVar.AS(TEnumVariable);
  if (!classVar)
    raiseError("domain with discrete class attribute expected");

  MaxAtt = dom->attributes->size()-1;
  MaxClass = classVar->noOfValues()-1;
  MaxDiscrVal = 2; // increased below

  ClassName = mlnew String[MaxClass+1];
  String *ClassNamei = ClassName;
  PITERATE(TStringList, ni, classVar->values) {
    *ClassNamei = mlnew char[(*ni).length()+1];
    strcpy(*(ClassNamei++), (*ni).c_str());
  }

  AttName = mlnew String[MaxAtt+1];
  String *AttNamei = AttName;
  AttValName = mlnew String *[MaxAtt+1];
  String **AttValNamei = AttValName;
  MaxAttVal = mlnew DiscrValue[MaxAtt+1];
  DiscrValue *MaxAttVali = MaxAttVal;
  SpecialStatus = mlnew char [MaxAtt+1];
  char *SpecialStatusi = SpecialStatus;

  PITERATE(TVarList, vi, dom->attributes) {
    // SpecialStatus holds chars, so store the integer 0 rather than the
    // null-pointer constant.
    *(SpecialStatusi++) = 0;

    *AttNamei = mlnew char[(*vi)->name.length()+1];
    strcpy(*(AttNamei++), (*vi)->name.c_str());

    if ((*vi)->varType==TValue::INTVAR) {
      // Cast once and reuse it for both the count and the value names.
      TEnumVariable *enumVar = (*vi).AS(TEnumVariable);
      int noOfValues = enumVar->noOfValues();
      if (noOfValues>MaxDiscrVal)
        MaxDiscrVal = noOfValues;
      *(MaxAttVali++) = noOfValues;

      *AttValNamei = mlnew String[noOfValues+1];
      String *AttValNameii = *(AttValNamei++);
      *(AttValNameii++) = NULL; // slot 0 is unused; names start at slot 1
      PITERATE(TStringList, ni, enumVar->values) {
        *AttValNameii = mlnew char[(*ni).length()+1];
        strcpy(*(AttValNameii++), (*ni).c_str());
      }
    }
    else {
      *(AttValNamei++) = NULL;
      *(MaxAttVali++) = 0;
    }
  }

  return true;
}
/* Converts one example into a newly allocated array of MaxAtt+2 AttValue
   slots, written in the order the example's values are iterated:
     - TValue::INTVAR values become 0 when special, otherwise value index + 1;
     - TValue::FLOATVAR values become Unknown when special, otherwise the
       float value;
     - any other value type frees the array and reports an error.
   Finally the last value written is decreased by one (the class value).
   The caller owns the returned array. */
Description convertExample(const TExample &example)
{
  Description converted = mlnew AttValue[MaxAtt+2];
  Description slot = converted;

  const_ITERATE(TExample, vali, example) {
    if ((*vali).varType == TValue::INTVAR) {
      if ((*vali).isSpecial())
        slot->_discr_val = 0;
      else
        slot->_discr_val = int(*vali)+1;
      slot++;
    }
    else if ((*vali).varType == TValue::FLOATVAR) {
      if ((*vali).isSpecial())
        slot->_cont_val = Unknown;
      else
        slot->_cont_val = float(*vali);
      slot++;
    }
    else {
      mldelete converted;
      converted = NULL;
      raiseError("invalid attribute type");
    }
  }

  // Decrease class!
  (slot - 1)->_discr_val--;
  return converted;
}
/* Converts the examples of `table` into the shared Item array.

   Item is allocated with one slot per example reported by
   table->numberOfExamples(); examples whose class is special are skipped.
   On return MaxItem is the index of the last stored example (-1 if none).
   Returns true.

   MaxItem is kept equal to the index of the last stored example throughout
   the loop. clearExamples frees MaxItem+1 entries, so if convertExample
   raises part-way through, only slots that were actually filled are ever
   freed. */
bool TC45Learner::convertExamples(PExampleGenerator table)
{
  Item = mlnew Description[table->numberOfExamples()];
  Description *Itemi = Item;
  MaxItem = -1;

  PEITERATE(ei, table)
    if (!(*ei).getClass().isSpecial()) {
      *Itemi = convertExample(*ei);
      // Advance only after the slot holds a valid pointer.
      Itemi++;
      MaxItem++;
    }

  return true;
}
bool TC45Learner::clearExamples()
{ if (Item) {
Description *Itemi = Item;
MaxItem++;
while(MaxItem--)
mldelete *(Itemi++);
mldelete Item;
Item=NULL;
}
return true;
}
/* Converts the domain of `gen` and then, only if that succeeded, its
   examples. Returns true when both conversions returned true. */
bool TC45Learner::convertGenerator(PExampleGenerator gen)
{
  if (!convertDomain(gen->domain))
    return false;

  return convertExamples(gen);
}
/* Frees the converted examples and then, only if that succeeded, the domain
   description. Returns true when both steps returned true. */
bool TC45Learner::clearGenerator()
{
  if (!clearExamples())
    return false;

  return clearDomain();
}
bool TC45Learner::parseCommandLine(const string &line)
{
TProgArguments args("f: b u p v: t: w: i: g s m: c:", line);
if (args.direct.size())
raiseError("parseCommandLine: invalid parameter %s", args.direct.front().c_str());
ITERATE(TMultiStringParameters, oi, args.options)
switch ((*oi).first[0]) {
case 'f':
case 'u':
case 'v':
raiseError("parseCommandLine: option -%s not accepted", (*oi).first.c_str());
case 'b':
batch = true;
break;
case 'p':
probThresh = true;
break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -