📄 lib_preprocess.cpp
字号:
/*
This file is part of Orange.
Orange is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Orange is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Orange; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Authors: Janez Demsar, Blaz Zupan, 1996--2002
Contact: janez.demsar@fri.uni-lj.si
*/
/********************************
This file includes constructors and specialized methods for the ML* objects defined in project Preprocess
*********************************/
#ifdef _MSC_VER
#pragma warning (disable : 4786 4114 4018 4267 4244)
#endif
#include "vars.hpp"
#include "domain.hpp"
#include "examples.hpp"
#include "examplegen.hpp"
#include "table.hpp"
#include "classify.hpp"
#include "estimateprob.hpp"
#include "distvars.hpp"
#include "distance.hpp"
#include "cls_orange.hpp"
#include "cls_value.hpp"
#include "cls_example.hpp"
#include "lib_kernel.hpp"
#include "vectortemplates.hpp"
#include "maptemplates.hpp"
#include "converts.hpp"
#include "slist.hpp"
#include "externs.px"
/* ************ DISCRETIZATION ************ */
#include "discretize.hpp"
// Binding declarations for the discretizer classes (value-level transformers).
// Each entry gives the class name, its base class and, for the non-abstract
// ones, a string describing the accepted constructor arguments.
// NOTE(review): ABSTRACT / C_NAMED / C_CALL are defined outside this file;
// presumably they are consumed by the Python binding generator -- confirm there.
ABSTRACT(Discretizer, TransformValue)
C_NAMED(EquiDistDiscretizer, Discretizer, "([numberOfIntervals=, firstCut=, step=])")
C_NAMED(IntervalDiscretizer, Discretizer, "([points=])")
C_NAMED(ThresholdDiscretizer, Discretizer, "([threshold=])")
C_NAMED(BiModalDiscretizer, Discretizer, "([low=, high=])")
// Binding declarations for the discretization algorithms. The signature strings
// show that each can be constructed empty or given (attribute, examples, ...)
// directly, in which case a Variable results.
ABSTRACT(Discretization, Orange)
C_CALL (EquiDistDiscretization, Discretization, "() | (attribute, examples[, weight, numberOfIntervals=]) -/-> Variable")
C_CALL ( EquiNDiscretization, Discretization, "() | (attribute, examples[, weight, numberOfIntervals=]) -/-> Variable")
C_CALL ( EntropyDiscretization, Discretization, "() | (attribute, examples[, weight]) -/-> Variable")
C_CALL ( BiModalDiscretization, Discretization, "() | (attribute, examples[, weight]) -/-> Variable")
PyObject *Discretization_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(attribute, examples[, weight]) -> Variable")
{
  /* Call operator of Discretization objects.
     Expects (attribute, examples[, weight]); the attribute is resolved against
     the domain of the given examples and then handed to the wrapped
     TDiscretization, whose result is returned as a wrapped variable.
     Returns PYNULL when the arguments cannot be parsed, the attribute cannot
     be resolved, or the discretization yields no variable. */
  PyTRY
    NO_KEYWORDS

    PyObject *pyAttribute;
    PExampleGenerator examples;
    int weight = 0;
    if (!PyArg_ParseTuple(args, "OO&|O&", &pyAttribute, pt_ExampleGenerator, &examples, pt_weightByGen(examples), &weight))
      PYERROR(PyExc_SystemError, "invalid parameters", PYNULL);

    // Resolve the attribute descriptor within the examples' domain
    PVariable attribute = varFromArg_byDomain(pyAttribute, examples->domain);
    if (!attribute)
      return PYNULL; // varFromArg_byDomain has already set the error message

    PVariable discretized = SELF_AS(TDiscretization)(examples, attribute, weight);
    if (!discretized)
      PYERROR(PyExc_SystemError, "discretization construction failed", PYNULL);

    return WrapOrange(discretized);
  PyCATCH
}
PyObject *Discretizer_constructVariable(PyObject *self, PyObject *var) PYARGS(METH_O, "(variable) -> variable")
{
  /* Builds a new variable from the given one by calling constructVar on the
     wrapped discretizer. Anything that is not an Orange variable is rejected
     with a TypeError. */
  PyTRY
    if (PyOrVariable_Check(var))
      return WrapOrange(PyOrange_AsDiscretizer(self)->constructVar(PyOrange_AsVariable(var)));

    PYERROR(PyExc_TypeError, "invalid parameters (variable expected)", PYNULL);
  PyCATCH
}
PyObject *EquiDistDiscretizer_get_points(PyObject *self)
{
  /* Returns a new Python list with the cut-off points of the discretizer:
     firstCut, firstCut+step, ..., numberOfIntervals-1 values altogether.
     A discretizer with fewer than two intervals has no cut-off points and
     yields an empty list. Returns PYNULL (with the Python error set by the
     failing allocation) if the list or one of its items cannot be created. */
  PyTRY
    CAST_TO(TEquiDistDiscretizer, edd);

    // n intervals are separated by n-1 points; never ask for a negative size,
    // which PyList_New would reject as a bad internal call.
    int nPoints = edd->numberOfIntervals - 1;
    if (nPoints < 0)
      nPoints = 0;

    PyObject *points = PyList_New(nPoints);
    if (!points)
      return PYNULL;

    for(int i = 0; i < nPoints; i++) {
      PyObject *cut = PyFloat_FromDouble(edd->firstCut + i*edd->step);
      if (!cut) {
        Py_DECREF(points);
        return PYNULL;
      }
      // PyList_SetItem takes over the reference to cut
      PyList_SetItem(points, i, cut);
    }

    return points;
  PyCATCH
}
/* ************ FILTERS FOR REGRESSION ************** */
#include "transval.hpp"
// Binding declarations for value transformers derived from TransformValue;
// the strings list the attributes accepted by each constructor.
// NOTE(review): C_NAMED is defined outside this file -- confirm its exact effect there.
C_NAMED(MapIntValue, TransformValue, "([mapping=])")
C_NAMED(Discrete2Continuous, TransformValue, "([value=])")
C_NAMED(Ordinal2Continuous, TransformValue, "([nvalues=])")
C_NAMED(NormalizeContinuous, TransformValue, "([average=, span=])")
// DomainContinuizer: per its signature string, called with a domain or examples
// and yielding a Domain.
C_NAMED(DomainContinuizer, Orange, "(domain|examples, convertClass=, invertClass=, zeroBased=, normalizeContinuous=, baseValueSelection=) -/-> Domain")
// Integer constants attached to DomainContinuizer, one per TDomainContinuizer
// enumerator of the same name (presumably exposed as Python class constants --
// PYCLASSCONSTANT_INT is defined elsewhere).
PYCLASSCONSTANT_INT(DomainContinuizer, LowestIsBase, int(TDomainContinuizer::LowestIsBase))
PYCLASSCONSTANT_INT(DomainContinuizer, FrequentIsBase, int(TDomainContinuizer::FrequentIsBase))
PYCLASSCONSTANT_INT(DomainContinuizer, NValues, int(TDomainContinuizer::NValues))
PYCLASSCONSTANT_INT(DomainContinuizer, Ignore, int(TDomainContinuizer::Ignore))
PYCLASSCONSTANT_INT(DomainContinuizer, ReportError, int(TDomainContinuizer::ReportError))
PYCLASSCONSTANT_INT(DomainContinuizer, AsOrdinal, int(TDomainContinuizer::AsOrdinal))
PYCLASSCONSTANT_INT(DomainContinuizer, AsNormalizedOrdinal, int(TDomainContinuizer::AsNormalizedOrdinal))
PYCLASSCONSTANT_INT(DomainContinuizer, Leave, int(TDomainContinuizer::Leave))
PYCLASSCONSTANT_INT(DomainContinuizer, NormalizeBySpan, int(TDomainContinuizer::NormalizeBySpan))
PYCLASSCONSTANT_INT(DomainContinuizer, NormalizeByVariance, int(TDomainContinuizer::NormalizeByVariance))
/* Converts an optional Python value into the index of the target class value.

   classVar  the class variable of the domain (may be empty)
   pyval     the Python object naming the target value, or NULL if none was given

   Returns the integer value of the target class, -1 if pyval is NULL (no target
   requested), or -2 on failure; callers in this file return PYNULL on -2. */
int getTargetClass(PVariable classVar, PyObject *pyval)
{
  if (!pyval)
    return -1; // not an error, but undefined!

  if (!classVar)
    PYERROR(PyExc_TypeError, "cannot set target class value for class-less domain", -2);

  if (classVar->varType != TValue::INTVAR)
    PYERROR(PyExc_TypeError, "cannot set target value for non-discrete class", -2);

  TValue target;
  if (!convertFromPython(pyval, target, classVar))
    return -2;

  // A special (e.g. unknown) value cannot serve as the target
  if (target.isSpecial())
    PYERROR(PyExc_TypeError, "unknown value passed as class target", -2);

  return target.intV;
}
PyObject *DomainContinuizer_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(domain[, targetClass] | examples[, weightID, targetClass]) -> domain")
{
  /* Call operator of DomainContinuizer. Two argument forms are accepted:
       (domain[, targetClass])
       (examples[, weightID, targetClass])
     In both, targetClass is translated by getTargetClass using the class
     variable of the domain. Returns the wrapped result of the underlying
     TDomainContinuizer, or PYNULL with a Python error set. */
  PyTRY
    NO_KEYWORDS

    // Form 1: the first argument is a domain.
    // PyTuple_GET_ITEM does no bounds checking, so item 0 may only be
    // inspected after making sure the tuple is not empty.
    if (args && (PyTuple_GET_SIZE(args) >= 1) && (PyTuple_GET_SIZE(args) <= 2) && PyOrDomain_Check(PyTuple_GET_ITEM(args, 0))) {
      PDomain domain;
      PyObject *pyval = PYNULL;
      if (!PyArg_ParseTuple(args, "O&|O", cc_Domain, &domain, &pyval))
        return PYNULL;

      int targetClass = getTargetClass(domain->classVar, pyval);
      if (targetClass == -2)
        return PYNULL;

      return WrapOrange(SELF_AS(TDomainContinuizer)(domain, targetClass));
    }

    // Form 2: examples, optionally followed by a weight and a target class.
    PExampleGenerator egen;
    int weightID = 0;
    PyObject *pyval = PYNULL;
    if (!PyArg_ParseTuple(args, "O&|O&O", pt_ExampleGenerator, &egen, pt_weightByGen(egen), &weightID, &pyval))
      PYERROR(PyExc_AttributeError, "DomainContinuizer.__call__: domain or examples (and, optionally, weight attribute) expected", PYNULL);

    int targetClass = getTargetClass(egen->domain->classVar, pyval);
    if (targetClass == -2)
      return PYNULL;

    return WrapOrange(SELF_AS(TDomainContinuizer)(egen, weightID, targetClass));
  PyCATCH
}
/* ************ REDUNDANCIES ************ */
#include "redundancy.hpp"
// Binding declarations for the redundancy-removal classes; all three concrete
// removers share the RemoveRedundant base and the same signature string.
// NOTE(review): ABSTRACT / C_CALL / C_CALL3 are defined outside this file --
// confirm their exact effect there.
ABSTRACT(RemoveRedundant, Orange)
C_CALL(RemoveRedundantByInduction, RemoveRedundant, "([examples[, weightID][, suspicious]) -/-> Domain")
C_CALL(RemoveRedundantByQuality, RemoveRedundant, "([examples[, weightID][, suspicious]) -/-> Domain")
C_CALL(RemoveRedundantOneValue, RemoveRedundant, "([examples[, weightID][, suspicious]) -/-> Domain")
// RemoveUnusedValues is declared with the three-name form of the macro and
// derives directly from Orange.
C_CALL3(RemoveUnusedValues, RemoveUnusedValues, Orange, "([[attribute, ]examples[, weightId]]) -/-> attribute")
PyObject *RemoveRedundant_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("([examples[, weightID][, suspicious]) -/-> Domain")
{
  /* Call operator of RemoveRedundant objects.
     Parses the examples, an optional list of suspicious attributes and an
     optional weight, runs the wrapped TRemoveRedundant and returns the
     resulting domain.
     NOTE(review): the format string reads the suspicious list *before* the
     weight, while the documentation string lists weightID first -- confirm
     which order is intended. */
  PyTRY
    NO_KEYWORDS

    PExampleGenerator examples;
    PyObject *pySuspicious = NULL;
    int weightID = 0;
    if (!PyArg_ParseTuple(args, "O&|OO&:RemoveRedundant.call", pt_ExampleGenerator, &examples, &pySuspicious, pt_weightByGen(examples), &weightID))
      return PYNULL;

    // Translate the suspicious attributes (if any were given) using the
    // domain of the examples; without them a NULL list is passed on.
    TVarList suspicious;
    TVarList *suspiciousArg = NULL;
    if (pySuspicious) {
      if (!varListFromDomain(pySuspicious, examples->domain, suspicious))
        return PYNULL;
      suspiciousArg = &suspicious;
    }

    PDomain reduced = SELF_AS(TRemoveRedundant)(examples, suspiciousArg, NULL, weightID);
    return WrapOrange(reduced);
  PyCATCH
}
PyObject *RemoveUnusedValues_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(attribute, examples[, weightId]) -> attribute")
{
  /* Call operator of RemoveUnusedValues. Two argument forms are tried in turn:
       (attribute, examples[, weightID[, removeOneValued]])
       (examples[, weightID[, removeOneValued[, checkClass]]])
     A non-negative removeOneValued temporarily overrides the object's own
     removeOneValued setting for the duration of this call only. The original
     setting is put back on every exit path, including when the underlying
     call throws (the exception is then rethrown for PyCATCH to report). */
  PyTRY
    NO_KEYWORDS
    CAST_TO(TRemoveUnusedValues, ruv);

    const bool storeOv = ruv->removeOneValued;
    PExampleGenerator egen;
    PVariable var;
    int weightID = 0;
    int removeOneValued = -1; // negative: keep the object's own setting
    int checkClass = 0;

    // Form 1: a single attribute
    if (PyArg_ParseTuple(args, "O&O&|O&i:RemoveUnusedValues.call", cc_Variable, &var, pt_ExampleGenerator, &egen, pt_weightByGen(egen), &weightID, &removeOneValued)) {
      PyObject *res;
      try {
        if (removeOneValued >= 0)
          ruv->removeOneValued = removeOneValued != 0;
        res = WrapOrange(ruv->call(var, egen, weightID));
      }
      catch (...) {
        ruv->removeOneValued = storeOv;
        throw;
      }
      ruv->removeOneValued = storeOv;
      return res;
    }

    // The first form did not match; discard its error and try the second
    PyErr_Clear();

    // Form 2: the whole example set
    if (PyArg_ParseTuple(args, "O&|O&ii:RemoveUnusedValues.call", pt_ExampleGenerator, &egen, pt_weightByGen(egen), &weightID, &removeOneValued, &checkClass)) {
      PyObject *res;
      try {
        if (removeOneValued >= 0)
          ruv->removeOneValued = removeOneValued != 0;
        res = WrapOrange(ruv->call(egen, weightID, checkClass != 0));
      }
      catch (...) {
        ruv->removeOneValued = storeOv;
        throw;
      }
      ruv->removeOneValued = storeOv;
      return res;
    }

    PYERROR(PyExc_AttributeError, "RemoveUnusedValues.__call__: invalid arguments", PYNULL);
  PyCATCH
}
/* ************ PREPROCESSORS ************ */
#include "preprocessors.hpp"
// Binding declarations for the preprocessor classes. All derive from
// Preprocessor; the strings describe the call arguments and the attributes
// each preprocessor accepts.
// NOTE(review): ABSTRACT / C_CALL / C_NAMED are defined outside this file --
// confirm their exact effect there.
ABSTRACT(Preprocessor, Orange)
// Attribute selection
C_CALL(Preprocessor_select, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")
C_CALL(Preprocessor_ignore, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")
// Example selection
C_CALL(Preprocessor_take, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")
C_CALL(Preprocessor_drop, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")
C_CALL(Preprocessor_removeDuplicates, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
C_CALL(Preprocessor_takeMissing, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
C_CALL(Preprocessor_dropMissing, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
C_CALL(Preprocessor_takeMissingClasses, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
C_CALL(Preprocessor_dropMissingClasses, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
// Adding missing values and noise
C_CALL(Preprocessor_addMissing, Preprocessor, "([examples[, weightID]] [<see the manual>]) -/-> ExampleTable")
C_CALL(Preprocessor_addMissingClasses, Preprocessor, "([examples[, weightID]] [classMissing=<float>]) -/-> ExampleTable")
C_CALL(Preprocessor_addNoise, Preprocessor, "([examples[, weightID]] [<see the manual>]) -/-> ExampleTable")
C_CALL(Preprocessor_addClassNoise, Preprocessor, "([examples[, weightID]] [proportion=<float>]) -/-> ExampleTable")
C_CALL(Preprocessor_addGaussianNoise, Preprocessor, "([examples[, weightID]] [<see the manual>]) -/-> ExampleTable")
C_CALL(Preprocessor_addGaussianClassNoise, Preprocessor, "([examples[, weightID]] [deviation=<float>]) -/-> ExampleTable")
// Weighting
C_CALL(Preprocessor_addClassWeight, Preprocessor, "([examples[, weightID]] [equalize=, classWeights=) -/-> ExampleTable")
C_CALL(Preprocessor_addCensorWeight, Preprocessor, "([examples[, weightID]] [method=0-km, 1-nmr, 2-linear, outcomeVar=, eventValue=, timeID=, maxTime=]) -/-> ExampleTable")
// Filtering, imputation and discretization
C_CALL(Preprocessor_filter, Preprocessor, "([examples[, weightID]] [filter=]) -/-> ExampleTable")
C_CALL(Preprocessor_imputeByLearner, Preprocessor, "([examples[, weightID]] [learner=]) -/-> ExampleTable")
C_CALL(Preprocessor_discretize, Preprocessor, "([examples[, weightID]] [notClass=, method=, attributes=<list-of-strings>]) -/-> ExampleTable")
C_NAMED(ImputeClassifier, Classifier, "([classifierFromVar=][imputer=])")
// Integer constants attached to Preprocessor_addCensorWeight, taken from the
// TPreprocessor_addCensorWeight enumerators km, linear and bayes.
// NOTE(review): the signature string above names the methods "0-km, 1-nmr,
// 2-linear", which does not match the KM/Linear/Bayes set -- confirm which is current.
PYCLASSCONSTANT_INT(Preprocessor_addCensorWeight, KM, TPreprocessor_addCensorWeight::km)
PYCLASSCONSTANT_INT(Preprocessor_addCensorWeight, Linear, TPreprocessor_addCensorWeight::linear)
PYCLASSCONSTANT_INT(Preprocessor_addCensorWeight, Bayes, TPreprocessor_addCensorWeight::bayes)
PyObject *Preprocessor_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(examples[, weightID]) -> ExampleTable")
{
  /* Call operator of Preprocessor objects.
     Takes (examples[, weightID]) and runs the wrapped TPreprocessor on them.
     Returns the wrapped result alone, or a tuple (result, newWeight) if a
     weight was given by the caller or the preprocessor reported a non-zero
     new weight. */
  PyTRY
    NO_KEYWORDS

    int weightID = 0;
    PExampleGenerator egen = exampleGenFromArgs(args, weightID);
    if (!egen)
      PYERROR(PyExc_TypeError, "attribute error (example generator expected)", PYNULL);

    // Remember whether the caller supplied a weight before running the preprocessor
    const bool weightGiven = (weightID != 0);

    // Initialized so that the test below never reads an indeterminate value
    // should the preprocessor leave it unassigned.
    int newWeight = 0;
    PExampleGenerator res = SELF_AS(TPreprocessor)(egen, weightID, newWeight);

    PyObject *wrappedGen = WrapOrange(res);
    // "N" hands the reference to wrappedGen over to the tuple
    return (weightGiven || newWeight) ? Py_BuildValue("Ni", wrappedGen, newWeight) : wrappedGen;
  PyCATCH
}
PyObject *Preprocessor_selectionVector(PyObject *self, PyObject *args, PyObject *) PYARGS(METH_VARARGS, "(examples[, weightID])")
{
  /* Takes (examples[, weightID]) and returns the wrapped result of the
     preprocessor's selectionVector for those examples. Raises TypeError when
     no example generator can be obtained from the arguments. */
  PyTRY
    int weight = 0;
    PExampleGenerator examples = exampleGenFromArgs(args, weight);
    if (!examples)
      PYERROR(PyExc_TypeError, "attribute error (example generator expected)", PYNULL);

    return WrapOrange(SELF_AS(TPreprocessor).selectionVector(examples, weight));
  PyCATCH
}
#include "stringvars.hpp"
// Shorthand for the MapMethods instantiation of the variable -> value-filter
// map (keys are PVariable, values are PValueFilter); its _valueFromPython
// helper is used below when assigning items.
typedef MapMethods<PVariableFilterMap, TVariableFilterMap, PVariable, PValueFilter> TMM_VariableFilterMap;
int VariableFilterMap_setitemlow(TVariableFilterMap *aMap, PVariable var, PyObject *pyvalue)
{
PValueFilter value;
if (TMM_VariableFilterMap::_valueFromPython(pyvalue, value)) {
aMap->__ormap[var] = value;
return 0;
}
PyErr_Clear();
if (var->varType == TValue::FLOATVAR) {
float min, max;
if (!PyArg_ParseTuple(pyvalue, "ff:VariableFilterMap.__setitem__", &min, &max))
return -1;
aMap->__ormap[var] = (min<=max) ? mlnew TValueFilter_continuous(ILLEGAL_INT, min, max)
: mlnew TValueFilter_continuous(ILLEGAL_INT, max, min, true);
return 0;
}
if (var->varType == TValue::INTVAR) {
TValueFilter_discrete *vfilter = mlnew TValueFilter_discrete(ILLEGAL_INT, var);
PValueFilter wvfilter = vfilter;
TValueList &valueList = vfilter->values.getReference();
if (PyTuple_Check(pyvalue) || PyList_Check(pyvalue)) {
PyObject *iterator = PyObject_GetIter(pyvalue);
for(PyObject *item = PyIter_Next(iterator); item; item = PyIter_Next(iterator)) {
TValue value;
if (!convertFromPython(item, value, var)) {
Py_DECREF(item);
Py_DECREF(iterator);
return -1;
}
Py_DECREF(item);
if (value.isSpecial())
vfilter->acceptSpecial = 1;
else
valueList.push_back(value);
}
Py_DECREF(iterator);
}
else {
TValue value;
if (!convertFromPython(pyvalue, value, var))
return -1;
if (value.isSpecial())
vfilter->acceptSpecial = 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -