⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lib_preprocess.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 4 页
字号:
/*
    This file is part of Orange.

    Orange is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Orange is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Orange; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Authors: Janez Demsar, Blaz Zupan, 1996--2002
    Contact: janez.demsar@fri.uni-lj.si
*/


/********************************

This file includes constructors and specialized methods for the ML* objects defined in the project Preprocess

*********************************/

#ifdef _MSC_VER
  #pragma warning (disable : 4786 4114 4018 4267 4244)
#endif

#include "vars.hpp"
#include "domain.hpp"
#include "examples.hpp"
#include "examplegen.hpp"
#include "table.hpp"
#include "classify.hpp"
#include "estimateprob.hpp"
#include "distvars.hpp"
#include "distance.hpp"

#include "cls_orange.hpp"
#include "cls_value.hpp"
#include "cls_example.hpp"
#include "lib_kernel.hpp"
#include "vectortemplates.hpp"
#include "maptemplates.hpp"

#include "converts.hpp"
#include "slist.hpp"

#include "externs.px"


/* ************ DISCRETIZATION ************ */

#include "discretize.hpp"


ABSTRACT(Discretizer, TransformValue)
C_NAMED(EquiDistDiscretizer, Discretizer, "([numberOfIntervals=, firstCut=, step=])")
C_NAMED(IntervalDiscretizer, Discretizer, "([points=])")
C_NAMED(ThresholdDiscretizer, Discretizer, "([threshold=])")
C_NAMED(BiModalDiscretizer, Discretizer, "([low=, high=])")

ABSTRACT(Discretization, Orange)
C_CALL (EquiDistDiscretization, Discretization, "() | (attribute, examples[, weight, numberOfIntervals=]) -/-> Variable")
C_CALL (   EquiNDiscretization, Discretization, "() | (attribute, examples[, weight, numberOfIntervals=]) -/-> Variable")
C_CALL ( EntropyDiscretization, Discretization, "() | (attribute, examples[, weight]) -/-> Variable")
C_CALL ( BiModalDiscretization, Discretization, "() | (attribute, examples[, weight]) -/-> Variable")


PyObject *Discretization_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(attribute, examples[, weight]) -> Variable")
{
  PyTRY
    NO_KEYWORDS

    PyObject *variable;
    PExampleGenerator egen;
    int weightID=0;
    if (!PyArg_ParseTuple(args, "OO&|O&", &variable, pt_ExampleGenerator, &egen, pt_weightByGen(egen), &weightID)) 
      PYERROR(PyExc_SystemError, "invalid parameters", PYNULL);

    PVariable toDiscretize = varFromArg_byDomain(variable, egen->domain);
    if (!toDiscretize)
      return PYNULL; // varFromArg_byDomain has already set the error message

    PVariable discr = SELF_AS(TDiscretization)(egen, toDiscretize, weightID);
    if (!discr)
      PYERROR(PyExc_SystemError, "discretization construction failed", PYNULL);

    return WrapOrange(discr);
  PyCATCH
}


PyObject *Discretizer_constructVariable(PyObject *self, PyObject *var) PYARGS(METH_O, "(variable) -> variable")
{ PyTRY
    if (!PyOrVariable_Check(var))
      PYERROR(PyExc_TypeError, "invalid parameters (variable expected)", PYNULL);

    return WrapOrange(PyOrange_AsDiscretizer(self)->constructVar(PyOrange_AsVariable(var)));
  PyCATCH
}


PyObject *EquiDistDiscretizer_get_points(PyObject *self)
{ PyTRY
   CAST_TO(TEquiDistDiscretizer, edd);
    int nint = edd->numberOfIntervals - 1;
    PyObject *res = PyList_New(nint);
    for(int i = 0; i < nint; i++)
      PyList_SetItem(res, i, PyFloat_FromDouble(edd->firstCut + i*edd->step));
    return res;
  PyCATCH
}


/* ************ FILTERS FOR REGRESSION ************** */

#include "transval.hpp"

C_NAMED(MapIntValue, TransformValue, "([mapping=])")
C_NAMED(Discrete2Continuous, TransformValue, "([value=])")
C_NAMED(Ordinal2Continuous, TransformValue, "([nvalues=])")
C_NAMED(NormalizeContinuous, TransformValue, "([average=, span=])")

C_NAMED(DomainContinuizer, Orange, "(domain|examples, convertClass=, invertClass=, zeroBased=, normalizeContinuous=, baseValueSelection=) -/-> Domain")

PYCLASSCONSTANT_INT(DomainContinuizer, LowestIsBase, int(TDomainContinuizer::LowestIsBase))
PYCLASSCONSTANT_INT(DomainContinuizer, FrequentIsBase, int(TDomainContinuizer::FrequentIsBase))
PYCLASSCONSTANT_INT(DomainContinuizer, NValues, int(TDomainContinuizer::NValues))
PYCLASSCONSTANT_INT(DomainContinuizer, Ignore, int(TDomainContinuizer::Ignore))
PYCLASSCONSTANT_INT(DomainContinuizer, ReportError, int(TDomainContinuizer::ReportError))
PYCLASSCONSTANT_INT(DomainContinuizer, AsOrdinal, int(TDomainContinuizer::AsOrdinal))
PYCLASSCONSTANT_INT(DomainContinuizer, AsNormalizedOrdinal, int(TDomainContinuizer::AsNormalizedOrdinal))

PYCLASSCONSTANT_INT(DomainContinuizer, Leave, int(TDomainContinuizer::Leave))
PYCLASSCONSTANT_INT(DomainContinuizer, NormalizeBySpan, int(TDomainContinuizer::NormalizeBySpan))
PYCLASSCONSTANT_INT(DomainContinuizer, NormalizeByVariance, int(TDomainContinuizer::NormalizeByVariance))

int getTargetClass(PVariable classVar, PyObject *pyval)
{
  if (pyval) {
    if (!classVar)
      PYERROR(PyExc_TypeError, "cannot set target class value for class-less domain", -2);
    if (classVar->varType != TValue::INTVAR)
      PYERROR(PyExc_TypeError, "cannot set target value for non-discrete class", -2);

    TValue targetValue;
    if (!convertFromPython(pyval, targetValue, classVar))
      return -2;
    if (targetValue.isSpecial())
      PYERROR(PyExc_TypeError, "unknown value passed as class target", -2)
    else
      return targetValue.intV;
  }
  return -1; // not an error, but undefined!
}

PyObject *DomainContinuizer_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(domain[, targetClass] | examples[, weightID, targetClass]) -> domain")
{ 
  PyTRY
    NO_KEYWORDS

    if (args && (PyTuple_GET_SIZE(args)<=2) && PyOrDomain_Check(PyTuple_GET_ITEM(args, 0))) {
      PDomain domain;
      PyObject *pyval = PYNULL;
      if (!PyArg_ParseTuple(args, "O&|O", cc_Domain, &domain, &pyval))
        return PYNULL;
      int targetClass = getTargetClass(domain->classVar, pyval);
      if (targetClass == -2)
        return PYNULL;
     
      return WrapOrange(SELF_AS(TDomainContinuizer)(domain, targetClass));
    }

    PExampleGenerator egen;
    int weightID = 0;
    PyObject *pyval = PYNULL;
    if (!PyArg_ParseTuple(args, "O&|O&O", pt_ExampleGenerator, &egen, pt_weightByGen(egen), &weightID, &pyval))
      PYERROR(PyExc_AttributeError, "DomainContinuizer.__call__: domain or examples (and, optionally, weight attribute) expected", PYNULL);

    int targetClass = getTargetClass(egen->domain->classVar, pyval);
    if (targetClass == -2)
      return PYNULL;

    //printf("%p-%p\n", self, ((TPyOrange *)self)->ptr);
    return WrapOrange(SELF_AS(TDomainContinuizer)(egen, weightID, targetClass));

  PyCATCH
}

/* ************ REDUNDANCIES ************ */

#include "redundancy.hpp"

ABSTRACT(RemoveRedundant, Orange)

C_CALL(RemoveRedundantByInduction, RemoveRedundant, "([examples[, weightID][, suspicious]) -/-> Domain")
C_CALL(RemoveRedundantByQuality, RemoveRedundant, "([examples[, weightID][, suspicious]) -/-> Domain")
C_CALL(RemoveRedundantOneValue, RemoveRedundant, "([examples[, weightID][, suspicious]) -/-> Domain")

C_CALL3(RemoveUnusedValues, RemoveUnusedValues, Orange, "([[attribute, ]examples[, weightId]]) -/-> attribute")

PyObject *RemoveRedundant_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("([examples[, weightID][, suspicious]) -/-> Domain")
{
  PyTRY
    NO_KEYWORDS

    PExampleGenerator egen;
    PyObject *suspiciousList=NULL;
    int weight=0;
    if (!PyArg_ParseTuple(args, "O&|OO&:RemoveRedundant.call", pt_ExampleGenerator, &egen, &suspiciousList, pt_weightByGen(egen), &weight))
      return PYNULL;

    TVarList suspiciousset;
    if (suspiciousList)
      if (!varListFromDomain(suspiciousList, egen->domain, suspiciousset))
        return PYNULL;

    PDomain newdomain = SELF_AS(TRemoveRedundant)(egen, suspiciousList ? &suspiciousset : NULL, NULL, weight);
    return WrapOrange(newdomain);
  PyCATCH
}


PyObject *RemoveUnusedValues_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(attribute, examples[, weightId]) -> attribute")
{
  PyTRY
    NO_KEYWORDS
    CAST_TO(TRemoveUnusedValues, ruv);
    bool storeOv = ruv->removeOneValued;

    PExampleGenerator egen;
    PVariable var;
    int weightID = 0;
    int removeOneValued = -1;
    int checkClass = 0;

    if (PyArg_ParseTuple(args, "O&O&|O&i:RemoveUnusedValues.call", cc_Variable, &var, pt_ExampleGenerator, &egen, pt_weightByGen(egen), &weightID, &removeOneValued)) {
      if (removeOneValued >= 0)
        ruv->removeOneValued = removeOneValued != 0;
      PyObject *res = WrapOrange(ruv->call(var, egen, weightID));
      ruv->removeOneValued = storeOv;
      return res;
    }

    PyErr_Clear();

    if (PyArg_ParseTuple(args, "O&|O&ii:RemoveUnusedValues.call", pt_ExampleGenerator, &egen, pt_weightByGen(egen), &weightID, &removeOneValued, &checkClass)) {
      if (removeOneValued >= 0)
        ruv->removeOneValued = removeOneValued != 0;
      PyObject *res = WrapOrange(ruv->call(egen, weightID, checkClass != 0));
      ruv->removeOneValued = storeOv;
      return res;
    }

    PYERROR(PyExc_AttributeError, "RemoveUnusedValues.__call__: invalid arguments", PYNULL);

  PyCATCH
}


/* ************ PREPROCESSORS ************ */

#include "preprocessors.hpp"

ABSTRACT(Preprocessor, Orange)

C_CALL(Preprocessor_select, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")
C_CALL(Preprocessor_ignore, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")

C_CALL(Preprocessor_take, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")
C_CALL(Preprocessor_drop, Preprocessor, "([examples[, weightID]] [attributes=<list-of-strings>]) -/-> ExampleTable")
C_CALL(Preprocessor_removeDuplicates, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
C_CALL(Preprocessor_takeMissing, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
C_CALL(Preprocessor_dropMissing, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
C_CALL(Preprocessor_takeMissingClasses, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")
C_CALL(Preprocessor_dropMissingClasses, Preprocessor, "([examples[, weightID]]) -/-> ExampleTable")

C_CALL(Preprocessor_addMissing, Preprocessor, "([examples[, weightID]] [<see the manual>]) -/-> ExampleTable")
C_CALL(Preprocessor_addMissingClasses, Preprocessor, "([examples[, weightID]] [classMissing=<float>]) -/-> ExampleTable")
C_CALL(Preprocessor_addNoise, Preprocessor, "([examples[, weightID]] [<see the manual>]) -/-> ExampleTable")
C_CALL(Preprocessor_addClassNoise, Preprocessor, "([examples[, weightID]] [proportion=<float>]) -/-> ExampleTable")
C_CALL(Preprocessor_addGaussianNoise, Preprocessor, "([examples[, weightID]] [<see the manual>]) -/-> ExampleTable")
C_CALL(Preprocessor_addGaussianClassNoise, Preprocessor, "([examples[, weightID]] [deviation=<float>]) -/-> ExampleTable")

C_CALL(Preprocessor_addClassWeight, Preprocessor, "([examples[, weightID]] [equalize=, classWeights=) -/-> ExampleTable")
C_CALL(Preprocessor_addCensorWeight, Preprocessor, "([examples[, weightID]] [method=0-km, 1-nmr, 2-linear, outcomeVar=, eventValue=, timeID=, maxTime=]) -/-> ExampleTable")

C_CALL(Preprocessor_filter, Preprocessor, "([examples[, weightID]] [filter=]) -/-> ExampleTable")
C_CALL(Preprocessor_imputeByLearner, Preprocessor, "([examples[, weightID]] [learner=]) -/-> ExampleTable")
C_CALL(Preprocessor_discretize, Preprocessor, "([examples[, weightID]] [notClass=, method=, attributes=<list-of-strings>]) -/-> ExampleTable")

C_NAMED(ImputeClassifier, Classifier, "([classifierFromVar=][imputer=])")

PYCLASSCONSTANT_INT(Preprocessor_addCensorWeight, KM, TPreprocessor_addCensorWeight::km)
PYCLASSCONSTANT_INT(Preprocessor_addCensorWeight, Linear, TPreprocessor_addCensorWeight::linear)
PYCLASSCONSTANT_INT(Preprocessor_addCensorWeight, Bayes, TPreprocessor_addCensorWeight::bayes)

PyObject *Preprocessor_call(PyObject *self, PyObject *args, PyObject *keywords) PYDOC("(examples[, weightID]) -> ExampleTable")
{ 
  PyTRY
    NO_KEYWORDS

    int weightID=0;
    PExampleGenerator egen = exampleGenFromArgs(args, weightID);
    if (!egen)
      PYERROR(PyExc_TypeError, "attribute error (example generator expected)", PYNULL);
    bool weightGiven = (weightID!=0);

    int newWeight;
    PExampleGenerator res = SELF_AS(TPreprocessor)(egen, weightID, newWeight);
    PyObject *wrappedGen=WrapOrange(res);
    return weightGiven || newWeight ? Py_BuildValue("Ni", wrappedGen, newWeight) : wrappedGen;
  PyCATCH
}


PyObject *Preprocessor_selectionVector(PyObject *self, PyObject *args, PyObject *) PYARGS(METH_VARARGS, "(examples[, weightID])")
{
  PyTRY
    int weightID = 0;
    PExampleGenerator egen = exampleGenFromArgs(args, weightID);
    if (!egen)
      PYERROR(PyExc_TypeError, "attribute error (example generator expected)", PYNULL);

    return WrapOrange(SELF_AS(TPreprocessor).selectionVector(egen, weightID));
  PyCATCH
}


#include "stringvars.hpp"

typedef MapMethods<PVariableFilterMap, TVariableFilterMap, PVariable, PValueFilter> TMM_VariableFilterMap;

int VariableFilterMap_setitemlow(TVariableFilterMap *aMap, PVariable var, PyObject *pyvalue)
{
  PValueFilter value;
  if (TMM_VariableFilterMap::_valueFromPython(pyvalue, value)) {
    aMap->__ormap[var] = value;
    return 0;
  }

  PyErr_Clear();

  if (var->varType == TValue::FLOATVAR) {
    float min, max;
    if (!PyArg_ParseTuple(pyvalue, "ff:VariableFilterMap.__setitem__", &min, &max))
      return -1;

    aMap->__ormap[var] = (min<=max) ? mlnew TValueFilter_continuous(ILLEGAL_INT, min, max)
                                   : mlnew TValueFilter_continuous(ILLEGAL_INT, max, min, true);
    return 0;
  }

  if (var->varType == TValue::INTVAR) {
    TValueFilter_discrete *vfilter = mlnew TValueFilter_discrete(ILLEGAL_INT, var);
    PValueFilter wvfilter = vfilter;
    TValueList &valueList = vfilter->values.getReference();

    if (PyTuple_Check(pyvalue) || PyList_Check(pyvalue)) {
      PyObject *iterator = PyObject_GetIter(pyvalue);
      for(PyObject *item = PyIter_Next(iterator); item; item = PyIter_Next(iterator)) {
        TValue value;
        if (!convertFromPython(item, value, var)) {
          Py_DECREF(item);
          Py_DECREF(iterator);
          return -1;
        }
        Py_DECREF(item);
        if (value.isSpecial())
          vfilter->acceptSpecial = 1;
        else
          valueList.push_back(value);
      }
      Py_DECREF(iterator);
    }
    else {
      TValue value;
      if (!convertFromPython(pyvalue, value, var))
        return -1;
      if (value.isSpecial())
        vfilter->acceptSpecial = 1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -