⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 imputation.py

📁 orange源码 数据挖掘技术
💻 PY
字号:
# Description: Shows the use of classes for imputation
# Category:    default classification accuracy, statistics
# Classes:     Imputer, Imputer_defaults, Imputer_asValue, Imputer_model, ImputerConstructor, ImputerConstructor_minimal, ImputerConstructor_maximal, ImputerConstructor_average, ImputerConstructor_asValue, ImputerConstructor_model
# Uses:        bridges
# Referenced:  imputation.htm

import orange

data = orange.ExampleTable("bridges")

print "\n*** IMPUTING MINIMAL VALUES ***\n"

imputer = orange.ImputerConstructor_minimal(data)

print "Example w/ missing values"
print data[19]
print "Imputed:"
print imputer(data[19])
print

impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print


print "\n*** IMPUTING MAXIMAL VALUES ***\n"

imputer = orange.ImputerConstructor_maximal(data)

print "Example w/ missing values"
print data[19]
print "Imputed:"
print imputer(data[19])
print

impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print


print "\n*** IMPUTING AVERAGE/MAJORITY VALUES ***\n"

imputer = orange.ImputerConstructor_average(data)

print "Example w/ missing values"
print data[19]
print "Imputed:"
print imputer(data[19])
print

impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print


print "\n*** MANUALLY CONSTRUCTED IMPUTER ***\n"

imputer = orange.Imputer_defaults(data.domain)
imputer.defaults["LENGTH"] = 1234

print "Example w/ missing values"
print data[19]
print "Imputed:"
print imputer(data[19])
print

impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print


print "\n*** TREE-BASED IMPUTATION ***\n"

import orngTree
imputer = orange.ImputerConstructor_model()
imputer.learnerContinuous = imputer.learnerDiscrete = orngTree.TreeLearner(minSubset = 20)
imputer = imputer(data)

print "Example w/ missing values"
print data[19]
print "Imputed:"
print imputer(data[19])
print


impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print



print "\n*** BAYES and AVERAGE IMPUTATION ***\n"

imputer = orange.ImputerConstructor_model()
imputer.learnerContinuous = orange.MajorityLearner()
imputer.learnerDiscrete = orange.BayesLearner()
imputer = imputer(data)

print "Example w/ missing values"
print data[19]
print "Imputed:"
print imputer(data[19])
print


impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print


print "\n*** CUSTOM IMPUTATION BY MODELS ***\n"

imputer = orange.Imputer_model()
imputer.models = [None] * len(data.domain)

imputer.models[data.domain.index("LANES")] = orange.DefaultClassifier(2.0)

tord = orange.DefaultClassifier(orange.Value(data.domain["T-OR-D"], "THROUGH"))
imputer.models[data.domain.index("T-OR-D")] = tord

import orngTree
len_domain = orange.Domain(["MATERIAL", "SPAN", "ERECTED", "LENGTH"], data.domain)
len_data = orange.ExampleTable(len_domain, data)
len_tree = orngTree.TreeLearner(len_data, minSubset=20)
imputer.models[data.domain.index("LENGTH")] = len_tree
orngTree.printTxt(len_tree)

spanVar = data.domain["SPAN"]
def computeSpan(ex, rw):
    if ex["TYPE"] == "WOOD" or ex["PURPOSE"] == "WALK":
        return orange.Value(spanVar, "SHORT")
    else:
        return orange.Value(spanVar, "MEDIUM")

imputer.models[data.domain.index("SPAN")] = computeSpan

for i in range(20, 25):
    print data[i]
    print impdata[i]
    print

##for i in imputer(data):
##    print i


print "\n*** IMPUTATION WITH SPECIAL VALUES ***\n"

imputer = orange.ImputerConstructor_asValue(data)

original = data[19]
imputed = imputer(data[19])

print original.domain
print
print imputed.domain
print

for i in original.domain:
    print "%s: %s -> %s" % (original.domain[i].name, original[i], imputed[i.name]),
    if original.domain[i].varType == orange.VarTypes.Continuous:
        print "(%s)" % imputed[i.name+"_def"]
    else:
        print
print

impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -