⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 discretization.py

📁 orange源码 数据挖掘技术
💻 PY
字号:
# Description: Shows how usage of different classes for discretization, including manual discretization
# Category:    discretization, categorization, preprocessing
# Classes:     EntropyDiscretization, EquiDistDiscretization, BiModalDiscretization, Discretization, IntervalDiscretizer, Discretizer, BiModalDiscretizer
# Uses:        iris
# Referenced:  discretization.htm

import orange

data = orange.ExampleTable("iris")

print "\nEntropy discretization, first 10 examples"
sep_w = orange.EntropyDiscretization("sepal width", data)

data2 = data.select([data.domain["sepal width"], sep_w, data.domain.classVar])
for ex in data2[:10]:
    print ex

print "\nDiscretized attribute:", sep_w
print "Continuous attribute:", sep_w.getValueFrom.whichVar
print "Cut-off points:", sep_w.getValueFrom.transformer.points


print "\nManual construction of IntervalDiscretizer - single attribute"
idisc = orange.IntervalDiscretizer(points = [3.0, 5.0])
sep_l = idisc.constructVariable(data.domain["sepal length"])
data2 = data.select([data.domain["sepal length"], sep_l, data.domain.classVar])
for ex in data2[:10]:
    print ex


print "\nManual construction of IntervalDiscretizer - all attributes"
idisc = orange.IntervalDiscretizer(points = [3.0, 5.0])
newattrs = [idisc.constructVariable(attr) for attr in data.domain.attributes]
data2 = data.select(newattrs + [data.domain.classVar])
for ex in data2[:10]:
    print ex


print "\n\nEqual interval size discretization"
disc = orange.EquiDistDiscretization(numberOfIntervals = 6)
newattrs = [disc(attr, data) for attr in data.domain.attributes]
data2 = data.select(newattrs + [data.domain.classVar])

for attr in newattrs:
    print "%s: %s" % (attr.name, attr.values)
print

for attr in newattrs:
    print "%15s: first interval at %5.3f, step %5.3f" % (attr.name, attr.getValueFrom.transformer.firstCut, attr.getValueFrom.transformer.step)
    print " "*17 + "cutoffs at " + ", ".join(["%5.3f" % x for x in attr.getValueFrom.transformer.points])
print



print "\n\nQuartile discretization"
disc = orange.EquiNDiscretization(numberOfIntervals = 6)
newattrs = [disc(attr, data) for attr in data.domain.attributes]
data2 = data.select(newattrs + [data.domain.classVar])

for attr in newattrs:
    print "%s: %s" % (attr.name, attr.values)
print

for attr in newattrs:
    print " "*17 + "cutoffs at " + ", ".join(["%5.3f" % x for x in attr.getValueFrom.transformer.points])
print



print "\nManual construction of EquiDistDiscretizer - all attributes"
edisc = orange.EquiDistDiscretizer(firstCut = 2.0, step = 1.0, numberOfIntervals = 5)
newattrs = [edisc.constructVariable(attr) for attr in data.domain.attributes]
data2 = data.select(newattrs + [data.domain.classVar])
for ex in data2[:10]:
    print ex


print "\nFayyad-Irani discretization"
entro = orange.EntropyDiscretization()
for attr in data.domain.attributes:
    disc = entro(attr, data)
    print "%s: %s" % (attr.name, disc.getValueFrom.transformer.points)
print


newclass = orange.EnumVariable("is versicolor", values = ["no", "yes"])
newclass.getValueFrom = lambda ex, w: ex["iris"]=="Iris-versicolor"
newdomain = orange.Domain(data.domain.attributes, newclass)
data_v = orange.ExampleTable(newdomain, data)

print "\nBi-Modal discretization on binary problem"
bimod = orange.BiModalDiscretization(splitInTwo = 0)
for attr in data_v.domain.attributes:
    disc = bimod(attr, data_v)
    print "%s: %s" % (attr.name, disc.getValueFrom.transformer.points)
print

print "\nBi-Modal discretization on binary problem"
bimod = orange.BiModalDiscretization()
for attr in data_v.domain.attributes:
    disc = bimod(attr, data_v)
    print "%s: (%5.3f, %5.3f]" % (attr.name, disc.getValueFrom.transformer.low, disc.getValueFrom.transformer.high)
print


print "\nEntropy discretization on binary problem"
for attr in data_v.domain.attributes:
    disc = entro(attr, data_v)
    print "%s: %s" % (attr.name, disc.getValueFrom.transformer.points)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -