⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 distributions.py

📁 orange源码 数据挖掘技术
💻 PY
字号:
# Description: Shows how to compute and print attribute distributions. Also shows how to approximate a continuous distribution by a Gaussian distribution
# Category:    statistics, distributions
# Classes:     Distribution, DiscDistribution, ContDistribution, DomainDistributions, GaussianDistribution
# Uses:        adult_sample
# Referenced:  distributions.htm

import orange

data = orange.ExampleTable("../datasets/adult_sample")

disc = orange.Distribution("workclass", data)
print type(disc)
print disc

print "Distribution of 'workclass' (through values)"
workclass = data.domain["workclass"]
for i in range(len(workclass.values)):
    print "%20s: %5.3f" % (workclass.values[i], disc[i])
print

print "Distribution of 'workclass' (through items)"
for val, num in disc.items():
    print "%20s: %5.3f" % (val, num)
print

disc[0] = disc[1] = 1000
for i in range(20):
 	print disc.modus(),
print

disc[0] = disc[1] = 1000
for i in range(20):
 	disc[2]=i
 	print disc.modus(),
print

# Rebuild the distribution from the data, replacing the manually modified
# `disc` from earlier in the script.
disc = orange.Distribution("workclass", data)
# Three ways of indexing the distribution: by value name, by integer position
# and by an orange.Value built for the attribute.
print "Private: ", disc["Private"]
print "Private: ", disc[0]
print "Private: ", disc[orange.Value(workclass, "Private")]

# Show the length of the distribution next to the number of values the
# attribute declares.
print "length of distribution:", len(disc)
print "no. of values:", len(workclass.values)

# The attribute may also be passed as an integer (presumably its index in the
# domain -- TODO confirm) or as the descriptor taken from the domain.
print orange.Distribution(1, data)
print orange.Distribution(data.domain["workclass"], data)

cont = orange.Distribution("education-num", data)
print type(cont)
print cont

dist = orange.DomainDistributions(data)

for d in dist:
    if d.variable.varType == orange.VarTypes.Discrete:
        print "%30s: %s" % (d.variable.name, d)
    else:
        print "%30s: avg. %5.3f" % (d.variable.name, d.average())

print "*** AGE ***"
dage = dist["age"]
print "Native representation:", dage.native()
print "Keys:", dage.keys()
print "Values:", dage.values()
print "Items: ", dage.items()
print "Average: %5.3f" % dage.average()
print "Var/Dev/Err: %5.3f/%5.3f/%5.3f" % (dage.var(), dage.dev(), dage.error())
print "Quartiles: %5.3f - %5.3f - %5.3f" % (dage.percentile(25), dage.percentile(50), dage.percentile(75))
print

for x in range(170, 190):
    print "dens(%4.1f)=%5.3f," % (x/10.0, dage.density(x/10.0)),
    

# Inspect the "workclass" entry of the per-attribute distributions: its native
# representation followed by its keys, values and items.
print "*** WORKCLASS ***"
dwcl = dist["workclass"]
print "Native representation:", dwcl.native()
print "Keys:", dwcl.keys()
print "Values:", dwcl.values()
print "Items: ", dwcl.items()
print


disc = orange.DiscDistribution([0.5, 0.3, 0.2])
for i in range(20):
    print disc.random(),
print

v = orange.EnumVariable(values = ["red", "green", "blue"])
disc.variable = v
for i in range(20):
    print disc.random(),
print

print
cont = orange.ContDistribution({0.1: 12, 0.3: 3, 0.7: 3})
print "Manually constructed continuous distibution: ", cont
print


cont = orange.ContDistribution(data.domain["age"])


gauss = orange.GaussianDistribution(10, 2)
print "*** Gauss(10, 2) ***"
print "Average: %5.3f" % gauss.average()
print "Var/Dev/Err: %5.3f/%5.3f/%5.3f" % (gauss.var(), gauss.dev(), gauss.error())
print

for i in range(20):
    print "%5.3f" % gauss.random(),
print

for i in range(60, 140, 5):
    print "dens(%4.1f)=%5.3f" % (i/10.0, gauss.density(i/10.0)),
print

#dage.normalize()
gage = orange.GaussianDistribution(dage)
for x in range(17, 80):
    print "%i\t%5.3f\t%5.3f" % (x, dage.density(x), gage.density(x))

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -