📄 owdistributions.py
字号:
"""
<name>Distributions</name>
<description>Displays attribute value distributions.</description>
<contact>Tomaz Curk</contact>
<icon>icons/Distribution.png</icon>
<priority>100</priority>
"""
#
# OWDistributions.py
# Shows data distributions, distribution of attribute values and distribution of classes for each attribute
#
from OWTools import *
from OWWidget import *
from OWGraph import *
import OWGUI
import math
import statc
class distribErrorBarQwtPlotCurve(QwtPlotCurve):
    """Plot curve that can render its data as error bars.

    With the curve style set to QwtCurve.UserCurve the data points are consumed
    in consecutive groups of three: the first and third y values are the two
    ends of a vertical bar (each finished with a short horizontal cap) and the
    second y value is where the curve symbol is drawn.  Any other style is
    handed to QwtPlotCurve.draw unchanged.
    """

    def __init__(self, parent = None, text = None):
        QwtPlotCurve.__init__(self, parent, text)

    def draw(self, p, xMap, yMap, f, t):
        """Paint the points with indices f..t on painter p.

        xMap / yMap convert data coordinates to paint coordinates.  A negative
        t stands for "up to the last data point".
        """
        symbolPen = self.symbol().pen()
        self.setPen(symbolPen)
        p.setPen(symbolPen)

        if self.style() != QwtCurve.UserCurve:
            QwtPlotCurve.draw(self, p, xMap, yMap, f, t)
            return

        p.setBackgroundMode(Qt.OpaqueMode)
        if t < 0:
            t = self.dataSize() - 1
        # Align the first index to the start of a group of three points.
        f -= f % 3
        if t % 3 == 0:
            t += 1

        capHalfWidth = 0.1  # in data (x axis) units
        for first in range(f, t + 1, 3):
            xValue = self.x(first)
            px = xMap.transform(xValue)
            pxl = xMap.transform(xValue - capHalfWidth)
            pxr = xMap.transform(xValue + capHalfWidth)
            barStart = yMap.transform(self.y(first))
            symbolY = yMap.transform(self.y(first + 1))
            barEnd = yMap.transform(self.y(first + 2))
            p.drawLine(px, barStart, px, barEnd)      # vertical bar
            p.drawLine(pxl, barStart, pxr, barStart)  # cap at the first end
            p.drawLine(pxl, barEnd, pxr, barEnd)      # cap at the other end
            self.symbol().draw(p, px, symbolY)
class OWDistributionGraph(OWGraph):
def __init__(self, settingsWidget = None, parent = None, name = None):
    """Create the graph with default settings and disabled probability curves.

    settingsWidget is stored as-is; parent and name are forwarded to OWGraph.
    """
    OWGraph.__init__(self, parent, name)
    self.parent = parent
    self.settingsWidget = settingsWidget

    # what is currently shown
    self.data = None
    self.attributeName = ""
    self.variableContinuous = FALSE
    self.targetValue = None
    self.visibleOutcomes = None
    self.hdata = {}
    self.probGraphValues = []

    # display settings
    self.YLaxisTitle = "Frequency"
    self.numberOfBars = 5
    self.barSize = 50
    self.showContinuousClassGraph=1
    self.showProbabilities = 0
    self.showConfidenceIntervals = 0
    self.smoothLines = 0

    # probability curve (drawn as error bars) on the bottom / right axes
    errorBarCurve = distribErrorBarQwtPlotCurve(self, '')
    self.probCurveKey = self.insertCurve(errorBarCurve)
    self.setCurveXAxis(self.probCurveKey, QwtPlot.xBottom)
    self.setCurveYAxis(self.probCurveKey, QwtPlot.yRight)

    # companion curves for the upper and lower confidence interval
    self.probCurveUpperCIKey = self.insertCurve('', QwtPlot.xBottom, QwtPlot.yRight)
    self.probCurveLowerCIKey = self.insertCurve('', QwtPlot.xBottom, QwtPlot.yRight)

    # all three curves start out disabled
    for curveKey in (self.probCurveKey, self.probCurveUpperCIKey, self.probCurveLowerCIKey):
        self.curve(curveKey).setEnabled(FALSE)
def sizeHint(self):
    """Return the preferred size of the graph, a 500 x 500 QSize."""
    preferredSide = 500
    return QSize(preferredSide, preferredSide)
def setVisibleOutcomes(self, outcomes):
    """Store the per-outcome visibility flags.

    refreshVisibleOutcomes draws the outcome at index i only when
    outcomes[i] == 1.  Nothing is redrawn here.
    """
    self.visibleOutcomes = outcomes
def setTargetValue(self, target):
    """Store the target value and call refreshProbGraph()."""
    self.targetValue = target
    self.refreshProbGraph()
def setData(self, data, variable):
    """Attach a data set and show the distribution of `variable`.

    data     -- the examples to display; a false value (e.g. None) clears
                the class-related flags
    variable -- passed on to setVariable()

    Sets self.dc (only when data is given), self.pureHistogram,
    self.dataHasClass and self.dataHasDiscreteClass, then calls
    setVariable(variable).
    """
    self.data = data
    if data:
        self.dc = orange.DomainContingency(self.data)

    classVar = None
    if data:
        classVar = data.domain.classVar

    # A class that is not discrete cannot be split into class colors.
    if classVar and classVar.varType != orange.VarTypes.Discrete:
        self.pureHistogram = True
    else:
        self.pureHistogram = False

    if classVar:
        self.dataHasClass = True
        self.dataHasDiscreteClass = not (classVar.varType == orange.VarTypes.Continuous)
    else:
        self.dataHasClass = False
        # Always reset this flag together with dataHasClass; otherwise it
        # is undefined on first use or stale from a previous data set.
        self.dataHasDiscreteClass = False

    self.setVariable(variable)
def setVariable(self, variable):
    """Select the attribute whose distribution is shown and redraw.

    The x axis title is set to `variable` (or to "" when it is false).
    Without data nothing else happens.  Otherwise the x axis is labelled
    according to the attribute type, the histogram is recomputed and the
    visible outcomes are redrawn.
    """
    self.attributeName = variable
    self.setXaxisTitle(variable or "")
    if not self.data:
        return

    # NOTE(review): a false `variable` is still used to index the domain
    # here when data is present -- confirm callers never do that.
    attribute = self.data.domain[self.attributeName]
    if attribute.varType == orange.VarTypes.Continuous:
        self.variableContinuous = TRUE
        self.setXlabels(None)
    else:
        self.variableContinuous = FALSE
        valueNames = attribute.values.native()
        self.setXlabels(valueNames)
        # one unit per value, with half a unit of margin on both sides
        self.setAxisScale(QwtPlot.xBottom, -0.5, len(valueNames) - 0.5, 1)

    self.calcHistogramAndProbGraph()
    self.refreshVisibleOutcomes()
def setNumberOfBars(self, n):
    """Store the number of histogram bars.

    The histogram is recomputed and redrawn only when the current variable
    is continuous.
    """
    self.numberOfBars = n
    if not self.variableContinuous:
        return
    self.calcHistogramAndProbGraph()
    self.refreshVisibleOutcomes()
def setBarSize(self, n):
    """Store the bar size and redraw when the variable is discrete.

    barSize is divided by 100 when bar edges are computed, so it acts as a
    percentage of the distance between two neighbouring values.
    """
    self.barSize = n
    if self.variableContinuous:
        return
    # NOTE(review): the source had lost its indentation; repaint() is taken
    # to belong to the discrete-only branch -- confirm against the original.
    self.refreshVisibleOutcomes()
    self.repaint()
def calcPureHistogram(self):
    """Compute self.hdata without splitting the counts by class.

    Does nothing when no data is set.  For a discrete variable self.hdata
    is the orange.Distribution of the attribute.  For a continuous variable
    the attribute is discretized into self.numberOfBars equal-width
    intervals and self.hdata maps each interval's start position to the
    distribution entry for that interval; self.subIntervalStep is set to
    the interval width.
    """
    # Identity test: `== None` would go through the data object's own
    # comparison operator.
    if self.data is None:
        return

    if not self.variableContinuous:
        # Discrete variable
        self.hdata = orange.Distribution(self.attributeName, self.data)
        return

    # Continuous variable: break the data into self.numberOfBars
    # subintervals with orange.EquiDistDiscretization.
    equiDist = orange.EquiDistDiscretization(numberOfIntervals = self.numberOfBars)
    d_variable = equiDist(self.attributeName, self.data)
    d_data = self.data.select([d_variable])
    intervalCounts = orange.Distribution(0, d_data)

    transformer = d_variable.getValueFrom.transformer
    self.subIntervalStep = transformer.step
    # presumably firstVal is the first cut-off point, so the first interval
    # starts one step before it -- verify against the discretizer
    curPos = transformer.firstVal - transformer.step

    # Re-key the counts by interval start instead of by discretized value.
    self.hdata = {}
    for key in intervalCounts.keys():
        self.hdata[curPos] = intervalCounts[key]
        curPos += self.subIntervalStep
def calcHistogramAndProbGraph(self):
    """Calculate the histogram and the probability-graph values.

    Does nothing when no data is set; delegates to calcPureHistogram() when
    self.pureHistogram is set.  Otherwise fills

      self.hdata           -- per-class counts, keyed by interval start
                              (continuous variable) or taken from self.dc
                              (discrete variable)
      self.probGraphValues -- list of (x, probabilities, interval widths)
                              with one probability / width per class value
    """
    # Identity test: `== None` would go through the data object's own
    # comparison operator.
    if self.data is None:
        return
    if self.pureHistogram:
        self.calcPureHistogram()
        return

    if self.variableContinuous:
        # Continuous variable: break the data into self.numberOfBars
        # subintervals with orange.EquiDistDiscretization.
        equiDist = orange.EquiDistDiscretization(numberOfIntervals = self.numberOfBars)
        d_variable = equiDist(self.attributeName, self.data)
        d_data = self.data.select([d_variable, self.data.domain.classVar])
        tmphdata = orange.DomainContingency(d_data)[0]

        # NOTE(review): setData() already stores a DomainContingency of the
        # same data in self.dc; it is recomputed here -- confirm whether
        # that is required.
        dc = orange.DomainContingency(self.data)
        g = orange.ConditionalProbabilityEstimatorConstructor_loess(dc[self.attributeName])

        def intervalHalfWidth(variance):
            # 1.96 standard deviations; non-positive variances give 0.0
            if variance > 0:
                return math.sqrt(variance) * 1.96
            return 0.0

        self.probGraphValues = [(x, ps, [intervalHalfWidth(v) for v in ps.variances])
                                for (x, ps) in g.probabilities.items()]

        # Change the (discretized) attribute value into the subinterval
        # start value while keeping the same contingency data.
        transformer = d_variable.getValueFrom.transformer
        self.subIntervalStep = transformer.step
        curPos = transformer.firstVal - transformer.step
        self.hdata = {}
        for key in tmphdata.keys():
            self.hdata[curPos] = tmphdata[key]
            curPos += self.subIntervalStep
    else:
        # Discrete variable
        self.hdata = self.dc[self.attributeName]
        self.probGraphValues = []
        for (x, ds) in self.hdata.items():
            ps = []
            cis = []
            cases = ds.cases
            for d in ds:
                if cases > 0:
                    p = d / cases
                    # NOTE(review): unlike the continuous branch this is
                    # not multiplied by 1.96 -- confirm that is intended.
                    ci = math.sqrt(p * (1-p) / (0.0 + cases))
                else:
                    p = 0
                    ci = 0
                ps.append(p)
                cis.append(ci)
            self.probGraphValues.append( (x, ps, cis) )
def refreshPureVisibleOutcomes(self):
    """Redraw the single-colour histogram used when class colors are off.

    Returns immediately when self.dataHasDiscreteClass is set.  All curves
    are removed, one gray bar is drawn per entry of self.hdata and, when
    the data has a non-discrete class and showContinuousClassGraph is on,
    a curve of average class values is added on the right y axis.
    """
    if self.dataHasDiscreteClass:
        return

    if self.variableContinuous:
        keys = sorted(self.hdata.keys())
    else:
        keys = self.hdata.keys()
    self.removeCurves()

    # half of the bar width for discrete variables, in x axis units
    halfBar = (self.barSize/2.0)/100.0
    for position, key in enumerate(keys):
        height = self.hdata[key]
        bar = PolygonCurve(self, pen=QPen(Qt.black), brush=QBrush(Qt.gray))
        barKey = self.insertCurve(bar)
        if self.variableContinuous:
            left = key
            right = key + self.subIntervalStep
        else:
            left = position - halfBar
            right = position + halfBar
        self.setCurveData(barKey, [left, right, right, left], [0, 0, height, height])

    showClassCurve = self.dataHasClass and not self.dataHasDiscreteClass and self.showContinuousClassGraph
    if not showClassCurve:
        self.enableYRaxis(0)
        self.setAxisScale(QwtPlot.yRight, 0.0, 1.0, 0.1)
    else:
        self.enableYRaxis(1)
        self.setAxisAutoScale(QwtPlot.yRight)
        self.setYRaxisTitle(str(self.data.domain.classVar.name))

        XY = []
        if self.variableContinuous:
            equiDist = orange.EquiDistDiscretization(numberOfIntervals = self.numberOfBars)
            d_variable = equiDist(self.attributeName, self.data)
            d_data = self.data.select([d_variable, self.data.domain.classVar])
            contingency = orange.ContingencyAttrClass(d_variable, d_data)
            # average class value at the middle of every non-empty interval
            for start, dist in zip(keys, contingency.values()):
                if dist.cases:
                    XY.append((start + self.subIntervalStep/2.0, dist.average()))
            XY = statc.loess(XY, 10, 4.0, 1)
        else:
            contingency = orange.ContingencyAttrClass(self.attributeName, self.data)
            # average class value for every attribute value that has examples
            for index, dist in enumerate(contingency.values()):
                if dist.cases:
                    XY.append((index, dist.average()))

        classCurveKey = self.insertCurve("")
        self.setCurveYAxis(classCurveKey, QwtPlot.yRight)
        self.setCurveData(classCurveKey, [point[0] for point in XY], [point[1] for point in XY])
        if self.variableContinuous:
            self.setCurvePen(classCurveKey, QPen(Qt.black))
        else:
            self.setCurveStyle(classCurveKey, QwtCurve.Dots)
            self.setCurveSymbol(classCurveKey, QwtSymbol(QwtSymbol.Diamond, QBrush(Qt.color0), QPen(Qt.black, 2), QSize(7,7)))

    self.repaint()
def refreshVisibleOutcomes(self):
    """Redraw the stacked histogram of the visible outcomes.

    Does nothing without data or visible outcomes and delegates to
    refreshPureVisibleOutcomes() when self.pureHistogram is set.  Otherwise
    all curves are removed, the bars of every outcome flagged with 1 are
    stacked on top of each other, the probability curves are inserted anew
    and refreshProbGraph() is called.
    """
    if not (self.data and self.visibleOutcomes):
        return
    if self.pureHistogram:
        self.refreshPureVisibleOutcomes()
        return

    self.enableYRaxis(0)
    self.setAxisScale(QwtPlot.yRight, 0.0, 1.0, 0.1)
    self.setYRaxisTitle("")

    if self.variableContinuous:
        keys = sorted(self.hdata.keys())
    else:
        keys = self.hdata.keys()
    self.removeCurves()

    # running top of the stack for every bar position
    currentBarsHeight = [0] * len(keys)
    for oi, visible in enumerate(self.visibleOutcomes):
        if not (visible == 1):
            continue
        for cn, key in enumerate(keys):
            subBarHeight = self.hdata[key][oi]
            bottom = currentBarsHeight[cn]
            top = bottom + subBarHeight
            curve = PolygonCurve(self, pen = QPen(Qt.black), brush = QBrush(self.discPalette[oi]))
            ckey = self.insertCurve(curve)
            if self.variableContinuous:
                left = key
                right = key + self.subIntervalStep
            else:
                left = cn - (self.barSize/2.0)/100.0
                right = cn + (self.barSize/2.0)/100.0
            self.setCurveData(ckey, [left, right, right, left], [bottom, bottom, top, top])
            currentBarsHeight[cn] = top

    # removeCurves() dropped the probability curves as well; insert them again
    probCurve = distribErrorBarQwtPlotCurve(self, '')
    self.probCurveKey = self.insertCurve(probCurve)
    self.setCurveXAxis(self.probCurveKey, QwtPlot.xBottom)
    self.setCurveYAxis(self.probCurveKey, QwtPlot.yRight)
    self.probCurveUpperCIKey = self.insertCurve('', QwtPlot.xBottom, QwtPlot.yRight)
    self.probCurveLowerCIKey = self.insertCurve('', QwtPlot.xBottom, QwtPlot.yRight)
    self.refreshProbGraph()
def refreshProbGraph(self):
if not self.data or self.targetValue == None: return
if self.showProbabilities:
self.enableYRaxis(1)
self.setShowYRaxisTitle(self.showYRaxisTitle)
self.setYRaxisTitle(self.YRaxisTitle)
xs = []
ups = []
mps = []
lps = []
cn = 0.0
for (x, ps, cis) in self.probGraphValues:
if self.variableContinuous:
xs.append(x)
ups.append(ps[self.targetValue] + cis[self.targetValue])
mps.append(ps[self.targetValue] + 0.0)
lps.append(ps[self.targetValue] - cis[self.targetValue])
else:
if self.showConfidenceIntervals:
xs.append(cn)
mps.append(ps[self.targetValue] + cis[self.targetValue])
xs.append(cn)
mps.append(ps[self.targetValue] + 0.0)
if self.showConfidenceIntervals:
xs.append(cn)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -