⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 owdistributions.py

📁 orange源码 数据挖掘技术
💻 PY
📖 第 1 页 / 共 2 页
字号:
"""
<name>Distributions</name>
<description>Displays attribute value distributions.</description>
<contact>Tomaz Curk</contact>
<icon>icons/Distribution.png</icon>
<priority>100</priority>
"""

#
# OWDistributions.py
# Shows data distributions, distribution of attribute values and distribution of classes for each attribute
#

from OWTools import *
from OWWidget import *
from OWGraph import *
import OWGUI
import math
import statc

class distribErrorBarQwtPlotCurve(QwtPlotCurve):
    """Plot curve that can paint its data as vertical error bars.

    When the curve style is QwtCurve.UserCurve the data points are read in
    groups of three: the y values of the first and the third point are the
    two ends of a vertical bar, and the curve's symbol is drawn at the y
    value of the second point.  For any other style the painting is left
    to QwtPlotCurve.
    """

    def __init__(self, parent = None, text = None):
        """Pass parent and text straight on to the QwtPlotCurve constructor."""
        QwtPlotCurve.__init__(self, parent, text)

    def draw(self, p, xMap, yMap, f, t):
        """Paint the data points with indices f..t using painter p.

        xMap and yMap are used only through their transform() method, which
        converts a data coordinate into a painter coordinate.
        """
        # both the curve and the painter use the pen of the curve's symbol
        self.setPen( self.symbol().pen() )
        p.setPen( self.symbol().pen() )

        if self.style() != QwtCurve.UserCurve:
            QwtPlotCurve.draw(self, p, xMap, yMap, f, t)
            return

        p.setBackgroundMode(Qt.OpaqueMode)
        # a negative end index is replaced by the index of the last point
        if t < 0:
            t = self.dataSize() - 1
        # round the start index down to a multiple of three
        f -= f % 3
        if t % 3 == 0:
            t += 1

        for first in range(f, t+1, 3):
            x = self.x(first)
            center = xMap.transform(x)
            # the horizontal caps reach 0.1 data units to each side
            capLeft = xMap.transform(x - 0.1)
            capRight = xMap.transform(x + 0.1)
            yFirst = yMap.transform(self.y(first))
            ySymbol = yMap.transform(self.y(first + 1))
            yThird = yMap.transform(self.y(first + 2))
            p.drawLine(center, yFirst, center, yThird)
            p.drawLine(capLeft, yFirst, capRight, yFirst)
            p.drawLine(capLeft, yThird, capRight, yThird)
            self.symbol().draw(p, center, ySymbol)


class OWDistributionGraph(OWGraph):
    def __init__(self, settingsWidget = None, parent = None, name = None):
        """Create a graph with no data and the default display settings.

        parent and name are passed on to OWGraph; settingsWidget is only
        stored on the instance.
        """
        OWGraph.__init__(self, parent, name)
        self.parent = parent
        self.settingsWidget = settingsWidget

        # no data, attribute, target or outcome selection yet
        self.data = None
        self.attributeName = ""
        self.variableContinuous = FALSE
        self.targetValue = None
        self.visibleOutcomes = None

        # results of calcHistogramAndProbGraph()
        self.hdata = {}
        self.probGraphValues = []

        # display settings
        self.YLaxisTitle = "Frequency"
        self.numberOfBars = 5
        self.barSize = 50
        self.showContinuousClassGraph = 1
        self.showProbabilities = 0
        self.showConfidenceIntervals = 0
        self.smoothLines = 0

        # error-bar curve, attached to the bottom x axis and the right y axis
        errorBarCurve = distribErrorBarQwtPlotCurve(self, '')
        self.probCurveKey = self.insertCurve(errorBarCurve)
        self.setCurveXAxis(self.probCurveKey, QwtPlot.xBottom)
        self.setCurveYAxis(self.probCurveKey, QwtPlot.yRight)

        # two more curves on the same axes (upper / lower CI, going by the names)
        self.probCurveUpperCIKey = self.insertCurve('', QwtPlot.xBottom, QwtPlot.yRight)
        self.probCurveLowerCIKey = self.insertCurve('', QwtPlot.xBottom, QwtPlot.yRight)

        # all three curves start out disabled
        for curveKey in (self.probCurveKey, self.probCurveUpperCIKey, self.probCurveLowerCIKey):
            self.curve(curveKey).setEnabled(FALSE)

    def sizeHint(self):
        """Return the size hint of the graph: a QSize of 500 by 500."""
        side = 500
        return QSize(side, side)

    def setVisibleOutcomes(self, outcomes):
        """Remember which outcomes are to be drawn.

        outcomes is indexed by outcome; refreshVisibleOutcomes() draws the
        outcomes whose entry equals 1.  The graph is not redrawn here.
        """
        self.visibleOutcomes = outcomes

    def setTargetValue(self, target):
        """Store the target value and refresh the probability graph.

        refreshProbGraph() uses the target as an index into the probability
        and confidence-interval lists of self.probGraphValues.
        """
        self.targetValue = target
        self.refreshProbGraph()

    def setData(self, data, variable):
        """Set the data to display and the attribute whose distribution is shown.

        Stores the data, computes its domain contingency (self.dc) when the
        data is non-empty, derives the class-related flags and then calls
        setVariable(variable).

        Flags set here:
          dataHasClass         -- the data has a class variable
          pureHistogram        -- the class is present but not discrete, so
                                  the bars are not coloured by class
          dataHasDiscreteClass -- the class is present and not continuous

        All three flags are assigned on every call.  Previously
        dataHasDiscreteClass was left untouched when the data was missing or
        had no class, so it was either undefined or still described the
        previous data set.
        """
        self.data = data
        if data: self.dc = orange.DomainContingency(self.data)

        classVar = None
        if data:
            classVar = data.domain.classVar

        if classVar:
            self.dataHasClass = True
            self.pureHistogram = classVar.varType != orange.VarTypes.Discrete   # no class colors
            self.dataHasDiscreteClass = classVar.varType != orange.VarTypes.Continuous
        else:
            self.dataHasClass = False
            self.pureHistogram = False
            self.dataHasDiscreteClass = False

        self.setVariable(variable)

    def setVariable(self, variable):
        """Select the attribute to display, by name, and redraw the graph.

        The x axis title becomes the attribute name (or an empty string when
        variable is empty).  Nothing else happens when there is no data.
        Otherwise the x axis is set up for a continuous or a discrete
        attribute, the histogram is recomputed and the bars are redrawn.
        """
        self.attributeName = variable
        self.setXaxisTitle(variable or "")

        if not self.data: return

        attribute = self.data.domain[self.attributeName]
        if attribute.varType == orange.VarTypes.Continuous:
            self.variableContinuous = TRUE
        else:
            self.variableContinuous = FALSE

        if self.variableContinuous:
            self.setXlabels(None)
        else:
            # one labelled position per value; the axis runs half a unit past both ends
            valueNames = attribute.values.native()
            self.setXlabels(valueNames)
            self.setAxisScale(QwtPlot.xBottom, -0.5, len(valueNames) - 0.5, 1)

        self.calcHistogramAndProbGraph()
        self.refreshVisibleOutcomes()
        

    def setNumberOfBars(self, n):
        """Set the number of intervals used for a continuous attribute.

        The value is always stored; the histogram is recomputed and redrawn
        only when the current attribute is continuous.
        """
        self.numberOfBars = n

        if not self.variableContinuous:
            return
        self.calcHistogramAndProbGraph()
        self.refreshVisibleOutcomes()

    def setBarSize(self, n):
        """Set the bar width used for a discrete attribute.

        The drawing code turns n into a half-width of n/200 axis units, so
        n is a percentage of the distance between two neighbouring values.
        The value is always stored; the bars are redrawn and the graph is
        repainted only when the current attribute is not continuous.
        """
        self.barSize = n
        if self.variableContinuous:
            return
        self.refreshVisibleOutcomes()
        self.repaint()

    def calcPureHistogram(self):
        """Compute self.hdata for a histogram that is not split by class.

        Does nothing when there is no data.  For a discrete attribute
        self.hdata is the attribute's orange.Distribution.  For a continuous
        attribute the values are discretized into self.numberOfBars
        equal-width intervals; self.hdata then maps a position on the x axis
        to the count of one interval and self.subIntervalStep holds the
        interval width.
        """
        # identity test: "== None" would go through the comparison of the data object
        if self.data is None:
            return

        if not self.variableContinuous:
            # discrete variable
            self.hdata = orange.Distribution(self.attributeName, self.data) #self.dc[self.attributeName]
            return

        # continuous variable, break data into self.numberOfBars subintervals
        equiDist = orange.EquiDistDiscretization(numberOfIntervals = self.numberOfBars)
        d_variable = equiDist(self.attributeName, self.data)
        d_data = self.data.select([d_variable])
        tmphdata = orange.Distribution(0, d_data)

        transformer = d_variable.getValueFrom.transformer
        self.subIntervalStep = transformer.step

        # replace the discretized values by positions that start one step
        # below the transformer's firstVal and advance by one step per
        # interval (presumably the left edge of each interval -- the
        # drawing code paints a bar from key to key + step)
        curPos = transformer.firstVal - transformer.step
        self.hdata = {}
        for key in tmphdata.keys():
            self.hdata[curPos] = tmphdata[key]
            curPos += self.subIntervalStep

    def calcHistogramAndProbGraph(self):
        """Calculates the histogram and the values of the probability graph.

        Does nothing when there is no data and hands over to
        calcPureHistogram() when self.pureHistogram is set.  Otherwise it
        fills

          self.hdata           -- per bar, the distribution of the class
                                  values (position -> distribution for a
                                  continuous attribute, the attribute's
                                  contingency for a discrete one)
          self.probGraphValues -- list of (x, probabilities, intervals)
                                  triples, the two sequences being indexed
                                  by class value

        and, for a continuous attribute, self.subIntervalStep (the width of
        one interval).
        """
        # identity test: "== None" would go through the comparison of the data object
        if self.data is None:
            return
        if self.pureHistogram:
            self.calcPureHistogram()
            return

        if self.variableContinuous:
            # continuous variable, break data into self.numberOfBars subintervals
            equiDist = orange.EquiDistDiscretization(numberOfIntervals = self.numberOfBars)
            d_variable = equiDist(self.attributeName, self.data)
            d_data = self.data.select([d_variable, self.data.domain.classVar])
            tmphdata = orange.DomainContingency(d_data)[0]
            dc = orange.DomainContingency(self.data) #!!!
            g = orange.ConditionalProbabilityEstimatorConstructor_loess(dc[self.attributeName]) #!!!

            # interval half-width is 1.96 * sqrt(variance); a negative
            # variance gives a half-width of 0.0
            probValues = []
            for (x, ps) in g.probabilities.items():
                cis = []
                for v in ps.variances:
                    if v >= 0:
                        cis.append(math.sqrt(v) * 1.96)
                    else:
                        cis.append(0.0)
                probValues.append( (x, ps, cis) )
            self.probGraphValues = probValues
            # calculate the weighted CI=math.sqrt(prob*(1-prob)/(0.0+self.sums[curcol])),
            # where self.sums[curcol] = g.probabilities.items()[example][1].cases

            # change the attribute value (which is discretized) into the subinterval start value
            # keep the same DomainContingency data
            transformer = d_variable.getValueFrom.transformer
            self.subIntervalStep = transformer.step
            curPos = transformer.firstVal - transformer.step
            self.hdata = {}
            for key in tmphdata.keys():
                self.hdata[curPos] = tmphdata[key]
                curPos += self.subIntervalStep
        else:
            # discrete variable
            self.hdata = self.dc[self.attributeName]
            self.probGraphValues = []
            for (x, ds) in self.hdata.items():
                ps = []
                cis = []
                # explicit float so that the divisions below are never integer divisions
                cases = float(ds.cases)
                for d in ds:
                    if cases > 0:
                        p = d / cases
                        # NOTE(review): sqrt(p*(1-p)/n) without the 1.96 factor
                        # that the continuous branch applies -- confirm intended
                        ci = math.sqrt(p * (1-p) / cases)
                    else:
                        p = 0
                        ci = 0
                    ps.append(p)
                    cis.append(ci)
                self.probGraphValues.append( (x, ps, cis) )
                
    def refreshPureVisibleOutcomes(self):
        """Redraw the histogram that is not coloured by class.

        Returns at once when self.dataHasDiscreteClass is set.  Otherwise
        all curves are removed and one gray, black-edged polygon is inserted
        per key of self.hdata.  When the data has a class that is not
        discrete and self.showContinuousClassGraph is set, a curve of the
        average class value per bar is added on the right y axis (smoothed
        with statc.loess for a continuous attribute, drawn as diamonds for a
        discrete one); otherwise the right axis is switched off.  The graph
        is repainted at the end.
        """
        if self.dataHasDiscreteClass:
            return

        keys = self.hdata.keys()
        if self.variableContinuous:
            keys.sort()
        self.removeCurves()

        for position, key in enumerate(keys):
            bar = PolygonCurve(self, pen=QPen(Qt.black), brush=QBrush(Qt.gray))
            barKey = self.insertCurve(bar)
            if self.variableContinuous:
                # the bar spans one subinterval starting at key
                left = key
                right = key + self.subIntervalStep
            else:
                # the bar is centred on the index of the value
                halfWidth = (self.barSize/2.0)/100.0
                left = position - halfWidth
                right = position + halfWidth
            height = self.hdata[key]
            self.setCurveData(barKey, [left, right, right, left], [0, 0, height, height])

        showClassGraph = self.dataHasClass and not self.dataHasDiscreteClass and self.showContinuousClassGraph
        if not showClassGraph:
            self.enableYRaxis(0)
            self.setAxisScale(QwtPlot.yRight, 0.0, 1.0, 0.1)
            self.repaint()
            return

        self.enableYRaxis(1)
        self.setAxisAutoScale(QwtPlot.yRight)
        self.setYRaxisTitle(str(self.data.domain.classVar.name))

        if self.variableContinuous:
            # average class value per interval, placed at the interval centre
            equiDist = orange.EquiDistDiscretization(numberOfIntervals = self.numberOfBars)
            d_variable = equiDist(self.attributeName, self.data)
            d_data = self.data.select([d_variable, self.data.domain.classVar])
            contingency = orange.ContingencyAttrClass(d_variable, d_data)
            XY = [(start + self.subIntervalStep/2.0, dist.average())
                  for start, dist in zip(keys, contingency.values()) if dist.cases]
            XY = statc.loess(XY, 10, 4.0, 1)
        else:
            # average class value per attribute value, placed at the value's index
            contingency = orange.ContingencyAttrClass(self.attributeName, self.data)
            XY = [(index, dist.average())
                  for index, dist in enumerate(contingency.values()) if dist.cases]

        averageKey = self.insertCurve("")
        self.setCurveYAxis(averageKey, QwtPlot.yRight)
        self.setCurveData(averageKey, [point[0] for point in XY], [point[1] for point in XY])
        if self.variableContinuous:
            self.setCurvePen(averageKey, QPen(Qt.black))
        else:
            self.setCurveStyle(averageKey, QwtCurve.Dots)
            self.setCurveSymbol(averageKey, QwtSymbol(QwtSymbol.Diamond, QBrush(Qt.color0), QPen(Qt.black, 2), QSize(7,7)))

        self.repaint()
        
    def refreshVisibleOutcomes(self):
        """Redraw the bars, stacking the visible outcomes on top of each other.

        Does nothing without data or without visible outcomes, and hands
        over to refreshPureVisibleOutcomes() when self.pureHistogram is set.
        Otherwise the right y axis is reset, all curves are removed and, for
        every outcome whose entry in self.visibleOutcomes equals 1, one
        polygon per bar is inserted on top of the polygons already drawn for
        that bar.  Finally the three probability curves are inserted again
        and refreshProbGraph() is called.
        """
        if not self.data or not self.visibleOutcomes: return
        if self.pureHistogram:
            self.refreshPureVisibleOutcomes()
            return

        self.enableYRaxis(0)
        self.setAxisScale(QwtPlot.yRight, 0.0, 1.0, 0.1)
        self.setYRaxisTitle("")

        keys = self.hdata.keys()
        if self.variableContinuous:
            keys.sort()

        self.removeCurves()

        # current top of the stack of polygons, one entry per bar
        stackTops = [0] * len(keys)
        for outcome, visible in enumerate(self.visibleOutcomes):
            if visible != 1:
                continue
            for column, key in enumerate(keys):
                height = self.hdata[key][outcome]
                segment = PolygonCurve(self, pen = QPen(Qt.black), brush = QBrush(self.discPalette[outcome]))
                segmentKey = self.insertCurve(segment)
                if self.variableContinuous:
                    # the bar spans one subinterval starting at key
                    left = key
                    right = key + self.subIntervalStep
                else:
                    # the bar is centred on the index of the value
                    halfWidth = (self.barSize/2.0)/100.0
                    left = column - halfWidth
                    right = column + halfWidth
                bottom = stackTops[column]
                top = bottom + height
                self.setCurveData(segmentKey, [left, right, right, left], [bottom, bottom, top, top])
                stackTops[column] += height

        # removeCurves() also removed the probability curves; insert them again
        errorBarCurve = distribErrorBarQwtPlotCurve(self, '')
        self.probCurveKey = self.insertCurve(errorBarCurve)
        self.setCurveXAxis(self.probCurveKey, QwtPlot.xBottom)
        self.setCurveYAxis(self.probCurveKey, QwtPlot.yRight)

        self.probCurveUpperCIKey = self.insertCurve('', QwtPlot.xBottom, QwtPlot.yRight)
        self.probCurveLowerCIKey = self.insertCurve('', QwtPlot.xBottom, QwtPlot.yRight)
        self.refreshProbGraph()

    def refreshProbGraph(self):
        if not self.data or self.targetValue == None: return
        if self.showProbabilities:
            self.enableYRaxis(1)
            self.setShowYRaxisTitle(self.showYRaxisTitle)
            self.setYRaxisTitle(self.YRaxisTitle)
            xs = []
            ups = []
            mps = []
            lps = []
            cn = 0.0
            for (x, ps, cis) in self.probGraphValues:
                if self.variableContinuous:
                    xs.append(x)
                    ups.append(ps[self.targetValue] + cis[self.targetValue])
                    mps.append(ps[self.targetValue] + 0.0)
                    lps.append(ps[self.targetValue] - cis[self.targetValue])
                else:
                    if self.showConfidenceIntervals:
                        xs.append(cn)
                        mps.append(ps[self.targetValue] + cis[self.targetValue])

                    xs.append(cn)
                    mps.append(ps[self.targetValue] + 0.0)

                    if self.showConfidenceIntervals:
                        xs.append(cn)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -