⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 owsievemultigram.py

📁 orange源码 数据挖掘技术
💻 PY
字号:
"""
<name>Sieve multigram</name>
<description>Sieve multigram.</description>
<contact>Gregor Leban (gregor.leban@fri.uni-lj.si)</contact>
<icon>icons/SieveMultigram.png</icon>
<priority>4300</priority>
"""

from OWVisWidget import *
from OWSieveMultigramGraph import *
import orngVisFuncts
from orngCI import FeatureByCartesianProduct
import OWGUI

###########################################################################################
##### WIDGET : Sieve multigram visualization
###########################################################################################
class OWSieveMultigram(OWVisWidget):
    settingsList = ["maxLineWidth", "pearsonMinRes", "pearsonMaxRes", "showAllAttributes"]
    contextHandlers = {"": DomainContextHandler("", [ContextField("shownAttributes", DomainContextHandler.RequiredList, selected="selectedShown", reservoir="hiddenAttributes")])}
    
            
    def __init__(self,parent=None, signalManager = None):
        """Build the widget: channels, default settings, tabs, graph and signal wiring.

        parent        -- parent Qt widget (or None)
        signalManager -- signal manager, handed through to OWWidget.__init__
        """
        OWWidget.__init__(self, parent, signalManager, "Sieve Multigram", TRUE)

        # NOTE: at this point self.data and self.selection are still the handler
        # methods; they have to be captured here, before self.data is overwritten
        # with the (empty) data set below.
        self.inputs = [("Examples", ExampleTable, self.data), ("Selection", list, self.selection)]
        self.outputs = [] 

        #set default settings
        self.graphCanvasColor = str(Qt.white.name())
        self.data = None
        # updateGraph() reads self.probabilities, which is otherwise assigned only in
        # computeProbabilities(); start with an empty table so that a redraw
        # requested before any data has arrived does not raise AttributeError
        self.probabilities = {}
        self.maxLineWidth = 3
        self.pearsonMinRes = 2
        self.pearsonMaxRes = 10
        self.showAllAttributes = 0
        
        # overwrite the defaults above with stored settings
        self.loadSettings()

        #GUI
        # tabs with the attribute lists ("General") and the graph options ("Settings")
        self.tabs = QTabWidget(self.space, 'tabWidget')
        self.GeneralTab = QVGroupBox(self)
        #self.GeneralTab.setFrameShape(QFrame.NoFrame)
        self.SettingsTab = OWSieveMultigramOptions(self, "Settings")
        self.tabs.insertTab(self.GeneralTab, "General")
        self.tabs.insertTab(self.SettingsTab, "Settings")
              
        #add a graph widget
        self.box = QVBoxLayout(self.mainArea)
        self.graph = OWSieveMultigramGraph(self.mainArea)
        self.box.addWidget(self.graph)
        self.statusBar = QStatusBar(self.mainArea)
        self.box.addWidget(self.statusBar)
        self.statusBar.message("")
                
        #add controls to self.controlArea widget
        self.createShowHiddenLists(self.GeneralTab, callback = self.interestingSubsetSelection)
        
        self.interestingButton = QPushButton("Find interesting attr.", self.GeneralTab)
        self.connect(self.interestingButton, SIGNAL("clicked()"),self.interestingSubsetSelection) 

        #connect controls to appropriate functions
        self.connect(self.SettingsTab.lineCombo, SIGNAL('activated ( const QString & )'), self.updateGraph)
        self.connect(self.SettingsTab.pearsonMaxResCombo, SIGNAL('activated ( const QString & )'), self.updateGraph)
        self.connect(self.SettingsTab.applyButton, SIGNAL("clicked()"), self.updateGraph)

        self.connect(self.graphButton, SIGNAL("clicked()"), self.graph.saveToFile)

        # show the (loaded) setting values in the controls
        self.activateLoadedSettings()

    # #########################
    # OPTIONS
    # #########################
    def activateLoadedSettings(self):
        """Copy the current setting values into the controls of the Settings tab."""
        settingsTab = self.SettingsTab

        # the line width combo lists 1, 2, 3, ... so width w sits at position w-1
        settingsTab.lineCombo.setCurrentItem(self.maxLineWidth - 1)

        # the max residual combo is filled in the same order as pearsonMaxNums
        maxResPosition = settingsTab.pearsonMaxNums.index(self.pearsonMaxRes)
        settingsTab.pearsonMaxResCombo.setCurrentItem(maxResPosition)

        settingsTab.minResidualEdit.setText(str(self.pearsonMinRes))
        self.cbShowAllAttributes()

        
    # ####################
    # LIST BOX FUNCTIONS
    # ####################

    # ###### SHOWN ATTRIBUTE LIST ##############
    # set attribute list
    def setShownAttributeList(self, data, shownAttributes = None):
        """Split the attributes of `data` into the shown and the hidden list.

        data            -- data set whose domain is listed; when it is empty or
                           None both lists end up empty
        shownAttributes -- optional attributes to show, given either as
                           (name, varType) tuples or as anything that can index
                           data.domain; when omitted all attributes are shown
                           (only the first 10 unless self.showAllAttributes is set)

        A class variable that is not among the shown attributes is appended to
        the hidden list. The result is stored in self.shownAttributes and
        self.hiddenAttributes, both selections are cleared and
        self.resetAttrManipulation() is called.
        """
        shown = []
        hidden = []

        if data:
            # always describe the data set that was passed in, not whatever
            # happens to be stored in self.data at the moment
            domain = data.domain
            allAttributes = [(a.name, a.varType) for a in domain.attributes]

            if shownAttributes:
                if isinstance(shownAttributes[0], tuple):
                    shown = shownAttributes
                else:
                    shown = [(domain[a].name, domain[a].varType) for a in shownAttributes]
                # a real list (not a filter object), since it may be extended below
                hidden = [attr for attr in allAttributes if attr not in shown]
            else:
                shown = allAttributes
                if not self.showAllAttributes:
                    hidden = shown[10:]
                    shown = shown[:10]

            classVar = domain.classVar
            if classVar and (classVar.name, classVar.varType) not in shown:
                hidden.append((classVar.name, classVar.varType))

        self.shownAttributes = shown
        self.hiddenAttributes = hidden
        self.selectedHidden = []
        self.selectedShown = []
        self.resetAttrManipulation()
        
    ####### DATA ################################
    # receive new data and update all fields
    def data(self, data):
        """Handler of the "Examples" input: store the new data and refresh everything.

        data -- the received data set; a false value (e.g. None) clears the widget.
                A non-empty data set is stored after being passed through
                orange.Preprocessor_dropMissing (presumably this removes the
                examples with missing values -- confirm in the Orange docs).
        """
        self.closeContext()

        self.data = None
        if data:
            self.data = orange.Preprocessor_dropMissing(data)

        # value-pair counts for the new data
        self.computeProbabilities()

        # default attribute lists first; opening the context afterwards gives the
        # context handler the chance to replace them, hence the second reset
        self.setShownAttributeList(self.data)
        self.openContext("", self.data)
        self.resetAttrManipulation()

        self.updateGraph()
        
    #################################################

    def sendShownAttributes(self):
        """Do nothing; this widget declares no output channels (self.outputs is empty).

        NOTE(review): presumably overrides a hook of the base widget -- confirm.
        """
        return None

    def updateGraph(self, *args):
        """Read the controls of the Settings tab, pass them to the graph and redraw.

        *args -- ignored; accepted because the method is also connected to
                 signals that carry an argument (the combo boxes' activated signal)
        """
        settingsTab = self.SettingsTab
        self.maxLineWidth = int(str(settingsTab.lineCombo.currentText()))
        self.pearsonMaxRes = int(str(settingsTab.pearsonMaxResCombo.currentText()))
        try:
            self.pearsonMinRes = float(str(settingsTab.minResidualEdit.text()))
        except ValueError:
            # the minimum residual is a free-text field; on empty or non-numeric
            # input keep the previous threshold and show it again instead of
            # aborting the redraw with an exception
            settingsTab.minResidualEdit.setText(str(self.pearsonMinRes))
        self.graph.setSettings(self.maxLineWidth, self.pearsonMinRes, self.pearsonMaxRes)
        
        self.graph.updateData(self.data, self.getShownAttributeList(), self.probabilities, self.statusBar)
        self.graph.update()

    def interestingSubsetSelection(self):
        """Keep only the "interesting" attributes in the shown list, then redraw.

        Two shown attributes are interesting when, for at least one pair of their
        values, the absolute Pearson residual (actual - expected) / sqrt(expected)
        exceeds self.pearsonMinRes, where expected = countX * countY / total is
        taken from self.probabilities. Every shown attribute that is not part of
        such a pair is moved from the shown to the hidden list box.

        Does nothing when no data is present.
        """
        data = self.data
        if data is None:
            return      # nothing to evaluate before any data has arrived

        labels = self.getShownAttributeList()

        # self.probabilities only has entries for non-continuous attributes (see
        # computeProbabilities), and only those have a list of values
        discrete = [name for name in labels if data.domain[name].varType != orange.VarTypes.Continuous]

        # create a list of interesting attributes
        interestingList = []
        for attrXindex in range(len(discrete)):
            attrXName = discrete[attrXindex]

            for attrYName in discrete[attrXindex+1:]:
                # both already known to be interesting - this pair can't add anything
                if attrXName in interestingList and attrYName in interestingList: continue

                found = 0
                for valX in data.domain[attrXName].values:
                    for valY in data.domain[attrYName].values:
                        entry = self.probabilities.get('%s+%s:%s+%s' %(attrXName, valX, attrYName, valY))
                        if entry is None: continue      # no counts for this pair of values
                        ((nameX, countX), (nameY, countY), actual, total) = entry
                        if not total: continue          # empty data set - no expected count

                        expected = float(countX*countY)/float(total)
                        if actual == expected == 0: continue
                        elif expected == 0: pearson = actual/sqrt(actual)
                        else:               pearson = (actual - expected) / sqrt(expected)

                        if abs(pearson) > self.pearsonMinRes:
                            found = 1
                            break
                    if found: break

                if found:
                    for name in (attrXName, attrYName):
                        if name not in interestingList: interestingList.append(name)

        # remove attributes that are not in interestingList from visible attribute list
        # NOTE(review): findItem is called without comparison flags; if its default
        # is a prefix match, an attribute whose name starts another attribute's
        # name could select the wrong item -- confirm against the QListBox docs.
        for attr in labels:
            if attr not in interestingList:
                index = self.shownAttribsLB.index(self.shownAttribsLB.findItem(attr))
                self.shownAttribsLB.removeItem(index)
                self.hiddenAttribsLB.insertItem(attr)
        self.updateGraph()

    def computeProbabilities(self):
        """Fill self.probabilities with the counts for every pair of attribute values.

        For every pair X, Y of non-continuous variables of self.data.domain
        (including X paired with itself) and every pair of their values x, y the
        dictionary gets two entries:

            'X+x:Y+y' -> ((x, count of x), (y, count of y), count of x and y together, number of examples)
            'Y+y:X+x' -> the same with the first two elements swapped

        The dictionary is left empty when there is no data. While computing, a
        "please wait" message is shown in the status bar; it is cleared again even
        if the computation fails.
        """
        self.probabilities = {}
        if self.data is None: return

        self.statusBar.message("Please wait. Computing...")
        try:
            domain = self.data.domain
            total = len(self.data)

            # we can only check discrete attributes
            discrete = []
            for i in range(len(domain)):
                if domain[i].varType != orange.VarTypes.Continuous:
                    discrete.append(domain[i])

            # for each attribute: its values and the number of examples with each value
            conts = {}
            for var in discrete:
                cont = orange.ContingencyAttrAttr(var, var, self.data)
                keys = cont.keys()
                counts = []
                for key in keys:
                    count = 0
                    try:
                        for val in cont[key]: count += val
                    except Exception: pass      # best effort: a value that can't be read keeps what was counted so far
                    counts.append(count)
                conts[var.name] = (keys, counts)

            for attrX in range(len(discrete)):
                varX = discrete[attrX]
                (keysX, countsX) = conts[varX.name]

                for varY in discrete[attrX:]:
                    (keysY, countsY) = conts[varY.name]

                    # create cartesian product of selected attributes and compute contingency
                    (cart, profit) = FeatureByCartesianProduct(self.data, [varX, varY])
                    tempData = self.data.select(list(domain) + [cart])
                    contXY = orange.ContingencyAttrClass(cart, tempData)

                    # store the counts for every pair of values
                    for (keyX, countX) in zip(keysX, countsX):
                        for (keyY, countY) in zip(keysY, countsY):
                            # the joint count is looked up under 'x-y' (presumably the way
                            # FeatureByCartesianProduct names the values of the new feature)
                            actualCount = 0
                            try:
                                for val in contXY['%s-%s' %(keyX, keyY)]: actualCount += val
                            except Exception: pass  # best effort: combination that can't be looked up counts as seen so far (0)
                            self.probabilities['%s+%s:%s+%s' %(varX.name, keyX, varY.name, keyY)] = ((keyX, countX), (keyY, countY), actualCount, total)
                            self.probabilities['%s+%s:%s+%s' %(varY.name, keyY, varX.name, keyX)] = ((keyY, countY), (keyX, countX), actualCount, total)
        finally:
            # don't leave the "please wait" message behind when something fails
            self.statusBar.message("")

    ####### SELECTION signal ################################
    # receive info about which attributes to show
    def selection(self, list):
        """Handler of the "Selection" input: show exactly the listed attributes.

        list -- names of the attributes to show; None is treated as an empty
                list. (The parameter shadows the builtin `list`; the name is kept
                so the handler's signature stays unchanged.)

        Both list boxes are cleared first. Without data nothing else happens;
        otherwise the given names go to the shown list box, every other variable
        of the domain goes to the hidden list box, and the graph is redrawn.
        """
        self.shownAttribsLB.clear()
        self.hiddenAttribsLB.clear()

        if self.data is None: return
        if list is None: list = []

        for attr in list:
            self.shownAttribsLB.insertItem(attr)

        # candidates for the hidden list: the class variable first (the domain may
        # have none), then all variables of the domain
        hiddenNames = []
        classVar = self.data.domain.classVar
        if classVar:
            hiddenNames.append(classVar.name)
        for attr in self.data.domain:
            hiddenNames.append(attr.name)

        # insert each name only once: if iterating the domain also yields the class
        # variable (TODO confirm), it would otherwise appear twice in the hidden list
        inserted = {}
        for name in hiddenNames:
            if name in list or name in inserted: continue
            inserted[name] = 1
            self.hiddenAttribsLB.insertItem(name)

        self.updateGraph()
    #################################################

class OWSieveMultigramOptions(QVGroupBox):
    """Contents of the "Settings" tab: line width, residual limits and an apply button."""

    # choices of the "Max residual" combo, as shown (strings) and as numbers;
    # both lists are in the same order
    pearsonMaxList = ['4','6','8','10','12']
    pearsonMaxNums = [ 4,  6,  8,  10,  12]

    def __init__(self,parent=None,name=None):
        """Create the controls (in display order) and fill the two combo boxes.

        parent -- parent widget; also kept in self.parent
        name   -- Qt object name, handed to QVGroupBox.__init__
        """
        QVGroupBox.__init__(self, parent, name)
        self.parent = parent

        # --- maximum line width ---
        self.lineGroup = QVGroupBox(self)
        self.lineGroup.setTitle("Max line width")
        self.lineCombo = QComboBox(self.lineGroup)

        # --- Pearson residual limits ---
        self.pearsonGroup = QVGroupBox(self)
        self.pearsonGroup.setTitle("Attribute independence (Pearson residuals)")

        # row 1: maximum residual (combo box)
        self.hbox2 = QHBox(self.pearsonGroup, "residual")
        self.residualLabel = QLabel('Max residual', self.hbox2)
        self.pearsonMaxResCombo = QComboBox(self.hbox2)
        QToolTip.add(self.hbox2, "What is maximum expected Pearson standardized residual. Greater the maximum, brighter the colors.")

        # row 2: minimum residual (free text)
        self.hbox3 = QHBox(self.pearsonGroup, "minimum")
        self.residualLabel2 = QLabel('Min residual   ', self.hbox3)
        self.minResidualEdit = QLineEdit(self.hbox3)
        QToolTip.add(self.hbox3, "What is minimal absolute residual value that will be shown in graph.")

        self.applyButton = QPushButton("Apply changes", self)

        self.initSettings()

    def initSettings(self):
        """Fill the line width combo with 1..9 and the max residual combo with pearsonMaxList."""
        # line width combo values
        for width in range(1, 10):
            self.lineCombo.insertItem(str(width))

        # max residual combo values
        for label in self.pearsonMaxList:
            self.pearsonMaxResCombo.insertItem(label)


#test widget appearance
if __name__=="__main__":
    # stand-alone run: show the widget as the application's main widget
    application = QApplication(sys.argv)
    widget = OWSieveMultigram()
    application.setMainWidget(widget)
    widget.show()
    application.exec_loop()

    # the event loop has finished - store the widget's settings
    widget.saveSettings()

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -