⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 owinteractivediscretization.py

📁 orange源码 数据挖掘技术
💻 PY
📖 第 1 页 / 共 3 页
字号:

        for i, idx in enumerate(self.continuousIndices):
            self.computeDiscretizer(i, idx, True)

        self.commitIf()            

    def classMethodChanged(self):
        """Re-discretize the class and refresh the graph for the selected attribute.

        Does nothing when no data is loaded.  Otherwise it calls
        discretizeClass() and classChanged(), hands the selected attribute
        and its current cut-off points to the graph, and finally clamps
        self.targetClass to the last class value.
        """
        if not self.data:
            return

        self.discretizeClass()
        self.classChanged()
        attrIndex = self.continuousIndices[self.selectedAttr]
        self.graph.setData(self.data.domain[attrIndex], self.data)
        discretizer = self.discretizers[attrIndex]
        # No discretizer (or no cut-off points) is shown as an empty split list.
        self.graph.setSplits(discretizer and discretizer.getValueFrom.transformer.points or [])

        # The clamp target is len - 1, so a value equal to len is already out
        # of range and has to be clamped too (the original tested with ">").
        classValues = self.data.domain.classVar.values
        if self.targetClass >= len(classValues):
            self.targetClass = len(classValues) - 1


    def indiMethodChanged(self, dontSetACustom=False):
        """Store the individual discretization settings of the selected attribute.

        Copies self.indiDiscretization and self.indiIntervals into the
        attribute's indiData row, enables the interval box only for methods
        3 and 4, recomputes the discretizer and calls commitIf().

        dontSetACustom -- when true, an empty custom slot is not seeded with
        the cut-off points currently shown in the graph.
        """
        if not self.data:
            return

        attrPos = self.selectedAttr
        idx = self.continuousIndices[attrPos]
        settings = self.indiData[idx]
        settings[0] = self.indiDiscretization
        settings[1] = self.indiIntervals

        usesIntervals = self.indiDiscretization in [3, 4]
        self.indiInterBox.setEnabled(usesIntervals)
        # A non-default method that differs from the "set all" choice switches
        # that choice to 4.
        if self.indiDiscretization and self.indiDiscretization - 4 != self.resetIndividuals:
            self.resetIndividuals = 4

        # Methods 5..7 are the custom slots 0..2.
        customSlot = self.indiDiscretization - 5
        seedSlot = customSlot >= 0 and not dontSetACustom and not self.customSplits[customSlot]
        if seedSlot:
            # The chosen custom slot is empty: fill it from the graph.
            attr = self.data.domain[idx]
            cutTexts = [str(attr(x)) for x in self.graph.curCutPoints]
            settings[2 + customSlot] = cutTexts
            joined = " ".join(cutTexts)
            self.customSplits[customSlot] = joined
            self.customLineEdits[customSlot].setText(joined)

        self.computeDiscretizer(attrPos, idx)
        self.commitIf()


    def customSelected(self, which):
        """Switch the selected attribute to custom slot `which` (0-based).

        Nothing happens when no data is loaded or when that slot is already
        the active method; otherwise self.indiDiscretization becomes
        5 + which and indiMethodChanged() is called.
        """
        # The original also looked up the attribute index and descriptor here
        # but never used them; those dead lookups were removed.
        if self.data and self.indiDiscretization != 5 + which:
            self.indiDiscretization = 5 + which
            self.indiMethodChanged()

        
    def setAllIndividuals(self):
        """Apply the method chosen in self.resetIndividuals to every attribute.

        Returns without changes when no data is loaded or when the choice
        is 4.  Attributes whose stored method already equals the target are
        skipped; the others get the new method and a recomputed discretizer.
        """
        if not self.data:
            return

        self.clearLineEditFocus()
        choice = self.resetIndividuals
        if choice == 4:
            return

        # 0 stays 0; 1..3 become 5..7, the custom entries of shortDiscNames.
        target = choice
        if target:
            target += 4

        for pos, attrIdx in enumerate(self.continuousIndices):
            if self.indiData[attrIdx][0] == target:
                continue
            self.indiData[attrIdx][0] = target

            if pos != self.selectedAttr:
                self.computeDiscretizer(pos, attrIdx)
                continue

            # The selected attribute also drives the controls on screen.
            self.indiDiscretization = target
            self.indiMethodChanged(True) # don't set a custom
            if target:
                self.computeDiscretizer(pos, attrIdx)

        self.attrList.triggerUpdate(0)
        self.commitIf()


    def customChanged(self, which):
        """Parse the text of custom line edit `which` and make it the active method.

        The text is split on whitespace, ':', ',' and '-' (so negative cut-off
        points cannot be entered), duplicate tokens are dropped and the rest
        is sorted numerically.  The normalized text is written back to the
        line edit and the tokens are stored in self.customSplits and in the
        attribute's indiData row.

        If a token is not a number, the text is kept exactly as typed and
        stored as a plain string.  The original joined that string with
        spaces, which put a blank between every character the user entered.
        """
        if not self.data:
            return

        idx = self.continuousIndices[self.selectedAttr]
        le = self.customLineEdits[which]

        raw = str(le.text())
        tokens = raw.replace(":", " ").replace(",", " ").replace("-", " ").split()
        try:
            # remove duplicates (except 8.0, 8.000 ...), then order by value
            content = list(dict.fromkeys(tokens))
            content.sort(key=float)
            shown = " ".join(content)
        except ValueError:
            # Not numeric: leave the user's text untouched.
            content = shown = raw

        le.setText(shown)
        self.customSplits[which] = content
        self.indiData[idx][which+2] = content

        self.indiData[idx][0] = self.indiDiscretization = 5 + which

        self.computeDiscretizer(self.selectedAttr, idx)
        self.commitIf()
                

    def copyToCustom(self, which):
        """Copy the current cut-off points of the selected attribute to custom slot `which`.

        When the active method is itself a custom slot (5..7) and that slot
        holds a non-empty list of numbers, those tokens are copied.  In every
        other case the points of the attribute's current discretizer are used
        (an empty list when there is no discretizer).  The result is stored
        in self.customSplits, in the attribute's indiData row and shown in
        the slot's line edit.

        The original validated the *target* slot instead of the source slot,
        and copied str() of the source, i.e. the repr of a list, which was
        then joined character by character into the line edit.
        """
        self.clearLineEditFocus()
        if not self.data:
            return

        idx = self.continuousIndices[self.selectedAttr]

        splits = []
        if self.indiDiscretization >= 5:
            source = self.customSplits[self.indiDiscretization - 5]
            # A slot may hold a space-separated string or a list of tokens.
            if hasattr(source, "split"):
                source = source.split()
            try:
                for token in source:
                    float(token)
            except (TypeError, ValueError):
                pass  # not usable; fall back to the discretizer below
            else:
                splits = [str(token) for token in source]

        if not splits:
            attr = self.data.domain[idx]
            discretizer = self.discretizers[idx]
            points = discretizer and discretizer.getValueFrom.transformer.points or []
            splits = [str(attr(point)) for point in points]

        self.indiData[idx][2+which] = self.customSplits[which] = splits
        self.customLineEdits[which].setText(" ".join(splits))
        # NOTE: the copied slot is not activated here; the customSelected(which)
        # call was already disabled in the original.
#        self.customSelected(which)

    
    # Label suffixes indexed by discretization method code: 0 is the default
    # method (no suffix), 1..4 the built-in methods, 5..7 the custom slots.
    shortDiscNames = ("", " (leave continuous)", " (entropy)", " (equal frequency)", " (equal width)", " (custom 1)", " (custom 2)", " (custom 3)")

    def computeDiscretizer(self, i, idx, onlyDefaults=False):
        """Build the discretizer for one attribute and update its label.

        i            -- position of the attribute in the attribute list
        idx          -- index of the attribute in the data domain
        onlyDefaults -- when true, attributes that do not end up using the
                        default method are left untouched

        Method code 0 means "use the default method"
        (self.discretization + 1 with self.intervals).  A custom method
        (5..7) whose stored cut-off points are empty or not numeric also
        falls back to the default, and the label then names both.
        """
        attr = self.data.domain[idx]
        settings = self.indiData[idx]

        method, nIntervals = settings[:2]
        label = self.shortDiscNames[method]

        usesDefault = not method
        if usesDefault:
            method = self.discretization + 1
            nIntervals = self.intervals

        if method >= 5:
            # Custom slot: its tokens sit at offset 2 of the settings row.
            try:
                customs = [float(r) for r in settings[method - 5 + 2]]
            except:
                customs = []

            if not customs:
                method = self.discretization + 1
                nIntervals = self.intervals
                label = "%s ->%s)" % (self.shortDiscNames[settings[0]][:-1], self.shortDiscNames[method][2:-1])
                usesDefault = True

        if onlyDefaults and not usesDefault:
            return

        if method == 1: # leave continuous
            discretizer = None
        elif method == 2:
            discretizer = orange.EntropyDiscretization(attr, self.data)
        elif method == 3:
            discretizer = orange.EquiNDiscretization(attr, self.data, numberOfIntervals = nIntervals)
        elif method == 4:
            discretizer = orange.EquiDistDiscretization(attr, self.data, numberOfIntervals = nIntervals)
        else:
            discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(attr)

        self.discretizers[idx] = discretizer

        # The label lists the cut-off points unless the attribute stays continuous.
        if method != 1:
            cutTexts = [str(attr(x)) for x in discretizer.getValueFrom.transformer.points]
            intervalsText = ": " + ", ".join(cutTexts)
        else:
            intervalsText = ""
        self.indiLabels[i] = intervalsText + label

        self.attrList.triggerUpdate(0)

        if i == self.selectedAttr:
            shownPoints = []
            if discretizer:
                shownPoints = discretizer.getValueFrom.transformer.points or []
            self.graph.setSplits(shownPoints)



    def discretizeClass(self):
        """Discretize the class variable of self.originalData into self.data.

        self.classDiscretization selects the method: 0 builds an
        EquiNDiscretization, 1 an EquiDistDiscretization (both with
        self.classIntervals intervals), anything else an IntervalDiscretizer
        on the custom cut-off points.  Custom points are read from
        self.classCustomLineEdit, split on whitespace, ':', ',' and '-';
        when they are empty or not numeric, method 0 is used instead.

        Does nothing when self.originalData is not set.
        """
        if not self.originalData:
            return

        discType = self.classDiscretization
        classVar = self.originalData.domain.classVar

        if discType == 2:
            try:
                content = str(self.classCustomLineEdit.text()).replace(":", " ").replace(",", " ").replace("-", " ").split()
                # remove duplicates and order the cut-off points
                customs = sorted(set([float(x) for x in content]))
            except ValueError:
                customs = []

            if not customs:
                discType = 0
            # (a leftover debugging "print customs" was removed here)

        if discType == 0:
            discretizer = orange.EquiNDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
        elif discType == 1:
            discretizer = orange.EquiDistDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
        else:
            discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(classVar)

        # Same attributes as the original data, with the discretized class.
        self.data = orange.ExampleTable(orange.Domain(self.originalData.domain.attributes, discretizer), self.originalData)

        self.classIntervalsLabel.setText("Current splits: " + ", ".join([str(classVar(x)) for x in discretizer.getValueFrom.transformer.points]))
        

    def classCustomChanged(self):
        """Forward to classMethodChanged()."""
        self.classMethodChanged()

    def classCustomSelected(self):
        """Switch the class discretization to method 2 (custom) and focus its line edit.

        Does nothing when method 2 is already active.
        """
        if self.classDiscretization == 2:
            # prevent a cycle (this function called by setFocus at its end)
            return

        self.classDiscretization = 2
        self.classMethodChanged()
        self.classCustomLineEdit.setFocus()
            
    def discretize(self):
        """Placeholder: returns None whether or not data is loaded."""
        if not self.data:
            return


    def synchronizeIf(self):
        """Synchronize now if self.autoSynchronize is set, else set self.pointsChanged."""
        if not self.autoSynchronize:
            self.pointsChanged = True
            return
        self.synchronize()

    def synchronizePressed(self):
        """Call clearLineEditFocus(), then synchronize()."""
        self.clearLineEditFocus()
        self.synchronize()

    def synchronize(self):
        """Store the cut-off points currently shown in the graph as a custom discretization.

        If the selected attribute already uses a custom slot (methods 5..7),
        that slot is overwritten.  Otherwise the first empty custom slot is
        taken (slot 0 when all three are filled) and becomes the active
        method.  The sorted points are written to the attribute's indiData
        row, self.customSplits and the slot's line edit; the discretizer and
        the attribute label are rebuilt.  Finally self.pointsChanged is
        cleared.

        Does nothing when no data is loaded.
        """
        if not self.data:
            return

        slot = self.indiDiscretization - 5
        if slot < 0:
            for slot in range(3):
                # Test the slot's content.  The original tested the line edit
                # widget object itself, so a free slot was never detected and
                # slot 0 was always overwritten.
                if not self.customSplits[slot]:
                    break
            else:
                slot = 0
            self.indiDiscretization = slot + 5

        idx = self.continuousIndices[self.selectedAttr]
        attr = self.data.domain[idx]
        cp = list(self.graph.curCutPoints)
        cp.sort()
        splits = [str(attr(i)) for i in cp]
        splitsTxt = " ".join(splits)
        self.indiData[idx][0] = self.indiDiscretization
        self.indiData[idx][2+slot] = self.customSplits[slot] = splits
        self.customLineEdits[slot].setText(splitsTxt)

        discretizer = orange.IntervalDiscretizer(points = cp).constructVariable(attr)
        self.discretizers[idx] = discretizer

        # Name the slot actually used; the original always appended the last
        # entry, " (custom 3)".
        self.indiLabels[self.selectedAttr] = ": " + splitsTxt + self.shortDiscNames[slot + 5]
        self.attrList.triggerUpdate(0)

        self.pointsChanged = False


    def commitIf(self):
        """Commit now if self.autoApply is set, else set self.dataChanged."""
        if not self.autoApply:
            self.dataChanged = True
            return
        self.commit()
            
    def commit(self):
        """Send the discretized data on the "Examples" channel.

        With data loaded, each attribute is replaced by its discretizer's
        variable.  Attributes without a discretizer are passed through
        unchanged, and attributes whose discretizer has no cut-off points are
        left out.  The class variable is appended last: the original one if
        self.outputOriginalClass is set, otherwise the one in self.data.

        Without self.data, self.originalData is sent as is; with neither,
        None is sent.  Afterwards self.dataChanged is cleared.
        """
        self.clearLineEditFocus()

        if self.data:
            newattrs = []
            for attr, disc in zip(self.data.domain.attributes, self.discretizers):
                if not disc:
                    newattrs.append(attr)
                elif disc.getValueFrom.transformer.points:
                    newattrs.append(disc)
                # a discretizer without cut-off points drops the attribute

            if self.data.domain.classVar:
                if self.outputOriginalClass:
                    newattrs.append(self.originalData.domain.classVar)
                else:
                    newattrs.append(self.data.domain.classVar)

            self.send("Examples", self.data.select(newattrs))

        elif self.originalData:  # no continuous attributes...
            self.send("Examples", self.originalData)
        else:
            self.send("Examples", None)

        # The original assigned a local "dataChanged", so the flag set by
        # commitIf() was never cleared.
        self.dataChanged = False


import sys
if __name__=="__main__":
    # Manual test run: show the widget on its own and feed it a data set.
    app=QApplication(sys.argv)
    w=OWInteractiveDiscretization()
    app.setMainWidget(w)
    w.show()
    # Alternative data sets, left disabled:
#    d=orange.ExampleTable("../../doc/datasets/bridges.tab")
#    d=orange.ExampleTable("../../doc/datasets/auto-mpg.tab")
    d = orange.ExampleTable("../../doc/datasets/iris.tab")
    # Send the data, then None, then the data again
    # (presumably to exercise clearing and reloading -- confirm).
    w.cdata(d)
    w.cdata(None)
    w.cdata(d)
    app.exec_loop()
    # Runs after the event loop has returned.
    w.saveSettings()

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -