⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ownomogram.py

📁 orange源码 数据挖掘技术
💻 PY
📖 第 1 页 / 共 3 页
字号:
                values = []
                for i in range(2*numOfPartitions):
                    if curr_num+i*d>=minAtValue and curr_num+i*d<=maxAtValue:
                        # get thickness
                        if self.data:
                            thickness = float(len(self.data.filter({att[at].name:(curr_num+i*d-d/2, curr_num+i*d+d/2)})))/len(self.data)
                        else:
                            thickness = 0.0
                        d_filter = filter(lambda x: x>curr_num+i*d-d/2 and x<curr_num+i*d+d/2, cl.conditionalDistributions[at].keys())
                        if len(d_filter)>0:
                            cd = cl.conditionalDistributions[at]
                            conditional0 = avg([cd[f][classVal[self.TargetClassIndex]] for f in d_filter])
                            conditional0 = min(1-aproxZero,max(aproxZero,conditional0))
                            conditional1 = 1-conditional0
                            try:
                                # compute error of loess in logistic space
                                var = avg([cd[f].variances[self.TargetClassIndex] for f in d_filter])
                                standard_error= math.sqrt(var)
                                rightError0 = (conditional0+standard_error)/max(conditional1-standard_error, aproxZero)
                                leftError0  =  max(conditional0-standard_error, aproxZero)/(conditional1+standard_error)
                                se = (math.log(rightError0) - math.log(leftError0))/2
                                se = math.sqrt(math.pow(se,2)+math.pow(priorError,2))

                                # add value to set of values                                
                                a.addAttValue(AttValue(str(round(curr_num+i*d,rndFac)),
                                                       math.log(conditional0/conditional1/prior),
                                                       lineWidth=thickness,
                                                       error = se))
                            except:
                                pass
                a.continuous = True
                # invert values:
            # if there are more than 1 value in the attribute, add it to the nomogram
            if len(a.attValues)>1:
                self.bnomogram.addAttribute(a)

        self.graph.setCanvas(self.bnomogram)
        self.bnomogram.show()
        self.error()

    # Input channel: the logistic regression classifier    
    def lrClassifier(self, cl):
        """Build and show the nomogram for a logistic regression classifier.

        ``cl.beta[0]`` becomes the nomogram constant.  Attributes of
        ``cl.continuizedDomain`` that carry a ``getValueFrom`` are grouped
        back into one line per source variable; all other attributes get a
        continuous line sampled between their minimum and maximum (or
        between -1 and 1 when the widget holds no data).
        """
        # Contributions are negated when the target index is 0 (or compares
        # equal to the first class value).
        mult = 1
        if self.TargetClassIndex == 0 or self.TargetClassIndex == cl.domain.classVar[0]:
            mult = -1

        constant = AttValue('Constant', mult*cl.beta[0], error = 0)
        self.bnomogram = BasicNomogram(self, constant)

        # After feature subset selection on discrete attributes the error of
        # a dropped value is unknown; approximate it by spreading the
        # intercept's squared standard error over the attributes.
        try:
            intercept_variance = math.pow(cl.beta_se[0], 2)
            prior_error = math.sqrt(intercept_variance/len(cl.domain.attributes))
        except:
            prior_error = 0

        attributes = cl.continuizedDomain.attributes

        # Every attribute starts unvisited; the grouping below marks the
        # ones it has already consumed.
        for at in attributes:
            at.setattr("visited", 0)

        for at in attributes:
            if at.getValueFrom:
                if at.visited != 0:
                    continue
                # Indicator attribute: collect all indicators that stem
                # from the same source variable into a single line.
                source = at.getValueFrom.variable
                if source.ordered:
                    line = AttrLineOrdered(source.name, self.bnomogram)
                else:
                    line = AttrLine(source.name, self.bnomogram)

                excluded = []
                for val in source.values:
                    matched = False
                    for same in attributes:
                        if same.visited != 0 or not same.getValueFrom:
                            continue
                        origin = same.getValueFrom
                        if origin.variable == source and origin.variable.values[origin.transformer.value] == val:
                            same.setattr("visited", 1)
                            line.addAttValue(AttValue(val, mult*cl.beta[same], error = cl.beta_se[same]))
                            matched = True
                    if not matched:
                        excluded.append(val)

                # Values without an indicator sit at zero; label them
                # individually, as a pair, or collectively as "Other".
                label = None
                if len(excluded) > 2:
                    label = "Other"
                elif len(excluded) == 2:
                    label = "(" + excluded[0] + "," + excluded[1] + ")"
                elif len(excluded) == 1:
                    label = excluded[0]
                if label is not None:
                    line.addAttValue(AttValue(label, 0, error = prior_error))
            else:
                if at.visited != 0:
                    continue
                # Plain attribute: sample a continuous line.
                line = AttrLineCont(at.name, self.bnomogram)
                if self.data:
                    stats = orange.DomainBasicAttrStat(self.data)
                    upper = stats[at].max
                    lower = stats[at].min
                else:
                    upper = 1.
                    lower = -1.
                numOfPartitions = 50.
                step = getDiff((upper-lower)/numOfPartitions)

                # starting point for continuous attribute sampling
                position = getStartingPoint(step, lower)
                rndFac = getRounding(step)  # result is not used in this method

                while position < upper+step:
                    contribution = mult*position*cl.beta[at]
                    if abs(contribution) < aproxZero:
                        line.addAttValue(AttValue("0.0", 0))
                    else:
                        line.addAttValue(AttValue(str(position), contribution))
                    position += step
                line.continuous = True
                at.setattr("visited", 1)

            # only lines with more than one value are added to the nomogram
            if len(line.attValues) > 1:
                self.bnomogram.addAttribute(line)

        self.alignRadio.setDisabled(True)
        self.alignType = 0
        self.graph.setCanvas(self.bnomogram)
        self.bnomogram.show()
        self.error()

    def svmClassifier(self, cl):
        import Numeric
        import orngLinVis
        
        if self.TargetClassIndex == 0 or self.TargetClassIndex == cl.domain.classVar[0]:
            mult = -1
        else:
            mult = 1

        try:
            visualizer = orngLinVis.Visualizer(self.data, cl, buckets=1, dimensions=1)
            beta_from_cl = self.cl.estimator.classifier.classifier.beta[0] - self.cl.estimator.translator.trans[0].disp*self.cl.estimator.translator.trans[0].mult*self.cl.estimator.classifier.classifier.beta[1]
            beta_from_cl = mult*beta_from_cl
        except:
            self.error("orngLinVis.Visualizer error"+ str(sys.exc_info()[0])+":"+str(sys.exc_info()[1]))
#            QMessageBox("orngLinVis.Visualizer error", str(sys.exc_info()[0])+":"+str(sys.exc_info()[1]), QMessageBox.Warning,
#                        QMessageBox.NoButton, QMessageBox.NoButton, QMessageBox.NoButton, self).show()
            return
        
        self.bnomogram = BasicNomogram(self, AttValue('Constant', -mult*math.log((1.0/min(max(visualizer.probfunc(0.0),aproxZero),0.9999))-1), 0))

        # get maximum and minimum values in visualizer.m
        maxMap = reduce(Numeric.maximum, visualizer.m)
        minMap = reduce(Numeric.minimum, visualizer.m)

        coeff = 0 #
        at_num = 1
        correction = self.cl.coeff*self.cl.estimator.translator.trans[0].mult*self.cl.estimator.classifier.classifier.beta[1]
        for c in visualizer.coeff_names:
            if type(c[1])==str:
                for i in range(len(c)):
                    if i == 0:
                        if self.data.domain[c[0]].ordered:
                            a = AttrLineOrdered(c[i], self.bnomogram)
                        else:
                            a = AttrLine(c[i], self.bnomogram)                            
                        at_num = at_num + 1
                    else:
                        if self.data:
                            thickness = float(len(self.data.filter({self.data.domain[c[0]].name:str(c[i])})))/float(len(self.data))
                        a.addAttValue(AttValue(c[i], correction*mult*visualizer.coeffs[coeff], lineWidth=thickness))
                        coeff = coeff + 1
            else:
                a = AttrLineCont(c[0], self.bnomogram)

                # get min and max from Data and transform coeff accordingly
                maxNew=maxMap[coeff]
                minNew=maxMap[coeff]
                if self.data:
                    bas = orange.DomainBasicAttrStat(self.data)
                    maxNew = bas[c[0]].max
                    minNew = bas[c[0]].min

                # transform SVM betas to betas siutable for nomogram
                if maxNew == minNew:
                    beta = ((maxMap[coeff]-minMap[coeff])/aproxZero)*visualizer.coeffs[coeff]
                else:
                    beta = ((maxMap[coeff]-minMap[coeff])/(maxNew-minNew))*visualizer.coeffs[coeff]
                n = -minNew+minMap[coeff]
                
                numOfPartitions = 50
                d = getDiff((maxNew-minNew)/numOfPartitions)

                # get curr_num = starting point for continuous att. sampling
                curr_num = getStartingPoint(d, minNew) 
                rndFac = getRounding(d)
                
                while curr_num<maxNew+d:
                    a.addAttValue(AttValue(str(curr_num), correction*(mult*(curr_num-minNew)*beta-minMap[coeff]*visualizer.coeffs[coeff])))
                    curr_num += d

                at_num = at_num + 1
                coeff = coeff + 1
                a.continuous = True
                
            # if there are more than 1 value in the attribute, add it to the nomogram
            if len(a.attValues)>1:
                self.bnomogram.addAttribute(a)
        self.cl.domain = orange.Domain(self.data.domain.classVar)
        self.graph.setCanvas(self.bnomogram)
        self.bnomogram.show()
        self.error()

    def initClassValues(self, classValue):
        """Refill the target combo box with ``str()`` of each item of *classValue*.

        Existing combo entries are removed first.
        """
        combo = self.targetCombo
        combo.clear()
        for value in classValue:
            label = str(value)
            combo.insertItem(label)
            
    def classifier(self, cl):
        """Input handler: store classifier *cl* and refresh the widget.

        The target combo is refilled when *cl* is given and either no
        classifier was held before or its domain differs from the previous
        one.  ``self.data`` is taken from ``cl.data`` when that attribute
        exists, otherwise it is set to None.  Histogram and confidence
        interval controls are enabled only when data is available.  When the
        data's domain has no class variable an error is reported and the
        nomogram is not updated.
        """
        if cl and (not self.cl or not self.cl.domain == cl.domain):
            self.initClassValues(cl.domain.classVar)
        self.cl = cl

        data = None
        if hasattr(cl, "data"):
            data = cl.data
        self.data = data

        if data and data.domain and not data.domain.classVar:
            self.error("OWNomogram:"+" This domain has no class attribute!")
            return

        # Each option is a (check box, label) pair; they are switched
        # together, check box first.
        options = ((self.histogramCheck, self.histogramLabel),
                   (self.CICheck, self.CILabel))
        if data:
            for check, label in options:
                check.setEnabled(True)
                label.setEnabled(True)
        else:
            for check, label in options:
                check.setChecked(False)
                check.setDisabled(True)
                label.setDisabled(True)
        self.updateNomogram()
        
    def setTarget(self):
        """Set ``TargetClassIndex`` from ``self.target`` and redraw.

        ``self.target`` is compared with ``str()`` of each class value; if
        several values match, the last matching index is kept, and if none
        matches the index is left as it was.  The nomogram is updated in
        every case.
        """
        classVar = self.cl.domain.classVar
        matching = [index for index in range(len(classVar.values))
                    if str(classVar[index]) == self.target]
        if matching:
            self.TargetClassIndex = matching[-1]
        self.updateNomogram()
        
    def updateNomogram(self):
        import orngSVM

        def setNone():

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -