⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 owmergedata.py

📁 orange源码 数据挖掘技术
💻 PY
字号:
"""
<name>Merge Data</name>
<description>Merge datasets based on values of selected attributes.</description>
<icon>icons/MergeData.png</icon>
<priority>1110</priority>
<contact>Peter Juvan (peter.juvan@fri.uni-lj.si)</contact>
"""

import orange
from OWWidget import *
from qttable import *
import OWGUI


########################################################
## TODO: crashes if the same data is sent to both inputs
########################################################


class OWMergeData(OWWidget):

##    settingsList = ["memberName"]

    def __init__(self, parent=None, signalManager=None, name="Merge data"):
        """Create the widget: signal channels, empty state and the GUI.

        parent, signalManager and name are handed unchanged to
        OWWidget.__init__.
        """
        OWWidget.__init__(self, parent, signalManager, name)

        # signal channels: two example tables in, both merge directions out
        self.inputs = [
            ("Examples A", ExampleTable, self.onDataAInput),
            ("Examples B", ExampleTable, self.onDataBInput),
        ]
        self.outputs = [
            ("Merged Examples A+B", ExampleTable),
            ("Merged Examples B+A", ExampleTable),
        ]

        # state: nothing received and no attribute selected yet
        self.dataA = None
        self.dataB = None
        self.varListA = []
        self.varListB = []
        self.varA = None
        self.varB = None

        # stored settings
        self.loadSettings()

        # GUI: the main area is given zero width; every control lives in a
        # grid inside the control area (arguments presumably rows, columns,
        # margin -- confirm against the Qt3 QGridLayout constructor)
        self.mainArea.setFixedWidth(0)
        frame = QFrame(self.controlArea)
        grid = QGridLayout(frame, 2, 2, 5)

        # cell (0, 0): list of attributes of data A
        attrBoxA = QVGroupBox('Attribute A', frame)
        grid.addWidget(attrBoxA, 0, 0)
        self.lbAttrA = QListBox(attrBoxA)
        self.connect(self.lbAttrA, SIGNAL('selectionChanged()'), self.lbAttrAChange)

        # cell (0, 1): list of attributes of data B
        attrBoxB = QVGroupBox('Attribute B', frame)
        grid.addWidget(attrBoxB, 0, 1)
        self.lbAttrB = QListBox(attrBoxB)
        self.connect(self.lbAttrB, SIGNAL('selectionChanged()'), self.lbAttrBChange)

        # cell (1, 0): info labels for data A (placeholder texts until
        # updateInfoA rewrites them)
        infoBoxA = QVGroupBox('Data A', frame)
        grid.addWidget(infoBoxA, 1, 0)
        self.lblDataAExamples = OWGUI.widgetLabel(infoBoxA, "num examples")
        self.lblDataAAttributes = OWGUI.widgetLabel(infoBoxA, "num attributes")

        # cell (1, 1): info labels for data B
        infoBoxB = QVGroupBox('Data B', frame)
        grid.addWidget(infoBoxB, 1, 1)
        self.lblDataBExamples = OWGUI.widgetLabel(infoBoxB, "num examples")
        self.lblDataBAttributes = OWGUI.widgetLabel(infoBoxB, "num attributes")

        # icons shown next to attribute names, looked up by var.varType
        self.icons = self.createAttributeIconDict()

        # initial window size
        self.resize(500, 500)


    ############################################################################################################################################################
    ## Data input and output management
    ############################################################################################################################################################

    def onDataAInput(self, data):
        """Handle a new table arriving on input "Examples A".

        Stores the table in self.dataA, rebuilds self.varListA, forgets
        self.varA if it is no longer among the available variables, refreshes
        the info labels and the attribute list box, and re-sends the outputs.

        data: the incoming example table (may be None or empty).
        """
        # When A has the same domain as B, give A a fresh domain made of cloned
        # variables (presumably so that the merged tables never contain the
        # same variable object twice -- TODO confirm).
        if data and self.dataB and data.domain == self.dataB.domain:
            if data.domain.classVar:
                classVar = data.domain.classVar.clone()
            else:
                classVar = None
            # Clone the ordinary attributes only, exactly as onDataBInput does;
            # the class variable is cloned above and passed separately, so it
            # must not be cloned into the attribute list as well.
            dc = orange.Domain([x.clone() for x in data.domain.attributes], classVar)
            # Each cloned variable takes its value from the same position of
            # the source example; i=i freezes the loop index for each lambda.
            for i, a in enumerate(dc):
                a.getValueFrom = lambda ex, f, i=i: ex[i]
            # meta attributes are reused as they are, not cloned
            dc.addmetas(data.domain.getmetas())
            self.dataA = orange.ExampleTable(dc, data)
        else:
            self.dataA = data
        # update self.varListA (regular variables followed by metas) and self.varA
        if self.dataA:
            self.varListA = self.dataA.domain.variables.native() + self.dataA.domain.getmetas().values()
        else:
            self.varListA = []
        if self.varA not in self.varListA:
            self.varA = None
        # update info
        self.updateInfoA()
        # update attribute A listbox
        self.lbAttrA.clear()
        for var in self.varListA:
            self.lbAttrA.insertItem(self.icons[var.varType], var.name)
        self.sendData()


    def onDataBInput(self, data):
        """Handle a new table arriving on input "Examples B".

        Stores the table in self.dataB, rebuilds self.varListB, forgets
        self.varB if it is no longer among the available variables, refreshes
        the info labels and the attribute list box, and re-sends the outputs.

        data: the incoming example table (may be None or empty).
        """
        if not (data and self.dataA and data.domain == self.dataA.domain):
            # nothing to disambiguate: keep the table as received
            self.dataB = data
        else:
            # B has the same domain as A: rebuild B over a domain of cloned
            # variables (class variable first, then the ordinary attributes)
            sourceDomain = data.domain
            clonedClass = None
            if sourceDomain.classVar:
                clonedClass = sourceDomain.classVar.clone()
            clonedAttrs = [attr.clone() for attr in sourceDomain.attributes]
            newDomain = orange.Domain(clonedAttrs, clonedClass)
            # every cloned variable reads its value from the same position of
            # the source example; the default argument freezes that position
            for position, clonedVar in enumerate(newDomain):
                clonedVar.getValueFrom = lambda ex, f, i=position: ex[i]
            # meta attributes are reused as they are, not cloned
            newDomain.addmetas(sourceDomain.getmetas())
            self.dataB = orange.ExampleTable(newDomain, data)

        # selectable variables: regular variables followed by meta attributes
        if not self.dataB:
            self.varListB = []
        else:
            domainB = self.dataB.domain
            self.varListB = domainB.variables.native() + domainB.getmetas().values()
        # a previously chosen variable that is no longer offered is dropped
        if self.varB not in self.varListB:
            self.varB = None

        self.updateInfoB()

        # refill the attribute B list box, one icon + name per variable
        self.lbAttrB.clear()
        for variable in self.varListB:
            self.lbAttrB.insertItem(self.icons[variable.varType], variable.name)

        self.sendData()


    def updateInfoA(self):
        """Refresh the two labels of the 'Data A' info box.

        Shows the number of examples in self.dataA and the number of entries
        in self.varListA, or a "no data" message when self.dataA is empty/None.
        """
        if not self.dataA:
            examplesText = "No data on input A."
            attributesText = ""
        else:
            examplesText = "%s example%s" % self._sp(self.dataA)
            attributesText = "%s attribute%s" % self._sp(self.varListA)
        self.lblDataAExamples.setText(examplesText)
        self.lblDataAAttributes.setText(attributesText)
        

    def updateInfoB(self):
        """Refresh the two labels of the 'Data B' info box.

        Shows the number of examples in self.dataB and the number of entries
        in self.varListB, or a "no data" message when self.dataB is empty/None.
        """
        if not self.dataB:
            examplesText = "No data on input B."
            attributesText = ""
        else:
            examplesText = "%s example%s" % self._sp(self.dataB)
            attributesText = "%s attribute%s" % self._sp(self.varListB)
        self.lblDataBExamples.setText(examplesText)
        self.lblDataBAttributes.setText(attributesText)


    def sendData(self):
        """Build both merged tables and send them to the output channels.

        "Merged Examples A+B" is data A with, to its right, the variables of
        data B (without self.varB): every example of A is paired with the
        example of B whose self.varB value equals its self.varA value, or with
        unknown values when there is no such example.  "Merged Examples B+A"
        is built the same way with the roles of A and B swapped.

        When several examples share a key value, the last one is used.  If
        either key attribute is not selected, None is sent on both channels.
        """
        if not (self.varA and self.varB):
            self.send("Merged Examples A+B", None)
            self.send("Merged Examples B+A", None)
            return

        # attribute value -> example index, for both tables
        val2idxDictA = self._valueToIndex(self.dataA, self.varA)
        val2idxDictB = self._valueToIndex(self.dataB, self.varB)

        # example table names, with fallbacks for unnamed tables
        nameA = self.dataA.name or "Examples A"
        nameB = self.dataB.name or "Examples B"

        etAB = self._mergeRight(self.dataA, self.varA,
                                self.dataB, self.varB, self.varListB, val2idxDictB)
        etAB.name = nameA + " (merged with %s)" % nameB
        self.send("Merged Examples A+B", etAB)

        etBA = self._mergeRight(self.dataB, self.varB,
                                self.dataA, self.varA, self.varListA, val2idxDictA)
        etBA.name = nameB + " (merged with %s)" % nameA
        self.send("Merged Examples B+A", etBA)

    def _valueToIndex(self, data, var):
        """Return a dict mapping each native value of var in data to the index
        of the last example holding it, without entries for missing values."""
        val2idx = {}
        for eIdx, e in enumerate(data):
            val2idx[e[var].native()] = eIdx
        # Missing values must never act as join keys.  The original code marks
        # these three keys as a workaround for "bug 62" (special values are
        # matched by their native() text) -- revisit once that is fixed.
        for special in ("?", "~", ""):
            val2idx.pop(special, None)
        return val2idx

    def _mergeRight(self, left, leftVar, right, rightVar, rightVarList, rightVal2idx):
        """Return left extended to the right with the matching examples of
        right, restricted to rightVarList without rightVar."""
        # example of `right` with all values unknown, used where nothing matches
        unknown = orange.Example(right[0])
        for var in rightVarList:
            unknown[var] = "?"
        # domain of the appended part: every offered variable except the key
        reducedVars = list(rightVarList)
        reducedVars.remove(rightVar)
        reducedDomain = orange.Domain(reducedVars, None)
        reduced = orange.ExampleTable(reducedDomain)
        for e in left:
            rightIdx = rightVal2idx.get(e[leftVar].native(), None)
            if rightIdx is not None:
                # the example comes from the full domain of `right`; appending
                # it to `reduced` presumably converts it to reducedDomain
                reduced.append(right[rightIdx])
            else:
                reduced.append(orange.Example(reducedDomain, unknown))
        # place the two tables side by side
        return orange.ExampleTable([left, reduced])


    ############################################################################################################################################################
    ## Event handlers
    ############################################################################################################################################################

    def lbAttrAChange(self):
        """React to a selection change in the attribute A list box.

        Sets self.varA to the variable at the current, selected row of the
        list box (None when there is no data A or no selected current row)
        and re-sends the outputs.
        """
        chosen = None
        if self.dataA:
            row = self.lbAttrA.currentItem()
            if row >= 0 and self.lbAttrA.isSelected(row):
                # list box rows are in the same order as self.varListA
                chosen = self.varListA[row]
        self.varA = chosen
        self.sendData()


    def lbAttrBChange(self):
        """React to a selection change in the attribute B list box.

        Sets self.varB to the variable at the current, selected row of the
        list box (None when there is no data B or no selected current row)
        and re-sends the outputs.
        """
        chosen = None
        if self.dataB:
            row = self.lbAttrB.currentItem()
            if row >= 0 and self.lbAttrB.isSelected(row):
                # list box rows are in the same order as self.varListB
                chosen = self.varListB[row]
        self.varB = chosen
        self.sendData()


    ############################################################################################################################################################
    ## Utility functions 
    ############################################################################################################################################################

    def _sp(self, l, capitalize=True):
        """Input: list; returns tupple (str(len(l)), "s"/"")
        """
        n = len(l)
        if n == 0:
            if capitalize:                    
                return "No", "s"
            else:
                return "no", "s"
        elif n == 1:
            return str(n), ''
        else:
            return str(n), 's'



if __name__ == "__main__":
    # Manual demo: feed two tables into the widget and show both merged
    # outputs in a data table widget.
    import sys
    import OWDataTable
    import orngSignalManager

    signalManager = orngSignalManager.SignalManager(0)

    # NOTE: hard-coded paths on the original author's machine; point these at
    # two local .tab files to run the demo.
    dataA = orange.ExampleTable(r'c:\Documents and Settings\peterjuv\My Documents\et1.tab')
    dataB = orange.ExampleTable(r'c:\Documents and Settings\peterjuv\My Documents\et2.tab')

    app = QApplication(sys.argv)
    mergeWidget = OWMergeData()
    app.setMainWidget(mergeWidget)
    mergeWidget.show()
    mergeWidget.onDataAInput(dataA)
    mergeWidget.onDataBInput(dataB)

    # data table receiving both outputs of the merge widget
    tableWidget = OWDataTable.OWDataTable(signalManager=signalManager)
    signalManager.addWidget(mergeWidget)
    signalManager.addWidget(tableWidget)
    # links are added between setFreeze(1) and setFreeze(0)
    signalManager.setFreeze(1)
    for channel in ('Merged Examples A+B', 'Merged Examples B+A'):
        signalManager.addLink(mergeWidget, tableWidget, channel, 'Examples', 1)
    signalManager.setFreeze(0)

    tableWidget.show()
    app.exec_loop()

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -