📄 miemdd.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
    result.enable(Capability.NOMINAL_ATTRIBUTES);    result.enable(Capability.RELATIONAL_ATTRIBUTES);    result.enable(Capability.MISSING_VALUES);    // class    result.enable(Capability.BINARY_CLASS);    result.enable(Capability.MISSING_CLASS_VALUES);        // other    result.enable(Capability.ONLY_MULTIINSTANCE);        return result;  }  /**   * Returns the capabilities of this multi-instance classifier for the   * relational data.   *   * @return            the capabilities of this object   * @see               Capabilities   */  public Capabilities getMultiInstanceCapabilities() {    Capabilities result = super.getCapabilities();        // attributes    result.enable(Capability.NOMINAL_ATTRIBUTES);    result.enable(Capability.NUMERIC_ATTRIBUTES);    result.enable(Capability.DATE_ATTRIBUTES);    result.enable(Capability.MISSING_VALUES);    // class    result.disableAllClasses();    result.enable(Capability.NO_CLASS);        return result;  }  /**   * Builds the classifier   *   * @param train the training data to be used for generating the   * boosted classifier.   * @throws Exception if the classifier could not be built successfully   */  public void buildClassifier(Instances train) throws Exception {    // can classifier handle the data?    getCapabilities().testWithFail(train);    // remove instances with missing class    train = new Instances(train);    train.deleteWithMissingClass();        m_ClassIndex = train.classIndex();    m_NumClasses = train.numClasses();    int nR = train.attribute(1).relation().numAttributes();    int nC = train.numInstances();    int[] bagSize = new int[nC];    Instances datasets = new Instances(train.attribute(1).relation(), 0);    m_Data = new double [nC][nR][];              // Data values    m_Classes = new int [nC];                    // Class values    m_Attributes = datasets.stringFreeStructure();    if (m_Debug) {      System.out.println("\n\nExtracting data...");    }    for (int h = 0; h < nC; h++)  {//h_th bag      Instance current = train.instance(h);      m_Classes[h] = (int)current.classValue();  // Class value starts from 0      Instances currInsts = current.relationalValue(1);      for (int i = 0; i < currInsts.numInstances(); i++){        Instance inst = currInsts.instance(i);        datasets.add(inst);      }      int nI = currInsts.numInstances();      bagSize[h] = nI;    }    /* filter the training data */    if (m_filterType == FILTER_STANDARDIZE)        m_Filter = new Standardize();    else if (m_filterType == FILTER_NORMALIZE)      m_Filter = new Normalize();    else       m_Filter = null;     if (m_Filter != null) {          m_Filter.setInputFormat(datasets);      datasets = Filter.useFilter(datasets, m_Filter); 	    }    m_Missing.setInputFormat(datasets);    datasets = Filter.useFilter(datasets, m_Missing);    int instIndex = 0;    int start = 0;	    for (int h = 0; h < nC; h++)  {	      for (int i = 0; i < datasets.numAttributes(); i++) {        // initialize m_data[][][]        m_Data[h][i] = new double[bagSize[h]];        instIndex=start;        for (int k = 0; k < bagSize[h]; k++){          m_Data[h][i][k] = datasets.instance(instIndex).value(i);          instIndex++;        }      }      start=instIndex;    }    if (m_Debug) {      System.out.println("\n\nIteration History..." );    }    m_emData =new double[nC][nR];    m_Par= new double[2*nR];    double[] x = new double[nR*2];    double[] tmp = new double[x.length];    double[] pre_x = new double[x.length];    double[] best_hypothesis = new double[x.length];    double[][] b = new double[2][x.length];    OptEng opt;    double bestnll = Double.MAX_VALUE;    double min_error = Double.MAX_VALUE;    double nll, pre_nll;    int iterationCount;    for (int t = 0; t < x.length; t++) {      b[0][t] = Double.NaN;      b[1][t] = Double.NaN;    }    //random pick 3 positive bags     Random r = new Random(getSeed());    FastVector index = new FastVector();     int n1, n2, n3;    do {      n1 = r.nextInt(nC-1);	    } while (m_Classes[n1] == 0);    index.addElement(new Integer(n1));     do {      n2 = r.nextInt(nC-1);    } while (n2 == n1|| m_Classes[n2] == 0);    index.addElement(new Integer(n2));     do {      n3 = r.nextInt(nC-1);    } while (n3 == n1 || n3 == n2 || m_Classes[n3] == 0);    index.addElement(new Integer(n3));    for (int s = 0; s < index.size(); s++){      int exIdx = ((Integer)index.elementAt(s)).intValue();      if (m_Debug)        System.out.println("\nH0 at "+exIdx);      for (int p = 0; p < m_Data[exIdx][0].length; p++) {        //initialize a hypothesis        for (int q = 0; q < nR; q++) {          x[2 * q] = m_Data[exIdx][q][p];          x[2 * q + 1] = 1.0;        }         pre_nll = Double.MAX_VALUE;        nll = Double.MAX_VALUE/10.0;        iterationCount = 0;        //while (Math.abs(nll-pre_nll)>0.01*pre_nll && iterationCount<10) {  //stop condition        while (nll < pre_nll && iterationCount < 10) {          iterationCount++;          pre_nll = nll;          if (m_Debug)             System.out.println("\niteration: "+iterationCount);          //E-step (find one instance from each bag with max likelihood )          for (int i = 0; i < m_Data.length; i++) { //for each bag            int insIndex = findInstance(i, x);             for (int att = 0; att < m_Data[0].length; att++) //for each attribute              m_emData[i][att] = m_Data[i][att][insIndex];          }          if (m_Debug)            System.out.println("E-step for new H' finished");          //M-step          opt = new OptEng();          tmp = opt.findArgmin(x, b);          while (tmp == null) {            tmp = opt.getVarbValues();            if (m_Debug)              System.out.println("200 iterations finished, not enough!");            tmp = opt.findArgmin(tmp, b);          }          nll = opt.getMinFunction();          pre_x = x;          x = tmp; // update hypothesis           //keep the track of the best target point which has the minimum nll          /* if (nll < bestnll) {             bestnll = nll;             m_Par = tmp;             if (m_Debug)             System.out.println("!!!!!!!!!!!!!!!!Smaller NLL found: " + nll);             }*/          //if (m_Debug)          //System.out.println(exIdx+" "+p+": "+nll+" "+pre_nll+" " +bestnll);        } //converged for one instance        //evaluate the hypothesis on the training data and        //keep the track of the hypothesis with minimum error on training data        double distribution[] = new double[2];        int error = 0;        if (nll > pre_nll)          m_Par = pre_x;         else          m_Par = x;        for (int i = 0; i<train.numInstances(); i++) {          distribution = distributionForInstance (train.instance(i));          if (distribution[1] >= 0.5 && m_Classes[i] == 0)            error++;          else if (distribution[1]<0.5 && m_Classes[i] == 1)            error++;        }        if (error < min_error) {          best_hypothesis = m_Par;          min_error = error;          if (nll > pre_nll)            bestnll = pre_nll;          else            bestnll = nll;          if (m_Debug)            System.out.println("error= "+ error +"  nll= " + bestnll);        }      }      if (m_Debug) {        System.out.println(exIdx+ ":  -------------<Converged>--------------");        System.out.println("current minimum error= "+min_error+"  nll= "+bestnll);      }    }     m_Par = best_hypothesis;  }  /**   * given x, find the instance in ith bag with the most likelihood   * probability, which is most likely to responsible for the label of the   * bag For a positive bag, find the instance with the maximal probability   * of being positive For a negative bag, find the instance with the minimal   * probability of being negative   *   * @param i the bag index   * @param x the current values of variables   * @return index of the instance in the bag   */  protected int findInstance(int i, double[] x){    double min=Double.MAX_VALUE;    int insIndex=0;    int nI = m_Data[i][0].length; // numInstances in ith bag    for (int j=0; j<nI; j++){      double ins=0.0;      for (int k=0; k<m_Data[i].length; k++)  // for each attribute        ins += (m_Data[i][k][j]-x[k*2])*(m_Data[i][k][j]-x[k*2])*          x[k*2+1]*x[k*2+1];      //the probability can be calculated as Math.exp(-ins)      //to find the maximum Math.exp(-ins) is equivalent to find the minimum of (ins)      if (ins<min)  {        min=ins;        insIndex=j;      }    }    return insIndex;  }  /**   * Computes the distribution for a given exemplar   *   * @param exmp the exemplar for which distribution is computed   * @return the distribution   * @throws Exception if the distribution can't be computed successfully   */  public double[] distributionForInstance(Instance exmp)    throws Exception {    // Extract the data    Instances ins = exmp.relationalValue(1);    if (m_Filter != null)      ins = Filter.useFilter(ins, m_Filter);    ins = Filter.useFilter(ins, m_Missing);    int nI = ins.numInstances(), nA = ins.numAttributes();    double[][] dat = new double [nI][nA];    for (int j = 0; j < nI; j++){      for (int k=0; k<nA; k++){        dat[j][k] = ins.instance(j).value(k);      }    }    //find the concept instance in the exemplar    double min = Double.MAX_VALUE;    double maxProb = -1.0;    for (int j = 0; j < nI; j++){      double exp = 0.0;      for (int k = 0; k<nA; k++)  // for each attribute        exp += (dat[j][k]-m_Par[k*2])*(dat[j][k]-m_Par[k*2])*m_Par[k*2+1]*m_Par[k*2+1];      //the probability can be calculated as Math.exp(-exp)      //to find the maximum Math.exp(-exp) is equivalent to find the minimum of (exp)      if (exp < min)  {        min     = exp;        maxProb = Math.exp(-exp); //maximum probability of being positive         }    }	    // Compute the probability of the bag    double[] distribution = new double[2];    distribution[1] = maxProb;     distribution[0] = 1.0 - distribution[1];  //mininum prob. of being negative    return distribution;  }  /**   * Gets a string describing the classifier.   *   * @return a string describing the classifer built.   */  public String toString() {    String result = "MIEMDD";    if (m_Par == null) {      return result + ": No model built yet.";    }    result += "\nCoefficients...\n"      + "Variable       Point       Scale\n";    for (int j = 0, idx=0; j < m_Par.length/2; j++, idx++) {      result += m_Attributes.attribute(idx).name();      result += " "+Utils.doubleToString(m_Par[j*2], 12, 4);      result += " "+Utils.doubleToString(m_Par[j*2+1], 12, 4)+"\n";    }    return result;  }  /**   * Main method for testing this class.   *   * @param argv should contain the command line arguments to the   * scheme (see Evaluation)   */  public static void main(String[] argv) {    runClassifier(new MIEMDD(), argv);  }}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -