📄 CfsSubsetEval.java
      // (excerpt resumes mid-method: redistributing contingency-table counts
      // for missing values before computing symmetrical uncertainty)

      // do the missing j's
      if (sumj[nj - 1] > 0.0) {
        for (i = 0; i < ni - 1; i++) {
          if (counts[i][nj - 1] > 0.0) {
            for (j = 0; j < nj - 1; j++) {
              temp = ((j_copy[j] / (sum - j_copy[nj - 1])) * counts[i][nj - 1]);
              counts[i][j] += temp;
              sumj[j] += temp;
            }
            counts[i][nj - 1] = 0.0;
          }
        }
      }
      sumj[nj - 1] = 0.0;

      // do the both missing
      if (counts[ni - 1][nj - 1] > 0.0 && total_missing != sum) {
        for (i = 0; i < ni - 1; i++) {
          for (j = 0; j < nj - 1; j++) {
            temp = (counts_copy[i][j] / (sum - total_missing))
              * counts_copy[ni - 1][nj - 1];
            counts[i][j] += temp;
            sumi[i] += temp;
            sumj[j] += temp;
          }
        }
        counts[ni - 1][nj - 1] = 0.0;
      }
    }

    corr_measure = ContingencyTables.symmetricalUncertainty(counts);

    // a measure of exactly zero maps to 0.0 or 1.0 depending on "flag",
    // which is set earlier in this method (outside this excerpt)
    if (Utils.eq(corr_measure, 0.0)) {
      if (flag) {
        return 0.0;
      } else {
        return 1.0;
      }
    } else {
      return corr_measure;
    }
  }

  /**
   * Computes the absolute Pearson correlation between two numeric
   * attributes. Missing values contribute zero deviation. If either
   * attribute has zero variance, the pair is scored 1.0 (maximally
   * penalized) unless one of them is the class, in which case it is
   * scored 0.0.
   */
  private double num_num(int att1, int att2) {
    int i;
    Instance inst;
    double r, diff1, diff2, num = 0.0, sx = 0.0, sy = 0.0;
    double mx = m_trainInstances.meanOrMode(m_trainInstances.attribute(att1));
    double my = m_trainInstances.meanOrMode(m_trainInstances.attribute(att2));

    for (i = 0; i < m_numInstances; i++) {
      inst = m_trainInstances.instance(i);
      diff1 = (inst.isMissing(att1)) ? 0.0 : (inst.value(att1) - mx);
      diff2 = (inst.isMissing(att2)) ? 0.0 : (inst.value(att2) - my);
      num += (diff1 * diff2);
      sx += (diff1 * diff1);
      sy += (diff2 * diff2);
    }

    if (sx != 0.0) {
      if (m_std_devs[att1] == 1.0) {
        m_std_devs[att1] = Math.sqrt(sx / m_numInstances);
      }
    }

    if (sy != 0.0) {
      if (m_std_devs[att2] == 1.0) {
        m_std_devs[att2] = Math.sqrt(sy / m_numInstances);
      }
    }

    if ((sx * sy) > 0.0) {
      r = (num / (Math.sqrt(sx * sy)));
      return (r < 0.0) ? -r : r;
    } else {
      if (att1 != m_classIndex && att2 != m_classIndex) {
        return 1.0;
      } else {
        return 0.0;
      }
    }
  }
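  /*
   * The next method, num_nom2, measures correlation between a nominal
   * attribute (att1) and a numeric attribute (att2): each value of att1
   * is dummy-coded as a 0/1 indicator variable, the absolute Pearson
   * correlation between each indicator and att2 is computed, and the
   * results are combined weighted by each value's prior frequency:
   *
   *   r = sum_i p_i * |corr(I_i, att2)|
   */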
  private double num_nom2(int att1, int att2) {
    int i, ii, k;
    double temp;
    Instance inst;
    int mx = (int) m_trainInstances.meanOrMode(m_trainInstances.attribute(att1));
    double my = m_trainInstances.meanOrMode(m_trainInstances.attribute(att2));
    double stdv_num = 0.0;
    double diff1, diff2;
    double r = 0.0, rr;
    int nx = (!m_missingSeperate)
      ? m_trainInstances.attribute(att1).numValues()
      : m_trainInstances.attribute(att1).numValues() + 1;
    double[] prior_nom = new double[nx];
    double[] stdvs_nom = new double[nx];
    double[] covs = new double[nx];

    for (i = 0; i < nx; i++) {
      stdvs_nom[i] = covs[i] = prior_nom[i] = 0.0;
    }

    // calculate frequencies of the values of the nominal attribute
    for (i = 0; i < m_numInstances; i++) {
      inst = m_trainInstances.instance(i);

      if (inst.isMissing(att1)) {
        // a missing value counts as the modal value, or as the extra
        // last value when missing is treated as a separate value
        ii = (!m_missingSeperate) ? mx : (nx - 1);
      } else {
        ii = (int) inst.value(att1);
      }

      prior_nom[ii]++;
    }

    for (k = 0; k < m_numInstances; k++) {
      inst = m_trainInstances.instance(k);

      // accumulate squared deviations of the numeric attribute
      diff2 = (inst.isMissing(att2)) ? 0.0 : (inst.value(att2) - my);
      stdv_num += (diff2 * diff2);

      // accumulate indicator-variable deviations and covariances
      for (i = 0; i < nx; i++) {
        if (inst.isMissing(att1)) {
          if (!m_missingSeperate) {
            temp = (i == mx) ? 1.0 : 0.0;
          } else {
            temp = (i == (nx - 1)) ? 1.0 : 0.0;
          }
        } else {
          temp = (i == inst.value(att1)) ? 1.0 : 0.0;
        }

        diff1 = (temp - (prior_nom[i] / m_numInstances));
        stdvs_nom[i] += (diff1 * diff1);
        covs[i] += (diff1 * diff2);
      }
    }

    // calculate the weighted correlation
    for (i = 0, temp = 0.0; i < nx; i++) {
      // weighted variance of the nominal attribute
      temp += ((prior_nom[i] / m_numInstances) * (stdvs_nom[i] / m_numInstances));

      if ((stdvs_nom[i] * stdv_num) > 0.0) {
        rr = (covs[i] / (Math.sqrt(stdvs_nom[i] * stdv_num)));

        if (rr < 0.0) {
          rr = -rr;
        }

        r += ((prior_nom[i] / m_numInstances) * rr);
      } else {
        /* If there is zero variance for the numeric attribute at a
           particular level of the categorical attribute, then if neither
           attribute is the class, make the correlation at this level
           maximally bad, i.e. 1.0. If either is the class, maximally bad
           correlation is 0.0. */
        if (att1 != m_classIndex && att2 != m_classIndex) {
          r += ((prior_nom[i] / m_numInstances) * 1.0);
        }
      }
    }

    // set the standard deviations for these attributes if necessary
    if (temp != 0.0) {
      if (m_std_devs[att1] == 1.0) {
        m_std_devs[att1] = Math.sqrt(temp);
      }
    }

    if (stdv_num != 0.0) {
      if (m_std_devs[att2] == 1.0) {
        m_std_devs[att2] = Math.sqrt(stdv_num / m_numInstances);
      }
    }

    if (r == 0.0) {
      if (att1 != m_classIndex && att2 != m_classIndex) {
        r = 1.0;
      }
    }

    return r;
  }
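  /*
   * nom_nom extends the same dummy-coding scheme to two nominal
   * attributes: every pair of values (i, j) yields an indicator/indicator
   * correlation, and the absolute correlations are combined weighted by
   * the joint frequency of the pair:
   *
   *   r = sum_i sum_j p_ij * |corr(I_i, I_j)|
   */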
  private double nom_nom(int att1, int att2) {
    int i, j, ii, jj, z;
    double temp1, temp2;
    Instance inst;
    int mx = (int) m_trainInstances.meanOrMode(m_trainInstances.attribute(att1));
    int my = (int) m_trainInstances.meanOrMode(m_trainInstances.attribute(att2));
    double diff1, diff2;
    double r = 0.0, rr;
    int nx = (!m_missingSeperate)
      ? m_trainInstances.attribute(att1).numValues()
      : m_trainInstances.attribute(att1).numValues() + 1;
    int ny = (!m_missingSeperate)
      ? m_trainInstances.attribute(att2).numValues()
      : m_trainInstances.attribute(att2).numValues() + 1;
    double[][] prior_nom = new double[nx][ny];
    double[] sumx = new double[nx];
    double[] sumy = new double[ny];
    double[] stdvsx = new double[nx];
    double[] stdvsy = new double[ny];
    double[][] covs = new double[nx][ny];

    for (i = 0; i < nx; i++) {
      sumx[i] = stdvsx[i] = 0.0;
    }

    for (j = 0; j < ny; j++) {
      sumy[j] = stdvsy[j] = 0.0;
    }

    for (i = 0; i < nx; i++) {
      for (j = 0; j < ny; j++) {
        covs[i][j] = prior_nom[i][j] = 0.0;
      }
    }

    // calculate joint and marginal frequencies of the nominal values
    for (i = 0; i < m_numInstances; i++) {
      inst = m_trainInstances.instance(i);

      if (inst.isMissing(att1)) {
        ii = (!m_missingSeperate) ? mx : (nx - 1);
      } else {
        ii = (int) inst.value(att1);
      }

      if (inst.isMissing(att2)) {
        jj = (!m_missingSeperate) ? my : (ny - 1);
      } else {
        jj = (int) inst.value(att2);
      }

      prior_nom[ii][jj]++;
      sumx[ii]++;
      sumy[jj]++;
    }

    for (z = 0; z < m_numInstances; z++) {
      inst = m_trainInstances.instance(z);

      // deviations of att2's indicator variables
      for (j = 0; j < ny; j++) {
        if (inst.isMissing(att2)) {
          if (!m_missingSeperate) {
            temp2 = (j == my) ? 1.0 : 0.0;
          } else {
            temp2 = (j == (ny - 1)) ? 1.0 : 0.0;
          }
        } else {
          temp2 = (j == inst.value(att2)) ? 1.0 : 0.0;
        }

        diff2 = (temp2 - (sumy[j] / m_numInstances));
        stdvsy[j] += (diff2 * diff2);
      }

      // deviations of att1's indicator variables, plus the covariances
      for (i = 0; i < nx; i++) {
        if (inst.isMissing(att1)) {
          if (!m_missingSeperate) {
            temp1 = (i == mx) ? 1.0 : 0.0;
          } else {
            temp1 = (i == (nx - 1)) ? 1.0 : 0.0;
          }
        } else {
          temp1 = (i == inst.value(att1)) ? 1.0 : 0.0;
        }

        diff1 = (temp1 - (sumx[i] / m_numInstances));
        stdvsx[i] += (diff1 * diff1);

        for (j = 0; j < ny; j++) {
          if (inst.isMissing(att2)) {
            if (!m_missingSeperate) {
              temp2 = (j == my) ? 1.0 : 0.0;
            } else {
              temp2 = (j == (ny - 1)) ? 1.0 : 0.0;
            }
          } else {
            temp2 = (j == inst.value(att2)) ? 1.0 : 0.0;
          }

          diff2 = (temp2 - (sumy[j] / m_numInstances));
          covs[i][j] += (diff1 * diff2);
        }
      }
    }

    // calculate the weighted correlation
    for (i = 0; i < nx; i++) {
      for (j = 0; j < ny; j++) {
        if ((stdvsx[i] * stdvsy[j]) > 0.0) {
          rr = (covs[i][j] / (Math.sqrt(stdvsx[i] * stdvsy[j])));

          if (rr < 0.0) {
            rr = -rr;
          }

          r += ((prior_nom[i][j] / m_numInstances) * rr);
        } else {
          /* If there is zero variance for either of the categorical
             attributes, then if neither is the class, make the correlation
             at this level maximally bad, i.e. 1.0. If either is the class,
             maximally bad correlation is 0.0. */
          if (att1 != m_classIndex && att2 != m_classIndex) {
            r += ((prior_nom[i][j] / m_numInstances) * 1.0);
          }
        }
      }
    }

    // calculate weighted standard deviations for these attributes
    // (if necessary)
    for (i = 0, temp1 = 0.0; i < nx; i++) {
      temp1 += ((sumx[i] / m_numInstances) * (stdvsx[i] / m_numInstances));
    }

    if (temp1 != 0.0) {
      if (m_std_devs[att1] == 1.0) {
        m_std_devs[att1] = Math.sqrt(temp1);
      }
    }

    for (j = 0, temp2 = 0.0; j < ny; j++) {
      temp2 += ((sumy[j] / m_numInstances) * (stdvsy[j] / m_numInstances));
    }

    if (temp2 != 0.0) {
      if (m_std_devs[att2] == 1.0) {
        m_std_devs[att2] = Math.sqrt(temp2);
      }
    }

    if (r == 0.0) {
      if (att1 != m_classIndex && att2 != m_classIndex) {
        r = 1.0;
      }
    }

    return r;
  }

  /**
   * Returns a string describing CFS.
   *
   * @return the description as a string
   */
  public String toString() {
    StringBuffer text = new StringBuffer();

    if (m_trainInstances == null) {
      text.append("CFS subset evaluator has not been built yet\n");
    } else {
      text.append("\tCFS Subset Evaluator\n");

      if (m_missingSeperate) {
        text.append("\tTreating missing values as a separate value\n");
      }

      if (m_locallyPredictive) {
        text.append("\tIncluding locally predictive attributes\n");
      }
    }

    return text.toString();
  }
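  /*
   * addLocallyPredictive implements Hall's "locally predictive"
   * post-processing heuristic: repeatedly pick the attribute outside the
   * current group with the highest correlation to the class, and admit it
   * only if that class correlation exceeds its correlation with every
   * attribute already in the group by more than m_c_Threshold. Pairwise
   * correlations are cached in the lower triangle of m_corr_matrix, with
   * -999 marking "not yet computed".
   */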
  private void addLocallyPredictive(BitSet best_group) {
    int i, j = 0;
    boolean done = false;
    boolean ok = true;
    double temp_best = -1.0;
    float corr;
    BitSet temp_group = (BitSet) best_group.clone();
    int larger, smaller;

    while (!done) {
      temp_best = -1.0;

      // find the best attribute not already in the group
      for (i = 0; i < m_numAttribs; i++) {
        if (i > m_classIndex) {
          larger = i;
          smaller = m_classIndex;
        } else {
          smaller = i;
          larger = m_classIndex;
        }

        if ((!temp_group.get(i)) && (i != m_classIndex)) {
          if (m_corr_matrix[larger][smaller] == -999) {
            corr = correlate(i, m_classIndex);
            m_corr_matrix[larger][smaller] = corr;
          }

          if (m_corr_matrix[larger][smaller] > temp_best) {
            temp_best = m_corr_matrix[larger][smaller];
            j = i;
          }
        }
      }

      if (temp_best == -1.0) {
        done = true;
      } else {
        ok = true;
        temp_group.set(j);

        // check the best candidate against its correlations with
        // attributes already in the group
        for (i = 0; i < m_numAttribs; i++) {
          if (i > j) {
            larger = i;
            smaller = j;
          } else {
            larger = j;
            smaller = i;
          }

          if (best_group.get(i)) {
            if (m_corr_matrix[larger][smaller] == -999) {
              corr = correlate(i, j);
              m_corr_matrix[larger][smaller] = corr;
            }

            if (m_corr_matrix[larger][smaller] > temp_best - m_c_Threshold) {
              ok = false;
              break;
            }
          }
        }

        // if ok then add to best_group
        if (ok) {
          best_group.set(j);
        }
      }
    }
  }

  /**
   * Calls addLocallyPredictive in order to include locally predictive
   * attributes (if requested).
   *
   * @param attributeSet the set of attributes found by the search
   * @return a possibly ranked list of postprocessed attributes
   * @throws Exception if postprocessing fails for some reason
   */
  public int[] postProcess(int[] attributeSet) throws Exception {
    int j = 0;

    if (!m_locallyPredictive) {
      return attributeSet;
    }

    BitSet bestGroup = new BitSet(m_numAttribs);

    for (int i = 0; i < attributeSet.length; i++) {
      bestGroup.set(attributeSet[i]);
    }

    addLocallyPredictive(bestGroup);

    // count how many attributes are set
    for (int i = 0; i < m_numAttribs; i++) {
      if (bestGroup.get(i)) {
        j++;
      }
    }

    int[] newSet = new int[j];
    j = 0;

    for (int i = 0; i < m_numAttribs; i++) {
      if (bestGroup.get(i)) {
        newSet[j++] = i;
      }
    }

    return newSet;
  }

  protected void resetOptions() {
    m_trainInstances = null;
    m_missingSeperate = false;
    m_locallyPredictive = true;
    m_c_Threshold = 0.0;
  }

  /**
   * Main method for testing this class.
   *
   * @param args the options
   */
  public static void main(String[] args) {
    runEvaluator(new CfsSubsetEval(), args);
  }
}
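For context: CfsSubsetEval scores a candidate subset of k attributes with Hall's CFS merit, merit = k * r_cf / sqrt(k + k * (k - 1) * r_ff), where r_cf is the average attribute-class correlation and r_ff the average attribute-attribute inter-correlation; the correlate() dispatcher supplies those pairwise values using the measures above. Below is a minimal usage sketch, assuming the standard Weka 3 attribute-selection API; the class name CfsExample and the "iris.arff" path are illustrative placeholders, not part of this file.

import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.BestFirst;
import weka.attributeSelection.CfsSubsetEval;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CfsExample {

  public static void main(String[] args) throws Exception {
    // load a dataset; "iris.arff" is a placeholder path
    Instances data = DataSource.read("iris.arff");
    data.setClassIndex(data.numAttributes() - 1);

    CfsSubsetEval eval = new CfsSubsetEval();
    eval.setMissingSeperate(false);  // pool missing values with the mean/mode
    eval.setLocallyPredictive(true); // enables the postProcess() step above

    // best-first search over attribute subsets, scored by CFS merit
    AttributeSelection selector = new AttributeSelection();
    selector.setEvaluator(eval);
    selector.setSearch(new BestFirst());
    selector.SelectAttributes(data);

    // indices of the selected attributes (the class index is included)
    int[] chosen = selector.selectedAttributes();
    System.out.println(java.util.Arrays.toString(chosen));
  }
}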