📄 CfsSubsetEval.java
      // (excerpt resumes mid-method: redistributing contingency-table counts
      // for missing values before computing symmetrical uncertainty)

      // do the missing j's
      if (sumj[nj - 1] > 0.0) {
        for (i = 0; i < ni - 1; i++) {
          if (counts[i][nj - 1] > 0.0) {
            for (j = 0; j < nj - 1; j++) {
              temp = ((j_copy[j] / (sum - j_copy[nj - 1])) * counts[i][nj - 1]);
              counts[i][j] += temp;
              sumj[j] += temp;
            }
            counts[i][nj - 1] = 0.0;
          }
        }
      }
      sumj[nj - 1] = 0.0;

      // do the both missing
      if (counts[ni - 1][nj - 1] > 0.0 && total_missing != sum) {
        for (i = 0; i < ni - 1; i++) {
          for (j = 0; j < nj - 1; j++) {
            temp = (counts_copy[i][j] / (sum - total_missing))
              * counts_copy[ni - 1][nj - 1];
            counts[i][j] += temp;
            sumi[i] += temp;
            sumj[j] += temp;
          }
        }
        counts[ni - 1][nj - 1] = 0.0;
      }
    }

    corr_measure = ContingencyTables.symmetricalUncertainty(counts);

    // a measure of exactly zero maps to 0.0 or 1.0 depending on "flag",
    // which is set earlier in this method (outside this excerpt)
    if (Utils.eq(corr_measure, 0.0)) {
      if (flag) {
        return 0.0;
      } else {
        return 1.0;
      }
    } else {
      return corr_measure;
    }
  }

  /**
   * Computes the absolute Pearson correlation between two numeric
   * attributes. Missing values contribute zero deviation. If either
   * attribute has zero variance, the pair is scored 1.0 (maximally
   * penalized) unless one of them is the class, in which case it is
   * scored 0.0.
   */
  private double num_num(int att1, int att2) {
    int i;
    Instance inst;
    double r, diff1, diff2, num = 0.0, sx = 0.0, sy = 0.0;
    double mx = m_trainInstances.meanOrMode(m_trainInstances.attribute(att1));
    double my = m_trainInstances.meanOrMode(m_trainInstances.attribute(att2));

    for (i = 0; i < m_numInstances; i++) {
      inst = m_trainInstances.instance(i);
      diff1 = (inst.isMissing(att1)) ? 0.0 : (inst.value(att1) - mx);
      diff2 = (inst.isMissing(att2)) ? 0.0 : (inst.value(att2) - my);
      num += (diff1 * diff2);
      sx += (diff1 * diff1);
      sy += (diff2 * diff2);
    }

    if (sx != 0.0) {
      if (m_std_devs[att1] == 1.0) {
        m_std_devs[att1] = Math.sqrt(sx / m_numInstances);
      }
    }

    if (sy != 0.0) {
      if (m_std_devs[att2] == 1.0) {
        m_std_devs[att2] = Math.sqrt(sy / m_numInstances);
      }
    }

    if ((sx * sy) > 0.0) {
      r = (num / (Math.sqrt(sx * sy)));
      return (r < 0.0) ? -r : r;
    } else {
      if (att1 != m_classIndex && att2 != m_classIndex) {
        return 1.0;
      } else {
        return 0.0;
      }
    }
  }
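  /*
   * The next method, num_nom2, measures correlation between a nominal
   * attribute (att1) and a numeric attribute (att2): each value of att1
   * is dummy-coded as a 0/1 indicator variable, the absolute Pearson
   * correlation between each indicator and att2 is computed, and the
   * results are combined weighted by each value's prior frequency:
   *
   *   r = sum_i p_i * |corr(I_i, att2)|
   */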
  private double num_nom2(int att1, int att2) {
    int i, ii, k;
    double temp;
    Instance inst;
    int mx = (int) m_trainInstances.meanOrMode(m_trainInstances.attribute(att1));
    double my = m_trainInstances.meanOrMode(m_trainInstances.attribute(att2));
    double stdv_num = 0.0;
    double diff1, diff2;
    double r = 0.0, rr;
    int nx = (!m_missingSeperate)
      ? m_trainInstances.attribute(att1).numValues()
      : m_trainInstances.attribute(att1).numValues() + 1;
    double[] prior_nom = new double[nx];
    double[] stdvs_nom = new double[nx];
    double[] covs = new double[nx];

    for (i = 0; i < nx; i++) {
      stdvs_nom[i] = covs[i] = prior_nom[i] = 0.0;
    }

    // calculate frequencies of the values of the nominal attribute
    for (i = 0; i < m_numInstances; i++) {
      inst = m_trainInstances.instance(i);

      if (inst.isMissing(att1)) {
        // a missing value counts as the modal value, or as the extra
        // last value when missing is treated as a separate value
        ii = (!m_missingSeperate) ? mx : (nx - 1);
      } else {
        ii = (int) inst.value(att1);
      }

      prior_nom[ii]++;
    }

    for (k = 0; k < m_numInstances; k++) {
      inst = m_trainInstances.instance(k);

      // accumulate squared deviations of the numeric attribute
      diff2 = (inst.isMissing(att2)) ? 0.0 : (inst.value(att2) - my);
      stdv_num += (diff2 * diff2);

      // accumulate indicator-variable deviations and covariances
      for (i = 0; i < nx; i++) {
        if (inst.isMissing(att1)) {
          if (!m_missingSeperate) {
            temp = (i == mx) ? 1.0 : 0.0;
          } else {
            temp = (i == (nx - 1)) ? 1.0 : 0.0;
          }
        } else {
          temp = (i == inst.value(att1)) ? 1.0 : 0.0;
        }

        diff1 = (temp - (prior_nom[i] / m_numInstances));
        stdvs_nom[i] += (diff1 * diff1);
        covs[i] += (diff1 * diff2);
      }
    }

    // calculate the weighted correlation
    for (i = 0, temp = 0.0; i < nx; i++) {
      // weighted variance of the nominal attribute
      temp += ((prior_nom[i] / m_numInstances) * (stdvs_nom[i] / m_numInstances));

      if ((stdvs_nom[i] * stdv_num) > 0.0) {
        rr = (covs[i] / (Math.sqrt(stdvs_nom[i] * stdv_num)));

        if (rr < 0.0) {
          rr = -rr;
        }

        r += ((prior_nom[i] / m_numInstances) * rr);
      } else {
        /* If there is zero variance for the numeric attribute at a
           particular level of the categorical attribute, then if neither
           attribute is the class, make the correlation at this level
           maximally bad, i.e. 1.0. If either is the class, maximally bad
           correlation is 0.0. */
        if (att1 != m_classIndex && att2 != m_classIndex) {
          r += ((prior_nom[i] / m_numInstances) * 1.0);
        }
      }
    }

    // set the standard deviations for these attributes if necessary
    if (temp != 0.0) {
      if (m_std_devs[att1] == 1.0) {
        m_std_devs[att1] = Math.sqrt(temp);
      }
    }

    if (stdv_num != 0.0) {
      if (m_std_devs[att2] == 1.0) {
        m_std_devs[att2] = Math.sqrt(stdv_num / m_numInstances);
      }
    }

    if (r == 0.0) {
      if (att1 != m_classIndex && att2 != m_classIndex) {
        r = 1.0;
      }
    }

    return r;
  }
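  /*
   * nom_nom extends the same dummy-coding scheme to two nominal
   * attributes: every pair of values (i, j) yields an indicator/indicator
   * correlation, and the absolute correlations are combined weighted by
   * the joint frequency of the pair:
   *
   *   r = sum_i sum_j p_ij * |corr(I_i, I_j)|
   */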
  private double nom_nom(int att1, int att2) {
    int i, j, ii, jj, z;
    double temp1, temp2;
    Instance inst;
    int mx = (int) m_trainInstances.meanOrMode(m_trainInstances.attribute(att1));
    int my = (int) m_trainInstances.meanOrMode(m_trainInstances.attribute(att2));
    double diff1, diff2;
    double r = 0.0, rr;
    int nx = (!m_missingSeperate)
      ? m_trainInstances.attribute(att1).numValues()
      : m_trainInstances.attribute(att1).numValues() + 1;
    int ny = (!m_missingSeperate)
      ? m_trainInstances.attribute(att2).numValues()
      : m_trainInstances.attribute(att2).numValues() + 1;
    double[][] prior_nom = new double[nx][ny];
    double[] sumx = new double[nx];
    double[] sumy = new double[ny];
    double[] stdvsx = new double[nx];
    double[] stdvsy = new double[ny];
    double[][] covs = new double[nx][ny];

    for (i = 0; i < nx; i++) {
      sumx[i] = stdvsx[i] = 0.0;
    }

    for (j = 0; j < ny; j++) {
      sumy[j] = stdvsy[j] = 0.0;
    }

    for (i = 0; i < nx; i++) {
      for (j = 0; j < ny; j++) {
        covs[i][j] = prior_nom[i][j] = 0.0;
      }
    }

    // calculate joint and marginal frequencies of the nominal values
    for (i = 0; i < m_numInstances; i++) {
      inst = m_trainInstances.instance(i);

      if (inst.isMissing(att1)) {
        ii = (!m_missingSeperate) ? mx : (nx - 1);
      } else {
        ii = (int) inst.value(att1);
      }

      if (inst.isMissing(att2)) {
        jj = (!m_missingSeperate) ? my : (ny - 1);
      } else {
        jj = (int) inst.value(att2);
      }

      prior_nom[ii][jj]++;
      sumx[ii]++;
      sumy[jj]++;
    }

    for (z = 0; z < m_numInstances; z++) {
      inst = m_trainInstances.instance(z);

      // deviations of att2's indicator variables
      for (j = 0; j < ny; j++) {
        if (inst.isMissing(att2)) {
          if (!m_missingSeperate) {
            temp2 = (j == my) ? 1.0 : 0.0;
          } else {
            temp2 = (j == (ny - 1)) ? 1.0 : 0.0;
          }
        } else {
          temp2 = (j == inst.value(att2)) ? 1.0 : 0.0;
        }

        diff2 = (temp2 - (sumy[j] / m_numInstances));
        stdvsy[j] += (diff2 * diff2);
      }

      // deviations of att1's indicator variables, plus the covariances
      for (i = 0; i < nx; i++) {
        if (inst.isMissing(att1)) {
          if (!m_missingSeperate) {
            temp1 = (i == mx) ? 1.0 : 0.0;
          } else {
            temp1 = (i == (nx - 1)) ? 1.0 : 0.0;
          }
        } else {
          temp1 = (i == inst.value(att1)) ? 1.0 : 0.0;
        }

        diff1 = (temp1 - (sumx[i] / m_numInstances));
        stdvsx[i] += (diff1 * diff1);

        for (j = 0; j < ny; j++) {
          if (inst.isMissing(att2)) {
            if (!m_missingSeperate) {
              temp2 = (j == my) ? 1.0 : 0.0;
            } else {
              temp2 = (j == (ny - 1)) ? 1.0 : 0.0;
            }
          } else {
            temp2 = (j == inst.value(att2)) ? 1.0 : 0.0;
          }

          diff2 = (temp2 - (sumy[j] / m_numInstances));
          covs[i][j] += (diff1 * diff2);
        }
      }
    }

    // calculate the weighted correlation
    for (i = 0; i < nx; i++) {
      for (j = 0; j < ny; j++) {
        if ((stdvsx[i] * stdvsy[j]) > 0.0) {
          rr = (covs[i][j] / (Math.sqrt(stdvsx[i] * stdvsy[j])));

          if (rr < 0.0) {
            rr = -rr;
          }

          r += ((prior_nom[i][j] / m_numInstances) * rr);
        } else {
          /* If there is zero variance for either of the categorical
             attributes, then if neither is the class, make the correlation
             at this level maximally bad, i.e. 1.0. If either is the class,
             maximally bad correlation is 0.0. */
          if (att1 != m_classIndex && att2 != m_classIndex) {
            r += ((prior_nom[i][j] / m_numInstances) * 1.0);
          }
        }
      }
    }

    // calculate weighted standard deviations for these attributes
    // (if necessary)
    for (i = 0, temp1 = 0.0; i < nx; i++) {
      temp1 += ((sumx[i] / m_numInstances) * (stdvsx[i] / m_numInstances));
    }

    if (temp1 != 0.0) {
      if (m_std_devs[att1] == 1.0) {
        m_std_devs[att1] = Math.sqrt(temp1);
      }
    }

    for (j = 0, temp2 = 0.0; j < ny; j++) {
      temp2 += ((sumy[j] / m_numInstances) * (stdvsy[j] / m_numInstances));
    }

    if (temp2 != 0.0) {
      if (m_std_devs[att2] == 1.0) {
        m_std_devs[att2] = Math.sqrt(temp2);
      }
    }

    if (r == 0.0) {
      if (att1 != m_classIndex && att2 != m_classIndex) {
        r = 1.0;
      }
    }

    return r;
  }

  /**
   * Returns a string describing CFS.
   *
   * @return the description as a string
   */
  public String toString() {
    StringBuffer text = new StringBuffer();

    if (m_trainInstances == null) {
      text.append("CFS subset evaluator has not been built yet\n");
    } else {
      text.append("\tCFS Subset Evaluator\n");

      if (m_missingSeperate) {
        text.append("\tTreating missing values as a separate value\n");
      }

      if (m_locallyPredictive) {
        text.append("\tIncluding locally predictive attributes\n");
      }
    }

    return text.toString();
  }
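  /*
   * addLocallyPredictive implements Hall's "locally predictive"
   * post-processing heuristic: repeatedly pick the attribute outside the
   * current group with the highest correlation to the class, and admit it
   * only if that class correlation exceeds its correlation with every
   * attribute already in the group by more than m_c_Threshold. Pairwise
   * correlations are cached in the lower triangle of m_corr_matrix, with
   * -999 marking "not yet computed".
   */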
  private void addLocallyPredictive(BitSet best_group) {
    int i, j = 0;
    boolean done = false;
    boolean ok = true;
    double temp_best = -1.0;
    float corr;
    BitSet temp_group = (BitSet) best_group.clone();
    int larger, smaller;

    while (!done) {
      temp_best = -1.0;

      // find the best attribute not already in the group
      for (i = 0; i < m_numAttribs; i++) {
        if (i > m_classIndex) {
          larger = i;
          smaller = m_classIndex;
        } else {
          smaller = i;
          larger = m_classIndex;
        }

        if ((!temp_group.get(i)) && (i != m_classIndex)) {
          if (m_corr_matrix[larger][smaller] == -999) {
            corr = correlate(i, m_classIndex);
            m_corr_matrix[larger][smaller] = corr;
          }

          if (m_corr_matrix[larger][smaller] > temp_best) {
            temp_best = m_corr_matrix[larger][smaller];
            j = i;
          }
        }
      }

      if (temp_best == -1.0) {
        done = true;
      } else {
        ok = true;
        temp_group.set(j);

        // check the best candidate against its correlations with
        // attributes already in the group
        for (i = 0; i < m_numAttribs; i++) {
          if (i > j) {
            larger = i;
            smaller = j;
          } else {
            larger = j;
            smaller = i;
          }

          if (best_group.get(i)) {
            if (m_corr_matrix[larger][smaller] == -999) {
              corr = correlate(i, j);
              m_corr_matrix[larger][smaller] = corr;
            }

            if (m_corr_matrix[larger][smaller] > temp_best - m_c_Threshold) {
              ok = false;
              break;
            }
          }
        }

        // if ok then add to best_group
        if (ok) {
          best_group.set(j);
        }
      }
    }
  }

  /**
   * Calls addLocallyPredictive in order to include locally predictive
   * attributes (if requested).
   *
   * @param attributeSet the set of attributes found by the search
   * @return a possibly ranked list of postprocessed attributes
   * @throws Exception if postprocessing fails for some reason
   */
  public int[] postProcess(int[] attributeSet) throws Exception {
    int j = 0;

    if (!m_locallyPredictive) {
      return attributeSet;
    }

    BitSet bestGroup = new BitSet(m_numAttribs);

    for (int i = 0; i < attributeSet.length; i++) {
      bestGroup.set(attributeSet[i]);
    }

    addLocallyPredictive(bestGroup);

    // count how many attributes are set
    for (int i = 0; i < m_numAttribs; i++) {
      if (bestGroup.get(i)) {
        j++;
      }
    }

    int[] newSet = new int[j];
    j = 0;

    for (int i = 0; i < m_numAttribs; i++) {
      if (bestGroup.get(i)) {
        newSet[j++] = i;
      }
    }

    return newSet;
  }

  protected void resetOptions() {
    m_trainInstances = null;
    m_missingSeperate = false;
    m_locallyPredictive = true;
    m_c_Threshold = 0.0;
  }

  /**
   * Main method for testing this class.
   *
   * @param args the options
   */
  public static void main(String[] args) {
    runEvaluator(new CfsSubsetEval(), args);
  }
}
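For context: CfsSubsetEval scores a candidate subset of k attributes with Hall's CFS merit, merit = k * r_cf / sqrt(k + k * (k - 1) * r_ff), where r_cf is the average attribute-class correlation and r_ff the average attribute-attribute inter-correlation; the correlate() dispatcher supplies those pairwise values using the measures above. Below is a minimal usage sketch, assuming the standard Weka 3 attribute-selection API; the class name CfsExample and the "iris.arff" path are illustrative placeholders, not part of this file.

import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.BestFirst;
import weka.attributeSelection.CfsSubsetEval;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CfsExample {

  public static void main(String[] args) throws Exception {
    // load a dataset; "iris.arff" is a placeholder path
    Instances data = DataSource.read("iris.arff");
    data.setClassIndex(data.numAttributes() - 1);

    CfsSubsetEval eval = new CfsSubsetEval();
    eval.setMissingSeperate(false);  // pool missing values with the mean/mode
    eval.setLocallyPredictive(true); // enables the postProcess() step above

    // best-first search over attribute subsets, scored by CFS merit
    AttributeSelection selector = new AttributeSelection();
    selector.setEvaluator(eval);
    selector.setSearch(new BestFirst());
    selector.SelectAttributes(data);

    // indices of the selected attributes (the class index is included)
    int[] chosen = selector.selectedAttributes();
    System.out.println(java.util.Arrays.toString(chosen));
  }
}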