⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cfssubseteval.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
    // NOTE(review): the next three lines are the tail of a method whose start
    // lies above this chunk (it returns the computed correlation measure).
    // Left unchanged; confirm against the full file.
    else {
      return  (corr_measure);
    }
  }

  /**
   * Computes the absolute Pearson correlation between two numeric attributes.
   * Missing values contribute zero deviation (i.e. are treated as the mean).
   *
   * Side effect: if the cached standard deviation for an attribute is still at
   * the sentinel value 1.0, it is replaced here with the population standard
   * deviation (sentinel initialization is not visible in this chunk — verify).
   *
   * @param att1 index of the first numeric attribute
   * @param att2 index of the second numeric attribute
   * @return |r|; if one attribute has zero variance, returns 1.0 (maximally
   *         bad) unless either attribute is the class, in which case 0.0
   */
  private double num_num (int att1, int att2) {
    int i;
    Instance inst;
    double r, diff1, diff2, num = 0.0, sx = 0.0, sy = 0.0;
    double mx = m_trainInstances.meanOrMode(m_trainInstances.attribute(att1));
    double my = m_trainInstances.meanOrMode(m_trainInstances.attribute(att2));

    // accumulate covariance (num) and the two sums of squared deviations
    for (i = 0; i < m_numInstances; i++) {
      inst = m_trainInstances.instance(i);
      diff1 = (inst.isMissing(att1))? 0.0 : (inst.value(att1) - mx);
      diff2 = (inst.isMissing(att2))? 0.0 : (inst.value(att2) - my);
      num += (diff1*diff2);
      sx += (diff1*diff1);
      sy += (diff2*diff2);
    }

    // cache std devs lazily (1.0 acts as the "not yet computed" sentinel)
    if (sx != 0.0) {
      if (m_std_devs[att1] == 1.0) {
        m_std_devs[att1] = Math.sqrt((sx/m_numInstances));
      }
    }

    if (sy != 0.0) {
      if (m_std_devs[att2] == 1.0) {
        m_std_devs[att2] = Math.sqrt((sy/m_numInstances));
      }
    }

    if ((sx*sy) > 0.0) {
      r = (num/(Math.sqrt(sx*sy)));
      return  ((r < 0.0)? -r : r);   // absolute value of r
    }
    else {
      // zero variance: maximally bad correlation is 1.0 between two
      // non-class attributes, 0.0 when the class is involved
      if (att1 != m_classIndex && att2 != m_classIndex) {
        return  1.0;
      }
      else {
        return  0.0;
      }
    }
  }

  /**
   * Computes a correlation between a nominal attribute (att1) and a numeric
   * attribute (att2) by binarizing the nominal attribute: for each nominal
   * value a 0/1 indicator is correlated with the numeric attribute, and the
   * per-value correlations are weighted by the value's prior frequency.
   * Missing nominal values map to the modal value, or to an extra
   * (numValues) slot when m_missingSeperate is set.
   *
   * Side effect: lazily fills m_std_devs for both attributes (sentinel 1.0),
   * using the frequency-weighted variance for the nominal attribute.
   *
   * @param att1 index of the nominal attribute
   * @param att2 index of the numeric attribute
   * @return the weighted absolute correlation; 1.0 when degenerate and
   *         neither attribute is the class
   */
  private double num_nom2 (int att1, int att2) {
    int i, ii, k;
    double temp;
    Instance inst;
    int mx = (int)m_trainInstances.
      meanOrMode(m_trainInstances.attribute(att1));
    double my = m_trainInstances.
      meanOrMode(m_trainInstances.attribute(att2));
    double stdv_num = 0.0;
    double diff1, diff2;
    double r = 0.0, rr, max_corr = 0.0;
    // one extra slot for "missing" when treating missing as its own value
    int nx = (!m_missingSeperate) 
      ? m_trainInstances.attribute(att1).numValues() 
      : m_trainInstances.attribute(att1).numValues() + 1;
    double[] prior_nom = new double[nx];
    double[] stdvs_nom = new double[nx];
    double[] covs = new double[nx];

    for (i = 0; i < nx; i++) {
      stdvs_nom[i] = covs[i] = prior_nom[i] = 0.0;
    }

    // calculate frequencies (and means) of the values of the nominal 
    // attribute
    for (i = 0; i < m_numInstances; i++) {
      inst = m_trainInstances.instance(i);

      if (inst.isMissing(att1)) {
        if (!m_missingSeperate) {
          ii = mx;              // missing -> modal value
        }
        else {
          ii = nx - 1;          // missing -> dedicated extra slot
        }
      }
      else {
        ii = (int)inst.value(att1);
      }

      // increment freq for nominal
      prior_nom[ii]++;
    }

    // second pass: variance of the numeric att, plus per-value indicator
    // variances and covariances with the numeric att
    for (k = 0; k < m_numInstances; k++) {
      inst = m_trainInstances.instance(k);
      // std dev of numeric attribute
      diff2 = (inst.isMissing(att2))? 0.0 : (inst.value(att2) - my);
      stdv_num += (diff2*diff2);

      // 
      for (i = 0; i < nx; i++) {
        if (inst.isMissing(att1)) {
          if (!m_missingSeperate) {
            temp = (i == mx)? 1.0 : 0.0;
          }
          else {
            temp = (i == (nx - 1))? 1.0 : 0.0;
          }
        }
        else {
          temp = (i == inst.value(att1))? 1.0 : 0.0;
        }

        diff1 = (temp - (prior_nom[i]/m_numInstances));
        stdvs_nom[i] += (diff1*diff1);
        covs[i] += (diff1*diff2);
      }
    }

    // calculate weighted correlation
    for (i = 0, temp = 0.0; i < nx; i++) {
      // calculate the weighted variance of the nominal
      temp += ((prior_nom[i]/m_numInstances)*(stdvs_nom[i]/m_numInstances));

      if ((stdvs_nom[i]*stdv_num) > 0.0) {
        //System.out.println("Stdv :"+stdvs_nom[i]);
        rr = (covs[i]/(Math.sqrt(stdvs_nom[i]*stdv_num)));

        if (rr < 0.0) {
          rr = -rr;
        }

        r += ((prior_nom[i]/m_numInstances)*rr);
      }
      /* if there is zero variance for the numeric att at a specific 
         level of the catergorical att then if neither is the class then 
         make this correlation at this level maximally bad i.e. 1.0. 
         If either is the class then maximally bad correlation is 0.0 */
      else {if (att1 != m_classIndex && att2 != m_classIndex) {
        r += ((prior_nom[i]/m_numInstances)*1.0);
      }
      }
    }

    // set the standard deviations for these attributes if necessary
    // if ((att1 != classIndex) && (att2 != classIndex)) // =============
    if (temp != 0.0) {
      if (m_std_devs[att1] == 1.0) {
        m_std_devs[att1] = Math.sqrt(temp);
      }
    }

    if (stdv_num != 0.0) {
      if (m_std_devs[att2] == 1.0) {
        m_std_devs[att2] = Math.sqrt((stdv_num/m_numInstances));
      }
    }

    // fully degenerate case: same convention as num_num
    if (r == 0.0) {
      if (att1 != m_classIndex && att2 != m_classIndex) {
        r = 1.0;
      }
    }

    return  r;
  }

  /**
   * Computes a correlation between two nominal attributes by binarizing both:
   * each (value-of-att1, value-of-att2) indicator pair is correlated and the
   * result is weighted by the pair's joint prior frequency. Missing values
   * map to the modal value, or to an extra slot when m_missingSeperate is set.
   *
   * Side effect: lazily fills m_std_devs for both attributes (sentinel 1.0)
   * with their frequency-weighted standard deviations.
   *
   * @param att1 index of the first nominal attribute
   * @param att2 index of the second nominal attribute
   * @return the weighted absolute correlation; 1.0 when degenerate and
   *         neither attribute is the class
   */
  private double nom_nom (int att1, int att2) {
    int i, j, ii, jj, z;
    double temp1, temp2;
    Instance inst;
    int mx = (int)m_trainInstances.
      meanOrMode(m_trainInstances.attribute(att1));
    int my = (int)m_trainInstances.
      meanOrMode(m_trainInstances.attribute(att2));
    double diff1, diff2;
    double r = 0.0, rr, max_corr = 0.0;
    // extra slot per attribute for "missing" when treated separately
    int nx = (!m_missingSeperate) 
      ? m_trainInstances.attribute(att1).numValues() 
      : m_trainInstances.attribute(att1).numValues() + 1;
    int ny = (!m_missingSeperate)
      ? m_trainInstances.attribute(att2).numValues() 
      : m_trainInstances.attribute(att2).numValues() + 1;
    double[][] prior_nom = new double[nx][ny];
    double[] sumx = new double[nx];
    double[] sumy = new double[ny];
    double[] stdvsx = new double[nx];
    double[] stdvsy = new double[ny];
    double[][] covs = new double[nx][ny];

    for (i = 0; i < nx; i++) {
      sumx[i] = stdvsx[i] = 0.0;
    }

    for (j = 0; j < ny; j++) {
      sumy[j] = stdvsy[j] = 0.0;
    }

    for (i = 0; i < nx; i++) {
      for (j = 0; j < ny; j++) {
        covs[i][j] = prior_nom[i][j] = 0.0;
      }
    }

    // calculate frequencies (and means) of the values of the nominal 
    // attribute
    for (i = 0; i < m_numInstances; i++) {
      inst = m_trainInstances.instance(i);

      if (inst.isMissing(att1)) {
        if (!m_missingSeperate) {
          ii = mx;
        }
        else {
          ii = nx - 1;
        }
      }
      else {
        ii = (int)inst.value(att1);
      }

      if (inst.isMissing(att2)) {
        if (!m_missingSeperate) {
          jj = my;
        }
        else {
          jj = ny - 1;
        }
      }
      else {
        jj = (int)inst.value(att2);
      }

      // increment freq for nominal
      prior_nom[ii][jj]++;
      sumx[ii]++;
      sumy[jj]++;
    }

    // second pass: indicator variances for both attributes and the
    // pairwise indicator covariances
    for (z = 0; z < m_numInstances; z++) {
      inst = m_trainInstances.instance(z);

      for (j = 0; j < ny; j++) {
        if (inst.isMissing(att2)) {
          if (!m_missingSeperate) {
            temp2 = (j == my)? 1.0 : 0.0;
          }
          else {
            temp2 = (j == (ny - 1))? 1.0 : 0.0;
          }
        }
        else {
          temp2 = (j == inst.value(att2))? 1.0 : 0.0;
        }

        diff2 = (temp2 - (sumy[j]/m_numInstances));
        stdvsy[j] += (diff2*diff2);
      }

      // 
      for (i = 0; i < nx; i++) {
        if (inst.isMissing(att1)) {
          if (!m_missingSeperate) {
            temp1 = (i == mx)? 1.0 : 0.0;
          }
          else {
            temp1 = (i == (nx - 1))? 1.0 : 0.0;
          }
        }
        else {
          temp1 = (i == inst.value(att1))? 1.0 : 0.0;
        }

        diff1 = (temp1 - (sumx[i]/m_numInstances));
        stdvsx[i] += (diff1*diff1);

        for (j = 0; j < ny; j++) {
          if (inst.isMissing(att2)) {
            if (!m_missingSeperate) {
              temp2 = (j == my)? 1.0 : 0.0;
            }
            else {
              temp2 = (j == (ny - 1))? 1.0 : 0.0;
            }
          }
          else {
            temp2 = (j == inst.value(att2))? 1.0 : 0.0;
          }

          diff2 = (temp2 - (sumy[j]/m_numInstances));
          covs[i][j] += (diff1*diff2);
        }
      }
    }

    // calculate weighted correlation
    for (i = 0; i < nx; i++) {
      for (j = 0; j < ny; j++) {
        if ((stdvsx[i]*stdvsy[j]) > 0.0) {
          //System.out.println("Stdv :"+stdvs_nom[i]);
          rr = (covs[i][j]/(Math.sqrt(stdvsx[i]*stdvsy[j])));

          if (rr < 0.0) {
            rr = -rr;
          }

          r += ((prior_nom[i][j]/m_numInstances)*rr);
        }
        // if there is zero variance for either of the categorical atts then if
        // neither is the class then make this
        // correlation at this level maximally bad i.e. 1.0. If either is 
        // the class then maximally bad correlation is 0.0
        else {if (att1 != m_classIndex && att2 != m_classIndex) {
          r += ((prior_nom[i][j]/m_numInstances)*1.0);
        }
        }
      }
    }

    // calculate weighted standard deviations for these attributes
    // (if necessary)
    for (i = 0, temp1 = 0.0; i < nx; i++) {
      temp1 += ((sumx[i]/m_numInstances)*(stdvsx[i]/m_numInstances));
    }

    if (temp1 != 0.0) {
      if (m_std_devs[att1] == 1.0) {
        m_std_devs[att1] = Math.sqrt(temp1);
      }
    }

    for (j = 0, temp2 = 0.0; j < ny; j++) {
      temp2 += ((sumy[j]/m_numInstances)*(stdvsy[j]/m_numInstances));
    }

    if (temp2 != 0.0) {
      if (m_std_devs[att2] == 1.0) {
        m_std_devs[att2] = Math.sqrt(temp2);
      }
    }

    // fully degenerate case: same convention as num_num / num_nom2
    if (r == 0.0) {
      if (att1 != m_classIndex && att2 != m_classIndex) {
        r = 1.0;
      }
    }

    return  r;
  }

  /**
   * returns a string describing CFS
   *
   * @return the description as a string; a "not been built yet" message when
   *         no training instances have been supplied
   */
  public String toString () {
    StringBuffer text = new StringBuffer();

    if (m_trainInstances == null) {
      text.append("CFS subset evaluator has not been built yet\n");
    }
    else {
      text.append("\tCFS Subset Evaluator\n");

      // NOTE: "seperate" below is a typo preserved from the original output
      // string — changing it would change observable behavior.
      if (m_missingSeperate) {
        text.append("\tTreating missing values as a seperate value\n");
      }

      if (m_locallyPredictive) {
        text.append("\tIncluding locally predictive attributes\n");
      }
    }

    return  text.toString();
  }

  /**
   * Greedily grows best_group with "locally predictive" attributes: at each
   * step the attribute with the highest class correlation not yet considered
   * is accepted only if its correlation with every attribute already in the
   * group stays at or below (class correlation - m_c_Threshold).
   *
   * The correlation matrix is filled lazily; -999 is the sentinel for a
   * not-yet-computed entry (sentinel initialization is outside this chunk).
   *
   * @param best_group the current best subset; modified in place
   */
  private void addLocallyPredictive (BitSet best_group) {
    int i, j;
    boolean done = false;
    boolean ok = true;
    double temp_best = -1.0;
    double corr;
    j = 0;
    BitSet temp_group = (BitSet)best_group.clone();

    while (!done) {
      temp_best = -1.0;

      // find best not already in group
      for (i = 0; i < m_numAttribs; i++) {
        if ((!temp_group.get(i)) && (i != m_classIndex)) {
          if (m_corr_matrix.getElement(i, m_classIndex) == -999) {
            corr = correlate(i, m_classIndex);
            m_corr_matrix.setElement(i, m_classIndex, corr);
            m_corr_matrix.setElement(m_classIndex, i, corr);
          }

          if (m_corr_matrix.getElement(i, m_classIndex) > temp_best) {
            temp_best = m_corr_matrix.getElement(i, m_classIndex);
            j = i;
          }
        }
      }

      if (temp_best == -1.0) {
        done = true;   // every attribute has been considered
      }
      else {
        ok = true;
        temp_group.set(j);   // mark as considered even if later rejected

        // check the best against correlations with others already
        // in group 
        for (i = 0; i < m_numAttribs; i++) {
          if (best_group.get(i)) {
            if (m_corr_matrix.getElement(i, j) == -999) {
              corr = correlate(i, j);
              m_corr_matrix.setElement(i, j, corr);
              m_corr_matrix.setElement(j, i, corr);
            }

            if (m_corr_matrix.getElement(i, j) > temp_best - m_c_Threshold) {
              ok = false;
              break;
            }
          }
        }

        // if ok then add to best_group
        if (ok) {
          best_group.set(j);
        }
      }
    }
  }

  /**
   * Calls locallyPredictive in order to include locally predictive
   * attributes (if requested).
   *
   * @param attributeSet the set of attributes found by the search
   * @return a possibly ranked list of postprocessed attributes
   * @exception Exception if postprocessing fails for some reason
   */
  public int[] postProcess (int[] attributeSet)
    throws Exception {
    int j = 0;

    // nothing to do unless local predictivity was requested
    if (!m_locallyPredictive) {
      //      m_trainInstances = new Instances(m_trainInstances,0);
      return  attributeSet;
    }

    BitSet bestGroup = new BitSet(m_numAttribs);

    for (int i = 0; i < attributeSet.length; i++) {
      bestGroup.set(attributeSet[i]);
    }

    addLocallyPredictive(bestGroup);

    // count how many are set
    for (int i = 0; i < m_numAttribs; i++) {
      if (bestGroup.get(i)) {
        j++;
      }
    }

    // convert the bit set back into an array of attribute indices
    int[] newSet = new int[j];
    j = 0;

    for (int i = 0; i < m_numAttribs; i++) {
      if (bestGroup.get(i)) {
        newSet[j++] = i;
      }
    }

    //    m_trainInstances = new Instances(m_trainInstances,0);
    return  newSet;
  }

  /**
   * Resets the evaluator to its default configuration and releases the
   * reference to the training data.
   */
  protected void resetOptions () {
    m_trainInstances = null;
    m_missingSeperate = false;
    m_locallyPredictive = false;
    m_c_Threshold = 0.0;
  }

  /**
   * Main method for testing this class.
   *
   * @param args the options
   */
  public static void main (String[] args) {
    try {
      System.out.println(AttributeSelection.
                         SelectAttributes(new CfsSubsetEval(), args));
    }
    catch (Exception e) {
      e.printStackTrace();
      System.out.println(e.getMessage());
    }
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -