⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cfssubseteval.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
    // NOTE(review): the next three lines are the tail of a method whose start
    // lies above this chunk (it returns the computed correlation measure).
    // Left unchanged; confirm against the full file.
    else {
      return  (corr_measure);
    }
  }

  /**
   * Computes the absolute Pearson correlation between two numeric attributes.
   * Missing values contribute zero deviation (i.e. are treated as the mean).
   *
   * Side effect: if the cached standard deviation for an attribute is still at
   * the sentinel value 1.0, it is replaced here with the population standard
   * deviation (sentinel initialization is not visible in this chunk — verify).
   *
   * @param att1 index of the first numeric attribute
   * @param att2 index of the second numeric attribute
   * @return |r|; if one attribute has zero variance, returns 1.0 (maximally
   *         bad) unless either attribute is the class, in which case 0.0
   */
  private double num_num (int att1, int att2) {
    int i;
    Instance inst;
    double r, diff1, diff2, num = 0.0, sx = 0.0, sy = 0.0;
    double mx = m_trainInstances.meanOrMode(m_trainInstances.attribute(att1));
    double my = m_trainInstances.meanOrMode(m_trainInstances.attribute(att2));

    // accumulate covariance (num) and the two sums of squared deviations
    for (i = 0; i < m_numInstances; i++) {
      inst = m_trainInstances.instance(i);
      diff1 = (inst.isMissing(att1))? 0.0 : (inst.value(att1) - mx);
      diff2 = (inst.isMissing(att2))? 0.0 : (inst.value(att2) - my);
      num += (diff1*diff2);
      sx += (diff1*diff1);
      sy += (diff2*diff2);
    }

    // cache std devs lazily (1.0 acts as the "not yet computed" sentinel)
    if (sx != 0.0) {
      if (m_std_devs[att1] == 1.0) {
        m_std_devs[att1] = Math.sqrt((sx/m_numInstances));
      }
    }

    if (sy != 0.0) {
      if (m_std_devs[att2] == 1.0) {
        m_std_devs[att2] = Math.sqrt((sy/m_numInstances));
      }
    }

    if ((sx*sy) > 0.0) {
      r = (num/(Math.sqrt(sx*sy)));
      return  ((r < 0.0)? -r : r);   // absolute value of r
    }
    else {
      // zero variance: maximally bad correlation is 1.0 between two
      // non-class attributes, 0.0 when the class is involved
      if (att1 != m_classIndex && att2 != m_classIndex) {
        return  1.0;
      }
      else {
        return  0.0;
      }
    }
  }

  /**
   * Computes a correlation between a nominal attribute (att1) and a numeric
   * attribute (att2) by binarizing the nominal attribute: for each nominal
   * value a 0/1 indicator is correlated with the numeric attribute, and the
   * per-value correlations are weighted by the value's prior frequency.
   * Missing nominal values map to the modal value, or to an extra
   * (numValues) slot when m_missingSeperate is set.
   *
   * Side effect: lazily fills m_std_devs for both attributes (sentinel 1.0),
   * using the frequency-weighted variance for the nominal attribute.
   *
   * @param att1 index of the nominal attribute
   * @param att2 index of the numeric attribute
   * @return the weighted absolute correlation; 1.0 when degenerate and
   *         neither attribute is the class
   */
  private double num_nom2 (int att1, int att2) {
    int i, ii, k;
    double temp;
    Instance inst;
    int mx = (int)m_trainInstances.
      meanOrMode(m_trainInstances.attribute(att1));
    double my = m_trainInstances.
      meanOrMode(m_trainInstances.attribute(att2));
    double stdv_num = 0.0;
    double diff1, diff2;
    double r = 0.0, rr, max_corr = 0.0;
    // one extra slot for "missing" when treating missing as its own value
    int nx = (!m_missingSeperate) 
      ? m_trainInstances.attribute(att1).numValues() 
      : m_trainInstances.attribute(att1).numValues() + 1;
    double[] prior_nom = new double[nx];
    double[] stdvs_nom = new double[nx];
    double[] covs = new double[nx];

    for (i = 0; i < nx; i++) {
      stdvs_nom[i] = covs[i] = prior_nom[i] = 0.0;
    }

    // calculate frequencies (and means) of the values of the nominal 
    // attribute
    for (i = 0; i < m_numInstances; i++) {
      inst = m_trainInstances.instance(i);

      if (inst.isMissing(att1)) {
        if (!m_missingSeperate) {
          ii = mx;              // missing -> modal value
        }
        else {
          ii = nx - 1;          // missing -> dedicated extra slot
        }
      }
      else {
        ii = (int)inst.value(att1);
      }

      // increment freq for nominal
      prior_nom[ii]++;
    }

    // second pass: variance of the numeric att, plus per-value indicator
    // variances and covariances with the numeric att
    for (k = 0; k < m_numInstances; k++) {
      inst = m_trainInstances.instance(k);
      // std dev of numeric attribute
      diff2 = (inst.isMissing(att2))? 0.0 : (inst.value(att2) - my);
      stdv_num += (diff2*diff2);

      // 
      for (i = 0; i < nx; i++) {
        if (inst.isMissing(att1)) {
          if (!m_missingSeperate) {
            temp = (i == mx)? 1.0 : 0.0;
          }
          else {
            temp = (i == (nx - 1))? 1.0 : 0.0;
          }
        }
        else {
          temp = (i == inst.value(att1))? 1.0 : 0.0;
        }

        diff1 = (temp - (prior_nom[i]/m_numInstances));
        stdvs_nom[i] += (diff1*diff1);
        covs[i] += (diff1*diff2);
      }
    }

    // calculate weighted correlation
    for (i = 0, temp = 0.0; i < nx; i++) {
      // calculate the weighted variance of the nominal
      temp += ((prior_nom[i]/m_numInstances)*(stdvs_nom[i]/m_numInstances));

      if ((stdvs_nom[i]*stdv_num) > 0.0) {
        //System.out.println("Stdv :"+stdvs_nom[i]);
        rr = (covs[i]/(Math.sqrt(stdvs_nom[i]*stdv_num)));

        if (rr < 0.0) {
          rr = -rr;
        }

        r += ((prior_nom[i]/m_numInstances)*rr);
      }
      /* if there is zero variance for the numeric att at a specific 
         level of the catergorical att then if neither is the class then 
         make this correlation at this level maximally bad i.e. 1.0. 
         If either is the class then maximally bad correlation is 0.0 */
      else {if (att1 != m_classIndex && att2 != m_classIndex) {
        r += ((prior_nom[i]/m_numInstances)*1.0);
      }
      }
    }

    // set the standard deviations for these attributes if necessary
    // if ((att1 != classIndex) && (att2 != classIndex)) // =============
    if (temp != 0.0) {
      if (m_std_devs[att1] == 1.0) {
        m_std_devs[att1] = Math.sqrt(temp);
      }
    }

    if (stdv_num != 0.0) {
      if (m_std_devs[att2] == 1.0) {
        m_std_devs[att2] = Math.sqrt((stdv_num/m_numInstances));
      }
    }

    // fully degenerate case: same convention as num_num
    if (r == 0.0) {
      if (att1 != m_classIndex && att2 != m_classIndex) {
        r = 1.0;
      }
    }

    return  r;
  }

  /**
   * Computes a correlation between two nominal attributes by binarizing both:
   * each (value-of-att1, value-of-att2) indicator pair is correlated and the
   * result is weighted by the pair's joint prior frequency. Missing values
   * map to the modal value, or to an extra slot when m_missingSeperate is set.
   *
   * Side effect: lazily fills m_std_devs for both attributes (sentinel 1.0)
   * with their frequency-weighted standard deviations.
   *
   * @param att1 index of the first nominal attribute
   * @param att2 index of the second nominal attribute
   * @return the weighted absolute correlation; 1.0 when degenerate and
   *         neither attribute is the class
   */
  private double nom_nom (int att1, int att2) {
    int i, j, ii, jj, z;
    double temp1, temp2;
    Instance inst;
    int mx = (int)m_trainInstances.
      meanOrMode(m_trainInstances.attribute(att1));
    int my = (int)m_trainInstances.
      meanOrMode(m_trainInstances.attribute(att2));
    double diff1, diff2;
    double r = 0.0, rr, max_corr = 0.0;
    // extra slot per attribute for "missing" when treated separately
    int nx = (!m_missingSeperate) 
      ? m_trainInstances.attribute(att1).numValues() 
      : m_trainInstances.attribute(att1).numValues() + 1;
    int ny = (!m_missingSeperate)
      ? m_trainInstances.attribute(att2).numValues() 
      : m_trainInstances.attribute(att2).numValues() + 1;
    double[][] prior_nom = new double[nx][ny];
    double[] sumx = new double[nx];
    double[] sumy = new double[ny];
    double[] stdvsx = new double[nx];
    double[] stdvsy = new double[ny];
    double[][] covs = new double[nx][ny];

    for (i = 0; i < nx; i++) {
      sumx[i] = stdvsx[i] = 0.0;
    }

    for (j = 0; j < ny; j++) {
      sumy[j] = stdvsy[j] = 0.0;
    }

    for (i = 0; i < nx; i++) {
      for (j = 0; j < ny; j++) {
        covs[i][j] = prior_nom[i][j] = 0.0;
      }
    }

    // calculate frequencies (and means) of the values of the nominal 
    // attribute
    for (i = 0; i < m_numInstances; i++) {
      inst = m_trainInstances.instance(i);

      if (inst.isMissing(att1)) {
        if (!m_missingSeperate) {
          ii = mx;
        }
        else {
          ii = nx - 1;
        }
      }
      else {
        ii = (int)inst.value(att1);
      }

      if (inst.isMissing(att2)) {
        if (!m_missingSeperate) {
          jj = my;
        }
        else {
          jj = ny - 1;
        }
      }
      else {
        jj = (int)inst.value(att2);
      }

      // increment freq for nominal
      prior_nom[ii][jj]++;
      sumx[ii]++;
      sumy[jj]++;
    }

    // second pass: indicator variances for both attributes and the
    // pairwise indicator covariances
    for (z = 0; z < m_numInstances; z++) {
      inst = m_trainInstances.instance(z);

      for (j = 0; j < ny; j++) {
        if (inst.isMissing(att2)) {
          if (!m_missingSeperate) {
            temp2 = (j == my)? 1.0 : 0.0;
          }
          else {
            temp2 = (j == (ny - 1))? 1.0 : 0.0;
          }
        }
        else {
          temp2 = (j == inst.value(att2))? 1.0 : 0.0;
        }

        diff2 = (temp2 - (sumy[j]/m_numInstances));
        stdvsy[j] += (diff2*diff2);
      }

      // 
      for (i = 0; i < nx; i++) {
        if (inst.isMissing(att1)) {
          if (!m_missingSeperate) {
            temp1 = (i == mx)? 1.0 : 0.0;
          }
          else {
            temp1 = (i == (nx - 1))? 1.0 : 0.0;
          }
        }
        else {
          temp1 = (i == inst.value(att1))? 1.0 : 0.0;
        }

        diff1 = (temp1 - (sumx[i]/m_numInstances));
        stdvsx[i] += (diff1*diff1);

        for (j = 0; j < ny; j++) {
          if (inst.isMissing(att2)) {
            if (!m_missingSeperate) {
              temp2 = (j == my)? 1.0 : 0.0;
            }
            else {
              temp2 = (j == (ny - 1))? 1.0 : 0.0;
            }
          }
          else {
            temp2 = (j == inst.value(att2))? 1.0 : 0.0;
          }

          diff2 = (temp2 - (sumy[j]/m_numInstances));
          covs[i][j] += (diff1*diff2);
        }
      }
    }

    // calculate weighted correlation
    for (i = 0; i < nx; i++) {
      for (j = 0; j < ny; j++) {
        if ((stdvsx[i]*stdvsy[j]) > 0.0) {
          //System.out.println("Stdv :"+stdvs_nom[i]);
          rr = (covs[i][j]/(Math.sqrt(stdvsx[i]*stdvsy[j])));

          if (rr < 0.0) {
            rr = -rr;
          }

          r += ((prior_nom[i][j]/m_numInstances)*rr);
        }
        // if there is zero variance for either of the categorical atts then if
        // neither is the class then make this
        // correlation at this level maximally bad i.e. 1.0. If either is 
        // the class then maximally bad correlation is 0.0
        else {if (att1 != m_classIndex && att2 != m_classIndex) {
          r += ((prior_nom[i][j]/m_numInstances)*1.0);
        }
        }
      }
    }

    // calculate weighted standard deviations for these attributes
    // (if necessary)
    for (i = 0, temp1 = 0.0; i < nx; i++) {
      temp1 += ((sumx[i]/m_numInstances)*(stdvsx[i]/m_numInstances));
    }

    if (temp1 != 0.0) {
      if (m_std_devs[att1] == 1.0) {
        m_std_devs[att1] = Math.sqrt(temp1);
      }
    }

    for (j = 0, temp2 = 0.0; j < ny; j++) {
      temp2 += ((sumy[j]/m_numInstances)*(stdvsy[j]/m_numInstances));
    }

    if (temp2 != 0.0) {
      if (m_std_devs[att2] == 1.0) {
        m_std_devs[att2] = Math.sqrt(temp2);
      }
    }

    // fully degenerate case: same convention as num_num / num_nom2
    if (r == 0.0) {
      if (att1 != m_classIndex && att2 != m_classIndex) {
        r = 1.0;
      }
    }

    return  r;
  }

  /**
   * returns a string describing CFS
   *
   * @return the description as a string; a "not been built yet" message when
   *         no training instances have been supplied
   */
  public String toString () {
    StringBuffer text = new StringBuffer();

    if (m_trainInstances == null) {
      text.append("CFS subset evaluator has not been built yet\n");
    }
    else {
      text.append("\tCFS Subset Evaluator\n");

      // NOTE: "seperate" below is a typo preserved from the original output
      // string — changing it would change observable behavior.
      if (m_missingSeperate) {
        text.append("\tTreating missing values as a seperate value\n");
      }

      if (m_locallyPredictive) {
        text.append("\tIncluding locally predictive attributes\n");
      }
    }

    return  text.toString();
  }

  /**
   * Greedily grows best_group with "locally predictive" attributes: at each
   * step the attribute with the highest class correlation not yet considered
   * is accepted only if its correlation with every attribute already in the
   * group stays at or below (class correlation - m_c_Threshold).
   *
   * The correlation matrix is filled lazily; -999 is the sentinel for a
   * not-yet-computed entry (sentinel initialization is outside this chunk).
   *
   * @param best_group the current best subset; modified in place
   */
  private void addLocallyPredictive (BitSet best_group) {
    int i, j;
    boolean done = false;
    boolean ok = true;
    double temp_best = -1.0;
    double corr;
    j = 0;
    BitSet temp_group = (BitSet)best_group.clone();

    while (!done) {
      temp_best = -1.0;

      // find best not already in group
      for (i = 0; i < m_numAttribs; i++) {
        if ((!temp_group.get(i)) && (i != m_classIndex)) {
          if (m_corr_matrix.getElement(i, m_classIndex) == -999) {
            corr = correlate(i, m_classIndex);
            m_corr_matrix.setElement(i, m_classIndex, corr);
            m_corr_matrix.setElement(m_classIndex, i, corr);
          }

          if (m_corr_matrix.getElement(i, m_classIndex) > temp_best) {
            temp_best = m_corr_matrix.getElement(i, m_classIndex);
            j = i;
          }
        }
      }

      if (temp_best == -1.0) {
        done = true;   // every attribute has been considered
      }
      else {
        ok = true;
        temp_group.set(j);   // mark as considered even if later rejected

        // check the best against correlations with others already
        // in group 
        for (i = 0; i < m_numAttribs; i++) {
          if (best_group.get(i)) {
            if (m_corr_matrix.getElement(i, j) == -999) {
              corr = correlate(i, j);
              m_corr_matrix.setElement(i, j, corr);
              m_corr_matrix.setElement(j, i, corr);
            }

            if (m_corr_matrix.getElement(i, j) > temp_best - m_c_Threshold) {
              ok = false;
              break;
            }
          }
        }

        // if ok then add to best_group
        if (ok) {
          best_group.set(j);
        }
      }
    }
  }

  /**
   * Calls locallyPredictive in order to include locally predictive
   * attributes (if requested).
   *
   * @param attributeSet the set of attributes found by the search
   * @return a possibly ranked list of postprocessed attributes
   * @exception Exception if postprocessing fails for some reason
   */
  public int[] postProcess (int[] attributeSet)
    throws Exception {
    int j = 0;

    // nothing to do unless local predictivity was requested
    if (!m_locallyPredictive) {
      //      m_trainInstances = new Instances(m_trainInstances,0);
      return  attributeSet;
    }

    BitSet bestGroup = new BitSet(m_numAttribs);

    for (int i = 0; i < attributeSet.length; i++) {
      bestGroup.set(attributeSet[i]);
    }

    addLocallyPredictive(bestGroup);

    // count how many are set
    for (int i = 0; i < m_numAttribs; i++) {
      if (bestGroup.get(i)) {
        j++;
      }
    }

    // convert the bit set back into an array of attribute indices
    int[] newSet = new int[j];
    j = 0;

    for (int i = 0; i < m_numAttribs; i++) {
      if (bestGroup.get(i)) {
        newSet[j++] = i;
      }
    }

    //    m_trainInstances = new Instances(m_trainInstances,0);
    return  newSet;
  }

  /**
   * Resets the evaluator to its default configuration and releases the
   * reference to the training data.
   */
  protected void resetOptions () {
    m_trainInstances = null;
    m_missingSeperate = false;
    m_locallyPredictive = false;
    m_c_Threshold = 0.0;
  }

  /**
   * Main method for testing this class.
   *
   * @param args the options
   */
  public static void main (String[] args) {
    try {
      System.out.println(AttributeSelection.
                         SelectAttributes(new CfsSubsetEval(), args));
    }
    catch (Exception e) {
      e.printStackTrace();
      System.out.println(e.getMessage());
    }
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -