📄 attributeselection.java

📁 一个数据挖掘系统的源码
💻 JAVA
📖 第 1 页 / 共 3 页
字号:

    CvString.append("\n\n=== Attribute selection " + m_numFolds
		    + " fold cross-validation ");

    if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) &&
	!(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator) &&
	(m_trainInstances.classAttribute().isNominal())) {
	CvString.append("(stratified), seed: ");
	CvString.append(m_seed+" ===\n\n");
    }
    else {
      CvString.append("seed: "+m_seed+" ===\n\n");
    }

    if ((m_searchMethod instanceof RankedOutputSearch) && (m_doRank == true)) {
      CvString.append("average merit      average rank  attribute\n");

      // calculate means and std devs
      for (int i = 0; i < m_rankResults[0].length; i++) {
	m_rankResults[0][i] /= m_numFolds; // mean merit
	double var = m_rankResults[0][i]*m_rankResults[0][i]*m_numFolds;
	var = (m_rankResults[2][i] - var);
	var /= m_numFolds;

	if (var <= 0.0) {
	  var = 0.0;
	  m_rankResults[2][i] = 0;
	}
	else {
	  m_rankResults[2][i] = Math.sqrt(var);
	}

	m_rankResults[1][i] /= m_numFolds; // mean rank
	var = m_rankResults[1][i]*m_rankResults[1][i]*m_numFolds;
	var = (m_rankResults[3][i] - var);
	var /= m_numFolds;

	if (var <= 0.0) {
	  var = 0.0;
	  m_rankResults[3][i] = 0;
	}
	else {
	  m_rankResults[3][i] = Math.sqrt(var);
	}
      }

      // now sort them by mean rank
      int[] s = Utils.sort(m_rankResults[1]);
      for (int i=0; i<s.length; i++) {
	if (m_rankResults[1][s[i]] > 0) {
	  CvString.append(Utils.doubleToString(Math.
					       abs(m_rankResults[0][s[i]]),
					       6, 3)
			  + " +-"
			  + Utils.doubleToString(m_rankResults[2][s[i]], 6, 3)
			  + "   "
			  + Utils.doubleToString(m_rankResults[1][s[i]],
						 fieldWidth+2, 1)
			  + " +-"
			  + Utils.doubleToString(m_rankResults[3][s[i]], 5, 2)
			+"  "
			  + Utils.doubleToString(((double)(s[i] + 1)),
						 fieldWidth, 0)
			  + " "
			  + m_trainInstances.attribute(s[i]).name()
			  + "\n");
	}
      }
    }
    else {
      CvString.append("number of folds (%)  attribute\n");

      for (int i = 0; i < m_subsetResults.length; i++) {
	if ((m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) ||
	    (i != m_trainInstances.classIndex())) {
	  CvString.append(Utils.doubleToString(m_subsetResults[i], 12, 0)
			  + "("
			  + Utils.doubleToString((m_subsetResults[i] /
						  m_numFolds * 100.0)
						 , 3, 0)
			  + " %)  "
			  + Utils.doubleToString(((double)(i + 1)),
						 fieldWidth, 0)
			  + " "
			  + m_trainInstances.attribute(i).name()
			  + "\n");
	}
      }
    }

    return CvString.toString();
  }

  /**
   * Runs attribute selection on a single split of the data and adds the
   * outcome to the running statistics kept by this object.
   * CVResultsString() returns a string summarizing what repeated calls
   * to this method have accumulated. Every split is assumed to come from
   * the same dataset, i.e. to have the same number and types of
   * attributes as the splits seen before it.
   *
   * @param split the instances to select attributes from
   * @exception Exception if an error occurs
   */
  public void selectAttributesCVSplit(Instances split) throws Exception {
    // No training instances registered yet: adopt this split. In that
    // situation the method is most likely being driven from outside this
    // class purely to gather CV statistics, and m_trainInstances is only
    // needed for attribute names, types and the like.
    if (m_trainInstances == null) {
      m_trainInstances = split;
    }

    // Allocate the accumulators the first time through.
    if (m_rankResults == null && m_subsetResults == null) {
      m_subsetResults = new double[split.numAttributes()];
      m_rankResults = new double[4][split.numAttributes()];
    }

    m_ASEvaluator.buildEvaluator(split);

    // Run the search, then hand its result to the evaluator for whatever
    // postprocessing the attribute selection method requires.
    int[] chosen =
      m_ASEvaluator.postProcess(m_searchMethod.search(m_ASEvaluator, split));

    boolean useRanking =
      (m_searchMethod instanceof RankedOutputSearch) && m_doRank;

    if (!useRanking) {
      // Subset output: count one more selection for every chosen attribute.
      for (int k = 0; k < chosen.length; k++) {
        m_subsetResults[chosen[k]] += 1.0;
      }
    } else {
      double[][] ranking =
        ((RankedOutputSearch) m_searchMethod).rankedAttributes();

      // Row layout of m_rankResults: 0 = sum of merit, 1 = sum of rank,
      // 2 = sum of squared merit, 3 = sum of squared rank.
      for (int pos = 0; pos < ranking.length; pos++) {
        int attIndex = (int) ranking[pos][0];
        double merit = ranking[pos][1];
        int rank = pos + 1; // ranks are reported 1-based

        m_rankResults[0][attIndex] += merit;
        m_rankResults[1][attIndex] += rank;
        m_rankResults[2][attIndex] += merit * merit;
        m_rankResults[3][attIndex] += rank * rank;
      }
    }

    m_trials++;
  }

  /**
   * Perform a cross validation for attribute selection. With subset
   * evaluators the number of times each attribute is selected over
   * the cross validation is reported. For attribute evaluators, the
   * average merit and average ranking + std deviation is reported for
   * each attribute.
   *
   * The training instances are shuffled using m_seed and, unless the
   * evaluator is an UnsupervisedSubsetEvaluator or an
   * UnsupervisedAttributeEvaluator, stratified when the class attribute
   * is nominal. Each of the m_numFolds training folds is then passed to
   * selectAttributesCVSplit().
   *
   * @return the results of cross validation as a String
   * @exception Exception if no training instances have been set or if an
   * error occurs during cross validation
   */
  public String CrossValidateAttributes () throws Exception {
    // Fail with a descriptive message rather than a bare
    // NullPointerException from the Instances constructor below.
    if (m_trainInstances == null) {
      throw new Exception("Can't cross validate: no training instances "
                          + "have been set.");
    }

    // Shuffle a separately constructed Instances object rather than
    // m_trainInstances itself.
    Instances cvData = new Instances(m_trainInstances);
    cvData.randomize(new Random(m_seed));

    // Stratification needs a nominal class, and the unsupervised
    // evaluators do not use a class at all.
    if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) &&
        !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator) &&
        cvData.classAttribute().isNominal()) {
      cvData.stratify(m_numFolds);
    }

    for (int fold = 0; fold < m_numFolds; fold++) {
      // Perform attribute selection on the training portion of this fold
      Instances train = cvData.trainCV(m_numFolds, fold);
      selectAttributesCVSplit(train);
    }

    return CVResultsString();
  }

  /**
   * Perform attribute selection on the supplied training instances.
   *
   * @param data the instances to select attributes from
   * @exception Exception if there is a problem during selection
   */
  public void SelectAttributes (Instances data) throws Exception {
    int [] attributeSet;

    m_transformer = null;
    m_attributeFilter = null;
    m_trainInstances = data;

    if (m_doXval == true && (m_ASEvaluator instanceof AttributeTransformer)) {
      throw new Exception("Can't cross validate an attribute transformer.");
    }

    if (m_ASEvaluator instanceof SubsetEvaluator &&
	m_searchMethod instanceof Ranker) {
      throw new Exception(m_ASEvaluator.getClass().getName()
			  +" must use a search method other than Ranker");
    }

    if (m_ASEvaluator instanceof AttributeEvaluator &&
	!(m_searchMethod instanceof Ranker)) {
      //      System.err.println("AttributeEvaluators must use a Ranker search "
      //			 +"method. Switching to Ranker...");
      //      m_searchMethod = new Ranker();
      throw new Exception("AttributeEvaluators must use the Ranker search "
			  + "method");
    }

    if (m_searchMethod instanceof RankedOutputSearch) {
      m_doRank = ((RankedOutputSearch)m_searchMethod).getGenerateRanking();
    }

    if (m_ASEvaluator instanceof UnsupervisedAttributeEvaluator ||
	m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) {
      // unset the class index
      m_trainInstances.setClassIndex(-1);
    } else {
      // check that a class index has been set
      if (m_trainInstances.classIndex() < 0) {
	m_trainInstances.setClassIndex(m_trainInstances.numAttributes()-1);
      }
    }

    // Initialize the attribute evaluator
    m_ASEvaluator.buildEvaluator(m_trainInstances);
    if (m_ASEvaluator instanceof AttributeTransformer) {
      m_trainInstances =
	((AttributeTransformer)m_ASEvaluator).transformedHeader();
      m_transformer = (AttributeTransformer)m_ASEvaluator;
    }
    int fieldWidth = (int)(Math.log(m_trainInstances.numAttributes()) +1.0);

    // Do the search
    attributeSet = m_searchMethod.search(m_ASEvaluator,
					 m_trainInstances);
    // try and determine if the search method uses an attribute transformer---
    // this is a bit of a hack to make things work properly with RankSearch
    // using PrincipalComponents as its attribute ranker
     try {
       BeanInfo bi = Introspector.getBeanInfo(m_searchMethod.getClass());
       PropertyDescriptor properties[];
       MethodDescriptor methods[];
       //       methods = bi.getMethodDescriptors();
       properties = bi.getPropertyDescriptors();
       for (int i=0;i<properties.length;i++) {
	 String name = properties[i].getDisplayName();
	 Method meth = properties[i].getReadMethod();
	 Object retType = meth.getReturnType();
	 if (retType.equals(ASEvaluation.class)) {
	   Class args [] = { };
	   ASEvaluation tempEval = (ASEvaluation)(meth.invoke(m_searchMethod,
							     args));
	   if (tempEval instanceof AttributeTransformer) {
	     // grab the transformed data header
	     m_trainInstances =
	       ((AttributeTransformer)tempEval).transformedHeader();
	     m_transformer = (AttributeTransformer)tempEval;
	   }
	 }
       }
     } catch (IntrospectionException ex) {
       System.err.println("AttributeSelection: Couldn't "
			  +"introspect");
     }


     // Do any postprocessing that an attribute selection method might require
     attributeSet = m_ASEvaluator.postProcess(attributeSet);
     if (!m_doRank) {
       m_selectionResults.append(printSelectionResults());
     }

    if ((m_searchMethod instanceof RankedOutputSearch) && m_doRank == true) {
      m_attributeRanking =
	((RankedOutputSearch)m_searchMethod).rankedAttributes();
      m_selectionResults.append(printSelectionResults());
      m_selectionResults.append("Ranked attributes:\n");

      // retrieve the number of attributes to retain
      m_numToSelect =
	((RankedOutputSearch)m_searchMethod).getCalculatedNumToSelect();

      // determine fieldwidth for merit
      int f_p=0;
      int w_p=0;

      for (int i = 0; i < m_numToSelect; i++) {
	double precision = (Math.abs(m_attributeRanking[i][1]) -
			    (int)(Math.abs(m_attributeRanking[i][1])));

	if (precision > 0) {
	  precision = Math.abs((Math.log(Math.abs(precision)) /
				Math.log(10)))+3;
	}
	if (precision > f_p) {
	  f_p = (int)precision;
	}
	if ((Math.abs((Math.log(Math.abs(m_attributeRanking[i][1]))
		       / Math.log(10)))+1) > w_p) {
	  if (m_attributeRanking[i][1] > 0) {
	    w_p = (int)Math.abs((Math.log(Math.abs(m_attributeRanking[i][1]))
				 / Math.log(10)))+1;
	  }
	}
      }

      for (int i = 0; i < m_numToSelect; i++) {
	m_selectionResults.
	  append(Utils.doubleToString(m_attributeRanking[i][1],
				      f_p+w_p+1,f_p)
		 + Utils.doubleToString((m_attributeRanking[i][0] + 1),
					fieldWidth+1,0)
		 + " "
		 + m_trainInstances.
		 attribute((int)m_attributeRanking[i][0]).name()
		 + "\n");
      }

      // set up the selected attributes array - usable by a filter or
      // whatever
      if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
	  && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator))
	{
	  // one more for the class
	  m_selectedAttributeSet = new int[m_numToSelect + 1];
	  m_selectedAttributeSet[m_numToSelect] =
	    m_trainInstances.classIndex();
	}
      else {
	m_selectedAttributeSet = new int[m_numToSelect];
      }

      m_selectionResults.append("\nSelected attributes: ");

      for (int i = 0; i < m_numToSelect; i++) {
	m_selectedAttributeSet[i] = (int)m_attributeRanking[i][0];

	if (i == m_numToSelect - 1) {
	  m_selectionResults.append(((int)m_attributeRanking[i][0] + 1)
				    + " : "
				    + (i + 1)
				    + "\n");
	}
	else {
	  m_selectionResults.append(((int)m_attributeRanking[i][0] + 1));
	  m_selectionResults.append(",");
	}
      }
    } else {
      // set up the selected attributes array - usable by a filter or
💿 文件大小 572 K
👤 上传用户 yl810406
📂 所属分类文章/文档
🏷️ 相关标签

#数据挖掘 #源码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -