📄 attributeselection.java
字号:
CvString.append("seed: "+m_seed+" ===\n\n"); } if ((m_searchMethod instanceof RankedOutputSearch) && (m_doRank == true)) { CvString.append("average merit average rank attribute\n"); // calcualte means and std devs for (int i = 0; i < m_rankResults[0].length; i++) { m_rankResults[0][i] /= m_numFolds; // mean merit double var = m_rankResults[0][i]*m_rankResults[0][i]*m_numFolds; var = (m_rankResults[2][i] - var); var /= m_numFolds; if (var <= 0.0) { var = 0.0; m_rankResults[2][i] = 0; } else { m_rankResults[2][i] = Math.sqrt(var); } m_rankResults[1][i] /= m_numFolds; // mean rank var = m_rankResults[1][i]*m_rankResults[1][i]*m_numFolds; var = (m_rankResults[3][i] - var); var /= m_numFolds; if (var <= 0.0) { var = 0.0; m_rankResults[3][i] = 0; } else { m_rankResults[3][i] = Math.sqrt(var); } } // now sort them by mean rank int[] s = Utils.sort(m_rankResults[1]); for (int i=0; i<s.length; i++) { if (m_rankResults[1][s[i]] > 0) { CvString.append(Utils.doubleToString(Math. abs(m_rankResults[0][s[i]]), 6, 3) + " +-" + Utils.doubleToString(m_rankResults[2][s[i]], 6, 3) + " " + Utils.doubleToString(m_rankResults[1][s[i]], fieldWidth+2, 1) + " +-" + Utils.doubleToString(m_rankResults[3][s[i]], 5, 2) +" " + Utils.doubleToString(((double)(s[i] + 1)), fieldWidth, 0) + " " + m_trainInstances.attribute(s[i]).name() + "\n"); } } } else { CvString.append("number of folds (%) attribute\n"); for (int i = 0; i < m_subsetResults.length; i++) { if ((m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) || (i != m_trainInstances.classIndex())) { CvString.append(Utils.doubleToString(m_subsetResults[i], 12, 0) + "(" + Utils.doubleToString((m_subsetResults[i] / m_numFolds * 100.0) , 3, 0) + " %) " + Utils.doubleToString(((double)(i + 1)), fieldWidth, 0) + " " + m_trainInstances.attribute(i).name() + "\n"); } } } return CvString.toString(); } /** * Select attributes for a split of the data. Calling this function * updates the statistics on attribute selection. CVResultsString() * returns a string summarizing the results of repeated calls to * this function. Assumes that splits are from the same dataset--- * ie. have the same number and types of attributes as previous * splits. * * @param split the instances to select attributes from * @exception Exception if an error occurs */ public void selectAttributesCVSplit(Instances split) throws Exception { double[][] attributeRanking = null; // if the train instances are null then set equal to this split. // If this is the case then this function is more than likely being // called from outside this class in order to obtain CV statistics // and all we need m_trainIstances for is to get at attribute names // and types etc. if (m_trainInstances == null) { m_trainInstances = split; } // create space to hold statistics if (m_rankResults == null && m_subsetResults == null) { m_subsetResults = new double[split.numAttributes()]; m_rankResults = new double[4][split.numAttributes()]; } m_ASEvaluator.buildEvaluator(split); // Do the search int[] attributeSet = m_searchMethod.search(m_ASEvaluator, split); // Do any postprocessing that a attribute selection method might // require attributeSet = m_ASEvaluator.postProcess(attributeSet); if ((m_searchMethod instanceof RankedOutputSearch) && (m_doRank == true)) { attributeRanking = ((RankedOutputSearch)m_searchMethod). rankedAttributes(); // System.out.println(attributeRanking[0][1]); for (int j = 0; j < attributeRanking.length; j++) { // merit m_rankResults[0][(int)attributeRanking[j][0]] += attributeRanking[j][1]; // squared merit m_rankResults[2][(int)attributeRanking[j][0]] += (attributeRanking[j][1]*attributeRanking[j][1]); // rank m_rankResults[1][(int)attributeRanking[j][0]] += (j + 1); // squared rank m_rankResults[3][(int)attributeRanking[j][0]] += (j + 1)*(j + 1); // += (attributeRanking[j][0] * attributeRanking[j][0]); } } else { for (int j = 0; j < attributeSet.length; j++) { m_subsetResults[attributeSet[j]]++; } } m_trials++; } /** * Perform a cross validation for attribute selection. With subset * evaluators the number of times each attribute is selected over * the cross validation is reported. For attribute evaluators, the * average merit and average ranking + std deviation is reported for * each attribute. * * @return the results of cross validation as a String * @exception Exception if an error occurs during cross validation */ public String CrossValidateAttributes () throws Exception { Instances cvData = new Instances(m_trainInstances); Instances train; Random random = new Random(m_seed); cvData.randomize(random); if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) { if (cvData.classAttribute().isNominal()) { cvData.stratify(m_numFolds); } } for (int i = 0; i < m_numFolds; i++) { // Perform attribute selection train = cvData.trainCV(m_numFolds, i, random); selectAttributesCVSplit(train); } return CVResultsString(); } /** * Perform attribute selection on the supplied training instances. * * @param data the instances to select attributes from * @exception Exception if there is a problem during selection */ public void SelectAttributes (Instances data) throws Exception { int [] attributeSet; m_transformer = null; m_attributeFilter = null; m_trainInstances = data; if (m_doXval == true && (m_ASEvaluator instanceof AttributeTransformer)) { throw new Exception("Can't cross validate an attribute transformer."); } if (m_ASEvaluator instanceof SubsetEvaluator && m_searchMethod instanceof Ranker) { throw new Exception(m_ASEvaluator.getClass().getName() +" must use a search method other than Ranker"); } if (m_ASEvaluator instanceof AttributeEvaluator && !(m_searchMethod instanceof Ranker)) { // System.err.println("AttributeEvaluators must use a Ranker search " // +"method. Switching to Ranker..."); // m_searchMethod = new Ranker(); throw new Exception("AttributeEvaluators must use the Ranker search " + "method"); } if (m_searchMethod instanceof RankedOutputSearch) { m_doRank = ((RankedOutputSearch)m_searchMethod).getGenerateRanking(); } if (m_ASEvaluator instanceof UnsupervisedAttributeEvaluator || m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) { // unset the class index // m_trainInstances.setClassIndex(-1); } else { // check that a class index has been set if (m_trainInstances.classIndex() < 0) { m_trainInstances.setClassIndex(m_trainInstances.numAttributes()-1); } } // Initialize the attribute evaluator m_ASEvaluator.buildEvaluator(m_trainInstances); if (m_ASEvaluator instanceof AttributeTransformer) { m_trainInstances = ((AttributeTransformer)m_ASEvaluator).transformedHeader(); m_transformer = (AttributeTransformer)m_ASEvaluator; } int fieldWidth = (int)(Math.log(m_trainInstances.numAttributes()) +1.0); // Do the search attributeSet = m_searchMethod.search(m_ASEvaluator, m_trainInstances); // try and determine if the search method uses an attribute transformer--- // this is a bit of a hack to make things work properly with RankSearch // using PrincipalComponents as its attribute ranker try { BeanInfo bi = Introspector.getBeanInfo(m_searchMethod.getClass()); PropertyDescriptor properties[]; MethodDescriptor methods[]; // methods = bi.getMethodDescriptors(); properties = bi.getPropertyDescriptors(); for (int i=0;i<properties.length;i++) { String name = properties[i].getDisplayName(); Method meth = properties[i].getReadMethod(); Object retType = meth.getReturnType(); if (retType.equals(ASEvaluation.class)) { Class args [] = { }; ASEvaluation tempEval = (ASEvaluation)(meth.invoke(m_searchMethod, (Object[])args)); if (tempEval instanceof AttributeTransformer) { // grab the transformed data header m_trainInstances = ((AttributeTransformer)tempEval).transformedHeader(); m_transformer = (AttributeTransformer)tempEval; } } } } catch (IntrospectionException ex) { System.err.println("AttributeSelection: Couldn't " +"introspect"); } // Do any postprocessing that a attribute selection method might require attributeSet = m_ASEvaluator.postProcess(attributeSet); if (!m_doRank) { m_selectionResults.append(printSelectionResults()); } if ((m_searchMethod instanceof RankedOutputSearch) && m_doRank == true) { m_attributeRanking = ((RankedOutputSearch)m_searchMethod).rankedAttributes(); m_selectionResults.append(printSelectionResults()); m_selectionResults.append("Ranked attributes:\n"); // retrieve the number of attributes to retain m_numToSelect = ((RankedOutputSearch)m_searchMethod).getCalculatedNumToSelect(); // determine fieldwidth for merit int f_p=0; int w_p=0; for (int i = 0; i < m_numToSelect; i++) { double precision = (Math.abs(m_attributeRanking[i][1]) - (int)(Math.abs(m_attributeRanking[i][1]))); double intPart = (int)(Math.abs(m_attributeRanking[i][1])); if (precision > 0) { precision = Math.abs((Math.log(Math.abs(precision)) / Math.log(10)))+3; } if (precision > f_p) { f_p = (int)precision; } if (intPart == 0) { if (w_p < 2) { w_p = 2; } } else if ((Math.abs((Math.log(Math.abs(m_attributeRanking[i][1])) / Math.log(10)))+1) > w_p) { if (m_attributeRanking[i][1] > 0) { w_p = (int)Math.abs((Math.log(Math.abs(m_attributeRanking[i][1])) / Math.log(10)))+1; } } } for (int i = 0; i < m_numToSelect; i++) { m_selectionResults. append(Utils.doubleToString(m_attributeRanking[i][1], f_p+w_p+1,f_p) + Utils.doubleToString((m_attributeRanking[i][0] + 1), fieldWidth+1,0) + " " + m_trainInstances. attribute((int)m_attributeRanking[i][0]).name() + "\n"); } // set up the selected attributes array - usable by a filter or // whatever if (m_trainInstances.classIndex() >= 0) { if ((!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) || m_ASEvaluator instanceof AttributeTransformer) { // one more for the class m_selectedAttributeSet = new int[m_numToSelect + 1]; m_selectedAttributeSet[m_numToSelect] = m_trainInstances.classIndex(); } else { m_selectedAttributeSet = new int[m_numToSelect]; } } else { m_selectedAttributeSet = new int[m_numToSelect]; } m_selectionResults.append("\nSelected attributes: "); for (int i = 0; i < m_numToSelect; i++) { m_selectedAttributeSet[i] = (int)m_attributeRanking[i][0]; if (i == m_numToSelect - 1) { m_selectionResults.append(((int)m_attributeRanking[i][0] + 1) + " : " + (i + 1) + "\n"); } else { m_selectionResults.append(((int)m_attributeRanking[i][0] + 1)); m_selectionResults.append(","); } } } else { // set up the selected attributes array - usable by a filter or
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -