📄 relieffattributeeval.java
字号:
* @return the number of nearest neighbours */ public int getNumNeighbours () { return m_Knn; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String seedTipText() { return "Random seed for sampling instances."; } /** * Set the random number seed for randomly sampling instances. * * @param s the random number seed. */ public void setSeed (int s) { m_seed = s; } /** * Get the seed used for randomly sampling instances. * * @return the random number seed. */ public int getSeed () { return m_seed; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String sampleSizeTipText() { return "Number of instances to sample. Default (-1) indicates that all " +"instances will be used for attribute estimation."; } /** * Set the number of instances to sample for attribute estimation * * @param s the number of instances to sample. */ public void setSampleSize (int s) { m_sampleM = s; } /** * Get the number of instances used for estimating attributes * * @return the number of instances. */ public int getSampleSize () { return m_sampleM; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String weightByDistanceTipText() { return "Weight nearest neighbours by their distance."; } /** * Set the nearest neighbour weighting method * * @param b true nearest neighbours are to be weighted by distance. */ public void setWeightByDistance (boolean b) { m_weightByDistance = b; } /** * Get whether nearest neighbours are being weighted by distance * * @return m_weightByDiffernce */ public boolean getWeightByDistance () { return m_weightByDistance; } /** * Gets the current settings of ReliefFAttributeEval. 
*
* @return an array of strings suitable for passing to setOptions()
*/
  public String[] getOptions() {
    // Fixed capacity: -W, -M <n>, -D <n>, -K <n>, -A <n> is nine slots at most.
    final String[] options = new String[9];
    int next = 0;

    if (getWeightByDistance()) {
      options[next] = "-W";
      next++;
    }

    options[next++] = "-M";
    options[next++] = Integer.toString(getSampleSize());

    options[next++] = "-D";
    options[next++] = Integer.toString(getSeed());

    options[next++] = "-K";
    options[next++] = Integer.toString(getNumNeighbours());

    // Sigma is only reported when distance weighting is switched on.
    if (getWeightByDistance()) {
      options[next++] = "-A";
      options[next++] = "" + getSigma();
    }

    // Unused trailing slots are filled with empty strings rather than null.
    for (int pad = next; pad < options.length; pad++) {
      options[pad] = "";
    }

    return options;
  }

  /**
   * Produces a textual summary of the ReliefF attribute evaluator: the
   * number of sampled instances, the number of nearest neighbours and the
   * neighbour weighting scheme, or a notice that no evaluator has been
   * built while no training instances are set.
   *
   * @return a description of the evaluator as a String.
   */
  public String toString() {
    if (m_trainInstances == null) {
      return "ReliefF feature evaluator has not been built yet\n";
    }

    final StringBuilder description = new StringBuilder();
    description.append("\tReliefF Ranking Filter");
    description.append("\n\tInstances sampled: ");

    // A sample size of -1 is reported as "all".
    if (m_sampleM != -1) {
      description.append(m_sampleM).append("\n");
    } else {
      description.append("all\n");
    }

    description.append("\tNumber of nearest neighbours (k): ")
      .append(m_Knn)
      .append("\n");

    if (!m_weightByDistance) {
      description.append("\tEqual influence nearest neighbours\n");
    } else {
      description.append("\tExponentially decreasing (with distance) ")
        .append("influence for\n")
        .append("\tnearest neighbours. Sigma: ")
        .append(m_sigma)
        .append("\n");
    }

    return description.toString();
  }

  /**
   * Returns the capabilities of this evaluator.
* * @return the capabilities of this evaluator * @see Capabilities */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); // attributes result.enable(Capability.NOMINAL_ATTRIBUTES); result.enable(Capability.NUMERIC_ATTRIBUTES); result.enable(Capability.DATE_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); // class result.enable(Capability.NOMINAL_CLASS); result.enable(Capability.NUMERIC_CLASS); result.enable(Capability.DATE_CLASS); result.enable(Capability.MISSING_CLASS_VALUES); return result; } /** * Initializes a ReliefF attribute evaluator. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been * generated successfully */ public void buildEvaluator (Instances data) throws Exception { int z, totalInstances; Random r = new Random(m_seed); // can evaluator handle data? getCapabilities().testWithFail(data); m_trainInstances = data; m_classIndex = m_trainInstances.classIndex(); m_numAttribs = m_trainInstances.numAttributes(); m_numInstances = m_trainInstances.numInstances(); if (m_trainInstances.attribute(m_classIndex).isNumeric()) { m_numericClass = true; } else { m_numericClass = false; } if (!m_numericClass) { m_numClasses = m_trainInstances.attribute(m_classIndex).numValues(); } else { m_ndc = 0; m_numClasses = 1; m_nda = new double[m_numAttribs]; m_ndcda = new double[m_numAttribs]; } if (m_weightByDistance) // set up the rank based weights { m_weightsByRank = new double[m_Knn]; for (int i = 0; i < m_Knn; i++) { m_weightsByRank[i] = Math.exp(-((i/(double)m_sigma)*(i/(double)m_sigma))); } } // the final attribute weights m_weights = new double[m_numAttribs]; // num classes (1 for numeric class) knn neighbours, // and 0 = distance, 1 = instance index m_karray = new double[m_numClasses][m_Knn][2]; if (!m_numericClass) { m_classProbs = new double[m_numClasses]; for (int i = 0; i < m_numInstances; i++) { m_classProbs[(int)m_trainInstances.instance(i).value(m_classIndex)]++; } 
for (int i = 0; i < m_numClasses; i++) { m_classProbs[i] /= m_numInstances; } } m_worst = new double[m_numClasses]; m_index = new int[m_numClasses]; m_stored = new int[m_numClasses]; m_minArray = new double[m_numAttribs]; m_maxArray = new double[m_numAttribs]; for (int i = 0; i < m_numAttribs; i++) { m_minArray[i] = m_maxArray[i] = Double.NaN; } for (int i = 0; i < m_numInstances; i++) { updateMinMax(m_trainInstances.instance(i)); } if ((m_sampleM > m_numInstances) || (m_sampleM < 0)) { totalInstances = m_numInstances; } else { totalInstances = m_sampleM; } // process each instance, updating attribute weights for (int i = 0; i < totalInstances; i++) { if (totalInstances == m_numInstances) { z = i; } else { z = r.nextInt()%m_numInstances; } if (z < 0) { z *= -1; } if (!(m_trainInstances.instance(z).isMissing(m_classIndex))) { // first clear the knn and worst index stuff for the classes for (int j = 0; j < m_numClasses; j++) { m_index[j] = m_stored[j] = 0; for (int k = 0; k < m_Knn; k++) { m_karray[j][k][0] = m_karray[j][k][1] = 0; } } findKHitMiss(z); if (m_numericClass) { updateWeightsNumericClass(z); } else { updateWeightsDiscreteClass(z); } } } // now scale weights by 1/m_numInstances (nominal class) or // calculate weights numeric class // System.out.println("num inst:"+m_numInstances+" r_ndc:"+r_ndc); for (int i = 0; i < m_numAttribs; i++) {if (i != m_classIndex) { if (m_numericClass) { m_weights[i] = m_ndcda[i]/m_ndc - ((m_nda[i] - m_ndcda[i])/((double)totalInstances - m_ndc)); } else { m_weights[i] *= (1.0/(double)totalInstances); } // System.out.println(r_weights[i]); } } } /** * Evaluates an individual attribute using ReliefF's instance based approach. * The actual work is done by buildEvaluator which evaluates all features. 
* * @param attribute the index of the attribute to be evaluated * @throws Exception if the attribute could not be evaluated */ public double evaluateAttribute (int attribute) throws Exception { return m_weights[attribute]; } /** * Reset options to their default values */ protected void resetOptions () { m_trainInstances = null; m_sampleM = -1; m_Knn = 10; m_sigma = 2; m_weightByDistance = false; m_seed = 1; } /** * Normalizes a given value of a numeric attribute. * * @param x the value to be normalized * @param i the attribute's index * @return the normalized value */ private double norm (double x, int i) { if (Double.isNaN(m_minArray[i]) || Utils.eq(m_maxArray[i], m_minArray[i])) { return 0; } else { return (x - m_minArray[i])/(m_maxArray[i] - m_minArray[i]); } } /** * Updates the minimum and maximum values for all the attributes * based on a new instance. * * @param instance the new instance */ private void updateMinMax (Instance instance) { // for (int j = 0; j < m_numAttribs; j++) { try { for (int j = 0; j < instance.numValues(); j++) { if ((instance.attributeSparse(j).isNumeric()) && (!instance.isMissingSparse(j))) { if (Double.isNaN(m_minArray[instance.index(j)])) { m_minArray[instance.index(j)] = instance.valueSparse(j); m_maxArray[instance.index(j)] = instance.valueSparse(j); } else { if (instance.valueSparse(j) < m_minArray[instance.index(j)]) { m_minArray[instance.index(j)] = instance.valueSparse(j); } else { if (instance.valueSparse(j) > m_maxArray[instance.index(j)]) { m_maxArray[instance.index(j)] = instance.valueSparse(j); } } } } } } catch (Exception ex) { System.err.println(ex); ex.printStackTrace(); } } /** * Computes the difference between two given attribute * values. 
*/ private double difference(int index, double val1, double val2) { switch (m_trainInstances.attribute(index).type()) { case Attribute.NOMINAL: // If attribute is nominal if (Instance.isMissingValue(val1) || Instance.isMissingValue(val2)) { return (1.0 - (1.0/((double)m_trainInstances. attribute(index).numValues()))); } else if ((int)val1 != (int)val2) { return 1; } else { return 0; } case Attribute.NUMERIC: // If attribute is numeric if (Instance.isMissingValue(val1) || Instance.isMissingValue(val2)) { if (Instance.isMissingValue(val1) && Instance.isMissingValue(val2)) { return 1; } else { double diff; if (Instance.isMissingValue(val2)) { diff = norm(val1, index); } else { diff = norm(val2, index); } if (diff < 0.5) { diff = 1.0 - diff; } return diff; } } else { return Math.abs(norm(val1, index) - norm(val2, index)); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -