
📄 relieffattributeeval.java

📁 Source code of a data mining system
💻 JAVA
  public int getSampleSize () {
    return  m_sampleM;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String weightByDistanceTipText() {
    return "Weight nearest neighbours by their distance.";
  }

  /**
   * Set the nearest neighbour weighting method
   *
   * @param b true nearest neighbours are to be weighted by distance.
   */
  public void setWeightByDistance (boolean b) {
    m_weightByDistance = b;
  }


  /**
   * Get whether nearest neighbours are being weighted by distance
   *
   * @return true if nearest neighbours are weighted by distance
   */
  public boolean getWeightByDistance () {
    return  m_weightByDistance;
  }


  /**
   * Gets the current settings of ReliefFAttributeEval.
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    String[] options = new String[9];
    int current = 0;

    if (getWeightByDistance()) {
      options[current++] = "-W";
    }

    options[current++] = "-M";
    options[current++] = "" + getSampleSize();
    options[current++] = "-D";
    options[current++] = "" + getSeed();
    options[current++] = "-K";
    options[current++] = "" + getNumNeighbours();
    options[current++] = "-A";
    options[current++] = "" + getSigma();

    while (current < options.length) {
      options[current++] = "";
    }

    return  options;
  }
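  // Illustrative note (not part of the original source): with the defaults
  // restored by resetOptions() further down (-M -1, -D 1, -K 10, -A 2, and
  // distance weighting switched off), getOptions() would be expected to return
  //   { "-M", "-1", "-D", "1", "-K", "10", "-A", "2", "" }
  // with the unused -W slot padded by an empty string.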


  /**
   * Return a description of the ReliefF attribute evaluator.
   *
   * @return a description of the evaluator as a String.
   */
  public String toString () {
    StringBuffer text = new StringBuffer();

    if (m_trainInstances == null) {
      text.append("ReliefF feature evaluator has not been built yet\n");
    }
    else {
      text.append("\tReliefF Ranking Filter");
      text.append("\n\tInstances sampled: ");

      if (m_sampleM == -1) {
        text.append("all\n");
      }
      else {
        text.append(m_sampleM + "\n");
      }

      text.append("\tNumber of nearest neighbours (k): " + m_Knn + "\n");

      if (m_weightByDistance) {
        text.append("\tExponentially decreasing (with distance) "
		    + "influence for\n"
		    + "\tnearest neighbours. Sigma: "
		    + m_sigma + "\n");
      }
      else {
        text.append("\tEqual influence nearest neighbours\n");
      }
    }

    return  text.toString();
  }


  /**
   * Initializes a ReliefF attribute evaluator.
   *
   * @param data set of instances serving as training data
   * @exception Exception if the evaluator has not been
   * generated successfully
   */
  public void buildEvaluator (Instances data)
    throws Exception
  {
    int z, totalInstances;
    Random r = new Random(m_seed);

    if (data.checkForStringAttributes()) {
      throw  new Exception("Can't handle string attributes!");
    }

    m_trainInstances = data;
    m_classIndex = m_trainInstances.classIndex();
    m_numAttribs = m_trainInstances.numAttributes();
    m_numInstances = m_trainInstances.numInstances();

    if (m_trainInstances.attribute(m_classIndex).isNumeric()) {
      m_numericClass = true;
    }
    else {
      m_numericClass = false;
    }

    if (!m_numericClass) {
      m_numClasses = m_trainInstances.attribute(m_classIndex).numValues();
    }
    else {
      m_ndc = 0;
      m_numClasses = 1;
      m_nda = new double[m_numAttribs];
      m_ndcda = new double[m_numAttribs];
    }

    if (m_weightByDistance) { // set up the rank based weights
      m_weightsByRank = new double[m_Knn];

      for (int i = 0; i < m_Knn; i++) {
        m_weightsByRank[i] =
          Math.exp(-((i/(double)m_sigma)*(i/(double)m_sigma)));
      }
    }
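    // Note added for clarity: the rank-based weight of the i-th nearest
    // neighbour is exp(-(i/sigma)^2), so with the default sigma = 2 the
    // weights decay roughly as 1.00, 0.78, 0.37, 0.11, ... for i = 0, 1, 2, 3.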

    // the final attribute weights
    m_weights = new double[m_numAttribs];
    // one block per class (a single block for a numeric class), each holding
    // the kNN neighbours; element 0 = distance, element 1 = instance index
    m_karray = new double[m_numClasses][m_Knn][2];

    if (!m_numericClass) {
      m_classProbs = new double[m_numClasses];

      for (int i = 0; i < m_numInstances; i++) {
        m_classProbs[(int)m_trainInstances.instance(i).value(m_classIndex)]++;
      }

      for (int i = 0; i < m_numClasses; i++) {
        m_classProbs[i] /= m_numInstances;
      }
    }

    m_worst = new double[m_numClasses];
    m_index = new int[m_numClasses];
    m_stored = new int[m_numClasses];
    m_minArray = new double[m_numAttribs];
    m_maxArray = new double[m_numAttribs];

    for (int i = 0; i < m_numAttribs; i++) {
      m_minArray[i] = m_maxArray[i] = Double.NaN;
    }

    for (int i = 0; i < m_numInstances; i++) {
      updateMinMax(m_trainInstances.instance(i));
    }

    if ((m_sampleM > m_numInstances) || (m_sampleM < 0)) {
      totalInstances = m_numInstances;
    }
    else {
      totalInstances = m_sampleM;
    }

    // process each instance, updating attribute weights
    for (int i = 0; i < totalInstances; i++) {
      if (totalInstances == m_numInstances) {
        z = i;
      }
      else {
        z = r.nextInt()%m_numInstances;
      }

      if (z < 0) {
        z *= -1;
      }

      if (!(m_trainInstances.instance(z).isMissing(m_classIndex))) {
        // first clear the knn and worst index stuff for the classes
        for (int j = 0; j < m_numClasses; j++) {
          m_index[j] = m_stored[j] = 0;

          for (int k = 0; k < m_Knn; k++) {
            m_karray[j][k][0] = m_karray[j][k][1] = 0;
          }
        }

        findKHitMiss(z);

        if (m_numericClass) {
          updateWeightsNumericClass(z);
        }
        else {
          updateWeightsDiscreteClass(z);
        }
      }
    }

    // now scale weights by 1/m_numInstances (nominal class) or
    // calculate weights numeric class
    // System.out.println("num inst:"+m_numInstances+" r_ndc:"+r_ndc);
    for (int i = 0; i < m_numAttribs; i++) {
      if (i != m_classIndex) {
        if (m_numericClass) {
          m_weights[i] = m_ndcda[i]/m_ndc -
            ((m_nda[i] - m_ndcda[i])/((double)totalInstances - m_ndc));
        }
        else {
          m_weights[i] *= (1.0/(double)totalInstances);
        }
      }
    }
  }
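  // Usage sketch (illustrative only, assuming the standard weka.attributeSelection
  // API; not part of the original source). The evaluator defined in this file is
  // typically driven through a Ranker search, which calls buildEvaluator() and
  // evaluateAttribute() internally, e.g.:
  //
  //   Instances data = ...;                        // training set with class index set
  //   AttributeSelection attsel = new AttributeSelection();
  //   ReliefFAttributeEval eval = new ReliefFAttributeEval();
  //   Ranker ranker = new Ranker();
  //   attsel.setEvaluator(eval);
  //   attsel.setSearch(ranker);
  //   attsel.SelectAttributes(data);
  //   double[][] ranked = attsel.rankedAttributes(); // {attribute index, merit} pairs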


  /**
   * Evaluates an individual attribute using ReliefF's instance based approach.
   * The actual work is done by buildEvaluator which evaluates all features.
   *
   * @param attribute the index of the attribute to be evaluated
   * @return the ReliefF weight computed for the attribute
   * @exception Exception if the attribute could not be evaluated
   */
  public double evaluateAttribute (int attribute)
    throws Exception
  {
    return  m_weights[attribute];
  }
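  // Interpretive note (added): ReliefF merits fall roughly in [-1, 1]; larger
  // values indicate attributes whose values better separate near-miss from
  // near-hit instances, while irrelevant attributes score close to zero.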


  /**
   * Reset options to their default values
   */
  protected void resetOptions () {
    m_trainInstances = null;
    m_sampleM = -1;
    m_Knn = 10;
    m_sigma = 2;
    m_weightByDistance = false;
    m_seed = 1;
  }


  /**
   * Normalizes a given value of a numeric attribute.
   *
   * @param x the value to be normalized
   * @param i the attribute's index
   * @return the value normalized by the attribute's observed range
   * (0 if the range is unknown or zero)
   */
  private double norm (double x, int i) {
    if (Double.isNaN(m_minArray[i]) ||
	Utils.eq(m_maxArray[i], m_minArray[i])) {
      return  0;
    }
    else {
      return  (x - m_minArray[i])/(m_maxArray[i] - m_minArray[i]);
    }
  }
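  // Worked example (added for illustration): if attribute i has been observed
  // in the range m_minArray[i] = 0 to m_maxArray[i] = 10, then norm(3.0, i)
  // returns 0.3 and norm(7.0, i) returns 0.7, so difference() below yields
  // |0.3 - 0.7| = 0.4 for that numeric attribute.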


  /**
   * Updates the minimum and maximum values for all the attributes
   * based on a new instance.
   *
   * @param instance the new instance
   */
  private void updateMinMax (Instance instance) {
    //    for (int j = 0; j < m_numAttribs; j++) {
    try {
      for (int j = 0; j < instance.numValues(); j++) {
        if ((instance.attributeSparse(j).isNumeric()) &&
            (!instance.isMissingSparse(j))) {
          if (Double.isNaN(m_minArray[instance.index(j)])) {
            m_minArray[instance.index(j)] = instance.valueSparse(j);
            m_maxArray[instance.index(j)] = instance.valueSparse(j);
          }
          else {
            if (instance.valueSparse(j) < m_minArray[instance.index(j)]) {
              m_minArray[instance.index(j)] = instance.valueSparse(j);
            }
            else {
              if (instance.valueSparse(j) > m_maxArray[instance.index(j)]) {
                m_maxArray[instance.index(j)] = instance.valueSparse(j);
              }
            }
          }
        }
      }
    } catch (Exception ex) {
      System.err.println(ex);
      ex.printStackTrace();
    }
  }

  /**
   * Computes the difference between two given attribute
   * values.
   */
  private double difference(int index, double val1, double val2) {

    switch (m_trainInstances.attribute(index).type()) {
    case Attribute.NOMINAL:

      // If attribute is nominal
      if (Instance.isMissingValue(val1) ||
	  Instance.isMissingValue(val2)) {
	return (1.0 - (1.0/((double)m_trainInstances.
			    attribute(index).numValues())));
      } else if ((int)val1 != (int)val2) {
	return 1;
      } else {
	return 0;
      }
    case Attribute.NUMERIC:

      // If attribute is numeric
      if (Instance.isMissingValue(val1) ||
	  Instance.isMissingValue(val2)) {
	if (Instance.isMissingValue(val1) &&
	    Instance.isMissingValue(val2)) {
	  return 1;
	} else {
	  double diff;
	  if (Instance.isMissingValue(val2)) {
	    diff = norm(val1, index);
	  } else {
	    diff = norm(val2, index);
	  }
	  if (diff < 0.5) {
	    diff = 1.0 - diff;
	  }
	  return diff;
	}
      } else {
	return Math.abs(norm(val1, index) - norm(val2, index));
      }
    default:
      return 0;
    }
  }
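  // Illustrative note (not in the original source): for a nominal attribute
  // with 4 values, any comparison involving a missing value contributes
  // 1 - 1/4 = 0.75; for a numeric attribute with exactly one missing value,
  // the normalised known value is flipped to 1 - diff when it is below 0.5,
  // so such comparisons always contribute at least 0.5.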

  /**
   * Calculates the distance between two instances
   *
   * @param first the first instance
   * @param second the second instance
   * @return the distance between the two given instances, between 0 and 1
   */
  private double distance(Instance first, Instance second) {

    double distance = 0;
    int firstI, secondI;

    for (int p1 = 0, p2 = 0;
	 p1 < first.numValues() || p2 < second.numValues();) {
      if (p1 >= first.numValues()) {
	firstI = m_trainInstances.numAttributes();
      } else {
	firstI = first.index(p1);
      }
      if (p2 >= second.numValues()) {
	secondI = m_trainInstances.numAttributes();
      } else {
	secondI = second.index(p2);
      }
      if (firstI == m_trainInstances.classIndex()) {
	p1++; continue;
      }
      if (secondI == m_trainInstances.classIndex()) {
	p2++; continue;
      }
      double diff;
      if (firstI == secondI) {
	diff = difference(firstI,
			  first.valueSparse(p1),
			  second.valueSparse(p2));
	p1++; p2++;
      } else if (firstI > secondI) {
	diff = difference(secondI,
			  0, second.valueSparse(p2));
	p2++;
      } else {
        diff = difference(firstI,
                          first.valueSparse(p1), 0);
        p1++;
      }
      // accumulate the per-attribute differences (city-block style distance)
      distance += diff;
    }

    return distance;
  }