
📄 nnge.java

📁 MacroWeka extends the well-known data mining tool weka
💻 JAVA

    instance.replaceMissingValues(m_MissingVector);
    m_Train.add(instance);

    /* Update the minimum and maximum for all the attributes */
    updateMinMax(instance);

    /* update the mutual information data */
    updateMI(instance);

    /* Nearest Exemplar */
    Exemplar nearest = nearestExemplar(instance);
	
    /* Adjust */
    if(nearest == null){
      Exemplar newEx = new Exemplar(this, m_Train, 10, instance.classValue());
      newEx.generalise(instance);
      initWeight(newEx);
      addExemplar(newEx);
      return;
    }
    adjust(instance, nearest);

    /* Generalise */
    generalise(instance);
  }


  /**
   * Returns the nearest Exemplar
   *
   * @param inst an Instance
   * @return the nearest Exemplar to inst, null if no exemplar is found.
   */
  private Exemplar nearestExemplar(Instance inst){
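    // Exemplars are kept in a singly linked list (m_Exemplars, chained via 'next'):
    // a plain linear scan finds the one with the smallest squared distance.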

    if (m_Exemplars == null)
      return null;
    Exemplar cur = m_Exemplars, nearest = m_Exemplars;
    double dist, smallestDist = cur.squaredDistance(inst);
    while (cur.next != null){
      cur = cur.next;
      dist = cur.squaredDistance(inst);
      if (dist < smallestDist){
	smallestDist = dist;
	nearest = cur;
      }
    }
    return nearest;
  }

    
  /**
   * Returns the nearest Exemplar with class c
   *
   * @param inst an Instance
   * @param c the class of the Exemplar to return
   * @return the nearest Exemplar to inst with class c, null if no exemplar with class c is found.
   */
  private Exemplar nearestExemplar(Instance inst, double c){
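    // Same linear scan as above, restricted to the per-class chain linked via 'nextWithClass'.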

    if (m_ExemplarsByClass[(int) c] == null)
      return null;
    Exemplar cur = m_ExemplarsByClass[(int) c], nearest = m_ExemplarsByClass[(int) c];
    double dist, smallestDist = cur.squaredDistance(inst);
    while (cur.nextWithClass != null){
      cur = cur.nextWithClass;
      dist = cur.squaredDistance(inst);
      if (dist < smallestDist){
	smallestDist = dist;
	nearest = cur;
      }
    }
    return nearest;
  }

    
  /**
   * Generalises an Exemplar of the same class as newInst so that it covers newInst.
   * If no such generalisation succeeds, a new Exemplar is built from newInst.
   *
   * @param newInst the new instance
   * @exception Exception in case of inconsistent situation
   */
  private void generalise(Instance newInst) throws Exception {
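    /* Strategy implemented below: repeatedly take the nearest same-class exemplar,
       tentatively extend it to cover newInst, and keep the extension only if it
       does not overlap any existing exemplar; after m_NumAttemptsOfGene failed
       attempts, newInst becomes a single-point exemplar of its own. */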

    Exemplar first = m_ExemplarsByClass[(int) newInst.classValue()];
    int n = 0;

    /* try to generalise with the nearest exemplars (at most m_NumAttemptsOfGene attempts) */
    while(n < m_NumAttemptsOfGene && first != null){
	    
      /* find the nearest one starting from first */
      Exemplar closest = first, cur = first;
      double smallestDist = first.squaredDistance(newInst), dist;
      while(cur.nextWithClass != null){
	cur = cur.nextWithClass;
	dist = cur.squaredDistance(newInst);
	if(dist < smallestDist){
	  smallestDist = dist;
	  closest = cur;
	}
      }

      /* remove the Exemplar from NNge's lists */
      if(closest == first)
	first = first.nextWithClass;
      removeExemplar(closest); 

      /* try to generalise */
      closest.preGeneralise(newInst);
      if(!detectOverlapping(closest)){
	closest.validateGeneralisation();
	addExemplar(closest);
	return;
      }

      /* it didn't work, put the ungeneralised exemplar back on top of the lists */
      closest.cancelGeneralisation();
      addExemplar(closest);			

      n++;
    }

    /* generalisation failed: add newInst as a new Exemplar */
    Exemplar newEx = new Exemplar(this, m_Train, 5, newInst.classValue());
    newEx.generalise(newInst);
    initWeight(newEx);
    addExemplar(newEx);
  }


  /**
   * Adjust the NNge.
   *
   * @param newInst the newly added instance
   * @param predictedExemplar the nearest Exemplar to newInst
   * @exception Exception in case of inconsistent situation
   */
  private void adjust(Instance newInst, Exemplar predictedExemplar) throws Exception {
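    /* Updates the prediction counters of the nearest exemplar (these counters
       presumably drive the exemplar's weight, see initWeight); a wrong prediction
       whose instance falls inside the exemplar triggers a split via prune(). */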

    /* correct prediction */
    if(newInst.classValue() == predictedExemplar.classValue()){
      predictedExemplar.incrPositiveCount();
      /* incorrect prediction */
    } else {
      predictedExemplar.incrNegativeCount();

      /* new instance falls inside */
      if(predictedExemplar.holds(newInst)){
	prune(predictedExemplar, newInst);
      }
    }    
  }


  /**
   * Prunes an Exemplar that matches an Instance
   *
   * @param predictedExemplar an Exemplar
   * @param newInst an Instance matched by predictedExemplar
   * @exception Exception in case of inconsistent situation. (shouldn't happen.)
   */
  private void prune(Exemplar predictedExemplar, Instance newInst) throws Exception {
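    /* Outline of the split: remove the offending exemplar, pick one attribute to
       cut on (for numeric attributes, the one whose border is closest to newInst
       after min-max normalisation; for nominal attributes, the one that keeps the
       largest resulting exemplar), then redistribute its instances around newInst. */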

    /* remove the Exemplar */
    removeExemplar(predictedExemplar);

    /* look for the best nominal feature and the best numeric feature to cut */
    int numAttr = -1, nomAttr = -1;
    double smallestDelta = Double.POSITIVE_INFINITY, delta;
    int biggest_N_Nom = -1, biggest_N_Num = -1, n, m;
    for(int i = 0; i < m_Train.numAttributes(); i++){

      if(i == m_Train.classIndex())
	continue;

      /* numeric attribute */
      if(m_Train.attribute(i).isNumeric()){

	/* compute the distance 'delta' to the closest boundary */
	double norm = m_MaxArray[i] - m_MinArray[i];
	if(norm != 0){
	  delta = Math.min((predictedExemplar.getMaxBorder(i) - newInst.value(i)), 
			   (newInst.value(i) - predictedExemplar.getMinBorder(i))) / norm;
	} else {
	  delta = Double.POSITIVE_INFINITY;
	}

	/* compute the size of the biggest Exemplar which would be created */
	n = m = 0;
	Enumeration enu = predictedExemplar.enumerateInstances();
	while(enu.hasMoreElements()){
	  Instance ins = (Instance) enu.nextElement();
	  if(ins.value(i) < newInst.value(i))
	    n++;
	  else if(ins.value(i) > newInst.value(i))
	    m++;
	}
	n = Math.max(n, m);

	if(delta < smallestDelta){
	  smallestDelta = delta;
	  biggest_N_Num = n;
	  numAttr = i;
	} else if(delta == smallestDelta && n > biggest_N_Num){
	  biggest_N_Num = n;
	  numAttr = i;
	}

	/* nominal attribute */
      } else {

	/* compute the size of the Exemplar which would be created */
	Enumeration enu = predictedExemplar.enumerateInstances();
	n = 0;
	while(enu.hasMoreElements()){
	  if(((Instance) enu.nextElement()).value(i) != newInst.value(i))
	    n++;
	}
	if(n > biggest_N_Nom){
	  biggest_N_Nom = n;
	  nomAttr = i;
	} 
      }
    }

    /* selection of the feature to cut between the best nominal and the best numeric */
    int attrToCut;
    if(numAttr == -1 && nomAttr == -1){
      attrToCut = 0;
    } else if (numAttr == -1){
      attrToCut = nomAttr;
    } else if(nomAttr == -1){
      attrToCut = numAttr;
    } else {
      if(biggest_N_Nom > biggest_N_Num)
	attrToCut = nomAttr;
      else
	attrToCut = numAttr;
    }

    /* split the Exemplar */
    Instance curInst;
    Exemplar a, b;
    a = new Exemplar(this, m_Train, 10, predictedExemplar.classValue());
    b = new Exemplar(this, m_Train, 10, predictedExemplar.classValue());
    LinkedList leftAlone = new LinkedList();
    Enumeration enu = predictedExemplar.enumerateInstances();
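    /* Numeric cut: instances strictly above newInst's value on attrToCut go to 'a',
       strictly below go to 'b'. Nominal cut: instances differing from newInst on
       attrToCut go to 'a' (so 'b' may stay empty). Instances that tie on attrToCut
       but differ elsewhere are set aside in leftAlone; exact duplicates of newInst
       are dropped. */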
    if(m_Train.attribute(attrToCut).isNumeric()){
      while(enu.hasMoreElements()){
	curInst = (Instance) enu.nextElement();
	if(curInst.value(attrToCut) > newInst.value(attrToCut)){
	  a.generalise(curInst);
	} else if (curInst.value(attrToCut) < newInst.value(attrToCut)){
	  b.generalise(curInst);
	} else if (notEqualFeatures(curInst, newInst)) {
	  leftAlone.add(curInst);
	}
      }
    } else {
      while(enu.hasMoreElements()){
	curInst = (Instance) enu.nextElement();
	if(curInst.value(attrToCut) != newInst.value(attrToCut)){
	  a.generalise(curInst);
	} else if (notEqualFeatures(curInst, newInst)){
	  leftAlone.add(curInst);
	}
      }
    }
	
    /* treat the left alone Instances */
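    /* Each left-alone instance is re-added to 'a' or 'b' only if the generalised
       exemplar would not cover newInst again; otherwise it becomes its own
       single-instance exemplar. */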
    while(leftAlone.size() != 0){

      Instance alone = (Instance) leftAlone.removeFirst();
      a.preGeneralise(alone);
      if(!a.holds(newInst)){
	a.validateGeneralisation();
	continue;
      }
      a.cancelGeneralisation();
      b.preGeneralise(alone);
      if(!b.holds(newInst)){
	b.validateGeneralisation();
	continue;
      }
      b.cancelGeneralisation();
      Exemplar exem = new Exemplar(this, m_Train, 3, alone.classValue());
      exem.generalise(alone);
      initWeight(exem);
      addExemplar(exem);
    }

    /* add (or not) the new Exemplars */
    if(a.numInstances() != 0){
      initWeight(a);
      addExemplar(a);
    }
    if(b.numInstances() != 0){
      initWeight(b);
      addExemplar(b);	    
    }
  }


  /**
   * Returns true if the two instances do not have the same feature values
   * 
   * @param inst1 an instance
   * @param inst2 an instance
   * @return true if the instances do not have the same feature values
   */
  private boolean notEqualFeatures(Instance inst1, Instance inst2) {

    for(int i = 0; i < m_Train.numAttributes(); i++){
      if(i == m_Train.classIndex())
	continue;
      if(inst1.value(i) != inst2.value(i))
	return true;
    }
    return false;
  }


  /**
   * Returns true if ex overlaps any of the Exemplars in NNge's lists
   *
   * @param ex an Exemplar
   * @return true if ex overlaps any of the Exemplars in NNge's lists
   */
  private boolean detectOverlapping(Exemplar ex){
    Exemplar cur = m_Exemplars;
    while(cur != null){
      if(ex.overlaps(cur)){
	return true;
      }
      cur = cur.next;
    }
    return false;
  }


  /**
   * Updates the minimum and maximum values for all the numeric attributes 
   * 
   * @param instance the new instance
   */
  private void updateMinMax(Instance instance){

    for (int j = 0; j < m_Train.numAttributes(); j++) {
      if(m_Train.classIndex() == j || m_Train.attribute(j).isNominal())
	continue;
      if (instance.value(j) < m_MinArray[j]) 
	m_MinArray[j] = instance.value(j);
      if (instance.value(j) > m_MaxArray[j])
	m_MaxArray[j] = instance.value(j);
    }    
  }


  /**
   * Updates the data for computing the mutual information
   *
   * MUST be called AFTER adding inst to m_Train
   *
   * @param inst the new instance
   * @exception Exception is thrown if an inconsistent situation is met
   */
  private void updateMI(Instance inst) throws Exception {

    if(m_NumFoldersMI < 1){
      throw new Exception("NNge.updateMI : incorrect number of folders ! Option I must be greater than 1.");
    }

    m_MI_NumClass[(int) inst.classValue()]++;
    m_MI_NumInst++;
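    /* Bookkeeping for the mutual-information estimate: per-class counts
       (m_MI_NumClass), the total instance count, and per-attribute interval counts;
       numeric attributes are discretised into m_NumFoldersMI equal-width intervals
       between their observed min and max. */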

    /* for each attribute */
    for(int attrIndex = 0; attrIndex < m_Train.numAttributes(); attrIndex++){

      /* which is the class attribute */
      if(m_Train.classIndex() == attrIndex)
	continue;

      /* which is a numeric attribute */
      else if(m_Train.attribute(attrIndex).isNumeric()){
		
	/* if max-min have to be updated */
	if(Double.isNaN(m_MI_MaxArray[attrIndex]) ||
	   Double.isNaN(m_MI_MinArray[attrIndex]) ||
	   m_MI_MaxArray[attrIndex] < inst.value(attrIndex) || 
	   inst.value(attrIndex) < m_MI_MinArray[attrIndex]){

	  /* then update them */
	  if(Double.isNaN(m_MI_MaxArray[attrIndex])) m_MI_MaxArray[attrIndex] = inst.value(attrIndex);
	  if(Double.isNaN(m_MI_MinArray[attrIndex])) m_MI_MinArray[attrIndex] = inst.value(attrIndex);
	  if(m_MI_MaxArray[attrIndex] < inst.value(attrIndex)) m_MI_MaxArray[attrIndex] = inst.value(attrIndex);
	  if(m_MI_MinArray[attrIndex] > inst.value(attrIndex)) m_MI_MinArray[attrIndex] = inst.value(attrIndex);
		    
	  /* and re-compute everything from scratch... (just for this attribute) */
	  double delta = (m_MI_MaxArray[attrIndex] - m_MI_MinArray[attrIndex]) / (double) m_NumFoldersMI;

	  /* for each interval */
	  for(int inter = 0; inter < m_NumFoldersMI; inter++){

	    m_MI_NumAttrInter[attrIndex][inter] = 0;
