partition.java
Clustering data for different data mining techniques
Language: Java
Page 1 of 5

  /** @return true if for the specified <code>fitnessMeasure</code>
   * we need to compute weights */
  private boolean needsWeights(int fitnessMeasure)
  {
    return (fitnessMeasure == Global.FM_COS
	    || fitnessMeasure == Global.FM_NORM_W
	    || fitnessMeasure == Global.FM_WE);
  }

  /** if the argument is negative return 0, otherwise return the
   * argument */
  private double filterNegative(double value)
  {
    if (value < 0.0)
      return 0.0;
    else
      return value;
  }


  /** computes the fitness of this Partition with respect to its
   * intersections with the <code>NPART</code> partitions in
   * <code>c</code>, available in <code>intersectMap</code>, according
   * to the given entropy (impurity) measure and fitness measure */
  public void computeFitness(Hashtable[] intersectMap,
			     Partition[] c, int NPART,
			     int entropyMeasure, int fitnessMeasure,
			     double[] wTargetCondOnDB, 
			     double[] wDBCondOnTarget,
			     double[] weight)
  {
    if (Global.DEBUG == 1)
      if (needsWeights(fitnessMeasure) && 
	  (weight == null || wTargetCondOnDB == null 
	   || wDBCondOnTarget == null))
	{
	  System.err.println("ERROR! Partition.computeFitness() invalid weight");
	  System.exit(1);
	}

    // average entropy of the NPART partitions
    double averageEntropy = 0.0;
    if (fitnessMeasure == Global.FM_ALTERNATE_HAVG)
      {
	// compute average entropy of attribute partitions
	for(int i = 0; i < NPART; i++)
	  averageEntropy += c[i].entropy(entropyMeasure);
	averageEntropy /= NPART;
      }

    // intersectMap[i] describes the intersections between partition
    // c[i] and our partition:
    //   key:   a class (distinct value) C_s of our partition
    //   value: a map from each class B_t of c[i] to the intersection
    //          count |B_t x C_s|
    // i.e. intersectMap[i] = <C_s, <B_t, |B_t x C_s| >>
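    // Illustrative example (hypothetical counts): if our partition has
    // classes C_1, C_2 and c[0] has blocks B_1, B_2, then intersectMap[0]
    // might look like { 1 -> { 1 -> 5, 2 -> 3 }, 2 -> { 2 -> 7 } },
    // i.e. |B_1 x C_1| = 5, |B_2 x C_1| = 3, |B_2 x C_2| = 7; empty
    // intersections are expected to be absent (see the DEBUG check below).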

    // c[i]={B_t}, this={C_s}
    // stores in entropyCondThem[i] H(this|c[i])
    double[] entropyCondThem = null; // conditioned by c partitions
    // stores in entropyCondUs[i] H(c[i]|this)
    double[] entropyCondUs = null; // conditioned by our partition
    // stores in entropyIntersect H(this^c[i])
    double[] entropyIntersect = null;

    // memory allocation and initialization
    if (needsEntropyConditionedByThem(fitnessMeasure))
      {
	entropyCondThem = new double[NPART];
	for (int i = 0; i < NPART; i++)
	  entropyCondThem[i] = 0.0; 
      }

    if (needsEntropyConditionedByUs(fitnessMeasure))
      {
	entropyCondUs = new double[NPART];
	for (int i = 0; i < NPART; i++)
	  entropyCondUs[i] = 0.0;
      }
    
    if (needsEntropyOfIntersection(fitnessMeasure))
      {
	entropyIntersect = new double[NPART];
	for (int i = 0; i < NPART; i++)
	  entropyIntersect[i] = 0.0;
      }
    
    // clear old class_fitness maps
    classFitness.clear();
    Hashtable blockFitness = new Hashtable();

    // intersectMap[i] = <C_s, <B_t, B_t x C_s >>      
    // compute entropyCondThem, entropyCondUs
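    // Sketch of what the loops below accumulate (imp() denotes the chosen
    // impurity measure, n = size; the 1/n factor for the first two sums is
    // applied in the scaling step at the end of the i loop):
    //   entropyCondThem[i]  = (1/n) sum_{s,t} |B_t| imp(|B_t x C_s| / |B_t|)  ~ H(this | c[i])
    //   entropyCondUs[i]    = (1/n) sum_{s,t} |C_s| imp(|B_t x C_s| / |C_s|)  ~ H(c[i] | this)
    //   entropyIntersect[i] =       sum_{s,t}       imp(|B_t x C_s| / n)      ~ H(this ^ c[i])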
    for (int i = 0; i < NPART; i++)
      {
	// key: C_s, value: <B_t, B_t x C_s >
	for (Enumeration keys = intersectMap[i].keys(); 
	     keys.hasMoreElements(); )
	  {
	    Integer currClass = (Integer)keys.nextElement(); // C_s
	    Hashtable intersections = 
	      (Hashtable)intersectMap[i].get(currClass);
	  
	    int ourClassSize = getClassCard(currClass.intValue()); // |C_s|

	    if (Global.DEBUG == 1)
	      if (ourClassSize == 0)
		{
		  System.err.println("ERROR! Partition.computeFitness(): |C_s| = 0 for C_s = " + currClass);
		  print();
		  printClassCardinalities();
		  System.exit(1);
		}
	    
	    double sumCondThem = 0.0;
	    double sumCondUs = 0.0;
	    double sumIntersect = 0.0;

	    // key2: B_t, value2: |Bt x C_s|
	    for (Enumeration keys2 = intersections.keys(); 
		 keys2.hasMoreElements();)
	      {
		Integer currClass2 = (Integer)keys2.nextElement();
		int count = ((Integer)intersections.get(currClass2)).intValue(); // |B_t x C_s|

		if (Global.DEBUG == 1)
		  if (count == 0)
		    {
		      System.err.println("ERROR! Partition.computeFitness(): |B_t x C_s| = 0");
		      System.exit(1);
		    }
	      
		int hisClassSize = c[i].getClassCard(currClass2.intValue()); // |B_t|

		if (Global.DEBUG == 1)
		  if (hisClassSize == 0)
		    {
		      System.err.println("ERROR! Partition.computeFitness(): |B_t| = 0");
		      System.exit(1);
		    }

		if (needsEntropyConditionedByThem(fitnessMeasure))
		  sumCondThem += hisClassSize 
		    * ImpurityMeasure.impurityMeasure(entropyMeasure,
						      (double)count 
						      /(double)hisClassSize);

		if (needsEntropyConditionedByUs(fitnessMeasure))
		  sumCondUs += (double)ourClassSize 
		    * ImpurityMeasure.impurityMeasure(entropyMeasure,
						      (double)count 
						      /(double)ourClassSize);

		if (needsEntropyOfIntersection(fitnessMeasure))
		  sumIntersect += 
		    ImpurityMeasure.impurityMeasure(entropyMeasure,
						    (double)count 
						    /(double)size);

		// build the classFitness map, which stores for each
		// of our classes <class number, fitness of the class>
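		// note: CF_PROD, CF_DIV and CF_POW accumulate their
		// per-class values directly in classFitness over all
		// partitions i and blocks B_t, while CF_SYM_DIFF first
		// collects per-partition minima in blockFitness; CF_POW
		// and CF_SYM_DIFF are finalized after the main loops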
		switch(classFitnessType)
		  {
		  case CF_PROD:
		    // <Cs, |Cs| * sum_t imp(|Bt ^ Cs| / |Cs|)>
		    {
		      double term = ourClassSize
			* ImpurityMeasure.impurityMeasure(entropyMeasure,
							  (double)count / (double)ourClassSize);
		      if (classFitness.containsKey(currClass) == false)
			classFitness.put(currClass, new Double(term));
		      else
			{
			  double oldValue = 
			    ((Double)classFitness.get(currClass)).doubleValue();
			  classFitness.put(currClass, new Double(oldValue + term));
			}
		    }
		    break;

		  case CF_DIV:
		    // <Cs, 1/|Cs| * sum_t imp(|Bt ^ Cs| / |Cs|)>
		    {
		      double term = (1.0 / ourClassSize)
			* ImpurityMeasure.impurityMeasure(entropyMeasure,
							  (double)count / (double)ourClassSize);
		      if (classFitness.containsKey(currClass) == false)
			classFitness.put(currClass, new Double(term));
		      else
			{
			  double oldValue = 
			    ((Double)classFitness.get(currClass)).doubleValue();
			  classFitness.put(currClass, new Double(oldValue + term));
			}
		    }
		    break;

		  case CF_POW:
		    // <Cs, pow(2, sum_t imp(|Bt ^ Cs| / |Cs|)) / |Cs|>
		    // here we compute pow(2, sum_t imp(|Bt ^ Cs| / |Cs|))
		    // as a product of per-term factors;
		    // the division by |Cs| is applied later
		    {
		      double factor =
			Math.pow(2, ImpurityMeasure.impurityMeasure(entropyMeasure,
								    (double)count / (double)ourClassSize));
		      if (classFitness.containsKey(currClass) == false)
			classFitness.put(currClass, new Double(factor));
		      else
			{
			  double oldValue = 
			    ((Double)classFitness.get(currClass)).doubleValue();
			  classFitness.put(currClass, new Double(oldValue * factor));
			}
		    }
		    break;

		    // a classFitnessType of 0 is treated as CF_SYM_DIFF
		  case 0:
		  case CF_SYM_DIFF:
		    // <Cs, <i, min (|Cs| + |Bt| - 2|Bt ^ Cs|)/(|Cs| + |Bt|)> >
		    {
		      double val =
			(double)(ourClassSize + hisClassSize - 2 * count)
			/ (double)(ourClassSize + hisClassSize);
		    
		      if (blockFitness.containsKey(currClass) == false)
			// if we see this block for first time
			{
			  Hashtable bf = new Hashtable();
			  bf.put(new Integer(i), new Double(val));
			  blockFitness.put(currClass, bf);
			}
		      else if (((Hashtable)blockFitness.get(currClass)).containsKey(new Integer(i)) == false)
			// if we see partition i for the first time
			{
			  ((Hashtable)blockFitness.get(currClass)).put(new Integer(i), new Double(val));
			}
		      else if (val < ((Double)((Hashtable)blockFitness.get(currClass)).get(new Integer(i))).doubleValue())
			{
			  ((Hashtable)blockFitness.get(currClass)).put(new Integer(i), new Double(val));
			}
		    }
		    break;
		    
		  default:
		    System.err.println("ERROR! Partition.computeFitness(): unknown class fitness measure");
		    System.exit(1);
		  }
	      } // end itr2 loop

	    if (needsEntropyConditionedByThem(fitnessMeasure))
	      entropyCondThem[i] += sumCondThem;
	    if (needsEntropyConditionedByUs(fitnessMeasure))
	      entropyCondUs[i] += sumCondUs;
	    if (needsEntropyOfIntersection(fitnessMeasure))
	      entropyIntersect[i] += sumIntersect;
	  } // end itr loop

	// scaling
	if (needsEntropyConditionedByThem(fitnessMeasure))
	  entropyCondThem[i] /= (double)size;
	if (needsEntropyConditionedByUs(fitnessMeasure))
	  entropyCondUs[i] /= (double)size;
      } // end i loop

    // for the class fitness measures CF_POW and CF_SYM_DIFF we still
    // need to adjust the class fitness; the adjustment is done here,
    // once everything has been accumulated
    switch(classFitnessType)
      {
      case CF_POW:
	for (int classNo = 1; classNo <= getNoClasses(); classNo++) 
	  {
	    Integer classNoInt = new Integer(classNo);
	    double oldFitness = 
	      ((Double)classFitness.get(classNoInt)).doubleValue();
	    
	    classFitness.put(classNoInt, 
			     new Double(oldFitness / getClassCard(classNo)));
	  }
	break;
	    
      case 0:
      case CF_SYM_DIFF:
	// <Cs, <j, min (|Cs| + |Bt| - 2|Bt ^ Cs|)/(|Cs| + |Bt|)> >
	for (Enumeration keys = blockFitness.keys(); 
	     keys.hasMoreElements(); )
	  {
	    double avg = 0.0;
	    Integer key1 = (Integer)keys.nextElement();
	    Hashtable value1 = 
	      (Hashtable)blockFitness.get(key1);
	    for(Enumeration values2 = value1.elements(); 
		values2.hasMoreElements(); )
	      avg += ((Double)values2.nextElement()).doubleValue();
	  
	    classFitness.put(key1, new Double(avg /(double)NPART));
	  }
	break;
      }
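    // at this point classFitness holds the final per-class values:
    // for CF_SYM_DIFF each class maps to the average, over the NPART
    // partitions, of its smallest normalized symmetric difference with
    // any block of that partition; for CF_POW the accumulated product
    // has now been divided by |C_s|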

    cfIsValid = true;

    if (Global.ULTRA_VERBOSE == 1)
      printClassFitness();

    double sumEntropyCondUs  = 0.0;
    double sumEntropyCondThem = 0.0;
    double sumSquareEntropies = 0.0;
  
    // for these fitness measures we first need to accumulate the two
    // conditional entropies over all NPART partitions
    if (fitnessMeasure == Global.FM_NORM_W
	|| fitnessMeasure == Global.FM_ALTERNATE
	|| fitnessMeasure == Global.FM_MOD)
      {
	for (int i = 0; i < NPART; i++)
	  {
	    sumEntropyCondUs += entropyCondUs[i];
	    sumEntropyCondThem += entropyCondThem[i];
	  }
      }
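    // note: for nonnegative sums x = sumEntropyCondUs, y = sumEntropyCondThem
    // we have |x + y| + |x - y| = 2 * max(x, y), so FM_MOD effectively
    // minimizes (twice) the larger of the two conditional entropy sums,
    // similar in spirit to FM_ALTERNATE below (this assumes the impurity
    // measure is nonnegative)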
    
    if (fitnessMeasure == Global.FM_MOD)
      fitness = Math.abs(sumEntropyCondUs + sumEntropyCondThem)
	+ Math.abs(sumEntropyCondUs - sumEntropyCondThem);
    else if (fitnessMeasure == Global.FM_ALTERNATE)
      {
	if (sumEntropyCondUs >= sumEntropyCondThem)
	  // minimization of PA_P
	  fitness = sumEntropyCondUs;
	else
	  // minimization of P_PA
	  fitness = sumEntropyCondThem;
      }
    else
      {
	fitness = 0.0;
	for (int i = 0; i < NPART; i++)
	  switch(fitnessMeasure)
	    {
	    case Global.FM_PA_P:
	      fitness += entropyCondUs[i];
	      break;

	    case Global.FM_P_PA:
	      fitness += entropyCondThem[i];
	      break;

	    case Global.FM_BOTH:
	      fitness += entropyCondThem[i] + entropyCondUs[i];
	      break;

	    case Global.FM_BOTH_SCALED:
	      fitness += 
		entropyCondUs[i]/(1 + c[i].entropy(entropyMeasure))
		+ entropyCondThem[i]/(1 + entropy(entropyMeasure));
	      break;

	    case Global.FM_Q:
	      fitness += 
		filterNegative(entropy(entropyMeasure) - entropyCondThem[i])
		/ (1 + c[i].entropy(entropyMeasure));
	      break;

	    case Global.FM_L:
	      fitness += 
		filterNegative(entropy(entropyMeasure) - entropyCondThem[i])
		/ (1 + entropyIntersect[i]);
	      break;

	    case Global.FM_QR:
	      fitness += 
		filterNegative(c[i].entropy(entropyMeasure) - entropyCondUs[i])
		/ (1 + entropy(entropyMeasure));
	      break;

	    case Global.FM_LR:
	      fitness += 
		filterNegative(c[i].entropy(entropyMeasure) - entropyCondUs[i])
		/ (1 + entropyIntersect[i]);
	      break;

	    case Global.FM_Q_QR:
	      fitness += 
		filterNegative(entropy(entropyMeasure) - entropyCondThem[i])
		/ (1 + c[i].entropy(entropyMeasure))
		+ filterNegative(c[i].entropy(entropyMeasure) - entropyCondUs[i])
		/ (1 + entropy(entropyMeasure));
	      break;

	    case Global.FM_ALTERNATE_HAVG:
	      if (entropy(entropyMeasure) <= averageEntropy)
		// minimization of PA_P
		fitness += entropyCondUs[i];
	      else
		// minimization of P_PA
		fitness += entropyCondThem[i];
	      break;

	    case Global.FM_COS:
	      fitness += weight[i] * 
		(entropyCondThem[i] + entropyCondUs[i]);
	      sumSquareEntropies += 
		Math.pow(entropyCondThem[i] + entropyCondUs[i], 2); 
	      break;
	
	    case Global.FM_NORM_W:
	      fitness += Math.abs(entropyCondThem[i] + entropyCondUs[i] 
			      - weight[i] 
			      * (sumEntropyCondUs + sumEntropyCondThem));
	      break;

	    case Global.FM_WE:
	      fitness += 
		Math.abs(wTargetCondOnDB[i] - entropyCondUs[i]) + 
		Math.abs(wDBCondOnTarget[i] - entropyCondThem[i]);
	      break;
	    
	    case Global.FM_TEST:
	      fitness += (entropyCondThem[i] + entropyCondUs[i])
		* weight[i];
	      break;
	    }
      }
