⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 partition.java

📁 clustering data for the different techniques of data mining
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
    if (fitnessMeasure == Global.FM_COS)
      {
	double sum_square_weights = 0.0;
	for (int i = 0; i < NPART; i++)
	  sum_square_weights += Math.pow(weight[i], 2);
	fitness /= Math.sqrt(sumSquareEntropies)*Math.sqrt(sum_square_weights);
      }
  }

  /** computes the fitness of this Partition with respect to the
   * partitions in collection c, according to the entropy measure and
   * distance measure */
  public void computeFitness(Partition[] c, int NPART, 
			     int entropyMeasure, int fitnessMeasure,
			     double[] wTargetCondOnDB, 
			     double[] wDBCondOnTarget,
			     double[] weight)
  {
    // stores in intersectMap[i]: 
    // <class of our partition, <class of c[i], intersection count >>
    // intersectMap[i] is a map for intersections between
    // partitions c[i] and our partition
    // intersectMap[i]:key corresponds to each distinct 
    // value in our partition (C_s)
    // intersectMap[i]:value corresponds to a map with 
    // intersections between c[i][t] and v[s] (B_t x C_s)
    // intersectMap[i] = <C_s, <B_t, B_t x C_s >>
    Hashtable[]  intersectMap =  new Hashtable[NPART];
    for (int i = 0; i < NPART; i++)
      intersectMap[i] = new Hashtable();

    // fills intersectMap
    computeIntersections(this, c, NPART, intersectMap);
    computeFitness(intersectMap, c, NPART, entropyMeasure, fitnessMeasure,
		   wTargetCondOnDB, wDBCondOnTarget, weight);
  }

  /** estimates the degree in which the other partitions in
   * <code>db</code> influences the target partition; the target
   * partition should be the last partition in <code>db</code>;
   * <code>sampleDBPct</code> specifies the percent of the database to
   * be sampled; sets <code>wTargetCondonDB[i]</code> to
   * H(db[i]|db[target]) and sets the <code>wDBCondOnTarget[i]</code>
   * to H(db[target]|db[i]); sets <code>weight[i]</code> to
   * wTargetCondOnDB[i]+wDBCondOnTarget[i] 
   @return the entropy of the partition sampled from the database */
  static public double estimateWeights(Partition[] db, int NPART,
				       double sampleDBPct,
				       int entropyMeasure, 
				       double[] wTargetCondOnDB,
				       double[] wDBCondOnTarget,
				       double[] weight, Random rand)
  {
    NumberFormat nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(3);

    double targetEntr = 0;
    int SIZE = db[0].getSize();
  
    // reference partition in the sample database
    Partition sampleRefPart = null;
  
    // create a separate sample database
    int sampleDBSize = (int)(SIZE*sampleDBPct);
    // consider at least a sample db of size 2
    if (sampleDBSize < 2)
      sampleDBSize = 2;

    // rowids of the database that will belong to the sample
    int[] sampleRowids = new int[sampleDBSize];
    SelectionMethods.selectUnif(SIZE, sampleRowids, sampleDBSize, rand);
    
    Partition[] sampleDB = new Partition[NPART];
    for (int i = 0; i < NPART; i++)
      sampleDB[i] = new Partition(sampleDBSize, 
				  db[0].getMaxNoClasses(), 
				  db[0].getClassFitnessType());
  
    if (Global.VERBOSE == 1)
      {
	for (int i = 0; i < sampleDBSize; i++)
	  System.out.print(sampleRowids[i] + " ");
	System.out.println();
      }

    // extract the sample database from the original database
    int index = 0;
    for (int i = 0; i < sampleDBSize; i++, index++)
      for (int j = 0; j < NPART; j++)
	sampleDB[j].set(index, db[j].get(sampleRowids[i]));
  
  
    if (Global.VERBOSE == 1)
      for (int i = 0; i < NPART; i++)
	sampleDB[i].print();

    // the reference partition is the last column
    sampleRefPart = sampleDB[NPART - 1];
  
    // compute weights of all attributes with respect to the reference
    // partition using the sampleDB
    // the fitness of sampleDB[i] represents the weight
    // weight[i] = H(sampleDB[i]|ref_part) + H(ref_part|sampleDB[i])
    Partition[] c = new Partition[1];
    c[0] = sampleRefPart;
    for (int i = 0; i < NPART - 1; i++)
      {
	sampleDB[i].computeFitness(c, 1, entropyMeasure, Global.FM_P_PA, 
				     null, null, null);
	wDBCondOnTarget[i] = sampleDB[i].getFitness();

	sampleDB[i].computeFitness(c, 1, entropyMeasure, Global.FM_PA_P, 
				   null, null, null);
	wTargetCondOnDB[i] = sampleDB[i].getFitness();

	weight[i] = wTargetCondOnDB[i] + wDBCondOnTarget[i];
      }

    targetEntr = sampleRefPart.entropy(entropyMeasure);

    if (Global.VERBOSE == 1)
      {
	System.out.println("attr_no weight_target_cond_on_db" +
			 " weight_db_cond_on_target sum");

	for (int i = 0; i < NPART - 1; i++)
	  System.out.println(i + "  " + nf.format(wTargetCondOnDB[i]) + "  " 
			     + nf.format(wDBCondOnTarget[i]) 
			     + "  " + nf.format(weight[i]));
	
	System.out.println("H(target) = " + nf.format(targetEntr));
	System.out.println("H(target)-H(target|db)   H(db)-H(db|target)");

	double avgDBInfluence = 0.0;
	for (int i = 0; i < NPART - 1; i++)
	  {
	    System.out.println(i + "  " 
			       + nf.format(new Double(targetEntr - wTargetCondOnDB[i]))
			       + "  " + 
			       nf.format(new Double(sampleDB[i].entropy(entropyMeasure) 
					  - wDBCondOnTarget[i])));
	    avgDBInfluence += targetEntr - wTargetCondOnDB[i];
	  }

	System.out.println("avg H(target)-H(target|db) = " 
			   + nf.format(new Double(avgDBInfluence/(double)(NPART-1))));
      }

    return targetEntr;
  }


  /** @return the symmetrical difference between the classes of the
   * current partition and the one received as argument */
  public double distSymDiff(Partition c)
  {
    // get the unique representation into classes
    int noClasses1 = getNoClasses();
    Hashtable[] am1 = new Hashtable[noClasses1];
    for (int i = 0; i < noClasses1; i++)
      am1[i] = new Hashtable();

    getAM(am1);
  
    // get the unique representation into classes
    int noClasses2 = c.getNoClasses();
    Hashtable[] am2 = new Hashtable[noClasses2];
    for (int i = 0; i < noClasses2; i++)
      am2[i] = new Hashtable();

    c.getAM(am2);
    
    long symDiff = 0;
    for (int k2 = 0; k2 < noClasses2; k2++)
      symDiff += (am2[k2].size()*am2[k2].size());

    for (int k1 = 0; k1 < noClasses1; k1++)
      {
	symDiff += (am1[k1].size()*am1[k1].size());
	for (int k2 = 0; k2 < noClasses2; k2++)
	  {
	    long common = 0;
	    for (Enumeration keys1 = am1[k1].keys(); 
		 keys1.hasMoreElements(); )
	      if (am2[k2].containsKey((Integer)keys1.nextElement()) == true)
		common++;
	    symDiff -= (2*common*common);
	  }
      }

    return ((double)symDiff)/2;
  }

  /** @return Sum distSymDiff(p), for all p in the collection c */
  public double sumDistSymDiff(Partition[] c, int NPART)
  {
    double dist = 0.0;
    for (int i = 0; i < NPART; i++)
      dist += distSymDiff(c[i]);
    
    return dist;
  }

  /** @return the average Hamming distance between the row
   * <code>rowid</code> and all other rows from the class
   * <code>c</code>, according to the values of the <code>NPART</code>
   * partitions in <code>db</code> on these rows */
  private double avgHD(int rowid, Hashtable c, Partition[] db, int NPART)
  {
    long avgHD = 0;
    for (Enumeration keys = c.keys(); 
	 keys.hasMoreElements(); )
      {
	int currRowid = ((Integer)keys.nextElement()).intValue();
	if (currRowid != rowid)
	  {
	    long currHD = 0;
	    for (int j = 0; j < NPART; j++)
	      if (db[j].v[rowid] != db[j].v[currRowid])
		currHD++;
	    avgHD += currHD;
	  }
      }
  
    return ((double)avgHD)/((double)c.size());
  }

  
  /** fills the array <code>silhouettes</code> of dimension 'size'
   * with the silhoutte coefficients, computed with respect to the
   * classes in this Partition and with respect to the values of the
   * <code>NPART</code> partitions in collection <code>c</code>; this
   * Partition should have at least 2 classes; <code>neighbors</code>
   * will contain for each rowid the class number of the closest
   * class */
  public void computeSilhouettes(double[] silhouettes, 
				 int[] neighbors,
				 Partition[] c, int NPART)
  {
    // noClasses should be greater than 2
    int noClasses = getNoClasses();
    
    if (Global.DEBUG == 1)
      if (noClasses < 2)
	{
	  System.out.println("ERROR! Partition.computeSilhouettes():"
			     + "noClasses < 2");
	  System.exit(1);
	}

    Hashtable[] am = new Hashtable[noClasses];
    for (int i = 0; i < noClasses; i++)
      am[i] = new Hashtable();

    getAM(am);
  
    for (int rowid = 0; rowid < size; rowid++)
      {
	// find to what class rowid belongs
	int classNo;
	for (classNo = 0; classNo < noClasses; classNo++)
	  if (am[classNo].containsKey(new Integer(rowid)) == true)
	    break;

	// rowid belongs to classNo
	double a = avgHD(rowid, am[classNo], c, NPART);

	// find average distances with respect to all other classes
	// (classNo, avgHD)
	Hashtable otherClassesHD = new Hashtable();

	for (int k = 0; k < noClasses; k++)
	  if (k != classNo)
	    otherClassesHD.put(new Integer(k), 
			       new Double(avgHD(rowid, am[k], c, NPART)));
      
	// find the class with smallest avgHD
	// we know that we have at least 2 classes at this point
	Enumeration keys = otherClassesHD.keys();
	Integer firstClass = (Integer)keys.nextElement();

	double b = ((Double)otherClassesHD.get(firstClass)).doubleValue();
	neighbors[rowid] = firstClass.intValue();

	for (Enumeration classes = otherClassesHD.keys();
	     classes.hasMoreElements(); )
	  {
	    Integer currClass = (Integer)classes.nextElement();
	    double currValue = 
	      ((Double)otherClassesHD.get(currClass)).doubleValue();
	    if (currValue < b)
	      {
		b = currValue;
		neighbors[rowid] = currClass.intValue();
	      }
	  }

	// if rowid is the only element is that class
	if (am[classNo].size() == 1)
	  silhouettes[rowid] = 0;
	else if (a < b)
	  silhouettes[rowid] = 1 - a/b;
	else if (a == b)
	  silhouettes[rowid] = 0;
	else
	  silhouettes[rowid] = b/a - 1;
      }
  }

  private class SilhInfo implements Comparable
  {
    public int rowid;
    public double silhouette;
    public int neighbor;

    SilhInfo(int rowid, double silhouette, int neighbor)
    {
      this.rowid      = rowid;
      this.silhouette = silhouette;
      this.neighbor   = neighbor;
    }

    public int compareTo(Object o)
    {
      if (!(o instanceof SilhInfo))
	throw new ClassCastException();

      if (silhouette < ((SilhInfo)o).silhouette)
	return -1;
      else 
	if (silhouette > ((SilhInfo)o).silhouette)
	  return 1;
	else
	    return 0;
    }
  }

  /** plots a graphical representation of the silhouettes; classes are
   * plotted in order and the elements in the classes are plotted in
   * decreasing order of their silhouette */
  public void plotSilhouettes(double[] silhouettes, int[] neighbors)
  {
    NumberFormat nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(3);

    int noClasses = getNoClasses();  
    Hashtable[] am = new Hashtable[noClasses];
    for (int i = 0; i < noClasses; i++)
      am[i] = new Hashtable();

    getAM(am);

    double avgSilh = 0;
    for (int rowid = 0; rowid < size; rowid++)
      avgSilh += silhouettes[rowid];
    
    System.out.println("Average silhouette " + nf.format(avgSilh/(double)size));

    for (int classNo = 0; classNo < noClasses; classNo++)
      {
	Heap silhToPrint = new Heap(noClasses);
	for (Enumeration rowids = am[classNo].keys();
	     rowids.hasMoreElements();)
	  {
	    int currRowid = ((Integer)rowids.nextElement()).intValue();
	    silhToPrint.insert(new SilhInfo(currRowid, silhouettes[currRowid],
					    neighbors[currRowid]));
	  }

	while (silhToPrint.size() != 0)
	  {
	    SilhInfo s = null;
	    try
	      {
		s = (SilhInfo)silhToPrint.remove();
	      }
	    catch(EmptyHeapException e)
	      {
		System.err.println("Internal Error: Partition.plotSilhouettes(): empty heap");
		System.exit(1);
	      }

	    double currSilh = s.silhouette;
	    System.out.print("c " + (classNo + 1) 
			       + " n " + (s.neighbor + 1) 
			       + " r " + s.rowid + "\t");
	    if (currSilh < 0)
	      {
		int i = 0;
		while(i++ < -30*currSilh)
		  System.out.print("-");
	      }
	    else
	      {
		int i = 0;
		while(i++ < 30*currSilh)
		  System.out.print("+");
	      }
	    System.out.println(" (" + currSilh + ")");
	  }
      }
  }

  /** appends to <code>output</code> a graphical representation of the
   * silhouettes; classes are plotted in order and the elements in the
   * classes are plotted in decreasing order of their silhouette */
  public void plotSilhouettes(double[] silhouettes, int[] neighbors, 
			      StringBuffer output)
  {
    int noClasses = getNoClasses();  
    Hashtable[] am = new Hashtable[noClasses];
    for (int i = 0; i < noClasses; i++)
      am[i] = new Hashtable();

    getAM(am);

    double avgSilh = 0;
    for (int rowid = 0; rowid < size; rowid++)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -