📄 pckmeans.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
	  System.out.println(pair + ": type = " + linkType);	if( linkType == InstancePair.MUST_LINK ){ // concerned with MUST-LINK in Adjacency List	  if (m_AdjacencyList[pair.first] == null) {	    m_AdjacencyList[pair.first] = new HashSet();	  }	  if (!m_AdjacencyList[pair.first].contains(new Integer(pair.second))) {	    m_AdjacencyList[pair.first].add(new Integer(pair.second));	  }	  	  if (m_AdjacencyList[pair.second] == null) {	    m_AdjacencyList[pair.second] = new HashSet();	  }	  if (!m_AdjacencyList[pair.second].contains(new Integer(pair.first))) {	    m_AdjacencyList[pair.second].add(new Integer(pair.first));	  }	}      }            // DFS for finding connected components in Adjacency List, updates required stats      DFS();    }        if (!m_Seedable) { // don't perform any seeding, initialize from random      m_NumCurrentClusters = 0;    }    //    System.out.println("Need to make " + m_NumClusters + " clusters, already made " + m_NumCurrentClusters);    // if the required number of clusters has been obtained, wrap-up    if( m_NumCurrentClusters >= m_NumClusters ){      if (m_verbose) {	System.out.println("Got the required number of clusters ...");	System.out.println("num clusters: " + m_NumClusters + ", num current clusters: " + m_NumCurrentClusters);      }      int clusterSizes[] = new int[m_NumCurrentClusters];      for (int i=0; i<m_NumCurrentClusters; i++) {	if (m_verbose) {	  System.out.println("Neighbor set: " + i + " has size: " + m_NeighborSets[i].size());	}	clusterSizes[i] = -m_NeighborSets[i].size(); // for reverse sort (decreasing order)      }	      int[] indices = Utils.sort(clusterSizes);            Instance[] clusterCentroids = new Instance[m_NumClusters];      // compute centroids of m_NumClusters clusters      m_ClusterCentroids = new Instances(m_Instances, m_NumClusters);      for (int j=0; j < m_NumClusters; j++) {		int i = indices[j];	System.out.println("Neighborhood selected:  " + m_NeighborSets[i].size() + "(" + m_TotalTrainWithLabels.instance(((Integer)(m_NeighborSets[i].iterator().next())).intValue()).classValue()+ ")\t");		if (m_SumOfClusterInstances[i] != null) {	  if (m_verbose) {	    System.out.println("Normalizing instance " + i);	  }	  if (!m_objFunDecreasing) {	    normalize(m_SumOfClusterInstances[i]);	  }	  else {	    normalizeByWeight(m_SumOfClusterInstances[i]);	  }	}	Iterator iter = m_NeighborSets[i].iterator();	while (iter.hasNext()) { // assign points of new cluster	  int instNumber = ((Integer) iter.next()).intValue();	  if (m_verbose) {	    System.out.println("Assigning " + instNumber + " to cluster: " + j);	  }	  // have to re-assign after sorting	  m_ClusterAssignments[instNumber] = j;	}	m_SumOfClusterInstances[j].setDataset(m_Instances);	// m_SumOfClusterInstances suitably normalized now	m_ClusterCentroids.add(m_SumOfClusterInstances[i]);      }      for (int j=m_NumClusters; j < m_NumCurrentClusters; j++) {	int i = indices[j];	Iterator iter = m_NeighborSets[i].iterator();	while (iter.hasNext()) {	  int instNumber = ((Integer) iter.next()).intValue();	  if (m_verbose) {	    System.out.println("Assigning " + instNumber + " to cluster -1");	  }	  m_ClusterAssignments[instNumber] = -1;	}      }      m_NumCurrentClusters = m_NumClusters;      // adding other inferred ML and CL links      addMLAndCLTransitiveClosure(indices);      return;    }    else if( m_NumCurrentClusters < m_NumClusters ){      createGlobalCentroids();      addMLAndCLTransitiveClosure(null);    }  }  // Query: oracle replies on link, added to m_ConstraintsHash  protected int askOracle(int X, int Y) {    Instance first = m_TotalTrainWithLabels.instance(X);    Instance second = m_TotalTrainWithLabels.instance(Y);    int linkType;    if (m_verbose) {      System.out.print("["+X+","+Y);    }    if (first.classValue() == second.classValue()) {      if (m_verbose) {	System.out.println(",MUST]");      }      linkType = InstancePair.MUST_LINK;    }    else if (first.classValue() != second.classValue()) {      if (m_verbose) {	System.out.println(",CANNOT]");      }      linkType = InstancePair.CANNOT_LINK;    } else {      if (m_verbose) {	System.out.println(",DONT_CARE]");      }      linkType = InstancePair.DONT_CARE_LINK;    }        // add to constraintHash and seedHash    int firstIndex = (X<Y)? X:Y;    int secondIndex = (X>=Y)? X:Y;    InstancePair newPair = new InstancePair(firstIndex, secondIndex, InstancePair.DONT_CARE_LINK);    if (!m_ConstraintsHash.containsKey(newPair)) {      m_ConstraintsHash.put(newPair, new Integer(linkType));    }    Integer firstInt = new Integer(firstIndex);    Integer secondInt = new Integer(secondIndex);    // for first point     if(!m_SeedHash.contains(firstInt)) { // add instances with constraints to seedHash      m_SeedHash.add(firstInt);    }    // for second point     if(!m_SeedHash.contains(secondInt)) {      m_SeedHash.add(secondInt);    }        return linkType;  }  /** Finds point which has max min-distance from set visitedPoints, does not consider      points from eliminationSet   */  int farthestFromSet(HashSet visitedPoints, HashSet eliminationSet) throws Exception {        // implements farthest-first search algorithm:    /*      for (each datapoint x not in visitedPoints) {        distance of x to visitedPoints = min{d(x,f):f \in visitedPoints}      }      select the point x with maximum distance as new center;    */    if (visitedPoints.size() == 0) {      int point;      if (m_StartingIndexOfTest < m_Instances.numInstances()) {		point = m_RandomNumberGenerator.nextInt(m_StartingIndexOfTest); // takes care not to select test example      }      else {	point = m_RandomNumberGenerator.nextInt(m_Instances.numInstances());      }      // Note: no need to check for labeled data now, since we have no visitedPoints      // => no labeled data      if (m_verbose)	System.out.println("First point selected: " + point);      return point;    }    else {      if (m_verbose) {	Iterator iter = visitedPoints.iterator();	if (eliminationSet != null) { 	  iter = eliminationSet.iterator();	  while(iter.hasNext()) {	    System.out.println("In elimination set: " + ((Integer) iter.next()).intValue());	  }	}      }    }        double minSimilaritySoFar = Double.POSITIVE_INFINITY;    double maxDistanceSoFar = Double.NEGATIVE_INFINITY;    ArrayList bestPointArray = null;    int bestPoint = -1;    for (int i=0; i<m_Instances.numInstances() && i<m_StartingIndexOfTest; i++) { // point should not belong to test set      if (visitedPoints == null || !visitedPoints.contains(new Integer(i))) { // point should not belong to visitedPoints	if (eliminationSet == null || !eliminationSet.contains(new Integer(i))) { // point should not belong to eliminationSet	  Instance inst = m_Instances.instance(i);	  Iterator iter = visitedPoints.iterator();	  double minDistanceFromSet = Double.POSITIVE_INFINITY;	  double maxSimilarityFromSet = Double.NEGATIVE_INFINITY;	  while (iter.hasNext()) {	    Instance pointInSet = m_Instances.instance(((Integer) iter.next()).intValue());	    if (!m_objFunDecreasing) {	      double sim = m_metric.similarity(inst, pointInSet);	      if (sim > maxSimilarityFromSet) {		maxSimilarityFromSet = sim;	      }	    }	    else {	      double dist = m_metric.distance(inst, pointInSet);	      if (dist < minDistanceFromSet) {		minDistanceFromSet = dist;	      }	    }	  }	  if (!m_objFunDecreasing) {	    if (maxSimilarityFromSet == minSimilaritySoFar) {	      minSimilaritySoFar = maxSimilarityFromSet;	      bestPointArray.add(new Integer(i));	    }	    else if (maxSimilarityFromSet < minSimilaritySoFar) {	      minSimilaritySoFar = maxSimilarityFromSet;	      bestPointArray = new ArrayList();	      bestPointArray.add(new Integer(i));	    }	  }	  else {	    if (minDistanceFromSet == maxDistanceSoFar) {	      minDistanceFromSet = maxDistanceSoFar;	      bestPointArray.add(new Integer(i));	      if (m_verbose) {		System.out.println("Additional point added: " + i + " with similarity: " + minSimilaritySoFar);	      }	    }	    else if (minDistanceFromSet > maxDistanceSoFar) {	      maxDistanceSoFar = minDistanceFromSet;	      bestPointArray = new ArrayList();	      bestPointArray.add(new Integer(i));	      if (m_verbose) {		System.out.println("Farthest point from set is: " + i + " with distance: " + maxDistanceSoFar);	      }	    }	  }	}      }    }    if (bestPointArray == null) {      System.out.println("\n\nAttention!! No more points left, all assigned\n\n");    } else {      if (m_verbose)	System.out.println("Have " + bestPointArray.size() + " points in bestPointArray");      int index = m_RandomNumberGenerator.nextInt(bestPointArray.size()); // select one of the bestPoints at random      bestPoint = ((Integer) bestPointArray.get(index)).intValue();    }    if (m_verbose) {      if (!m_objFunDecreasing) {	System.out.println("Selected " + bestPoint + " with similarity: " + minSimilaritySoFar);      }      else {	System.out.println("Selected " + bestPoint + " with distance: " + maxDistanceSoFar);      }    }    return bestPoint;  }  /** Finds point which is nearest to center. This point should not be   *  a test point and should not belong to visitedPoints    */  int nearestFromPoint(Instance center, HashSet visitedPoints) throws Exception {    double maxSimilarity = Double.NEGATIVE_INFINITY;    double minDistance = Double.POSITIVE_INFINITY;    int bestPoint = -1;    for (int i=0; i<m_Instances.numInstances() && i<m_StartingIndexOfTest; i++) { // bestPoint should not be a test point      if (!visitedPoints.contains(new Integer(i))) { // bestPoint should not belong to visitedPoints	Instance inst = m_Instances.instance(i);	if (!m_objFunDecreasing) {	  double sim = m_metric.similarity(inst, center);	  if (sim > maxSimilarity) {	    bestPoint = i;	    maxSimilarity = sim;	  }	}	else {	  double dist = m_metric.distance(inst, center);	  if (dist < minDistance) {	    bestPoint = i;	    minDistance = dist;	    if (m_verbose) {	      System.out.println("Nearest point is: " + bestPoint + " with dist: " + minDistance);	    }	  }	}      }    }    return bestPoint;  }  /** This function divides every attribute value in an instance by   *  the instance weight -- useful to find the mean of a cluster in   *  Euclidean space    *  @param inst Instance passed in for normalization (destructive update)   */  protected void normalizeByWeight(Instance inst) {    double weight = inst.weight();    if (m_verbose) {      System.out.println("Before weight normalization: " + inst);    }    if (inst instanceof SparseInstance) {       for (int i=0; i<inst.numValues(); i++) {	inst.setValueSparse(i, inst.valueSparse(i)/weight);      }    }    else if (!(inst instanceof SparseInstance)) {      for (int i=0; i<inst.numAttributes(); i++) {	inst.setValue(i, inst.value(i)/weight);      }    }    if (m_verbose) {      System.out.println("After weight normalization: " + inst);    }  }  /** Finds the sum of instance sum with instance inst    */  Instance sumWithInstance(Instance sum, Instance inst) throws Exception {    Instance newSum;    if (sum == null) {      if (m_isSparseInstance) {	newSum = new SparseInstance(inst);	newSum.setDataset(m_Instances);      }      else {	newSum = new Instance(inst);	newSum.setDataset(m_Instances);      }    }    else {      newSum = sumInstances(sum, inst);    }    return newSum;  }  /** Finds sum of 2 instances (handles sparse and non-sparse)   */  protected Instance sumInstances(Instance inst1, Instance inst2) throws Exception {    int numAttributes = inst1.numAttributes();    if (inst2.numAttributes() != numAttributes) {      throw new Exception ("Error!! inst1 and inst2 should have same number of attributes.");    }    //      if (m_verbose) {    //        System.out.println("Instance 1 is: " + inst1 + ", instance 2 is: " + inst2);    //      }    double weight1 = inst1.weight(), weight2 = inst2.weight();    double [] values = new double[numAttributes];    Instance newInst;        for (int i=0; i<numAttributes; i++) {      values[i] = 0;    }        if (inst1 instanceof SparseInstance && inst2 instanceof SparseInstance) {      for (int i=0; i<inst1.numValues(); i++) {	int indexOfIndex = inst1.index(i);	values[indexOfIndex] = inst1.valueSparse(i);      }      for (int i=0; i<inst2.numValues(); i++) {	int indexOfIndex = inst2.index(i);	values[indexOfIndex] += inst2.valueSparse(i);      }      newInst = new SparseInstance(weight1+weight2, values);      newInst.setDataset(m_Instances);    }    else if (!(inst1 instanceof SparseInstance) && !(inst2 instanceof SparseInstance)){      for (int i=0; i<numAttributes; i++) {	values[i] = inst1.value(i) + inst2.value(i);      }      newInst = new Instance(weight1+weight2, values);      newInst.setDataset(m_Instances);    }    else {      throw new Exception ("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");    }    //      if (m_verbose) {    //        System.out.println("Sum instance is: " + newInst);    //      }    return newInst;
💿 文件大小 12323 K
👤 上传用户 ilovexzhu
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#university #supervised #learning #wekaUT
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -