📄 kl.java
   * @param instance1 non-sparse instance.
   * @param instance2 non-sparse instance.
   * @exception Exception if distance could not be estimated.
   */
  public double distanceNonSparse(Instance instance1, Instance instance2) throws Exception {
    double distance = 0, idivTerm = 0;
    double [] values1 = instance1.toDoubleArray();
    double [] values2 = instance2.toDoubleArray();

    // Go through all attributes
    for (int i = 0; i < values1.length; i++) {
      if (i != m_classIndex) {
        if (values2[i] > 0) {
          distance += m_attrWeights[i] * (values1[i] * Math.log(values1[i]/values2[i]));
          if (m_useIDivergence) {
            // I-divergence subtracts the (x - y) term from each KL component
            idivTerm -= m_attrWeights[i] * (values1[i] - values2[i]);
          }
        } else { // instance2 has a 0 value
          System.err.println("KL.distanceNonSparse: 0 value in instance2, attribute=" + i);
          return Double.MAX_VALUE;
        }
      }
    }
    distance = distance + idivTerm;
    return distance;
  }

  /**
   * Returns Jensen-Shannon distance value between two instances.
   * @param instance1 First instance.
   * @param instance2 Second instance.
   * @exception Exception if distanceJS could not be estimated.
   */
  public double distanceJS(Instance instance1, Instance instance2) throws Exception {
    if (instance1 instanceof SparseInstance && instance2 instanceof SparseInstance) {
      return distanceJSSparse((SparseInstance)instance1, (SparseInstance)instance2);
    } else {
      return distanceJSNonSparse(instance1, instance2);
    }
  }

  /** Returns Jensen-Shannon distance between two sparse instances.
   * @param instance1 First sparse instance.
   * @param instance2 Second sparse instance.
   * @exception Exception if distanceJS could not be estimated.
   */
  public double distanceJSSparse(SparseInstance instance1, SparseInstance instance2) throws Exception {
    double distanceJS = 0;
    boolean lookupOK = false;

    // check whether a cached diff-instance already exists for this pair
    if (m_instanceConstraintMap.containsKey(instance1)) {
      HashMap instanceDiffInstanceMap = (HashMap) m_instanceConstraintMap.get(instance1);
      if (instanceDiffInstanceMap.containsKey(instance2)) {
        lookupOK = true;
        SparseInstance diffVector = (SparseInstance) instanceDiffInstanceMap.get(instance2);
        for (int i = 0; i < diffVector.numValues(); i++) {
          int idx = diffVector.index(i);
          distanceJS += m_attrWeights[idx] * diffVector.valueSparse(i);
        }
      }
    }

    if (!lookupOK) {
      double value1, value2, sum1 = 0, sum2 = 0;
      int numValues1 = instance1.numValues();
      int numValues2 = instance2.numValues();
      int maxNumValues = numValues1 + numValues2;  // the overall number of attributes
      double [] attrValues = new double[maxNumValues];
      int [] indices = new int[maxNumValues];
      Arrays.fill(attrValues, 0);
      Arrays.fill(indices, Integer.MAX_VALUE);

      // pick up the values from instance2 that didn't occur in instance1
      int counter = 0, counter1 = 0, counter2 = 0;
      int attrIdx1 = 0, attrIdx2 = 0;
      while (counter1 < numValues1 || counter2 < numValues2) {
        if (counter1 < numValues1) {
          attrIdx1 = instance1.index(counter1);
        } else {
          attrIdx1 = Integer.MAX_VALUE;
        }
        if (counter2 < numValues2) {
          attrIdx2 = instance2.index(counter2);
        } else {
          attrIdx2 = Integer.MAX_VALUE;
        }

        // attributes present only in instance1
        while (attrIdx1 < attrIdx2 && counter1 < numValues1) {
          if (attrIdx1 != m_classIndex) {
            sum1 += m_attrWeights[attrIdx1] * instance1.valueSparse(counter1);
            attrValues[counter] = 0.5 * instance1.valueSparse(counter1);
            indices[counter] = attrIdx1;
            counter++;
          }
          counter1++;
          if (counter1 < numValues1) {
            attrIdx1 = instance1.index(counter1);
          }
        }

        // attributes present only in instance2
        while (attrIdx2 < attrIdx1 && counter2 < numValues2) {
          if (attrIdx2 != m_classIndex) {
            sum2 += m_attrWeights[attrIdx2] * instance2.valueSparse(counter2);
            attrValues[counter] = 0.5 * instance2.valueSparse(counter2);
            indices[counter] = attrIdx2;
            counter++;
          }
          counter2++;
          if (counter2 < numValues2) {
            attrIdx2 = instance2.index(counter2);
          }
        }

        // attribute present in both instances: add the full JS term
        if (attrIdx1 == attrIdx2 && attrIdx1 != m_classIndex
            && attrIdx1 < Integer.MAX_VALUE && attrIdx2 < Integer.MAX_VALUE) {
          value1 = instance1.valueSparse(counter1);
          value2 = instance2.valueSparse(counter2);
          distanceJS += m_attrWeights[attrIdx1] * (value1 * Math.log(value1) + value2 * Math.log(value2)
                                                   - (value1 + value2) * Math.log((value1+value2)/2.0));
          attrValues[counter] = 0.5 * (value1 * Math.log(value1) + value2 * Math.log(value2)
                                       - (value1 + value2) * Math.log((value1+value2)/2.0)) / LOG2;
          indices[counter] = attrIdx1;
          counter++;
          counter1++;
          counter2++;
        } else if (attrIdx1 == m_classIndex) {
          // encode whether the two instances share a class label in the class-index slot
          if (instance1.classValue() == instance2.classValue()) {
            attrValues[counter] = 1;
          } else {
            attrValues[counter] = -1;
          }
          indices[counter] = m_classIndex;
          counter++;
          counter1++;
          counter2++;
        }
      }

      SparseInstance diffInstanceJS = new SparseInstance(1.0, attrValues, indices,
                                                         instance1.dataset().numAttributes());
      diffInstanceJS.setDataset(instance1.dataset());

      // hash the diff-instance for both instances involved
      HashMap instanceDiffInstanceMap1;
      if (m_instanceConstraintMap.containsKey(instance1)) {
        instanceDiffInstanceMap1 = (HashMap) m_instanceConstraintMap.get(instance1);
      } else {
        instanceDiffInstanceMap1 = new HashMap();
        m_instanceConstraintMap.put(instance1, instanceDiffInstanceMap1);
      }
      instanceDiffInstanceMap1.put(instance2, diffInstanceJS);

      HashMap instanceDiffInstanceMap2;
      if (m_instanceConstraintMap.containsKey(instance2)) {
        instanceDiffInstanceMap2 = (HashMap) m_instanceConstraintMap.get(instance2);
      } else {
        instanceDiffInstanceMap2 = new HashMap();
        m_instanceConstraintMap.put(instance2, instanceDiffInstanceMap2);
      }
      instanceDiffInstanceMap2.put(instance1, diffInstanceJS);

      distanceJS = 0.5 * (sum1 + sum2 + distanceJS / LOG2);
      if (distanceJS > 1.00001)
        System.out.println("TROUBLE: distanceJS=" + distanceJS + " sum1=" + sum1 + " sum2=" + sum2);
    }
    return distanceJS;
  }

  /** Returns Jensen-Shannon distance between a sparse instance and a non-sparse instance.
   * @param instance1 sparse instance.
   * @param instance2 non-sparse instance.
   * @exception Exception if distanceJS could not be estimated.
   */
  public double distanceJSSparseNonSparse(SparseInstance instance1, Instance instance2) throws Exception {
    double diff, distanceJS = 0, sum2 = 0;
    int numValues1 = instance1.numValues();
    int numValues2 = instance2.numValues();
    double [] values2 = instance2.toDoubleArray();

    // add all contributions of the second instance; unnecessary ones will be subtracted later
    for (int i = 0; i < values2.length; i++) {
      if (i != m_classIndex) {
        sum2 += m_attrWeights[i] * values2[i];
      }
    }

    for (int i = 0; i < numValues1; i++) {
      int attrIdx = instance1.index(i);
      if (attrIdx != m_classIndex) {
        double value1 = instance1.valueSparse(i);
        double value2 = values2[attrIdx];
        if (value1 != 0 && value2 != 0) {
          distanceJS += m_attrWeights[attrIdx] * (value1 * Math.log(value1) + value2 * Math.log(value2)
                                                  - (value1 + value2) * Math.log((value1+value2)/2.0));
          sum2 -= m_attrWeights[attrIdx] * value2;  // subtract the contribution previously added
        }
      }
    }
    distanceJS = 0.5 * (sum2 + distanceJS / LOG2);
    return distanceJS;
  }

  /** Returns Jensen-Shannon distance between two non-sparse instances.
   * @param instance1 non-sparse instance.
   * @param instance2 non-sparse instance.
   * @exception Exception if distanceJS could not be estimated.
   */
  public double distanceJSNonSparse(Instance instance1, Instance instance2) throws Exception {
    double distanceJS = 0, sum1 = 0, sum2 = 0;
    double [] values1 = instance1.toDoubleArray();
    double [] values2 = instance2.toDoubleArray();

    // Go through all attributes
    for (int i = 0; i < values1.length; i++) {
      if (i != m_classIndex) {
        if (values1[i] != 0 && values2[i] != 0) {
          distanceJS += m_attrWeights[i] * (values1[i] * Math.log(values1[i]) + values2[i] * Math.log(values2[i])
                                            - (values1[i] + values2[i]) * Math.log((values1[i]+values2[i])/2.0));
        } else if (values1[i] != 0) {
          sum1 += m_attrWeights[i] * values1[i];
        } else if (values2[i] != 0) {
          sum2 += m_attrWeights[i] * values2[i];
        }
      }
    }
    distanceJS = 0.5 * (sum1 + sum2 + distanceJS / LOG2);
    return distanceJS;
  }

  /**
   * Returns a similarity estimate between two instances. Similarity is obtained by
   * inverting the distance value using one of three methods:
   * CONVERSION_LAPLACIAN, CONVERSION_EXPONENTIAL, CONVERSION_UNIT.
   * @param instance1 First instance.
   * @param instance2 Second instance.
   * @exception Exception if similarity could not be estimated.
   */
  public double similarity(Instance instance1, Instance instance2) throws Exception {
    switch (m_conversionType) {
    case CONVERSION_LAPLACIAN:
      return 1 / (1 + distance(instance1, instance2));
    case CONVERSION_UNIT:
      return 2 * (1 - distance(instance1, instance2));
    case CONVERSION_EXPONENTIAL:
      return Math.exp(-distance(instance1, instance2));
    default:
      throw new Exception("Unknown distance to similarity conversion method");
    }
  }

  /**
   * Returns distance between two instances without using the weights.
   * @param instance1 First instance.
   * @param instance2 Second instance.
   * @exception Exception if distance could not be estimated.
   */
  public double distanceNonWeighted(Instance instance1, Instance instance2) throws Exception {
    return distance(instance1, instance2);
  }

  /**
   * Returns a similarity estimate between two instances without using the weights.
   * @param instance1 First instance.
   * @param instance2 Second instance.
   * @exception Exception if similarity could not be estimated.
   */
  public double similarityNonWeighted(Instance instance1, Instance instance2) throws Exception {
    switch (m_conversionType) {
    case CONVERSION_LAPLACIAN:
      return 1 / (1 + distanceNonWeighted(instance1, instance2));
    case CONVERSION_UNIT:
      return 2 * (1 - distanceNonWeighted(instance1, instance2));
    case CONVERSION_EXPONENTIAL:
      return Math.exp(-distanceNonWeighted(instance1, instance2));
    default:
      throw new Exception("Unknown distance to similarity conversion method");
    }
  }

  /** Get the values of the partial derivatives for the metric components
   * for a particular instance pair
   * @param instance1 the first instance
   * @param instance2 the second instance
   */
  public double[] getGradients(Instance instance1, Instance instance2) throws Exception {
    double[] gradients = new double[m_numAttributes];
    double distance = distanceInternal(instance1, instance2);

    // gradients are zero for 0-distance instances
    if (distance == 0) {
      return gradients;
    }

    // take care of SparseInstances by enumerating over the values of the first instance
    for (int i = 0; i < m_numAttributes; i++) {
      // get the values
      double val1 = instance1.valueSparse(i);
      Attribute attr = instance1.attributeSparse(i);
      double val2 = instance2.value(attr);
      // TODO: why was this gradient used earlier??
      // gradients[i] = 1.0 / (2*distance) * (val2 - val1) * (val2 - val1);
      if (val2 > 0) {
        // per-attribute I-divergence term, i.e. the partial derivative of the
        // weighted distance with respect to this attribute's weight
        gradients[i] = val1 * Math.log(val1/val2) - (val1 - val2);
      }
    }
    return gradients;
  }

  /** get the normalizer value */
  public double getNormalizer() {
    return 0;
  }

  /** Train the metric */
  public void learnMetric(Instances data) throws Exception {
    if (m_metricLearner == null) {
      System.err.println("Metric learner for KL is not initialized. No training was conducted");
      return;
    }
    // need I-divergence to be switched on during metric learning
    if (m_useIDivergence == false) {
      System.out.println("Using I-divergence ...");
      m_useIDivergence = true;
    }
    m_metricLearner.trainMetric(this, data);
  }

  /**
   * Set the distance metric learner
   *
   * @param metricLearner the metric learner
   */
  public void setMetricLearner(MetricLearner metricLearner) {
    m_metricLearner = metricLearner;
  }

  /**
   * Get the distance metric learner
   *
   * @return the distance metric learner that this metric employs
   */
  public MetricLearner getMetricLearner() {
    return m_metricLearner;
  }

  /**
   * Create an instance with features corresponding to dot-product components of the two given instances
   * @param instance1 first instance
   * @param instance2 second instance
   */
  public Instance createDiffInstance(Instance instance1, Instance instance2) {
    if (instance1 instanceof SparseInstance && instance2 instanceof SparseInstance) {
      return createDiffInstanceSparse((SparseInstance)instance1, (SparseInstance)instance2);
    } else if (instance1 instanceof SparseInstance) {
      return createDiffInstanceSparseNonSparse((SparseInstance)instance1, instance2);
    } else if (instance2 instanceof SparseInstance) {
      return createDiffInstanceSparseNonSparse((SparseInstance)instance2, instance1);
    } else {
      return createDiffInstanceNonSparse(instance1, instance2);
    }
  }

  /**
   * Create a sparse instance with features corresponding to dot-product components of the two given instances
   * @param instance1 first sparse instance
   * @param instance2 second sparse instance
   */
  protected SparseInstance createDiffInstanceSparse(SparseInstance instance1, SparseInstance instance2) {
    int numValues1 = instance1.numValues();
    int numValues2 = instance2.numValues();
    int maxNumValues = numValues1 + numValues2;  // the overall number of attributes
    int classIndex = instance1.classIndex();
    int counter = 0;
    double [] attrValues = new double[maxNumValues];
    int [] indices = new int[maxNumValues];
    Arrays.fill(attrValues, 0);
    Arrays.fill(indices, Integer.MAX_VALUE);

    for (int i = 0; i < numValues1; i++) {
      int attrIdx = instance1.index(i);
      indices[counter] = attrIdx;
      if (attrIdx != classIndex) { // skip class attributes
        double value1 = instance1.valueSparse(i);
        int idx2 = instance2.locateIndex(attrIdx);
        if (idx2 >= 0 && attrIdx == instance2.index(idx2)) {
          double value2 = instance2.valueSparse(idx2);
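
// Illustrative sketch (not part of the original kl.java above): a minimal,
// standalone version of the weighted Jensen-Shannon computation performed by
// distanceJSNonSparse(). It assumes non-negative value arrays of equal length
// and a matching attribute-weight array; the class and method names below are
// hypothetical, introduced only for this example.
public final class JSDistanceSketch {

  private static final double LOG2 = Math.log(2);  // converts nats to bits, as in the metric above

  /** Weighted Jensen-Shannon divergence (in bits) between value vectors p and q. */
  public static double jsDistance(double[] p, double[] q, double[] w) {
    double core = 0, sum1 = 0, sum2 = 0;
    for (int i = 0; i < p.length; i++) {
      if (p[i] != 0 && q[i] != 0) {
        // p*ln(p) + q*ln(q) - (p+q)*ln((p+q)/2)  ==  p*ln(p/m) + q*ln(q/m), with m = (p+q)/2
        core += w[i] * (p[i] * Math.log(p[i]) + q[i] * Math.log(q[i])
                        - (p[i] + q[i]) * Math.log((p[i] + q[i]) / 2.0));
      } else if (p[i] != 0) {
        sum1 += w[i] * p[i];   // q[i] == 0: p[i]*log2(p[i]/(p[i]/2)) = p[i], already in bits
      } else if (q[i] != 0) {
        sum2 += w[i] * q[i];   // p[i] == 0: symmetric case
      }
    }
    return 0.5 * (sum1 + sum2 + core / LOG2);
  }

  public static void main(String[] args) {
    double[] p = {0.5, 0.5, 0.0};
    double[] q = {0.0, 0.5, 0.5};
    double[] w = {1.0, 1.0, 1.0};
    System.out.println(jsDistance(p, q, w));   // prints 0.5 for these half-overlapping distributions
  }
}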