📄 weightedeuclidean.java
字号:
// iterate through the attributes that are present in the first instance for (int i = 0; i < instance1.numValues(); i++) { Attribute attribute = instance1.attributeSparse(i); int attrIdx = attribute.index(); // Skip the class index if (attrIdx == m_classIndex) { continue; } value1 = instance1.value(attrIdx); // get the corresponding value of the second instance int idx2 = instance2.locateIndex(attrIdx); if (idx2 >=0 && attrIdx == instance2.index(idx2)) { value2 = instance2.value(attrIdx); } else { value2 = 0; } distance += (value2 - value1) * (value2 - value1); } // Go through the attributes that are present in the second instance, but not first instance for (int i = 0; i < instance2.numValues(); i++) { Attribute attribute = instance2.attributeSparse(i); int attrIdx = attribute.index(); // Skip the class index if (attrIdx == m_classIndex) { continue; } // only include attributes that are not present in first instance int idx1 = instance1.locateIndex(attrIdx); if (idx1 < 0 || attrIdx != instance1.index(idx1)) { value2 = instance2.value(attrIdx); distance += value2 * value2; } } distance = Math.sqrt(distance); return distance; } /** Returns a distance value between a non-sparse instance and a sparse instance * @param instance1 sparse instance. * @param instance2 sparse instance. * @exception Exception if distance could not be estimated. */ public double distanceSparseNonSparseNonWeighted(SparseInstance instance1, Instance instance2) throws Exception { double diff, distance = 0; double [] values2 = instance2.toDoubleArray(); for (int i = 0; i < values2.length; i++) { // Skip the class index if (i == m_classIndex) { continue; } diff = values2[i] - instance1.value(i); distance += diff * diff; } distance = Math.sqrt(distance); return distance; }; /** Returns a distance value between non-sparse instances (or a non-sparse instance and a sparse instance) * without using the weights * @param instance1 non-sparse instance. * @param instance2 non-sparse instance. * @exception Exception if distance could not be estimated. */ public double distanceNonSparseNonWeighted(Instance instance1, Instance instance2) throws Exception { double value1, value2, diff, distance = 0; double [] values1 = instance1.toDoubleArray(); double [] values2 = instance2.toDoubleArray(); // Go through all attributes for (int i = 0; i < values1.length; i++) { // Skip the class index if (i == m_classIndex) { continue; } diff = values1[i] - values2[i]; distance += diff * diff; } distance = Math.sqrt(distance); return distance; }; /** * Returns a similarity estimate between two instances. Similarity is obtained by * inverting the distance value using one of three methods: * CONVERSION_LAPLACIAN, CONVERSION_EXPONENTIAL, CONVERSION_UNIT. * @param instance1 First instance. * @param instance2 Second instance. * @exception Exception if similarity could not be estimated. */ public double similarity(Instance instance1, Instance instance2) throws Exception { switch (m_conversionType) { case CONVERSION_LAPLACIAN: return 1 / (1 + distance(instance1, instance2)); case CONVERSION_UNIT: return 2 * (1 - distance(instance1, instance2)); case CONVERSION_EXPONENTIAL: return Math.exp(-distance(instance1, instance2)); default: throw new Exception ("Unknown distance to similarity conversion method"); } } /** * Returns a similarity estimate between two instances without using the weights. * @param instance1 First instance. * @param instance2 Second instance. * @exception Exception if similarity could not be estimated. */ public double similarityNonWeighted(Instance instance1, Instance instance2) throws Exception { switch (m_conversionType) { case CONVERSION_LAPLACIAN: return 1 / (1 + distanceNonWeighted(instance1, instance2)); case CONVERSION_UNIT: return 2 * (1 - distanceNonWeighted(instance1, instance2)); case CONVERSION_EXPONENTIAL: return Math.exp(-distanceNonWeighted(instance1, instance2)); default: throw new Exception ("Unknown distance to similarity conversion method"); } } /** Get the values of the partial derivates for the metric components * for a particular instance pair @param instance1 the first instance @param instance2 the first instance */ public double[] getGradients(Instance instance1, Instance instance2) throws Exception { double[] gradients = new double[m_numAttributes]; double distance = distanceInternal(instance1, instance2); // gradients are zero for 0-distance instances if (distance == 0) { return gradients; } // take care of SparseInstances by enumerating over the values of the first instance for (int i = 0; i < m_numAttributes; i++) { // get the values double val1 = instance1.valueSparse(i); Attribute attr = instance1.attributeSparse(i); double val2 = instance2.value(attr); gradients[i] = 1.0 / (2*distance) * (val2 - val1) * (val2 - val1); } return gradients; } /** Train the metric */ public void learnMetric (Instances data) throws Exception { if (m_metricLearner == null) { System.err.println("Metric learner for WeightedEuclidean is not initalized. No training was conducted"); return; } m_metricLearner.trainMetric(this, data); } /** * Set the distance metric learner * * @param metricLearner the metric learner */ public void setMetricLearner (MetricLearner metricLearner) { m_metricLearner = metricLearner; } /** * Get the distance metric learner * * @returns the distance metric learner that this metric employs */ public MetricLearner getMetricLearner () { return m_metricLearner; } /** * Create an instance with features corresponding to dot-product components of the two given instances * @param instance1 first instance * @param instance2 second instance */ public Instance createDiffInstance (Instance instance1, Instance instance2) { if (instance1 instanceof SparseInstance && instance2 instanceof SparseInstance) { return createDiffInstanceSparse((SparseInstance)instance1, (SparseInstance)instance2); } else if (instance1 instanceof SparseInstance) { return createDiffInstanceSparseNonSparse((SparseInstance)instance1, instance2); } else if (instance2 instanceof SparseInstance) { return createDiffInstanceSparseNonSparse((SparseInstance)instance2, instance1); } else { return createDiffInstanceNonSparse(instance1, instance2); } } /** * Create a sparse instance with features corresponding to dot-product components of the two given instances * @param instance1 first sparse instance * @param instance2 second sparse instance */ protected SparseInstance createDiffInstanceSparse (SparseInstance instance1, SparseInstance instance2) { int maxNumValues = instance1.dataset().numAttributes(); // the overall number of attributes int classIndex = instance1.classIndex(); // arrays that will hold values and internal indeces of attribute indices // these will be cut off later double [] attrValues = new double[maxNumValues]; int [] indices = new int[maxNumValues]; int counter = 0; Arrays.fill(attrValues, Double.NaN); Arrays.fill(indices, Integer.MAX_VALUE); // iterate through the attributes that are present for (int i = 0; i < maxNumValues; i++) { if (i != classIndex) { // skip class attributes int idx1 = instance1.locateIndex(i); int idx2 = instance2.locateIndex(i); if ((idx1 >=0 && i == instance1.index(idx1)) || (idx2 >=0 && i == instance2.index(idx2))) { attrValues[counter] = (float) ((instance1.value(i) - instance2.value(i)) * (instance1.value(i) - instance2.value(i))); indices[counter] = i; counter++; } } else { // add the class value if (instance1.classValue() == instance2.classValue()) { attrValues[counter] = 1; } else { attrValues[counter] = -1; } indices[counter] = instance1.classIndex(); counter++; } } // Create the sparse difference instance double [] trueAttrValues = new double[counter]; int [] trueIndices = new int[counter]; for (int i = 0; i < counter; i++) { trueAttrValues[i] = attrValues[i]; trueIndices[i] = indices[i]; } SparseInstance diffInstance = new SparseInstance(1.0, trueAttrValues, trueIndices, maxNumValues); diffInstance.setDataset(instance1.dataset()); return diffInstance; } /** * Create an instance with features corresponding to dot-product components of the two given instances * @param instance1 first sparse instance * @param instance2 second non-sparse instance */ protected Instance createDiffInstanceSparseNonSparse (SparseInstance instance1, Instance instance2) { double[] values2 = instance2.toDoubleArray(); int numAttributes = values2.length; // create an extra attribute if there was no class index originally int classIndex = instance1.classIndex(); if (classIndex < 0) { classIndex = numAttributes; numAttributes++; } double[] diffInstanceValues = new double[numAttributes]; // iterate through the attributes that are present in the sparse instance for (int i = 0; i < numAttributes; i++) { if (i != classIndex) { // round up to float significance to be able to weed out duplicates later diffInstanceValues[i] = (float) ((instance1.value(i) - values2[i]) * (instance1.value(i) - values2[i])); } else { // class value if (instance1.value(i) == values2[i]) { diffInstanceValues[i] = 1; } else { diffInstanceValues[i] = -1; } } } Instance diffInstance = new Instance(1.0, diffInstanceValues); diffInstance.setDataset(instance1.dataset()); return diffInstance; } /** * Create a nonsparse instance with features corresponding to dot-product components of the two given instances * @param instance1 first nonsparse instance * @param instance2 second nonsparse instance */ protected Instance createDiffInstanceNonSparse (Instance instance1, Instance instance2) { double[] values1 = instance1.toDoubleArray(); double[] values2 = instance2.toDoubleArray(); int numAttributes = values1.length; // create an extra attribute if there was no class index originally int classIndex = instance1.classIndex(); if (classIndex < 0) { classIndex = numAttributes; numAttributes++; } double[] diffInstanceValues = new double[numAttributes]; for (int i = 0; i < values1.length; i++) { if (i != classIndex) { // round up to float significance to be able to weed out duplicates later diffInstanceValues[i] = (float) ((values1[i] - values2[i]) * (values1[i] - values2[i])); } else { // class value if (values1[i] == values2[i]) { diffInstanceValues[i] = 1; } else { diffInstanceValues[i] = -1; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -