📄 WeightedDotP.java (revision 1.13)
 * @exception Exception if similarity could not be estimated.
 */
public double similarityNonSparseNonWeighted(Instance instance1, Instance instance2) throws Exception {
    double sim = 0;
    double[] values1 = instance1.toDoubleArray();
    double[] values2 = instance2.toDoubleArray();
    double length1 = 0, length2 = 0;

    // iterate through the attributes
    for (int i = 0; i < values1.length; i++) {
        // Skip the class index
        if (i != m_classIndex) {
            sim += values1[i] * values2[i];
            if (m_lengthNormalized) {
                length1 += values1[i] * values1[i];
                length2 += values2[i] * values2[i];
            }
        }
    }
    if (m_lengthNormalized && length1 != 0 && length2 != 0) {
        sim /= Math.sqrt(length1) * Math.sqrt(length2);
    }
    return sim;
}

/**
 * Returns a dot product similarity value between a sparse instance and a non-sparse instance.
 * @param instance1 first (sparse) instance
 * @param instance2 second (non-sparse) instance
 * @exception Exception if similarity could not be estimated
 */
public double similaritySparseNonSparseNonWeighted(SparseInstance instance1, Instance instance2) throws Exception {
    double sim = 0;

    // iterate through the attributes that are present in the first instance
    for (int i = 0; i < instance1.numValues(); i++) {
        Attribute attribute = instance1.attributeSparse(i);
        int attrIdx = attribute.index();
        // Skip the class index
        if (attrIdx != m_classIndex) {
            double val1 = instance1.value(attrIdx);
            double val2 = instance2.value(attrIdx);
            sim += val1 * val2;
        }
    }
    // Bad news: we need to iterate through all attributes of the non-sparse instance
    // to compute its length for normalization.
    if (m_lengthNormalized) {
        double length1 = length(instance1);
        double length2 = length(instance2);
        if (length1 != 0 && length2 != 0) {
            sim /= length1 * length2;
        }
    }
    return sim;
}
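// A minimal reference sketch (not part of the original class): the length-normalized
// dot product above is ordinary cosine similarity with the class attribute skipped.
// The standalone helper below restates that computation over plain double arrays;
// its name and signature are illustrative assumptions, not part of this API.
private static double cosineSketch(double[] x, double[] y, int classIndex) {
    double dot = 0, len1 = 0, len2 = 0;
    for (int i = 0; i < x.length; i++) {
        if (i == classIndex) {
            continue;                      // skip the class attribute
        }
        dot  += x[i] * y[i];               // accumulate the dot product
        len1 += x[i] * x[i];               // accumulate squared lengths
        len2 += y[i] * y[i];
    }
    // fall back to the raw dot product if either vector has zero length
    return (len1 == 0 || len2 == 0) ? dot : dot / (Math.sqrt(len1) * Math.sqrt(len2));
}
// Example: x = {1, 2}, y = {2, 1}, classIndex = -1 (no class attribute) gives
// (1*2 + 2*1) / (sqrt(5) * sqrt(5)) = 4/5 = 0.8.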
/**
 * Create an Instance with features corresponding to internal "features":
 * for x'y, returns an instance with the features [x1*y1, x2*y2, ..., xn*yn].
 * @param instance1 first instance
 * @param instance2 second instance
 * @return an instance containing the dot-product components
 */
public Instance createDiffInstance(Instance instance1, Instance instance2) {
    if (instance1 instanceof SparseInstance && instance2 instanceof SparseInstance) {
        return createDiffInstanceSparse((SparseInstance) instance1, (SparseInstance) instance2);
    } else if (instance1 instanceof SparseInstance) {
        return createDiffInstanceSparseNonSparse((SparseInstance) instance1, instance2);
    } else {
        return createDiffInstanceNonSparse(instance1, instance2);
    }
}

/**
 * Create a SparseInstance with features corresponding to internal "features":
 * for x'y, returns an instance with the features [x1*y1, x2*y2, ..., xn*yn].
 * @param instance1 first sparse instance
 * @param instance2 second sparse instance
 * @return a sparse instance containing the dot-product components
 */
public SparseInstance createDiffInstanceSparse(SparseInstance instance1, SparseInstance instance2) {
    SparseInstance diffInstance = new SparseInstance(instance1);
    diffInstance.setDataset(instance1.dataset());

    double sim = 0;
    try {
        sim = similaritySparse(instance1, instance2);
    } catch (Exception e) {
        e.printStackTrace();
    }
    double length1 = lengthWeighted(instance1);
    double length2 = lengthWeighted(instance2);
    double denom = length1 * length1 * length2 * length2;
    if (denom == 0) {
        for (int i = 0; i < diffInstance.numValues(); i++) {
            diffInstance.setValueSparse(i, 0);
        }
        return diffInstance;
    }

    // iterate through the attributes that are present in the first instance
    for (int i = 0; i < diffInstance.numValues(); i++) {
        int attrIdx = diffInstance.index(i);
        // Skip the class index
        if (attrIdx != m_classIndex) {
            double val1 = diffInstance.valueSparse(i);
            // get the corresponding value of the second instance
            double val2 = instance2.value(attrIdx);
            double val = (val1 * val2 * length1 * length2
                          - sim * (val1 * val1 * length2 / (2 * length1)
                                   + val2 * val2 * length1 / (2 * length2))) / denom;
            diffInstance.setValue(attrIdx, val);
        } else {
            // class index: zero it out in the diff instance
            diffInstance.setValue(attrIdx, 0);
        }
    }
    // pick up values only present in instance2
    for (int i = 0; i < instance2.numValues(); i++) {
        int attrIdx = instance2.index(i);
        if (attrIdx != m_classIndex) {
            int idx1 = instance1.locateIndex(attrIdx);
            if (idx1 < 0 || attrIdx != instance1.index(idx1)) {
                double val2 = instance2.valueSparse(i);
                double val = -sim * val2 * val2 * length1 / (2 * length2) / denom;
                diffInstance.setValue(attrIdx, val);
            }
        }
    }
    return diffInstance;
}

/**
 * Create an Instance with features corresponding to internal "features":
 * for x'y, returns an instance with the features [x1*y1, x2*y2, ..., xn*yn].
 * @param instance1 first instance
 * @param instance2 second instance
 * @return an instance containing the dot-product components
 */
public Instance createDiffInstanceNonSparse(Instance instance1, Instance instance2) {
    double[] values1 = instance1.toDoubleArray();
    double[] values2 = instance2.toDoubleArray();
    double[] diffValues = new double[values1.length];

    // iterate through the attributes
    for (int i = 0; i < values1.length; i++) {
        // Skip the class index
        if (i != m_classIndex) {
            diffValues[i] = values1[i] * values2[i];
        }
    }
    if (m_lengthNormalized) {
        double length1 = lengthWeighted(instance1);
        double length2 = lengthWeighted(instance2);
        if (length1 != 0 && length2 != 0) {
            for (int i = 0; i < diffValues.length; i++) {
                if (i != m_classIndex) {
                    diffValues[i] /= length1 * length2;
                }
            }
        }
    }
    Instance diffInstance = new Instance(1.0, diffValues);
    diffInstance.setDataset(instance1.dataset());
    return diffInstance;
}

/**
 * Create a SparseInstance with features corresponding to internal "features":
 * for x'y, returns an instance with the features [x1*y1, x2*y2, ..., xn*yn].
 * @param instance1 first sparse instance
 * @param instance2 second instance
 * @return a sparse instance containing the dot-product components
 */
public SparseInstance createDiffInstanceSparseNonSparse(SparseInstance instance1, Instance instance2) {
    double length = -1;
    SparseInstance diffInstance = new SparseInstance(instance1);
    diffInstance.setDataset(instance1.dataset());

    if (m_lengthNormalized) {
        double length1 = lengthWeighted(instance1);
        double length2 = lengthWeighted(instance2);
        if (length1 != 0 && length2 != 0) {
            length = length1 * length2;
        }
    }
    // iterate through the attributes that are present in the first instance
    for (int i = 0; i < diffInstance.numValues(); i++) {
        Attribute attribute = diffInstance.attributeSparse(i);
        int attrIdx = attribute.index();
        // Skip the class index
        if (attrIdx != m_classIndex) {
            double val1 = diffInstance.value(attrIdx);
            double val2 = instance2.value(attrIdx);
            double val = (length < 0) ? val1 * val2 : (val1 * val2) / length;
            diffInstance.setValue(attribute, val);
        }
    }
    return diffInstance;
}
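// A minimal sketch (not part of the original class) of what createDiffInstanceNonSparse
// produces for dense vectors: the component-wise products x[i]*y[i], divided by the product
// of the two vector lengths when normalization is on. It uses plain Euclidean norms and
// ignores the class index for brevity; in the class above the norms come from lengthWeighted().
// The helper name is an illustrative assumption, not part of this API.
private static double[] diffComponentsSketch(double[] x, double[] y, boolean normalize) {
    double[] diff = new double[x.length];
    double len1 = 0, len2 = 0;
    for (int i = 0; i < x.length; i++) {
        diff[i] = x[i] * y[i];            // dot-product component for attribute i
        len1 += x[i] * x[i];
        len2 += y[i] * y[i];
    }
    if (normalize && len1 != 0 && len2 != 0) {
        double norm = Math.sqrt(len1) * Math.sqrt(len2);
        for (int i = 0; i < diff.length; i++) {
            diff[i] /= norm;              // summing these components gives the cosine similarity
        }
    }
    return diff;
}
// Example: x = {1, 2}, y = {2, 1} with normalization gives {2/5, 2/5};
// the components sum to 0.8, matching the cosine similarity above.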
/**
 * Returns the distance between two instances using the current similarity-to-distance
 * conversion type (CONVERSION_LAPLACIAN, CONVERSION_EXPONENTIAL, CONVERSION_UNIT, ...).
 * @param instance1 first instance
 * @param instance2 second instance
 * @exception Exception if the distance could not be estimated
 */
public double distance(Instance instance1, Instance instance2) throws Exception {
    if (m_trainable && m_external && m_trained) {
        return m_metricLearner.getDistance(instance1, instance2);
    }
    switch (m_conversionType) {
    case CONVERSION_LAPLACIAN:
        return 1 / (1 + similarity(instance1, instance2));
    case CONVERSION_UNIT:
        return 2 * (1 - similarity(instance1, instance2));
    case CONVERSION_EXPONENTIAL:
        return Math.exp(-similarity(instance1, instance2));
    default:
        throw new Exception("Unknown similarity to distance conversion method");
    }
}

/**
 * Returns the distance between two instances using the current similarity-to-distance
 * conversion type (CONVERSION_LAPLACIAN, CONVERSION_EXPONENTIAL, CONVERSION_UNIT, ...)
 * without using the attribute weights.
 * @param instance1 first instance
 * @param instance2 second instance
 * @exception Exception if the distance could not be estimated
 */
public double distanceNonWeighted(Instance instance1, Instance instance2) throws Exception {
    switch (m_conversionType) {
    case CONVERSION_LAPLACIAN:
        return 1 / (1 + similarityNonWeighted(instance1, instance2));
    case CONVERSION_UNIT:
        return 2 * (1 - similarityNonWeighted(instance1, instance2));
    case CONVERSION_EXPONENTIAL:
        return Math.exp(-similarityNonWeighted(instance1, instance2));
    default:
        throw new Exception("Unknown similarity to distance conversion method");
    }
}

/**
 * Set the type of similarity-to-distance conversion. Values other than
 * CONVERSION_LAPLACIAN, CONVERSION_UNIT, or CONVERSION_EXPONENTIAL will be ignored.
 * @param conversionType type of the similarity-to-distance conversion to use
 */
public void setConversionType(SelectedTag conversionType) {
    if (conversionType.getTags() == TAGS_CONVERSION) {
        m_conversionType = conversionType.getSelectedTag().getID();
    }
}

/**
 * Return the type of similarity-to-distance conversion.
 * @return one of CONVERSION_LAPLACIAN, CONVERSION_UNIT, or CONVERSION_EXPONENTIAL
 */
public SelectedTag getConversionType() {
    return new SelectedTag(m_conversionType, TAGS_CONVERSION);
}

/**
 * Set normalization by instance length to be on or off.
 * @param lengthNormalized if true, similarity is normalized by the length of the vectors
 */
public void setLengthNormalized(boolean lengthNormalized) {
    m_lengthNormalized = lengthNormalized;
}

/**
 * Check whether similarity is normalized by the length of the vectors.
 * @return true if similarity is normalized by the length of the vectors
 */
public boolean getLengthNormalized() {
    return m_lengthNormalized;
}
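// A minimal sketch (not part of the original class) of the three similarity-to-distance
// conversions used above, written out for a single similarity value s. All three are
// monotonically decreasing in s, so a higher similarity always maps to a smaller distance.
// The helper and its int codes are illustrative; the class uses its own CONVERSION_*
// constants and the TAGS_CONVERSION tag list.
private static double toDistanceSketch(double s, int conversion) {
    switch (conversion) {
    case 0:  return 1 / (1 + s);        // Laplacian:    s = 0 -> 1,   s = 1 -> 0.5
    case 1:  return 2 * (1 - s);        // unit:         s = 0 -> 2,   s = 1 -> 0
    default: return Math.exp(-s);       // exponential:  s = 0 -> 1,   s = 1 -> ~0.368
    }
}
// For a cosine similarity of 0.8 these give 1/1.8 ~= 0.556, 2*0.2 = 0.4, and e^{-0.8} ~= 0.449.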
/**
 * Updates the weights.
 */
public void learnMetric(Instances data) throws Exception {
    if (m_metricLearner == null) {
        System.err.println("Metric learner for WeightedDotP is not initialized. No training was conducted");
        return;
    }
    m_metricLearner.trainMetric(this, data);
}

/**
 * Set the distance metric learner.
 * @param metricLearner the metric learner
 */
public void setMetricLearner(MetricLearner metricLearner) {
    m_metricLearner = metricLearner;
}

/**
 * Get the distance metric learner.
 * @return the distance metric learner that this metric employs
 */
public MetricLearner getMetricLearner() {
    return m_metricLearner;
}

/**
 * Create a sparse instance with features corresponding to dot-product components
 * of the two given instances.
 * @param instance1 first sparse instance
 * @param instance2 second sparse instance
 */
// OLD VERSION
// protected SparseInstance createDiffInstanceSparse(SparseInstance instance1, SparseInstance instance2) {
//     int maxNumValues = instance1.numValues() + instance2.numValues();  // the overall number of attributes
//     // arrays that will hold the values and the attribute indices;
//     // these will be cut off later
//     double[] attrValues = new double[maxNumValues];
//     int[] indices = new int[maxNumValues];
//     int counter = 0;
//     Arrays.fill(attrValues, Double.NaN);
//     Arrays.fill(indices, Integer.MAX_VALUE);
//     // iterate through the attributes that are present in the first instance
//     for (int i = 0; i < instance1.numValues(); i++) {
//         Attribute attribute = instance1.attributeSparse(i);
//         int attrIdx = attribute.index();
//         // For the class index, assign value 0 if instances are from the same class, 1 if from different classes
//         if (attrIdx != m_classIndex) {
//             // if the second instance has the corresponding value, create a diff attribute
//             int idx2 = instance2.locateIndex(attrIdx);
//             if (idx2 >= 0 && attrIdx == instance2.index(idx2)) {
//                 attrValues[counter] = m_attrWeights[attrIdx] * instance1.value(attrIdx) * instance2.value(attrIdx);
//                 indices[counter] = attrIdx;
//                 counter++;
//             }
//         }
//     }
//     // Create the sparse difference instance
//     double[] trueAttrValues = new double[counter];
//     int[] trueIndices = new int[counter];
//     for (int i = 0; i < counter; i++) {
//         trueAttrValues[i] = attrValues[i];
//         trueIndices[i] = indices[i];
//     }
//     SparseInstance diffInstance = new SparseInstance(1.0, trueAttrValues, trueIndices, maxNumValues);
//     diffInstance.setDataset(instance1.dataset());
//     return diffInstance;
// }
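// A hypothetical usage sketch (comments only, not part of the original class), assuming a
// no-argument WeightedDotP constructor and a weka.core.Instances object `data` with at
// least two instances. Any additional setup the metric may require (class index, attribute
// weights, buildMetric-style initialization) is omitted here; `someMetricLearner` is a
// placeholder for any MetricLearner implementation.
//
//   WeightedDotP metric = new WeightedDotP();
//   metric.setLengthNormalized(true);                                         // cosine-style similarity
//   metric.setConversionType(new SelectedTag(CONVERSION_UNIT, TAGS_CONVERSION));
//
//   double s = metric.similarity(data.instance(0), data.instance(1));         // dot product / cosine
//   double d = metric.distance(data.instance(0), data.instance(1));           // 2 * (1 - s) for CONVERSION_UNIT
//   Instance diff = metric.createDiffInstance(data.instance(0), data.instance(1));
//
//   // with a metric learner attached, learnMetric() delegates training to it:
//   metric.setMetricLearner(someMetricLearner);
//   metric.learnMetric(data);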