WeightedDotP.java (Revision 1.13)
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * WeightedDotP.java
 * Copyright (C) 2001 Mikhail Bilenko
 */

package weka.core.metrics;

import weka.core.*;
import java.util.Enumeration;
import java.util.ArrayList;
import java.util.Vector;
import java.util.Arrays;

/**
 * WeightedDotP class
 *
 * Implements the weighted dot product distance metric
 *
 * @author Mikhail Bilenko (mbilenko@cs.utexas.edu)
 * @version $Revision: 1.13 $
 */
public class WeightedDotP extends LearnableMetric implements OptionHandler {

  /** Should cosine similarity be normalized by the length of the two instance vectors? */
  protected boolean m_lengthNormalized = true;

  /** We can have different ways of converting from similarity to distance */
  public static final int CONVERSION_LAPLACIAN = 1;
  public static final int CONVERSION_UNIT = 2;
  public static final int CONVERSION_EXPONENTIAL = 4;
  public static final Tag[] TAGS_CONVERSION = {
    new Tag(CONVERSION_UNIT, "distance = 1-similarity"),
    new Tag(CONVERSION_LAPLACIAN, "distance=1/(1+similarity)"),
    new Tag(CONVERSION_EXPONENTIAL, "distance=exp(-similarity)")
  };

  /** The method of converting, by default Laplacian */
  protected int m_conversionType = CONVERSION_LAPLACIAN;

  /** A metric learner responsible for training the parameters of the metric */
  //  protected MetricLearner m_metricLearner = new MatlabMetricLearner();
  //  protected MetricLearner m_metricLearner = new GDMetricLearner();
  protected MetricLearner m_metricLearner = new ClassifierMetricLearner();

  /** Creates an empty metric class */
  public WeightedDotP() {
    m_normalizeData = true;
  }

  /**
   * Creates a new metric.
   * @param numAttributes the number of attributes that the metric will work on
   */
  public WeightedDotP(int numAttributes) throws Exception {
    m_normalizeData = true;
    buildMetric(numAttributes);
  }

  /**
   * Creates a new metric which takes specified attributes.
   *
   * @param _attrIdxs an array containing the attribute indices that will
   * be used in the metric
   */
  public WeightedDotP(int[] _attrIdxs) throws Exception {
    m_normalizeData = true;
    setAttrIdxs(_attrIdxs);
    buildMetric(_attrIdxs.length);
  }

  /**
   * Reset all values that have been learned
   */
  public void resetMetric() throws Exception {
    m_trained = false;
    if (m_attrWeights != null) {
      for (int i = 0; i < m_attrWeights.length; i++) {
        m_attrWeights[i] = 1;
      }
    }
  }

  /**
   * Generates a new Metric. Has to initialize all fields of the metric
   * with default values.
   *
   * @param numAttributes the number of attributes that the metric will work on
   * @exception Exception if the distance metric has not been
   * generated successfully.
   */
  public void buildMetric(int numAttributes) throws Exception {
    m_numAttributes = numAttributes;
    m_attrWeights = new double[numAttributes];
    System.out.println("Setting attrib weights ...");
    for (int i = 0; i < numAttributes; i++) {
      m_attrWeights[i] = 1;
    }
  }

  /**
   * Generates a new Metric. Has to initialize all fields of the metric
   * with default values.
   *
   * @param options an array of options suitable for passing to setOptions.
   * May be null.
   * @exception Exception if the distance metric has not been
   * generated successfully.
   */
  public void buildMetric(int numAttributes, String[] options) throws Exception {
    buildMetric(numAttributes);
  }

  /**
   * Create a new metric for operating on specified instances
   * @param data instances that the metric will be used on
   */
  public void buildMetric(Instances data) throws Exception {
    m_classIndex = data.classIndex();
    m_numAttributes = data.numAttributes();
    // account for the class attribute if it is present
    if (m_classIndex != m_numAttributes - 1 && m_classIndex != -1) {
      throw new Exception("Class attribute (" + m_classIndex + ") should be the last attribute!!!");
    }
    if (m_classIndex != -1) {
      m_numAttributes--;
    }
    System.out.println("About to build metric with " + m_numAttributes
                       + " attributes, classIdx=" + m_classIndex);
    buildMetric(m_numAttributes);
    if (m_trainable) {
      learnMetric(data);
    }
  }

  /**
   * Returns a dot product similarity value between two instances.
   * @param instance1 First instance.
   * @param instance2 Second instance.
   * @exception Exception if similarity could not be estimated.
   */
  public double similarity(Instance instance1, Instance instance2) throws Exception {
    // either pass the computation to the external classifier, or do the work yourself
    if (m_trainable && m_external && m_trained) {
      return m_metricLearner.getSimilarity(instance1, instance2);
    } else {
      return similarityInternal(instance1, instance2);
    }
  }

  /** Return the penalty contribution: 1 - similarity */
  public double penalty(Instance instance1, Instance instance2) throws Exception {
    double sim = similarity(instance1, instance2);
    return 1 - sim;
  }

  /** Return the symmetric penalty contribution: 1 - similarity */
  public double penaltySymmetric(Instance instance1, Instance instance2) throws Exception {
    double sim = similarity(instance1, instance2);
    return 1 - sim;
  }

  /**
   * Returns a dot product similarity value between two instances without using the weights.
   * @param instance1 First instance.
   * @param instance2 Second instance.
   * @exception Exception if similarity could not be estimated.
   */
  public double similarityNonWeighted(Instance instance1, Instance instance2) throws Exception {
    if (instance1 instanceof SparseInstance && instance2 instanceof SparseInstance) {
      return similaritySparseNonWeighted((SparseInstance) instance1, (SparseInstance) instance2);
    } else if (instance1 instanceof SparseInstance) {
      return similaritySparseNonSparseNonWeighted((SparseInstance) instance1, instance2);
    } else if (instance2 instanceof SparseInstance) {
      return similaritySparseNonSparseNonWeighted((SparseInstance) instance2, instance1);
    } else {
      return similarityNonSparseNonWeighted(instance1, instance2);
    }
  }

  /**
   * Returns a dot product similarity value between two instances.
   * @param instance1 First instance.
   * @param instance2 Second instance.
   * @exception Exception if similarity could not be estimated.
   */
  public double similarityInternal(Instance instance1, Instance instance2) throws Exception {
    if (instance1 instanceof SparseInstance && instance2 instanceof SparseInstance) {
      return similaritySparse((SparseInstance) instance1, (SparseInstance) instance2);
    } else if (instance1 instanceof SparseInstance) {
      return similaritySparseNonSparse((SparseInstance) instance1, instance2);
    } else if (instance2 instanceof SparseInstance) {
      return similaritySparseNonSparse((SparseInstance) instance2, instance1);
    } else {
      return similarityNonSparse(instance1, instance2);
    }
  }

  /**
   * Returns a dot product similarity value between two sparse instances.
   * @param instance1 First sparse instance.
   * @param instance2 Second sparse instance.
   * @exception Exception if similarity could not be estimated.
   */
  public double similaritySparse(SparseInstance instance1, SparseInstance instance2) throws Exception {
    double sim = 0;
    double length1 = 0, length2 = 0;
    // iterate through the attributes that are present in the first instance
    for (int i = 0; i < instance1.numValues(); i++) {
      Attribute attribute = instance1.attributeSparse(i);
      int attrIdx = attribute.index();
      // Skip the class index
      if (attrIdx != m_classIndex) {
        // get the corresponding value of the second instance
        int idx2 = instance2.locateIndex(attrIdx);
        if (idx2 >= 0 && attrIdx == instance2.index(idx2)) {
          double val1 = instance1.value(attrIdx);
          double val2 = instance2.value(attrIdx);
          sim += m_attrWeights[attrIdx] * val1 * val2;
        }
      }
    }
    if (m_lengthNormalized) {
      length1 = lengthWeighted(instance1);
      length2 = lengthWeighted(instance2);
      if (length1 != 0 && length2 != 0) {
        sim /= length1 * length2;
      }
    }
    return sim;
  }

  /**
   * Returns a dot product similarity value between two non-sparse instances.
   * @param instance1 First non-sparse instance.
   * @param instance2 Second non-sparse instance.
   * @exception Exception if similarity could not be estimated.
   */
  public double similarityNonSparse(Instance instance1, Instance instance2) throws Exception {
    double sim = 0;
    double[] values1 = instance1.toDoubleArray();
    double[] values2 = instance2.toDoubleArray();
    double length1 = 0, length2 = 0;
    // iterate through the attributes
    for (int i = 0; i < values1.length; i++) {
      // Skip the class index
      if (i != m_classIndex) {
        sim += m_attrWeights[i] * values1[i] * values2[i];
      }
    }
    if (m_lengthNormalized) {
      length1 = lengthWeighted(instance1);
      length2 = lengthWeighted(instance2);
      if (length1 != 0 && length2 != 0) {
        sim /= length1 * length2;
      }
    }
    return sim;
  }

  /**
   * Returns a dot product similarity value between a sparse instance and a non-sparse instance.
   * @param instance1 First (sparse) instance.
   * @param instance2 Second (non-sparse) instance.
   * @exception Exception if similarity could not be estimated.
   */
  public double similaritySparseNonSparse(SparseInstance instance1, Instance instance2) throws Exception {
    double sim = 0;
    // iterate through the attributes that are present in the first instance
    for (int i = 0; i < instance1.numValues(); i++) {
      Attribute attribute = instance1.attributeSparse(i);
      int attrIdx = attribute.index();
      // Skip the class index
      if (attrIdx != m_classIndex) {
        double val1 = instance1.value(attrIdx);
        double val2 = instance2.value(attrIdx);
        sim += m_attrWeights[attrIdx] * val1 * val2;
      }
    }
    // Bad news: need to iterate through all attributes of the non-sparse instance
    // to compute its length for normalization
    if (m_lengthNormalized) {
      double length1 = lengthWeighted(instance1);
      double length2 = lengthWeighted(instance2);
      if (length1 != 0 && length2 != 0) {
        sim /= length1 * length2;
      }
    }
    return sim;
  }

  /**
   * Returns a non-weighted dot product similarity value between two sparse instances.
   * @param instance1 First sparse instance.
   * @param instance2 Second sparse instance.
   * @exception Exception if similarity could not be estimated.
   */
  public double similaritySparseNonWeighted(SparseInstance instance1, SparseInstance instance2) throws Exception {
    double sim = 0;
    double length1 = 0, length2 = 0;
    // iterate through the attributes that are present in the first instance
    for (int i = 0; i < instance1.numValues(); i++) {
      Attribute attribute = instance1.attributeSparse(i);
      int attrIdx = attribute.index();
      // Skip the class index
      if (attrIdx != m_classIndex) {
        // get the corresponding value of the second instance
        int idx2 = instance2.locateIndex(attrIdx);
        if (idx2 >= 0 && attrIdx == instance2.index(idx2)) {
          double val1 = instance1.value(attrIdx);
          double val2 = instance2.value(attrIdx);
          sim += val1 * val2;
        }
      }
    }
    if (m_lengthNormalized) {
      length1 = length(instance1);
      length2 = length(instance2);
      if (length1 != 0 && length2 != 0) {
        sim /= length1 * length2;
      }
    }
    return sim;
  }

  /**
   * Returns a non-weighted dot product similarity value between two non-sparse instances.
   * @param instance1 First non-sparse instance.
   * @param instance2 Second non-sparse instance.
  // [listing ends here; the remainder of WeightedDotP.java is not included on this page]
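For orientation, the sketch below shows how the metric above might be used. It is a minimal example only: the driver class WeightedDotPExample and the attribute values are made up for illustration, and it assumes the pre-3.7 Weka API this class is written against, where weka.core.Instance is a concrete class with an Instance(double weight, double[] attValues) constructor. With m_lengthNormalized left at its default of true, similarity() computes a length-normalized (cosine-style) weighted dot product, and penalty() returns 1 - similarity.

  import weka.core.Instance;
  import weka.core.metrics.WeightedDotP;

  public class WeightedDotPExample {
    public static void main(String[] args) throws Exception {
      // Build the metric over three attributes; every attribute weight starts at 1.
      WeightedDotP metric = new WeightedDotP(3);

      // Two dense instances with illustrative values (old Weka 3.x constructor).
      Instance a = new Instance(1.0, new double[] {0.5, 0.0, 2.0});
      Instance b = new Instance(1.0, new double[] {1.0, 3.0, 0.0});

      // The metric is untrained, so similarity() falls through to similarityInternal(),
      // i.e. the length-normalized weighted dot product of a and b
      // (the class attribute, if one is set, is skipped).
      double sim = metric.similarity(a, b);
      double pen = metric.penalty(a, b);   // 1 - similarity
      System.out.println("similarity = " + sim + ", penalty = " + pen);
    }
  }

The CONVERSION_* constants and TAGS_CONVERSION suggest that other similarity-to-distance conversions (unit, Laplacian, exponential) are selectable, presumably in distance-related methods defined elsewhere in the metric hierarchy; only the 1 - similarity form appears in the methods shown on this page.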