⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 affinemetric.java

📁 wekaUT 是 University of Texas at Austin 开发的基于 Weka 的半监督学习（semi-supervised learning）分类器
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    AffineMetric.java
 *    Copyright (C) 2001 Mikhail Bilenko
 *
 */

package weka.deduping.metrics;

import java.util.*;
import java.io.Serializable;
import weka.core.*;

/**
 * A measure of distance between two strings based on affine distance:
 * an edit distance in which opening a gap and extending an existing gap
 * are charged separately.
 * See D. Gusfield, "Algorithms on Strings, Trees and Sequences",
 * Cambridge University Press, 1997.
 *
 * With the default costs a match has a negative cost, so the distance
 * between similar strings can itself be negative.
 *
 * @author Mikhail Bilenko
 */
public class AffineMetric extends StringMetric implements OptionHandler, Serializable {

  /** The cost of matching two characters */
  protected double m_matchCost = -1;

  /** The cost of substituting one character for another */
  protected double m_subCost = 2;

  /** The cost of opening a gap */
  protected double m_gapStartCost = 3;

  /** The cost of continuing a gap */
  protected double m_gapExtendCost = 1;

  /** Should the distance be normalized by the sum of the lengths of the strings? */
  protected boolean m_normalized = true;

  /** We can have different ways of converting from distance to similarity */
  public static final int CONVERSION_LAPLACIAN = 1;
  public static final int CONVERSION_UNIT = 2;
  public static final int CONVERSION_EXPONENTIAL = 4;
  public static final Tag[] TAGS_CONVERSION = {
    new Tag(CONVERSION_UNIT, "similarity = 1-distance"),
    new Tag(CONVERSION_LAPLACIAN, "similarity=1/(1+distance)"),
    new Tag(CONVERSION_EXPONENTIAL, "similarity=exp(-distance)")
  };

  /** The method of converting distance to similarity; exponential by default */
  protected int m_conversionType = CONVERSION_EXPONENTIAL;

  /** A default constructor; all parameters keep their default values */
  public AffineMetric () { }

  /**
   * A metric can be data-dependent (e.g. vector space for IDF).
   * @return false, this metric uses only its cost parameters
   */
  public boolean isDataDependent() {
    return false;
  }

  /**
   * Obtain the affine distance between two strings.
   *
   * Three tables are filled, indexed by prefix lengths (i, j) of the
   * two strings: D and I hold the cheapest alignment that ends inside
   * a gap (D is derived from cell (i-1, j), I from cell (i, j-1)), and
   * T holds the cheapest alignment overall.
   *
   * @param string1 String 1
   * @param string2 String 2
   * @return Affine distance between the two strings; divided by the sum
   * of the string lengths if normalization is switched on. Two empty
   * strings are at distance 0.
   * @exception Exception declared for compatibility with callers; not
   * thrown explicitly by this implementation
   */
  public double distance(String string1, String string2) throws Exception {
    char[] s1 = string1.toCharArray();
    char[] s2 = string2.toCharArray();
    int l1 = s1.length, l2 = s2.length;

    // Nothing to align: avoid charging for a gap of negative length and
    // avoid dividing by a zero total length when normalizing.
    if (l1 == 0 && l2 == 0) {
      return 0;
    }

    // Exactly one string is empty: the alignment is a single gap spanning
    // the other string. Normalization applies here like everywhere else.
    if (l1 == 0 || l2 == 0) {
      double gapOnly = m_gapStartCost + (l1 + l2 - 1) * m_gapExtendCost;
      return m_normalized ? gapOnly / (l1 + l2) : gapOnly;
    }

    double[][] T = new double[l1 + 1][l2 + 1];
    double[][] I = new double[l1 + 1][l2 + 1];
    double[][] D = new double[l1 + 1][l2 + 1];

    // Border cells of the gap tables act as "unreachable" sentinels.
    for (int j = 0; j <= l2; j++) {
      I[0][j] = Double.MAX_VALUE;
      D[0][j] = Double.MAX_VALUE;
    }
    for (int i = 0; i <= l1; i++) {
      I[i][0] = Double.MAX_VALUE;
      D[i][0] = Double.MAX_VALUE;
    }

    // Border cells of T: one gap covering the whole prefix.
    T[0][0] = 0;
    T[0][1] = m_gapStartCost;
    T[1][0] = m_gapStartCost;
    for (int j = 2; j <= l2; j++) {
      T[0][j] = T[0][j - 1] + m_gapExtendCost;
    }
    for (int i = 2; i <= l1; i++) {
      T[i][0] = T[i - 1][0] + m_gapExtendCost;
    }

    for (int i = 1; i <= l1; i++) {
      for (int j = 1; j <= l2; j++) {
        // Either extend the gap already open in the neighbouring cell,
        // or open a new one from the best alignment of that cell.
        D[i][j] = Math.min(D[i - 1][j] + m_gapExtendCost,
                           T[i - 1][j] + m_gapStartCost);
        I[i][j] = Math.min(I[i][j - 1] + m_gapExtendCost,
                           T[i][j - 1] + m_gapStartCost);

        double pairCost = (s1[i - 1] == s2[j - 1]) ? m_matchCost : m_subCost;
        double diagonal = T[i - 1][j - 1] + pairCost;

        T[i][j] = Math.min(diagonal, Math.min(D[i][j], I[i][j]));
      }
    }

    // T[l1][l2] is already the minimum over the three tables.
    double ret = T[l1][l2];
    if (m_normalized) {
      ret /= l1 + l2;
    }
    return ret;
  }

  /**
   * The computation of a metric can be either based on distance, or on similarity
   * @return true
   */
  public boolean isDistanceBased() {
    return true;
  }

  /**
   * Returns a similarity estimate between two strings. Similarity is obtained by
   * inverting the distance value using one of three methods:
   * CONVERSION_LAPLACIAN, CONVERSION_EXPONENTIAL, CONVERSION_UNIT.
   * @param string1 First string.
   * @param string2 Second string.
   * @return the similarity derived from the distance between the two strings
   * @exception Exception if similarity could not be estimated.
   */
  public double similarity(String string1, String string2) throws Exception {
    switch (m_conversionType) {
    case CONVERSION_LAPLACIAN:
      return 1 / (1 + distance(string1, string2));
    case CONVERSION_UNIT:
      // NOTE(review): the tag text for CONVERSION_UNIT reads
      // "similarity = 1-distance" but the value computed here is doubled.
      // Behaviour is left as is; confirm which of the two is intended.
      return 2 * (1 - distance(string1, string2));
    case CONVERSION_EXPONENTIAL:
      return Math.exp(-distance(string1, string2));
    default:
      throw new Exception ("Unknown distance to similarity conversion method");
    }
  }

  /**
   * Set the match cost
   * @param matchCost the cost of finding a matching pair of characters
   */
  public void setMatchCost(double matchCost) {
    m_matchCost = matchCost;
  }

  /**
   * Get the match cost
   * @return the cost of finding a matching pair of characters
   */
  public double getMatchCost() {
    return m_matchCost;
  }

  /**
   * Set the substitution cost
   * @param subCost the cost of substituting one character for another
   */
  public void setSubCost(double subCost) {
    m_subCost = subCost;
  }

  /**
   * Get the substitution cost
   * @return the cost of substituting a pair of characters
   */
  public double getSubCost() {
    return m_subCost;
  }

  /**
   * Set the gap opening cost
   * @param gapStartCost the cost of opening a gap
   */
  public void setGapStartCost(double gapStartCost) {
    m_gapStartCost = gapStartCost;
  }

  /**
   * Get the gap opening cost
   * @return the cost of opening a gap
   */
  public double getGapStartCost() {
    return m_gapStartCost;
  }

  /**
   * Set the gap extension cost
   * @param gapExtendCost the cost of extending a gap
   */
  public void setGapExtendCost(double gapExtendCost) {
    m_gapExtendCost = gapExtendCost;
  }

  /**
   * Get the gap extension cost
   * @return the cost of extending a gap
   */
  public double getGapExtendCost() {
    return m_gapExtendCost;
  }

  /**
   * Set the distance to be normalized by the sum of the string's lengths
   * @param normalized if true, distance is normalized by the sum of string's lengths
   */
  public void setNormalized(boolean normalized) {
    m_normalized = normalized;
  }

  /**
   * Get whether the distance is normalized by the sum of the string's lengths
   * @return if true, distance is normalized by the sum of string's lengths
   */
  public boolean getNormalized() {
    return m_normalized;
  }

  /**
   * Create a copy of this metric
   * @return another AffineMetric with the same exact parameters as this metric,
   * including the distance-to-similarity conversion type
   */
  public Object clone() {
    AffineMetric metric = new AffineMetric();
    metric.setNormalized(m_normalized);
    metric.setMatchCost(m_matchCost);
    metric.setSubCost(m_subCost);
    metric.setGapStartCost(m_gapStartCost);
    metric.setGapExtendCost(m_gapExtendCost);
    // There is no setter for the conversion type in this class, so the
    // field is copied directly.
    metric.m_conversionType = m_conversionType;
    return metric;
  }

  /**
   * Gets the current settings of this AffineMetric.
   *
   * @return an array of strings suitable for passing to setOptions();
   * unused trailing slots are filled with empty strings
   */
  public String [] getOptions() {
    String [] options = new String [10];
    int current = 0;

    if (m_normalized) {
      options[current++] = "-N";
    }
    options[current++] = "-m";
    options[current++] = "" + m_matchCost;
    options[current++] = "-s";
    options[current++] = "" + m_subCost;
    options[current++] = "-g";
    options[current++] = "" + m_gapStartCost;
    options[current++] = "-e";
    options[current++] = "" + m_gapExtendCost;

    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * Parses a given list of options. Valid options are:<p>
   *
   * -N normalize by length <br>
   * -m matchCost <br>
   * -s subCost <br>
   * -g gapStartCost <br>
   * -e gapExtendCost <p>
   *
   * Normalization is set from the presence of -N, so it is switched off
   * when the flag is absent. A cost whose option is absent keeps its
   * current value.
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option cannot be parsed
   */
  public void setOptions(String[] options) throws Exception {
    setNormalized(Utils.getFlag('N', options));

    String matchCostString = Utils.getOption('m', options);
    if (matchCostString.length() != 0) {
      setMatchCost(Double.parseDouble(matchCostString));
    }

    String subCostString = Utils.getOption('s', options);
    if (subCostString.length() != 0) {
      setSubCost(Double.parseDouble(subCostString));
    }

    String gapStartString = Utils.getOption('g', options);
    if (gapStartString.length() != 0) {
      setGapStartCost(Double.parseDouble(gapStartString));
    }

    String gapExtendString = Utils.getOption('e', options);
    if (gapExtendString.length() != 0) {
      setGapExtendCost(Double.parseDouble(gapExtendString));
    }
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(5);

    newVector.addElement(new Option("\tNormalize the distance by the sum of the string lengths\n",
                                    "N", 0, "-N"));
    newVector.addElement(new Option("\tMatch cost\n",
                                    "m", 1, "-m matchCost"));
    newVector.addElement(new Option("\tSubstitution cost\n",
                                    "s", 1, "-s subCost"));
    newVector.addElement(new Option("\tGap start cost\n",
                                    "g", 1, "-g gapStartCost"));
    newVector.addElement(new Option("\tGap extend cost\n",
                                    "e", 1, "-e gapExtendCost"));

    return newVector.elements();
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -