📄 AffineProbMetric.java
  }

  static String intMatrixToString(int matrix[][]) {
    String s = "";
    for (int i = 0; i < matrix.length; i++) {
      for (int j = 0; j < matrix[0].length; j++)
        s = s + matrix[i][j] + " ";
      s = s + "\n";
    }
    return s;
  }

  static String doubleMatrixToString(double matrix[][]) {
    String s = "";
    java.text.DecimalFormat de = new java.text.DecimalFormat("0.0E000");
    for (int i = 0; i < matrix.length; i++) {
      for (int j = 0; j < matrix[0].length; j++)
        s = s + de.format(matrix[i][j]) + " ";
      s = s + "\n";
    }
    return s;
  }

  static String doubleMatrixToString0(double matrix[][][], int k) {
    String s = "";
    java.text.DecimalFormat de = new java.text.DecimalFormat("0.0E000");
    for (int i = 0; i < matrix.length; i++) {
      for (int j = 0; j < matrix[0].length; j++)
        s = s + de.format(matrix[i][j][k]) + " ";
      s = s + "\n";
    }
    return s;
  }

  static String charMatrixToString(char matrix[][]) {
    String s = "";
    for (int i = 0; i < matrix.length; i++) {
      for (int j = 0; j < matrix[0].length; j++)
        s = s + matrix[i][j] + " ";
      s = s + "\n";
    }
    return s;
  }

  /** Calculation of log(a+b) with a correction for machine precision
   * @param _logA log(a)
   * @param _logB log(b)
   * @return log(a+b)
   */
  protected double logSum(double _logA, double _logB) {
    double logSum = 0;
    // make logA the smaller of the two
    double logA = (_logA < _logB) ? _logA : _logB;
    double logB = (_logA < _logB) ? _logB : _logA;
    if (logA - logB < -324 || logA == Double.NEGATIVE_INFINITY) {
      // the smaller term is negligible (or zero); log(a+b) ~ log of the larger term
      logSum = logB;
    } else {
      logSum = logA + Math.log(1 + Math.exp(logB - logA));
    }
    return logSum;
  }

  /**
   * Calculate affine gapped distance using learned costs
   * @param string1 first string
   * @param string2 second string
   * @return minimum cost of the deletions/insertions/substitutions needed
   * to transform string1 into string2 (or vice versa)
   */
  public double costDistance(String string1, String string2) {
    char[] s1 = string1.toLowerCase().toCharArray();
    char[] s2 = string2.toLowerCase().toCharArray();
    int l1 = s1.length, l2 = s2.length;
    double T[][] = new double[l1+1][l2+1];
    double I[][] = new double[l1+1][l2+1];
    double D[][] = new double[l1+1][l2+1];
    double subCost = 0, sub_charCost = 0, ins_charCost = 0, del_charCost = 0, ret;
    int i, j;

    if (l1 == 0 && l2 == 0) {
      return 0;  // both strings empty: nothing to edit
    }
    if (l1 == 0 || l2 == 0) {
      // one string is empty: open a single gap and extend it
      return m_gapStartCost + (l1 + l2 - 1) * m_gapExtendCost;
    }

    // gaps cannot be open before the alignment starts
    for (j = 0; j < l2+1; j++) {
      I[0][j] = Double.MAX_VALUE;
      D[0][j] = Double.MAX_VALUE;
    }
    for (j = 0; j < l1+1; j++) {
      I[j][0] = Double.MAX_VALUE;
      D[j][0] = Double.MAX_VALUE;
    }

    // first row/column: a single gap covering the prefix of one string
    T[0][0] = 0;
    T[0][1] = m_gapStartCost;
    T[1][0] = m_gapStartCost;
    for (j = 2; j < l2+1; j++) {
      ins_charCost = m_editopCosts[blank][s2[j-1]];
      T[0][j] = T[0][j-1] + m_gapExtendCost + ins_charCost;
    }
    for (j = 2; j < l1+1; j++) {
      del_charCost = m_editopCosts[blank][s1[j-1]];
      T[j][0] = T[j-1][0] + m_gapExtendCost + del_charCost;
    }

    for (i = 1; i < l1+1; i++) {
      for (j = 1; j < l2+1; j++) {
        char c1 = s1[i-1];
        char c2 = s2[j-1];
        del_charCost = m_editopCosts[blank][c1];
        ins_charCost = m_editopCosts[blank][c2];
        // sub_charCost = (c1 == c2) ? m_noopCost : m_editopCosts[c1][c2];  // ?? do we use noopCost?
        sub_charCost = (c1 == c2) ? 0 : m_editopCosts[c1][c2];

        // deletion: extend an open gap or start a new one, whichever is cheaper
        if (D[i-1][j] + m_gapExtendCost > T[i-1][j] + m_gapStartCost) {
          D[i][j] = T[i-1][j] + m_gapStartCost + del_charCost;
        } else {
          D[i][j] = D[i-1][j] + m_gapExtendCost + del_charCost;
        }
        // insertion: extend an open gap or start a new one, whichever is cheaper
        if (I[i][j-1] + m_gapExtendCost > T[i][j-1] + m_gapStartCost) {
          I[i][j] = T[i][j-1] + m_gapStartCost + ins_charCost;
        } else {
          I[i][j] = I[i][j-1] + m_gapExtendCost + ins_charCost;
        }

        // substitution/match cost (earlier experimental variants kept as comments)
        // subCost = m_subCost + sub_charCost;
        // subCost = (c1 == c2) ? 0 : (m_subCost + m_gapEndCost);
        // subCost = subCost + sub_charCost;
        subCost = (c1 == c2) ? 0 : m_subCost;
        // subCost = m_subCost;

        if ((T[i-1][j-1] + subCost < D[i-1][j-1] + m_gapEndCost) &&  // d[i][j] or d[i-1][j-1]??
            (T[i-1][j-1] + subCost < I[i-1][j-1] + m_gapEndCost)) {
          T[i][j] = T[i-1][j-1] + subCost + sub_charCost;  // ?? do we add subCharCost?
        } else {
          if (D[i-1][j-1] < I[i-1][j-1]) {
            T[i][j] = D[i-1][j-1] + m_gapEndCost + sub_charCost;
          } else {
            T[i][j] = I[i-1][j-1] + m_gapEndCost + sub_charCost;
          }
        }
      }
    }

    // the distance is the cheapest of the three final states
    if (T[l1][l2] < D[l1][l2] && T[l1][l2] < I[l1][l2]) {
      ret = T[l1][l2];
    } else if (D[l1][l2] < I[l1][l2]) {
      ret = D[l1][l2];
    } else {
      ret = I[l1][l2];
    }

    if (m_normalized) {
      // // get the normalization factor as P(x,y)=P(x)P(y)
      // double Pxy = 2 * m_gapStartCost;
      // for (int k = 0; k < l1; k++) {
      //   Pxy += s1[k] + m_gapExtendCost;
      // }
      // for (int k = 0; k < l2; k++) {
      //   Pxy += s2[k] + m_gapExtendCost;
      // }
      // ret /= Pxy;
      ret /= 4 * (l1 + l2);
    }
    return ret;
  }

  public static void print3dMatrix(double[][][] matrix) {
    DecimalFormat fmt = new DecimalFormat("0.0000E00");
    for (int i = 0; i < matrix[0][0].length; i++) {
      System.out.println("\nMatrix[][][" + i + "]");
      for (int j = 0; j < matrix[0].length; j++) {
        for (int k = 0; k < matrix.length; k++) {
          System.out.print(fmt.format(matrix[k][j][i]) + "\t");
        }
        System.out.println();
      }
    }
  }

  /** Set the distance to be normalized by the sum of the strings' lengths
   * @param normalized if true, distance is normalized by the sum of the strings' lengths
   */
  public void setNormalized(boolean normalized) {
    m_normalized = normalized;
  }

  /** Get whether the distance is normalized by the sum of the strings' lengths
   * @return if true, distance is normalized by the sum of the strings' lengths
   */
  public boolean getNormalized() {
    return m_normalized;
  }

  /** Set whether to use the generative model or convert back to the additive model
   * @param useGenerativeModel if true, the generative model is used
   */
  public void setUseGenerativeModel(boolean useGenerativeModel) {
    m_useGenerativeModel = useGenerativeModel;
  }

  /** Do we use the generative model or convert back to the additive model?
   * @return if true, the generative model is used
   */
  public boolean getUseGenerativeModel() {
    return m_useGenerativeModel;
  }

  /** Set the clamping probability value
   * @param clampProb a lower bound for all probability values to prevent underflow
   */
  public void setClampProb(double clampProb) {
    m_clampProb = clampProb;
  }

  /** Get the clamping probability value
   * @return a lower bound for all probability values to prevent underflow
   */
  public double getClampProb() {
    return m_clampProb;
  }

  /** Set the number of training iterations
   * @param numIterations the number of iterations
   */
  public void setNumIterations(int numIterations) {
    m_numIterations = numIterations;
  }

  /** Get the number of training iterations
   * @return the number of training iterations
   */
  public int getNumIterations() {
    return m_numIterations;
  }

  /** Create a copy of this metric
   * @return another AffineProbMetric with the same exact parameters as this metric
   */
  public Object clone() {
    AffineProbMetric metric = new AffineProbMetric();
    metric.setNormalized(m_normalized);
    metric.setUseGenerativeModel(m_useGenerativeModel);
    metric.setClampProb(m_clampProb);
    metric.setNumIterations(m_numIterations);
    return metric;
  }

  /**
   * Gets the current settings of AffineProbMetric.
   *
   * @return an array of strings suitable for passing to setOptions()
   * TODO!!!!
   */
  public String[] getOptions() {
    String[] options = new String[10];
    int current = 0;

    if (m_normalized) {
      options[current++] = "-N";
    }
    if (m_useGenerativeModel) {
      options[current++] = "-G";
    } else {
      options[current++] = "-A";
    }
    options[current++] = "-c";
    options[current++] = "" + m_clampProb;

    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * Parses a given list of options. Valid options are:<p>
   *
   * -N normalize by length
   * -m matchCost
   * -s subCost
   * -g gapStartCost
   * -e gapExtendCost
   */
  public void setOptions(String[] options) throws Exception {
    setNormalized(Utils.getFlag('N', options));
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   * TODO!!!
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(5);
    newVector.addElement(new Option("\tNormalize by lengths\n", "N", 0, "-N"));
    return newVector.elements();
  }

  /** The computation of a metric can be either based on distance, or on similarity
   * @return true
   */
  public boolean isDistanceBased() {
    return true;
  }

  /**
   * Returns a similarity estimate between two strings. Similarity is obtained by
   * inverting the distance value using one of three methods:
   * CONVERSION_LAPLACIAN, CONVERSION_EXPONENTIAL, CONVERSION_UNIT.
   * @param string1 First string.
   * @param string2 Second string.
   * @exception Exception if similarity could not be estimated.
   */
  public double similarity(String string1, String string2) throws Exception {
    switch (m_conversionType) {
    case CONVERSION_LAPLACIAN:
      return 1 / (1 + distance(string1, string2));
    case CONVERSION_UNIT:
      return 2 * (1 - distance(string1, string2));
    case CONVERSION_EXPONENTIAL:
      return Math.exp(-distance(string1, string2));
    default:
      throw new Exception("Unknown distance to similarity conversion method");
    }
  }

  /**
   * Set the type of similarity to distance conversion. Values other
   * than CONVERSION_LAPLACIAN, CONVERSION_UNIT, or CONVERSION_EXPONENTIAL will be ignored
   *
   * @param conversionType type of the similarity to distance conversion to use
   */
  public void setConversionType(SelectedTag conversionType) {
    if (conversionType.getTags() == TAGS_CONVERSION) {
      m_conversionType = conversionType.getSelectedTag().getID();
    }
  }

  /**
   * Return the type of similarity to distance conversion
   * @return one of CONVERSION_LAPLACIAN, CONVERSION_UNIT, or CONVERSION_EXPONENTIAL
   */
  public SelectedTag getConversionType() {
    return new SelectedTag(m_conversionType, TAGS_CONVERSION);
  }

  public static void main(String[] args) {
    try {
      AffineProbMetric metric = new AffineProbMetric();
      // metric.trainMetric(new ArrayList());
      String s1 = new String("abcde");
      String s2 = new String("ab");
      metric.printMatrices(s1, s2);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
}
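A minimal usage sketch follows, assuming the portion of AffineProbMetric omitted from this excerpt provides the no-argument constructor seen in main() and the trainMetric(...) step referenced there in a commented-out call; the demo class name, the training placeholder, and the example strings are illustrative, not taken from this file.

// AffineProbMetricDemo.java -- hypothetical driver, assumed to sit in the same package as AffineProbMetric
public class AffineProbMetricDemo {
  public static void main(String[] args) throws Exception {
    AffineProbMetric metric = new AffineProbMetric();
    metric.setNormalized(true);           // normalize the cost by the summed string lengths
    metric.setNumIterations(20);          // number of training iterations (illustrative value)
    // metric.trainMetric(trainingPairs); // assumed training step; m_editopCosts are learned here

    double dist = metric.costDistance("affine", "afine");  // affine-gap edit cost under the learned costs
    double sim  = metric.similarity("affine", "afine");    // distance inverted per m_conversionType
    System.out.println("distance = " + dist + ", similarity = " + sim);
  }
}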