📄 smithwatermangotohwindowedaffine.java
字号:
* sets the d(i,j) cost function used.
*
* @param dCostFunc - the cost function to use
*/
public final void setdCostFunc(final AbstractSubstitutionCost dCostFunc) {
this.dCostFunc = dCostFunc;
}
/**
* returns the string identifier for the metric.
*
* @return the string identifier for the metric
*/
public String getShortDescriptionString() {
return "SmithWatermanGotohWindowedAffine";
}
/**
* returns the long string identifier for the metric.
*
* @return the long string identifier for the metric
*/
public String getLongDescriptionString() {
return "Implements the Smith-Waterman-Gotoh algorithm with a windowed affine gap providing a similarity measure between two string";
}
/**
* gets a div class xhtml similarity explaining the operation of the metric.
*
* @param string1 string 1
* @param string2 string 2
*
* @return a div class html section detailing the metric operation.
*/
public String getSimilarityExplained(String string1, String string2) {
//todo this should explain the operation of a given comparison
return null; //To change body of implemented methods use File | Settings | File Templates.
}
/**
* gets the estimated time in milliseconds it takes to perform a similarity timing.
*
* @param string1 string 1
* @param string2 string 2
*
* @return the estimated time in milliseconds taken to perform the similarity measure
*/
public float getSimilarityTimingEstimated(final String string1, final String string2) {
//timed millisecond times with string lengths from 1 + 50 each increment
//0 15.62 62.5 179.5 360 609 891 1297 1640 2125 2688 3297 3984 4766 5485 6437 7313 8312 9391 10500 12704 12938 14359 15704 17266 18844 20360 23547 23845 25688 27656 29532 32048 33891 35844 37938 40251 42610 45001 47407 50142 52266 55314 57970 60782 63814 66470 70376 72767 75861 79283 82564 85814 89408 92658 96283 100080 103283 107518 111033
final float str1Length = string1.length();
final float str2Length = string2.length();
return ((str1Length * str2Length * windowSize) + (str1Length * str2Length * windowSize)) * ESTIMATEDTIMINGCONST;
}
/**
* gets the similarity of the two strings using Smith-Waterman-Gotoh distance.
*
* @param string1
* @param string2
*
* @return a value between 0-1 of the similarity
*/
public final float getSimilarity(final String string1, final String string2) {
final float smithWatermanGotoh = getUnNormalisedSimilarity(string1, string2);
//normalise into zero to one region from min max possible
float maxValue = Math.min(string1.length(), string2.length());
if (dCostFunc.getMaxCost() > -gGapFunc.getMaxCost()) {
maxValue *= dCostFunc.getMaxCost();
} else {
maxValue *= -gGapFunc.getMaxCost();
}
//check for 0 maxLen
if (maxValue == 0) {
return 1.0f; //as both strings identically zero length
} else {
//return actual / possible NeedlemanWunch distance to get 0-1 range
return (smithWatermanGotoh / maxValue);
}
}
/**
* implements the Smith-Waterman-Gotoh distance function //see http://www.gen.tcd.ie/molevol/nwswat.html for
* details.
*
* @param s
* @param t
*
* @return the Smith-Waterman-Gotoh distance for the two strings given
*/
public float getUnNormalisedSimilarity(final String s, final String t) {
final float[][] d; // matrix
final int n; // length of s
final int m; // length of t
int i; // iterates through s
int j; // iterates through t
float cost; // cost
// check for zero length input
n = s.length();
m = t.length();
if (n == 0) {
return m;
}
if (m == 0) {
return n;
}
//create matrix (n)x(m)
d = new float[n][m];
//process first row and column first as no need to consider previous rows/columns
float maxSoFar = 0.0f;
for (i = 0; i < n; i++) {
// get the substution cost
cost = dCostFunc.getCost(s, i, t, 0);
if (i == 0) {
d[0][0] = Math.max(0,
cost);
} else {
float maxGapCost = 0.0f;
int windowStart = i-windowSize;
if (windowStart < 1) {
windowStart = 1;
}
for (int k = windowStart; k < i; k++) {
maxGapCost = Math.max(maxGapCost, d[i - k][0] - gGapFunc.getCost(s, i - k, i));
}
d[i][0] = MathFuncs.max3(0,
maxGapCost,
cost);
}
//update max possible if available
if (d[i][0] > maxSoFar) {
maxSoFar = d[i][0];
}
}
for (j = 0; j < m; j++) {
// get the substution cost
cost = dCostFunc.getCost(s, 0, t, j);
if (j == 0) {
d[0][0] = Math.max(0,
cost);
} else {
float maxGapCost = 0.0f;
int windowStart = j-windowSize;
if (windowStart < 1) {
windowStart = 1;
}
for (int k = windowStart; k < j; k++) {
maxGapCost = Math.max(maxGapCost, d[0][j - k] - gGapFunc.getCost(t, j - k, j));
}
d[0][j] = MathFuncs.max3(0,
maxGapCost,
cost);
}
//update max possible if available
if (d[0][j] > maxSoFar) {
maxSoFar = d[0][j];
}
}
// cycle through rest of table filling values from the lowest cost value of the three part cost function
for (i = 1; i < n; i++) {
for (j = 1; j < m; j++) {
// get the substution cost
cost = dCostFunc.getCost(s, i, t, j);
// find lowest cost at point from three possible
float maxGapCost1 = 0.0f;
float maxGapCost2 = 0.0f;
int windowStart = i-windowSize;
if (windowStart < 1) {
windowStart = 1;
}
for (int k = windowStart; k < i; k++) {
maxGapCost1 = Math.max(maxGapCost1, d[i - k][j] - gGapFunc.getCost(s, i - k, i));
}
windowStart = j-windowSize;
if (windowStart < 1) {
windowStart = 1;
}
for (int k = windowStart; k < j; k++) {
maxGapCost2 = Math.max(maxGapCost2, d[i][j - k] - gGapFunc.getCost(t, j - k, j));
}
d[i][j] = MathFuncs.max4(0,
maxGapCost1,
maxGapCost2,
d[i - 1][j - 1] + cost);
//update max possible if available
if (d[i][j] > maxSoFar) {
maxSoFar = d[i][j];
}
}
}
//debug output
/* System.out.print(" \t");
for (j = 0; j < m; j++) {
System.out.print(t.charAt(j) + " ");
}
System.out.print("\n");
for (i = 0; i < n; i++) {
//print characteer of string s
System.out.print(s.charAt(i) + "\t");
for (j = 0; j < m; j++) {
System.out.print(d[i][j] + " ");
}
System.out.print("\n");
}
*/ //end debug output
// return max value within matrix as holds the maximum edit score
return maxSoFar;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -