📄 stringkernel.java
字号:
//being part of the subsequence match and once just as a gap. //In both cases lambda is multiplied with the result. double result = 0; /* for (int iS = n-1; iS <= endIndexS;iS++) { result *= m_lambda; result += kernelHelper2(n,s,iS, t, endIndexT); } if (m_useRecursionCache) { cachekhK[adr % maxCache]=adr+1; cachekh[adr % maxCache]=result; } return result; */ /* ^^^ again, above code segment does not store some intermediate results... */ result = m_lambda*kernelHelper(n,s,endIndexS-1,t,endIndexT) + kernelHelper2(n,s,endIndexS,t,endIndexT); if (m_useRecursionCache) { cachekhK[adr % maxCache]=adr+1; cachekh[adr % maxCache]=result; } return result; } /** * helper function for the evaluation of the kernel K'' see section * 'Efficient Computation of SSK' in [1] * * @param n the current length of the matching subsequence * @param s first string, as a char array * @param t second string, as a char array * @param endIndexS the portion of s currently regarded is s[1:endIndexS] * @param endIndexT the portion of t currently regarded is t[1:endIndexT] * @return a partial result for K' */ protected double kernelHelper2(int n, char[] s, int endIndexS, char[] t, int endIndexT) { //recursion ends if one of the indices in both strings is <0 if (endIndexS <0 || endIndexT <0) { return getReturnValue(n); } int adr = 0; if (m_useRecursionCache) { adr=m_multX*n+m_multY*endIndexS+m_multZ*endIndexT; if ( cachekh2K[adr % maxCache] == adr+1) return cachekh2[adr % maxCache]; } //spot the last character in s, we'll need it char x = s[endIndexS]; //recurse if the last characters of s and t, x (and y) are identical. //which is an easy case: just add up two recursions, // 1. one that counts x and y as a part of the subsequence match // -> n, endIndexS and endIndexT are decremented for next recursion level // -> lambda^2 is multiplied with the result to account for the length // of 2 that has been added to the length of the subsequence match // by accepting x and y. // 2. one that counts y as a gap in the match // -> only endIndexT is decremented for next recursion level // -> lambda is multiplied with the result to account for the length // of 1 that has been added to the length of the subsequence match // by omitting y. if (x == t[endIndexT]) { double ret = m_lambda * (kernelHelper2(n,s,endIndexS, t, endIndexT-1) + m_lambda * kernelHelper(n-1,s,endIndexS-1, t, endIndexT-1)); if (m_useRecursionCache) { cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret; } return ret; } else { double ret = m_lambda*kernelHelper2(n,s,endIndexS,t,endIndexT-1); if (m_useRecursionCache) { cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret; } return ret; } //look for x in t from back to front. //this is actually an optimization from [1] that spares unneccessary //recursions iff //x is actually found in t, but not at the last position. /* int i; int threshold = n>0?n-1:0; for (i=endIndexT-1; i >= threshold;i--) { if (x == t[i]) { double ret=getPowerOfLambda(endIndexT-i) * kernelHelper2(n,s,endIndexS, t, i); if (m_useRecursionCache) { cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret; } return ret; } } */ //end the recursion if x is not found in t. /* double ret = getReturnValue(n); if (m_useRecursionCache) { cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret; } return ret;*/ } /** * the kernel function K explained in [1] using lambda pruning, explained in * [2]. An additional parameter is introduced, which denotes the maximum * length of a subsequence match. This allows for the control of how relaxed * the subsequence matches are. <br> * * @param n the current length of the matching subsequence * @param s first string, as a char array * @param t second string, as a char array * @param endIndexS the portion of s currently regarded is s[1:endIndexS] * @param endIndexT the portion of t currently regarded is t[1:endIndexT] * @param remainingMatchLength actually the initial value for * maxLambdaExponent * @return a double indicating the distance or similarity between s and t, * according to and depending on the initial value for n. */ protected double kernelLP(int n, char[] s, int endIndexS,char[] t, int endIndexT,int remainingMatchLength) { //see code docs in kernel() if (Math.min(endIndexS+1,endIndexT +1) < n) { return getReturnValue(n); } //lambda pruning check //stops recursion if the match is so long that the resulting //power of lambda is smaller than minLambda //if lambda pruning is not used, the remainingMatchLength is < 0 //and this check never stops the recursion if (remainingMatchLength == 0) return getReturnValue(n); double result = 0; //see code docs in kernel() for (int iS =endIndexS; iS > n-2; iS--) { double buf = 0; char x = s[iS]; for (int j=0; j <= endIndexT; j++) { if (t[j] == x){ //both t[j] and x are considered part of the subsequence match, hence //subtract 2 from the remainingMatchLength buf += kernelHelperLP(n-1,s,iS-1,t,j-1,remainingMatchLength-2); } } result += buf * m_powersOflambda[2]; } return result; } /** * helper function for the evaluation of the kernel (K'n) using lambda pruning * * @param n the current length of the matching subsequence * @param s first string, as a char array * @param t second string, as a char array * @param endIndexS the portion of s currently regarded is s[1:endIndexS] * @param endIndexT the portion of t currently regarded is t[1:endIndexT] * @param remainingMatchLength the number of characters that may still be * used * for matching (i.e. gaps + matches in both strings) * @return a partial result for K */ protected double kernelHelperLP (int n, char[] s, int endIndexS,char[] t, int endIndexT,int remainingMatchLength) { //see code docs in kernelHelper() if (n == 0) { return getReturnValue(n); } //see code docs in kernelHelper() if (Math.min(endIndexS+1,endIndexT +1) < n) {; return getReturnValue(n); } //lambda pruning check //stops recursion if the match is so long that the resulting //power of lambda is smaller than minLambda //if lambda pruning is not used, the remainingMatchLength is < 0 //and this check never stops the recursion if (remainingMatchLength < 2*n) { return getReturnValue(n); } int adr=0; if (m_useRecursionCache) { adr = m_multX*n+m_multY*endIndexS+m_multZ*endIndexT + m_multZZ * remainingMatchLength; if (cachekh2K[adr % maxCache]==adr+1) { return cachekh2[adr % maxCache]; } } int rml = 0; //counts the remaining match length double result = 0; //see code docs in kernelHelper() //difference to implementation in kernelHelper: //*)choose different starting point, which is found counting //the maximal remaining match length from endIndexS. //*)keep track of the remaining match length, rml, which is // incremented each loop for (int iS = (endIndexS-remainingMatchLength); iS <= endIndexS;iS++) { result *= m_lambda; result += kernelHelper2LP(n,s,iS, t, endIndexT,rml++); } if (m_useRecursionCache && endIndexS >= 0 && endIndexT >= 0 && n >= 0) { cachekhK[adr % maxCache]=adr+1; cachekh[adr % maxCache]=result; } return result; } /** * helper function for the evaluation of the kernel (K''n) using lambda * pruning * * @param n the current length of the matching subsequence * @param s first string, as a char array * @param t second string, as a char array * @param endIndexS the portion of s currently regarded is s[1:endIndexS] * @param endIndexT the portion of t currently regarded is t[1:endIndexT] * @param remainingMatchLength the number of characters that may still be * used * for matching (i.e. gaps + matches in both strings) * @return a partial result for K' */ protected double kernelHelper2LP(int n, char[] s, int endIndexS,char[] t, int endIndexT,int remainingMatchLength) { //lambda pruning check //stops recursion if the match is so long that the resulting //power of lambda is smaller than minLambda //if lambda pruning is not used, the remainingMatchLength is < 0 //and this check never stops the recursion //if (remainingMatchLength <= 0) return 0; if (remainingMatchLength < 2*n) return getReturnValue(n); //see code docs in kernelHelper2() if (endIndexS <0 || endIndexT <0) return getReturnValue(n); int adr=0; if (m_useRecursionCache){ adr = m_multX*n+m_multY*endIndexS+m_multZ*endIndexT + m_multZZ * remainingMatchLength; if (cachekh2K[adr % maxCache]==adr+1) { return cachekh2[adr % maxCache]; } } char x = s[endIndexS]; if (x == t[endIndexT]) { double ret = m_lambda * (kernelHelper2LP(n,s,endIndexS,t,endIndexT-1,remainingMatchLength-1) + m_lambda * kernelHelperLP(n-1,s,endIndexS-1,t,endIndexT-1,remainingMatchLength-2)); if (m_useRecursionCache && endIndexS >= 0 && endIndexT >= 0 && n >= 0) { cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret; } return ret; } //see code docs in kernelHelper() //differences to implementation in kernelHelper(): //*) choose a different ending point for the loop // based on the remaining match length int i; int minIndex = endIndexT - remainingMatchLength; if (minIndex < 0) minIndex = 0; for (i=endIndexT; i >= minIndex;i--) { if (x == t[i]) { int skipLength = endIndexT -i; double ret = getPowerOfLambda(skipLength) * kernelHelper2LP(n,s,endIndexS,t,i,remainingMatchLength-skipLength); if (m_useRecursionCache && endIndexS >= 0 && endIndexT >= 0 && n >= 0) { cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret; } return ret; } } double ret = getReturnValue(n); if (m_useRecursionCache && endIndexS >= 0 && endIndexT >= 0 && n >= 0) { cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret; } return ret; } /** * precalculates small powers of lambda to speed up the kernel evaluation * * @return the powers */ private double[] calculatePowersOfLambda(){ double[] powers = new double[MAX_POWER_OF_LAMBDA+1]; powers[0] = 1.0; double val = 1.0; for (int i = 1; i<=MAX_POWER_OF_LAMBDA;i++) { val *= m_lambda; powers[i] = val; } return powers; } /** * retrieves a power of lambda from the lambda cache or calculates it * directly * * @param exponent the exponent to calculate * @return the exponent-th power of lambda */ private double getPowerOfLambda(int exponent){ if (exponent > MAX_POWER_OF_LAMBDA) return Math.pow(m_lambda,exponent); if (exponent < 0) throw new IllegalArgumentException( "only positive powers of lambda may be computed"); return m_powersOflambda[exponent]; } /** * initializes variables etc. * * @param data the data to use */ protected void initVars(Instances data) { super.initVars(data); m_kernelEvals = 0; // take the first string attribute m_strAttr = -1; for (int i = 0; i < data.numAttributes(); i++) { if (i == data.classIndex()) continue; if (data.attribute(i).type() == Attribute.STRING) { m_strAttr = i; break; } } m_numInsts = m_data.numInstances(); m_storage = new double[m_cacheSize]; m_keys = new long[m_cacheSize]; m_powersOflambda = calculatePowersOfLambda(); } /** * Returns the Capabilities of this kernel. * * @return the capabilities of this object * @see Capabilities */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); result.enable(Capability.STRING_ATTRIBUTES); result.enableAllClasses(); result.enable(Capability.MISSING_CLASS_VALUES); return result; } /** * builds the kernel with the given data. * * @param data the data to base the kernel on * @throws Exception if something goes wrong, e.g., the data does not * consist of one string attribute and the class */ public void buildKernel(Instances data) throws Exception { super.buildKernel(data); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -