⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 stringkernel.java

📁 Weka
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
    //being part of the subsequence match and once just as a gap.     //In both cases lambda is multiplied with the result.    double result = 0;    /*       for (int iS = n-1; iS <= endIndexS;iS++) {       result *= m_lambda;       result += kernelHelper2(n,s,iS, t, endIndexT);       }       if (m_useRecursionCache) {       cachekhK[adr % maxCache]=adr+1; cachekh[adr % maxCache]=result;       }       return result;       */    /* ^^^ again, above code segment does not store some intermediate results... */    result = m_lambda*kernelHelper(n,s,endIndexS-1,t,endIndexT)             + kernelHelper2(n,s,endIndexS,t,endIndexT);    if (m_useRecursionCache) {      cachekhK[adr % maxCache]=adr+1; cachekh[adr % maxCache]=result;    }    return result;  }  /**    * helper function for the evaluation of the kernel K'' see section   * 'Efficient Computation of SSK' in [1]   *   * @param n the current length of the matching subsequence   * @param s first string, as a char array   * @param t second string, as a char array   * @param endIndexS the portion of s currently regarded is s[1:endIndexS]   * @param endIndexT the portion of t currently regarded is t[1:endIndexT]   * @return a partial result for K'   */  protected double kernelHelper2(int n, char[] s, int endIndexS, char[] t,     int endIndexT) {    //recursion ends if one of the indices in both strings is <0    if (endIndexS <0 || endIndexT <0) {      return getReturnValue(n);    }    int adr = 0;    if (m_useRecursionCache) {      adr=m_multX*n+m_multY*endIndexS+m_multZ*endIndexT;      if ( cachekh2K[adr % maxCache] == adr+1) return cachekh2[adr % maxCache];    }    //spot the last character in s, we'll need it    char x = s[endIndexS];    //recurse if the last characters of s and t, x (and y) are identical.    //which is an easy case: just add up two recursions,     // 1. one that counts x and y as a part of the subsequence match    //	 -> n, endIndexS and endIndexT are decremented for next recursion level    // 	 -> lambda^2 is multiplied with the result to account for the length    //      of 2 that has been added to the length of the subsequence match    //      by accepting x and y.    // 2. one that counts y as a gap in the match     //   -> only endIndexT is decremented for next recursion level    // 	 -> lambda is multiplied with the result to account for the length    //      of 1 that has been added to the length of the subsequence match    //		by omitting y.    if (x == t[endIndexT]) {      double ret =  m_lambda * (kernelHelper2(n,s,endIndexS, t, endIndexT-1)          + m_lambda * kernelHelper(n-1,s,endIndexS-1, t, endIndexT-1));      if (m_useRecursionCache) {        cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret;      }      return ret;    } else {      double ret = m_lambda*kernelHelper2(n,s,endIndexS,t,endIndexT-1);      if (m_useRecursionCache) {        cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret;      }      return ret;    }    //look for x in t from back to front.     //this is actually an optimization from [1] that spares unneccessary    //recursions iff    //x is actually found in t, but not at the last position.    /*       int i;       int threshold = n>0?n-1:0;       for (i=endIndexT-1; i >= threshold;i--) {       if (x == t[i]) {       double ret=getPowerOfLambda(endIndexT-i) * kernelHelper2(n,s,endIndexS, t, i);       if (m_useRecursionCache) {       cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret;       }       return ret;       }       }       */		    //end the recursion if x is not found in t.    /*        double ret = getReturnValue(n);              if (m_useRecursionCache) {              cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret;              }              return ret;*/  }  /**   * the kernel function K explained in [1] using lambda pruning, explained in   * [2].  An additional parameter is introduced, which denotes the maximum   * length of a subsequence match. This allows for the control of how relaxed   * the subsequence matches are. <br>   *   * @param n the current length of the matching subsequence   * @param s first string, as a char array   * @param t second string, as a char array   * @param endIndexS the portion of s currently regarded is s[1:endIndexS]   * @param endIndexT the portion of t currently regarded is t[1:endIndexT]   * @param remainingMatchLength actually the initial value for   * maxLambdaExponent   * @return a double indicating the distance or similarity between s and t,    * according to and depending on the initial value for n.    */  protected double kernelLP(int n, char[] s, int endIndexS,char[] t,      int endIndexT,int remainingMatchLength) {    //see code docs in kernel()    if (Math.min(endIndexS+1,endIndexT +1) < n) {      return getReturnValue(n);    }    //lambda pruning check     //stops recursion if the match is so long that the resulting    //power of lambda is smaller than minLambda    //if lambda pruning is not used, the remainingMatchLength is < 0    //and this check never stops the recursion    if (remainingMatchLength == 0) return getReturnValue(n);    double result = 0;    //see code docs in kernel()    for (int iS =endIndexS; iS > n-2; iS--) {      double buf = 0;      char x = s[iS];      for (int j=0; j <= endIndexT; j++) {        if (t[j] == x){          //both t[j] and x are considered part of the subsequence match, hence          //subtract 2 from the remainingMatchLength          buf += kernelHelperLP(n-1,s,iS-1,t,j-1,remainingMatchLength-2);        }      }      result += buf * m_powersOflambda[2];    }    return result;  }  /**   * helper function for the evaluation of the kernel (K'n) using lambda pruning   *   * @param n the current length of the matching subsequence   * @param s first string, as a char array   * @param t second string, as a char array   * @param endIndexS the portion of s currently regarded is s[1:endIndexS]   * @param endIndexT the portion of t currently regarded is t[1:endIndexT]   * @param remainingMatchLength the number of characters that may still be   * used    * for matching (i.e. gaps + matches in both strings)   * @return a partial result for K    */  protected double kernelHelperLP (int n, char[] s, int endIndexS,char[] t,    int endIndexT,int remainingMatchLength) {    //see code docs in kernelHelper()    if (n == 0) {      return getReturnValue(n);          }    //see code docs in kernelHelper()    if (Math.min(endIndexS+1,endIndexT +1) < n) {;      return getReturnValue(n);    }    //lambda pruning check    //stops recursion if the match is so long that the resulting    //power of lambda is smaller than minLambda    //if lambda pruning is not used, the remainingMatchLength is < 0    //and this check never stops the recursion    if (remainingMatchLength < 2*n) {       return getReturnValue(n);    }    int adr=0;    if (m_useRecursionCache) {      adr = m_multX*n+m_multY*endIndexS+m_multZ*endIndexT             + m_multZZ * remainingMatchLength;      if (cachekh2K[adr % maxCache]==adr+1) {         return cachekh2[adr % maxCache];       }    }    int rml = 0; //counts the remaining match length    double result = 0;    //see code docs in kernelHelper()    //difference to implementation in kernelHelper:    //*)choose different starting point, which is found counting    //the maximal remaining match length from endIndexS.    //*)keep track of the remaining match length, rml, which is    //  incremented each loop    for (int iS = (endIndexS-remainingMatchLength); iS <= endIndexS;iS++) {      result *= m_lambda;      result += kernelHelper2LP(n,s,iS, t, endIndexT,rml++);    }    if (m_useRecursionCache && endIndexS >= 0 && endIndexT >= 0 && n >= 0) {      cachekhK[adr % maxCache]=adr+1; cachekh[adr % maxCache]=result;     }    return result;  }  /**   * helper function for the evaluation of the kernel (K''n) using lambda   * pruning   *   * @param n the current length of the matching subsequence   * @param s first string, as a char array   * @param t second string, as a char array   * @param endIndexS the portion of s currently regarded is s[1:endIndexS]   * @param endIndexT the portion of t currently regarded is t[1:endIndexT]   * @param remainingMatchLength the number of characters that may still be   * used    * for matching (i.e. gaps + matches in both strings)   * @return a partial result for K'    */  protected double kernelHelper2LP(int n, char[] s, int endIndexS,char[] t,    int endIndexT,int remainingMatchLength) {    //lambda pruning check    //stops recursion if the match is so long that the resulting    //power of lambda is smaller than minLambda    //if lambda pruning is not used, the remainingMatchLength is < 0    //and this check never stops the recursion    //if (remainingMatchLength <= 0) return 0;    if (remainingMatchLength < 2*n) return getReturnValue(n);    //see code docs in kernelHelper2()    if (endIndexS <0 || endIndexT <0) return getReturnValue(n);    int adr=0;    if (m_useRecursionCache){      adr = m_multX*n+m_multY*endIndexS+m_multZ*endIndexT            + m_multZZ * remainingMatchLength;      if (cachekh2K[adr % maxCache]==adr+1) {         return cachekh2[adr % maxCache];       }    }    char x = s[endIndexS];    if (x == t[endIndexT]) {      double ret =         m_lambda         * (kernelHelper2LP(n,s,endIndexS,t,endIndexT-1,remainingMatchLength-1)        + m_lambda         * kernelHelperLP(n-1,s,endIndexS-1,t,endIndexT-1,remainingMatchLength-2));      if (m_useRecursionCache && endIndexS >= 0 && endIndexT >= 0 && n >= 0) {        cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret; }      return ret;    }    //see code docs in kernelHelper()    //differences to implementation in kernelHelper():    //*) choose a different ending point for the loop    //   based on the remaining match length    int i;    int minIndex = endIndexT - remainingMatchLength;    if (minIndex < 0) minIndex = 0;    for (i=endIndexT; i >= minIndex;i--) {      if (x == t[i]) {        int skipLength = endIndexT -i;        double ret = getPowerOfLambda(skipLength) *          kernelHelper2LP(n,s,endIndexS,t,i,remainingMatchLength-skipLength);        if (m_useRecursionCache && endIndexS >= 0 && endIndexT >= 0 && n >= 0) {           cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret;         }        return ret;      }    }    double ret = getReturnValue(n);    if (m_useRecursionCache && endIndexS >= 0 && endIndexT >= 0 && n >= 0) {       cachekh2K[adr % maxCache]=adr+1; cachekh2[adr % maxCache]=ret;     }    return ret;  }  /**   * precalculates small powers of lambda to speed up the kernel evaluation   *   * @return		the powers   */  private double[] calculatePowersOfLambda(){    double[] powers = new double[MAX_POWER_OF_LAMBDA+1];    powers[0] = 1.0;    double val = 1.0;    for (int i = 1; i<=MAX_POWER_OF_LAMBDA;i++) {      val *= m_lambda;      powers[i] = val;    }    return powers;  }  /**   * retrieves a power of lambda from the lambda cache or calculates it   * directly   *   * @param exponent	the exponent to calculate   * @return 		the exponent-th power of lambda   */  private double getPowerOfLambda(int exponent){    if (exponent > MAX_POWER_OF_LAMBDA)       return Math.pow(m_lambda,exponent);        if (exponent < 0)       throw new IllegalArgumentException(          "only positive powers of lambda may be computed");    return m_powersOflambda[exponent];  }  /**   * initializes variables etc.   *    * @param data	the data to use   */  protected void initVars(Instances data) {    super.initVars(data);    m_kernelEvals    = 0;    // take the first string attribute    m_strAttr        = -1;    for (int i = 0; i < data.numAttributes(); i++) {      if (i == data.classIndex())	continue;      if (data.attribute(i).type() == Attribute.STRING) {	m_strAttr = i;	break;      }    }    m_numInsts       = m_data.numInstances();    m_storage        = new double[m_cacheSize];    m_keys           = new long[m_cacheSize];    m_powersOflambda = calculatePowersOfLambda();  }  /**    * Returns the Capabilities of this kernel.   *   * @return            the capabilities of this object   * @see               Capabilities   */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();        result.enable(Capability.STRING_ATTRIBUTES);    result.enableAllClasses();    result.enable(Capability.MISSING_CLASS_VALUES);        return result;  }    /**   * builds the kernel with the given data.   *    * @param data	the data to base the kernel on   * @throws Exception	if something goes wrong, e.g., the data does not   * 			consist of one string attribute and the class   */  public void buildKernel(Instances data) throws Exception {    super.buildKernel(data);  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -