📄 mismo.java

📁 Java 编写的多种数据挖掘算法包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
      // Set class values      m_class = new double[insts.numInstances()];      m_iUp = -1; m_iLow = -1;      for (int i = 0; i < m_class.length; i++) {        if ((int) insts.instance(i).classValue() == cl1) {          m_class[i] = -1; m_iLow = i;        } else if ((int) insts.instance(i).classValue() == cl2) {          m_class[i] = 1; m_iUp = i;        } else {          throw new Exception ("This should never happen!");        }      }      // Check whether one or both classes are missing      if ((m_iUp == -1) || (m_iLow == -1)) {        if (m_iUp != -1) {          m_b = -1;        } else if (m_iLow != -1) {          m_b = 1;        } else {          m_class = null;          return;        }        m_supportVectors = new SMOset(0);        m_alpha = new double[0];        m_class = new double[0];        // Fit sigmoid if requested        if (fitLogistic) {          fitLogistic(insts, cl1, cl2, numFolds, new Random(randomSeed));        }        return;      }      // Set the reference to the data      m_data = insts;      m_weights = null;      // Initialize alpha array to zero      m_alpha = new double[m_data.numInstances()];      // Initialize sets      m_supportVectors = new SMOset(m_data.numInstances());      m_I0 = new SMOset(m_data.numInstances());      m_I1 = new SMOset(m_data.numInstances());      m_I2 = new SMOset(m_data.numInstances());      m_I3 = new SMOset(m_data.numInstances());      m_I4 = new SMOset(m_data.numInstances());      // Clean out some instance variables      m_sparseWeights = null;      m_sparseIndices = null;      // Initialize error cache      m_errors = new double[m_data.numInstances()];      m_errors[m_iLow] = 1; m_errors[m_iUp] = -1;      // Initialize kernel      m_kernel.buildKernel(m_data);      // Build up I1 and I4      for (int i = 0; i < m_class.length; i++ ) {        if (m_class[i] == 1) {          m_I1.insert(i);        } else {          m_I4.insert(i);        }      }      // Loop to find all the support vectors      int numChanged = 0;      boolean examineAll = true;      while ((numChanged > 0) || examineAll) {        numChanged = 0;        if (examineAll) {          for (int i = 0; i < m_alpha.length; i++) {            if (examineExample(i)) {              numChanged++;            }          }        } else {          // This code implements Modification 1 from Keerthi et al.'s paper          for (int i = 0; i < m_alpha.length; i++) {            if ((m_alpha[i] > 0) &&                  (m_alpha[i] < m_C * m_data.instance(i).weight())) {              if (examineExample(i)) {                numChanged++;              }              // Is optimality on unbound vectors obtained?              if (m_bUp > m_bLow - 2 * m_tol) {                numChanged = 0;                break;              }                }          }          //This is the code for Modification 2 from Keerthi et al.'s paper          /*boolean innerLoopSuccess = true;             numChanged = 0;            while ((m_bUp < m_bLow - 2 * m_tol) && (innerLoopSuccess == true)) {            innerLoopSuccess = takeStep(m_iUp, m_iLow, m_errors[m_iLow]);            }*/        }        if (examineAll) {          examineAll = false;        } else if (numChanged == 0) {          examineAll = true;        }      }      // Set threshold      m_b = (m_bLow + m_bUp) / 2.0;      // Save memory      m_kernel.clean();       m_errors = null;      m_I0 = m_I1 = m_I2 = m_I3 = m_I4 = null;      // Fit sigmoid if requested      if (fitLogistic) {        fitLogistic(insts, cl1, cl2, numFolds, new Random(randomSeed));      }    }    /**     * Computes SVM output for given instance.     *     * @param index the instance for which output is to be computed     * @param inst the instance      * @return the output of the SVM for the given instance     * @throws Exception if something goes wrong     */    protected double SVMOutput(int index, Instance inst) throws Exception {      double result = 0;      for (int i = m_supportVectors.getNext(-1); i != -1;           i = m_supportVectors.getNext(i)) {        result += m_class[i] * m_alpha[i] * m_kernel.eval(index, i, inst);      }      result -= m_b;      return result;    }    /**     * Prints out the classifier.     *     * @return a description of the classifier as a string     */    public String toString() {      StringBuffer text = new StringBuffer();      int printed = 0;      if ((m_alpha == null) && (m_sparseWeights == null)) {        return "BinaryMISMO: No model built yet.\n";      }      try {        text.append("BinaryMISMO\n\n");        for (int i = 0; i < m_alpha.length; i++) {          if (m_supportVectors.contains(i)) {            double val = m_alpha[i];            if (m_class[i] == 1) {              if (printed > 0) {                text.append(" + ");              }            } else {              text.append(" - ");            }            text.append(Utils.doubleToString(val, 12, 4)                 + " * <");            for (int j = 0; j < m_data.numAttributes(); j++) {              if (j != m_data.classIndex()) {                text.append(m_data.instance(i).toString(j));              }              if (j != m_data.numAttributes() - 1) {                text.append(" ");              }            }            text.append("> * X]\n");            printed++;          }        }        if (m_b > 0) {          text.append(" - " + Utils.doubleToString(m_b, 12, 4));        } else {          text.append(" + " + Utils.doubleToString(-m_b, 12, 4));        }        text.append("\n\nNumber of support vectors: " +             m_supportVectors.numElements());        int numEval = 0;        int numCacheHits = -1;        if(m_kernel != null)        {          numEval = m_kernel.numEvals();          numCacheHits = m_kernel.numCacheHits();        }        text.append("\n\nNumber of kernel evaluations: " + numEval);        if (numCacheHits >= 0 && numEval > 0)        {          double hitRatio = 1 - numEval*1.0/(numCacheHits+numEval);          text.append(" (" + Utils.doubleToString(hitRatio*100, 7, 3).trim() + "% cached)");        }      } catch (Exception e) {        e.printStackTrace();        return "Can't print BinaryMISMO classifier.";      }      return text.toString();    }    /**     * Examines instance.     *     * @param i2 index of instance to examine     * @return true if examination was successfull     * @throws Exception if something goes wrong     */    protected boolean examineExample(int i2) throws Exception {      double y2, F2;      int i1 = -1;      y2 = m_class[i2];      if (m_I0.contains(i2)) {        F2 = m_errors[i2];      } else {         F2 = SVMOutput(i2, m_data.instance(i2)) + m_b - y2;        m_errors[i2] = F2;        // Update thresholds        if ((m_I1.contains(i2) || m_I2.contains(i2)) && (F2 < m_bUp)) {          m_bUp = F2; m_iUp = i2;        } else if ((m_I3.contains(i2) || m_I4.contains(i2)) && (F2 > m_bLow)) {          m_bLow = F2; m_iLow = i2;        }      }      // Check optimality using current bLow and bUp and, if      // violated, find an index i1 to do joint optimization      // with i2...      boolean optimal = true;      if (m_I0.contains(i2) || m_I1.contains(i2) || m_I2.contains(i2)) {        if (m_bLow - F2 > 2 * m_tol) {          optimal = false; i1 = m_iLow;        }      }      if (m_I0.contains(i2) || m_I3.contains(i2) || m_I4.contains(i2)) {        if (F2 - m_bUp > 2 * m_tol) {          optimal = false; i1 = m_iUp;        }      }      if (optimal) {        return false;      }      // For i2 unbound choose the better i1...      if (m_I0.contains(i2)) {        if (m_bLow - F2 > F2 - m_bUp) {          i1 = m_iLow;        } else {          i1 = m_iUp;        }      }      if (i1 == -1) {        throw new Exception("This should never happen!");      }      return takeStep(i1, i2, F2);    }    /**     * Method solving for the Lagrange multipliers for     * two instances.     *     * @param i1 index of the first instance     * @param i2 index of the second instance     * @param F2     * @return true if multipliers could be found     * @throws Exception if something goes wrong     */    protected boolean takeStep(int i1, int i2, double F2) throws Exception {      double alph1, alph2, y1, y2, F1, s, L, H, k11, k12, k22, eta,             a1, a2, f1, f2, v1, v2, Lobj, Hobj;      double C1 = m_C * m_data.instance(i1).weight();      double C2 = m_C * m_data.instance(i2).weight();      // Don't do anything if the two instances are the same      if (i1 == i2) {        return false;      }      // Initialize variables      alph1 = m_alpha[i1]; alph2 = m_alpha[i2];      y1 = m_class[i1]; y2 = m_class[i2];      F1 = m_errors[i1];      s = y1 * y2;      // Find the constraints on a2      if (y1 != y2) {        L = Math.max(0, alph2 - alph1);         H = Math.min(C2, C1 + alph2 - alph1);      } else {        L = Math.max(0, alph1 + alph2 - C1);        H = Math.min(C2, alph1 + alph2);      }      if (L >= H) {        return false;      }      // Compute second derivative of objective function      k11 = m_kernel.eval(i1, i1, m_data.instance(i1));      k12 = m_kernel.eval(i1, i2, m_data.instance(i1));      k22 = m_kernel.eval(i2, i2, m_data.instance(i2));      eta = 2 * k12 - k11 - k22;      // Check if second derivative is negative      if (eta < 0) {        // Compute unconstrained maximum        a2 = alph2 - y2 * (F1 - F2) / eta;        // Compute constrained maximum        if (a2 < L) {          a2 = L;        } else if (a2 > H) {          a2 = H;        }      } else {        // Look at endpoints of diagonal        f1 = SVMOutput(i1, m_data.instance(i1));        f2 = SVMOutput(i2, m_data.instance(i2));        v1 = f1 + m_b - y1 * alph1 * k11 - y2 * alph2 * k12;         v2 = f2 + m_b - y1 * alph1 * k12 - y2 * alph2 * k22;         double gamma = alph1 + s * alph2;        Lobj = (gamma - s * L) + L - 0.5 * k11 * (gamma - s * L) * (gamma - s * L) -           0.5 * k22 * L * L - s * k12 * (gamma - s * L) * L -           y1 * (gamma - s * L) * v1 - y2 * L * v2;        Hobj = (gamma - s * H) + H - 0.5 * k11 * (gamma - s * H) * (gamma - s * H) -           0.5 * k22 * H * H - s * k12 * (gamma - s * H) * H -           y1 * (gamma - s * H) * v1 - y2 * H * v2;        if (Lobj > Hobj + m_eps) {          a2 = L;        } else if (Lobj < Hobj - m_eps) {          a2 = H;        } else {          a2 = alph2;        }      }      if (Math.abs(a2 - alph2) < m_eps * (a2 + alph2 + m_eps)) {        return false;      }      // To prevent precision problems      if (a2 > C2 - m_Del * C2) {        a2 = C2;      } else if (a2 <= m_Del * C2) {        a2 = 0;      }      // Recompute a1      a1 = alph1 + s * (alph2 - a2);      // To prevent precision problems      if (a1 > C1 - m_Del * C1) {        a1 = C1;      } else if (a1 <= m_Del * C1) {        a1 = 0;      }      // Update sets      if (a1 > 0) {        m_supportVectors.insert(i1);      } else {        m_supportVectors.delete(i1);      }      if ((a1 > 0) && (a1 < C1)) {        m_I0.insert(i1);      } else {        m_I0.delete(i1);      }      if ((y1 == 1) && (a1 == 0)) {        m_I1.insert(i1);      } else {        m_I1.delete(i1);      }      if ((y1 == -1) && (a1 == C1)) {        m_I2.insert(i1);      } else {        m_I2.delete(i1);      }      if ((y1 == 1) && (a1 == C1)) {        m_I3.insert(i1);      } else {        m_I3.delete(i1);      }      if ((y1 == -1) && (a1 == 0)) {        m_I4.insert(i1);      } else {        m_I4.delete(i1);      }      if (a2 > 0) {        m_supportVectors.insert(i2);      } else {        m_supportVectors.delete(i2);      }      if ((a2 > 0) && (a2 < C2)) {        m_I0.insert(i2);      } else {        m_I0.delete(i2);      }      if ((y2 == 1) && (a2 == 0)) {
💿 文件大小 3872 K
👤 上传用户 fengkuangyidao
📂 所属分类 Applet
🏷️ 相关标签

#Java #编写 #数据挖掘算法 #分类
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -