📄 smoreg.java
字号:
} if (examineAll) examineAll = false; else if (numChanged == 0) examineAll = true; } /* END OF MAIN ROUTINE FOR MODIFICATION 1 */ // debuggage // checkOptimality(); // checkAlphas(); // displayB(); // Set threshold m_b = (m_bLow + m_bUp) / 2.0; // Save memory m_kernel.clean(); m_fcache = null; m_I0 = m_I1 = m_I2 = m_I3 = null; // If machine is linear, delete training data // and store weight vector in sparse format if (m_KernelIsLinear) { // compute weight vector for(int j = 0; j < m_weights.length; j++){ m_weights[j] = 0; } for(int k = 0; k < m_alpha.length; k++){ for(int j = 0; j < m_weights.length; j++){ if(j == m_data.classIndex()) continue; m_weights[j] += (m_alpha[k] - m_alpha_[k]) * m_data.instance(k).value(j); } } // Convert weight vector double[] sparseWeights = new double[m_weights.length]; int[] sparseIndices = new int[m_weights.length]; int counter = 0; for (int ii = 0; ii < m_weights.length; ii++) { if (m_weights[ii] != 0.0) { sparseWeights[counter] = m_weights[ii]; sparseIndices[counter] = ii; counter++; } } m_sparseWeights = new double[counter]; m_sparseIndices = new int[counter]; System.arraycopy(sparseWeights, 0, m_sparseWeights, 0, counter); System.arraycopy(sparseIndices, 0, m_sparseIndices, 0, counter); // Clean out training data if (!m_checksTurnedOff) { m_data = new Instances(m_data, 0); } else { m_data = null; } // Clean out weight vector m_weights = null; // We don't need the alphas in the linear case m_alpha = null; m_alpha_ = null; } } /** * Examines instance. (As defined in Shevade's paper.) * * @param i2 index of instance to examine * @return true if examination was successfull * @throws Exception if something goes wrong */ protected int examineExample(int i2) throws Exception{ // Lagrange multipliers for i2 double alpha2 = m_alpha[i2]; double alpha2_ = m_alpha_[i2]; double F2 = 0; if(m_I0.contains(i2)){ F2 = m_fcache[i2]; } else { // compute F2 = F_i2 and set f-cache[i2] = F2 F2 = m_data.instance(i2).classValue(); for(int j = 0; j < m_alpha.length; j++){ F2 -= (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i2, j, m_data.instance(i2)); } m_fcache[i2] = F2; // Update (b_low, i_low) or (b_up, i_up) using (F2, i2)... if(m_I1.contains(i2)){ if(F2 + m_epsilon < m_bUp){ m_bUp = F2 + m_epsilon; m_iUp = i2; } else if (F2 - m_epsilon > m_bLow){ m_bLow = F2 - m_epsilon; m_iLow = i2; } } else if(m_I2.contains(i2) && (F2+m_epsilon > m_bLow)){ m_bLow = F2 + m_epsilon; m_iLow = i2; } else if(m_I3.contains(i2) && (F2-m_epsilon < m_bUp)){ m_bUp = F2 - m_epsilon; m_iUp = i2; } } // check optimality using current b_low and b_up and, if // violated, find an index i1 to do joint optimization with i2... boolean optimality = true; int i1 = -1; // case 1 : i2 is in I_0a if(m_I0.contains(i2) && 0 < alpha2 && alpha2 < m_C * m_data.instance(i2).weight()){ if(m_bLow-(F2-m_epsilon) > 2 * m_tol){ optimality = false; i1 = m_iLow; // for i2 in I_0a choose the better i1 if((F2 - m_epsilon) - m_bUp > m_bLow - (F2 - m_epsilon)){ i1 = m_iUp; } }else if((F2 - m_epsilon)-m_bUp > 2 * m_tol){ optimality = false; i1 = m_iUp; // for i2 in I_0a choose the better i1... if(m_bLow-(F2-m_epsilon) > (F2-m_epsilon)-m_bUp){ i1 = m_iLow; } } } // case 2 : i2 is in I_0b else if(m_I0.contains(i2) && 0 < alpha2_ && alpha2_ < m_C * m_data.instance(i2).weight()){ if(m_bLow-(F2+m_epsilon) > 2 * m_tol){ optimality = false; i1 = m_iLow; // for i2 in I_0b choose the better i1 if((F2 + m_epsilon) - m_bUp > m_bLow - (F2 + m_epsilon)){ i1 = m_iUp; } }else if((F2 + m_epsilon)-m_bUp > 2 * m_tol){ optimality = false; i1 = m_iUp; // for i2 in I_0b choose the better i1... if(m_bLow-(F2+m_epsilon) > (F2+m_epsilon)-m_bUp){ i1 = m_iLow; } } } // case 3 : i2 is in I_1 else if(m_I1.contains(i2)){ if(m_bLow-(F2+m_epsilon) > 2 * m_tol){ optimality = false; i1 = m_iLow; // for i2 in I_1 choose the better i1 if((F2 + m_epsilon) - m_bUp > m_bLow - (F2 + m_epsilon)){ i1 = m_iUp; } } else if((F2 - m_epsilon)-m_bUp > 2 * m_tol){ optimality = false; i1 = m_iUp; // for i2 in I_1 choose the better i1... if(m_bLow-(F2-m_epsilon) > (F2-m_epsilon)-m_bUp){ i1 = m_iLow; } } } // case 4 : i2 is in I_2 else if(m_I2.contains(i2)){ if((F2+m_epsilon)-m_bUp > 2 * m_tol){ optimality = false; i1 = m_iUp; } } // case 5 : i2 is in I_3 else if(m_I3.contains(i2)){ if(m_bLow-(F2-m_epsilon) > 2 * m_tol){ optimality = false; i1 = m_iLow; } } else{ throw new RuntimeException("Inconsistent state ! I0, I1, I2 and I3 " + "must cover the whole set of indices."); } if(optimality){ return 0; } if(takeStep(i1, i2)){ return 1; } else { return 0; } } /** * Method solving for the Lagrange multipliers for * two instances. (As defined in Shevade's paper.) * * @param i1 index of the first instance * @param i2 index of the second instance * @return true if multipliers could be found * @throws Exception if something goes wrong */ protected boolean takeStep(int i1, int i2) throws Exception{ if(i1 == i2){ return false; } double alpha1 = m_alpha[i1]; double alpha1_ = m_alpha_[i1]; double alpha2 = m_alpha[i2]; double alpha2_ = m_alpha_[i2]; double F1 = m_fcache[i1]; double F2 = m_fcache[i2]; double k11 = m_kernel.eval(i1, i1, m_data.instance(i1)); double k12 = m_kernel.eval(i1, i2, m_data.instance(i1)); double k22 = m_kernel.eval(i2, i2, m_data.instance(i2)); double eta = -2*k12+k11+k22; double gamma = alpha1 - alpha1_ + alpha2 - alpha2_; // In case of numerical instabilies eta might be sligthly less than 0. // (Theoretically, it cannot happen with a kernel which respects Mercer's condition.) if(eta < 0) eta = 0; boolean case1 = false, case2 = false, case3 = false, case4 = false, finished = false; double deltaphi = F1 - F2; double L, H; boolean changed = false; double a1, a2; while(!finished){ // this loop is passed at most three times // Case variables needed to avoid attempting small changes twices if(!case1 && (alpha1 > 0 || (alpha1_ == 0 && deltaphi > 0)) && (alpha2 > 0 || (alpha2_ == 0 && deltaphi < 0)) ){ // compute L, H (wrt alpha1, alpha2) L = java.lang.Math.max(0, gamma - m_C * m_data.instance(i1).weight()); H = java.lang.Math.min(m_C * m_data.instance(i2).weight(), gamma); if(L < H){ if(eta > 0){ a2 = alpha2 - (deltaphi / eta); if(a2 > H) a2 = H; else if(a2 < L) a2 = L; } else { double Lobj = -L*deltaphi; double Hobj = -H*deltaphi; if(Lobj > Hobj) a2 = L; else a2 = H; } a1 = alpha1 - (a2 - alpha2); // update alpha1, alpha2 if change is larger than some eps if(java.lang.Math.abs(a1-alpha1) > m_eps || java.lang.Math.abs(a2-alpha2) > m_eps){ alpha1 = a1; alpha2 = a2; changed = true; } } else { finished = true; } case1 = true; } else if(!case2 && (alpha1 > 0 || (alpha1_ == 0 && deltaphi > 2 * m_epsilon)) && (alpha2_ > 0 || (alpha2 == 0 && deltaphi > 2 * m_epsilon)) ){ // compute L, H (wrt alpha1, alpha2_) L = java.lang.Math.max(0, -gamma); H = java.lang.Math.min(m_C * m_data.instance(i2).weight(), -gamma + m_C*m_data.instance(i1).weight()); if(L < H){ if(eta > 0){ a2 = alpha2_ + ((deltaphi - 2*m_epsilon) / eta); if(a2 > H) a2 = H; else if(a2 < L) a2 = L; } else { double Lobj = L*(-2*m_epsilon + deltaphi); double Hobj = H*(-2*m_epsilon + deltaphi); if(Lobj > Hobj) a2 = L; else a2 = H; } a1 = alpha1 + (a2 - alpha2_); // update alpha1, alpha2_ if change is larger than some eps if(java.lang.Math.abs(a1-alpha1) > m_eps || java.lang.Math.abs(a2-alpha2_) > m_eps){ alpha1 = a1; alpha2_ = a2; changed = true; } } else { finished = true; } case2 = true; } else if(!case3 && (alpha1_ > 0 || (alpha1 == 0 && deltaphi < -2 * m_epsilon)) && (alpha2 > 0 || (alpha2_ == 0 && deltaphi < -2 * m_epsilon)) ){ // compute L, H (wrt alpha1_, alpha2) L = java.lang.Math.max(0, gamma); H = java.lang.Math.min(m_C * m_data.instance(i2).weight(), m_C * m_data.instance(i1).weight() + gamma); if(L < H){ if(eta > 0){ a2 = alpha2 - ((deltaphi + 2*m_epsilon) / eta); if(a2 > H) a2 = H; else if(a2 < L) a2 = L; } else { double Lobj = -L*(2*m_epsilon + deltaphi); double Hobj = -H*(2*m_epsilon + deltaphi); if(Lobj > Hobj) a2 = L; else a2 = H; } a1 = alpha1_ + (a2 - alpha2); // update alpha1_, alpha2 if change is larger than some eps if(java.lang.Math.abs(a1-alpha1_) > m_eps || java.lang.Math.abs(a2-alpha2) > m_eps){ alpha1_ = a1; alpha2 = a2; changed = true; } } else { finished = true; } case3 = true; } else if(!case4 && (alpha1_ > 0 || (alpha1 == 0 && deltaphi < 0)) && (alpha2_ > 0 || (alpha2 == 0 && deltaphi > 0)) ){ // compute L, H (wrt alpha1_, alpha2_) L = java.lang.Math.max(0, -gamma - m_C * m_data.instance(i1).weight()); H = java.lang.Math.min(m_C * m_data.instance(i2).weight(), -gamma); if(L < H){ if(eta > 0){ a2 = alpha2_ + deltaphi / eta; if(a2 > H) a2 = H; else if(a2 < L) a2 = L; } else { double Lobj = L*deltaphi; double Hobj = H*deltaphi; if(Lobj > Hobj) a2 = L; else a2 = H; } a1 = alpha1_ - (a2 - alpha2_); // update alpha1_, alpha2_ if change is larger than some eps if(java.lang.Math.abs(a1-alpha1_) > m_eps || java.lang.Math.abs(a2-alpha2_) > m_eps){ alpha1_ = a1; alpha2_ = a2; changed = true; } } else { finished = true; } case4 = true; } else { finished = true; } // update deltaphi deltaphi += eta * ((alpha2 - alpha2_) - (m_alpha[i2] - m_alpha_[i2])); } if(changed){ // update f-cache[i] for i in I_0 using new Lagrange multipliers for (int i = m_I0.getNext(-1); i != -1; i = m_I0.getNext(i)) { if (i != i1 && i != i2){ m_fcache[i] += ((m_alpha[i1] - m_alpha_[i1]) - (alpha1 - alpha1_)) * m_kernel.eval(i1, i, m_data.instance(i1)) + ((m_alpha[i2] - m_alpha_[i2]) - (alpha2 - alpha2_)) * m_kernel.eval(i2, i, m_data.instance(i2)); } } // update f-cache[i1] and f-cache[i2] m_fcache[i1] += ((m_alpha[i1] - m_alpha_[i1]) - (alpha1 - alpha1_)) * k11 + ((m_alpha[i2] - m_alpha_[i2]) - (alpha2 - alpha2_)) * k12; m_fcache[i2] += ((m_alpha[i1] - m_alpha_[i1]) - (alpha1 - alpha1_)) * k12 + ((m_alpha[i2] - m_alpha_[i2]) - (alpha2 - alpha2_)) * k22; // to prevent precision problems if(alpha1 > m_C * m_data.instance(i1).weight() - m_Del * m_C * m_data.instance(i1).weight()){ alpha1 = m_C * m_data.instance(i1).weight(); } else if (alpha1 <= m_Del * m_C * m_data.instance(i1).weight()){ alpha1 = 0; } if(alpha1_ > m_C * m_data.instance(i1).weight() - m_Del * m_C * m_data.instance(i1).weight()){ alpha1_ = m_C * m_data.instance(i1).weight(); } else if (alpha1_ <= m_Del * m_C * m_data.instance(i1).weight()){ alpha1_ = 0; } if(alpha2 > m_C * m_data.instance(i2).weight() - m_Del * m_C * m_data.instance(i2).weight()){ alpha2 = m_C * m_data.instance(i2).weight(); } else if (alpha2 <= m_Del * m_C * m_data.instance(i2).weight()){ alpha2 = 0; } if(alpha2_ > m_C * m_data.instance(i2).weight() - m_Del * m_C * m_data.instance(i2).weight()){ alpha2_ = m_C * m_data.instance(i2).weight(); } else if (alpha2_ <= m_Del * m_C * m_data.instance(i2).weight()){ alpha2_ = 0; } // Store the changes in alpha, alpha' array m_alpha[i1] = alpha1; m_alpha_[i1] = alpha1_; m_alpha[i2] = alpha2; m_alpha_[i2] = alpha2_; // update I_0, I_1, I_2, I_3 if((0 < alpha1 && alpha1 < m_C * m_data.instance(i1).weight()) || (0 < alpha1_ && alpha1_ < m_C * m_data.instance(i1).weight())){ m_I0.insert(i1); } else { m_I0.delete(i1); } if((alpha1 == 0 && alpha1_ == 0)){ m_I1.insert(i1); } else { m_I1.delete(i1); } if((alpha1 == 0 && alpha1_ == m_C * m_data.instance(i1).weight())){ m_I2.insert(i1); } else { m_I2.delete(i1); } if((alpha1 == m_C * m_data.instance(i1).weight() && alpha1_ == 0)){ m_I3.insert(i1); } else { m_I3.delete(i1); } if((0 < alpha2 && alpha2 < m_C * m_data.instance(i2).weight()) || (0 < alpha2_ && alpha2_ < m_C * m_data.instance(i2).weight())){ m_I0.insert(i2); } else { m_I0.delete(i2); } if((alpha2 == 0 && alpha2_ == 0)){ m_I1.insert(i2); } else { m_I1.delete(i2); } if((alpha2 == 0 && alpha2_ == m_C * m_data.instance(i2).weight())){ m_I2.insert(i2); } else { m_I2.delete(i2); } if((alpha2 == m_C * m_data.instance(i2).weight() && alpha2_ == 0)){ m_I3.insert(i2); } else { m_I3.delete(i2); } // for debuggage // checkSets();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -