algorithmrvm.java
    {
      if (debug_level_d) {
        // System.out.println("curr_weights_d: ");
        Matrix.printDoubleVector(curr_weights_d);
      }

      // copy out the current best weights to the weight vector
      // so they can be used for evaluations
      //
      if (num_irls_its > 0) {
        if (start_index == 1) {
          bias_d = MathUtil.doubleValue(curr_weights_d, 0);
        } else {
          bias_d = 0.0;
        }
        MathUtil.copyVector(weights_d, curr_weights_d, weights_d.size(),
                            0, start_index);
      }

      // 1.b.1. compute the B matrix and the sigma vector
      //
      if (debug_level_d) {
        // System.out.println("IRLS iteration: " + num_irls_its);
        // System.out.println("1.b.1. compute the B matrix and the sigma vector");
      }
      computeSigma();
      for (int i = 0; i < size_B; i++) {
        double sigma = MathUtil.doubleValue(sigma_d, i);
        B_d.setValue(i, i, sigma * (1 - sigma));
      }

      // if the likelihood gets worse then the newton step was
      // too large and needs to be reduced
      //
      new_likelihood = computeLikelihood();
      if (debug_level_d) {
        // System.out.println("sigma_d: ");
        // Matrix.printDoubleVector(sigma_d);
        // System.out.println("phi_d: ");
        // phi_d.printMatrix();
        // System.out.println("B_d: ");
        // B_d.printMatrix();
        // System.out.println("old likelihood 1 : " + old_likelihood);
        // System.out.println("new likelihood 1 : " + new_likelihood);
      }

      if (!reduced) {
        if (new_likelihood < old_likelihood || (num_irls_its < 2)) {
          // accept the step: store the likelihood computed on this IRLS
          // iteration and grow the step parameter
          //
          old_likelihood = new_likelihood;
          if (lambda < 0.9) {
            // System.out.println("increasing step parameter");
            lambda *= 2.0;
          } else {
            lambda = 1.0;
          }
        } else {
          // revert to the previous weights
          //
          // curr_weights_d = (Vector)old_irls_weights_d.clone();
          // curr_weights_d = old_irls_weights_d;
          lambda *= 0.5;
          if (lambda < 0.0001) {
            last_iteration = true;
          }
          reduced = true;
          continue;
        }
      }
      reduced = false;

      // old_irls_weights_d = (Vector)curr_weights_d.clone();
      // old_irls_weights_d = curr_weights_d;

      if (debug_level_d) {
        // System.out.println("curr_weights_d: ");
        // Matrix.printDoubleVector(curr_weights_d);
        // System.out.println("1.b.2. compute the gradient and Hessian ");
      }

      /* debug
      if (debug_level_d > Integral::BRIEF) {
        Vector error_signal;
        error_signal.sub(targets_d, sigma_d);
        error_signal.abs();
        Double out = error_signal.sum();
        out.debug(L"sum-squared error");
        curr_weights_d.debug(L"current weights");
      }
      */

      // 1.b.2. compute the gradient and Hessian
      //
      // gradient = phi * [y_d - sigma] - A * weights
      // sigma is reused as temporary data space in the second segment
      // for computing A * weights.
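      //
      // for the Bernoulli/logistic likelihood with Gaussian weight priors
      // (the standard RVM setup), these expressions follow from the
      // penalized log-likelihood
      //     L(w) = sum_i [ y_i log s_i + (1 - y_i) log(1 - s_i) ] - w'Aw/2
      // with s_i = sigmoid(w'phi_i): its gradient is phi*(y - s) - A*w and
      // its Hessian is -(phi*B*phi' + A), where B = diag(s_i * (1 - s_i)).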
      // init matrix
      //
      boolean status = true;
      sigma_M.initMatrix(sigma_d);
      y_M.initMatrix(y_d);
      curr_weights_M.initMatrix(curr_weights_d);
      gradient_M.initMatrix(gradient_d);

      sigma_M.scalarMultMatrix(-1.0);          // -sigma
      sigma_M.addMatrix(y_M);                  // sigma_M = y_d - sigma
      sigma_M.transposeMatrix(temp_M);

      // gradient = phi * [y_d - sigma]
      //
      phi_d.multMatrix(temp_M, gradient_M);

      // temp2_M = -A * weights
      //
      curr_weights_M.transposeMatrix(temp_M);
      A_d.multMatrix(temp_M, temp2_M);
      temp2_M.scalarMultMatrix(-1);

      // gradient = phi * [y_d - sigma] - A * weights
      //
      gradient_M.addMatrix(temp2_M);
      // gradient_M.printMatrix();
      gradient_M.toDoubleVector(gradient_d);

      /*
      if (!status) {
        gradient_d.debug(L"computed gradient");
        return Error::handle(name(), L"train:gradient computation",
                             ERR, __FILE__, __LINE__);
      }

      // debugging information
      //
      if (debug_level_d > Integral::DETAILED) {
        gradient_d.debug(L"gradient");
      }
      */

      // Hessian = -(phi * B * transpose(phi) + A)
      // this is a quadratic form about B
      //
      status = true;

      // temp_M = phi * B, temp2_M = phi'
      // hessian_d = phi * B * phi'
      //
      phi_d.multMatrix(B_d, temp_M);
      phi_d.transposeMatrix(temp2_M);
      temp_M.multMatrix(temp2_M, hessian_d);

      // phi * B * phi' + A
      //
      hessian_d.addMatrix(A_d);

      /*
      if (!status) {
        hessian_d.debug(L"computed hessian");
        return Error::handle(name(), L"train:Hessian computation",
                             ERR, __FILE__, __LINE__);
      }

      // debugging information
      //
      if (debug_level_d > Integral::DETAILED) {
        hessian_d.debug(L"Hessian");
      }
      */

      // with the cholesky decomposition, we can solve systems
      // of equations as well as determine the diagonal elements
      // of the covariance. this is all we need for the RVM training
      //
      if (!hessian_d.decompositionCholesky(covar_cholesky_d)) {
        // System.out.println("train:Cholesky decomposition");
        return false;
      }

      /*
      // debugging information
      //
      if (debug_level_d > Integral::DETAILED) {
        covar_cholesky_d.debug(L"cholesky decomposition");
      }
      */

      if (debug_level_d) {
        // System.out.println("hessian_d: ");
        // hessian_d.printMatrix();
        // System.out.println("gradient_d: ");
        // Matrix.printDoubleVector(gradient_d);
        // System.out.println("covar_cholesky_d: ");
        // covar_cholesky_d.printMatrix();
      }

      // 1.b.3. update the weights:
      // because the space is convex we take a full Newton step
      // weights* = weights - inverse(Hessian) * gradient
      //
      // System.out.println("1.b.3. update the weights:");
      status = true;
      gradient_M.initMatrix(gradient_d);
      status &= covar_cholesky_d.choleskySolve(temp_M, covar_cholesky_d,
                                               gradient_M);

      if (debug_level_d) {
        // System.out.println("temp_M: ");
        // temp_M.printMatrix();
      }

      if (!last_iteration) {
        temp_M.scalarMultMatrix(lambda);
        temp2_M.initMatrix(old_irls_weights_d);
        temp_M.addMatrix(temp2_M);
        // temp_M.printMatrix();
        temp_M.toDoubleVector(curr_weights_d);

        double val = MathUtil.vectorProduct(gradient_d, gradient_d);
        val = Math.sqrt(val);
        val /= curr_weights_d.size();
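        // the scalar "val" is the Euclidean norm of the gradient scaled
        // by the number of weights; a near-zero value means the penalized
        // log-likelihood is locally stationary, so IRLS can stop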
        // 1.b.4. check convergence
        //
        /*
        if (MathUtil.almostEqual(curr_weights_d, old_irls_weights_d) &&
            MathUtil.almostEqual(gradient_d, 0.0)) {
        */
        if (val < 1e-6) {
          last_iteration = true;
        }
      } else {
        irls_converged = true;
      }

      /*
      if (!status) {
        covar_cholesky_d.debug(L"computed cholesky");
        return Error::handle(name(), L"train:weight update",
                             ERR, __FILE__, __LINE__);
      }
      */

      // next iteration
      //
      num_irls_its++;
      pro_box_d.setProgressCurr((int)num_irls_its % 20);
    }

    if (debug_level_d) {
      // System.out.println("new likelihood 2: " + new_likelihood);
    }

    /*
    if (debug_level_d > Integral::NONE) {
      new_likelihood.debug(L"new likelihood");
    }
    */

    // generate the true hessian (negative of the one we store)
    //
    hessian_d.scalarMultMatrix(-1.0);
    computeSigma();

    /*
    if (debug_level_d > Integral::DETAILED) {
      curr_weights_d.debug(L"current weights after IRLS");
      Vector tmp_1;
      tmp_1.sub(targets_d, sigma_d);
      Vector tmp_weights;
      Matrix tmp_matrix(A_d);
      tmp_matrix.inverse();
      tmp_matrix.changeType(Integral::FULL);
      tmp_matrix.mult(phi_d);
      tmp_matrix.multv(tmp_weights, tmp_1);
      tmp_weights.debug(L"computed weights after IRLS");
    }
    */

    // exit gracefully
    //
    return true;
  }

  /**
   * Finalizes the trained model, making it ready to write to file or to
   * use in prediction.
   *
   * @return true
   */
  boolean finalizeTraining() {

    // Debug
    // System.out.println("AlgorithmRVM : finalizeTraining()");

    // perform a final check on the range of the weights and
    // manually prune any that have fallen below the minimum
    // allowed weight value
    //
    if (Math.abs(bias_d) < min_allowed_weight_d) {
      bias_d = 0.0;
    }
    for (int i = 0; i < weights_d.size(); i++) {
      if (Math.abs(MathUtil.doubleValue(weights_d, i)) < min_allowed_weight_d) {
        weights_d.set(i, new Double(0.0));
      }
    }

    // prune any remaining zero-weights with large hyperparameters
    //
    pruneAndUpdate();

    // run a final iteration of IRLS training to get the final hessian
    //
    irlsTrain();

    // compute the final inverse hessian and assign
    //
    inv_hessian_d.invertMatrix(hessian_d);
    inv_hessian_d.scalarMultMatrix(-1);

    // exit gracefully
    //
    return true;
  }

  /**
   * Prunes off vectors whose hyperparameters have gone to infinity and
   * updates the working data sets.
   *
   * @return true
   */
  public boolean pruneAndUpdate() {

    // Debug
    // System.out.println("AlgorithmRVM : pruneAndUpdate()");

    /*
    // debugging information
    //
    if (debug_level_d > Integral::BRIEF) {
      A_d.debug(L"A");
    }
    */

    if (debug_level_d) {
      // System.out.println("A_d: ");
      // A_d.printMatrix();
      // System.out.println("weights_d: ");
      // Matrix.printDoubleVector(weights_d);
      // System.out.println("alpha_thresh_d: " + alpha_thresh_d);
    }

    // prune any weights that have gone to zero
    //
    pruneWeights();

    // update the number of hyperparameters after pruning
    //
    dimA_d = A_d.getNumRows();

    // reset the size of the vector set
    //
    num_samples_d = weights_d.size();
    gradient_d.setSize(dimA_d);
    MathUtil.initDoubleVector(gradient_d, 0.0);
    curr_weights_d.setSize(dimA_d);
    MathUtil.initDoubleVector(curr_weights_d, 0.0);
    hessian_d.initMatrixValue(dimA_d, dimA_d, 0, Matrix.SYMMETRIC);
    covar_cholesky_d.initMatrixValue(dimA_d, dimA_d, 0,
                                     Matrix.LOWER_TRIANGULAR);

    // exit gracefully
    //
    return true;
  }
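  // In sparse Bayesian learning, a basis function whose hyperparameter
  // alpha_i grows without bound has its posterior weight pinned at zero,
  // so the routine below drops its weight, its entry on the diagonal of
  // A, and the bookkeeping entries (support vector, evaluation data)
  // that share its index. The test used is: weight exactly 0.0 (it was
  // floored earlier) and a hyperparameter above alpha_thresh_d.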
  /**
   * Prunes off vectors which attain a zero weight during training.
   * Auxiliary data structures that are indexed according to the working
   * set are also updated.
   *
   * @return true
   */
  public boolean pruneWeights() {

    // Debug
    // System.out.println("AlgorithmRVM : pruneWeights()");

    // declare local variables
    //
    int num_weights = curr_weights_d.size();

    // the A matrix is diagonal so we can get a temporary version of it
    // into a vector - this makes it easier to work with
    //
    Vector<Double> tmp_A = new Vector<Double>();
    A_d.getDiagonalVector(tmp_A);

    if (debug_level_d) {
      // System.out.println("tmp_A before pruning:");
      // Matrix.printDoubleVector(tmp_A);
    }

    /*
    // A_d.getDiagonal(tmp_A);
    for (int i = 0; i < dimA_d; i++) {

      // prune any weight which has been floored. we prune off
      // the corresponding hyperparameter as well as the row of
      // the phi matrix corresponding to this weight
      //
      if (!((MathUtil.doubleValue(curr_weights_d, i) == 0.0) &&
            (A_d.getValue(i, i) > alpha_thresh_d))) {
        flags[i] = true;
        tmp_A.add(new Double(A_d.getValue(i, i)));
        dimA++;
      } else {
        flags[i] = false;
      }
    }
    */

    // get the dimensions of the phi matrix
    //
    int num_rows = phi_d.getNumRows();
    boolean flags[] = new boolean[num_rows];
    int dimA = 0;

    // loop over the weights and check for zero-valued weights.
    // the weights will be exactly zero because we have manually
    // floored them previously
    //
    int offset = weights_d.size() - num_weights;
    int weights_pruned = 0;
    for (int i = 0; i < num_weights; i++) {
      double weights_i = MathUtil.doubleValue(curr_weights_d, i);
      int a_i = i + weights_pruned;

      // prune any weight that is floored
      //
      if ((weights_i == 0.0) && (A_d.getValue(a_i, a_i) > alpha_thresh_d)) {

        /*
        if (debug_level_d > Integral::BRIEF) {
          Console::put(L"pruning");
        }
        */
        weights_pruned++;

        // remove the weight, target and vector
        //
        if (i + offset >= 0) {
          weights_d.remove(i + offset);
          support_vectors_d.remove(i + offset);
          evalx_d.remove(i + offset);
          evaly_d.remove(i + offset);
        }
        curr_weights_d.remove(i);
        tmp_A.remove(i);

        // because we have removed an element we need to stay at this
        // entry index and update the stop condition
        //
        i = i - 1;
        num_weights = num_weights - 1;
        flags[i + weights_pruned] = false;
      } else {
        flags[i + weights_pruned] = true;
        dimA++;
      }
    }

    /*
    // debugging information
    //
    if (debug_level_d > Integral::DETAILED) {
      Console::put(L"Pruning completed");
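The listing leans heavily on project-specific helpers (Matrix, MathUtil, choleskySolve), which makes the underlying algorithm hard to see. The sketch below restates the same damped-Newton IRLS update for the penalized logistic likelihood using plain double arrays. It is a minimal illustration, not the original implementation: the class name IrlsSketch, the sigmoid and solve helpers, and the fixed hyperparameter vector alpha are stand-ins that do not exist in the AlgorithmRVM API, and a naive Gaussian-elimination solve replaces the listing's Cholesky solve for brevity.

import java.util.Arrays;

// Minimal, self-contained sketch of the IRLS update used above.
// All names here are illustrative, not part of the original API.
public class IrlsSketch {

    // logistic sigmoid
    static double sigmoid(double z) { return 1.0 / (1.0 + Math.exp(-z)); }

    // one damped Newton step for penalized logistic regression:
    //   w <- w + lambda * (Phi B Phi' + A)^-1 (Phi (y - sigma) - A w)
    // Phi is d x n (columns are samples), A = diag(alpha).
    static void newtonStep(double[][] phi, double[] y, double[] alpha,
                           double[] w, double lambda) {
        int d = w.length, n = y.length;

        // sigma_i = sigmoid(w . phi_i), the B_ii terms are sigma_i (1 - sigma_i)
        double[] sigma = new double[n];
        for (int i = 0; i < n; i++) {
            double z = 0.0;
            for (int j = 0; j < d; j++) z += w[j] * phi[j][i];
            sigma[i] = sigmoid(z);
        }

        // gradient = Phi (y - sigma) - A w
        double[] g = new double[d];
        for (int j = 0; j < d; j++) {
            for (int i = 0; i < n; i++) g[j] += phi[j][i] * (y[i] - sigma[i]);
            g[j] -= alpha[j] * w[j];
        }

        // H = Phi B Phi' + A  (the negated Hessian; symmetric positive definite)
        double[][] h = new double[d][d];
        for (int j = 0; j < d; j++) {
            for (int k = 0; k <= j; k++) {
                double s = 0.0;
                for (int i = 0; i < n; i++)
                    s += phi[j][i] * sigma[i] * (1.0 - sigma[i]) * phi[k][i];
                h[j][k] = h[k][j] = s;
            }
            h[j][j] += alpha[j];
        }

        // solve H dw = g (stand-in for the Cholesky solve in the listing)
        // and take the damped step
        double[] dw = solve(h, g);
        for (int j = 0; j < d; j++) w[j] += lambda * dw[j];
    }

    // naive Gaussian elimination with partial pivoting
    static double[] solve(double[][] a, double[] b) {
        int n = b.length;
        double[] x = b.clone();
        double[][] m = new double[n][];
        for (int i = 0; i < n; i++) m[i] = a[i].clone();
        for (int c = 0; c < n; c++) {
            int p = c;
            for (int r = c + 1; r < n; r++)
                if (Math.abs(m[r][c]) > Math.abs(m[p][c])) p = r;
            double[] t = m[c]; m[c] = m[p]; m[p] = t;
            double tb = x[c]; x[c] = x[p]; x[p] = tb;
            for (int r = c + 1; r < n; r++) {
                double f = m[r][c] / m[c][c];
                for (int k = c; k < n; k++) m[r][k] -= f * m[c][k];
                x[r] -= f * x[c];
            }
        }
        for (int r = n - 1; r >= 0; r--) {
            for (int k = r + 1; k < n; k++) x[r] -= m[r][k] * x[k];
            x[r] /= m[r][r];
        }
        return x;
    }

    public static void main(String[] args) {
        // toy 1-D problem with a bias row: phi[0][i] = 1, phi[1][i] = x_i
        double[] xs = {-2.0, -1.0, -0.5, 0.5, 1.0, 2.0};
        double[] y  = { 0.0,  0.0,  0.0, 1.0, 1.0, 1.0};
        double[][] phi = new double[2][xs.length];
        for (int i = 0; i < xs.length; i++) { phi[0][i] = 1.0; phi[1][i] = xs[i]; }
        double[] alpha = {1e-2, 1e-2};       // hyperparameters held fixed here
        double[] w = new double[2];
        for (int it = 0; it < 25; it++) newtonStep(phi, y, alpha, w, 1.0);
        System.out.println("w = " + Arrays.toString(w));
    }
}

On this toy data the full step (lambda = 1.0) converges in a handful of iterations because the ridge term keeps the negated Hessian well conditioned; the real routine additionally halves lambda whenever the likelihood worsens, exactly as in the step-control logic at the top of the IRLS loop.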