📄 bayesianlogisticregression.java
字号:
public static double bigF(double r, double sigma) { double funcValue = 0.25; double absR = Math.abs(r); if (absR > sigma) { funcValue = 1.0 / (2.0 + Math.exp(absR - sigma) + Math.exp(sigma - absR)); } return funcValue; } /** * This method implements the stopping criterion * function. * * @return boolean whether to stop or not. */ public boolean stoppingCriterion() { int i; double sum_deltaR = 0.0; double sum_R = 1.0; boolean shouldStop; double value = 0.0; double delta; //Summation of changes in R(i) vector. for (i = 0; i < m_Instances.numInstances(); i++) { sum_deltaR += Math.abs(DeltaR[i]); //Numerator (deltaR(i)) sum_R += Math.abs(R[i]); // Denominator (1+sum(R(i)) } delta = Math.abs(sum_deltaR - Change); Change = delta / sum_R; if (debug) { System.out.println(Change + " <= " + Tolerance); } shouldStop = ((Change <= Tolerance) || (iterationCounter >= maxIterations)) ? true : false; iterationCounter++; Change = sum_deltaR; return shouldStop; } /** * This method computes the values for the logistic link function. * <pre>f(r)=exp(r)/(1+exp(r))</pre> * * @return output value */ public static double logisticLinkFunction(double r) { return Math.exp(r) / (1.0 + Math.exp(r)); } /** * Sign for a given value. * @param r * @return double +1 if r>0, -1 if r<0 */ public static double sgn(double r) { double sgn = 0.0; if (r > 0) { sgn = 1.0; } else if (r < 0) { sgn = -1.0; } return sgn; } /** * This function computes the norm-based hyperparameters * and stores them in the m_Hyperparameters. */ public double normBasedHyperParameter() { //TODO: Implement this method. 
Instance instance; double mean = 0.0; for (int i = 0; i < m_Instances.numInstances(); i++) { instance = m_Instances.instance(i); double sqr_sum = 0.0; for (int j = 0; j < m_Instances.numAttributes(); j++) { if (j != ClassIndex) { sqr_sum += (instance.value(j) * instance.value(j)); } } //sqr_sum=Math.sqrt(sqr_sum); mean += sqr_sum; } mean = mean / (double) m_Instances.numInstances(); return ((double) m_Instances.numAttributes()) / mean; } /** * Classifies the given instance using the Bayesian Logistic Regression function. * * @param instance the test instance * @return the classification * @throws Exception if classification can't be done successfully */ public double classifyInstance(Instance instance) throws Exception { //TODO: Implement double sum_R = 0.0; double classification = 0.0; sum_R = BetaVector[0]; for (int j = 0; j < instance.numAttributes(); j++) { if (j != (ClassIndex - 1)) { sum_R += (BetaVector[j + 1] * instance.value(j)); } } sum_R = logisticLinkFunction(sum_R); if (sum_R > Threshold) { classification = 1.0; } else { classification = 0.0; } return classification; } /** * Outputs the linear regression model as a string. 
* * @return the model as string */ public String toString() { if (m_Instances == null) { return "Bayesian logistic regression: No model built yet."; } StringBuffer buf = new StringBuffer(); String text = ""; switch (HyperparameterSelection) { case 1: text = "Norm-Based Hyperparameter Selection: "; break; case 2: text = "Cross-Validation Based Hyperparameter Selection: "; break; case 3: text = "Specified Hyperparameter: "; break; } buf.append(text).append(HyperparameterValue).append("\n\n"); buf.append("Regression Coefficients\n"); buf.append("=========================\n\n"); for (int j = 0; j < m_Instances.numAttributes(); j++) { if (j != ClassIndex) { if (BetaVector[j] != 0.0) { buf.append(m_Instances.attribute(j).name()).append(" : ") .append(BetaVector[j]).append("\n"); } } } buf.append("===========================\n\n"); buf.append("Likelihood: " + m_PriorUpdate.getLoglikelihood() + "\n\n"); buf.append("Penalty: " + m_PriorUpdate.getPenalty() + "\n\n"); buf.append("Regularized Log Posterior: " + m_PriorUpdate.getLogPosterior() + "\n"); buf.append("===========================\n\n"); return buf.toString(); } /** * Method computes the best hyperparameter value by doing cross * -validation on the training data and compute the likelihood. * The method can parse a range of values or a list of values. * @return Best hyperparameter value with the max likelihood value on the training data. * @throws Exception */ public double CVBasedHyperparameter() throws Exception { //TODO: Method incomplete. double start; //TODO: Method incomplete. double end; //TODO: Method incomplete. 
double multiplier; int size = 0; double[] list = null; double MaxHypeValue = 0.0; double MaxLikelihood = 0.0; StringTokenizer tokenizer = new StringTokenizer(HyperparameterRange); String rangeType = tokenizer.nextToken(":"); if (rangeType.equals("R")) { String temp = tokenizer.nextToken(); tokenizer = new StringTokenizer(temp); start = Double.parseDouble(tokenizer.nextToken("-")); tokenizer = new StringTokenizer(tokenizer.nextToken()); end = Double.parseDouble(tokenizer.nextToken(",")); multiplier = Double.parseDouble(tokenizer.nextToken()); int steps = (int) (((Math.log10(end) - Math.log10(start)) / Math.log10(multiplier)) + 1); list = new double[steps]; int count = 0; for (double i = start; i <= end; i *= multiplier) { list[count++] = i; } } else if (rangeType.equals("L")) { Vector vec = new Vector(); while (tokenizer.hasMoreTokens()) { vec.add(tokenizer.nextToken(",")); } list = new double[vec.size()]; for (int i = 0; i < vec.size(); i++) { list[i] = Double.parseDouble((String) vec.get(i)); } } else { //throw exception. 
} // Perform two-fold cross-validation to collect // unbiased predictions if (list != null) { int numFolds = (int) NumFolds; Random random = new Random(); m_Instances.randomize(random); m_Instances.stratify(numFolds); for (int k = 0; k < list.length; k++) { for (int i = 0; i < numFolds; i++) { Instances train = m_Instances.trainCV(numFolds, i, random); SerializedObject so = new SerializedObject(this); BayesianLogisticRegression blr = (BayesianLogisticRegression) so.getObject(); // blr.setHyperparameterSelection(3); blr.setHyperparameterSelection(new SelectedTag(SPECIFIC_VALUE, TAGS_HYPER_METHOD)); blr.setHyperparameterValue(list[k]); // blr.setPriorClass(PriorClass); blr.setPriorClass(new SelectedTag(PriorClass, TAGS_PRIOR)); blr.setThreshold(Threshold); blr.setTolerance(Tolerance); blr.buildClassifier(train); Instances test = m_Instances.testCV(numFolds, i); double val = blr.getLoglikeliHood(blr.BetaVector, test); if (debug) { System.out.println("Fold " + i + "Hyperparameter: " + list[k]); System.out.println("==================================="); System.out.println(" Likelihood: " + val); } if ((k == 0) | (val > MaxLikelihood)) { MaxLikelihood = val; MaxHypeValue = list[k]; } } } } else { return HyperparameterValue; } return MaxHypeValue; } /** * * @return likelihood for a given set of betas and instances */ public double getLoglikeliHood(double[] betas, Instances instances) { m_PriorUpdate.computelogLikelihood(betas, instances); return m_PriorUpdate.getLoglikelihood(); } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. 
*/
public Enumeration listOptions() {
  // All supported command-line options, in the order they are reported.
  Option[] supported = {
    new Option("\tShow Debugging Output\n", "D", 0, "-D"),
    new Option(
      "\tDistribution of the Prior "
      + "(1=Gaussian, 2=Laplacian)"
      + "\n\t(default: 1=Gaussian)",
      "P", 1, "-P <integer>"),
    new Option(
      "\tHyperparameter Selection Method "
      + "(1=Norm-based, 2=CV-based, 3=specific value)\n"
      + "\t(default: 1=Norm-based)",
      "H", 1, "-H <integer>"),
    new Option(
      "\tSpecified Hyperparameter Value (use in conjunction with -H 3)\n"
      + "\t(default: 0.27)",
      "V", 1, "-V <double>"),
    new Option(
      "\tHyperparameter Range (use in conjunction with -H 2)\n"
      + "\t(format: R:start-end,multiplier OR L:val(1), val(2), ..., val(n))\n"
      + "\t(default: R:0.01-316,3.16)",
      "R", 1, "-R <string>"),
    new Option("\tTolerance Value\n\t(default: 0.0005)",
      "Tl", 1, "-Tl <double>"),
    new Option("\tThreshold Value\n\t(default: 0.5)",
      "S", 1, "-S <double>"),
    new Option(
      "\tNumber Of Folds (use in conjuction with -H 2)\n"
      + "\t(default: 2)",
      "F", 1, "-F <integer>"),
    new Option("\tMax Number of Iterations\n\t(default: 100)",
      "I", 1, "-I <integer>"),
    new Option("\tNormalize the data", "N", 0, "-N")
  };

  Vector optionList = new Vector();
  for (int i = 0; i < supported.length; i++) {
    optionList.addElement(supported[i]);
  }

  return optionList.elements();
}

/**
 * Parses a given list of options.
<p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * Show Debugging Output * </pre> * * <pre> -P <integer> * Distribution of the Prior (1=Gaussian, 2=Laplacian) * (default: 1=Gaussian)</pre> * * <pre> -H <integer> * Hyperparameter Selection Method (1=Norm-based, 2=CV-based, 3=specific value) * (default: 1=Norm-based)</pre> * * <pre> -V <double> * Specified Hyperparameter Value (use in conjunction with -H 3) * (default: 0.27)</pre> * * <pre> -R <string> * Hyperparameter Range (use in conjunction with -H 2) * (format: R:start-end,multiplier OR L:val(1), val(2), ..., val(n)) * (default: R:0.01-316,3.16)</pre> * * <pre> -Tl <double> * Tolerance Value * (default: 0.0005)</pre> * * <pre> -S <double> * Threshold Value * (default: 0.5)</pre> * * <pre> -F <integer> * Number Of Folds (use in conjuction with -H 2) * (default: 2)</pre> * * <pre> -I <integer> * Max Number of Iterations * (default: 100)</pre> * * <pre> -N * Normalize the data</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { //Debug Option debug = Utils.getFlag('D', options); // Set Tolerance. String Tol = Utils.getOption("Tl", options); if (Tol.length() != 0) { Tolerance = Double.parseDouble(Tol); } //Set Threshold String Thres = Utils.getOption('S', options);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -