📄 LogisticRegression.java
progressWriter.println("Number of Parameters=" + (numOutcomes-1)*(long)numDimensions); progressWriter.println("Prior:\n" + prior); progressWriter.println("Annealing Schedule=" + annealingSchedule); progressWriter.println("Minimum Epochs=" + minEpochs); progressWriter.println("Maximum Epochs=" + maxEpochs); progressWriter.println("Minimum Improvement Per Period=" + minImprovement); progressWriter.println("Has Sparse Inputs=" + hasSparseInputs); progressWriter.println("Has Informative Prior=" + hasPrior); } long startTime = System.currentTimeMillis(); long[] lastRegularizations = (hasSparseInputs && hasPrior) ? new long[numDimensions] : null; double lastLog2LikelihoodAndPrior = - Double.MAX_VALUE / 2.0; LogisticRegression regression = new LogisticRegression(weightVectors); double rollingAverageRelativeDiff = 1.0; // fairly arbitrary starting point double bestLog2LikelihoodAndPrior = Double.NEGATIVE_INFINITY; for (int epoch = 0; epoch < maxEpochs; ++epoch) { DenseVector[] weightVectorCopies = copy(weightVectors); // SPARSE, PRIOR if (lastRegularizations != null) { Arrays.fill(lastRegularizations,0); } double learningRate = annealingSchedule.learningRate(epoch); for (int j = 0; j < numTrainingInstances; ++j) { Vector xsJ = xs[j]; int csJ = cs[j]; if (hasSparseInputs) { int[] dimensions = xsJ.nonZeroDimensions(); if (hasPrior) { // +SPARSE, +PRIOR for (int i = 0; i < dimensions.length; ++i) { int dim = dimensions[i]; for (int k = 0; k < numOutcomesMinus1; ++k) { Vector weightVectorsK = weightVectors[k]; double weightVectorsKDim = weightVectorsK.value(dim); double priorGrad = prior.gradient(weightVectorsKDim,dim); double delta = (priorGrad * (learningRate * (j - lastRegularizations[dim]))) / numTrainingInstances; // clip normalization to 0 double newVal = weightVectorsKDim > 0 ? Math.max(0.0,weightVectorsKDim-delta) : Math.min(0.0,weightVectorsKDim-delta); weightVectorsK.setValue(dim, newVal); } lastRegularizations[dim] = j; } } // shouldn't we regularize necessary dimensions first? double[] conditionalProbs = regression.classify(xsJ); // SPARSE, ? PRIOR for (int k = 0; k < numOutcomesMinus1; ++k) { Vector weightVectorsK = weightVectors[k]; double conditionalProbMinusTruth = conditionalProbs[k]; if (k == csJ) conditionalProbMinusTruth -= 1.0; weightVectorsK.increment(-learningRate * conditionalProbMinusTruth,xsJ); } } else { // DENSE, ?PRIOR double[] conditionalProbs = regression.classify(xsJ); for (int k = 0; k < numOutcomesMinus1; ++k) { Vector weightVectorsK = weightVectors[k]; double conditionalProbMinusTruth = conditionalProbs[k]; if (k == csJ) conditionalProbMinusTruth -= 1.0; for (int i = 0; i < numDimensions; ++i) { // nice if we had add and scale operations for vectors; scale is same for all feats double weightVectorsKI = weightVectorsK.value(i); double gradient = xsJ.value(i) * conditionalProbMinusTruth; weightVectorsKI -= learningRate * gradient; // DENSE, PRIOR if (hasPrior && weightVectorsKI != 0.0) { double priorGradient = prior.gradient(weightVectorsKI,i); double delta = (learningRate * priorGradient) / numTrainingInstances; // clip normalization to 0 weightVectorsKI = weightVectorsKI > 0 ? Math.max(0.0,weightVectorsKI-delta) : Math.min(0.0,weightVectorsKI-delta); } weightVectorsK.setValue(i, weightVectorsKI); } } } } int step = numTrainingInstances; // ? 
            // catch up feature regularizations with priors every epoch
            if (hasPrior) {
                if (hasSparseInputs) {
                    // SPARSE, PRIOR
                    for (int k = 0; k < numOutcomesMinus1; ++k) {
                        Vector weightVectorsK = weightVectors[k];
                        for (int i = 0; i < numDimensions; ++i) {
                            double weightVectorsKI = weightVectorsK.value(i);
                            if (weightVectorsKI != 0.0) {
                                double priorGradient = prior.gradient(weightVectorsKI,i);
                                double delta
                                    = ((step - lastRegularizations[i]) * learningRate * priorGradient)
                                    / numTrainingInstances;
                                // clip regularization to 0; can't be = 0.0 given above test
                                weightVectorsKI = weightVectorsKI > 0.0
                                    ? Math.max(0.0,weightVectorsKI-delta)
                                    : Math.min(0.0,weightVectorsKI-delta);
                                weightVectorsK.setValue(i,weightVectorsKI); // write the regularized weight back
                            }
                        }
                    }
                } else {
                    // DENSE, PRIOR
                    for (int k = 0; k < numOutcomesMinus1; ++k) {
                        Vector weightVectorsK = weightVectors[k];
                        for (int i = 0; i < numDimensions; ++i) {
                            double weightVectorsKI = weightVectorsK.value(i);
                            if (weightVectorsKI != 0.0) {
                                double priorGradient = prior.gradient(weightVectorsKI,i);
                                double delta = (learningRate * priorGradient) / numTrainingInstances;
                                // clip regularization to 0
                                weightVectorsKI = weightVectorsKI > 0
                                    ? Math.max(0.0,weightVectorsKI-delta)
                                    : Math.min(0.0,weightVectorsKI-delta);
                                weightVectorsK.setValue(i,weightVectorsKI); // write the regularized weight back
                            }
                        }
                    }
                }
            }

            double log2Likelihood = log2Likelihood(xs,cs,regression);
            double log2Prior = prior.log2Prior(weightVectors);
            double log2LikelihoodAndPrior = log2Likelihood + log2Prior;
            if (log2LikelihoodAndPrior > bestLog2LikelihoodAndPrior)
                bestLog2LikelihoodAndPrior = log2LikelihoodAndPrior;

            boolean acceptUpdate
                = annealingSchedule.receivedError(epoch,learningRate,-log2LikelihoodAndPrior);
            if (!acceptUpdate) {
                // roll back to the pre-epoch weights if the schedule rejects the update
                weightVectors = weightVectorCopies;
                regression = new LogisticRegression(weightVectors);
            }

            double relativeDiff
                = relativeDifference(lastLog2LikelihoodAndPrior,log2LikelihoodAndPrior);
            rollingAverageRelativeDiff = (9.0 * rollingAverageRelativeDiff + relativeDiff) / 10.0;
            lastLog2LikelihoodAndPrior = log2LikelihoodAndPrior;

            if (progressWriter != null)
                progressWriter.printf("epoch=%5d lr=%11.9f ll=%11.4f lp=%11.4f llp=%11.4f llp*=%11.4f %9s\n",
                                      epoch, learningRate,
                                      log2Likelihood, log2Prior,
                                      log2LikelihoodAndPrior, bestLog2LikelihoodAndPrior,
                                      Strings.msToString(System.currentTimeMillis() - startTime));

            if (rollingAverageRelativeDiff < minImprovement) {
                break;
            }
        }
        return regression;
    }

    /**
     * Returns the log (base 2) likelihood of the specified inputs
     * with the specified categories using the specified regression
     * model.
     *
     * @param inputs Input vectors.
     * @param cats Categories for input vectors.
     * @param regression Model to use for computing likelihood.
     * @throws IllegalArgumentException If the inputs and categories
     * are not the same length.
     */
    public static double log2Likelihood(Vector[] inputs, int[] cats,
                                        LogisticRegression regression) {
        if (inputs.length != cats.length) {
            String msg = "Inputs and categories must be same length."
+ " Found inputs.length=" + inputs.length + " cats.length=" + cats.length; throw new IllegalArgumentException(msg); } int numTrainingInstances = inputs.length; double log2Likelihood = 0.0; for (int j = 0; j < numTrainingInstances; ++j) { double[] conditionalProbs = regression.classify(inputs[j]); log2Likelihood += com.aliasi.util.Math.log2(conditionalProbs[cats[j]]); } return log2Likelihood; } private static boolean isSparse(Vector[] xs) { int sparseCount = 0; for (int i = 0; i < xs.length; ++i) if (xs[i] instanceof SparseFloatVector) ++sparseCount; return sparseCount >= xs.length/2; } private static int max(int[] xs) { int max = xs[0]; for (int i = 1; i < xs.length; ++i) if (xs[i] > max) max = xs[i]; return max; } private static double relativeDifference(double x, double y) { return (Double.isInfinite(x) || Double.isInfinite(y)) ? Double.POSITIVE_INFINITY : (Math.abs(x - y) / (Math.abs(x) + Math.abs(y))); } private static double[][] deepCopy(double[][] xs) { double[][] ys = new double[xs.length][]; for (int i = 0; i < xs.length; ++i) ys[i] = deepCopy(xs[i]); return ys; } private static double[] deepCopy(double[] xs) { double[] ys = new double[xs.length]; for (int i = 0; i < xs.length; ++i) ys[i] = xs[i]; return ys; } private static DenseVector[] copy(DenseVector[] xs) { DenseVector[] result = new DenseVector[xs.length]; for (int k = 0; k < xs.length; ++k) result[k] = new DenseVector(xs[k]); return result; } static class Externalizer extends AbstractExternalizable { static final long serialVersionUID = -2256261505231943102L; final LogisticRegression mRegression; public Externalizer() { this(null); } public Externalizer(LogisticRegression regression) { mRegression = regression; } public void writeExternal(ObjectOutput out) throws IOException { int numOutcomes = mRegression.mWeightVectors.length + 1; out.writeInt(numOutcomes); int numDimensions = mRegression.mWeightVectors[0].numDimensions(); out.writeInt(numDimensions); for (int c = 0; c < (numOutcomes - 1); ++c) { Vector vC = mRegression.mWeightVectors[c]; for (int i = 0; i < numDimensions; ++i) out.writeDouble(vC.value(i)); } } public Object read(ObjectInput in) throws IOException { int numOutcomes = in.readInt(); int numDimensions = in.readInt(); Vector[] weightVectors = new Vector[numOutcomes-1]; for (int c = 0; c < weightVectors.length; ++c) { Vector weightVectorsC = new DenseVector(numDimensions); weightVectors[c] = weightVectorsC; for (int i = 0; i < numDimensions; ++i) weightVectorsC.setValue(i,in.readDouble()); } return new LogisticRegression(weightVectors); } }}