📄 perceptronclassifier.java
字号:
// NOTE(review): this span begins inside the training routine whose header lies
// above this excerpt; featureVectors[], polarities[], numIterations and corpus
// are locals/params of that routine. Only formatting and comments are changed here.

        corpus = null; // don't need it any more; allow GC of the training corpus

        // Initialize kernel-perceptron state: parallel arrays of surviving
        // hypothesis weights and the index (into featureVectors) of the basis
        // vector that created each hypothesis.
        int currentPerceptronIndex = -1; // no initial zero perceptron
        int[] weights = new int[INITIAL_BASIS_SIZE];
        int[] basisIndexes = new int[INITIAL_BASIS_SIZE];

        // Standard mistake-driven perceptron training: a correct prediction
        // increments the current hypothesis's survival weight; a mistake spawns
        // a new hypothesis anchored on the misclassified example.
        for (int iteration = 0; iteration < numIterations; ++iteration) {
            for (int i = 0; i < featureVectors.length; ++i) {
                double yHat = prediction(featureVectors[i], featureVectors, polarities,
                                         weights, basisIndexes, currentPerceptronIndex);
                boolean accept = yHat > 0.0;
                if (accept == polarities[i]) {
                    // correct prediction: current hypothesis survives one more example
                    if (currentPerceptronIndex >= 0) // avoid incrementing zero
                        ++weights[currentPerceptronIndex];
                } else {
                    // incorrect prediction: start a new hypothesis on example i
                    ++currentPerceptronIndex;
                    if (currentPerceptronIndex >= weights.length) {
                        // NOTE(review): Arrays.reallocate is a project utility
                        // (not java.util.Arrays) — presumably grows the array; confirm.
                        weights = Arrays.reallocate(weights);
                        basisIndexes = Arrays.reallocate(basisIndexes);
                    }
                    basisIndexes[currentPerceptronIndex] = i;
                    weights[currentPerceptronIndex] = 1;
                }
            }
        }

        // Renumber indexes to pack only the distinct basis vectors actually used
        // (the same training example may anchor several hypotheses).
        Map<Integer,Integer> renumbering = new HashMap<Integer,Integer>();
        int next = 0;
        for (int i = 0; i <= currentPerceptronIndex; ++i)
            if (!renumbering.containsKey(basisIndexes[i]))
                renumbering.put(basisIndexes[i],next++);

        // Compute packed basis vectors and cumulative weights for the averaged
        // perceptron: iterating from the last hypothesis backwards, weightSum
        // accumulates survival counts so earlier hypotheses (which survived
        // through all later ones) receive larger effective weight.
        mBasisVectors = new SparseFloatVector[renumbering.size()];
        mBasisWeights = new int[renumbering.size()];
        int weightSum = 0;
        for (int i = currentPerceptronIndex+1; --i >= 0; ) {
            int oldIndex = basisIndexes[i];
            int newIndex = renumbering.get(oldIndex);
            mBasisVectors[newIndex] = featureVectors[oldIndex];
            weightSum += weights[i];
            if (polarities[i])
                mBasisWeights[newIndex] += weightSum;
            else
                mBasisWeights[newIndex] -= weightSum;
        }
    }

    /**
     * Returns the kernel function for this perceptron.
     *
     * @return The kernel function for this perceptron.
     */
    public KernelFunction kernelFunction() {
        return mKernelFunction;
    }

    /**
     * Returns the feature extractor for this perceptron.
     *
     * @return The feature extractor for this perceptron.
     */
    public FeatureExtractor<? super E> featureExtractor() {
        return mFeatureExtractor;
    }

    /**
     * Returns a string-based representation of this perceptron.
     * This may be long, as it outputs every basis vector and weight.
     *
     * @return A string-based representation of this perceptron.
     */
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("Averaged Perceptron");
        sb.append(" Kernel Function=" + mKernelFunction + "\n");
        for (int i = 0; i < mBasisVectors.length; ++i)
            sb.append(" idx=" + i + " "
                      + "vec=" + mBasisVectors[i]
                      + " wgt=" + mBasisWeights[i] + "\n");
        return sb.toString();
    }

    /**
     * Return the scored classification for the specified input.  The
     * input is first converted to a feature vector using the feature
     * extractor, then scored against the perceptron.  The resulting
     * score for the accept category is the perceptron score, and
     * the resulting score for the reject category is the negative
     * perceptron score.
     *
     * @param in The element to be classified.
     * @return The scored classification for the specified element.
     */
    public ScoredClassification classify(E in) {
        Map<String,? extends Number> featureVector = mFeatureExtractor.features(in);
        // toVector (not toVectorAddSymbols): unseen features are dropped rather
        // than added, since they cannot match any basis vector anyway
        SparseFloatVector inputVector = toVector(featureVector,mSymbolTable,Integer.MAX_VALUE);
        double sum = 0.0;
        for (int i = mBasisVectors.length; --i >= 0; )
            sum += mBasisWeights[i]
                * mKernelFunction.proximity(mBasisVectors[i], inputVector);
        // best category first; scores are the signed perceptron score and its negation
        return sum > 0
            ? new ScoredClassification(new String[] { mAcceptCategory, mRejectCategory },
                                       new double[] { sum, -sum })
            : new ScoredClassification(new String[] { mRejectCategory, mAcceptCategory },
                                       new double[] { -sum, sum });
    }

    // Training-time prediction using the hypotheses accumulated so far
    // (parallel arrays weights/basisIndexes up to currentPerceptronIndex).
    // NOTE(review): weightSum is pinned to 1 and the accumulation lines are
    // commented out, so during training this behaves as an ordinary (unvoted)
    // perceptron; the voted variant is only applied at the end of training when
    // mBasisWeights is computed.  Confirm this asymmetry is intentional.
    double prediction(SparseFloatVector inputVector,
                      SparseFloatVector[] featureVectors,
                      boolean[] polarities,
                      int[] weights,
                      int[] basisIndexes,
                      int currentPerceptronIndex) {
        double sum = 0.0;
        // int weightSum = 0;
        int weightSum = 1;
        for (int i = currentPerceptronIndex; i >= 0; --i) {
            // weightSum += weights[i];
            int index = basisIndexes[i];
            double kernel = mKernelFunction.proximity(inputVector,featureVectors[index]);
            double total = (polarities[i] ? weightSum : -weightSum) * kernel;
            sum += total;
        }
        return sum;
    }

    // Fast exponentiation for the small integer exponents typical of
    // polynomial kernels; falls back to Math.pow for larger exponents.
    static double power(double base, int exponent) {
        switch (exponent) {
        case 0: return 1.0;
        case 1: return base;
        case 2: return base * base;
        case 3: return base * base * base;
        case 4: return base * base * base * base;
        default: return Math.pow(base,exponent);
        }
    }

    // Serialization proxy hook: serialize via Externalizer instead of this object.
    private Object writeReplace() {
        return new Externalizer<E>(this);
    }

    // Collects the training corpus as parallel feature-vector/polarity lists,
    // adding feature symbols to the shared symbol table as they are seen.
    class CorpusCollector implements ClassificationHandler<E,Classification> {

        final List<Vector> mInputFeatureVectorList = new ArrayList<Vector>();
        final List<Boolean> mInputAcceptList = new ArrayList<Boolean>();

        public void handle(E object, Classification c) {
            Map<String,? extends Number> featureMap = mFeatureExtractor.features(object);
            mInputFeatureVectorList.add(toVectorAddSymbols(featureMap,mSymbolTable,Integer.MAX_VALUE));
            // polarity is true iff the example's best category is the accept category
            mInputAcceptList.add(mAcceptCategory.equals(c.bestCategory())
                                 ? Boolean.TRUE
                                 : Boolean.FALSE);
        }

        // Both lists are always the same length (one entry per handled example),
        // so sizing off mInputAcceptList here is safe.
        SparseFloatVector[] featureVectors() {
            SparseFloatVector[] vectors = new SparseFloatVector[mInputAcceptList.size()];
            mInputFeatureVectorList.toArray(vectors);
            return vectors;
        }

        boolean[] polarities() {
            boolean[] categories = new boolean[mInputAcceptList.size()];
            for (int i = 0; i < categories.length; ++i)
                categories[i] = mInputAcceptList.get(i).booleanValue();
            return categories;
        }
    }

    // Serialization proxy: writes/reads the classifier's compiled state
    // (feature extractor, kernel, symbol table, basis vectors/weights, categories).
    // read() must mirror writeExternal()'s field order exactly.
    static class Externalizer<F> extends AbstractExternalizable {
        static final long serialVersionUID = -1901362811305741506L;
        final PerceptronClassifier<F> mClassifier;
        public Externalizer() {
            this(null);
        }
        public Externalizer(PerceptronClassifier<F> classifier) {
            mClassifier = classifier;
        }
        public Object read(ObjectInput in) throws ClassNotFoundException, IOException {
            FeatureExtractor<F> featureExtractor = (FeatureExtractor<F>) in.readObject();
            KernelFunction kernelFunction = (KernelFunction) in.readObject();
            MapSymbolTable symbolTable = (MapSymbolTable) in.readObject();
            int basisLen = in.readInt();
            SparseFloatVector[] basisVectors = new SparseFloatVector[basisLen];
            for (int i = 0; i < basisLen; ++i)
                basisVectors[i] = (SparseFloatVector) in.readObject();
            int[] basisWeights = new int[basisLen];
            for (int i = 0; i < basisLen; ++i)
                basisWeights[i] = in.readInt();
            String acceptCategory = in.readUTF();
            String rejectCategory = in.readUTF();
            return new PerceptronClassifier<F>(featureExtractor,
                                               kernelFunction,
                                               symbolTable,
                                               basisVectors,
                                               basisWeights,
                                               acceptCategory,
                                               rejectCategory);
        }
        public void writeExternal(ObjectOutput out) throws IOException {
            // feature extractor: prefer compilation, fall back to serialization
            if (mClassifier.mFeatureExtractor instanceof Compilable) {
                ((Compilable) mClassifier.mFeatureExtractor).compileTo(out);
            } else if (mClassifier.mFeatureExtractor instanceof Serializable) {
                out.writeObject(mClassifier.mFeatureExtractor);
            } else {
                String msg = "Feature extractor not Compilable or Serializable."
                    + " Found class=" + mClassifier.mFeatureExtractor.getClass();
                throw new UnsupportedOperationException(msg);
            }
            // kernel function
            // NOTE(review): unlike the feature extractor above, a kernel that is
            // neither Compilable nor Serializable is silently skipped here, which
            // would leave read() misaligned — confirm whether an else-throw is missing.
            if (mClassifier.mKernelFunction instanceof Compilable) {
                ((Compilable) mClassifier.mKernelFunction).compileTo(out);
            } else if (mClassifier.mKernelFunction instanceof Serializable) {
                out.writeObject(mClassifier.mKernelFunction);
            }
            // symbol table
            out.writeObject(mClassifier.mSymbolTable);
            // basis length
            out.writeInt(mClassifier.mBasisVectors.length);
            // basis vectors
            for (int i = 0; i < mClassifier.mBasisVectors.length; ++i)
                out.writeObject(mClassifier.mBasisVectors[i]);
            // basis weights
            for (int i = 0; i < mClassifier.mBasisWeights.length; ++i)
                out.writeInt(mClassifier.mBasisWeights[i]);
            // accept, reject cats
            out.writeUTF(mClassifier.mAcceptCategory);
            out.writeUTF(mClassifier.mRejectCategory);
        }
    }

    /**
     * Convert the specified feature vector into a sparse float vector using
     * the specified symbol table to encode features as integers.  Features
     * that do not exist as symbols in the symbol table will be added
     * to the symbol table.
     *
     * @param featureVector Feature vector to convert to sparse float vector.
     * @param table Symbol table for encoding features as integers.
     * @param numDimensions Number of dimensions of the resulting vector.
     * @return Sparse float vector encoding the feature vector with
     * the symbol table.
     */
    static SparseFloatVector toVectorAddSymbols(Map<String,? extends Number> featureVector,
                                                SymbolTable table,
                                                int numDimensions) {
        return toVectorAddSymbols(featureVector,table,numDimensions,false);
    }

    // As above, optionally adding an intercept feature of 1.0 at dimension 0.
    static SparseFloatVector toVectorAddSymbols(Map<String,? extends Number> featureVector,
                                                SymbolTable table,
                                                int numDimensions,
                                                boolean addIntercept) {
        // presize above the default load factor to avoid rehashing
        int size = (featureVector.size() * 3) / 2;
        Map<Integer,Number> vectorMap = new HashMap<Integer,Number>(size);
        for (Map.Entry<String,? extends Number> entry : featureVector.entrySet()) {
            String feature = entry.getKey();
            Number val = entry.getValue();
            int id = table.getOrAddSymbol(feature);
            vectorMap.put(new Integer(id), val);
        }
        if (addIntercept)
            vectorMap.put(new Integer(0),1.0);
        return new SparseFloatVector(vectorMap,numDimensions);
    }

    // Read-only conversion: features with no symbol in the table are silently
    // dropped (they cannot match any basis vector).
    static SparseFloatVector toVector(Map<String,? extends Number> featureVector,
                                      SymbolTable table,
                                      int numDimensions) {
        return toVector(featureVector,table,numDimensions,false);
    }

    static SparseFloatVector toVector(Map<String,? extends Number> featureVector,
                                      SymbolTable table,
                                      int numDimensions,
                                      boolean addIntercept) {
        int size = (featureVector.size() * 3) / 2;
        Map<Integer,Number> vectorMap = new HashMap<Integer,Number>(size);
        for (Map.Entry<String,? extends Number> entry : featureVector.entrySet()) {
            String feature = entry.getKey();
            int id = table.symbolToID(feature);
            if (id < 0) continue; // symbol not in any basis vector
            Number val = entry.getValue();
            vectorMap.put(new Integer(id), val);
        }
        if (addIntercept)
            vectorMap.put(new Integer(0),1.0);
        return new SparseFloatVector(vectorMap,numDimensions);
    }

    // Initial capacity for the two parallel training arrays
    // (32K ints * 4B = 128KB each before any reallocation).
    static final int INITIAL_BASIS_SIZE = 32*1024;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -