⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 perceptronclassifier.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
        corpus = null; // don't need it any more

        // --- Tail of the training routine (signature is on the previous page of this
        // --- excerpt).  Implements the averaged (voted) kernel perceptron: each
        // --- misclassified example becomes a new "basis" entry; weights[k] counts how
        // --- many subsequent examples hypothesis k survived, used later for averaging.

        // initialize perceptrons
        int currentPerceptronIndex = -1;  // no initial zero perceptron
        int[] weights = new int[INITIAL_BASIS_SIZE];
        int[] basisIndexes = new int[INITIAL_BASIS_SIZE];
        for (int iteration = 0; iteration < numIterations; ++iteration) {
            for (int i = 0; i < featureVectors.length; ++i) {
                // Score example i against the CURRENT hypothesis (see prediction()).
                double yHat = prediction(featureVectors[i],
                                         featureVectors,
                                         polarities,
                                         weights,
                                         basisIndexes,
                                         currentPerceptronIndex);
                boolean accept = yHat > 0.0;
                if (accept == polarities[i]) {
                    // Correct: current hypothesis survives one more example.
                    if (currentPerceptronIndex >= 0) // avoid incrementing zero
                        ++weights[currentPerceptronIndex];
                } else {
                    // Mistake: record example i as a new basis vector with survival
                    // count 1, growing the parallel arrays on demand.
                    ++currentPerceptronIndex;
                    if (currentPerceptronIndex >= weights.length) {
                        // NOTE(review): Arrays.reallocate is a LingPipe utility;
                        // assumed to return a larger copy preserving contents — confirm.
                        weights = Arrays.reallocate(weights);
                        basisIndexes = Arrays.reallocate(basisIndexes);
                    }
                    basisIndexes[currentPerceptronIndex] = i;
                    weights[currentPerceptronIndex] = 1;
                }
            }
        }

        // renumber indexes to pack only necessary basis vectors
        // (the same example may have been added as a mistake more than once;
        // renumbering maps each distinct example index to one packed slot)
        Map<Integer,Integer> renumbering = new HashMap<Integer,Integer>();
        int next = 0;
        for (int i = 0; i <= currentPerceptronIndex; ++i)
            if (!renumbering.containsKey(basisIndexes[i]))
                renumbering.put(basisIndexes[i],next++);

        // compute basis vectors and cumulative weight for avg
        // Iterating from the last hypothesis down, weightSum accumulates
        // sum_{j>=i} weights[j]: hypothesis i's basis vector participates in every
        // later hypothesis, so that suffix sum is its averaged-perceptron
        // coefficient, signed by the example's polarity.
        mBasisVectors = new SparseFloatVector[renumbering.size()];
        mBasisWeights = new int[renumbering.size()];
        int weightSum = 0;
        for (int i = currentPerceptronIndex+1; --i >= 0; ) {
            int oldIndex = basisIndexes[i];
            int newIndex = renumbering.get(oldIndex);
            mBasisVectors[newIndex] = featureVectors[oldIndex];
            weightSum += weights[i];
            if (polarities[i])
                mBasisWeights[newIndex] += weightSum;
            else
                mBasisWeights[newIndex] -= weightSum;
        }
    }

    /**
     * Returns the kernel function for this perceptron.
     *
     * @return The kernel function for this perceptron.
     */
    public KernelFunction kernelFunction() {
        return mKernelFunction;
    }

    /**
     * Returns the feature extractor for this perceptron.
     *
     * @return The feature extractor for this perceptron.
     */
    public FeatureExtractor<? super E> featureExtractor() {
        return mFeatureExtractor;
    }

    /**
     * Returns a string-based representation of this perceptron.
     * This may be long, as it outputs every basis vector and weight.
     *
     * @return A string-based representation of this perceptron.
     */
    public String toString() {
        StringBuilder sb = new StringBuilder();
        // NOTE(review): no newline after the title, so the kernel-function line is
        // appended to it directly; cosmetic only — confirm before "fixing".
        sb.append("Averaged Perceptron");
        sb.append("  Kernel Function=" + mKernelFunction + "\n");
        for (int i = 0; i < mBasisVectors.length; ++i)
            sb.append("  idx=" + i + " "
                      + "vec=" + mBasisVectors[i]
                      + " wgt=" + mBasisWeights[i]
                      + "\n");
        return sb.toString();
    }

    /**
     * Return the scored classification for the specified input.  The
     * input is first converted to a feature vector using the feature
     * extractor, then scored against the perceptron.  The resulting
     * score for the accept category is the perceptron score, and
     * the resulting score for the reject category is the negative
     * perceptron score.
     *
     * @param in The element to be classified.
     * @return The scored classification for the specified element.
     */
    public ScoredClassification classify(E in) {
        Map<String,? extends Number> featureVector = mFeatureExtractor.features(in);
        // Unknown features are dropped (toVector, not toVectorAddSymbols): a symbol
        // absent from every basis vector cannot affect the kernel score.
        SparseFloatVector inputVector = toVector(featureVector,mSymbolTable,Integer.MAX_VALUE);
        // Averaged-perceptron score: weighted kernel sum over the packed basis.
        double sum = 0.0;
        for (int i = mBasisVectors.length; --i >= 0; )
            sum += mBasisWeights[i] * mKernelFunction.proximity(mBasisVectors[i],
                                                               inputVector);
        // Best category first; scores are antisymmetric (+sum / -sum).
        return sum > 0
            ? new ScoredClassification(new String[] { mAcceptCategory,
                                                      mRejectCategory },
                                       new double[] { sum, -sum })
            : new ScoredClassification(new String[] { mRejectCategory,
                                                      mAcceptCategory },
                                       new double[] { -sum, sum });
    }

    /**
     * Training-time score of the input vector against the CURRENT (latest)
     * hypothesis, not the averaged one: each recorded mistake contributes its
     * polarity-signed kernel value with unit weight.  The commented-out lines
     * below are the weighted (voted) variant that was deliberately disabled.
     *
     * @param inputVector Vector to score.
     * @param featureVectors All training vectors (basis entries index into this).
     * @param polarities Accept/reject label per training vector.
     * @param weights Survival counts per hypothesis (unused here; kept for the
     *        disabled voted variant).
     * @param basisIndexes Training-vector index per hypothesis.
     * @param currentPerceptronIndex Index of the last hypothesis (-1 if none).
     * @return The kernel perceptron score for the input vector.
     */
    double prediction(SparseFloatVector inputVector,
                      SparseFloatVector[] featureVectors,
                      boolean[] polarities,
                      int[] weights,
                      int[] basisIndexes,
                      int currentPerceptronIndex) {
        double sum = 0.0;
        // int weightSum = 0;
        int weightSum = 1;
        for (int i = currentPerceptronIndex; i >= 0; --i) {
            // weightSum += weights[i];
            int index = basisIndexes[i];
            double kernel = mKernelFunction.proximity(inputVector,featureVectors[index]);
            double total = (polarities[i] ? weightSum : -weightSum) * kernel;
            sum += total;
        }
        return sum;
    }

    /**
     * Raise base to the specified integer exponent, with small exponents
     * (0..4) unrolled to avoid the cost of Math.pow.
     *
     * @param base The base.
     * @param exponent The integer exponent.
     * @return base raised to the exponent.
     */
    static double power(double base, int exponent) {
        switch (exponent) {
        case 0:
            return 1.0;
        case 1:
            return base;
        case 2:
            return base * base;
        case 3:
            return base * base * base;
        case 4:
            return base * base * base * base;
        default:
            return Math.pow(base,exponent);
        }
    }

    // Serialization proxy hook: serialize via Externalizer, not this class directly.
    private Object writeReplace() {
        return new Externalizer<E>(this);
    }

    /**
     * Corpus visitor that converts each training instance to a sparse
     * feature vector (adding new feature symbols to the symbol table)
     * and records whether its gold label equals the accept category.
     */
    class CorpusCollector
        implements ClassificationHandler<E,Classification> {

        // Parallel lists: vector i corresponds to accept-flag i.
        final List<Vector> mInputFeatureVectorList
            = new ArrayList<Vector>();
        final List<Boolean> mInputAcceptList
            = new ArrayList<Boolean>();

        public void handle(E object, Classification c) {
            Map<String,? extends Number> featureMap = mFeatureExtractor.features(object);
            mInputFeatureVectorList.add(toVectorAddSymbols(featureMap,mSymbolTable,Integer.MAX_VALUE));
            mInputAcceptList.add(mAcceptCategory.equals(c.bestCategory())
                                 ? Boolean.TRUE
                                 : Boolean.FALSE);
        }

        // Collected vectors as an array, in corpus order.
        SparseFloatVector[] featureVectors() {
            SparseFloatVector[] vectors = new SparseFloatVector[mInputAcceptList.size()];
            mInputFeatureVectorList.toArray(vectors);
            return vectors;
        }

        // Collected accept flags as a primitive array, in corpus order.
        boolean[] polarities() {
            boolean[] categories = new boolean[mInputAcceptList.size()];
            for (int i = 0; i < categories.length; ++i)
                categories[i] = mInputAcceptList.get(i).booleanValue();
            return categories;
        }
    }

    /**
     * Serialization proxy: writes the classifier's components in a fixed
     * order and reconstructs an equivalent classifier on read.  Field order
     * here must match {@code writeExternal} exactly.
     */
    static class Externalizer<F> extends AbstractExternalizable {
        static final long serialVersionUID = -1901362811305741506L;
        final PerceptronClassifier<F> mClassifier;
        public Externalizer() {
            this(null);
        }
        public Externalizer(PerceptronClassifier<F> classifier) {
            mClassifier = classifier;
        }
        public Object read(ObjectInput in) throws ClassNotFoundException, IOException {
            FeatureExtractor<F> featureExtractor
                = (FeatureExtractor<F>) in.readObject();
            KernelFunction kernelFunction
                = (KernelFunction) in.readObject();
            MapSymbolTable symbolTable = (MapSymbolTable) in.readObject();
            int basisLen = in.readInt();
            SparseFloatVector[] basisVectors = new SparseFloatVector[basisLen];
            for (int i = 0; i < basisLen; ++i)
                basisVectors[i] = (SparseFloatVector) in.readObject();
            int[] basisWeights = new int[basisLen];
            for (int i = 0; i < basisLen; ++i)
                basisWeights[i] = in.readInt();
            String acceptCategory = in.readUTF();
            String rejectCategory = in.readUTF();
            // NOTE(review): assumes a constructor taking these seven arguments exists
            // on the previous page of this excerpt — confirm.
            return new PerceptronClassifier<F>(featureExtractor,
                                               kernelFunction,
                                               symbolTable,
                                               basisVectors,
                                               basisWeights,
                                               acceptCategory,
                                               rejectCategory);
        }
        public void writeExternal(ObjectOutput out) throws IOException {
            // feature extractor
            if (mClassifier.mFeatureExtractor instanceof Compilable) {
                ((Compilable) mClassifier.mFeatureExtractor).compileTo(out);
            } else if (mClassifier.mFeatureExtractor instanceof Serializable) {
                out.writeObject(mClassifier.mFeatureExtractor);
            } else {
                String msg = "Feature extractor not Compilable or Serializable."
                    + " Found class=" + mClassifier.mFeatureExtractor.getClass();
                throw new UnsupportedOperationException(msg);
            }
            // kernel function
            // NOTE(review): unlike the feature extractor, a kernel function that is
            // neither Compilable nor Serializable is silently skipped here, which
            // would desynchronize the stream read() expects — confirm intent.
            if (mClassifier.mKernelFunction instanceof Compilable) {
                ((Compilable) mClassifier.mKernelFunction).compileTo(out);
            } else if (mClassifier.mKernelFunction instanceof Serializable) {
                out.writeObject(mClassifier.mKernelFunction);
            }
            // symbol table
            out.writeObject(mClassifier.mSymbolTable);
            // basis length
            out.writeInt(mClassifier.mBasisVectors.length);
            // basis vectors
            for (int i = 0; i < mClassifier.mBasisVectors.length; ++i)
                out.writeObject(mClassifier.mBasisVectors[i]);
            // basis weights
            for (int i = 0; i < mClassifier.mBasisWeights.length; ++i)
                out.writeInt(mClassifier.mBasisWeights[i]);
            // accept, reject cats
            out.writeUTF(mClassifier.mAcceptCategory);
            out.writeUTF(mClassifier.mRejectCategory);
        }
    }

    /**
     * Convert the specified feature vector into a sparse float vector using
     * the specified symbol table to encode features as integers.  Features
     * that do not exist as symbols in the symbol table will be added
     * to the symbol table.
     *
     * @param featureVector Feature vector to convert to sparse float vector.
     * @param table Symbol table for encoding features as integers.
     * @param numDimensions Number of dimensions of the resulting vector.
     * @return Sparse float vector encoding the feature vector with
     * the symbol table.
     */
    static SparseFloatVector
        toVectorAddSymbols(Map<String,? extends Number> featureVector,
                           SymbolTable table,
                           int numDimensions) {
        return toVectorAddSymbols(featureVector,table,numDimensions,false);
    }

    // As above, optionally forcing dimension 0 to 1.0 as an intercept term.
    static SparseFloatVector
        toVectorAddSymbols(Map<String,? extends Number> featureVector,
                           SymbolTable table,
                           int numDimensions,
                           boolean addIntercept) {
        int size = (featureVector.size() * 3) / 2; // presize map below hash-load factor
        Map<Integer,Number> vectorMap = new HashMap<Integer,Number>(size);
        for (Map.Entry<String,? extends Number> entry : featureVector.entrySet()) {
            String feature = entry.getKey();
            Number val = entry.getValue();
            int id = table.getOrAddSymbol(feature);
            // NOTE(review): new Integer(...) is deprecated boxing; Integer.valueOf
            // or autoboxing preferred on modern JDKs (behavior-identical here).
            vectorMap.put(new Integer(id), val);
        }
        if (addIntercept)
            vectorMap.put(new Integer(0),1.0);
        return new SparseFloatVector(vectorMap,numDimensions);
    }

    // Read-only counterpart of toVectorAddSymbols: unknown features are dropped
    // rather than added to the symbol table.
    static SparseFloatVector toVector(Map<String,? extends Number> featureVector,
                                      SymbolTable table,
                                      int numDimensions) {
        return toVector(featureVector,table,numDimensions,false);
    }

    static SparseFloatVector toVector(Map<String,? extends Number> featureVector,
                                      SymbolTable table,
                                      int numDimensions,
                                      boolean addIntercept) {
        int size = (featureVector.size() * 3) / 2;
        Map<Integer,Number> vectorMap = new HashMap<Integer,Number>(size);
        for (Map.Entry<String,? extends Number> entry : featureVector.entrySet()) {
            String feature = entry.getKey();
            int id = table.symbolToID(feature);
            if (id < 0) continue; // symbol not in any basis vector
            Number val = entry.getValue();
            vectorMap.put(new Integer(id), val);
        }
        if (addIntercept)
            vectorMap.put(new Integer(0),1.0);
        return new SparseFloatVector(vectorMap,numDimensions);
    }

    // Initial capacity of the parallel weights/basisIndexes training arrays.
    static final int INITIAL_BASIS_SIZE = 32*1024;  // 32K * 8B = 240KB initially
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -