📄 dynamiclmclassifier.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
		Corpus<ClassificationHandler<CharSequence,Classification>> labeledData,		Corpus<ObjectHandler<CharSequence>> unlabeledData,		int numEpochs,		double trainingInstanceMultiple) throws IOException {	DynamicLMClassifier<L> lastClassifier = classifierFactory.create();	labeledData.visitCorpus(lastClassifier);	for (int epoch = 0; epoch < numEpochs; ++epoch) {	    DynamicLMClassifier<L> classifier = classifierFactory.create();	    labeledData.visitCorpus(classifier);	    ObjectHandler<CharSequence> emHandler 		= new EmHandler(classifier,lastClassifier,trainingInstanceMultiple);	    unlabeledData.visitCorpus(emHandler);	    lastClassifier = classifier;	}	return lastClassifier;    }        /**     * Provides a training instance for the specified character     * sequence using the best category from the specified     * classification.  Only the first-best category from the     * classification is used.  The object is cast to {@link CharSequence},     * and the result passed along with the first-best category     * to {@link #train(String,CharSequence)}.     *     * @param charSequence Character sequence for training.     * @param classification Classification to use for training.     * @throws ClassCastException If the specified object does not     * implement <code>CharSequence</code>.     */    public void handle(CharSequence charSequence, Classification classification) {        train(classification.bestCategory(),(CharSequence) charSequence);    }    /**     * Returns the maximum likelihood estimator for categories in this     * classifier.  Changes to the returned model will be reflected in     * this classifier; thus it may be used to train the category     * estimator without affecting the language models for any     * category.     *     * @return The maximum likelihood estimator for categories in this     * classifier.     * @deprecated As of 3.0, use general method {@link     * #categoryDistribution()}.     */    public MultivariateEstimator categoryEstimator() {        return (MultivariateEstimator) mCategoryDistribution;    }    /**     * Returns the language model for the specified category.  Changes     * to the returned model will be reflected in this classifier; thus     * it may be used to train a language model without affecting     * the category estimates.     *     * @return The language model for the specified category.     * @throws IllegalArgumentException If the category is not known.     * @deprecated As of 3.0, use general {@link #languageModel(String)}.     */    public L lmForCategory(String category) {        L result = mCategoryToModel.get(category);        if (result == null) {            String msg = "Unknown category=" + category;            throw new IllegalArgumentException(msg);        }        return result;    }    /**     * Writes a compiled version of this classifier to the specified     * object output.  The object returned will be an instance     * of {@link LMClassifier}.     *     * @param objOut Object output to which this classifier is     * written.     * @throws IOException If there is an I/O exception writing to     * the output stream.     */    public void compileTo(ObjectOutput objOut) throws IOException {        objOut.writeObject(new Externalizer(this));    }    /**     * Resets the specified category to the specified language model.     * This also resets the count in the multivariate estimator of     * categories to zero.     *     * @param category Category to reset.     * @param lm New dynamic language model for category.     * @param newCount New count for category.     * @throws IllegalArgumentException If the category is not known.     */    public void resetCategory(String category,                              L lm,                              int newCount) {        if (newCount < 0) {            String msg = "Count must be non-negative."                + " Found new count=" + newCount;            throw new IllegalArgumentException(msg);        }        categoryEstimator().resetCount(category); // resets to zero        categoryEstimator().train(category,newCount);        L currentLM = lmForCategory(category);        for (int i = 0; i < mLanguageModels.length; ++i) {            if (currentLM == mLanguageModels[i]) {                mLanguageModels[i] = lm;                break;            }        }        mCategoryToModel.put(category,lm);    }    /**     * Construct a dynamic classifier over the specified categories,     * using process character n-gram models of the specified order.     *     * <P>See the documentation for the constructor {@link     * #DynamicLMClassifier(String[], LanguageModel.Dynamic[])} for     * information on the category multivariate estimate for priors.     *     * @param categories Categories used for classification.     * @param maxCharNGram Maximum length of character sequence     * counted in model.     * @throws IllegalArgumentException If there are not at least two     * categories.     */    public static DynamicLMClassifier<NGramProcessLM>        createNGramProcess(String[] categories,                           int maxCharNGram) {        NGramProcessLM[] lms = new NGramProcessLM[categories.length];        for (int i = 0; i < lms.length; ++i)            lms[i] = new NGramProcessLM(maxCharNGram);        return new DynamicLMClassifier<NGramProcessLM>(categories,lms);    }    /**     * Construct a dynamic classifier over the specified cateogries,     * using boundary character n-gram models of the specified order.     *     * <P>See the documentation for the constructor {@link     * #DynamicLMClassifier(String[], LanguageModel.Dynamic[])} for     * information on the category multivariate estimate for priors.     *     * @param categories Categories used for classification.     * @param maxCharNGram Maximum length of character sequence     * counted in model.     * @throws IllegalArgumentException If there are not at least two     * categories.     */    public static DynamicLMClassifier<NGramBoundaryLM>        createNGramBoundary(String[] categories,                              int maxCharNGram) {        NGramBoundaryLM[] lms = new NGramBoundaryLM[categories.length];        for (int i = 0; i < lms.length; ++i)            lms[i] = new NGramBoundaryLM(maxCharNGram);        return new DynamicLMClassifier<NGramBoundaryLM>(categories,lms);    }    /**     * Construct a dynamic language model classifier over the     * specified categories using token n-gram language models of the     * specified order and the specified tokenizer factory for     * tokenization.     *     * <P>The multivariate estimator over categories is initialized     * with one count for each category.     *     * <P>The unknown token and whitespace models are uniform sequence     * models.     *     * @param categories Categories used for classification.     * @param maxTokenNGram Maximum length of token n-grams used.     * @param tokenizerFactory Tokenizer factory for tokenization.     * @throws IllegalArgumentException If there are not at least two     * categories.     */    public static DynamicLMClassifier<TokenizedLM>        createTokenized(String[] categories,                        TokenizerFactory tokenizerFactory,                        int maxTokenNGram) {        TokenizedLM[] lms = new TokenizedLM[categories.length];        for (int i = 0; i < lms.length; ++i)            lms[i] = new TokenizedLM(tokenizerFactory,maxTokenNGram);        return new DynamicLMClassifier<TokenizedLM>(categories,lms);    }    // used in init and by other classes to create a smoothed estimator    static MultivariateEstimator createCategoryEstimator(String[] categories) {        MultivariateEstimator estimator = new MultivariateEstimator();        for (int i = 0; i < categories.length; ++i)            estimator.train(categories[i],1);        return estimator;    }    private static class Externalizer extends AbstractExternalizable {        static final long serialVersionUID = -5411956637253735953L;        final DynamicLMClassifier mClassifier;        public Externalizer() {            mClassifier = null;        }        public Externalizer(DynamicLMClassifier classifier) {            mClassifier = classifier;        }        public void writeExternal(ObjectOutput objOut) throws IOException {            objOut.writeObject(mClassifier.categories());            mClassifier.categoryEstimator().compileTo(objOut);            int numCategories = mClassifier.mCategories.length;            for (int i = 0; i < numCategories; ++i)                ((LanguageModel.Dynamic) mClassifier.mLanguageModels[i]).compileTo(objOut);        }        public Object read(ObjectInput objIn)            throws ClassNotFoundException, IOException {            String[] categories                = (String[]) objIn.readObject();            MultivariateDistribution categoryEstimator                = (MultivariateDistribution) objIn.readObject();            LanguageModel[] models = new LanguageModel[categories.length];            for (int i = 0; i < models.length; ++i)                models[i] = (LanguageModel) objIn.readObject();            return new LMClassifier(categories,models,categoryEstimator);        }    }}
上一页 12
💿 文件大小 4561 K
👤 上传用户 edan1181
📂 所属分类 Java编程
🏷️ 相关标签

#LingPipe #Java #自然语言处理 #开源
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -