📄 classifierevaluator.java
字号:
/* * LingPipe v. 3.5 * Copyright (C) 2003-2008 Alias-i * * This program is licensed under the Alias-i Royalty Free License * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Alias-i * Royalty Free License Version 1 for more details. * * You should have received a copy of the Alias-i Royalty Free License * Version 1 along with this program; if not, visit * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211, * +1 (718) 290-9170. */package com.aliasi.classify;import com.aliasi.corpus.ClassificationHandler;import com.aliasi.util.Collections;import com.aliasi.util.Scored;import java.util.ArrayList;import java.util.HashSet;import java.util.List;/** * A <code>ClassifierEvaluator</code> provides an evaluation harness * for classifiers. An evaluator is constructed from a classifier and * a complete list of the categories returned by the classifier. Test * cases are then added using the {@link #addCase(String,Object)} * method, which accepts a string-based category and object to classify. The * evaluator will run the classifier over the input object and collect * results over multiple cases. Depending on the classification types * returned by the classifier, various report statistics are * available. * * <P>An exhaustive set of evaluation metrics for first-best * classification results is accessible as a confusion matrix through * the {@link #confusionMatrix()} method. Confusion matrices provide * dozens of statistics on classification which can be computed from * first-best results; see {@link ConfusionMatrix} for more * information. 
* * <P>Depending on the class of return results for the classifier * being evaluated, the following methods are supported: * * <blockquote> * <table border='1' cellpadding='5'> * <tr><td><i>Classifier Return Class</i></td> * <td><i>Supported Methods</i></td></tr> * <tr><td><code>Classification</code></td> * <td><table cellpadding='5'> * <tr><td>{@link #confusionMatrix()}</td></tr> * </table> * </td></tr> * <tr><td><code>RankedClassification</code></td> * <td><table cellpadding='5'> * <tr><td>{@link #rankCount(String,int)}</td></tr> * <tr><td>{@link #averageRankReference()}</td></tr> * <tr><td>{@link #meanReciprocalRank()}</td></tr> * <tr><td>{@link #averageRank(String,String)}</td></tr> * </table> * </td></tr> * <tr><td><code>ScoredClassification</code></td> * <td><table cellpadding='5'> * <tr><td>{@link #scoredOneVersusAll(String)}</td></tr> * <tr><td>{@link #averageScore(String,String)}</td></tr> * <tr><td>{@link #averageScoreReference()}</td></tr> * </table> * </td></tr> * <tr><td><code>ConditionalClassification</code></td> * <td><table cellpadding='5'> * <tr><td>{@link #averageConditionalProbability(String,String)}</td></tr> * <tr><td>{@link #averageConditionalProbabilityReference()}</td></tr> * </table> * </td></tr> * <tr><td><code>JointClassification</code></td> * <td><table cellpadding='5'> * <tr><td>{@link #averageLog2JointProbability(String,String)}</td></tr> * <tr><td>{@link #averageLog2JointProbabilityReference()}</td></tr> * </table> * </td></tr> * </table> * </blockquote> * * <P>If the input is a ranked classification and the reference * category does not appear at some rank in the classification, * results will be returned as if the reference category appeared in * the last possible rank in the ranked classification. This * heuristic for scoring applies to all four methods listed for ranked * classifications in the table above. 
As a consequence, the results
 * of {@link #averageRank(String,String)} might not be such as they
 * could be derived by a set of ranked classifications, because we are
 * assuming that all unlisted categories have the worst possible rank.
 *
 * <P>This class requires concurrent read and synchronous write
 * synchronization.  Reads are any of the statistics gathering methods
 * and write is just adding new test cases.
 *
 * <h4>Incomplete Rankings, Scorings and Conditionals</h4>
 *
 * <p>Some classifiers might not return a rank, score or conditional
 * probability estimate for every input.  In this case, the counts for
 * existing categories are still updated, but flags are set indicating
 * that values are missing.  If any ranked, scored or conditional
 * classification missed a rank, score or conditional probability estimate
 * for a category, the corresponding method will return true:
 * {@link #missingRankings()},
 * {@link #missingScorings()}, or
 * {@link #missingConditionals()}.
 *
 * @author  Bob Carpenter
 * @version 3.5
 * @since   LingPipe2.0
 */
public class ClassifierEvaluator<E,C extends Classification>
    implements ClassificationHandler<E,Classification> {

    // Flags for incomplete rankings, scorings and conditional estimates,
    // as described in the class documentation above.  The code that sets
    // them is not visible in this part of the file.
    boolean mDefectiveRanking = false;
    boolean mDefectiveScoring = false;
    boolean mDefectiveConditioning = false;

    // Classification
    final Classifier<E,C> mClassifier;
    private final ConfusionMatrix mConfusionMatrix;
    private int mNumCases = 0;
    final String[] mCategories;
    // NOTE(review): raw collection types are used throughout; the element
    // types are not established by the declarations themselves.
    final HashSet mCategorySet;

    // paired inputs and outputs
    final ArrayList mReferenceCategories = new ArrayList();
    final ArrayList mClassifications = new ArrayList();

    // RankedClassification
    private boolean mHasRanked = false;
    private final int[][] mRankCounts;

    // ScoredClassification
    private boolean mHasScored = false;
    private final ArrayList[] mScoreOutcomeLists;

    // ConditionalClassification
    private boolean mHasConditional = false;
    private final ArrayList[] mConditionalOutcomeLists;

    // JointClassification
    private boolean mHasJoint = false;

    /**
     * Construct a classifier
evaluator for the specified classifier * that records results for the specified set of categories. * * <P>If the classifier evaluator is only going to be populated * using the {@link #addClassification(String,Classification)} * method, then the classifier may be null. * * @param classifier Classifier to evaluate. * @param categories Categories of the classifier. */ public ClassifierEvaluator(Classifier<E,C> classifier, String[] categories) { // Classification mClassifier = classifier; mCategories = categories; mCategorySet = new HashSet(); Collections.addAll(mCategorySet,categories); mConfusionMatrix = new ConfusionMatrix(categories); // RankedClassification int len = categories.length; mRankCounts = new int[len][len]; for (int i = 0; i < len; ++i) for (int j = 0; j < len; ++j) mRankCounts[i][j] = 0; // Scored mScoreOutcomeLists = new ArrayList[numCategories()]; for (int i = 0; i < mScoreOutcomeLists.length; ++i) mScoreOutcomeLists[i] = new ArrayList(); // Conditional mConditionalOutcomeLists = new ArrayList[numCategories()]; for (int i = 0; i < mConditionalOutcomeLists.length; ++i) mConditionalOutcomeLists[i] = new ArrayList(); // Joint } /** * Returns the classifier for this evaluator. * * @return The classifier for this evaluator. */ public Classifier<E,C> classifier() { return mClassifier; } /** * Returns the categories for which this evaluator stores * results. * * @return The categories for which this evaluator stores * results. */ public String[] categories() { return mCategories; } /** * Adds a test case for the specified input with the specified * reference category. This method runs the classifer over * the specified input. It then stores the resulting classification * and reference category for collective reporting. * * <P>This method simply applies the classifier specified at * construction time to the specified input to produce a * classification which is forwarded to {@link * #addClassification(String,Classification)}. 
* * @param referenceCategory Correct category for object. * @param input Object being classified. * @throws IllegalArgumentException If the reference category is * not a category for this evaluator. */ public void addCase(String referenceCategory, E input) { validateCategory(referenceCategory); Classification classification = mClassifier.classify(input); addClassification(referenceCategory,classification); } /** * This is a convenience implementation for the classification * handler interface. It merely delegates to {@link * #addCase(String,Object)} by extracting the best category from * the specified classification. * * @param input Object being evaluated. * @param classification Reference classification of object. */ public void handle(E input, Classification classification) { addCase(classification.bestCategory(),input); } /** * Returns the number of test cases which have been provided * to this evaluator. * * @return The number of test cases which have been provided * to this evaluator. */ public int numCases() { return mNumCases; } /** * Returns the confusion matrix of first-best classification * result statistics for this evaluator. See {@link * ConfusionMatrix} for details of the numerous available * evaluation metrics provided by confusion matrices. * * @return The confusion matrix for the test cases evaluated so far. */ public ConfusionMatrix confusionMatrix() { return mConfusionMatrix; } /** * Returns <code>true</code> if this evaluation involved ranked * classifications that did not rank every category. * * @return <code>true</code> if categories were unranked in * some ranked classification. */ public boolean missingRankings() { return mDefectiveRanking; } /** * Returns <code>true</code> if this evaluation involved ranked * classifications that did not score every category. * * @return <code>true</code> if categories were unscored in * some scored classification. 
*/
    public boolean missingScorings() {
        return mDefectiveScoring;
    }

    /**
     * Returns <code>true</code> if this evaluation involved conditional
     * classifications that did not provide a conditional probability
     * estimate for every category.
     *
     * @return <code>true</code> if categories were missing conditional
     * probability estimates in some conditional classification.
     */
    public boolean missingConditionals() {
        // Report the conditional-specific flag, not the scoring flag.
        return mDefectiveConditioning;
    }

    /**
     * Returns the number of times that the reference category's
     * rank was the specified rank.
     *
     * <P>For example, in the set of training samples and results
     * described in the method documentation for {@link
     * #averageRank(String,String)}, sample rank counts are as
     * follows:
     *
     * <blockquote><code>
     * rankCount("a",0) = 3
     * <br>rankCount("a",1) = 1
     * <br>rankCount("a",2) = 0
     * <br>
     * <br>rankCount("b",0) = 1
     * <br>rankCount("b",1) = 0
     * <br>rankCount("b",2) = 1
     * <br>
     * <br>rankCount("c",0) = 1
     * <br>rankCount("c",1) = 0
     * <br>rankCount("c",2) = 0
     * </code></blockquote>
     *
     * These results are typically presented as a bar graph histogram
     * per category.
     *
     * @param referenceCategory Reference category.
     * @param rank Rank of count.
     * @return Number of times the reference category's ranking was
     * the specified rank.
     * @throws IllegalArgumentException If the category is unknown.
     */
    public int rankCount(String referenceCategory, int rank) {
        validateCategory(referenceCategory);
        // Resolve the category name to its index, then delegate to the
        // index-based overload.
        int i = categoryToIndex(referenceCategory);
        return rankCount(i,rank);
    }

    /**
     * Returns the average over all test samples of the rank of
     * the response that matches the reference category.
     *
     * <P>Using the example classifications shown in the method
     * documentation of {@link #averageRank(String,String)}:
     *
     * <blockquote><code>
     * averageRankReference()
     * <br> = (0 + 0 + 0 + 1 + 0 + 2 + 0)/7 ~ 0.43
     * </code></blockquote>
     *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -