📄 withindoccoref.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
字号:
/* * LingPipe v. 3.5 * Copyright (C) 2003-2008 Alias-i * * This program is licensed under the Alias-i Royalty Free License * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i * Royalty Free License Version 1 for more details. * * You should have received a copy of the Alias-i Royalty Free License * Version 1 along with this program; if not, visit * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211, * +1 (718) 290-9170. */package com.aliasi.coref;import com.aliasi.util.Collections;import java.util.Arrays;import java.util.ArrayList;import java.util.Comparator;import java.util.List;/** * A <code>WithinDocCoref</code> object handles resolution of * coreference relations between mentions of entities. * * @author  Bob Carpenter * @version 1.0 * @since   LingPipe1.0 */public final class WithinDocCoref {    private final ArrayList mMentionChains = new ArrayList();    /**     * The mention factory used to create mentions and mention     * chains.     */    private final MentionFactory mMentionFactory;    /**     * Construct an instance of within-document coreference with     * the specified mention factory.     *     * @param mentionFactory Factory for creating mentions and chains.     */    public WithinDocCoref(MentionFactory mentionFactory) {        mMentionFactory = mentionFactory;    }    /**     * Returns the set of mention chains, sorted in order of     * identifier from first appearance to last.     *     * @return Array of mention chains resolved for this document.     */    public MentionChain[] mentionChains() {        MentionChain[] result = new MentionChain[mMentionChains.size()];        mMentionChains.toArray(result);        return result;    }    /**     * Resolves a specified mention at a specified offset, returning     * the integer identifier of the mention, or <code>-1</code> if the     * mention could not be resolved, which should only occur for     * pronominal mentions.     *     * @param mention Mention to resolve.     * @param offset Sentence offset of the specified mention.     * @return Integer identifier for the mention, or <code>-1</code>     * if it could not be resolved.     */    public int resolveMention(Mention mention, int offset) {        List[] hypotheses = new List[Matcher.MAX_SCORE+1];        for (int i = 0; i < hypotheses.length; ++i)            hypotheses[i] = new ArrayList();        MentionChain[] antecedents = mentionChains();        Arrays.sort(antecedents,SENTENCE_FINAL_COMPARATOR);        for (int i = 0; i < antecedents.length; ++i) {            MentionChain nextAntecedent = antecedents[i];            if (finished(offset,nextAntecedent,hypotheses)) break;            addPossibleAntecedent(mention,offset,nextAntecedent,hypotheses);        }        return selectAntecedent(hypotheses,mention,offset);    }    /**     * Returns <code>true</code> if there is a hypothesis in the     * specified list of hypotheses that has a better score than the     * chain under consideration can possibly have versus the mention.     * Currently, only distance is used, with coreference being     * finished if a mention has a potential antecedent with a total     * score less than the distance from the specified chain.     *     * @param mentionOffset Offset of mention to measure.     * @param chain Mention chain to measure against mention.     * @param hypotheses Current hypotheses for antecedent of mention.     * @return <code>true</code> if there is a better hypothesis in     * the list of hypotheses than the mention can score against the     * chain.     */    private boolean finished(int mentionOffset,                             MentionChain chain,                             List[] hypotheses) {        int distance = distanceScore(mentionOffset,chain);        for (int i = distance; i > 0; --i)            if (hypotheses[i].size() > 0) return true;        return false;    }    /**     * Scores the specified mention against the specified mention     * chain, adding the antecedent to the list of hypotheses at the     * index corresponding to its score, not adding it if there is no     * match.     *     * @param mention Mention to score.     * @param chain Mention chain to score against mention.     * @param hypotheses Current hypotheses for antecedent of mention.     */    private void addPossibleAntecedent(Mention mention,                                       int offset,                                       MentionChain antecedent,                                       List[] hypotheses) {        if (antecedent.killed(mention)) return;        int matchScore = antecedent.matchScore(mention);        if (matchScore == Matcher.NO_MATCH_SCORE) return;        int totalScore = matchScore + distanceScore(offset,antecedent);        hypotheses[totalScore].add(antecedent);    }    /**     * Given a complete list of hypotheses antecedents for the     * specified mention, the mention will be resolved and either     * added to the set of chains as a new mention chain or will be     * merged into an antecedent provided in the hypotheses.  The     * algorithm picks the first index (lowest index, which is best     * scoring) in the hypotheses list that is non-empty, and if it is     * a singleton, resolves the antecedent, and if it is not a     * singleton, promotes it to a new mention chain. If there are no     * antecedents in the list of hypotheses, then the mention is     * also promoted to a new mention chain. The integer identifier     * of the resulting mention chain is returned, or <code>-1</code>     * if no mention chain is created.     *     * @param hypotheses List of sets of candidate antecedents.     * @param mention Mention to be resolved.     * @return Integer identifier for the mention chain against which     * the mention is resolved, or <code>-1</code> if it could not be     * resolved.     */    private int selectAntecedent(List[] hypotheses,                                 Mention mention, int offset) {        for (int score = 0; score < hypotheses.length; ++score) {            if (hypotheses[score].size() == 1) {                MentionChain antecedent                    = (MentionChain) Collections.getFirst(hypotheses[score]);                antecedent.add(mention,offset);                return antecedent.identifier();            } else if (hypotheses[score].size() > 1) {                // multiple antecedents, don't select any                return promoteToNewChain(mention,offset);            }        }        // no antecedent        return promoteToNewChain(mention,offset);    }    /**     * Promotes a mention to a new mention chain, adding it to the set     * of chains, returning the identifier of the new chain, or     * <code>-1</code> if none is created.  A return will be     * <code>false</code> only if the mention is pronominal.     *     * @param mention Mention to promote to a mention chain.     * @param offset Sentence offset for this token.     * @return Integer identifier of new chain, or <code>-1</code>     * if none is created.     */    private int promoteToNewChain(Mention mention, int offset) {        if (mention.isPronominal()) return -1;        MentionChain chain = mMentionFactory.promote(mention,offset);        mMentionChains.add(chain);        return chain.identifier();    }    /**     * Returns the ``distance'' between a mention and an antecedent     * mention chain, based on their sentence offsets.  The distance     * returned is <code>0</code> if they are in the same sentence,     * <code>1</code> if the antecedent is in the previous sentence or     * the sentence before the previous sentence, and <code>2</code>     * otherwise.     *     * @param mentionOffset Offset of mention.     * @param antecedent Mention chain to measure.     * @return Distance between mention and antecedent.     */    private static int distanceScore(int mentionOffset,                                     MentionChain antecedent) {        switch (mentionOffset - antecedent.maxSentenceOffset()) {        case 0: return 0;        case 1: return 1;        case 2: return 1;        default: return 2;        }    }    /**     *  A comparator for comparing two mentions chains. The greater     *  one is the one with the largest sentence offset.  May return     *  <code>0</code> for entities that are not equal according to     *  but this is OK because they're just being sorted.  Also * not     *  coordinated with the mention chain's equality or hash code.  *     *  The sort is also not stable; changing mention chains by adding     *  mentions may change the results in subsequent runs.     */    public static final Comparator SENTENCE_FINAL_COMPARATOR        = new Comparator() {                public int compare(Object o1, Object o2) {                    return compare((MentionChain)o1,(MentionChain)o2);                }                public int compare(MentionChain chain1,                                   MentionChain chain2) {                    if (chain1.maxSentenceOffset()                        < chain2.maxSentenceOffset()) return 1;                    if (chain1.maxSentenceOffset()                        > chain2.maxSentenceOffset()) return -1;                    return 0;                }            };}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -