📄 maxentresolver.java
字号:
MentionContext cec = entity.getLastExtent();
    int entityDistance = mention.getNounPhraseDocumentIndex()- cec.getNounPhraseDocumentIndex();
    int sentenceDistance = mention.getSentenceNumber() - cec.getSentenceNumber();
    int hobbsEntityDistance;
    if (sentenceDistance == 0) {
      hobbsEntityDistance = cec.getNounPhraseSentenceIndex();
    }
    else {
      //hobbsEntityDistance = entityDistance - (entities within sentence from mention to end) + (entities within sentence form start to mention)
      //hobbsEntityDistance = entityDistance - (cec.maxNounLocation - cec.getNounPhraseSentenceIndex) + cec.getNounPhraseSentenceIndex;
      hobbsEntityDistance = entityDistance + (2 * cec.getNounPhraseSentenceIndex()) - cec.getMaxNounPhraseSentenceIndex();
    }
    features.add("hd=" + hobbsEntityDistance);
    features.add("de=" + entityDistance);
    features.add("ds=" + sentenceDistance);
    //features.add("ds=" + sdist + pronoun);
    //features.add("dn=" + cec.sentenceNumber);
    //features.add("ep=" + cec.nounLocation);
    return (features);
  }

  /**
   * Builds a small attribute map for the specified pronoun.
   * The key "gender" maps to "male", "female" or "neuter" and the key "number" maps to
   * "singular" or "plural"; a key is only present when one of the corresponding
   * {@link Linker} patterns matches the whole pronoun.
   * @param pronoun The pronoun text to classify.
   * @return a map holding the gender and number attributes which could be determined.
   */
  private Map getPronounFeatureMap(String pronoun) {
    Map attributes = new HashMap();

    // First matching gender pattern wins: male, then female, then neuter.
    String gender = null;
    if (Linker.malePronounPattern.matcher(pronoun).matches()) {
      gender = "male";
    }
    else if (Linker.femalePronounPattern.matcher(pronoun).matches()) {
      gender = "female";
    }
    else if (Linker.neuterPronounPattern.matcher(pronoun).matches()) {
      gender = "neuter";
    }
    if (gender != null) {
      attributes.put("gender", gender);
    }

    // First matching number pattern wins: singular, then plural.
    String number = null;
    if (Linker.singularPronounPattern.matcher(pronoun).matches()) {
      number = "singular";
    }
    else if (Linker.pluralPronounPattern.matcher(pronoun).matches()) {
      number = "plural";
    }
    if (number != null) {
      attributes.put("number", number);
    }

    // Person (first/second/third) is not recorded: the checks against
    // Linker.firstPersonPronounPattern, Linker.secondPersonPronounPattern and
    // Linker.thirdPersonPronounPattern are disabled.
    return attributes;
  }

  /**
   * Returns features indicating whether the specified mention is compatible with the pronouns
   * of the specified entity.
   * Nothing is produced unless the head tag of the mention starts with "PRP". Otherwise each
   * entity mention whose head tag also starts with "PRP" is examined: "compatiblePronoun" is
   * produced when a candidate has the same head text (ignoring case) or when every gender/number
   * attribute of the mention is present with an equal value on the candidate;
   * "incompatiblePronoun" is produced when an attribute present on both has different values.
   * @param mention The mention.
   * @param entity The entity.
   * @return list of features indicating whether the specified mention is compatible with the pronouns
   * of the specified entity.
   */
  protected List getPronounMatchFeatures(MentionContext mention, DiscourseEntity entity) {
    boolean compatible = false;
    boolean incompatible = false;

    if (mention.getHeadTokenTag().startsWith("PRP")) {
      Map mentionAttributes = getPronounFeatureMap(mention.getHeadTokenText());
      Iterator candidates = entity.getMentions();
      while (candidates.hasNext()) {
        MentionContext candidate = (MentionContext) candidates.next();
        if (!candidate.getHeadTokenTag().startsWith("PRP")) {
          continue;
        }
        // An identical pronoun settles compatibility; no further candidates are examined.
        if (mention.getHeadTokenText().equalsIgnoreCase(candidate.getHeadTokenText())) {
          compatible = true;
          break;
        }
        Map candidateAttributes = getPronounFeatureMap(candidate.getHeadTokenText());
        boolean everyAttributeAgrees = true;
        Iterator attributes = mentionAttributes.entrySet().iterator();
        while (attributes.hasNext()) {
          Map.Entry attribute = (Map.Entry) attributes.next();
          Object candidateValue = candidateAttributes.get(attribute.getKey());
          if (candidateValue == null) {
            // Attribute unknown for the candidate: not a full match, but not a conflict either.
            everyAttributeAgrees = false;
          }
          else if (!attribute.getValue().equals(candidateValue)) {
            incompatible = true;
            everyAttributeAgrees = false;
          }
        }
        if (everyAttributeAgrees) {
          compatible = true;
        }
      }
    }

    List pronounFeatures = new ArrayList();
    if (compatible) {
      pronounFeatures.add("compatiblePronoun");
    }
    if (incompatible) {
      pronounFeatures.add("incompatiblePronoun");
    }
    return pronounFeatures;
  }

  /**
   * Returns string-match features for the specified mention and entity.
   * For every mention of the entity this collects, without duplicates, an exact-match feature
   * (see getExactMatchFeature), or "cmix" when only the entity mention is a coordinated noun
   * phrase, or "substring" when isSubstring accepts the two stripNp results; plus
   * "hds=&lt;head&gt;" when the lower-cased head tokens are equal and "mmw=&lt;word&gt;" for each
   * modifier of the mention, other than "the", missing from the entity mention.
   * After these, "sameHead" followed by one of "modsMatch", "nonTheModsMatch" or "modsMisMatch"
   * is appended when any head matched, and "titleMatch" when the head of the mention occurs in
   * the modifier set built up to the non-descriptor start of some entity mention.
   * @param mention The mention.
   * @param entity The entity.
   * @return list of string-match features for the specified mention and entity.
   */
  protected List getStringMatchFeatures(MentionContext mention, DiscourseEntity entity) {
    Parse[] mentionTokens = mention.getTokenParses();
    Set mentionModifiers = constructModifierSet(mentionTokens, mention.getHeadTokenIndex());
    String mentionHead = mention.getHeadTokenText().toLowerCase();

    boolean sameHead = false;
    boolean modsMatch = false;
    boolean nonTheModsMatch = false;
    boolean titleMatch = false;
    Set matchFeatures = new HashSet();

    Iterator candidates = entity.getMentions();
    while (candidates.hasNext()) {
      MentionContext candidate = (MentionContext) candidates.next();

      // Whole-string comparison: at most one of exact match / cmix / substring per candidate.
      String exactFeature = getExactMatchFeature(candidate, mention);
      if (exactFeature != null) {
        matchFeatures.add(exactFeature);
      }
      else if (candidate.getParse().isCoordinatedNounPhrase() && !mention.getParse().isCoordinatedNounPhrase()) {
        matchFeatures.add("cmix");
      }
      else {
        String strippedMention = stripNp(mention);
        String strippedCandidate = stripNp(candidate);
        if (strippedMention != null && strippedCandidate != null && isSubstring(strippedMention, strippedCandidate)) {
          matchFeatures.add("substring");
        }
      }

      Parse[] candidateTokens = candidate.getTokenParses();
      int candidateHeadIndex = candidate.getHeadTokenIndex();
      // A check which skipped candidates whose head tag differs from the mention's head tag is
      // disabled here, since tags such as NN and NNP should still be allowed to match.
      String candidateHead = candidate.getHeadTokenText().toLowerCase();

      // Lexical similarity of the heads and their modifiers.
      if (mentionHead.equals(candidateHead)) {
        sameHead = true;
        matchFeatures.add("hds=" + mentionHead);
        // Modifiers are only re-examined while no fully matching candidate has been found.
        if (!(modsMatch && nonTheModsMatch)) {
          modsMatch = true;
          nonTheModsMatch = true;
          Set candidateModifiers = constructModifierSet(candidateTokens, candidateHeadIndex);
          Iterator modifiers = mentionModifiers.iterator();
          while (modifiers.hasNext()) {
            String modifier = (String) modifiers.next();
            if (candidateModifiers.contains(modifier)) {
              continue;
            }
            modsMatch = false;
            if (!modifier.equals("the")) {
              nonTheModsMatch = false;
              matchFeatures.add("mmw=" + modifier);
            }
          }
        }
      }

      Set descriptorModifiers = constructModifierSet(candidateTokens, candidate.getNonDescriptorStart());
      if (descriptorModifiers.contains(mentionHead)) {
        titleMatch = true;
      }
    }

    // Per-candidate features first, then the summary flags in a fixed order.
    List features = new ArrayList(matchFeatures);
    if (sameHead) {
      features.add("sameHead");
      String modifierFeature;
      if (modsMatch) {
        modifierFeature = "modsMatch";
      }
      else if (nonTheModsMatch) {
        modifierFeature = "nonTheModsMatch";
      }
      else {
        modifierFeature = "modsMisMatch";
      }
      features.add(modifierFeature);
    }
    if (titleMatch) {
      features.add("titleMatch");
    }
    return features;
  }

  /**
   * Joins the string forms of all tokens of the specified mention with single spaces.
   * The first token is read unconditionally.
   * @param ec The mention.
   * @return the space separated tokens of the mention.
   */
  private String mentionString(MentionContext ec) {
    StringBuffer text = new StringBuffer();
    Object[] tokens = ec.getTokens();
    text.append(tokens[0].toString());
    for (int i = 1; i < tokens.length; i++) {
      text.append(" ");
      text.append(tokens[i].toString());
    }
    return text.toString();
  }

  /**
   * Joins the tokens of the specified mention with single spaces, leaving out every token
   * which is exactly "the", "The" or "THE".
   * @param ec The mention.
   * @return the space separated tokens of the mention without those tokens.
   */
  private String excludedTheMentionString(MentionContext ec) {
    Object[] tokens = ec.getTokens();
    StringBuffer text = new StringBuffer();
    // Empty before the first kept token, a single space afterwards.
    String separator = "";
    for (int i = 0; i < tokens.length; i++) {
      String token = tokens[i].toString();
      if (token.equals("the") || token.equals("The") || token.equals("THE")) {
        continue;
      }
      text.append(separator).append(token);
      separator = " ";
    }
    return text.toString();
  }

  /**
   * Joins the tokens of the specified mention with single spaces, leaving out every token
   * which is matched in full by {@link Linker#honorificsPattern}.
   * @param ec The mention.
   * @return the space separated tokens of the mention without those tokens.
   */
  private String excludedHonorificMentionString(MentionContext ec) {
    Object[] tokens = ec.getTokens();
    StringBuffer text = new StringBuffer();
    // Empty before the first kept token, a single space afterwards.
    String separator = "";
    for (int i = 0; i < tokens.length; i++) {
      String token = tokens[i].toString();
      if (Linker.honorificsPattern.matcher(token).matches()) {
        continue;
      }
      text.append(separator).append(token);
      separator = " ";
    }
    return text.toString();
  }

  /**
   * Joins the tokens of the specified mention with single spaces, leaving out every token
   * whose syntactic type is "DT".
   * @param ec The mention.
   * @return the space separated tokens of the mention without those tokens.
   */
  private String excludedDeterminerMentionString(MentionContext ec) {
    Parse[] tokens = ec.getTokenParses();
    StringBuffer text = new StringBuffer();
    // Empty before the first kept token, a single space afterwards.
    String separator = "";
    for (int i = 0; i < tokens.length; i++) {
      Parse token = tokens[i];
      String tag = token.getSyntacticType();
      if (tag.equals("DT")) {
        continue;
      }
      text.append(separator).append(token.toString());
      separator = " ";
    }
    return text.toString();
  }

  /**
   * Returns the name of the strictest string comparison under which the two mentions are equal.
   * The comparisons are tried in this order: all tokens ("exactMatch"), tokens without
   * honorifics ("exactMatchNoHonor"), tokens without "the" ("exactMatchNoThe") and tokens
   * without determiners ("exactMatchNoDT").
   * @param ec The first mention.
   * @param xec The second mention.
   * @return the feature for the first comparison which succeeds, or null if none does.
   */
  private String getExactMatchFeature(MentionContext ec, MentionContext xec) {
    if (mentionString(ec).equals(mentionString(xec))) {
      return "exactMatch";
    }
    if (excludedHonorificMentionString(ec).equals(excludedHonorificMentionString(xec))) {
      return "exactMatchNoHonor";
    }
    if (excludedTheMentionString(ec).equals(excludedTheMentionString(xec))) {
      return "exactMatchNoThe";
    }
    if (excludedDeterminerMentionString(ec).equals(excludedDeterminerMentionString(xec))) {
      return "exactMatchNoDT";
    }
    return null;
  }

  /**
   * Returns a list of word features for the specified token.
   * Two features are produced: "w=&lt;word&gt;,t=&lt;tag&gt;" and "t=&lt;tag&gt;", where the word is
   * the lower-cased string form of the token and the tag is its syntactic type. Both carry the
   * suffix ",endWithPeriod" when the endsWithPeriod pattern is found in the word.
   * @param token The token for which features are to be computed.
   * @return a list of word features for the specified token.
   */
  public static List getWordFeatures(Parse token) {
    String word = token.toString().toLowerCase();
    String periodSuffix = endsWithPeriod.matcher(word).find() ? ",endWithPeriod" : "";
    String tag = token.getSyntacticType();

    List wordFeatures = new ArrayList();
    wordFeatures.add("w=" + word + ",t=" + tag + periodSuffix);
    wordFeatures.add("t=" + tag + periodSuffix);
    return wordFeatures;
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -