📄 maxentresolver.java
字号:
if (ec.getNounPhraseSentenceIndex() == 0) { return (true); } return (false); } public DiscourseEntity retain(MentionContext mention, DiscourseModel dm) { //System.err.println(this+".retain("+ec+") "+mode); if (ResolverMode.TRAIN == mode) { DiscourseEntity de = null; boolean referentFound = false; boolean hasReferentialCandidate = false; boolean nonReferentFound = false; for (int ei = 0; ei < getNumEntities(dm); ei++) { DiscourseEntity cde = (DiscourseEntity) dm.getEntity(ei); MentionContext entityMention = cde.getLastExtent(); if (outOfRange(mention, cde)) { if (mention.getId() != -1 && !referentFound) { //System.err.println("retain: Referent out of range: "+ec.toText()+" "+ec.parse.getSpan()); } break; } if (excluded(mention, cde)) { if (showExclusions) { if (mention.getId() != -1 && entityMention.getId() == mention.getId()) { System.err.println(this +".retain: Referent excluded: (" + mention.getId() + ") " + mention.toText() + " " + mention.getIndexSpan() + " -> (" + entityMention.getId() + ") " + entityMention.toText() + " " + entityMention.getSpan() + " " + this); } } } else { hasReferentialCandidate = true; boolean useAsDifferentExample = defaultReferent(cde); //if (!sampleSelection || (mention.getId() != -1 && entityMention.getId() == mention.getId()) || (!nonReferentFound && useAsDifferentExample)) { List features = getFeatures(mention, cde); //add Event to Model if (debugOn) { System.err.println(this +".retain: " + mention.getId() + " " + mention.toText() + " -> " + entityMention.getId() + " " + cde); } if (mention.getId() != -1 && entityMention.getId() == mention.getId()) { referentFound = true; events.add(new Event(SAME, (String[]) features.toArray(new String[features.size()]))); de = cde; //System.err.println("MaxentResolver.retain: resolved at "+ei); distances.add(new Integer(ei)); } else if (!pairedSampleSelection || (!nonReferentFound && useAsDifferentExample)) { nonReferentFound = true; events.add(new Event(DIFF, (String[]) features.toArray(new String[features.size()]))); } //} } if (pairedSampleSelection && referentFound && nonReferentFound) { break; } if (preferFirstReferent && referentFound) { break; } } // doesn't refer to anything if (hasReferentialCandidate) { nonReferentialResolver.addEvent(mention); } return (de); } else { return (super.retain(mention, dm)); } } protected String getMentionCountFeature(DiscourseEntity de) { if (de.getNumMentions() >= 5) { return ("mc=5+"); } else { return ("mc=" + de.getNumMentions()); } } /** * Returns a list of features for deciding whether the specificed mention refers to the specified discourse entity. * @param mention the mention being considers as possibly referential. * @param entity The disource entity with which the mention is being considered referential. * @return a list of features used to predict reference between the specified mention and entity. */ protected List getFeatures(MentionContext mention, DiscourseEntity entity) { List features = new ArrayList(); features.add(DEFAULT); features.addAll(getCompatibilityFeatures(mention, entity)); return features; } public void train() throws IOException { if (ResolverMode.TRAIN == mode) { if (debugOn) { System.err.println(this +" referential"); FileWriter writer = new FileWriter(modelName+".events"); for (Iterator ei=events.iterator();ei.hasNext();) { Event e = (Event) ei.next(); writer.write(e.toString()+"\n"); } writer.close(); } (new SuffixSensitiveGISModelWriter(GIS.trainModel(new CollectionEventStream(events),100,10),new File(modelName+modelExtension))).persist(); nonReferentialResolver.train(); } } public static void setSimilarityModel(TestSimilarityModel sm) { simModel = sm; } private String getSemanticCompatibilityFeature(MentionContext ec, DiscourseEntity de) { if (simModel != null) { double best = 0; for (Iterator xi = de.getMentions(); xi.hasNext();) { MentionContext ec2 = (MentionContext) xi.next(); double sim = simModel.compatible(ec, ec2); if (debugOn) { System.err.println("MaxentResolver.getSemanticCompatibilityFeature: sem-compat " + sim + " " + ec.toText() + " " + ec2.toText()); } if (sim > best) { best = sim; } } if (best > minSimProb) { return SIM_COMPATIBLE; } else if (best > (1 - minSimProb)) { return SIM_UNKNOWN; } else { return SIM_INCOMPATIBLE; } } else { System.err.println("MaxentResolver: Uninitialized Semantic Model"); return SIM_UNKNOWN; } } private String getGenderCompatibilityFeature(MentionContext ec, DiscourseEntity de) { GenderEnum eg = de.getGender(); //System.err.println("getGenderCompatibility: mention="+ec.getGender()+" entity="+eg); if (eg == GenderEnum.UNKNOWN || ec.getGender() == GenderEnum.UNKNOWN) { return GEN_UNKNOWN; } else if (ec.getGender() == eg) { return GEN_COMPATIBLE; } else { return GEN_INCOMPATIBLE; } } private String getNumberCompatibilityFeature(MentionContext ec, DiscourseEntity de) { NumberEnum en = de.getNumber(); if (en == NumberEnum.UNKNOWN || ec.getNumber() == NumberEnum.UNKNOWN) { return NUM_UNKNOWN; } else if (ec.getNumber() == en) { return NUM_COMPATIBLE; } else { return NUM_INCOMPATIBLE; } } /** * Returns features indicating whether the specified mention and the specified entity are compatible. * @param mention The mention. * @param entity The entity. * @return list of features indicating whether the specified mention and the specified entity are compatible. */ private List getCompatibilityFeatures(MentionContext mention, DiscourseEntity entity) { List compatFeatures = new ArrayList(); String semCompatible = getSemanticCompatibilityFeature(mention, entity); compatFeatures.add(semCompatible); String genCompatible = getGenderCompatibilityFeature(mention, entity); compatFeatures.add(genCompatible); String numCompatible = getNumberCompatibilityFeature(mention, entity); compatFeatures.add(numCompatible); if (semCompatible.equals(SIM_COMPATIBLE) && genCompatible.equals(GEN_COMPATIBLE) && numCompatible.equals(NUM_COMPATIBLE)) { compatFeatures.add("all.compatible"); } else if (semCompatible.equals(SIM_INCOMPATIBLE) || genCompatible.equals(GEN_INCOMPATIBLE) || numCompatible.equals(NUM_INCOMPATIBLE)) { compatFeatures.add("some.incompatible"); } return compatFeatures; } /** * Returns a list of features based on the surrounding context of the specified mention. * @param mention he mention whose surround context the features model. * @return a list of features based on the surrounding context of the specified mention */ public static List getContextFeatures(MentionContext mention) { List features = new ArrayList(); if (mention.getPreviousToken() != null) { features.add("pt=" + mention.getPreviousToken().getSyntacticType()); features.add("pw=" + mention.getPreviousToken().toString()); } else { features.add("pt=BOS"); features.add("pw=BOS"); } if (mention.getNextToken() != null) { features.add("nt=" + mention.getNextToken().getSyntacticType()); features.add("nw=" + mention.getNextToken().toString()); } else { features.add("nt=EOS"); features.add("nw=EOS"); } if (mention.getNextTokenBasal() != null) { features.add("bnt=" + mention.getNextTokenBasal().getSyntacticType()); features.add("bnw=" + mention.getNextTokenBasal().toString()); } else { features.add("bnt=EOS"); features.add("bnw=EOS"); } return (features); } private Set constructModifierSet(Parse[] tokens, int headIndex) { Set modSet = new HashSet(); for (int ti = 0; ti < headIndex; ti++) { Parse tok = tokens[ti]; modSet.add(tok.toString().toLowerCase()); } return (modSet); } /** * Returns whether the specified token is a definite article. * @param tok The token. * @param tag The pos-tag for the specified token. * @return whether the specified token is a definite article. */ protected boolean definiteArticle(String tok, String tag) { tok = tok.toLowerCase(); if (tok.equals("the") || tok.equals("these") || tok.equals("these") || tag.equals("PRP$")) { return (true); } return (false); } private boolean isSubstring(String ecStrip, String xecStrip) { //System.err.println("MaxentResolver.isSubstring: ec="+ecStrip+" xec="+xecStrip); int io = xecStrip.indexOf(ecStrip); if (io != -1) { //check boundries if (io != 0 && xecStrip.charAt(io - 1) != ' ') { return false; } int end = io + ecStrip.length(); if (end != xecStrip.length() && xecStrip.charAt(end) != ' ') { return false; } return true; } return false; } protected boolean excluded(MentionContext ec, DiscourseEntity de) { if (super.excluded(ec, de)) { return true; } return false; /* else { if (GEN_INCOMPATIBLE == getGenderCompatibilityFeature(ec,de)) { return true; } else if (NUM_INCOMPATIBLE == getNumberCompatibilityFeature(ec,de)) { return true; } else if (SIM_INCOMPATIBLE == getSemanticCompatibilityFeature(ec,de)) { return true; } return false; } */ } /** * Returns distance features for the specified mention and entity. * @param mention The mention. * @param entity The entity. * @return list of distance features for the specified mention and entity. */ protected List getDistanceFeatures(MentionContext mention, DiscourseEntity entity) { List features = new ArrayList();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -