📄 similaritymodel.java
字号:
if (!exclusionSet.contains(sec1)) { if (debugOn) System.err.println(ec1.toString()+" "+entityNameSet+" "+sec1.toString()+" "+nameSets.get(new Integer(sec1.getId()))); addEvent(false, ec1, sec1); break; } } while (axi != startIndex); } } } }

  /**
   * Returns a number between 0 and 1 which represents the model's belief that the specified
   * mentions are compatible. Values closer to 1 are more compatible, while values closer to 0
   * are less compatible.
   *
   * @param mention1 The first mention to be considered.
   * @param mention2 The second mention to be considered.
   * @return a number between 0 and 1 which represents the model's belief that the specified
   *         mentions are compatible.
   */
  public double compatible(Context mention1, Context mention2) {
    List feats = getFeatures(mention1, mention2);
    if (debugOn) {
      System.err.println("SimilarityModel.compatible: feats="+feats);
    }
    String[] context = (String[]) feats.toArray(new String[feats.size()]);
    // The outcome stored at SAME_INDEX is the one reported as the compatibility score.
    return testModel.eval(context)[SAME_INDEX];
  }

  /**
   * Train a model based on the previously supplied evidence and persist it to
   * {@code modelName + modelExtension}. When debugging is enabled the collected events are
   * first dumped, one per line, to {@code modelName + ".events"}.
   *
   * @throws IOException if the event dump or the model file cannot be written.
   * @see #setExtents(Context[])
   */
  public void trainModel() throws IOException {
    if (debugOn) {
      FileWriter writer = new FileWriter(modelName + ".events");
      try {
        for (Iterator ei = events.iterator(); ei.hasNext();) {
          Event e = (Event) ei.next();
          writer.write(e.toString() + "\n");
        }
      } finally {
        // Close even when a write fails so the file handle is never leaked.
        writer.close();
      }
    }
    // NOTE(review): 100 and 10 are presumably the iteration count and feature cutoff
    // expected by GIS.trainModel -- confirm against that API.
    (new SuffixSensitiveGISModelWriter(
        GIS.trainModel(new CollectionEventStream(events), 100, 10),
        new File(modelName + modelExtension))).persist();
  }

  /** Returns true when the head token tag of the mention starts with "NNP". */
  private boolean isName(Context np) {
    return np.getHeadTokenTag().startsWith("NNP");
  }

  /** Returns true when the head token tag starts with "NN" but not with "NNP". */
  private boolean isCommonNoun(Context np) {
    String tag = np.getHeadTokenTag();
    return tag.startsWith("NN") && !tag.startsWith("NNP");
  }

  /** Returns true when the head token tag of the mention starts with "PRP". */
  private boolean isPronoun(Context np) {
    return np.getHeadTokenTag().startsWith("PRP");
  }

  /** Returns true when the head token tag of the mention is exactly "CD". */
  private boolean isNumber(Context np) {
    return np.getHeadTokenTag().equals("CD");
  }

  /**
   * Builds the features for a name / common-noun pair: the two name types ("nn="), the name
   * type with the lower-cased head word of the common noun ("nw="), and the name type with
   * each synset of the common noun ("ns=").
   *
   * @param name The mention classified as a name.
   * @param common The mention classified as a common noun.
   * @return the list of feature strings.
   */
  private List getNameCommonFeatures(Context name, Context common) {
    Set synsets = common.getSynsets();
    List features = new ArrayList(2 + synsets.size());
    features.add("nn=" + name.getNameType() + "," + common.getNameType());
    features.add("nw=" + name.getNameType() + "," + common.getHeadTokenText().toLowerCase());
    for (Iterator si = synsets.iterator(); si.hasNext();) {
      features.add("ns=" + name.getNameType() + "," + si.next());
    }
    return features;
  }

  /**
   * Builds the features for a name / number pair: the name type with the head tag of the
   * number ("nt=") and the two name types ("nn=").
   *
   * @param name The mention classified as a name.
   * @param number The mention classified as a number.
   * @return the list of feature strings.
   */
  private List getNameNumberFeatures(Context name, Context number) {
    List features = new ArrayList(2);
    features.add("nt=" + name.getNameType() + "," + number.getHeadTokenTag());
    features.add("nn=" + name.getNameType() + "," + number.getNameType());
    return features;
  }

  /**
   * Builds the features for a name / pronoun pair: the name type with the lower-cased pronoun
   * ("nw=") and the name type with the gender reported for that pronoun ("ng=").
   *
   * @param name The mention classified as a name.
   * @param pronoun The mention classified as a pronoun.
   * @return the list of feature strings.
   */
  private List getNamePronounFeatures(Context name, Context pronoun) {
    List features = new ArrayList(2);
    String p = pronoun.getHeadTokenText().toLowerCase();
    features.add("nw=" + name.getNameType() + "," + p);
    features.add("ng=" + name.getNameType() + "," + AbstractResolver.getPronounGender(p));
    return features;
  }

  /**
   * Builds the features for a common-noun / pronoun pair: the pronoun with the name type of
   * the common noun ("wn="), and for every synset of the common noun the pronoun ("ws=") and
   * the pronoun's gender ("gs=") paired with that synset.
   *
   * @param common The mention classified as a common noun.
   * @param pronoun The mention classified as a pronoun.
   * @return the list of feature strings.
   */
  private List getCommonPronounFeatures(Context common, Context pronoun) {
    List features = new ArrayList();
    Set synsets1 = common.getSynsets();
    String p = pronoun.getHeadTokenText().toLowerCase();
    String gen = AbstractResolver.getPronounGender(p);
    features.add("wn=" + p + "," + common.getNameType());
    for (Iterator si = synsets1.iterator(); si.hasNext();) {
      Object synset = si.next();
      features.add("ws=" + p + "," + synset);
      features.add("gs=" + gen + "," + synset);
    }
    return features;
  }

  /**
   * Builds the features for a common-noun / number pair: for every synset of the common noun
   * the head tag ("ts=") and name type ("ns=") of the number paired with that synset, followed
   * by the two name types ("nn=").
   *
   * @param common The mention classified as a common noun.
   * @param number The mention classified as a number.
   * @return the list of feature strings.
   */
  private List getCommonNumberFeatures(Context common, Context number) {
    List features = new ArrayList();
    Set synsets1 = common.getSynsets();
    for (Iterator si = synsets1.iterator(); si.hasNext();) {
      Object synset = si.next();
      features.add("ts=" + number.getHeadTokenTag() + "," + synset);
      features.add("ns=" + number.getNameType() + "," + synset);
    }
    features.add("nn=" + number.getNameType() + "," + common.getNameType());
    return features;
  }

  /**
   * Builds the features for a number / pronoun pair: the pronoun and its gender, each paired
   * with the head tag ("wt=") and the name type ("wn=") of the number.
   *
   * @param number The mention classified as a number.
   * @param pronoun The mention classified as a pronoun.
   * @return the list of feature strings.
   */
  private List getNumberPronounFeatures(Context number, Context pronoun) {
    List features = new ArrayList();
    String p = pronoun.getHeadTokenText().toLowerCase();
    String gen = AbstractResolver.getPronounGender(p);
    features.add("wt=" + p + "," + number.getHeadTokenTag());
    features.add("wn=" + p + "," + number.getNameType());
    // The gender features deliberately reuse the "wt="/"wn=" prefixes of the word features.
    features.add("wt=" + gen + "," + number.getHeadTokenTag());
    features.add("wn=" + gen + "," + number.getNameType());
    return features;
  }

  /**
   * Builds the features for a name / name pair. A single "nn=" feature holds both name types
   * in a canonical order (a null type first, otherwise the lexicographically smaller type
   * first) so the feature does not depend on argument order. "sameNameType" is added when
   * both types are non-null and equal.
   *
   * @param name1 The first mention classified as a name.
   * @param name2 The second mention classified as a name.
   * @return the list of feature strings.
   */
  private List getNameNameFeatures(Context name1, Context name2) {
    List features = new ArrayList(2);
    if (name1.getNameType() == null) {
      // Covers both "only name1 is untyped" and "both are untyped".
      features.add("nn=" + name1.getNameType() + "," + name2.getNameType());
    } else if (name2.getNameType() == null) {
      features.add("nn=" + name2.getNameType() + "," + name1.getNameType());
    } else {
      if (name1.getNameType().compareTo(name2.getNameType()) < 0) {
        features.add("nn=" + name1.getNameType() + "," + name2.getNameType());
      } else {
        features.add("nn=" + name2.getNameType() + "," + name1.getNameType());
      }
      if (name1.getNameType().equals(name2.getNameType())) {
        features.add("sameNameType");
      }
    }
    return features;
  }

  /**
   * Builds the features for a pair of common nouns from the overlap of their synset sets.
   * No features are produced when either mention has no synsets. Otherwise one "ss=" feature
   * is added per shared synset, followed by one summary feature:
   * "ncss" when nothing is shared, "samess" when every synset of both mentions is shared,
   * "2isa1" when every synset of common1 is shared, and "1isa2" when every synset of common2
   * is shared.
   *
   * @param common1 The first mention classified as a common noun.
   * @param common2 The second mention classified as a common noun.
   * @return the list of feature strings; empty when either mention has no synsets.
   */
  private List getCommonCommonFeatures(Context common1, Context common2) {
    List features = new ArrayList();
    Set synsets1 = common1.getSynsets();
    Set synsets2 = common2.getSynsets();
    if (synsets1.isEmpty() || synsets2.isEmpty()) {
      return features;
    }

    // Count the overlap first; the summary feature below must be derived from the final
    // count, never from the initial zero.
    int numCommonSynsets = 0;
    for (Iterator si = synsets1.iterator(); si.hasNext();) {
      Object synset = si.next();
      if (synsets2.contains(synset)) {
        features.add("ss=" + synset);
        numCommonSynsets++;
      }
    }

    if (numCommonSynsets == 0) {
      features.add("ncss");
    } else if (numCommonSynsets == synsets1.size() && numCommonSynsets == synsets2.size()) {
      features.add("samess");
    } else if (numCommonSynsets == synsets1.size()) {
      features.add("2isa1");
    } else if (numCommonSynsets == synsets2.size()) {
      features.add("1isa2");
    }
    return features;
  }

  /**
   * Builds the feature for a pronoun / pronoun pair: "sameGender" when the genders reported
   * for the two head tokens are equal, "diffGender" otherwise.
   *
   * @param pronoun1 The first mention classified as a pronoun.
   * @param pronoun2 The second mention classified as a pronoun.
   * @return a single-element list holding the gender agreement feature.
   */
  private List getPronounPronounFeatures(Context pronoun1, Context pronoun2) {
    List features = new ArrayList(1);
    String g1 = AbstractResolver.getPronounGender(pronoun1.getHeadTokenText());
    String g2 = AbstractResolver.getPronounGender(pronoun2.getHeadTokenText());
    if (g1.equals(g2)) {
      features.add("sameGender");
    } else {
      features.add("diffGender");
    }
    return features;
  }

  /**
   * Builds the complete feature list for a pair of mentions. Every pair receives "default",
   * a "ww=" feature holding the two lower-cased head words in lexicographic order, and
   * "sameHead" when those words are equal. Further features depend on how each mention is
   * classified (name, common noun, pronoun or number); the category-specific helpers are
   * always invoked with their arguments in the helper's expected order, regardless of the
   * order of np1 and np2. A number / number pair, or a mention that fits no category,
   * contributes no additional features.
   *
   * @param np1 The first mention.
   * @param np2 The second mention.
   * @return the list of feature strings for the pair.
   */
  private List getFeatures(Context np1, Context np2) {
    List features = new ArrayList();
    features.add("default");

    String w1 = np1.getHeadTokenText().toLowerCase();
    String w2 = np2.getHeadTokenText().toLowerCase();
    if (w1.compareTo(w2) < 0) {
      features.add("ww=" + w1 + "," + w2);
    } else {
      features.add("ww=" + w2 + "," + w1);
    }
    if (w1.equals(w2)) {
      features.add("sameHead");
    }

    if (isName(np1)) {
      if (isName(np2)) {
        features.addAll(getNameNameFeatures(np1, np2));
      } else if (isCommonNoun(np2)) {
        features.addAll(getNameCommonFeatures(np1, np2));
      } else if (isPronoun(np2)) {
        features.addAll(getNamePronounFeatures(np1, np2));
      } else if (isNumber(np2)) {
        features.addAll(getNameNumberFeatures(np1, np2));
      }
    } else if (isCommonNoun(np1)) {
      if (isName(np2)) {
        features.addAll(getNameCommonFeatures(np2, np1));
      } else if (isCommonNoun(np2)) {
        features.addAll(getCommonCommonFeatures(np1, np2));
      } else if (isPronoun(np2)) {
        features.addAll(getCommonPronounFeatures(np1, np2));
      } else if (isNumber(np2)) {
        features.addAll(getCommonNumberFeatures(np1, np2));
      }
    } else if (isPronoun(np1)) {
      if (isName(np2)) {
        features.addAll(getNamePronounFeatures(np2, np1));
      } else if (isCommonNoun(np2)) {
        features.addAll(getCommonPronounFeatures(np2, np1));
      } else if (isPronoun(np2)) {
        features.addAll(getPronounPronounFeatures(np1, np2));
      } else if (isNumber(np2)) {
        features.addAll(getNumberPronounFeatures(np2, np1));
      }
    } else if (isNumber(np1)) {
      if (isName(np2)) {
        features.addAll(getNameNumberFeatures(np2, np1));
      } else if (isCommonNoun(np2)) {
        features.addAll(getCommonNumberFeatures(np2, np1));
      } else if (isPronoun(np2)) {
        features.addAll(getNumberPronounFeatures(np1, np2));
      }
      // A number / number pair adds nothing beyond the shared head-word features.
    }
    return features;
  }

  /**
   * Command-line driver: loads the named model, then reads lines of two space-separated
   * mentions (for example {@code tiger/NN bear/NN}) from standard input and prints the
   * compatibility score followed by the features for each pair. Lines with fewer than two
   * tokens are reported on standard error and skipped.
   *
   * @param args a single argument, the model name.
   * @throws IOException if the model or standard input cannot be read.
   */
  public static void main(String[] args) throws IOException {
    if (args.length == 0) {
      System.err.println("Usage: SimilarityModel modelName < tiger/NN bear/NN");
      System.exit(1);
    }
    String modelName = args[0];
    SimilarityModel model = new SimilarityModel(modelName, false);
    //Context.wn = new WordNet(System.getProperty("WNHOME"), true);
    //Context.morphy = new Morphy(Context.wn);
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
    for (String line = in.readLine(); line != null; line = in.readLine()) {
      String[] words = line.split(" ");
      if (words.length < 2) {
        // A blank or single-token line would otherwise abort the whole run.
        System.err.println("Skipping line, expected two mentions: " + line);
        continue;
      }
      Context first = Context.parseContext(words[0]);
      Context second = Context.parseContext(words[1]);
      double p = model.compatible(first, second);
      System.out.println(p + " " + model.getFeatures(first, second));
    }
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -