📄 maxentresolver.java

📁 自然语言处理领域的一个开发包
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 2 3
    // NOTE(review): the header of this method lies before the visible part of the file;
    // the statements below are its tail, reproduced with line breaks restored only.
    MentionContext cec = entity.getLastExtent();
    int entityDistance = mention.getNounPhraseDocumentIndex() - cec.getNounPhraseDocumentIndex();
    int sentenceDistance = mention.getSentenceNumber() - cec.getSentenceNumber();
    int hobbsEntityDistance;
    if (sentenceDistance == 0) {
      hobbsEntityDistance = cec.getNounPhraseSentenceIndex();
    }
    else {
      //hobbsEntityDistance = entityDistance - (entities within sentence from mention to end) + (entities within sentence from start to mention)
      //hobbsEntityDistance = entityDistance - (cec.maxNounLocation - cec.getNounPhraseSentenceIndex) + cec.getNounPhraseSentenceIndex;
      hobbsEntityDistance = entityDistance + (2 * cec.getNounPhraseSentenceIndex()) - cec.getMaxNounPhraseSentenceIndex();
    }
    features.add("hd=" + hobbsEntityDistance);
    features.add("de=" + entityDistance);
    features.add("ds=" + sentenceDistance);
    //features.add("ds=" + sdist + pronoun);
    //features.add("dn=" + cec.sentenceNumber);
    //features.add("ep=" + cec.nounLocation);
    return (features);
  }

  /**
   * Builds a map describing the specified pronoun.  The key "gender" is set to
   * "male", "female" or "neuter" and the key "number" to "singular" or "plural",
   * according to which of the corresponding <code>Linker</code> patterns matches the
   * whole pronoun.  A key is left out when none of its patterns match.
   * @param pronoun The pronoun text to classify.
   * @return a map from attribute name to attribute value; possibly empty.
   */
  private Map getPronounFeatureMap(String pronoun) {
    Map pronounMap = new HashMap();
    if (Linker.malePronounPattern.matcher(pronoun).matches()) {
      pronounMap.put("gender","male");
    }
    else if (Linker.femalePronounPattern.matcher(pronoun).matches()) {
      pronounMap.put("gender","female");
    }
    else if (Linker.neuterPronounPattern.matcher(pronoun).matches()) {
      pronounMap.put("gender","neuter");
    }
    if (Linker.singularPronounPattern.matcher(pronoun).matches()) {
      pronounMap.put("number","singular");
    }
    else if (Linker.pluralPronounPattern.matcher(pronoun).matches()) {
      pronounMap.put("number","plural");
    }
    /*
    if (Linker.firstPersonPronounPattern.matcher(pronoun).matches()) {
      pronounMap.put("person","first");
    }
    else if (Linker.secondPersonPronounPattern.matcher(pronoun).matches()) {
      pronounMap.put("person","second");
    }
    else if (Linker.thirdPersonPronounPattern.matcher(pronoun).matches()) {
      pronounMap.put("person","third");
    }
    */
    return pronounMap;
  }

  /**
   * Returns features indicating whether the specified mention is compatible with the pronouns
   * of the specified entity.  Only mentions whose head token tag starts with "PRP" are
   * examined; for any other mention the returned list is empty.
   * @param mention The mention.
   * @param entity The entity.
   * @return list of features indicating whether the specified mention is compatible with the pronouns
   * of the specified entity; contains "compatiblePronoun" and/or "incompatiblePronoun", or is empty.
   */
  protected List getPronounMatchFeatures(MentionContext mention, DiscourseEntity entity) {
    boolean foundCompatiblePronoun = false;
    boolean foundIncompatiblePronoun = false;
    if (mention.getHeadTokenTag().startsWith("PRP")) {
      Map pronounMap = getPronounFeatureMap(mention.getHeadTokenText());
      //System.err.println("getPronounMatchFeatures.pronounMap:"+pronounMap);
      for (Iterator mi = entity.getMentions(); mi.hasNext();) {
        MentionContext candidateMention = (MentionContext) mi.next();
        if (candidateMention.getHeadTokenTag().startsWith("PRP")) {
          if (mention.getHeadTokenText().equalsIgnoreCase(candidateMention.getHeadTokenText())) {
            // Same pronoun (ignoring case): compatible, and no further mentions are examined.
            foundCompatiblePronoun = true;
            break;
          }
          else {
            Map candidatePronounMap = getPronounFeatureMap(candidateMention.getHeadTokenText());
            //System.err.println("getPronounMatchFeatures.candidatePronounMap:"+candidatePronounMap);
            boolean allKeysMatch = true;
            for (Iterator ki = pronounMap.keySet().iterator(); ki.hasNext();) {
              Object key = ki.next();
              Object cfv = candidatePronounMap.get(key);
              if (cfv != null) {
                if (!pronounMap.get(key).equals(cfv)) {
                  // Both pronouns carry this attribute but with different values.
                  foundIncompatiblePronoun = true;
                  allKeysMatch = false;
                }
              }
              else {
                // The candidate lacks this attribute: not a full match, but not a conflict either.
                allKeysMatch = false;
              }
            }
            if (allKeysMatch) {
              foundCompatiblePronoun = true;
            }
          }
        }
      }
    }
    List pronounFeatures = new ArrayList();
    if (foundCompatiblePronoun) {
      pronounFeatures.add("compatiblePronoun");
    }
    if (foundIncompatiblePronoun) {
      pronounFeatures.add("incompatiblePronoun");
    }
    return pronounFeatures;
  }

  /**
   * Returns string-match features for the specified mention and entity.
   * Every mention of the entity is compared with the specified mention.  The per-mention
   * features (the exact-match feature, "cmix", "substring", "hds=" plus the lower-cased head,
   * and "mmw=" plus an unmatched modifier) are collected in a set so that each appears once;
   * the summary features "sameHead", one of "modsMatch" / "nonTheModsMatch" / "modsMisMatch",
   * and "titleMatch" are appended after them.
   * @param mention The mention.
   * @param entity The entity.
   * @return list of string-match features for the specified mention and entity.
   */
  protected List getStringMatchFeatures(MentionContext mention, DiscourseEntity entity) {
    boolean sameHead = false;
    boolean modsMatch = false;
    boolean titleMatch = false;
    boolean nonTheModsMatch = false;
    List features = new ArrayList();
    Parse[] mtokens = mention.getTokenParses();
    Set ecModSet = constructModifierSet(mtokens, mention.getHeadTokenIndex());
    String mentionHeadString = mention.getHeadTokenText().toLowerCase();
    Set featureSet = new HashSet();
    for (Iterator ei = entity.getMentions(); ei.hasNext();) {
      MentionContext entityMention = (MentionContext) ei.next();
      String exactMatchFeature = getExactMatchFeature(entityMention, mention);
      if (exactMatchFeature != null) {
        featureSet.add(exactMatchFeature);
      }
      else if (entityMention.getParse().isCoordinatedNounPhrase() && !mention.getParse().isCoordinatedNounPhrase()) {
        featureSet.add("cmix");
      }
      else {
        String mentionStrip = stripNp(mention);
        String entityMentionStrip = stripNp(entityMention);
        if (mentionStrip != null && entityMentionStrip != null) {
          if (isSubstring(mentionStrip, entityMentionStrip)) {
            featureSet.add("substring");
          }
        }
      }
      Parse[] xtoks = entityMention.getTokenParses();
      //if (!mention.getHeadTokenTag().equals(entityMention.getHeadTokenTag())) {
      //  //System.err.println("skipping "+mention.headTokenText+" with "+xec.headTokenText+" because "+mention.headTokenTag+" != "+xec.headTokenTag);
      //  continue;
      //}  want to match NN NNP
      String entityMentionHeadString = entityMention.getHeadTokenText().toLowerCase();
      // model lexical similarity
      if (mentionHeadString.equals(entityMentionHeadString)) {
        sameHead = true;
        featureSet.add("hds=" + mentionHeadString);
        if (!modsMatch || !nonTheModsMatch) { //only check if we haven't already found one which is the same
          // Both flags are reset and recomputed against this same-head mention.
          modsMatch = true;
          nonTheModsMatch = true;
          Set entityMentionModifierSet = constructModifierSet(xtoks, entityMention.getHeadTokenIndex());
          for (Iterator mi = ecModSet.iterator(); mi.hasNext();) {
            String mw = (String) mi.next();
            if (!entityMentionModifierSet.contains(mw)) {
              modsMatch = false;
              if (!mw.equals("the")) {
                nonTheModsMatch = false;
                featureSet.add("mmw=" + mw);
              }
            }
          }
        }
      }
      Set descModSet = constructModifierSet(xtoks, entityMention.getNonDescriptorStart());
      if (descModSet.contains(mentionHeadString)) {
        titleMatch = true;
      }
    }
    features.addAll(featureSet);
    if (sameHead) {
      features.add("sameHead");
      if (modsMatch) {
        features.add("modsMatch");
      }
      else if (nonTheModsMatch) {
        features.add("nonTheModsMatch");
      }
      else {
        features.add("modsMisMatch");
      }
    }
    if (titleMatch) {
      features.add("titleMatch");
    }
    return features;
  }

  /**
   * Returns the tokens of the specified mention joined by single spaces.
   * A mention without tokens yields the empty string, as in the other
   * <code>excluded...MentionString</code> helpers.
   * @param ec The mention.
   * @return the space-separated token string of the mention.
   */
  private String mentionString(MentionContext ec) {
    StringBuffer sb = new StringBuffer();
    Object[] mtokens = ec.getTokens();
    for (int ti = 0, tl = mtokens.length; ti < tl; ti++) {
      if (ti > 0) {
        sb.append(" ");
      }
      sb.append(mtokens[ti].toString());
    }
    //System.err.println("mentionString "+ec+" == "+sb.toString()+" mtokens.length="+mtokens.length);
    return sb.toString();
  }

  /**
   * Returns the tokens of the specified mention joined by single spaces, leaving out
   * every token that is exactly "the", "The" or "THE".
   * @param ec The mention.
   * @return the space-separated token string without those tokens.
   */
  private String excludedTheMentionString(MentionContext ec) {
    StringBuffer sb = new StringBuffer();
    boolean first = true;
    Object[] mtokens = ec.getTokens();
    for (int ti = 0, tl = mtokens.length; ti < tl; ti++) {
      String token = mtokens[ti].toString();
      if (!token.equals("the") && !token.equals("The") && !token.equals("THE")) {
        if (!first) {
          sb.append(" ");
        }
        sb.append(token);
        first = false;
      }
    }
    return sb.toString();
  }

  /**
   * Returns the tokens of the specified mention joined by single spaces, leaving out
   * every token that matches <code>Linker.honorificsPattern</code>.
   * @param ec The mention.
   * @return the space-separated token string without those tokens.
   */
  private String excludedHonorificMentionString(MentionContext ec) {
    StringBuffer sb = new StringBuffer();
    boolean first = true;
    Object[] mtokens = ec.getTokens();
    for (int ti = 0, tl = mtokens.length; ti < tl; ti++) {
      String token = mtokens[ti].toString();
      if (!Linker.honorificsPattern.matcher(token).matches()) {
        if (!first) {
          sb.append(" ");
        }
        sb.append(token);
        first = false;
      }
    }
    return sb.toString();
  }

  /**
   * Returns the tokens of the specified mention joined by single spaces, leaving out
   * every token whose syntactic type is "DT".
   * @param ec The mention.
   * @return the space-separated token string without those tokens.
   */
  private String excludedDeterminerMentionString(MentionContext ec) {
    StringBuffer sb = new StringBuffer();
    boolean first = true;
    Parse[] mtokens = ec.getTokenParses();
    for (int ti = 0, tl = mtokens.length; ti < tl; ti++) {
      Parse token = mtokens[ti];
      String tag = token.getSyntacticType();
      if (!tag.equals("DT")) {
        if (!first) {
          sb.append(" ");
        }
        sb.append(token.toString());
        first = false;
      }
    }
    return sb.toString();
  }

  /**
   * Returns the name of the first string comparison under which the two mentions are equal.
   * The comparisons are tried in this order: full token string ("exactMatch"), token string
   * without honorifics ("exactMatchNoHonor"), without "the" ("exactMatchNoThe"), and
   * without determiners ("exactMatchNoDT").
   * @param ec The first mention.
   * @param xec The second mention.
   * @return the feature name of the first comparison that succeeds, or null if none does.
   */
  private String getExactMatchFeature(MentionContext ec, MentionContext xec) {
    //System.err.println("getExactMatchFeature: ec="+mentionString(ec)+" mc="+mentionString(xec));
    if (mentionString(ec).equals(mentionString(xec))) {
      return "exactMatch";
    }
    else if (excludedHonorificMentionString(ec).equals(excludedHonorificMentionString(xec))) {
      return "exactMatchNoHonor";
    }
    else if (excludedTheMentionString(ec).equals(excludedTheMentionString(xec))) {
      return "exactMatchNoThe";
    }
    else if (excludedDeterminerMentionString(ec).equals(excludedDeterminerMentionString(xec))) {
      return "exactMatchNoDT";
    }
    return null;
  }

  /**
   * Returns a list of word features for the specified token.  The list holds two
   * entries, "w=" + lower-cased word + ",t=" + tag and "t=" + tag, each followed by
   * ",endWithPeriod" when the <code>endsWithPeriod</code> pattern is found in the word.
   * @param token The token for which features are to be computed.
   * @return a list of word features for the specified token.
   */
  public static List getWordFeatures(Parse token) {
    List wordFeatures = new ArrayList();
    String word = token.toString().toLowerCase();
    String wf = "";
    if (endsWithPeriod.matcher(word).find()) {
      wf = ",endWithPeriod";
    }
    String tokTag = token.getSyntacticType();
    wordFeatures.add("w=" + word + ",t=" + tokTag + wf);
    wordFeatures.add("t=" + tokTag + wf);
    return (wordFeatures);
  }
}
上一页 1 2 3
💿 文件大小 1863 K
👤 上传用户 yangbaochun
📂 所属分类多国语言处理
🏷️ 相关标签

#自然语言处理 #开发包
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -