⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 maxentresolver.java

📁 自然语言处理领域的一个开发包
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
    if (ec.getNounPhraseSentenceIndex() == 0) {      return (true);    }    return (false);  }  public DiscourseEntity retain(MentionContext mention, DiscourseModel dm) {    //System.err.println(this+".retain("+ec+") "+mode);    if (ResolverMode.TRAIN == mode) {      DiscourseEntity de = null;      boolean referentFound = false;      boolean hasReferentialCandidate = false;      boolean nonReferentFound = false;      for (int ei = 0; ei < getNumEntities(dm); ei++) {        DiscourseEntity cde = (DiscourseEntity) dm.getEntity(ei);        MentionContext entityMention = cde.getLastExtent();        if (outOfRange(mention, cde)) {          if (mention.getId() != -1 && !referentFound) {            //System.err.println("retain: Referent out of range: "+ec.toText()+" "+ec.parse.getSpan());          }          break;        }        if (excluded(mention, cde)) {          if (showExclusions) {            if (mention.getId() != -1 && entityMention.getId() == mention.getId()) {              System.err.println(this +".retain: Referent excluded: (" + mention.getId() + ") " + mention.toText() + " " + mention.getIndexSpan() + " -> (" + entityMention.getId() + ") " + entityMention.toText() + " " + entityMention.getSpan() + " " + this);            }          }        }        else {          hasReferentialCandidate = true;          boolean useAsDifferentExample = defaultReferent(cde);          //if (!sampleSelection || (mention.getId() != -1 && entityMention.getId() == mention.getId()) || (!nonReferentFound && useAsDifferentExample)) {            List features = getFeatures(mention, cde);            //add Event to Model            if (debugOn) {              System.err.println(this +".retain: " + mention.getId() + " " + mention.toText() + " -> " + entityMention.getId() + " " + cde);            }            if (mention.getId() != -1 && entityMention.getId() == mention.getId()) {              referentFound = true;              events.add(new Event(SAME, (String[]) features.toArray(new 
String[features.size()])));              de = cde;              //System.err.println("MaxentResolver.retain: resolved at "+ei);              distances.add(new Integer(ei));            }            else if (!pairedSampleSelection || (!nonReferentFound && useAsDifferentExample)) {              nonReferentFound = true;              events.add(new Event(DIFF, (String[]) features.toArray(new String[features.size()])));            }          //}        }        if (pairedSampleSelection && referentFound && nonReferentFound) {          break;        }        if (preferFirstReferent && referentFound) {          break;        }      }      // doesn't refer to anything      if (hasReferentialCandidate) {        nonReferentialResolver.addEvent(mention);      }      return (de);    }    else {      return (super.retain(mention, dm));    }  }  protected String getMentionCountFeature(DiscourseEntity de) {    if (de.getNumMentions() >= 5) {      return ("mc=5+");    }    else {      return ("mc=" + de.getNumMentions());    }  }    /**    * Returns a list of features for deciding whether the specificed mention refers to the specified discourse entity.   * @param mention the mention being considers as possibly referential.    * @param entity The disource entity with which the mention is being considered referential.     * @return a list of features used to predict reference between the specified mention and entity.   
*/  protected List getFeatures(MentionContext mention, DiscourseEntity entity) {    List features = new ArrayList();    features.add(DEFAULT);    features.addAll(getCompatibilityFeatures(mention, entity));    return features;  }  public void train() throws IOException {    if (ResolverMode.TRAIN == mode) {      if (debugOn) {        System.err.println(this +" referential");        FileWriter writer = new FileWriter(modelName+".events");        for (Iterator ei=events.iterator();ei.hasNext();) {          Event e = (Event) ei.next();          writer.write(e.toString()+"\n");        }        writer.close();      }      (new SuffixSensitiveGISModelWriter(GIS.trainModel(new CollectionEventStream(events),100,10),new File(modelName+modelExtension))).persist();      nonReferentialResolver.train();    }  }  public static void setSimilarityModel(TestSimilarityModel sm) {    simModel = sm;  }    private String getSemanticCompatibilityFeature(MentionContext ec, DiscourseEntity de) {    if (simModel != null) {      double best = 0;      for (Iterator xi = de.getMentions(); xi.hasNext();) {        MentionContext ec2 = (MentionContext) xi.next();        double sim = simModel.compatible(ec, ec2);        if (debugOn) {          System.err.println("MaxentResolver.getSemanticCompatibilityFeature: sem-compat " + sim + " " + ec.toText() + " " + ec2.toText());        }        if (sim > best) {          best = sim;        }      }      if (best > minSimProb) {        return SIM_COMPATIBLE;      }      else if (best > (1 - minSimProb)) {        return SIM_UNKNOWN;      }      else {        return SIM_INCOMPATIBLE;      }    }    else {      System.err.println("MaxentResolver: Uninitialized Semantic Model");      return SIM_UNKNOWN;    }  }  private String getGenderCompatibilityFeature(MentionContext ec, DiscourseEntity de) {    GenderEnum eg = de.getGender();    //System.err.println("getGenderCompatibility: mention="+ec.getGender()+" entity="+eg);    if (eg == GenderEnum.UNKNOWN || 
ec.getGender() == GenderEnum.UNKNOWN) {      return GEN_UNKNOWN;    }    else if (ec.getGender() == eg) {      return GEN_COMPATIBLE;    }    else {      return GEN_INCOMPATIBLE;    }  }  private String getNumberCompatibilityFeature(MentionContext ec, DiscourseEntity de) {    NumberEnum en = de.getNumber();    if (en == NumberEnum.UNKNOWN || ec.getNumber() == NumberEnum.UNKNOWN) {      return NUM_UNKNOWN;    }    else if (ec.getNumber() == en) {      return NUM_COMPATIBLE;    }    else {      return NUM_INCOMPATIBLE;    }  }  /**   * Returns features indicating whether the specified mention and the specified entity are compatible.   * @param mention The mention.   * @param entity The entity.   * @return list of features indicating whether the specified mention and the specified entity are compatible.   */  private List getCompatibilityFeatures(MentionContext mention, DiscourseEntity entity) {    List compatFeatures = new ArrayList();    String semCompatible = getSemanticCompatibilityFeature(mention, entity);    compatFeatures.add(semCompatible);    String genCompatible = getGenderCompatibilityFeature(mention, entity);    compatFeatures.add(genCompatible);    String numCompatible = getNumberCompatibilityFeature(mention, entity);    compatFeatures.add(numCompatible);    if (semCompatible.equals(SIM_COMPATIBLE) && genCompatible.equals(GEN_COMPATIBLE) && numCompatible.equals(NUM_COMPATIBLE)) {      compatFeatures.add("all.compatible");    }    else if (semCompatible.equals(SIM_INCOMPATIBLE) || genCompatible.equals(GEN_INCOMPATIBLE) || numCompatible.equals(NUM_INCOMPATIBLE)) {      compatFeatures.add("some.incompatible");    }    return compatFeatures;  }    /**   * Returns a list of features based on the surrounding context of the specified mention.   * @param mention he mention whose surround context the features model.    
* @return a list of features based on the surrounding context of the specified mention
   */
  public static List getContextFeatures(MentionContext mention) {
    List contextFeatures = new ArrayList();

    // token before the mention, or the BOS marker when there is none
    if (mention.getPreviousToken() == null) {
      contextFeatures.add("pt=BOS");
      contextFeatures.add("pw=BOS");
    }
    else {
      contextFeatures.add("pt=" + mention.getPreviousToken().getSyntacticType());
      contextFeatures.add("pw=" + mention.getPreviousToken().toString());
    }

    // token after the mention, or the EOS marker when there is none
    if (mention.getNextToken() == null) {
      contextFeatures.add("nt=EOS");
      contextFeatures.add("nw=EOS");
    }
    else {
      contextFeatures.add("nt=" + mention.getNextToken().getSyntacticType());
      contextFeatures.add("nw=" + mention.getNextToken().toString());
    }

    // token reported by getNextTokenBasal, or the EOS marker when there is none
    if (mention.getNextTokenBasal() == null) {
      contextFeatures.add("bnt=EOS");
      contextFeatures.add("bnw=EOS");
    }
    else {
      contextFeatures.add("bnt=" + mention.getNextTokenBasal().getSyntacticType());
      contextFeatures.add("bnw=" + mention.getNextTokenBasal().toString());
    }

    return contextFeatures;
  }

  /**
   * Builds the set of lower-cased strings of the tokens that come before the head token.
   * @param tokens The tokens to draw from.
   * @param headIndex The index of the head token; only tokens before this index are included.
   * @return the set of lower-cased token strings preceding the head.
   */
  private Set constructModifierSet(Parse[] tokens, int headIndex) {
    Set modifiers = new HashSet();
    int index = 0;
    while (index < headIndex) {
      modifiers.add(tokens[index].toString().toLowerCase());
      index++;
    }
    return modifiers;
  }

  /**
   * Returns whether the specified token is a definite article.
   * @param tok The token.
   * @param tag The pos-tag for the specified token.
   * @return whether the specified token is a definite article.
*/  protected boolean definiteArticle(String tok, String tag) {    tok = tok.toLowerCase();    if (tok.equals("the") || tok.equals("these") || tok.equals("these") || tag.equals("PRP$")) {      return (true);    }    return (false);  }  private boolean isSubstring(String ecStrip, String xecStrip) {    //System.err.println("MaxentResolver.isSubstring: ec="+ecStrip+" xec="+xecStrip);    int io = xecStrip.indexOf(ecStrip);    if (io != -1) {      //check boundries      if (io != 0 && xecStrip.charAt(io - 1) != ' ') {        return false;      }      int end = io + ecStrip.length();      if (end != xecStrip.length() && xecStrip.charAt(end) != ' ') {        return false;      }      return true;    }    return false;  }  protected boolean excluded(MentionContext ec, DiscourseEntity de) {    if (super.excluded(ec, de)) {      return true;    }    return false;    /*    else {      if (GEN_INCOMPATIBLE == getGenderCompatibilityFeature(ec,de)) {        return true;       }      else if (NUM_INCOMPATIBLE == getNumberCompatibilityFeature(ec,de)) {        return true;      }      else if (SIM_INCOMPATIBLE == getSemanticCompatibilityFeature(ec,de)) {        return true;      }      return false;    }    */  }  /**   * Returns distance features for the specified mention and entity.   * @param mention The mention.   * @param entity The entity.   * @return list of distance features for the specified mention and entity.   */  protected List getDistanceFeatures(MentionContext mention, DiscourseEntity entity) {    List features = new ArrayList();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -