📄 parse.java

📁 自然语言处理领域的一个开发包
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/////////////////////////////////////////////////////////////////////////////////Copyright (C) 2003 Thomas Morton////This library is free software; you can redistribute it and/or//modify it under the terms of the GNU Lesser General Public//License as published by the Free Software Foundation; either//version 2.1 of the License, or (at your option) any later version.////This library is distributed in the hope that it will be useful,//but WITHOUT ANY WARRANTY; without even the implied warranty of//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the//GNU General Public License for more details.////You should have received a copy of the GNU Lesser General Public//License along with this program; if not, write to the Free Software//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.//////////////////////////////////////////////////////////////////////////////   package opennlp.tools.parser;import java.util.Collection;import java.util.HashSet;import java.util.Iterator;import java.util.LinkedHashSet;import java.util.LinkedList;import java.util.List;import java.util.Set;import java.util.Stack;import java.util.regex.Matcher;import java.util.regex.Pattern;import opennlp.tools.util.Span;//import java.text.DecimalFormat;/** Data structure for holding parse constitents. */public class Parse implements Cloneable, Comparable {  /** The text string on which this parse is based.  This object is shared amonung all parses for the same sentence. */  private String text;  /** The character offsets into the text for this constituent. */  private Span span;  /** The syntactic type of this parse. */  private String type;  /** The sub-constituents of this parse. */  private List parts;  /** The head parse of this parse. A parse can be its own head.*/  private Parse head;  /** The outcome assigned to this parse during cconstruction of its parent parse. */  private String label;  /** The parent parse of this parse. 
*/  private Parse parent;  /** The probability associated with the syntactic type assigned to this parse. */  private double prob;  /** The string buffer used to track the derivation of this parse. */  private StringBuffer derivation;  /** The pattern used to find the base constituent label of a Penn Treebank labeled constituent. */  private static Pattern typePattern = Pattern.compile("^([^ =-]+)");  /** The pattern used to find the function tags. */   private static Pattern funTypePattern = Pattern.compile("^[^ =-]+-([^ =-]+)");  /** The patter used to identify tokens in Penn Treebank labeled constituents. */  private static Pattern tokenPattern = Pattern.compile("^[^ ()]+ ([^ ()]+)\\s*\\)");    /** The set of punctuation parses which are between this parse and the previous parse. */  private Collection prevPunctSet;  /** The set of punctuation parses which are between this parse and the subsequent parse. */  private Collection nextPunctSet;    private static boolean useFunctionTags;    public Object clone() {    Parse p = new Parse(this.text, this.span, this.type, this.prob, this.head);    p.parts = (List) ((LinkedList) this.parts).clone();    if (derivation != null) {      p.derivation = new StringBuffer(100);      p.derivation.append(this.derivation.toString());    }    p.label = this.label;    return (p);  }      public static void useFunctionTags(boolean uft) {    useFunctionTags = uft;  }    public Parse(String text, Span span, String type, double p) {    this.text = text;    this.span = span;    this.type = type;    this.prob = p;    this.head = this;    this.parts = new LinkedList();    this.label = null;    this.parent = null;  }  public Parse(String text, Span span, String type, double p, Parse h) {    this(text, span, type, p);    this.head = h;  }  /**   * Set the type of this constituent to the specified type.   * @param type The type of this constituent.   
*/  public void setType(String type) {    this.type = type;  }  /**   * Returns the constituent label for this node of the parse.   * @return The constituent label for this node of the parse.   */  public String getType() {    return type;  }    /**   * Returns the set of punctuation parses that occur immediately before this parse.   * @return the set of punctuation parses that occur immediately before this parse.   */  public Collection getPreviousPunctuationSet() {    return prevPunctSet;  }    /**   * Designates that the specifed punctuation should is prior to this parse.   * @param punct The punctuation.   */  public void addPreviousPunctuation(Parse punct) {    if (this.prevPunctSet == null) {      this.prevPunctSet = new LinkedHashSet();    }    prevPunctSet.add(punct);  }    /**   * Returns the set of punctuation parses that occur immediately after this parse.   * @return the set of punctuation parses that occur immediately after this parse.   */  public Collection getNextPunctuationSet() {    return nextPunctSet;  }    /**   * Designates that the specifed punctuation follows this parse.   * @param punct The punctuation set.   */  public void addNextPunctuation(Parse punct) {    if (this.nextPunctSet == null) {      this.nextPunctSet = new LinkedHashSet();    }    nextPunctSet.add(punct);  }    /**   * Sets the set of punctuation tags which follow this parse.   * @param punctSet The set of puncuation tags which follow this parse.   */  public void setNextPunctuation(Collection punctSet) {    this.nextPunctSet = punctSet;  }    /**   * Sets the set of punctuation tags which preceed this parse.   * @param punctSet The set of puncuation tags which preceed this parse.   */  public void setPrevPunctuation(Collection punctSet) {    this.prevPunctSet = punctSet;  }  /**   * Inserts the specified constituent into this parse based on its text span.  This   * method assumes that the specified constituent can be inserted into this parse.   
* @param constituent The constituent to be inserted.   */  public void insert(final Parse constituent) {    Span ic = constituent.span;    if (span.contains(ic)) {      //double oprob=c.prob;      int pi=0;      int pn = parts.size();      for (; pi < pn; pi++) {        Parse subPart = (Parse) parts.get(pi);        //System.err.println("Parse.insert:con="+constituent+" sp["+pi+"] "+subPart+" "+subPart.getType());        Span sp = subPart.span;        if (sp.getStart() >= ic.getEnd()) {          break;        }        // constituent contains subPart        else if (ic.contains(sp)) {          //System.err.println("Parse.insert:con contains subPart");          parts.remove(pi);          pi--;          constituent.parts.add(subPart);          subPart.setParent(constituent);          //System.err.println("Parse.insert: "+subPart.hashCode()+" -> "+subPart.getParent().hashCode());          pn = parts.size();        }        else if (sp.contains(ic)) {          //System.err.println("Parse.insert:subPart contains con");          subPart.insert(constituent);          return;        }      }      //System.err.println("Parse.insert:adding con="+constituent+" to "+this);      parts.add(pi, constituent);      constituent.setParent(this);      //System.err.println("Parse.insert: "+constituent.hashCode()+" -> "+constituent.getParent().hashCode());    }    else {      throw (new InternalError("Inserting constituent not contained in the sentence!"));    }  }    /**   * Appends the specified string buffer with a string representation of this parse.   * @param sb A string buffer into which the parse string can be appended.    
*/  public void show(StringBuffer sb) {    int start;    start = span.getStart();    if (!type.equals(ParserME.TOK_NODE)) {      sb.append("(");      sb.append(type +" ");      //System.out.print(label+" ");      //System.out.print(head+" ");      //System.out.print(df.format(prob)+" ");    }    for (Iterator i = parts.iterator(); i.hasNext();) {      Parse c = (Parse) i.next();      Span s = c.span;      if (start < s.getStart()) {        //System.out.println("pre "+start+" "+s.getStart());        sb.append(text.substring(start, s.getStart()));      }      c.show(sb);      start = s.getEnd();    }    sb.append(text.substring(start, span.getEnd()));    if (!type.equals(ParserME.TOK_NODE)) {      sb.append(")");    }  }  /**   * Displays this parse using Penn Treebank-style formatting.    */  public void show() {    StringBuffer sb = new StringBuffer(text.length()*4);    show(sb);    System.out.println(sb);  }  /**   * Returns the probability associed with the pos-tag sequence assigned to this parse.   * @return The probability associed with the pos-tag sequence assigned to this parse.   */  public double getTagSequenceProb() {    //System.err.println("Parse.getTagSequenceProb: "+type+" "+this);    if (parts.size() == 1 && ((Parse) parts.get(0)).type.equals(ParserME.TOK_NODE)) {      //System.err.println(this+" "+prob);      return (Math.log(prob));    }    else if (parts.size() == 0) {      System.err.println("Parse.getTagSequenceProb: Wrong base case!");      return (0.0);    }    else {      double sum = 0.0;      for (Iterator pi = parts.iterator(); pi.hasNext();) {        sum += ((Parse) pi.next()).getTagSequenceProb();      }      return (sum);    }  }  /**    * Returns whether this parse is complete.   * @return Returns true if the parse contains a single top-most node.   
*/
  public boolean complete() {
    final int childCount = parts.size();
    return childCount == 1;
  }

  /**
   * Returns the portion of the sentence text covered by the span of this parse.
   * @return The text covered by this parse.
   */
  public String toString() {
    final int from = span.getStart();
    final int to = span.getEnd();
    return text.substring(from, to);
  }

  /**
   * Returns the text of the sentence over which this parse was formed.
   * @return The text of the sentence over which this parse was formed.
   */
  public String getText() {
    return this.text;
  }

  /**
   * Returns the character offsets for this constituent.
   * @return The character offsets for this constituent.
   */
  public Span getSpan() {
    return this.span;
  }

  /**
   * Returns the probability value currently stored for this constituent, including
   * everything accumulated through {@link #addProb(double)}.
   * @return The probability value stored for this constituent.
   */
  public double getProb() {
    return this.prob;
  }

  /**
   * Adds the specified log probability to the value stored for this parse.
   * @param logProb The log probability of an action performed on this parse.
   */
  public void addProb(double logProb) {
    this.prob = this.prob + logProb;
  }

  /**
   * Returns the child constituents of this constituent.
   * @return The child constituents of this constituent.
   */
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -