📄 parse.java
字号:
/////////////////////////////////////////////////////////////////////////////////Copyright (C) 2003 Thomas Morton////This library is free software; you can redistribute it and/or//modify it under the terms of the GNU Lesser General Public//License as published by the Free Software Foundation; either//version 2.1 of the License, or (at your option) any later version.////This library is distributed in the hope that it will be useful,//but WITHOUT ANY WARRANTY; without even the implied warranty of//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the//GNU General Public License for more details.////You should have received a copy of the GNU Lesser General Public//License along with this program; if not, write to the Free Software//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.////////////////////////////////////////////////////////////////////////////// package opennlp.tools.parser;import java.util.Collection;import java.util.HashSet;import java.util.Iterator;import java.util.LinkedHashSet;import java.util.LinkedList;import java.util.List;import java.util.Set;import java.util.Stack;import java.util.regex.Matcher;import java.util.regex.Pattern;import opennlp.tools.util.Span;//import java.text.DecimalFormat;/** Data structure for holding parse constitents. */public class Parse implements Cloneable, Comparable { /** The text string on which this parse is based. This object is shared amonung all parses for the same sentence. */ private String text; /** The character offsets into the text for this constituent. */ private Span span; /** The syntactic type of this parse. */ private String type; /** The sub-constituents of this parse. */ private List parts; /** The head parse of this parse. A parse can be its own head.*/ private Parse head; /** The outcome assigned to this parse during cconstruction of its parent parse. */ private String label; /** The parent parse of this parse. 
*/ private Parse parent; /** The probability associated with the syntactic type assigned to this parse. */ private double prob; /** The string buffer used to track the derivation of this parse. */ private StringBuffer derivation; /** The pattern used to find the base constituent label of a Penn Treebank labeled constituent. */ private static Pattern typePattern = Pattern.compile("^([^ =-]+)"); /** The pattern used to find the function tags. */ private static Pattern funTypePattern = Pattern.compile("^[^ =-]+-([^ =-]+)"); /** The patter used to identify tokens in Penn Treebank labeled constituents. */ private static Pattern tokenPattern = Pattern.compile("^[^ ()]+ ([^ ()]+)\\s*\\)"); /** The set of punctuation parses which are between this parse and the previous parse. */ private Collection prevPunctSet; /** The set of punctuation parses which are between this parse and the subsequent parse. */ private Collection nextPunctSet; private static boolean useFunctionTags; public Object clone() { Parse p = new Parse(this.text, this.span, this.type, this.prob, this.head); p.parts = (List) ((LinkedList) this.parts).clone(); if (derivation != null) { p.derivation = new StringBuffer(100); p.derivation.append(this.derivation.toString()); } p.label = this.label; return (p); } public static void useFunctionTags(boolean uft) { useFunctionTags = uft; } public Parse(String text, Span span, String type, double p) { this.text = text; this.span = span; this.type = type; this.prob = p; this.head = this; this.parts = new LinkedList(); this.label = null; this.parent = null; } public Parse(String text, Span span, String type, double p, Parse h) { this(text, span, type, p); this.head = h; } /** * Set the type of this constituent to the specified type. * @param type The type of this constituent. */ public void setType(String type) { this.type = type; } /** * Returns the constituent label for this node of the parse. * @return The constituent label for this node of the parse. 
*/
  public String getType() {
    return this.type;
  }

  /**
   * Gives access to the punctuation parses recorded as immediately preceding this parse.
   * @return The stored collection itself; may be null if no punctuation has been recorded.
   */
  public Collection getPreviousPunctuationSet() {
    return this.prevPunctSet;
  }

  /**
   * Records the specified punctuation parse as occurring before this parse.
   * The backing set is created on first use.
   * @param punct The punctuation.
   */
  public void addPreviousPunctuation(Parse punct) {
    Collection preceding = this.prevPunctSet;
    if (preceding == null) {
      preceding = new LinkedHashSet();
      this.prevPunctSet = preceding;
    }
    preceding.add(punct);
  }

  /**
   * Gives access to the punctuation parses recorded as immediately following this parse.
   * @return The stored collection itself; may be null if no punctuation has been recorded.
   */
  public Collection getNextPunctuationSet() {
    return this.nextPunctSet;
  }

  /**
   * Records the specified punctuation parse as occurring after this parse.
   * The backing set is created on first use.
   * @param punct The punctuation.
   */
  public void addNextPunctuation(Parse punct) {
    Collection following = this.nextPunctSet;
    if (following == null) {
      following = new LinkedHashSet();
      this.nextPunctSet = following;
    }
    following.add(punct);
  }

  /**
   * Replaces the collection of punctuation which follows this parse.
   * @param punctSet The collection of punctuation which follows this parse.
   */
  public void setNextPunctuation(Collection punctSet) {
    nextPunctSet = punctSet;
  }

  /**
   * Replaces the collection of punctuation which precedes this parse.
   * @param punctSet The collection of punctuation which precedes this parse.
   */
  public void setPrevPunctuation(Collection punctSet) {
    prevPunctSet = punctSet;
  }

  /**
   * Inserts the specified constituent into this parse based on its text span.  This
   * method assumes that the specified constituent can be inserted into this parse.
   * @param constituent The constituent to be inserted.
*/ public void insert(final Parse constituent) { Span ic = constituent.span; if (span.contains(ic)) { //double oprob=c.prob; int pi=0; int pn = parts.size(); for (; pi < pn; pi++) { Parse subPart = (Parse) parts.get(pi); //System.err.println("Parse.insert:con="+constituent+" sp["+pi+"] "+subPart+" "+subPart.getType()); Span sp = subPart.span; if (sp.getStart() >= ic.getEnd()) { break; } // constituent contains subPart else if (ic.contains(sp)) { //System.err.println("Parse.insert:con contains subPart"); parts.remove(pi); pi--; constituent.parts.add(subPart); subPart.setParent(constituent); //System.err.println("Parse.insert: "+subPart.hashCode()+" -> "+subPart.getParent().hashCode()); pn = parts.size(); } else if (sp.contains(ic)) { //System.err.println("Parse.insert:subPart contains con"); subPart.insert(constituent); return; } } //System.err.println("Parse.insert:adding con="+constituent+" to "+this); parts.add(pi, constituent); constituent.setParent(this); //System.err.println("Parse.insert: "+constituent.hashCode()+" -> "+constituent.getParent().hashCode()); } else { throw (new InternalError("Inserting constituent not contained in the sentence!")); } } /** * Appends the specified string buffer with a string representation of this parse. * @param sb A string buffer into which the parse string can be appended. 
*/ public void show(StringBuffer sb) { int start; start = span.getStart(); if (!type.equals(ParserME.TOK_NODE)) { sb.append("("); sb.append(type +" "); //System.out.print(label+" "); //System.out.print(head+" "); //System.out.print(df.format(prob)+" "); } for (Iterator i = parts.iterator(); i.hasNext();) { Parse c = (Parse) i.next(); Span s = c.span; if (start < s.getStart()) { //System.out.println("pre "+start+" "+s.getStart()); sb.append(text.substring(start, s.getStart())); } c.show(sb); start = s.getEnd(); } sb.append(text.substring(start, span.getEnd())); if (!type.equals(ParserME.TOK_NODE)) { sb.append(")"); } } /** * Displays this parse using Penn Treebank-style formatting. */ public void show() { StringBuffer sb = new StringBuffer(text.length()*4); show(sb); System.out.println(sb); } /** * Returns the probability associed with the pos-tag sequence assigned to this parse. * @return The probability associed with the pos-tag sequence assigned to this parse. */ public double getTagSequenceProb() { //System.err.println("Parse.getTagSequenceProb: "+type+" "+this); if (parts.size() == 1 && ((Parse) parts.get(0)).type.equals(ParserME.TOK_NODE)) { //System.err.println(this+" "+prob); return (Math.log(prob)); } else if (parts.size() == 0) { System.err.println("Parse.getTagSequenceProb: Wrong base case!"); return (0.0); } else { double sum = 0.0; for (Iterator pi = parts.iterator(); pi.hasNext();) { sum += ((Parse) pi.next()).getTagSequenceProb(); } return (sum); } } /** * Returns whether this parse is complete. * @return Returns true if the parse contains a single top-most node. */ public boolean complete() { return (parts.size() == 1); } public String toString() { return (text.substring(span.getStart(), span.getEnd())); } /** * Returns the text of the sentence over which this parse was formed. * @return The text of the sentence over which this parse was formed. */ public String getText() { return text; } /** * Returns the character offsets for this constituent. 
* @return The character offsets for this constituent.
   */
  public Span getSpan() {
    return this.span;
  }

  /**
   * Returns the log of the product of the probability associated with all the
   * decisions which formed this constituent.
   * @return The log of the product of the probability associated with all the
   *         decisions which formed this constituent.
   */
  public double getProb() {
    return this.prob;
  }

  /**
   * Adds the specified log probability to the current value stored for this parse.
   * @param logProb The log probability of an action performed on this parse.
   */
  public void addProb(double logProb) {
    prob = prob + logProb;
  }

  /**
   * Returns the child constituents of this constituent.
   * @return The child constituents of this constituent.
   */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -