📄 parserme.java
字号:
return new Parse[] {(Parse) parses.first()}; } else { List topParses = new ArrayList(numParses); while(!parses.isEmpty() && topParses.size() < numParses) { Parse tp = (Parse) parses.first(); topParses.add(tp); parses.remove(tp); } return (Parse[]) topParses.toArray(new Parse[topParses.size()]); } } /** * Assigns parent references for the specified parse so that they * are consistent with the children references. * @param p The parse whose parent references need to be assigned. */ public static void setParents(Parse p) { Parse[] children = p.getChildren(); for (int ci = 0; ci < children.length; ci++) { children[ci].setParent(p); setParents(children[ci]); } } /** * Returns a parse for the specified parse of tokens. * @param tokens The root node of a flat parse containing only tokens. * @return A full parse of the specified tokens or the flat chunks of the tokens if a fullparse could not be found. */ public Parse parse(Parse tokens) { Parse p = parse(tokens,1)[0]; setParents(p); return p; } /** * Adds the "TOP" node to the specified parse. * @param p The complete parse. */ protected void advanceTop(Parse p) { buildModel.eval(buildContextGenerator.getContext(p.getChildren(), 0), bprobs); p.addProb(Math.log(bprobs[topStartIndex])); checkModel.eval(checkContextGenerator.getContext(p.getChildren(), TOP_NODE, 0, 0), cprobs); p.addProb(Math.log(cprobs[completeIndex])); p.setType(TOP_NODE); } /** * Determines the mapping between the specified index into the specified parses without punctuation to * the coresponding index into the specified parses. * @param index An index into the parses without punctuation. * @param nonPunctParses The parses without punctuation. * @param parses The parses wit punctuation. * @return An index into the specified parses which coresponds to the same node the specified index * into the parses with punctuation. */ private int mapParseIndex(int index, Parse[] nonPunctParses, Parse[] parses) { int parseIndex = index; while (parses[parseIndex] != nonPunctParses[index]) { parseIndex++; } return parseIndex; } /** Advances the specified parse and returns the an array advanced parses whose probability accounts for * more than the speicficed amount of probability mass, Q. * @param p The parse to advance. * @param Q The amount of probability mass that should be accounted for by the advanced parses. */ protected Parse[] advanceParses(final Parse p, double Q) { double q = 1 - Q; /** The closest previous node which has been labeled as a start node. */ Parse lastStartNode = null; /** The index of the closest previous node which has been labeled as a start node. */ int lastStartIndex = -1; /** The type of the closest previous node which has been labeled as a start node. */ String lastStartType = null; /** The index of the node which will be labeled in this iteration of advancing the parse. */ int advanceNodeIndex; /** The node which will be labeled in this iteration of advancing the parse. */ Parse advanceNode=null; Parse[] originalChildren = p.getChildren(); Parse[] children = collapsePunctuation(originalChildren,punctSet); int numNodes = children.length; if (numNodes == 0) { return null; } //determines which node needs to be labeled and prior labels. for (advanceNodeIndex = 0; advanceNodeIndex < numNodes; advanceNodeIndex++) { advanceNode = children[advanceNodeIndex]; if (advanceNode.getLabel() == null) { break; } else if (startTypeMap.containsKey(advanceNode.getLabel())) { lastStartType = (String) startTypeMap.get(advanceNode.getLabel()); lastStartNode = advanceNode; lastStartIndex = advanceNodeIndex; //System.err.println("lastStart "+i+" "+lastStart.label+" "+lastStart.prob); } } int originalAdvanceIndex = mapParseIndex(advanceNodeIndex,children,originalChildren); List newParsesList = new ArrayList(buildModel.getNumOutcomes()); //call build buildModel.eval(buildContextGenerator.getContext(children, advanceNodeIndex), bprobs); double bprobSum = 0; while (bprobSum < Q) { /** The largest unadvanced labeling. */ int max = 0; for (int pi = 1; pi < bprobs.length; pi++) { //for each build outcome if (bprobs[pi] > bprobs[max]) { max = pi; } } if (bprobs[max] == 0) { break; } double bprob = bprobs[max]; bprobs[max] = 0; //zero out so new max can be found bprobSum += bprob; String tag = buildModel.getOutcome(max); //System.out.println("trying "+tag+" "+bprobSum+" lst="+lst); if (max == topStartIndex) { // can't have top until complete continue; } //System.err.println(i+" "+tag+" "+bprob); if (startTypeMap.containsKey(tag)) { //update last start lastStartIndex = advanceNodeIndex; lastStartNode = advanceNode; lastStartType = (String) startTypeMap.get(tag); } else if (contTypeMap.containsKey(tag)) { if (lastStartNode == null || !lastStartType.equals(contTypeMap.get(tag))) { continue; //Cont must match previous start or continue } } Parse newParse1 = (Parse) p.clone(); //clone parse if (createDerivationString) newParse1.getDerivation().append(max).append("-"); newParse1.setChild(originalAdvanceIndex,tag); //replace constituent labeled newParse1.addProb(Math.log(bprob)); //check //String[] context = checkContextGenerator.getContext(newParse1.getChildren(), lastStartType, lastStartIndex, advanceNodeIndex); checkModel.eval(checkContextGenerator.getContext(collapsePunctuation(newParse1.getChildren(),punctSet), lastStartType, lastStartIndex, advanceNodeIndex), cprobs); //System.out.println("check "+lastStartType+" "+cprobs[completeIndex]+" "+cprobs[incompleteIndex]+" "+tag+" "+java.util.Arrays.asList(context)); Parse newParse2 = newParse1; if (cprobs[completeIndex] > q) { //make sure a reduce is likely newParse2 = (Parse) newParse1.clone(); if (createDerivationString) newParse2.getDerivation().append(1).append("."); newParse2.addProb(Math.log(cprobs[1])); Parse[] cons = new Parse[advanceNodeIndex - lastStartIndex + 1]; boolean flat = true; //first cons[0] = lastStartNode; if (!cons[0].getType().equals(cons[0].getHead().getType())) { flat = false; } //last cons[advanceNodeIndex - lastStartIndex] = advanceNode; if (flat && !cons[advanceNodeIndex - lastStartIndex].getType().equals(cons[advanceNodeIndex - lastStartIndex].getHead().getType())) { flat = false; } //middle for (int ci = 1; ci < advanceNodeIndex - lastStartIndex; ci++) { cons[ci] = children[ci + lastStartIndex]; if (flat && !cons[ci].getType().equals(cons[ci].getHead().getType())) { flat = false; } } if (!flat) { //flat chunks are done by chunker if (lastStartIndex == 0 && advanceNodeIndex == numNodes-1) { //check for top node to include end and begining punctuation //System.err.println("ParserME.advanceParses: reducing entire span: "+new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd())+" "+lastStartType+" "+java.util.Arrays.asList(children)); newParse2.insert(new Parse(p.getText(), p.getSpan(), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType))); } else { newParse2.insert(new Parse(p.getText(), new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd()), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType))); } newParsesList.add(newParse2); } } if (cprobs[incompleteIndex] > q) { //make sure a shift is likly if (createDerivationString) newParse1.getDerivation().append(0).append("."); if (advanceNodeIndex != numNodes - 1) { //can't shift last element newParse1.addProb(Math.log(cprobs[0])); newParsesList.add(newParse1); } } } Parse[] newParses = new Parse[newParsesList.size()]; newParsesList.toArray(newParses); return newParses; } /** * Reutrns the top chunk sequences for the specified parse. * @param p A pos-tag assigned parse. * @return The top chunk assignments to the specified parse. */ protected Parse[] advanceChunks(final Parse p, double minChunkScore) { // chunk Parse[] children = p.getChildren(); String words[] = new String[children.length]; String ptags[] = new String[words.length]; double probs[] = new double[words.length]; Parse sp = null; for (int i = 0, il = children.length; i < il; i++) { sp = children[i]; words[i] = sp.getHead().toString(); ptags[i] = sp.getType(); } //System.err.println("adjusted mcs = "+(minChunkScore-p.getProb())); Sequence[] cs = chunker.topKSequences(words, ptags,minChunkScore-p.getProb()); Parse[] newParses = new Parse[cs.length]; for (int si = 0, sl = cs.length; si < sl; si++) { newParses[si] = (Parse) p.clone(); //copies top level if (createDerivationString) newParses[si].getDerivation().append(si).append("."); String[] tags = (String[]) cs[si].getOutcomes().toArray(new String[words.length]); cs[si].getProbs(probs); int start = -1; int end = 0; String type = null; //System.err.print("sequence "+si+" "); for (int j = 0; j <= tags.length; j++) { //if (j != tags.length) {System.err.println(words[j]+" "+ptags[j]+" "+tags[j]+" "+probs.get(j));} if (j != tags.length) { newParses[si].addProb(Math.log(probs[j])); } if (j != tags.length && tags[j].startsWith(CONT)) { // if continue just update end chunking tag don't use contTypeMap end = j; } else { //make previous constituent if it exists if (type != null) { //System.err.println("inserting tag "+tags[j]); Parse p1 = p.getChildren()[start]; Parse p2 = p.getChildren()[end]; //System.err.println("Putting "+type+" at "+start+","+end+" "+newParses[si].prob); Parse[] cons = new Parse[end - start + 1]; cons[0] = p1; //cons[0].label="Start-"+type; if (end - start != 0) { cons[end - start] = p2; //cons[end-start].label="Cont-"+type; for (int ci = 1; ci < end - start; ci++) { cons[ci] = p.getChildren()[ci + start]; //cons[ci].label="Cont-"+type; } } newParses[si].insert(new Parse(p1.getText(), new Span(p1.getSpan().getStart(), p2.getSpan().getEnd()), type, 1, headRules.getHead(cons, type))); } if (j != tags.length) { //update for new constituent if (tags[j].startsWith(START)) { // don't use startTypeMap these are chunk tags type = tags[j].substring(START.length()); start = j; end = j; } else { // other type = null; } } } } //newParses[si].show();System.out.println(); } return newParses; } /**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -