📄 mentionpairiterator.java
字号:
while (iter.hasNext()) {
    Element e1 = (Element)iter.next();
    Matcher m = p1.matcher(e1.getName());
    if (m.matches()) return false;
}
}
for (int i=0; i < patterns.size(); i++) {
    Matcher m = ((Pattern)patterns.elementAt(i)).matcher(n.getName()); // matches against Element Name
    if (m.matches()) {
        return true;
    }
}
return false;
} // end of the method whose header precedes this excerpt (tokens above left unchanged)

/**
 * Debugging aid: writes one line per mention to standard output, showing
 * the mention's string followed by the mention object itself.
 *
 * @param mentions vector whose elements are cast to {@code Mention}
 */
private void printMentions (java.util.Vector mentions) {
    for (int i=0; i < mentions.size(); i++) {
        Mention m = (Mention)mentions.elementAt(i);
        System.out.println("Mention: " + m.getString() + " -- " + m);
    }
}

/**
 * Builds the antecedent/referent pairs for the current document and appends
 * them to {@code nodePairArray}.
 *
 * For every mention that is compatible with {@code patterns} and passes
 * {@link #validReferent}, the preceding mentions are scanned backwards while
 * the index distance stays below {@code NP_LOOKBACK}; each non-empty,
 * compatible one is paired with the referent. When {@code addNullAntecedent}
 * is set, a pair with a {@code null} antecedent is appended after the real
 * pairs of that referent.
 *
 * @param patterns vector of {@code Pattern} objects handed to
 *                 {@code compatible} and {@code getAllNodes}
 * @return the number of non-null-antecedent pairs that were added
 */
private int fillNodePairArray(java.util.Vector patterns) {
    java.util.Vector nodes = new java.util.Vector();
    Element outsideNode;
    Element insideNode;
    getAllNodes(malletDocument,nodes,patterns);
    java.util.Vector mentions = convertToMentions(nodes);
    //printMentions(mentions);
    // NOTE(review): null-antecedent pairs added below are NOT included in this
    // count, while hasNextNodePair() bounds iteration by a name called
    // pairCount. Confirm that whatever sets that value accounts for them.
    int pairCount = 0;
    Mention ant,ref;
    for (int i=0; i < mentions.size(); i++) {
        ref = (Mention)mentions.elementAt(i);
        outsideNode = ref.getElement();
        if ((compatible (outsideNode, patterns)) && validReferent(ref)) {
            for (int j=i-1; (j > -1 && (i - j) < NP_LOOKBACK) ; j--) {
                ant = (Mention)mentions.elementAt(j);
                insideNode = ant.getElement();
                // added check to ensure antecedent isn't empty (can happen with Treebank)
                if ((ant.getString().length() > 0) && (compatible (insideNode, patterns))) {
                    MentionPair pair = new MentionPair(ant,ref);
                    // sentence and gender are only filled in for mentions that
                    // actually take part in a pair
                    ant.setSentence(getSentenceParent(ant.getElement()));
                    ref.setSentence(getSentenceParent(ref.getElement()));
                    ant.setGender(findGender(ant.getString()));
                    ref.setGender(findGender(ref.getString()));
                    //pair.setFeatureValue("NPDistance", i - j); // feature set here (more efficient)
                    pair.setReferentIndex(i);
                    nodePairArray.add((Object)pair);
                    pairCount++;
                }
            }
            // only add null antecedent if we're supposed to
            if (addNullAntecedent) {
                MentionPair nullPair = new MentionPair(null,ref);
                nullPair.setReferentIndex(i);
                // the null pair HAS to be the last pair of a particular
                // referent in order for the code that determines whether a
                // null pair is a positive instance or not to work
                nodePairArray.add((Object)nullPair);
            }
        }
    }
    return pairCount;
}

/**
 * Wraps every node in a new {@code Mention}, assigning consecutive ids
 * starting at 1 in iteration order.
 *
 * @param nodes vector whose elements are cast to {@code MalletPhrase}
 * @return a new vector holding one {@code Mention} per node, in the same order
 */
public java.util.Vector convertToMentions(java.util.Vector nodes) {
    int curId = 1;
    Iterator iterator = nodes.iterator();
    java.util.Vector mentions = new java.util.Vector();
    while (iterator.hasNext()) {
        mentions.add(new Mention((MalletPhrase)iterator.next(), targetDocPath,
                                 currentDocument, curId, sourceType));
        curId++; // give unique ids to mentions now
    }
    return mentions;
}

/**
 * Looks the string up in the pronoun tables using exact
 * ({@code String.equals}) comparison.
 *
 * @param string text to classify
 * @return {@code "male"} if it equals one of the first
 *         {@code numMalePronouns} entries of {@code malePronouns},
 *         {@code "female"} if it equals one of the first
 *         {@code numFemalePronouns} entries of {@code femalePronouns},
 *         otherwise {@code "unknown"}
 */
private String findGender (String string) {
    for (int i=0; i < numMalePronouns; i++) {
        if (((String)malePronouns[i]).equals(string))
            return "male";
    }
    for (int i=0; i < numFemalePronouns; i++) {
        if (((String)femalePronouns[i]).equals(string))
            return "female";
    }
    return "unknown";
}

/**
 * Walks from {@code node} up through its parents and returns the first
 * element (possibly {@code node} itself) whose name is {@code "S"} or
 * {@code "s"}.
 *
 * @param node starting element; may be {@code null}
 * @return the matching element, or {@code null} if the chain of parents is
 *         exhausted without a match
 */
private Element getSentenceParent (Element node) {
    for (Element cur = node; cur != null; cur = cur.getParent()) {
        String name = cur.getName();
        if (name.equals("S") || name.equals("s")) {
            return cur;
        }
    }
    return null;
}

/**
 * Decides whether a mention may act as a referent under the current
 * configuration flags.
 *
 * <ul>
 *   <li>{@code includeEverything}: always valid for source type
 *       {@code "MUC"}; otherwise an element named {@code "lex"} is valid
 *       only if it is a pronoun, and any other element is valid.</li>
 *   <li>{@code includeProperNouns}: valid if it is a proper noun or a
 *       pronoun.</li>
 *   <li>otherwise: valid only if it is a pronoun.</li>
 * </ul>
 *
 * @param referent the candidate mention
 * @return {@code true} if the mention qualifies as a referent
 */
private boolean validReferent (Mention referent) {
    if (includeEverything) {
        if (sourceType.equals("MUC"))
            return true;
        if (referent.getElement().getName().equals("lex"))
            return MentionPairIterator.referentPronoun (referent);
        return true;
    }
    if (includeProperNouns)
        return (MentionPairIterator.referentProperNoun (referent)
                || MentionPairIterator.referentPronoun (referent));
    return MentionPairIterator.referentPronoun(referent);
}

/**
 * Collects into {@code allNodes} every phrase of the document whose element
 * is compatible with {@code patterns}, plus every phrase whose element is
 * named {@code "lex"} and carries an attribute accepted by
 * {@link #argumentsIncludeCoreferentialInfo}.
 *
 * @param malletDoc document supplying the phrases
 * @param allNodes  output vector; matching {@code MalletPhrase} objects are appended
 * @param patterns  vector of {@code Pattern} objects handed to {@code compatible}
 */
private void getAllNodes (MalletDocument malletDoc, java.util.Vector allNodes, java.util.Vector patterns) {
    //System.out.println("Nodes from malletDoc construction: " );
    //printAllNodesInContext(malletDoc.getPhrases());
    Iterator phraseIter = malletDoc.getPhrases().iterator();
    while (phraseIter.hasNext()) {
        MalletPhrase ph = (MalletPhrase)phraseIter.next();
        if ((compatible (ph.getElement(),patterns)) ||
            ((ph.getElement().getName().equals("lex") && argumentsIncludeCoreferentialInfo(ph)))) {
            allNodes.add(ph);
        }
    }
    //System.out.println("Actual nodes: ");
    //printAllNodesInContext(allNodes); // XXXX Debugging
}

/**
 * Debugging aid: prints each phrase in the vector together with its
 * preceding and following pre-terminals, one phrase per output line.
 *
 * @param nodes vector whose elements are cast to {@code MalletPhrase}
 */
public void printAllNodesInContext (java.util.Vector nodes) {
    for (int i = 0; i < nodes.size(); i++) {
        printPhraseInContext((MalletPhrase)nodes.elementAt(i));
    }
}

/**
 * Debugging aid: prints each phrase in the set together with its preceding
 * and following pre-terminals, one phrase per output line.
 *
 * @param nodes set whose elements are cast to {@code MalletPhrase}
 */
public void printAllNodesInContext (Set nodes) {
    Iterator iter = nodes.iterator();
    while (iter.hasNext()) {
        printPhraseInContext((MalletPhrase)iter.next());
    }
}

/**
 * Prints one phrase as: preceding pre-terminal string (or {@code NULL}),
 * the phrase's own pre-terminals, then the following pre-terminal string
 * (or {@code NULL}) and a line break.
 */
private void printPhraseInContext (MalletPhrase ph) {
    if (ph.getPreceedingPreTerm() != null)
        System.out.print(ph.getPreceedingPreTerm().getString() + " ");
    else
        System.out.print("NULL ");
    ph.printPreTerms();
    if (ph.getFollowingPreTerm() != null)
        System.out.println(" " + ph.getFollowingPreTerm().getString());
    else
        System.out.println(" NULL");
}

/**
 * Reports whether any attribute of the phrase's element has a name that
 * fully matches the regular expression {@code .*OBJREF-.*}.
 *
 * @param ph phrase whose element attributes are inspected
 * @return {@code true} as soon as one attribute name matches, else {@code false}
 */
private boolean argumentsIncludeCoreferentialInfo (MalletPhrase ph) {
    Pattern p = Pattern.compile(".*OBJREF-.*");
    List attrs = ph.getElement().getAttributes();
    Iterator i1 = attrs.iterator();
    while (i1.hasNext()) {
        Attribute a = (Attribute)i1.next();
        Matcher m = p.matcher(a.getName());
        if (m.matches())
            return true;
    }
    return false;
}

/**
 * Returns the pair's entity reference, which serves as its target value.
 */
private String getTargetValue (MentionPair pair) {
    return pair.getEntityReference();
}

/**
 * Sets the referent's antecedent count to the antecedent's count plus one.
 * Pairs built with a {@code null} antecedent (see
 * {@link #fillNodePairArray}) are left untouched instead of raising a
 * {@code NullPointerException}.
 *
 * @param pair pair whose referent is updated
 */
private void updateMentionFeatures (MentionPair pair) {
    Mention ant = pair.getAntecedent();
    Mention ref = pair.getReferent();
    if (ant == null) {
        return; // null-antecedent pair: there is no count to propagate
    }
    ref.setAntecedentCount (ant.getAntecedentCount() + 1); // update the antecedent count
}

/**
 * @return {@code true} while {@code currentIndex} is below {@code pairCount}
 */
private boolean hasNextNodePair() {
    return (currentIndex < pairCount);
}

/**
 * Returns the pair at {@code currentIndex} and advances the index.
 *
 * @return the next pair, or {@code null} once {@code currentIndex} has
 *         reached {@code pairCount}
 */
private MentionPair nextNodePair() {
    if (currentIndex < pairCount) {
        int index = currentIndex++;
        return (MentionPair)nodePairArray.get(index);
    }
    return null;
}

/** Delegates to {@link #hasNextNodePair()}. */
public boolean hasNext() {
    return hasNextNodePair();
}
/**
 * Delegates to {@code nextNodePair()}, which yields {@code null} rather
 * than throwing once the pairs are exhausted.
 */
public Object next() {
    return nextNodePair();
}

/**
 * Removal is not supported.
 *
 * @throws UnsupportedOperationException always
 */
public void remove () {
    throw new UnsupportedOperationException();
}
} // closes the class declaration opened before this excerpt

/**
 * Splits an instance list into per-document groups.
 *
 * Instances are read in order; each run of consecutive instances whose
 * referent document is the same object (reference comparison) becomes one
 * {@code ArrayList}. The lists are returned in encounter order.
 *
 * @param allInstances instances whose source is cast to {@code MentionPair}
 * @return a {@code LinkedHashSet} of {@code ArrayList}s of instances; empty
 *         (never containing {@code null}) when {@code allInstances} has no
 *         elements
 */
public static Set partitionIntoDocumentInstances (InstanceList allInstances) {
    Set setOfDocInstances = new LinkedHashSet();
    Iterator instIterator = allInstances.iterator();
    Document curDoc = null;
    ArrayList curList = null;
    while (instIterator.hasNext()) {
        Instance inst = (Instance)instIterator.next();
        Document doc = ((MentionPair)inst.getSource()).getReferent().getDocument();
        // Start a new group on the very first instance (even if its document
        // is null) and whenever the document changes.
        if (curList == null || curDoc != doc) {
            if (curList != null)
                setOfDocInstances.add(curList);
            curList = new ArrayList();
            curDoc = doc; // update curDoc to new doc
        }
        curList.add(inst);
    }
    // Flush the last group; skipped for empty input so that no null entry
    // ends up in the result.
    if (curList != null)
        setOfDocInstances.add(curList);
    return setOfDocInstances;
}

// NOTE: we could have issues where there are part of speech errors here
/**
 * Tests whether the head pre-terminal of the mention's phrase is tagged
 * {@code "NNP"}.
 *
 * @param referent mention to inspect
 * @return {@code true} if the head's part of speech is non-null and equals
 *         {@code "NNP"}
 */
public static boolean referentProperNoun (Mention referent) {
    MalletPhrase ph = referent.getMalletPhrase();
    MalletPreTerm pt = ph.getHeadPreTerm();
    return (pt.getPartOfSpeech() != null) && (pt.getPartOfSpeech().equals("NNP"));
}

/**
 * Tests whether the mention's string equals (exact
 * {@code String.equals} comparison) one of the first {@code pronounsSize}
 * entries of {@code pronouns}.
 *
 * @param referent mention to inspect
 * @return {@code true} if the mention's string is a listed pronoun
 */
public static boolean referentPronoun (Mention referent) {
    String refString = referent.getString();
    for (int i=0; i < pronounsSize; i++) {
        if (((String)pronouns[i]).equals(refString)) {
            return true;
        }
    }
    return false;
}

/**
 * Command-line driver: iterates over every mention pair produced from the
 * files in the given directory, discarding the results.
 *
 * @param args exactly one element, the directory to read; otherwise a usage
 *             message is printed and the JVM exits with status -1
 */
public static void main(String[] args) {
    if (args.length != 1) {
        // Report this class, not an unrelated one, as the program name.
        System.err.println ("Usage: "+MentionPairIterator.class.getName()+" <directory of ACE files>");
        System.exit(-1);
    }
    // This iterator takes a directory and iterates over the files contained in it
    FileIterator fileIterator = new FileIterator (new File(args[0]));
    MentionPairIterator pairIterator = new MentionPairIterator (fileIterator, "TB");
    while (pairIterator.hasNext()) {
        pairIterator.next();
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -