📄 venuecoreference.java
字号:
// ---------------------------------------------------------------------------
// NOTE(review): this is the tail of a larger training/evaluation driver method
// whose beginning lies above this excerpt.  The scraped copy had all newlines
// collapsed onto a few physical lines, which made the '//' comments swallow
// the code after them; the original line structure is restored below.  Code
// tokens and all runtime strings are unchanged.
// ---------------------------------------------------------------------------

// Fold the optional third node set into the combined training node list.
if (nodes3 != null) allnodes.addAll(nodes3);
//allnodes.addAll(test_nodes);
//double trainingCRFscore = scoreCitations(allnodes);
//System.out.println("CRF Score for Training Citations: " + trainingCRFscore);
//System.out.println("CRF Score for Testing Citations: " + testingCRFscore);

//make distance metrics
// Two string-distance metrics over all nodes: a word-level TFIDF and a
// character-trigram TFIDF (NGramTokenizer(3,3,...) over a SimpleTokenizer).
// 'tfidf' and 'triGramDistanceMetric' are fields declared outside this excerpt.
tfidf = new TFIDF();
NGramTokenizer nGramTokenizer = new NGramTokenizer(3,3,false, new SimpleTokenizer(true, true));
triGramDistanceMetric = new TFIDF (nGramTokenizer);
CitationUtils.makeDistMetric(allnodes, tfidf, triGramDistanceMetric);
System.out.println("finished computing nodes, about to compute distanceMetric params ");

// compute the string distance using SecondString utilities
// this will serve as a useful feature
// Possible extension (later): build different string metrics for
// different fields - this will then be an array of them
AbstractStatisticalTokenDistance distanceMetric =
    (AbstractStatisticalTokenDistance)CitationUtils.computeDistanceMetric (allnodes);

// Feature-extraction pipeline for candidate citation pairs: each pipe adds
// pairwise features; NodePair2FeatureVector/Target2Label finish the instance.
Pipe instancePipe = new SerialPipes (new Pipe[] {
    new ExactFieldMatchPipe(Citation.corefFields),
    new PageMatchPipe(),
    new YearsWithinFivePipe(),
    //new FieldStringDistancePipe(new NeedlemanWunsch(),
    //Citation.corefFields, "EDIST"),
    //new FieldStringDistancePipe(softtfidf, Citation.corefFields, "softTFIDF"),
    new FieldStringDistancePipe(triGramDistanceMetric, Citation.corefFields, "trigramTFIDF"),
    //new PlainFieldPipe (distanceMetric, distanceMetricEditDist),
    new GlobalPipe(distanceMetric),
    //new TitlePipe(distanceMetric),
    new AuthorPipe(distanceMetric),
    //new JournalPipe(distanceMetric),
    ///new BooktitlePipe(distanceMetric),
    new VenuePipe(distanceMetric),
    new VenueAcronymPipe(),
    //new PagesPipe(distanceMetric),
    new HeuristicPipe(Citation.corefFields),
    new InterFieldPipe(),
    //new HeuristicPipe(Citation.corefFields),
    //new DatePipe(distanceMetric),
    //new FuchunPipe(distanceMetricEditDist),
    new NodePair2FeatureVector (),
    new Target2Label (),
    // new PrintInputAndTarget (),
});

// Build training instances by pairing nodes from each available node set.
// Skipped when a serialized MaxEnt model will be loaded below (loadMEFile).
InstanceList ilist = new InstanceList();
if (loadMEFile.value() == null) {
  InstanceList ilist1 = CitationUtils.makePairs(instancePipe, nodes1);
  ilist.add(ilist1);
  if (nodes2 != null) {
    InstanceList ilist2 = CitationUtils.makePairs(instancePipe, nodes2);
    ilist.add(ilist2);
  }
  if (nodes3 != null) {
    InstanceList ilist3 = CitationUtils.makePairs(instancePipe, nodes3);
    ilist.add(ilist3);
  }
}

FeatureInducer fi = null;
// try doing some feature induction now
// Optional InfoGain-driven feature induction over the training instances.
if (useFeatureInduction.value()) {
  RankedFeatureVector.Factory gainFactory = null;
  gainFactory = new InfoGain.Factory();
  fi = new FeatureInducer (gainFactory, ilist, 10);
  fi.induceFeaturesFor(ilist, false, false);
}

// Optional tree model over the node/publication sets.
// NOTE(review): 'pubs1, pubs3, pubs3' passes pubs3 twice — likely a typo for
// 'pubs1, pubs2, pubs3'; confirm against the TreeModel constructor.
TreeModel tmodel = null;
if (useTreeModel.value()) {
  if (pubs2 != null && pubs3 != null) {
    tmodel = new TreeModel(instancePipe, nodes1, nodes2, nodes3, pubs1, pubs3, pubs3);
  } else {
    tmodel = new TreeModel(instancePipe, nodes1, pubs1);
  }
  //tmodel.setMultiTree (true);
}

//List pairsFromCanopy = Util.readPairsFromFile("/tmp/pairs");
//InstanceList ilistToCluster = CitationUtils.makePairs(instancePipe, nodes, pairsFromCanopy);

// Test instances, with the same induced features applied when enabled.
InstanceList itestlist = CitationUtils.makePairs(instancePipe, test_nodes);
if (useFeatureInduction.value()) {
  System.out.println("\n\nINDUCING FEATURES FOR TEST INSTANCES");
  fi.induceFeaturesFor(itestlist, false, false);
}

// Two clusterers: 'cl_old' (threshold/greedy baseline) and 'cl' (the newer
// objective-function clusterer, optionally tree-model backed).
CorefClusterAdv cl = null;
//CorefClusterAdv cl_old = null;
CorefClusterAdv cl_old = null;
//training
//CitationClustering cl = new CitationClustering();
if (oldCluster) {
  cl_old = new CorefClusterAdv(instancePipe);
  cl_old.setTrueNumStop (useTrueNumClusters.value());
  cl_old.train(ilist);
}
if (newCluster) {
  cl = new CorefClusterAdv(instancePipe, tmodel);
  cl.setTrueNumStop (useTrueNumClusters.value());
  cl.setConfWeightedScores(useWeightedAvg.value());
  cl.setOptimality (useOptimal.value());
  cl.setRBeamSize (rBeamSize.value());
  cl.setNBestInference (useNBest.value()); // actually use n-best list in
  //coref
  cl.setFullPartition(fullPartition.value());
  int si = searchIters.value();
  int sd = searchReductions.value();
  cl.setSearchParams (si, sd);
  // Either load a serialized MaxEnt model or train one from the pair instances.
  if (loadMEFile.value() != null) cl.loadME(loadMEFile.value());
  else cl.train(ilist);
  cl.testClassifier(itestlist);
}

// Gold partitionings for the training and test node sets.
Collection key = CitationUtils.makeCollections(allnodes); // make key collections
//System.out.println("KEY: " + key);
//System.out.println("NODES: " + nodes);
Collection testKey = CitationUtils.makeCollections(test_nodes);

// NOTE(review): the clustering call below is commented out, so 's' remains
// null — the "Resulting clustering" line always prints null and the
// newCluster training-evaluation block further down is skipped.
Collection s = null;
if (newCluster) {
  //cl.setKeyPartitioning(key);
  //s = cl.clusterMentions(ilist, allnodes, optimalNBest.value(), useCorrelational.value());
}
System.out.println("Resulting clustering: " + s);

// Greedy/threshold clustering of the TRAINING pairs with the old clusterer.
Collection c1 = null;
if (oldCluster) {
  cl_old.setKeyPartitioning(testKey);
  c1 = cl_old.clusterMentions(ilist, allnodes, optimalNBest.value(), false);
  if (newCluster) {
    System.out.println("Objective fn of KEY: " + cl.evaluatePartitioningExternal(ilist, allnodes, key, optimalNBest.value()));
    System.out.println("Objective fn of GREEDY CLUSTERING: " + cl.evaluatePartitioningExternal(ilist, allnodes, c1, optimalNBest.value()));
  }
}
// System.out.println("Objective fn of KEY w/optimal edges: " +
// cl.evaluatePartitioningExternal(ilist, nodes, key, true));

// Cluster- and pair-level scores for the old clusterer on TRAINING data.
if (oldCluster) {
  // System.out.println("Objective fn of OLD CLUSTERING w/optimal edges: " +
  // cl.evaluatePartitioningExternal(ilist, nodes, c1, true));
  ClusterEvaluate eval1 = new ClusterEvaluate(key, c1);
  eval1.evaluate();
  System.out.println("Threshold Training Cluster F1: " + eval1.getF1());
  System.out.println("Threshold Training Cluster Recall: " + eval1.getRecall());
  System.out.println("Threshold Training Cluster Precision: " + eval1.getPrecision());
  System.out.println("Number of clusters " + c1.size());
  PairEvaluate p1 = new PairEvaluate (key, c1);
  p1.evaluate();
  System.out.println("Threshold Pair F1: " + p1.getF1());
  System.out.println("Threshold Pair Recall: " + p1.getRecall());
  System.out.println("Threshold Pair Precision: " + p1.getPrecision());
}

// New-clusterer TRAINING scores (dead code while 's' stays null, see above).
if (newCluster) {
  if (s != null) {
    ClusterEvaluate eval = new ClusterEvaluate(key, s);
    eval.evaluate();
    PairEvaluate pairEval = new PairEvaluate(key, s);
    pairEval.evaluate();
    /* System.out.println("Objective fn of CORRELATIONAL CLUSTERING Training: " + cl.evaluatePartitioningExternal(ilist, allnodes, s, optimalNBest.value())); */
    //eval.printVerbose();
    System.out.println("ObjFn Training Cluster F1: " + eval.getF1());
    System.out.println("ObjFn Training Cluster Recall: " + eval.getRecall());
    System.out.println("ObjFnTraining Cluster Precision: " + eval.getPrecision());
    System.out.println("Number of clusters " + s.size());
    System.out.println("ObjFn Pair F1: " + pairEval.getF1());
    System.out.println("ObjFn Pair Recall: " + pairEval.getRecall());
    System.out.println("ObjFn Pair Precision: " + pairEval.getPrecision());
  }
}

// NOTE(review): 'cl' is only assigned inside 'if (newCluster)' above, so this
// unconditional call throws NullPointerException when newCluster is false.
cl.setKeyPartitioning (testKey);

// Old clusterer on the TESTING pairs, plus cluster/pair-level scores.
if (oldCluster) {
  //evaluate on testing set
  Collection testS_old = cl_old.clusterMentions(itestlist, test_nodes, -1, useCorrelational.value());
  //Collection testS_old = cl_old.clusterMentions(itestlist, test_nodes, -1, false);
  //Collection testS_old = cl_old.clusterMentions(itestlist, test_nodes);
  ClusterEvaluate eval_t_old = new ClusterEvaluate(testKey, testS_old);
  eval_t_old.evaluate();
  if (newCluster) {
    System.out.println("Objective fn of OLD CLUSTERING: " + cl.evaluatePartitioningExternal(itestlist, test_nodes, testS_old, optimalNBest.value()));
  }
  System.out.println("Threshold Testing Cluster F1: " + eval_t_old.getF1());
  System.out.println("Threshold Testing Cluster Recall: " + eval_t_old.getRecall());
  System.out.println("Threshold Testing Cluster Precision: " + eval_t_old.getPrecision());
  System.out.println("Number of clusters " + testS_old.size());
  PairEvaluate p_t_old = new PairEvaluate (testKey, testS_old);
  p_t_old.evaluate();
  System.out.println("Threshold Pair F1: " + p_t_old.getF1());
  System.out.println("Threshold Pair Recall: " + p_t_old.getRecall());
  System.out.println("Threshold Pair Precision: " + p_t_old.getPrecision());
}

// New clusterer on the TESTING pairs; optionally dump clusters to a file.
if (newCluster) {
  Collection testS = cl.clusterMentions(itestlist, test_nodes, -1, useCorrelational.value());
  ClusterEvaluate evalTest = new ClusterEvaluate(testKey, testS);
  evalTest.evaluate();
  evalTest.printVerbose();
  PairEvaluate pairEvalTest = new PairEvaluate(testKey, testS);
  pairEvalTest.evaluate();
  System.out.println("TESTING Objective fn of KEY: " + cl.evaluatePartitioningExternal(itestlist, test_nodes, testKey, optimalNBest.value()));
  System.out.println("TESTING Objective fn of CORRELATIONAL CLUSTERING Testing: " + cl.evaluatePartitioningExternal(itestlist, test_nodes, testS, optimalNBest.value()));
  //cl.exportGraph("/tmp/testGraphEdges");
  //eval.printVerbose();
  System.out.println("TESTING ObjFn Cluster F1: " + evalTest.getF1());
  System.out.println("TESTING ObjFn Cluster Recall: " + evalTest.getRecall());
  System.out.println("TESTING ObjFn Cluster Precision: " + evalTest.getPrecision());
  System.out.println("Number of clusters " + testS.size());
  System.out.println("TESTING ObjFn Pair F1: " + pairEvalTest.getF1());
  System.out.println("TESTING ObjFn Pair Recall: " + pairEvalTest.getRecall());
  System.out.println("TESTING ObjFn Pair Precision: " + pairEvalTest.getPrecision());
  if (outputFile.value() != null)
    printClustersToFile (testS, outputFile.value());
}
/* System.out.println("Final parameters used: ");
double [] ps = cl.getClassifier().getParameters();
for (int k=0; k < Array.getLength(ps); k++) {
System.out.print(" " + ps[k]);
}*/
}

/**
 * Writes the given clustering to {@code file} in the NEWREFERENCE format
 * (see printClustersAsReceived).  Any I/O failure is printed and swallowed.
 */
protected static void printClustersToFile (Collection citations, String file) {
  try {
    BufferedWriter out = new BufferedWriter(new FileWriter(file));
    printClustersAsReceived (citations, out);
    out.close();
  } catch (Exception e) {e.printStackTrace();}
}

/**
 * Emits each citation as a &lt;NEWREFERENCE&gt; record tagged with a running
 * reference number, its cluster number, and its true label, followed by the
 * citation's original string.
 * NOTE(review): the inner empty catch silently drops write failures for a
 * record while still advancing refNum — confirm this best-effort behavior
 * is intended.
 */
protected static void printClustersAsReceived (Collection citations, BufferedWriter out) {
  int refNum = 1;
  int clNum = 1;
  for (Iterator it = citations.iterator(); it.hasNext();) {
    Collection cl = (Collection)it.next();
    for (Iterator i2 = cl.iterator(); i2.hasNext(); ) {
      Citation c = (Citation)i2.next();
      String lab = (String)c.getLabel();
      try {
        out.write("<NEWREFERENCE>\n");
        out.write("<meta reference_no=\"" + refNum + "\" cluster_no=\"" + clNum + "\" true_id=\"" + lab + "\"></meta>");
        out.write(c.getOrigString());
      } catch (Exception e) {}
      refNum++;
    }
    clNum++;
  }
}

/**
 * Debug helper: prints every element of every sub-collection, with label and
 * index when the element is a Node.
 */
protected static void printCollectionReferences (Collection collection) {
  Iterator i1 = collection.iterator();
  while (i1.hasNext()) {
    Iterator i2 = ((Collection)i1.next()).iterator();
    while (i2.hasNext()) {
      Object o = i2.next();
      if (o instanceof Node) {
        Node n = (Node)o;
        System.out.println("Node: " + n);
        System.out.println("Node label: " + n.getLabel());
        System.out.println("Node index: " + n.getIndex());
      } else {
        System.out.println("Node: " + o);
      }
    }
  }
}

/**
 * Returns the mean of Citation.getScore() over the list.
 * NOTE(review): an empty list yields 0.0/0.0 == NaN — confirm callers never
 * pass an empty list.
 */
public static double scoreCitations(List citations) {
  double score = 0.0;
  for (Iterator i = citations.iterator(); i.hasNext(); ) {
    score += (double)((Citation)i.next()).getScore();
  }
  return score/(double)citations.size();
}

/* This method will create a collection of collections from the citation nodes */
/*
protected static Collection makeCollections (ArrayList nodes) {
  HashMap map = new HashMap(); // keep an index of node label values to collections
  Collection collection = new LinkedHashSet();
  for (int i=0; i<nodes.size(); i++) {
    Node n = (Node)nodes.get(i);
    Object o1 = n.getLabel();
    Collection c = (Collection)map.get(o1);
    if (c != null) {
      c.add(n);
      //System.out.println("adding new node " + n + " to existing collection");
    } else {
      Collection newC = new LinkedHashSet();
      System.out.println("Creating new collection");
      newC.add(n);
      map.put(o1, newC);
    }
  }
  Iterator i1 = map.values().iterator();
  while (i1.hasNext()) {
    collection.add((Collection)i1.next());
  }
  return collection;
}*/

/*
protected static List runCanopies(List files) throws Exception {
  double loose = 0.3;
  double tight = 0.7;
  String indexName = "/tmp/index";
  Analyzer analyzer = new SimpleAnalyzer();
  //Analyzer analyzer = new NGramAnalyzer();
  //Analyzer analyzer = new TriGramAnalyzer();
  //QueryConstructor queryConstructor = new QueryConstructorSimple(analyzer);
  QueryConstructor queryConstructor = new QueryConstructorAuthDateTitle(analyzer);
  IndexFiles.indexFiles(files, indexName, analyzer);
  CanopyMaker cm = new CanopyMaker(indexName, queryConstructor);
  cm.setLooseThreshold(loose);
  cm.setTightThreshold(tight);
  cm.makeCanopies();
  Util.allScores(cm);
  return Util.getUniquePairsFromSets(Util.convertIds(cm.getCanopies(), cm.getDocIdToDocno()));
}
*/
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -