📄 jointconditionalclusterertui.java
字号:
(randomSeed.value()), positiveInstanceRatio.value(), sampleTrainingInstances.value(), numberTrainingInstances.value()); System.err.println ("TRAINING SOLO PAPER CLUSTERER"); ConditionalClusterer paperClusterer = getClusterer (paperInstanceIterator, getPaperClusterPipe(paperPairwiseClassifier)); System.err.println ("TRAINING SOLO VENUE CLUSTERER"); ConditionalClusterer venueClusterer = getClusterer (venueInstanceIterator, getVenueClusterPipe(venuePairwiseClassifier)); // train joint clusterer System.err.println ("TRAINING JOINT CLUSTERER."); Pipe p = getJointPipe (paperPairwiseClassifier, venuePairwiseClassifier, paperClusterer.getClassifier(), venueClusterer.getClassifier()); AbstractPipeInputIterator jointInstanceIterator = new VenuePaperClusterIterator (paperTrainingTruth, venueTrainingTruth, new java.util.Random (randomSeed.value()), positiveInstanceRatio.value()); ConditionalClustererTrainer jointTrainer = new ConditionalClustererTrainer (p, -negativeClusterThreshold.value()); ConditionalClusterer jointClusterer = jointTrainer.train (jointInstanceIterator, false); System.err.println ("DONE TRAINING JOINT CLUSTERER. BEGIN CLUSTERING."); Collection predictedClustering = null; if (randomOrderClustering.value()) { for (int i=0; i < numRandomTrials.value(); i++) { predictedClustering = jointClusterer.clusterPapersAndVenues (allPaperTestingNodes, allVenueTestingNodes, paperTestingTruth, venueTestingTruth, paperClusterer.getClassifier(), venueClusterer.getClassifier(), new java.util.Random (randomSeed.value() + i*10)); System.err.println ("FINISHED CLUSTERING. BEGIN EVALUATION."); CitationUtils.evaluateClustering (paperTestingTruth, getPaperClusters (predictedClustering), "RANDOM TRIAL " + i + " PAPER COREFERENCE RESULTS"); CitationUtils.evaluateClustering (venueTestingTruth, getVenueClusters (predictedClustering), "RANDOM TRIAL " + i + " VENUE COREFERENCE RESULTS"); } } } private static ConditionalClusterer getClusterer (AbstractPipeInputIterator instanceIterator, Pipe p) { ConditionalClustererTrainer cct = new ConditionalClustererTrainer (p, -negativeClusterThreshold.value()); return cct.train (instanceIterator, useFeatureInduction.value()); } private static Collection getPaperClusters (Collection clustering) { Collection ret = new ArrayList(); Iterator iter = clustering.iterator (); while (iter.hasNext()) { Collection cluster = (Collection) iter.next(); Iterator subIter = cluster.iterator(); Object node = subIter.next(); if (node instanceof PaperCitation) ret.add (cluster); else if (!(node instanceof VenueCitation)) throw new IllegalArgumentException ("Node is neither venue nor paper, it's a " + node.getClass().getName()); } return ret; } private static Collection getVenueClusters (Collection clustering) { Collection ret = new ArrayList(); Iterator iter = clustering.iterator (); while (iter.hasNext()) { Collection cluster = (Collection) iter.next(); Iterator subIter = cluster.iterator(); Object node = subIter.next(); if (node instanceof VenueCitation) ret.add (cluster); else if (!(node instanceof PaperCitation)) throw new IllegalArgumentException ("Node is neither venue nor paper, it's a " + node.getClass().getName()); } return ret; } private static Classifier trainPairwiseClassifier (ArrayList[] nodes, Pipe p) { InstanceList ilist = new InstanceList (p); for (int i=0; i < nodes.length; i++) ilist.add (CitationUtils.makePairs (p, nodes[i])); MaxEnt me = (MaxEnt)(new MaxEntTrainer().train(ilist, null, null, null, null)); ilist.getDataAlphabet().stopGrowth(); Trial t = new Trial(me, ilist); System.out.println("Pairwise classifier: -> Training F1 on \"yes\" is: " + t.labelF1("yes")); return me; } private static Pipe getVenueClusterPipe (Classifier pairwiseClassifier) { // same for now return getPaperClusterPipe(pairwiseClassifier); } private static Pipe getPaperClusterPipe (Classifier pairwiseClassifier) { ArrayList pipes = new ArrayList (); pipes.add (new ForAll (Citation.corefFields)); if (useThereExists.value()) pipes.add (new ThereExists(Citation.corefFields)); if (pairwiseClassifier != null) { //pipes.add (new ClosestSingleLink (pairwiseClassifier, true)); //pipes.add (new FarthestSingleLink (pairwiseClassifier)); //pipes.add (new AverageLink (pairwiseClassifier)); //pipes.add (new NNegativeNodes (pairwiseClassifier, 1)); // previous 4 pipes subsumed by AllLinks - saves time pipes.add (new AllLinks (pairwiseClassifier)); if (useClusterHomogeneity.value()) pipes.add (new ClusterHomogeneity(pairwiseClassifier)); } if (useClusterSize.value()) pipes.add (new ClusterSize ()); // didn't help: pipes.add (new ThereExistsMatch (new NeedlemanWunsch())); pipes.add (new NodeClusterPair2FeatureVector ()); if (printInputAndTarget.value()) pipes.add (new PrintInputAndTarget()); pipes.add (new Target2Label ()); Pipe p = new SerialPipes ((Pipe[])pipes.toArray (new Pipe[] {})); return p; } private static Pipe getPaperPipe (AbstractStatisticalTokenDistance distanceMetric, TFIDF triGramDistanceMetric) { Pipe p = new SerialPipes (new Pipe[] { new ExactFieldMatchPipe(Citation.corefFields), new PageMatchPipe(), new YearsWithinFivePipe(), new FieldStringDistancePipe(triGramDistanceMetric, Citation.corefFields, "trigramTFIDF"), new GlobalPipe(distanceMetric), new AuthorPipe(distanceMetric), new HeuristicPipe(Citation.corefFields), new InterFieldPipe(), new NodePair2FeatureVector (), new Target2Label (), }); return p; } private static Pipe getVenuePipe (AbstractStatisticalTokenDistance distanceMetric, TFIDF triGramDistanceMetric) { Pipe p = new SerialPipes (new Pipe[] { new ExactFieldMatchPipe(Citation.corefFields), new PageMatchPipe(), new YearsWithinFivePipe(), new FieldStringDistancePipe(triGramDistanceMetric, Citation.corefFields, "trigramTFIDF"), new GlobalPipe(distanceMetric), new AuthorPipe(distanceMetric), new VenuePipe(distanceMetric), new VenueAcronymPipe(), new HeuristicPipe(Citation.corefFields), new InterFieldPipe(), new NodePair2FeatureVector (), new Target2Label (), }); return p; } /** Create pipe for conditionalClusterer */ private static Pipe getJointPipe (Classifier paperPairwiseClassifier, Classifier venuePairwiseClassifier, Classifier paperClusterClassifier, Classifier venueClusterClassifier) { ArrayList pipes = new ArrayList (); pipes.add (new PaperClusterPrediction (paperClusterClassifier)); pipes.add (new VenueClusterPrediction (venueClusterClassifier)); /*if (useThereExists.value()) pipes.add (new ThereExists(Citation.corefFields)); if (pairwiseClassifier != null) { //pipes.add (new ClosestSingleLink (pairwiseClassifier, true)); //pipes.add (new FarthestSingleLink (pairwiseClassifier)); //pipes.add (new AverageLink (pairwiseClassifier)); //pipes.add (new NNegativeNodes (pairwiseClassifier, 1)); // previous 4 pipes subsumed by AllLinks - saves time pipes.add (new AllLinks (pairwiseClassifier)); if (useClusterHomogeneity.value()) pipes.add (new ClusterHomogeneity(pairwiseClassifier)); } if (useClusterSize.value()) pipes.add (new ClusterSize ()); // didn't help: pipes.add (new ThereExistsMatch (new NeedlemanWunsch())); */ pipes.add (new VenuePaperCluster2FeatureVector ()); if (printInputAndTarget.value()) pipes.add (new PrintInputAndTarget()); pipes.add (new Target2Label ()); Pipe p = new SerialPipes ((Pipe[])pipes.toArray (new Pipe[] {})); return p; } /** if useCRF==true, load the CRF and create a IEInterface object to * be used during coref*/ private static IEInterface loadIEInterface () { IEInterface iei = null; if (useCRF.value()) { File crfFile = new File(crfInputFile.value()); iei= new IEInterface(crfFile); iei.loadCRF(crfFile); } return iei; } /** Read citation files and create nodes */ private static ArrayList[] createNodesFromFiles (String[] dirNames, IEInterface ieInterface, String type) { ArrayList[] ret = new ArrayList[dirNames.length]; ArrayList files = new ArrayList(); for (int i=0; i < dirNames.length; i++) { FileIterator fi = new FileIterator (new File(dirNames[i]), new RegexFileFilter(Pattern.compile(".*"))); ret[i] = CitationUtils.computeNodes (fi.getFileArray(), ieInterface, useCRF.value(), numNBest.value(), nthViterbi.value(), type); } return ret; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -