⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 jointconditionalclusterertui.java

📁 mallet是自然语言处理、机器学习领域的一个开源项目。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
																																									 (randomSeed.value()),																																									 positiveInstanceRatio.value(),																																									 sampleTrainingInstances.value(),																																									 numberTrainingInstances.value());				System.err.println ("TRAINING SOLO PAPER CLUSTERER");		ConditionalClusterer paperClusterer = getClusterer (paperInstanceIterator,																												getPaperClusterPipe(paperPairwiseClassifier));		System.err.println ("TRAINING SOLO VENUE CLUSTERER");		ConditionalClusterer venueClusterer  = getClusterer (venueInstanceIterator,																												 getVenueClusterPipe(venuePairwiseClassifier));		// train joint clusterer		System.err.println ("TRAINING JOINT CLUSTERER.");		Pipe p = getJointPipe (paperPairwiseClassifier, venuePairwiseClassifier,													 paperClusterer.getClassifier(), venueClusterer.getClassifier());		AbstractPipeInputIterator jointInstanceIterator = new VenuePaperClusterIterator (paperTrainingTruth,																																										 venueTrainingTruth,																																										 new java.util.Random																																										 (randomSeed.value()),																																										 positiveInstanceRatio.value());		ConditionalClustererTrainer jointTrainer =			new ConditionalClustererTrainer (p, -negativeClusterThreshold.value());		ConditionalClusterer jointClusterer = jointTrainer.train (jointInstanceIterator, false);		System.err.println ("DONE TRAINING JOINT CLUSTERER. 
BEGIN CLUSTERING.");
		// NOTE(review): the statements down to the first closing method brace are the tail of a
		// method whose beginning lies outside the visible source; they are reproduced unchanged.
		// In the visible code, clustering and evaluation run only when randomOrderClustering is
		// set; otherwise predictedClustering is left null and nothing further happens here.
		Collection predictedClustering = null;
		if (randomOrderClustering.value()) {
			for (int i=0; i < numRandomTrials.value(); i++) {
				// Each trial gets its own Random, seeded with randomSeed + i*10.
				predictedClustering = jointClusterer.clusterPapersAndVenues (allPaperTestingNodes, allVenueTestingNodes,
						paperTestingTruth, venueTestingTruth,
						paperClusterer.getClassifier(),
						venueClusterer.getClassifier(),
						new java.util.Random (randomSeed.value() + i*10));
				System.err.println ("FINISHED CLUSTERING. BEGIN EVALUATION.");

				// The joint clustering mixes paper and venue clusters; split it by type
				// before evaluating each side against its own truth.
				CitationUtils.evaluateClustering (paperTestingTruth,
						getPaperClusters (predictedClustering),
						"RANDOM TRIAL " + i + " PAPER COREFERENCE RESULTS");
				CitationUtils.evaluateClustering (venueTestingTruth,
						getVenueClusters (predictedClustering),
						"RANDOM TRIAL " + i + " VENUE COREFERENCE RESULTS");
			}
		}
	}

	/**
	 * Trains a ConditionalClusterer on the given instances.
	 * The trainer is built from pipe <code>p</code> and the negated value of the
	 * negativeClusterThreshold option; the value of the useFeatureInduction option is
	 * passed as the second argument of <code>train</code> (presumably toggling feature
	 * induction -- confirm against ConditionalClustererTrainer).
	 *
	 * @param instanceIterator source of training instances
	 * @param p pipe handed to the trainer
	 * @return the clusterer returned by the trainer
	 */
	private static ConditionalClusterer getClusterer (AbstractPipeInputIterator instanceIterator, Pipe p) {
		ConditionalClustererTrainer cct = new ConditionalClustererTrainer (p, -negativeClusterThreshold.value());
		return cct.train (instanceIterator, useFeatureInduction.value());
	}

	/**
	 * Selects the paper clusters out of a mixed clustering.
	 * Only the first node produced by each cluster's iterator is inspected: the cluster is
	 * kept when that node is a PaperCitation and dropped when it is a VenueCitation.
	 * The returned collection holds the same cluster objects, not copies.
	 *
	 * @param clustering collection whose elements are themselves Collections of nodes
	 * @return a new ArrayList of the clusters whose first node is a PaperCitation
	 * @throws IllegalArgumentException if a cluster's first node is neither a PaperCitation
	 *         nor a VenueCitation
	 */
	private static Collection getPaperClusters (Collection clustering) {
		Collection ret = new ArrayList();
		Iterator iter = clustering.iterator ();
		while (iter.hasNext()) {
			Collection cluster = (Collection) iter.next();
			Iterator subIter = cluster.iterator();
			// NOTE(review): no hasNext() check -- an empty cluster would make next() throw
			// NoSuchElementException. Confirm that callers never produce empty clusters.
			Object node = subIter.next();
			if (node instanceof PaperCitation)
				ret.add (cluster);
			else if (!(node instanceof VenueCitation))
				throw new IllegalArgumentException ("Node is neither venue nor paper, it's a " + node.getClass().getName());
		}
		return ret;
	}

	/**
	 * Selects the venue clusters out of a mixed clustering.
	 * Mirror image of getPaperClusters: a cluster is kept when the first node produced by
	 * its iterator is a VenueCitation and dropped when it is a PaperCitation.
	 * The returned collection holds the same cluster objects, not copies.
	 *
	 * @param clustering collection whose elements are themselves Collections of nodes
	 * @return a new ArrayList of the clusters whose first node is a VenueCitation
	 * @throws IllegalArgumentException if a cluster's first node is neither a VenueCitation
	 *         nor a PaperCitation
	 */
	private static Collection getVenueClusters (Collection clustering) {
		Collection ret = new ArrayList();
		Iterator iter = clustering.iterator ();
		while (iter.hasNext()) {
			Collection cluster = (Collection) iter.next();
			Iterator subIter = cluster.iterator();
			// NOTE(review): same empty-cluster caveat as in getPaperClusters.
			Object node = subIter.next();
			if (node instanceof VenueCitation)
				ret.add (cluster);
			else if (!(node instanceof PaperCitation))
				throw new IllegalArgumentException ("Node is neither venue nor paper, it's a " + node.getClass().getName());
		}
		return ret;
	}

	/**
	 * Trains a pairwise MaxEnt classifier.
	 * For every entry of <code>nodes</code> the result of
	 * <code>CitationUtils.makePairs (p, nodes[i])</code> is added to one InstanceList, which
	 * is then used for training. Afterwards the F1 for label "yes" is printed to stdout;
	 * it is measured on the same list that was used for training.
	 *
	 * @param nodes array of node lists, one entry per input group
	 * @param p pipe used both for the InstanceList and for pair creation
	 * @return the trained MaxEnt classifier
	 */
	private static Classifier trainPairwiseClassifier (ArrayList[] nodes, Pipe p) {
		InstanceList ilist = new InstanceList (p);
		for (int i=0; i < nodes.length; i++)
			ilist.add (CitationUtils.makePairs (p, nodes[i]));
		MaxEnt me = (MaxEnt)(new MaxEntTrainer().train(ilist, null, null, null, null));
		// Stop growth of the data alphabet once training is done.
		ilist.getDataAlphabet().stopGrowth();
		Trial t = new Trial(me, ilist);
		System.out.println("Pairwise classifier: -> Training F1 on \"yes\" is: " + t.labelF1("yes"));
		return me;
	}

	/**
	 * Builds the cluster pipe for venues; currently identical to the paper cluster pipe.
	 *
	 * @param pairwiseClassifier forwarded unchanged to getPaperClusterPipe
	 * @return the pipe built by getPaperClusterPipe
	 */
	private static Pipe getVenueClusterPipe (Classifier pairwiseClassifier) {
		// same for now
		return getPaperClusterPipe(pairwiseClassifier);
	}

	/**
	 * Builds the SerialPipes used by the solo cluster classifiers.
	 * ForAll, NodeClusterPair2FeatureVector and Target2Label are always present; the other
	 * stages depend on the useThereExists, useClusterHomogeneity, useClusterSize and
	 * printInputAndTarget options and on whether a pairwise classifier is supplied.
	 *
	 * @param pairwiseClassifier pairwise classifier for the link-based stages; when null,
	 *        AllLinks and ClusterHomogeneity are left out
	 * @return the assembled pipe
	 */
	private static Pipe getPaperClusterPipe (Classifier pairwiseClassifier) {
		ArrayList pipes = new ArrayList ();
		pipes.add (new ForAll (Citation.corefFields));
		if (useThereExists.value())
			pipes.add (new ThereExists(Citation.corefFields));
		if (pairwiseClassifier != null) {
			//pipes.add (new ClosestSingleLink (pairwiseClassifier, true));
			//pipes.add (new FarthestSingleLink (pairwiseClassifier));
			//pipes.add (new AverageLink (pairwiseClassifier));
			//pipes.add (new NNegativeNodes (pairwiseClassifier, 1));
			// previous 4 pipes subsumed by AllLinks - saves time
			pipes.add (new AllLinks (pairwiseClassifier));
			if (useClusterHomogeneity.value())
				pipes.add (new ClusterHomogeneity(pairwiseClassifier));
		}
		if (useClusterSize.value())
			pipes.add (new ClusterSize ());
		//  didn't help:  pipes.add (new ThereExistsMatch (new NeedlemanWunsch()));
		pipes.add (new NodeClusterPair2FeatureVector ());
		// Optional debug stage, placed after feature-vector conversion and before Target2Label.
		if (printInputAndTarget.value())
			pipes.add (new PrintInputAndTarget());
		pipes.add (new Target2Label ());
		Pipe p = new SerialPipes ((Pipe[])pipes.toArray (new Pipe[] {}));
		return p;
	}

	/**
	 * Builds the pairwise feature pipe for paper citations: field-match, page, year,
	 * string-distance, global, author, heuristic and inter-field stages, followed by
	 * NodePair2FeatureVector and Target2Label.
	 *
	 * @param distanceMetric metric handed to GlobalPipe and AuthorPipe
	 * @param triGramDistanceMetric metric handed to FieldStringDistancePipe (feature name
	 *        "trigramTFIDF")
	 * @return the assembled pipe
	 */
	private static Pipe getPaperPipe (AbstractStatisticalTokenDistance distanceMetric,
			TFIDF triGramDistanceMetric) {
		Pipe p = new SerialPipes (new Pipe[] {
			new ExactFieldMatchPipe(Citation.corefFields),
			new PageMatchPipe(),
			new YearsWithinFivePipe(),
			new FieldStringDistancePipe(triGramDistanceMetric, Citation.corefFields, "trigramTFIDF"),
			new GlobalPipe(distanceMetric),
			new AuthorPipe(distanceMetric),
			new HeuristicPipe(Citation.corefFields),
			new InterFieldPipe(),
			new NodePair2FeatureVector (),
			new Target2Label (),
		});
		return p;
	}

	/**
	 * Builds the pairwise feature pipe for venue citations. Same stages as getPaperPipe,
	 * with VenuePipe and VenueAcronymPipe inserted after AuthorPipe.
	 *
	 * @param distanceMetric metric handed to GlobalPipe, AuthorPipe and VenuePipe
	 * @param triGramDistanceMetric metric handed to FieldStringDistancePipe (feature name
	 *        "trigramTFIDF")
	 * @return the assembled pipe
	 */
	private static Pipe getVenuePipe (AbstractStatisticalTokenDistance distanceMetric,
			TFIDF triGramDistanceMetric) {
		Pipe p = new SerialPipes (new Pipe[] {
			new ExactFieldMatchPipe(Citation.corefFields),
			new PageMatchPipe(),
			new YearsWithinFivePipe(),
			new FieldStringDistancePipe(triGramDistanceMetric, Citation.corefFields, "trigramTFIDF"),
			new GlobalPipe(distanceMetric),
			new AuthorPipe(distanceMetric),
			new VenuePipe(distanceMetric),
			new VenueAcronymPipe(),
			new HeuristicPipe(Citation.corefFields),
			new InterFieldPipe(),
			new NodePair2FeatureVector (),
			new Target2Label (),
		});
		return p;
	}

	/**
	 * Create pipe for conditionalClusterer.
	 * The live stages are PaperClusterPrediction, VenueClusterPrediction,
	 * VenuePaperCluster2FeatureVector, an optional PrintInputAndTarget (printInputAndTarget
	 * option) and Target2Label.
	 * NOTE(review): paperPairwiseClassifier and venuePairwiseClassifier are not used by any
	 * live statement in this method; the only code that would need a pairwise classifier is
	 * inside the commented-out section.
	 *
	 * @param paperPairwiseClassifier currently unused
	 * @param venuePairwiseClassifier currently unused
	 * @param paperClusterClassifier classifier handed to PaperClusterPrediction
	 * @param venueClusterClassifier classifier handed to VenueClusterPrediction
	 * @return the assembled pipe
	 */
	private static Pipe getJointPipe (Classifier paperPairwiseClassifier, Classifier venuePairwiseClassifier,
			Classifier paperClusterClassifier, Classifier venueClusterClassifier) {
		ArrayList pipes = new ArrayList ();
		pipes.add (new PaperClusterPrediction (paperClusterClassifier));
		pipes.add (new VenueClusterPrediction (venueClusterClassifier));
		/*if (useThereExists.value())
			pipes.add (new ThereExists(Citation.corefFields));
		if (pairwiseClassifier != null) {
			//pipes.add (new ClosestSingleLink (pairwiseClassifier, true));
			//pipes.add (new FarthestSingleLink (pairwiseClassifier));
			//pipes.add (new AverageLink (pairwiseClassifier));
			//pipes.add (new NNegativeNodes (pairwiseClassifier, 1));
			// previous 4 pipes subsumed by AllLinks - saves time
			pipes.add (new AllLinks (pairwiseClassifier));
			if (useClusterHomogeneity.value())
				pipes.add (new ClusterHomogeneity(pairwiseClassifier));
		}
		if (useClusterSize.value())
			pipes.add (new ClusterSize ());
		//  didn't help:  pipes.add (new ThereExistsMatch (new NeedlemanWunsch()));
		*/
		pipes.add (new VenuePaperCluster2FeatureVector ());
		if (printInputAndTarget.value())
			pipes.add (new PrintInputAndTarget());
		pipes.add (new Target2Label ());
		Pipe p = new SerialPipes ((Pipe[])pipes.toArray (new Pipe[] {}));
		return p;
	}

	/**
	 * If useCRF==true, load the CRF and create an IEInterface object to
	 * be used during coref.
	 * The file named by the crfInputFile option is passed both to the IEInterface
	 * constructor and to loadCRF.
	 *
	 * @return the loaded IEInterface, or null when the useCRF option is off
	 */
	private static IEInterface loadIEInterface () {
		IEInterface iei = null;
		if (useCRF.value()) {
			File crfFile = new File(crfInputFile.value());
			iei= new IEInterface(crfFile);
			iei.loadCRF(crfFile);
		}
		return iei;
	}

	/**
	 * Read citation files and create nodes.
	 * For each directory name, every file accepted by a ".*" regex filter is collected by a
	 * FileIterator and handed to CitationUtils.computeNodes together with the useCRF,
	 * numNBest and nthViterbi option values.
	 *
	 * @param dirNames directories to read, one result entry per directory
	 * @param ieInterface extraction interface forwarded to computeNodes (null when
	 *        loadIEInterface returned null)
	 * @param type forwarded unchanged to computeNodes
	 * @return array parallel to dirNames; entry i holds the nodes computed for dirNames[i]
	 */
	private static ArrayList[] createNodesFromFiles (String[] dirNames, IEInterface ieInterface, String type) {
		ArrayList[] ret = new ArrayList[dirNames.length];
		// NOTE(review): 'files' is never read or written after this line in the visible code.
		ArrayList files = new ArrayList();
		for (int i=0; i < dirNames.length; i++) {
			FileIterator fi = new FileIterator (new File(dirNames[i]), new RegexFileFilter(Pattern.compile(".*")));
			ret[i] = CitationUtils.computeNodes (fi.getFileArray(), ieInterface, useCRF.value(),
					numNBest.value(), nthViterbi.value(), type);
		}
		return ret;
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -