⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 jointconditionalclusterertui.java

📁 MALLET(MAchine Learning for LanguagE Toolkit)是自然语言处理、机器学习领域的一个开源项目。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* Copyright (C) 2002 Dept. of Computer Science, Univ. of Massachusetts, Amherst   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).   http://www.cs.umass.edu/~mccallum/mallet   This program toolkit free software; you can redistribute it and/or   modify it under the terms of the GNU General Public License as   published by the Free Software Foundation; either version 2 of the   License, or (at your option) any later version.   This program is distributed in the hope that it will be useful, but   WITHOUT ANY WARRANTY; without even the implied warranty of   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  For more   details see the GNU General Public License and the file README-LEGAL.   You should have received a copy of the GNU General Public License   along with this program; if not, write to the Free Software   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA   02111-1307, USA. *//**	 @author Aron Culotta */package edu.umass.cs.mallet.projects.seg_plus_coref.condclust.tui;import edu.umass.cs.mallet.projects.seg_plus_coref.condclust.cluster.*;import edu.umass.cs.mallet.projects.seg_plus_coref.condclust.pipe.*;import edu.umass.cs.mallet.projects.seg_plus_coref.condclust.pipe.iterator.*;import edu.umass.cs.mallet.projects.seg_plus_coref.coreference.*;import edu.umass.cs.mallet.projects.seg_plus_coref.ie.*;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.classify.*;import edu.umass.cs.mallet.base.pipe.*;import edu.umass.cs.mallet.base.pipe.iterator.*;import edu.umass.cs.mallet.base.util.*;import com.wcohen.secondstring.*;import com.wcohen.secondstring.tokens.NGramTokenizer;import com.wcohen.secondstring.tokens.SimpleTokenizer;import java.util.logging.*;import java.util.*;import java.util.regex.*;import java.io.*;/** Interface to train and test a ConditionalClusterer to cluster * Papers and Venues simultaneously. Uses Citeseer data. 
*/	 public class JointConditionalClustererTUI {	private static Logger logger = MalletLogger.getLogger(JointConditionalClustererTUI.class.getName());	static CommandOption.SpacedStrings trainingDirs =	new CommandOption.SpacedStrings	(JointConditionalClustererTUI.class, "training-dirs", "DIR...", true, null,	 "The directories containing the citations to be clustered at training time. One file per cluster.", null);		static CommandOption.SpacedStrings testingDirs =	new CommandOption.SpacedStrings	(JointConditionalClustererTUI.class, "testing-dirs", "DIR...", true, null,	 "The directories containing the citations to be clustered at test time. One file per cluster.", null);	static CommandOption.Boolean randomOrderClustering = new CommandOption.Boolean	(JointConditionalClustererTUI.class, "random-order-clustering", "BOOL", false, false,	 "At test time, choose the nodes to consider at random", null);	static CommandOption.Boolean sampleTrainingInstances = new CommandOption.Boolean	(JointConditionalClustererTUI.class, "sample-training-instances", "BOOL", false, true,	 "Generate instances by sampling from true clusters", null);	static CommandOption.Integer numberTrainingInstances = new CommandOption.Integer	(JointConditionalClustererTUI.class, "number-training-instances", "INTEGER", true, 5000,	 "The number of training instances to sample", null);	static CommandOption.Integer randomSeed = new CommandOption.Integer	(JointConditionalClustererTUI.class, "random-seed", "INTEGER", true, 1,	 "Seed for random number in random order clustering", null);	static CommandOption.Integer numRandomTrials = new CommandOption.Integer	(JointConditionalClustererTUI.class, "num-random-trials", "INTEGER", true, 5,	 "number of random trials to run", null);	static CommandOption.Boolean errorAnalysis = new CommandOption.Boolean	(JointConditionalClustererTUI.class, "error-analysis", "BOOL", false, false,	 "Print errors (False positives)", null);	static CommandOption.Boolean useCRF = new 
CommandOption.Boolean	(JointConditionalClustererTUI.class, "use-crf", "BOOL", false, false,	 "Use CRF or not.", null);	static CommandOption.Boolean useFeatureInduction = new CommandOption.Boolean	(JointConditionalClustererTUI.class, "use-feature-induction", "BOOL", false, false,	 "Use Feature Induction or Not.", null);	static CommandOption.Boolean useClusterSize = new CommandOption.Boolean	(JointConditionalClustererTUI.class, "use-cluster-size", "BOOL", true, true,	 "add feature that is cluster's size", null);	static CommandOption.Boolean useThereExists = new CommandOption.Boolean	(JointConditionalClustererTUI.class, "use-there-exists", "BOOL", true, true,	 "Use thereExists pipe.", null);	static CommandOption.Boolean usePairwiseClassifier = new CommandOption.Boolean	(JointConditionalClustererTUI.class, "use-pairwise-classifier", "BOOL", true, true,	 "Use pairwise classifier to weight edges.", null);	static CommandOption.Boolean useClusterHomogeneity = new CommandOption.Boolean	(JointConditionalClustererTUI.class, "use-cluster-homogeneity", "BOOL", true, true,	 "add feature that is within-cluster similarity.", null);	static CommandOption.Boolean printInputAndTarget = new CommandOption.Boolean	(JointConditionalClustererTUI.class, "print-input-and-target", "BOOL", false, false,	 "Print features and target.", null);	static CommandOption.String crfInputFile = new CommandOption.String	(JointConditionalClustererTUI.class, "crf-input-file", "FILENAME", true, null,	 "The name of the file to read the trained CRF for testing.", null);		static CommandOption.Integer numNBest = new CommandOption.Integer	(JointConditionalClustererTUI.class, "num-n-best", "INTEGER", true, 3,	 "Number of n-best candidates to store.", null);		static CommandOption.Integer nthViterbi = new CommandOption.Integer	(JointConditionalClustererTUI.class, "nth-viterbi", "INTEGER", true, 0,	 "Number of n-best candidates to use .", null);	static CommandOption.Double negativeClusterThreshold = new 
CommandOption.Double	(JointConditionalClustererTUI.class, "negative-cluster-threshold", "DECIMAL", true, 0.0,	 "Decision threhold to place a node in a cluster. Takes opposite of input because CommandOptions seem to have trouble with negative inputs", null);	static CommandOption.Double positiveInstanceRatio = new CommandOption.Double	(JointConditionalClustererTUI.class, "positive-instance-ratio", "DECIMAL", true, 0.1,	 "Ratio of positive to negative training instances", null);	static final CommandOption.List commandOptions =	new CommandOption.List (		"Training and testing a conditional clusterer.",		new CommandOption[] {			trainingDirs,			testingDirs,			sampleTrainingInstances,			numberTrainingInstances,			errorAnalysis,			useCRF,			useFeatureInduction,			crfInputFile,			numNBest,			nthViterbi,			negativeClusterThreshold,			randomOrderClustering,			randomSeed,			numRandomTrials,			usePairwiseClassifier,			useThereExists,			useClusterSize,			useClusterHomogeneity,			printInputAndTarget,			positiveInstanceRatio		});			public static void main (String[] args) {		commandOptions.process (args);		commandOptions.logOptions (logger);		IEInterface ieInterface = loadIEInterface ();		// load papers	  ArrayList[] paperTrainingNodes = createNodesFromFiles (trainingDirs.value(), ieInterface, CitationUtils.PAPER);		ArrayList[] paperTestingNodes = createNodesFromFiles (testingDirs.value(), ieInterface, CitationUtils.PAPER);		ArrayList allPaperTrainingNodes = new ArrayList();		for (int i=0; i < paperTrainingNodes.length; i++)			allPaperTrainingNodes.addAll (paperTrainingNodes[i]);		ArrayList allPaperTestingNodes = new ArrayList();		for (int i=0; i < paperTestingNodes.length; i++)			allPaperTestingNodes.addAll (paperTestingNodes[i]);		Collection paperTrainingTruth = CitationUtils.makeCollections (allPaperTrainingNodes);		Collection paperTestingTruth = CitationUtils.makeCollections (allPaperTestingNodes);		// load venues	  ArrayList[] venueTrainingNodes = createNodesFromFiles 
(trainingDirs.value(), ieInterface, CitationUtils.VENUE);		ArrayList[] venueTestingNodes = createNodesFromFiles (testingDirs.value(), ieInterface, CitationUtils.VENUE);		ArrayList allVenueTrainingNodes = new ArrayList();		for (int i=0; i < venueTrainingNodes.length; i++)			allVenueTrainingNodes.addAll (venueTrainingNodes[i]);		ArrayList allVenueTestingNodes = new ArrayList();		for (int i=0; i < venueTestingNodes.length; i++)			allVenueTestingNodes.addAll (venueTestingNodes[i]);				Collection venueTrainingTruth = CitationUtils.makeCollections (allVenueTrainingNodes);		Collection venueTestingTruth = CitationUtils.makeCollections (allVenueTestingNodes);		// train pairwise classifiers		Classifier paperPairwiseClassifier = null;		Classifier venuePairwiseClassifier = null;		if (usePairwiseClassifier.value()) {			System.err.println ("TRAINING PAIRWISE CLASSIFIERS");			AbstractStatisticalTokenDistance distanceMetric =				(AbstractStatisticalTokenDistance)CitationUtils.computeDistanceMetric (allPaperTrainingNodes);			TFIDF tfidf = new TFIDF();			NGramTokenizer nGramTokenizer =				new NGramTokenizer(3,3,false, new SimpleTokenizer(true, true));							TFIDF triGramDistanceMetric = new TFIDF (nGramTokenizer);			CitationUtils.makeDistMetric(allPaperTrainingNodes, tfidf, triGramDistanceMetric);									paperPairwiseClassifier = trainPairwiseClassifier (paperTrainingNodes, getPaperPipe(distanceMetric,																																													triGramDistanceMetric));			venuePairwiseClassifier = trainPairwiseClassifier (venueTrainingNodes, getVenuePipe(distanceMetric,																																													triGramDistanceMetric));		}		// train solo clusterers		AbstractPipeInputIterator paperInstanceIterator = new NodeClusterPairIterator (paperTrainingTruth,																																									 new java.util.Random																																									 (randomSeed.value()),																																									 positiveInstanceRatio.value(),																										
															 sampleTrainingInstances.value(),																																									 numberTrainingInstances.value());		AbstractPipeInputIterator venueInstanceIterator = new NodeClusterPairIterator (venueTrainingTruth,																																									 new java.util.Random

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -