
📄 ClusterPapersAndVenues.java

📁 MALLET is an open-source project in the fields of natural language processing and machine learning.
💻 JAVA
📖 Page 1 of 2
                ieInterface1 = new IEInterface(crfFile1);
                ieInterface1.loadCRF(crfFile1);
                File crfFile2 = new File(crfInputFile2.value());
                ieInterface2 = new IEInterface(crfFile2);
                ieInterface2.loadCRF(crfFile2);
                File crfFile3 = new File(crfInputFile3.value());
                ieInterface3 = new IEInterface(crfFile3);
                ieInterface3.loadCRF(crfFile3);
                File crfFile4 = new File(crfInputFile4.value());
                ieInterface4 = new IEInterface(crfFile4);
                ieInterface4.loadCRF(crfFile4);
            } else {
                File crfFile = new File(crfInputFile.value());
                ieInterface = new IEInterface(crfFile);
                ieInterface.loadCRF(crfFile);
            }
        }
    }

    /** Create citation nodes of type "type" from training files */
    private static ArrayList[] createNodesFromTraining (String type) {
        FileIterator trainFI_1 = null;
        FileIterator trainFI_2 = null;
        FileIterator trainFI_3 = null;
        if (useCRF.value() == true) {
            trainFI_1 = new FileIterator (trainingDir1.value(),
                                          new RegexFileFilter(Pattern.compile(".*")));
            if (trainingDir2.value() != null)
                trainFI_2 = new FileIterator (trainingDir2.value(),
                                              new RegexFileFilter(Pattern.compile(".*")));
            if (trainingDir3.value() != null)
                trainFI_3 = new FileIterator (trainingDir3.value(),
                                              new RegexFileFilter(Pattern.compile(".*")));
        }
        else {
            trainFI_1 = new FileIterator (trainingDir1.value(),
                                          new RegexFileFilter(Pattern.compile(".*")));
            if (trainingDir2.value() != null)
                trainFI_2 = new FileIterator (trainingDir2.value(),
                                              new RegexFileFilter(Pattern.compile(".*")));
            if (trainingDir3.value() != null)
                trainFI_3 = new FileIterator (trainingDir3.value(),
                                              new RegexFileFilter(Pattern.compile(".*")));
        }

        ArrayList trainFileArray1 = trainFI_1.getFileArray();
        ArrayList pubs1 = new ArrayList();
        System.out.println("Number of files 1: " + trainFileArray1.size());
        ArrayList nodes1;
        if (useMultipleCRFs.value() == true) {
            if (useTreeModel.value())
                throw new UnsupportedOperationException ("tree model unsupported");
                //nodes1 = CitationUtils.computeNodesWPubs(trainFileArray1, pubs1, ieInterface1, !trainUsingLabeled.value(), numNBest.value(), nthViterbi.value(), type);
            else
                nodes1 = CitationUtils.computeNodes(trainFileArray1, ieInterface1, !trainUsingLabeled.value(), numNBest.value(), nthViterbi.value(), type);
        }
        else {
            if (useTreeModel.value())
                throw new UnsupportedOperationException ("tree model unsupported");
                //nodes1 = CitationUtils.computeNodesWPubs(trainFileArray1, pubs1, ieInterface, !trainUsingLabeled.value(), numNBest.value(), nthViterbi.value(), type);
            else
                nodes1 = CitationUtils.computeNodes(trainFileArray1, ieInterface, !trainUsingLabeled.value(), numNBest.value(), nthViterbi.value(), type);
        }
        ArrayList nodes2 = null;
        ArrayList nodes3 = null;
        ArrayList pubs2 = null;
        ArrayList pubs3 = null;
        if (trainFI_2 != null) {
            ArrayList trainFileArray2 = trainFI_2.getFileArray();
            pubs2 = new ArrayList ();
            System.out.println("Number of files 2: " + trainFileArray2.size());
            if (useMultipleCRFs.value() == true) {
                if (useTreeModel.value())
                    throw new UnsupportedOperationException ("tree model unsupported");
                    //nodes2 = CitationUtils.computeNodesWPubs(trainFileArray2, pubs2, ieInterface2, !trainUsingLabeled.value(), numNBest.value(), nthViterbi.value(), type);
                else
                    nodes2 = CitationUtils.computeNodes(trainFileArray2, ieInterface2, !trainUsingLabeled.value(), numNBest.value(), nthViterbi.value(), type);
            }
            else {
                if (useTreeModel.value())
                    throw new UnsupportedOperationException ("tree model unsupported");
                    //nodes2 = CitationUtils.computeNodesWPubs(trainFileArray2, pubs2, ieInterface, !trainUsingLabeled.value(), numNBest.value(), nthViterbi.value(), type);
                else
                    nodes2 = CitationUtils.computeNodes(trainFileArray2, ieInterface, !trainUsingLabeled.value(), numNBest.value(), nthViterbi.value(), type);
            }
        }
        if (trainFI_3 != null) {
            ArrayList trainFileArray3 = trainFI_3.getFileArray();
            pubs3 = new ArrayList();
            System.out.println("Number of files 3: " + trainFileArray3.size());
            //nodes3 = computeNodesWPubs(trainFileArray3, pubs3, ieInterface3);
            if (useMultipleCRFs.value() == true) {
                if (useTreeModel.value())
                    throw new UnsupportedOperationException ("tree model unsupported");
                    //nodes3 = CitationUtils.computeNodesWPubs(trainFileArray3, pubs3, ieInterface3, !trainUsingLabeled.value(), numNBest.value(), nthViterbi.value(), type);
                else
                    nodes3 = CitationUtils.computeNodes(trainFileArray3, ieInterface3, !trainUsingLabeled.value(), numNBest.value(), nthViterbi.value(), type);
            }
            else {
                if (useTreeModel.value())
                    throw new UnsupportedOperationException ("tree model unsupported");
                    //nodes3 = CitationUtils.computeNodesWPubs(trainFileArray3, pubs3, ieInterface, !trainUsingLabeled.value(), numNBest.value(), nthViterbi.value(), type);
                else
                    nodes3 = CitationUtils.computeNodes(trainFileArray3, ieInterface, !trainUsingLabeled.value(), numNBest.value(), nthViterbi.value(), type);
            }
            System.out.println(" There are " + nodes3.size() + " training nodes");
        }
        ArrayList[] ret = null;
        int numLists = 1;
        if (nodes2 != null)
            numLists++;
        if (nodes3 != null)
            numLists++;
        if (numLists == 3)
            ret = new ArrayList[] {nodes1, nodes2, nodes3};
        else if (numLists == 2)
            ret = new ArrayList[] {nodes1, nodes2};
        else
            ret = new ArrayList[] {nodes1};
        return ret;
    }

    /** Create citation nodes of type "type" from testing files */
    private static ArrayList createNodesFromTesting (String type) {
        FileIterator testFI = null;
        if (useCRF.value() == true)
            testFI = new FileIterator (testingDir.value(), new RegexFileFilter(Pattern.compile(".*")));
        else
            testFI = new FileIterator (testingDir.value(), new RegexFileFilter(Pattern.compile(".*")));

        ArrayList testFileArray = testFI.getFileArray();
        ArrayList testPubList = new ArrayList();

        ArrayList test_nodes;
        if (useMultipleCRFs.value() == true) {
            test_nodes = CitationUtils.computeNodes(testFileArray, ieInterface4, false, numNBest.value(), nthViterbi.value(), type);
        }
        else {
            if (useTreeModel.value())
                throw new UnsupportedOperationException ("tree model unsupported");
                //test_nodes = CitationUtils.computeNodesWPubs(testFileArray, testPubList, ieInterface, useCRF.value(), numNBest.value(), nthViterbi.value(), type);
            else
                test_nodes = CitationUtils.computeNodes(testFileArray, ieInterface, useCRF.value(), numNBest.value(), nthViterbi.value(), type);
        }
        return test_nodes;
    }

    private static InstanceList getTrainingList (ArrayList[] nodes, Pipe p) {
        InstanceList ilist = new InstanceList();
        if (loadMEFile.value() == null) {
            InstanceList ilist1 = CitationUtils.makePairs(p, nodes[0]);
            ilist.add(ilist1);
            if (nodes.length > 1) {
                InstanceList ilist2 = CitationUtils.makePairs(p, nodes[1]);
                ilist.add(ilist2);
            }
            if (nodes.length > 2) {
                InstanceList ilist3 = CitationUtils.makePairs(p, nodes[2]);
                ilist.add(ilist3);
            }
        }
        return ilist;
    }

    private static Pipe getPaperPipe (AbstractStatisticalTokenDistance distanceMetric, StringDistance triGramDistanceMetric) {
        Pipe p = new SerialPipes (new Pipe[] {
            new ExactFieldMatchPipe(Citation.corefFields),
            new PageMatchPipe(),
            new YearsWithinFivePipe(),
            //new FieldStringDistancePipe(new NeedlemanWunsch(), Citation.corefFields, "EDIST"),
            //new FieldStringDistancePipe(softtfidf, Citation.corefFields, "softTFIDF"),
            new FieldStringDistancePipe(triGramDistanceMetric, Citation.corefFields, "trigramTFIDF"),
            //new PlainFieldPipe (distanceMetric, distanceMetricEditDist),
            new GlobalPipe(distanceMetric),
            //new TitlePipe(distanceMetric),
            new AuthorPipe(distanceMetric),
            //new VenueClusterPipe(),
            //new JournalPipe(distanceMetric),
            //new PagesPipe(distanceMetric),
            new HeuristicPipe(Citation.corefFields),
            new InterFieldPipe(),
            //new HeuristicPipe(Citation.corefFields),
            //new DatePipe(distanceMetric),
            //new FuchunPipe(distanceMetricEditDist),
            new NodePair2FeatureVector (),
            new Target2Label (),
        });
        return p;
    }

    private static Pipe getVenuePipe (AbstractStatisticalTokenDistance distanceMetric, StringDistance triGramDistanceMetric) {
        Pipe p = new SerialPipes (new Pipe[] {
            new ExactFieldMatchPipe(Citation.corefFields),
            new PageMatchPipe(),
            new YearsWithinFivePipe(),
            //new FieldStringDistancePipe(new NeedlemanWunsch(), Citation.corefFields, "EDIST"),
            //new FieldStringDistancePipe(softtfidf, Citation.corefFields, "softTFIDF"),
            new FieldStringDistancePipe(triGramDistanceMetric, Citation.corefFields, "trigramTFIDF"),
            //new PlainFieldPipe (distanceMetric, distanceMetricEditDist),
            new GlobalPipe(distanceMetric),
            //new TitlePipe(distanceMetric),
            new AuthorPipe(distanceMetric),
            //new JournalPipe(distanceMetric),
            //new BooktitlePipe(distanceMetric),
            new VenuePipe(distanceMetric),
            new VenueAcronymPipe(),
            //new PagesPipe(distanceMetric),
            new HeuristicPipe(Citation.corefFields),
            new InterFieldPipe(),
            //new HeuristicPipe(Citation.corefFields),
            //new DatePipe(distanceMetric),
            //new FuchunPipe(distanceMetricEditDist),
            new NodePair2FeatureVector (),
            new Target2Label (),
            //new PrintInputAndTarget (),
        });
        return p;
    }

    private static TFIDF getDistanceMetric (ArrayList allnodes) {
        //make distance metrics
        TFIDF tfidf = new TFIDF();
        NGramTokenizer nGramTokenizer =
            new NGramTokenizer(3, 3, false, new SimpleTokenizer(true, true));
        TFIDF ret = new TFIDF (nGramTokenizer);
        CitationUtils.makeDistMetric(allnodes, tfidf, ret);
        return ret;
    }

    private static void readCluster (File f) {
    }

    protected static void printClustersToFile (Collection citations, String file) {
        try {
            BufferedWriter out = new BufferedWriter(new FileWriter(file));
            printClustersAsReceived (citations, out);
            out.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    protected static void printClustersAsReceived (Collection citations, BufferedWriter out) {
        int refNum = 1;
        int clNum = 1;
        for (Iterator it = citations.iterator(); it.hasNext();) {
            Collection cl = (Collection)it.next();
            for (Iterator i2 = cl.iterator(); i2.hasNext(); ) {
                Citation c = (Citation)i2.next();
                String lab = (String)c.getLabel();
                try {
                    out.write("<NEWREFERENCE>\n");
                    out.write("<meta reference_no=\"" + refNum +
                              "\" cluster_no=\"" + clNum + "\" true_id=\"" + lab + "\"></meta>");
                    out.write(c.getOrigString());
                } catch (Exception e) {}
                refNum++;
            }
            clNum++;
        }
    }

    protected static void printCollectionReferences (Collection collection) {
        Iterator i1 = collection.iterator();
        while (i1.hasNext()) {
            Iterator i2 = ((Collection)i1.next()).iterator();
            while (i2.hasNext()) {
                Object o = i2.next();
                if (o instanceof Node) {
                    Node n = (Node)o;
                    System.out.println("Node: " + n);
                    System.out.println("Node label: " + n.getLabel());
                    System.out.println("Node index: " + n.getIndex());
                } else {
                    System.out.println("Node: " + o);
                }
            }
        }
    }

    public static double scoreCitations(List citations) {
        double score = 0.0;
        for (Iterator i = citations.iterator(); i.hasNext(); ) {
            score += (double)((Citation)i.next()).getScore();
        }
        return score/(double)citations.size();
    }

    /*
      This method will create a collection of collections from the citation nodes
    */
    /*
    protected static Collection makeCollections (ArrayList nodes) {
        HashMap map = new HashMap(); // keep an index of node label values to collections
        Collection collection = new LinkedHashSet();
        for (int i=0; i<nodes.size(); i++) {
            Node n = (Node)nodes.get(i);
            Object o1 = n.getLabel();
            Collection c = (Collection)map.get(o1);
            if (c != null) {
                c.add(n);
                //System.out.println("adding new node " + n + " to existing collection");
            } else {
                Collection newC = new LinkedHashSet();
                System.out.println("Creating new collection");
                newC.add(n);
                map.put(o1, newC);
            }
        }
        Iterator i1 = map.values().iterator();
        while (i1.hasNext()) {
            collection.add((Collection)i1.next());
        }
        return collection;
    }*/

    /*
    protected static List runCanopies(List files) throws Exception {
        double loose = 0.3;
        double tight = 0.7;
        String indexName = "/tmp/index";
        Analyzer analyzer = new SimpleAnalyzer();
        //Analyzer analyzer = new NGramAnalyzer();
        //Analyzer analyzer = new TriGramAnalyzer();
        //QueryConstructor queryConstructor = new QueryConstructorSimple(analyzer);
        QueryConstructor queryConstructor = new QueryConstructorAuthDateTitle(analyzer);
        IndexFiles.indexFiles(files, indexName, analyzer);
        CanopyMaker cm = new CanopyMaker(indexName, queryConstructor);
        cm.setLooseThreshold(loose);
        cm.setTightThreshold(tight);
        cm.makeCanopies();
        Util.allScores(cm);
        return Util.getUniquePairsFromSets(Util.convertIds(cm.getCanopies(), cm.getDocIdToDocno()));
    }
    */
}
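Read together, the helpers in this fragment form a small pipeline: CRF-extracted citation nodes are built from the training and testing directories, a trigram TFIDF string distance is trained over those nodes, a feature-extraction Pipe turns node pairs into labeled feature vectors, and getTrainingList collects them into a MALLET InstanceList. The sketch below is a minimal illustration of one plausible way these static helpers could be wired together; it is not code from the original file (whose actual driver is presumably on the part of the listing not shown here). The method name buildPaperTrainingList, the "paper" node-type string, and the reuse of a single trigram TFIDF object for both pipe parameters are assumptions.

    // Hypothetical driver sketch -- NOT part of the original file above.
    private static InstanceList buildPaperTrainingList () {
        // Extract citation nodes of type "paper" (type string is an assumption).
        ArrayList[] trainNodes = createNodesFromTraining ("paper");

        // Pool every node list so the TFIDF statistics cover all training citations.
        ArrayList allNodes = new ArrayList ();
        for (int i = 0; i < trainNodes.length; i++)
            allNodes.addAll (trainNodes[i]);

        // Trigram TFIDF metric trained via CitationUtils.makeDistMetric; the same
        // object is passed for both pipe parameters here (an assumption -- the real
        // driver may also keep a separate word-level TFIDF).
        TFIDF triGramTFIDF = getDistanceMetric (allNodes);
        Pipe paperPipe = getPaperPipe (triGramTFIDF, triGramTFIDF);

        // Node pairs become labeled feature vectors for a pairwise coreference classifier.
        return getTrainingList (trainNodes, paperPipe);
    }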
