⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 computeupperbound1.java

📁 常用机器学习算法,java编写源代码,内含常用分类算法,包括说明文档
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).http://www.cs.umass.edu/~mccallum/malletThis software is provided under the terms of the Common Public License,version 1.0, as published by http://www.opensource.org.  For furtherinformation, see the file `LICENSE' included with this distribution. */package edu.umass.cs.mallet.projects.seg_plus_coref.coreference;import com.wcohen.secondstring.AbstractStringDistance;import com.wcohen.secondstring.Jaccard;import edu.umass.cs.mallet.base.fst.CRF;import edu.umass.cs.mallet.base.fst.Transducer;import edu.umass.cs.mallet.base.pipe.SerialPipes;import edu.umass.cs.mallet.base.pipe.iterator.FileIterator;import edu.umass.cs.mallet.base.pipe.iterator.LineGroupIterator;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.projects.seg_plus_coref.ie.IEInterface;import java.io.*;import java.util.ArrayList;import java.util.logging.Logger;import java.util.regex.Pattern;public class ComputeUpperBound1 {	String seperator = "";	private static Logger logger = Logger.getLogger(ComputeUpperBound1.class.getName());	private File crfFile;	private CRF crf = null;	private SerialPipes pipe;	private TokenSequence tokenSequence;	private Sequence viterbiSequence;	private double confidence;	private Transducer.ViterbiPath viterbiP;	private Transducer.ViterbiPath_NBest viterbiP_NBest;	private int instance_error_num = 0;	private int instance_size = 0;	private double instance_accuracy;	private double[] instance_accuracy_nbest;	boolean printFont = true;	IEInterface ieInterface;	InstanceList instancelist;	ArrayList optimalViterbi;	AbstractStringDistance nw;	double default_Max_Dist = 0;	double default_Ignore_Dist = 0;	String[] startTags = new String[]	{"<author>", "<title>", "<booktitle>", "<publisher>", "<journal>","<date>", "<location>", "<pages>",	"<note>", "<institution>", "<editor>",  "<volume>", "<tech>"};	String[] endTags = new String[]	{"</author>", "</title>", "</booktitle>", "</publisher>", "</journal>", "</date>", "</location>", "</pages>",	"</note>", "</institution>", "</editor>",  "</volume>", "</tech>"};	double[] tagWeight = new double[]{1.0, 10.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};	public ComputeUpperBound1()	{		this.crfFile = null;	}	public ComputeUpperBound1(File crfFile)	{		assert(crfFile != null);		this.crfFile = crfFile;	}	// load in CRF and its pipe from a trained crfFile	public boolean loadCRF()	{		ieInterface = new IEInterface(this.crfFile);		boolean flag = ieInterface.loadCRF(crfFile);		this.crf = ieInterface.crf;		this.pipe = ieInterface.pipe;//		nw = new NeedlemanWunsch(); // for edit distance//		nw = new JaroWinkler();//		nw = new CharJaccard();		nw = new Jaccard();		//		nw = new JelinekMercerJS();//		nw = new TFIDF(); // x//		nw = new Mixture(); //x//		nw = new DirichletJS();//x		return flag;		}		//given an input file, label it, and output in the format of inline SGML	public void viterbiCRF(File inputFile, boolean sgml, String seperator, int N)	{		instancelist = new InstanceList (pipe);		Reader reader;		try {			reader = new FileReader (inputFile);		} catch (Exception e) {			throw new IllegalArgumentException ("Can't read file "+inputFile);		}		instancelist.add (new LineGroupIterator (reader, Pattern.compile(seperator), true));		ArrayList nbestlists = new ArrayList(instancelist.size());		for(int i=0; i<instancelist.size(); i++){				Instance instance = instancelist.getInstance(i);				//N-best tagging				viterbiP_NBest = crf.viterbiPath_NBest((Sequence)instance.getData(), N);//n-best list				nbestlists.add(i, (Sequence[]) viterbiP_NBest.outputNBest());		}		String outputFileStr = inputFile.toString() + "_tagged";				System.out.println(inputFile.toString() + " ---> " + outputFileStr);		PrintStream taggedOut = null;		try{			FileOutputStream fos = new FileOutputStream (outputFileStr);			taggedOut = new PrintStream (fos);		} catch (IOException e) {			logger.warning ("Couldn't open output file '"+ outputFileStr+"'");		}				if(taggedOut == null){			taggedOut = System.out;		}		System.out.print( nbestlists.size() + ": ");		// using approximation 		//int[] indexList = indexListSearch_approximate(instancelist, nbestlists);		// using exaustive search		int[] indexList = indexListSearch_exaustive(instancelist, nbestlists, N);	}	protected int[] indexListSearch_exaustive(InstanceList instancelist, ArrayList nbestlists, int N)	{		int[] indexList = new int[instancelist.size()];		for(int i=0; i<indexList.length; i++){			indexList[i] = 0;		}		int[] optimalIndexList = (int[])indexList.clone();		double highestWeight = weightOfConfig(indexList, instancelist, nbestlists);		while( hasNextIndexList(indexList, N) ){			indexList = nextIndexList(indexList, N);//			System.out.print(num + ": " + nbestlists.size() + ": ");//			for(int j=0; j<indexList.length; j++){//				System.out.print(optimalIndexList[j]);//			}//			System.out.println();			double weight = weightOfConfig(indexList, instancelist, nbestlists);			if( weight > highestWeight ){				highestWeight = weight;				optimalIndexList = (int[])indexList.clone();			}		}//		System.out.println(instancelist.size());			return optimalIndexList;	}	protected double weightOfConfig(int[] indexList, InstanceList instancelist, ArrayList nbestlists)	{		double weight = 0;		for(int i=0; i<indexList.length; i++){                        Sequence[] lists1 = (Sequence[]) nbestlists.get(i);			for(int j=i+1; j<indexList.length; j++){	                        Sequence[] lists2 = (Sequence[]) nbestlists.get(j);                                double sim = PairSimilarity(lists1[indexList[i]], lists2[indexList[j]],                                           instancelist.getInstance(i), instancelist.getInstance(j));				weight += sim;								}		}		return weight;	}	protected boolean hasNextIndexList(int[] indexList, int N)	{		for(int i=0; i<indexList.length; i++){			if(indexList[i] < N-1) return true;		}		return false;	}	protected int[] nextIndexList(int[] indexList, int N)	{		for(int i=indexList.length-1; i>=0; i--){			if(indexList[i] <= N-2){				indexList[i] ++;				for(int j=i+1; j<=indexList.length-1;j++){					indexList[j] = 0;				}				break;			}		}				return indexList;	}	protected int[] indexListSearch_approximate(InstanceList instancelist, ArrayList nbestlists)	{		int[] indexList = new int[instancelist.size()];		System.out.println(instancelist.size());			if(instancelist.size() == 1){			indexList[0] = 0;		}		else if(instancelist.size() == 2){			Sequence[] lists1 = (Sequence[]) nbestlists.get(0);			Sequence[] lists2 = (Sequence[]) nbestlists.get(1);			double highestSimilarity = Double.NEGATIVE_INFINITY;			indexList[0] = indexList[1] = 0;			for(int i=0; i<lists1.length; i++){				for(int j=0; j<lists2.length; j++){					double sim = PairSimilarity(lists1[i], lists2[j], 							instancelist.getInstance(0), instancelist.getInstance(1));										if(sim > highestSimilarity){						highestSimilarity = sim;						indexList[0] = i;						indexList[1] = j;					}				}			}		}		else {			//process the first two citations			Sequence[] lists1 = (Sequence[]) nbestlists.get(0);			Sequence[] lists2 = (Sequence[]) nbestlists.get(1);			double highestSimilarity = Double.NEGATIVE_INFINITY;			indexList[0] = indexList[1] = 0;			for(int i=0; i<lists1.length; i++){				for(int j=0; j<lists2.length; j++){					double sim = PairSimilarity(lists1[i], lists2[j],							instancelist.getInstance(0), instancelist.getInstance(1) );										if(sim > highestSimilarity){						highestSimilarity = sim;						indexList[0] = i;						indexList[1] = j;					}				}			}			//dynamically process the rest citations			for(int i=2; i<instancelist.size(); i++){				indexList[i] = 0;				Sequence[] sequence_prev = (Sequence[]) nbestlists.get(i-1);				Sequence[] sequence_current = (Sequence[]) nbestlists.get(i);				highestSimilarity = PairSimilarity(sequence_prev[indexList[i-1]], sequence_current[0],							instancelist.getInstance(i-1), instancelist.getInstance(i) );				for(int j=1; j<sequence_current.length; j++){					double sim = PairSimilarity(sequence_prev[indexList[i-1]], sequence_current[j],							instancelist.getInstance(i-1), instancelist.getInstance(i) );						if(sim > highestSimilarity){						indexList[i] = j;					}				}			}		}		return indexList;	}	protected double computeSGMLObjDistance(String string1, String string2)	{		double dist = 0.0;		double distTemp;		int usedNumFields = 0;		int NumFields = startTags.length;		double totalWeight = 0;//		System.out.println(string1 + "\n" + string2);		for(int i=0; i<NumFields; i++){			String[] strs1 = locateFields(startTags[i], endTags[i], string1);			String[] strs2 = locateFields(startTags[i], endTags[i], string2);/*			if( startTags[i].equals("<author>") ){//only use last names				if(strs1 != null)				for(int k=0; k<strs1.length; k++){					ArrayList namelist1 = LastName(strs1[k]);					String tempStr = "";					for(int j=0; j<namelist1.size(); j++){						tempStr += (String)namelist1.get(j);						if( j<namelist1.size()-1){							tempStr += " ";						}					}					strs1[k] = tempStr;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -