⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ieinterface3.java

📁 这是 MALLET(MAchine Learning for LanguagE Toolkit)的 Java 实现,里面内容很多,请大家慢慢琢磨。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).   http://www.cs.umass.edu/~mccallum/mallet   This software is provided under the terms of the Common Public License,   version 1.0, as published by http://www.opensource.org.  For further   information, see the file `LICENSE' included with this distribution. *//**    @author Fuchun Peng <a href="mailto:fuchun@cs.umass.edu">fuchun@cs.umass.edu</a>	July 2003	This class provides information extraction interface to other applications */package edu.umass.cs.mallet.projects.seg_plus_coref.ie;import edu.umass.cs.mallet.base.types.*;import edu.umass.cs.mallet.base.fst.*;import edu.umass.cs.mallet.base.pipe.*;import edu.umass.cs.mallet.base.pipe.iterator.*;import edu.umass.cs.mallet.base.pipe.tsf.*;import edu.umass.cs.mallet.base.util.*;import junit.framework.*;import java.util.Iterator;import java.util.Random;import java.util.regex.*;import java.io.*;import java.util.logging.*;import java.util.ArrayList;public class IEInterface3{	String seperator = "";	private static Logger logger = Logger.getLogger(IEInterface3.class.getName());	private File crfFile;	private CRF3 crf = null;	private SerialPipes pipe;	private TokenSequence tokenSequence;	private Sequence viterbiSequence;	private double confidence;	private Transducer.ViterbiPath viterbiP;	private Transducer.ViterbiPath_NBest viterbiP_NBest;	private int instance_error_num = 0;	private int instance_size = 0;	private double instance_accuracy;	boolean printFont = true;	public IEInterface3()	{		this.crfFile = null;	}	public IEInterface3(File crfFile)	{		assert(crfFile != null);		this.crfFile = crfFile;	}	public void setPipe(SerialPipes pipe)	{		this.pipe = pipe;	}	// load in CRF3 and its pipe from a trained crfFile	public boolean loadCRF()	{		return loadCRF(crfFile);	}		public boolean loadCRF(File crfFile)	{		assert(crfFile != null);		CRF3 crf = null;		try {			ObjectInputStream ois 
= new ObjectInputStream(new FileInputStream( crfFile ));			crf = (CRF3) ois.readObject();			ois.close();		}		catch (IOException e) {			System.err.println("Exception reading crf file: " + e);			crf= null;		}		catch (ClassNotFoundException cnfe) {			System.err.println("Cound not find class reading in object: " + cnfe);			crf= null;		}//		crf = CRFIO.readCRF(crfFile.toString());		if(crf==null) {			System.err.println("Read a null crf from file: " + crfFile);			System.exit(1);		}				this.crf = crf;		this.pipe = (SerialPipes) crf.getInputPipe();		if (this.pipe == null) {			System.err.println("Get a null pipe from CRF");			System.exit(1);		}		//xxx print out the read-in pipes, just for debugging purpose/*                ArrayList pipes1 = (this.pipe).getPipes();                System.out.println("pipes1");                for (int i = 0; i < pipes1.size(); i++) {                        System.out.print("Pipe: " + i + ": ");                        Pipe tempP = (Pipe) pipes1.get (i);                        if (tempP == null) {                                System.out.println("Pipe is null");                        }                        else {                                String pipeName = tempP.getClass().getName();                                System.out.println(pipeName);				if(tempP instanceof SerialPipes){					ArrayList pipes2 = ((SerialPipes)tempP).getPipes();								for(int j=0; j<pipes2.size(); j++){						System.out.print("	Pipe: " + j + ": ");						Pipe tempP2 = (Pipe) pipes2.get(j);						if(tempP2 == null){							System.out.println("	Pipe is null");						}						else{				                        String pipeName2 = tempP2.getClass().getName();                                			System.out.println(pipeName2);						}					}				}                        }                }*///		System.out.println("================= start of CRF ============");//		crf.print();//		System.out.println("==================end of crf ==============");		//xxx		logger.log(Level.INFO, "Load CRF 
successfully\n");		return true;	}	public boolean loadCRF(CRF3 crf)	{		this.crf = crf;		this.pipe = (SerialPipes) crf.getInputPipe();		if (this.pipe == null) {			System.err.println("Get a null pipe from CRF");			return false;		}		return true;	}		public String printResultInFormat(boolean sgml)	{		String viterbiStr = "";		assert(tokenSequence != null);		assert(viterbiSequence != null);		assert(tokenSequence.size() == viterbiSequence.size());		String font = "";		String current_font = "";		if(sgml){			String old_tag = null;			String startTag, endTag;			for(int i=0; i<tokenSequence.size(); i++){				Token token = (Token)tokenSequence.getToken(i);				String word = token.getText();				String tag = viterbiSequence.get(i).toString();				if(tag != old_tag){					if(old_tag != null){						endTag = "</"+old_tag+">";						viterbiStr += endTag;						}								startTag = "<"+tag+">";						viterbiStr += startTag;					old_tag = tag;				}				if(token.hasProperty("FONT")){					current_font = (String)token.getProperty("FONT");				}							if(!current_font.equals(font) && printFont){						viterbiStr += "<font value=\""+current_font+"\" />";					font = current_font;				}				viterbiStr += word;				viterbiStr += " ";				if(i == tokenSequence.size() - 1){					endTag = "</"+tag+">";					viterbiStr += endTag;				}				if(token.hasProperty("LINE_END")){					viterbiStr += "\n";				}			}		}		else{			for(int i=0; i<tokenSequence.size(); i++){				viterbiStr += ((Token)tokenSequence.getToken(i)).getText();				viterbiStr += ": ";				viterbiStr += viterbiSequence.get(i).toString();				viterbiStr += "\n";			}		}		return viterbiStr;			}	//given an input string, label it, and output in the format of inline SGML	public String viterbiCRFString(String line, boolean sgml)	{		Instance lineCarrier = new Instance(line, null, null, null, pipe);		assert(pipe != null);		Instance featureCarrier = pipe.pipe(lineCarrier, 0);		assert(crf != null);				viterbiP = crf.viterbiPath((Sequence)featureCarrier.getData());		
viterbiSequence = viterbiP.output();		//confidence = Math.exp(-viterbiP.getCost()/viterbiSequence.size());		confidence = viterbiP.getCost();				tokenSequence = (TokenSequence)featureCarrier.getSource();		assert(viterbiSequence.size() == tokenSequence.size());		return printResultInFormat(sgml);	}	// to use this method successfully, tokenization should use "\\w+-\\w+|\\w+|'s|``|''|\\S" pattern	// or change the wordPattern in WSJPOSSentence2TokenSequence to match your tokenization pattern.	// 		public Sequence viterbiCRFTokenSequence(TokenSequence ts)	{			assert(crf != null);		String line = "";		for(int i=0; i<ts.size(); i++){			line += ts.getToken(i).getText()+" ";//			System.out.println(i+": "+ts.getToken(i).getText());		}				assert(pipe != null);		Instance lineCarrier = new Instance(line, null, null, null, pipe);		viterbiP = crf.viterbiPath((Sequence)lineCarrier.getData());		viterbiSequence = viterbiP.output();		confidence = Math.exp(-viterbiP.getCost()/viterbiSequence.size());//		viterbiSequence = crf.viterbiPath((Sequence)lineCarrier.getData()).output();//		Sequence tempTS = (Sequence)lineCarrier.getData();//		for(int i=0; i<tempTS.size(); i++){//			System.out.println(i+": "+tempTS.get(i).toString() + "/" + viterbiSequence.get(i).toString());//		}				assert(viterbiSequence.size() == ts.size()): "ts.size=" + ts.size() + " " + "viterSequence.size=" + viterbiSequence.size();				return viterbiSequence;	}	private double InstanceAccuracy(Sequence viterbiSequence, Sequence targetSequence)	{		assert(viterbiSequence.size() == targetSequence.size());		instance_size = viterbiSequence.size();		instance_error_num = 0;		for(int i=0; i<instance_size; i++){			String predO = viterbiSequence.get(i).toString();			String trueO = targetSequence.get(i).toString();			if(!predO.equals(trueO)){				instance_error_num ++;			}			}			double accuracy = (double)instance_error_num/instance_size;		return accuracy;	}	//viterbi for a piped instance	public String viterbiCRFInstance(Instance 
instance, boolean sgml )	{		assert(crf != null);		viterbiP = crf.viterbiPath((Sequence)instance.getData());// regular viterbi		viterbiSequence = viterbiP.output();//		confidence = Math.exp(viterbiP.getCost()/viterbiSequence.size());//		confidence = viterbiP.getCost()/viterbiSequence.size();	//		viterbiSequence = crf.viterbiPath((Sequence)instance.getData()).output();		instance_accuracy= InstanceAccuracy(viterbiSequence, (Sequence)instance.getTarget());		tokenSequence = (TokenSequence)instance.getSource(); 		assert(viterbiSequence.size() == tokenSequence.size());		return printResultInFormat(sgml);	}	public String viterbiCRFInstance_NBest(Instance instance, boolean sgml )	{		String str = "";		assert(crf != null);	        tokenSequence = (TokenSequence)instance.getSource();                assert(viterbiSequence.size() == tokenSequence.size());		int N = 1;		viterbiP_NBest = crf.viterbiPath_NBest((Sequence)instance.getData(), N);//n-best list		Sequence[] nbestlist = viterbiP_NBest.outputNBest();/*		for(int i=0; i<nbestlist.length; i++)	{			viterbiSequence = nbestlist[i];	//		viterbiSequence = viterbiP_NBest.output();			str += "\n" + i + ": " + (viterbiP_NBest.costNBest())[i] + " : " + viterbiP_NBest.getCost() + "\n";			str += printResultInFormat(sgml);		}*/		viterbiSequence = nbestlist[N-1];		str += printResultInFormat(sgml);		return str;	}	//given an input file, label it, and output in the format of inline SGML	public void viterbiCRF(File inputFile, boolean sgml, String seperator)	{		assert(pipe!= null);		InstanceList instancelist = new InstanceList (pipe);		Reader reader;		try {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -