⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ieinterface.java

📁 mallet是自然语言处理、机器学习领域的一个开源项目。
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
		instance_accuracy_nbest = new double[N];		viterbiP_NBest = crf.viterbiPath_NBest((Sequence)instance.getData(), N);//n-best list		Sequence[] nbestlist = viterbiP_NBest.outputNBest();		// print all N candidates		for(int i=0; i<nbestlist.length; i++)	{			viterbiSequence = nbestlist[i];			str += "\n" + i + ": cost=" + (viterbiP_NBest.costNBest())[i] + " : viterbicost=" + viterbiP_NBest.getCost() + " ";//			double tempW = viterbiP_NBest.costNBest()[i] - viterbiP_NBest.costNBest()[0];//			double weight = Math.exp(-tempW);			double confidence = viterbiP_NBest.confidenceNBest()[i];			str += " confidene=" + confidence + " ";                        instance_accuracy_nbest[i]= InstanceAccuracy(viterbiSequence, (Sequence)instance.getTarget(), instance);//			System.out.println(instance_accuracy_nbest[i]);			str += instance_accuracy_nbest[i] + "\n";			str += printResultInFormat(sgml);		}		// print only the Nth candidate//		viterbiSequence = nbestlist[N-1];//		str += printResultInFormat(sgml);             // use the combined results       //         viterbiSequence = crf.combineNBest_fieldLevel(instance, viterbiP_NBest, null, null);//		viterbiSequence = crf.combineNBest_fieldLevel2(instance, N, 99);//		viterbiSequence = crf.combineNBest_fieldLevel3(instance, N, 99);//	         str += "\ncombined result\n"; //               str += InstanceAccuracy(viterbiSequence, (Sequence)instance.getTarget(), instance) + " \n";   //             str += printResultInFormat(sgml);		return str;	}	//given an input file, label it, and output in the format of inline SGML	public void viterbiCRF(File inputFile, boolean sgml, String seperator)	{		assert(pipe!= null);		InstanceList instancelist = new InstanceList (pipe);		Reader reader;		try {			reader = new FileReader (inputFile);		} catch (Exception e) {			throw new IllegalArgumentException ("Can't read file "+inputFile);		}				instancelist.add (new LineGroupIterator (reader, Pattern.compile(seperator), true));		String outputFileStr = inputFile.toString() + "_tagged";				System.out.println(inputFile.toString() + " ---> " + outputFileStr);		PrintStream taggedOut = null;		try{			FileOutputStream fos = new FileOutputStream (outputFileStr);			taggedOut = new PrintStream (fos);		} catch (IOException e) {			logger.warning ("Couldn't open output file '"+ outputFileStr+"'");		}				if(taggedOut == null){			taggedOut = System.out;		}					String viterbiStr = "";//		taggedOut.println("testing instance number: " + instancelist.size() );		for(int i=0; i<instancelist.size(); i++){//				taggedOut.println("\ntesting instance " + i);				Instance instance = instancelist.getInstance(i);				String crfStr = viterbiCRFInstance(instance,sgml);							taggedOut.println(seperator);				taggedOut.println(" instance accuracy= " 					+ instance_error_num + "/" + instance_size + "=" + instance_accuracy);				taggedOut.println(crfStr);				viterbiStr += crfStr;				//N-best tagging				int N = 10;				crfStr = viterbiCRFInstance_NBest(instance,sgml, N);				taggedOut.println("N-best result:");				taggedOut.println(seperator);				taggedOut.println(crfStr);				viterbiStr += crfStr;		}		if(taggedOut != System.out){			taggedOut.close();		}	}	//viterbi for all files under a given directory, 	//if the given directory is a plain file, viterbi for this file	public void viterbiCRF(String inputDir, boolean sgml, String seperator)	{		// if inputDir is a plain file		File file = new File(inputDir);		if( file.isFile() ){			viterbiCRF(file, sgml, seperator);		}		else{			// continue if it is a directory			FileIterator fileIter = new FileIterator (inputDir);				ArrayList fileList = fileIter.getFileArray();			for(int i=0; i<fileList.size(); i++){				file = (File) fileList.get(i);				viterbiCRF(file, sgml, seperator);			}		}	}	public void viterbiCRF(String inputDir)	{		viterbiCRF(inputDir, true);	}	public void viterbiCRF(String inputDir, boolean sgml)	{		viterbiCRF(inputDir, sgml, seperator);	}        // cumulative evaluation for N-best list        public void cumulativeEvaluate_InstanceLevel(File inputFile, String seperator, int N)        {                assert(pipe!= null);                InstanceList instancelist = new InstanceList (pipe);                Reader reader;                try {                        reader = new FileReader (inputFile);                } catch (Exception e) {                        throw new IllegalArgumentException ("Can't read file "+inputFile);                }                instancelist.add (new LineGroupIterator (reader, Pattern.compile(seperator), true));                Alphabet targets = (this.pipe).getTargetAlphabet();                assert(targets != null);                int numCorrectTokens = 0, totalTokens = 0;                int[] numTrueSegments, numPredictedSegments, numCorrectSegments;                int[] numCorrectSegmentsInVocabulary, numCorrectSegmentsOOV;                int[] numIncorrectSegmentsInVocabulary, numIncorrectSegmentsOOV;                int[][] matrixEntry;                int numCorrectWholeInstance = 0;                numTrueSegments = new int[targets.size()];                numPredictedSegments = new int[targets.size()];                numCorrectSegments = new int[targets.size()];                matrixEntry = new int[targets.size()][targets.size()];//                String PUNT = "[,\\.;:?!()*]";//                Pattern puntPattern = Pattern.compile(PUNT);                for(int i=0; i<instancelist.size(); i++){                                Instance instance = instancelist.getInstance(i);                                //N-best tagging                                viterbiP_NBest = crf.viterbiPath_NBest((Sequence)instance.getData(), N);//n-best list                                Sequence[] nbestlist = viterbiP_NBest.outputNBest();				instance_accuracy_nbest = new double[N];//				System.out.println(nbestlist.length);		                for(int k=0; k<nbestlist.length; k++)   {                	        	Sequence tempViterbiSequence = nbestlist[k];                       		 	instance_accuracy_nbest[k]= InstanceAccuracy(tempViterbiSequence, (Sequence)instance.getTarget(), instance);               			}								int optimalIndex = 0;				for(int k=1; k<nbestlist.length; k++){//					System.out.println(i + " : " + k + " : " +  instance_accuracy_nbest[k]);					if(instance_accuracy_nbest[k] > instance_accuracy_nbest[optimalIndex]) {						optimalIndex = k;		//				System.out.println(optimalIndex + " : " +  instance_accuracy_nbest[k]);					}				}//				System.out.println(optimalIndex + "/" + nbestlist.length + " : " +  instance_accuracy_nbest[optimalIndex]);                                boolean wholeInstanceCorrect = true;                                Sequence trueSequence = (Sequence)instance.getTarget();                                tokenSequence = (TokenSequence)instance.getSource();                                for (int j = 0; j < trueSequence.size(); j++) {                                        String tokenStr = tokenSequence.getToken(j).getText();                                        if(puntPattern.matcher(tokenStr).matches() && ignorePunct ){//ignore punct;                                                continue;                                        }                                        totalTokens ++;                                        Object trueO = trueSequence.get(j);				//	String trueO = trueSequence.get(j).toString();                                        int trueIndex = targets.lookupIndex(trueO);                                        numTrueSegments[trueIndex] ++;                                        int predIndex = 0;                                        Object predO = nbestlist[optimalIndex].get(j);				//	String predO = nbestlist[optimalIndex].get(j).toString();                                        predIndex = targets.lookupIndex(predO);                                        numPredictedSegments[predIndex] ++;                                        matrixEntry[trueIndex][predIndex] ++;                                        if(predIndex == trueIndex){                                                numCorrectTokens ++;                                                numCorrectSegments[trueIndex] ++;                                        }                                        else{                                                wholeInstanceCorrect = false;                                        }                                }                                if(wholeInstanceCorrect) numCorrectWholeInstance ++;                }                System.out.println("\n\nAlways select the best instance evalutation results: N = " + N);		double macro_average_p=0;		double macro_average_r=0;		double macro_average_f=0;		double micro_average_p=0;		double micro_average_r=0;		double micro_average_f=0;		int micro_numCorrectSegments = 0;		int micro_numPredictedSegments = 0;		int micro_numTrueSegments = 0;		int classNum=0;                for(int t=0; t<targets.size(); t++){                        double precision = numPredictedSegments[t] == 0 ? 1 : ((double)numCorrectSegments[t]) / numPredictedSegments[t];                        double recall = numTrueSegments[t] == 0 ? 1 : ((double)numCorrectSegments[t]) / numTrueSegments[t];                        double f1 = recall+precision == 0.0 ? 0.0 : (2.0 * recall * precision) / (recall + precision);                        double accuracy_individual = (double)(totalTokens-numPredictedSegments[t]-numTrueSegments[t] + 2*numCorrectSegments[t] )/totalTokens;                        System.out.println (targets.lookupObject(t) + " precision="+precision+" recall="+recall+" f1="+f1 + " accuracy=" + accuracy_individual);                        System.out.println ("segments true="+numTrueSegments[t]+" pred="+numPredictedSegments[t]+" correct="+numCorrectSegments[t]+" misses="+(numTrueSegments[t]-numCorrectSegments[t])+" alarms="+(numPredictedSegments[t]-numCorrectSegments[t]) + "\n");			if(!targets.lookupObject(t).equals("O")){				classNum++;								macro_average_p += precision;				macro_average_r += recall;				macro_average_f += f1;				micro_numCorrectSegments += numCorrectSegments[t];				micro_numPredictedSegments += numPredictedSegments[t];				micro_numTrueSegments +=  numTrueSegments[t];			}                }		micro_average_p = (double)micro_numCorrectSegments/micro_numPredictedSegments;		micro_average_r = (double)micro_numCorrectSegments/micro_numTrueSegments;		micro_average_f =  micro_average_r + micro_average_p == 0.0 ? 0.0 : (2.0 * micro_average_r * micro_average_p) / (micro_average_r + micro_average_p);		macro_average_p /= classNum;		macro_average_r /= classNum;		macro_average_f /= classNum;                System.out.println("\n Confusion Matrix (row: true label, col: predicted label)");                System.out.print("\t");                for(int t=0; t<targets.size(); t++){                        System.out.print(targets.lookupObject(t) + "\t");                }                System.out.println();                for(int t=0; t< targets.size(); t++){                        System.out.print(targets.lookupObject(t)+"\t");                        for(int tt=0; tt<targets.size(); tt++){                                System.out.print(matrixEntry[t][tt] + "\t");                        }                        System.out.println();                }		// print out the overall performance                double accuracy = (double)numCorrectTokens/totalTokens;                System.out.println ("\n" +" accuracy=" + numCorrectTokens +"/"+ totalTokens + " = " +accuracy);                double wholeInstanceAccuracy = (double)numCorrectWholeInstance/instancelist.size();                System.out.println ("Whole instance accuracy = " + numCorrectWholeInstance + "/" + instancelist.size() + " = " + wholeInstanceAccuracy);		System.out.println("\nMacro Average");		System.out.println("macro precision : " + macro_average_p);		System.out.println("macro recall: " + macro_average_r);		System.out.println("macro f : " + macro_average_f);		System.out.println("\nMicro Average");		System.out.println("micro precision : " + micro_average_p);		System.out.println("micro recall: " + micro_average_r);		System.out.println("micro f : " + micro_average_f);/*                double accuracy = (double)numCorrectTokens/totalTokens;                System.out.println ("\n" +" accuracy=" + numCorrectTokens +"/"+ totalTokens + " = " +accuracy);                double wholeInstanceAccuracy = (double)numCorrectWholeInstance/instancelist.size();                System.out.println ("Whole instance accuracy = " + numCorrectWholeInstance + "/" + instancelist.size() + " = " + wholeInstanceAccuracy);                for(int t=0; t<targets.size(); t++){                        double precision = numPredictedSegments[t] == 0 ? 1 : ((double)numCorrectSegments[t]) / numPredictedSegments[t];                        double recall = numTrueSegments[t] == 0 ? 1 : ((double)numCorrectSegments[t]) / numTrueSegments[t];                        double f1 = recall+precision == 0.0 ? 0.0 : (2.0 * recall * precision) / (recall + precision);                        double accuracy_individual = (double)(totalTokens-numPredictedSegments[t]-numTrueSegments[t] + 2*numCorrectSegments[t] )/totalTokens;                        System.out.println (targets.lookupObject(t) + " precision="+precision+" recall="+recall+" f1="+f1 + " accuracy=" + accuracy_individual);                        System.out.println ("segments true="+numTrueSegments[t]+" pred="+numPredictedSegments[t]+" correct="+numCorrectSegments[t]+" misses="+(numTrueSegments[t]-numCorrectSegments[t])+" alarms="+(numPredictedSegments[t]-numCorrectSegments[t]) + "\n");                }                System.out.println();                for(int t=0; t< targets.size(); t++){                        System.out.print(targets.lookupObject(t)+"\t");                        for(int tt=0; tt<targets.size(); tt++){                                System.out.print(matrixEntry[t][tt] + "\t");                        }                        System.out.println();                }*/        }		// cumulative evaluation for N-best list 	public void cumulativeEvaluate_TokenLevel(File inputFile, String seperator, int N)	{		assert(pipe!= null);		InstanceList instancelist = new InstanceList (pipe);		Reader reader;		try {			reader = new FileReader (inputFile);		} catch (Exception e) {			throw new IllegalArgumentException ("Can't read file "+inputFile);		}			instancelist.add (new LineGroupIterator (reader, Pattern.compile(seperator), true));		Alphabet targets = (this.pipe).getTargetAlphabet();                assert(targets != null);		int numCorrectTokens = 0, totalTokens = 0;		int[] numTrueSegments, numPredictedSegments, numCorrectSegments;		int[] numCorrectSegmentsInVocabulary, numCorrectSegmentsOOV;		int[] numIncorrectSegmentsInVocabulary, numIncorrectSegmentsOOV;		int[][] matrixEntry;		int numCorrectWholeInstance = 0;		numTrueSegments = new int[targets.size()];		numPredictedSegments = new int[targets.size()];		numCorrectSegments = new int[targets.size()];		matrixEntry = new int[targets.size()][targets.size()];//        	String PUNT = "[,\\.;:?!()*]";//	        Pattern puntPattern = Pattern.compile(PUNT);		for(int i=0; i<instancelist.size(); i++){				Instance instance = instancelist.getInstance(i);				//N-best tagging				viterbiP_NBest = crf.viterbiPath_NBest((Sequence)instance.getData(), N);//n-best list				Sequence[] nbestlist = viterbiP_NBest.outputNBest();				boolean wholeInstanceCorrect = true;				Sequence trueSequence = (Sequence)instance.getTarget();			        tokenSequence = (TokenSequence)instance.getSource();				for (int j = 0; j < trueSequence.size(); j++) {					String tokenStr = tokenSequence.getToken(j).getText();					if(puntPattern.matcher(tokenStr).matches() &&  ignorePunct ){//ignore punct;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -