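// Listing retrieved from the Chongchong download site (page header residue, kept as a comment).
//
// File: poseventreader.cs (C#)
//
// Collection: English sentence natural language processing / statistical analysis example.
// Statistical parsing of English sentences. Shows how to generate parse trees for ...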
//Copyright (C) 2005 Richard J. Northedge
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

//This file is based on the POSEventCollector.java and POSEventStream.java source files found in the
//original java implementation of OpenNLP.  Those source files contain the following headers:

// Copyright (C) 2003 Tom Morton
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

// Copyright (C) 2002 Jason Baldridge and Gann Bierner
// 
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// 
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
// 
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

using System;
using System.Collections;

namespace OpenNLP.Tools.PosTagger
{
	/// <summary>
	/// An event generator for the maxent POS Tagger.
	/// </summary>
	/// <remarks>
	/// Training data is pulled from the supplied reader one line at a time. Each line is
	/// tokenized with <c>Util.StringTokenizer</c>, every token is split into a word and a tag
	/// at its last underscore (WORD_TAG), and one training event is buffered per token.
	/// Lines that yield no events (for example blank lines) are skipped, so they do not
	/// end the event stream while more data remains in the reader.
	/// </remarks>
	public class PosEventReader : SharpEntropy.ITrainingEventReader
	{
		private readonly System.IO.TextReader mTextReader;
		private readonly IPosContextGenerator mContextGenerator;

		// Events generated from the line currently being consumed.
		private readonly ArrayList mEventList = new ArrayList();

		// Index into mEventList of the next event to hand out.
		private int mCurrentEvent = 0;

		/// <summary>
		/// Creates an event reader over the given training data using a
		/// <c>DefaultPosContextGenerator</c>.
		/// </summary>
		/// <param name="data">Reader supplying one annotated sentence per line.</param>
		/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
		public PosEventReader(System.IO.TextReader data) : this(data, new DefaultPosContextGenerator())
		{
		}

		/// <summary>
		/// Creates an event reader over the given training data using the supplied context generator.
		/// The first line(s) of <paramref name="data"/> are read immediately to prime the event buffer.
		/// </summary>
		/// <param name="data">Reader supplying one annotated sentence per line.</param>
		/// <param name="contextGenerator">Generator used to build the context of each token.</param>
		/// <exception cref="ArgumentNullException">
		/// <paramref name="data"/> or <paramref name="contextGenerator"/> is null.
		/// </exception>
		public PosEventReader(System.IO.TextReader data, IPosContextGenerator contextGenerator)
		{
			if (data == null)
			{
				throw new ArgumentNullException("data");
			}
			if (contextGenerator == null)
			{
				throw new ArgumentNullException("contextGenerator");
			}

			mContextGenerator = contextGenerator;
			mTextReader = data;
			LoadNextEvents();
		}

		/// <summary>
		/// Reports whether at least one more buffered training event is available.
		/// </summary>
		/// <returns>True if <see cref="ReadNextEvent"/> can be called; otherwise false.</returns>
		public virtual bool HasNext()
		{
			return (mCurrentEvent < mEventList.Count);
		}

		/// <summary>
		/// Returns the next training event. When the buffered events of the current line are
		/// used up, the buffer is refilled from the following line(s) of the reader.
		/// </summary>
		/// <returns>The next training event.</returns>
		/// <exception cref="ArgumentOutOfRangeException">
		/// No event is buffered, i.e. <see cref="HasNext"/> returned false (raised by the list indexer).
		/// </exception>
		public virtual SharpEntropy.TrainingEvent ReadNextEvent()
		{
			SharpEntropy.TrainingEvent trainingEvent = (SharpEntropy.TrainingEvent)mEventList[mCurrentEvent];
			mCurrentEvent++;
			if (mEventList.Count == mCurrentEvent)
			{
				LoadNextEvents();
			}
			return trainingEvent;
		}

		/// <summary>
		/// Resets the event buffer and refills it from the reader. Lines are consumed until one
		/// produces at least one event or the reader is exhausted; stopping after a single
		/// event-less line would make <see cref="HasNext"/> report false with data still unread.
		/// </summary>
		private void LoadNextEvents()
		{
			mCurrentEvent = 0;
			mEventList.Clear();

			string nextLine;
			while (mEventList.Count == 0 && (nextLine = mTextReader.ReadLine()) != null)
			{
				AddEvents(nextLine);
			}
		}

		/// <summary>
		/// Converts one annotated line into training events and appends them to the event buffer.
		/// </summary>
		/// <param name="line">A line of WORD_TAG tokens.</param>
		private void AddEvents(string line)
		{
			Util.Pair linePair = ConvertAnnotatedString(line);
			ArrayList tokens = (ArrayList) linePair.A;
			ArrayList outcomes = (ArrayList) linePair.B;
			ArrayList tags = new ArrayList();

			// The token list does not change inside the loop, so copy it to an array once.
			object[] tokenArray = tokens.ToArray();

			for (int currentToken = 0; currentToken < tokens.Count; currentToken++)
			{
				// Only the tags of the tokens preceding the current one are passed as prior decisions.
				string[] priorTags = (string[]) tags.ToArray(typeof(string));
				string[] context = mContextGenerator.GetContext(currentToken, tokenArray, priorTags, null);
				SharpEntropy.TrainingEvent posTrainingEvent = new SharpEntropy.TrainingEvent((string) outcomes[currentToken], context);
				tags.Add(outcomes[currentToken]);
				mEventList.Add(posTrainingEvent);
			}
		}

		/// <summary>
		/// Splits a single WORD_TAG token at its last underscore.
		/// </summary>
		/// <param name="input">The annotated token.</param>
		/// <returns>
		/// A pair of (word, tag). If the token contains no underscore, a warning is written to
		/// standard output and the pair (input, "UNKNOWN") is returned.
		/// </returns>
		private static Util.Pair Split(string input)
		{
			// The char overload performs an ordinal search; the string overload is culture-sensitive.
			int splitPosition = input.LastIndexOf('_');
			if (splitPosition == -1)
			{
				System.Console.Out.WriteLine("There is a problem in your training data: " + input + " does not conform to the format WORD_TAG.");
				return new Util.Pair(input, "UNKNOWN");
			}
			return new Util.Pair(input.Substring(0, splitPosition), input.Substring(splitPosition + 1));
		}

		/// <summary>
		/// Tokenizes an annotated sentence and separates the words from their tags.
		/// </summary>
		/// <param name="input">A string of WORD_TAG tokens.</param>
		/// <returns>
		/// A pair whose first element is an <see cref="ArrayList"/> of the words and whose second
		/// element is an <see cref="ArrayList"/> of the corresponding tags, in token order.
		/// </returns>
		public static Util.Pair ConvertAnnotatedString(string input)
		{
			ArrayList tokens = new ArrayList();
			ArrayList outcomes = new ArrayList();
			Util.StringTokenizer tokenizer = new Util.StringTokenizer(input);
			string token = tokenizer.NextToken();
			while (token != null)
			{
				Util.Pair linePair = Split(token);
				tokens.Add(linePair.A);
				outcomes.Add(linePair.B);
				token = tokenizer.NextToken();
			}
			return new Util.Pair(tokens, outcomes);
		}

		// NOTE: disabled sample kept for reference only; it refers to types
		// (EventCollector, PosEventCollector, Event) that are not defined in this file.
//		[STAThread]
//		public static void Main(string[] args)
//		{
//			string sData = "the_DT stories_NNS about_IN well-heeled_JJ communities_NNS and_CC developers_NNS";
//			EventCollector oEventCollector = new PosEventCollector(new System.IO.StringReader(sData), new DefaultPosContextGenerator());
//			Event[] aoEvents = oEventCollector.GetEvents();
//			for (int iCurrentEvent = 0; iCurrentEvent < aoEvents.length; iCurrentEvent++)
//			{
//				System.Console.Out.WriteLine(aoEvents[iCurrentEvent].GetOutcome());
//			}
//		}
	}
}

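// Viewer keyboard shortcuts (web page residue, not part of the program):
//
//   Copy code         Ctrl + C
//   Search code       Ctrl + F
//   Full screen       F11
//   Toggle theme      Ctrl + Shift + D
//   Show shortcuts    ?
//   Increase font     Ctrl + =
//   Decrease font     Ctrl + -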