⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parsereventreader.cs

📁 英语句子自然语言处理统计分析例子 Statistical parsing of English sentences Shows how to generate parse trees for
💻 CS
字号:
//Copyright (C) 2005 Richard J. Northedge
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

//This file is based on the ParserEventStream.java source file found in the
//original java implementation of OpenNLP.  That source file contains the following header:

//Copyright (C) 2003 Thomas Morton
// 
//This library is free software; you can redistribute it and/or
//modify it under the terms of the GNU Lesser General Public
//License as published by the Free Software Foundation; either
//version 2.1 of the License, or (at your option) any later version.
// 
//This library is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//GNU Lesser General Public License for more details.
// 
//You should have received a copy of the GNU Lesser General Public
//License along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

using System;
using System.Collections;

namespace OpenNLP.Tools.Parser
{
	/// <summary>
	/// Enumerated type of event types for the parser.  The value chosen when a
	/// ParserEventReader is constructed selects which context generator is created
	/// and which kind of training events are produced.
	/// </summary>
	public enum EventType
	{
		/// <summary>
		/// Events whose outcome is the start or continue prefix joined with the parent node's type,
		/// with contexts supplied by the BuildContextGenerator.
		/// </summary>
		Build,
		/// <summary>
		/// Events whose outcome is the complete or incomplete outcome,
		/// with contexts supplied by the CheckContextGenerator.
		/// </summary>
		Check,
		/// <summary>
		/// One event per token, with contexts supplied by the ChunkContextGenerator.
		/// </summary>
		Chunk,
		/// <summary>
		/// One event per token whose outcome is the token's type,
		/// with contexts supplied by the POS context generator.
		/// </summary>
		Tag
	}

	/// <summary>
	/// Wrapper class for one of four parser event readers.  The particular event stream is specified 
	/// at construction.
	/// </summary>
	public class ParserEventReader : SharpEntropy.ITrainingEventReader
	{
		// Context generators: the constructor creates only the one that matches the
		// requested event type, so the other three remain null.
		private BuildContextGenerator mBuildContextGenerator;
		private CheckContextGenerator mCheckContextGenerator;
		private ChunkContextGenerator mChunkContextGenerator;
		private OpenNLP.Tools.PosTagger.IPosContextGenerator mPosContextGenerator;
		// Source of parse strings; one parse string is taken per NextToken() call.
		private SharpEntropy.ITrainingDataReader mDataReader;
		// Events generated from the most recently read parse.
		private SharpEntropy.TrainingEvent[] mEvents;
		// Index into mEvents of the next event to hand out.
		private int mEventIndex;
		// Head rules passed to Parse.UpdateHeads for every parse that is read.
		private IHeadRules mHeadRules;
		// Kind of events this reader produces; fixed at construction.
		private EventType mEventType;
		
		/// <summary>
		/// Builds an event reader of the requested type over the supplied parse data and, when the data reader
		/// has input available, immediately generates the events for the first parse.
		/// </summary>
		/// <param name="dataReader">
		/// Reader supplying Penn Treebank style parses, one parse per line.
		/// </param>
		/// <param name="rules">
		/// The head rules applied to each parse that is read.
		/// </param>
		/// <param name="eventType">
		/// Which kind of events to produce (tag, chunk, build, or check).
		/// </param>
		public ParserEventReader(SharpEntropy.ITrainingDataReader dataReader, IHeadRules rules, EventType eventType)
		{
			// Only the context generator that matches the requested event type is created.
			switch (eventType)
			{
				case EventType.Build:
					mBuildContextGenerator = new BuildContextGenerator();
					break;
				case EventType.Check:
					mCheckContextGenerator = new CheckContextGenerator();
					break;
				case EventType.Chunk:
					mChunkContextGenerator = new ChunkContextGenerator();
					break;
				case EventType.Tag:
					mPosContextGenerator = new PosTagger.DefaultPosContextGenerator();
					break;
			}

			mDataReader = dataReader;
			mHeadRules = rules;
			mEventType = eventType;
			mEventIndex = 0;

			// With no input at all, start from an empty event buffer.
			if (!dataReader.HasNext())
			{
				mEvents = new SharpEntropy.TrainingEvent[0];
				return;
			}

			AddNewEvents();
		}
		
		/// <summary>
		/// Reports whether at least one more training event is available.
		/// </summary>
		/// <remarks>
		/// A parse can yield no events at all, so unread input in the data reader does not by itself
		/// guarantee another event.  When the current buffer is used up, this method reads further
		/// parses until one produces events or the data reader is exhausted.
		/// </remarks>
		/// <returns>
		/// true if an unread event is buffered; false once the buffer is used up and the data reader has no more input.
		/// </returns>
		public virtual bool HasNext()
		{
			while (mEventIndex >= mEvents.Length && mDataReader.HasNext())
			{
				AddNewEvents();
				mEventIndex = 0;
			}
			return (mEventIndex < mEvents.Length);
		}
		
		/// <summary>
		/// Returns the next training event, reading further parses from the data reader
		/// when the events of the current parse have all been handed out.
		/// </summary>
		/// <returns>
		/// The next training event.
		/// </returns>
		/// <exception cref="InvalidOperationException">
		/// The event buffer is used up and the data reader has no parse left that produces events.
		/// </exception>
		public virtual SharpEntropy.TrainingEvent ReadNextEvent()
		{
			// A parse can produce an empty batch of events, so keep reading until
			// a batch with events turns up or the data reader runs out of input.
			while (mEventIndex >= mEvents.Length && mDataReader.HasNext())
			{
				AddNewEvents();
				mEventIndex = 0;
			}
			if (mEventIndex >= mEvents.Length)
			{
				throw new InvalidOperationException("No more training events are available.");
			}
			return mEvents[mEventIndex++];
		}
		
		/// <summary>
		/// Recursively gathers the initial chunks beneath a parse node, appending them to the supplied list.
		/// A node is taken as a chunk when it is a POS tag, or when none of its children is a non-POS-tag node;
		/// otherwise each child is examined in turn, in the order returned by GetChildren.
		/// </summary>
		/// <param name="inputParse">
		/// The parse node to examine.
		/// </param>
		/// <param name="initialChunks">
		/// The list that receives the chunk nodes.
		/// </param>
		private static void GetInitialChunks(Parse inputParse, ArrayList initialChunks)
		{
			if (inputParse.IsPosTag)
			{
				initialChunks.Add(inputParse);
				return;
			}

			Parse[] children = inputParse.GetChildren();
			bool hasNonTagChild = false;
			foreach (Parse child in children)
			{
				if (!child.IsPosTag)
				{
					hasNonTagChild = true;
					break;
				}
			}

			if (!hasNonTagChild)
			{
				// Every child is a POS tag, so this node itself is the chunk.
				initialChunks.Add(inputParse);
				return;
			}

			foreach (Parse child in children)
			{
				GetInitialChunks(child, initialChunks);
			}
		}
		
		/// <summary>
		/// Collects the initial chunks of the given parse and returns them as an array.
		/// </summary>
		/// <param name="inputParse">
		/// The parse node whose chunks are wanted.
		/// </param>
		/// <returns>
		/// The chunk nodes, in the order in which they were collected.
		/// </returns>
		private static Parse[] GetInitialChunks(Parse inputParse)
		{
			ArrayList collected = new ArrayList();
			GetInitialChunks(inputParse, collected);

			Parse[] result = new Parse[collected.Count];
			collected.CopyTo(result);
			return result;
		}
		
		/// <summary>
		/// Tells whether the given node is the first entry in the parent's child array.
		/// </summary>
		private static bool IsFirstChild(Parse child, Parse parent)
		{
			Parse firstKid = parent.GetChildren()[0];
			return firstKid == child;
		}
		
		/// <summary>
		/// Tells whether the given node is the last entry in the parent's child array.
		/// </summary>
		private static bool IsLastChild(Parse child, Parse parent)
		{
			Parse[] siblings = parent.GetChildren();
			int lastIndex = siblings.Length - 1;
			return siblings[lastIndex] == child;
		}
		
		/// <summary>
		/// Takes the next parse string from the data reader, builds the parse, applies the head rules,
		/// and replaces the event buffer with the events generated for the configured event type.
		/// </summary>
		private void AddNewEvents()
		{
			string parseText = (string) mDataReader.NextToken();
			ArrayList generated = new ArrayList();

			Parse root = Parse.FromParseString(parseText);
			root.UpdateHeads(mHeadRules);
			Parse[] initialChunks = GetInitialChunks(root);

			switch (mEventType)
			{
				case EventType.Tag:
					AddTagEvents(generated, initialChunks);
					break;
				case EventType.Chunk:
					AddChunkEvents(generated, initialChunks);
					break;
				default:
					// Any other event type goes through the parse-event pass.
					AddParseEvents(generated, initialChunks);
					break;
			}

			mEvents = (SharpEntropy.TrainingEvent[]) generated.ToArray(typeof(SharpEntropy.TrainingEvent));
		}
		
		/// <summary>
		/// Walks the chunk sequence, adding build or check events (depending on the event type of this reader)
		/// and replacing each completed run of sibling chunks with their parent node until no further
		/// reduction is possible.
		/// </summary>
		/// <remarks>
		/// Each chunk that has a parent has its Label set to the outcome computed for it.  The array passed in
		/// is never written to; reductions are carried out on newly allocated arrays.
		/// </remarks>
		/// <param name="events">
		/// The list that receives the generated training events.
		/// </param>
		/// <param name="chunks">
		/// The initial chunk sequence for the parse.
		/// </param>
		private void AddParseEvents(ArrayList events, Parse[] chunks)
		{
			int currentChunk = 0;
			while (currentChunk < chunks.Length)
			{
				Parse chunkParse = chunks[currentChunk];
				Parse parentParse = chunkParse.Parent;
				// Chunks without a parent produce no event and are simply stepped over.
				if (parentParse != null)
				{
					string type = parentParse.Type;
					string outcome;
					// The first child starts a constituent of the parent's type; any other child continues it.
					if (IsFirstChild(chunkParse, parentParse))
					{
						outcome = MaximumEntropyParser.StartPrefix + type;
					}
					else
					{
						outcome = MaximumEntropyParser.ContinuePrefix + type;
					}
					chunkParse.Label = outcome;
					if (mEventType == EventType.Build)
					{
						events.Add(new SharpEntropy.TrainingEvent(outcome, mBuildContextGenerator.GetContext(chunks, currentChunk)));
					}
					// Scan left past the preceding chunks that share this parent; start + 1 is then
					// the index of the leftmost chunk in that run.
					int start = currentChunk - 1;
					while (start >= 0 && (chunks[start]).Parent == parentParse)
					{
						start--;
					}
					if (IsLastChild(chunkParse, parentParse))
					{
						if (mEventType == EventType.Check)
						{
							events.Add(new SharpEntropy.TrainingEvent(MaximumEntropyParser.CompleteOutcome, mCheckContextGenerator.GetContext(chunks, type, start + 1, currentChunk)));
						}
						//perform reduce
						// reduceStart..reduceEnd is the run of chunks ending at the current chunk that share this parent.
						int reduceStart = currentChunk;
						int reduceEnd = currentChunk;
						while (reduceStart >=0 && chunks[reduceStart].Parent == parentParse) 
						{
							reduceStart--;

						}
						reduceStart++;

						if (!(type == MaximumEntropyParser.TopNode))
						{
							Parse[] reducedChunks = new Parse[chunks.Length - (reduceEnd - reduceStart + 1) + 1]; //total - num_removed + 1 (for new node)
							//insert nodes before reduction
							for (int reductionIndex = 0, reductionCount = reduceStart; reductionIndex < reductionCount; reductionIndex++) 
							{
								reducedChunks[reductionIndex] = chunks[reductionIndex];
							}
							//insert reduced node
							reducedChunks[reduceStart] = parentParse;
							//insert nodes after reduction
							int currentReductionIndex = reduceStart + 1;
							for (int afterReductionIndex = reduceEnd + 1; afterReductionIndex < chunks.Length; afterReductionIndex++) 
							{
								reducedChunks[currentReductionIndex] = chunks[afterReductionIndex];
								currentReductionIndex++;
							}
							chunks = reducedChunks;
							// Rewind so that the next iteration processes the parent node just inserted.
							currentChunk = reduceStart - 1; //currentChunk will be incremented at end of loop
						}
						else 
						{
							// The parent is the top node: emptying the sequence ends the outer loop.
							chunks = new Parse[0];
						}
					}
					else
					{
						if (mEventType == EventType.Check)
						{
							events.Add(new SharpEntropy.TrainingEvent(MaximumEntropyParser.IncompleteOutcome, mCheckContextGenerator.GetContext(chunks, type, start + 1, currentChunk)));
						}
					}
				}
				currentChunk++;
			}
		}
		
		/// <summary>
		/// Adds one chunking event per token of the parse.
		/// </summary>
		/// <remarks>
		/// A chunk that is itself a POS tag contributes a single token with the "other" outcome.  For any other
		/// chunk, each child contributes a token: the first child gets the start prefix joined with the chunk type,
		/// and the remaining children get the continue prefix joined with the chunk type.
		/// </remarks>
		/// <param name="events">
		/// The list that receives the generated training events.
		/// </param>
		/// <param name="chunks">
		/// The initial chunk sequence for the parse.
		/// </param>
		private void AddChunkEvents(ArrayList events, Parse[] chunks)
		{
			ArrayList tokens = new ArrayList();
			ArrayList tags = new ArrayList();
			ArrayList predicates = new ArrayList();
			for (int currentChunk = 0; currentChunk < chunks.Length; currentChunk++)
			{
				Parse chunkParse = chunks[currentChunk];
				if (chunkParse.IsPosTag)
				{
					tokens.Add(chunkParse.ToString());
					tags.Add(chunkParse.Type);
					predicates.Add(MaximumEntropyParser.OtherOutcome);
				}
				else
				{
					bool isStart = true;
					string chunkType = chunkParse.Type;
					Parse[] childParses = chunkParse.GetChildren();
					foreach (Parse tokenParse in childParses)
					{
						tokens.Add(tokenParse.ToString());
						tags.Add(tokenParse.Type);
						if (isStart)
						{
							predicates.Add(MaximumEntropyParser.StartPrefix + chunkType);
							isStart = false;
						}
						else
						{
							predicates.Add(MaximumEntropyParser.ContinuePrefix + chunkType);
						}
					}
				}
			}

			// The three sequences are identical for every token, so they are converted to arrays
			// once here instead of being rebuilt on each pass through the loop.
			object[] tokenArray = tokens.ToArray();
			string[] tagArray = (string[]) tags.ToArray(typeof(string));
			string[] predicateArray = (string[]) predicates.ToArray(typeof(string));
			for (int currentToken = 0; currentToken < tokenArray.Length; currentToken++)
			{
				events.Add(new SharpEntropy.TrainingEvent(predicateArray[currentToken], mChunkContextGenerator.GetContext(currentToken, tokenArray, tagArray, predicateArray)));
			}
		}
		
		/// <summary>
		/// Adds one tagging event per token of the parse, using the token's type as the outcome.
		/// </summary>
		/// <remarks>
		/// A chunk that is itself a POS tag contributes a single token; for any other chunk,
		/// each of its children contributes a token.
		/// </remarks>
		/// <param name="events">
		/// The list that receives the generated training events.
		/// </param>
		/// <param name="chunks">
		/// The initial chunk sequence for the parse.
		/// </param>
		private void AddTagEvents(ArrayList events, Parse[] chunks)
		{
			ArrayList tokens = new ArrayList();
			ArrayList predicates = new ArrayList();
			for (int currentChunk = 0; currentChunk < chunks.Length; currentChunk++)
			{
				Parse chunkParse = chunks[currentChunk];
				if (chunkParse.IsPosTag)
				{
					tokens.Add(chunkParse.ToString());
					predicates.Add(chunkParse.Type);
				}
				else
				{
					Parse[] childParses = chunkParse.GetChildren();
					foreach (Parse tokenParse in childParses)
					{
						tokens.Add(tokenParse.ToString());
						predicates.Add(tokenParse.Type);
					}
				}
			}

			// Both sequences are identical for every token, so they are converted to arrays
			// once here instead of being rebuilt on each pass through the loop.
			object[] tokenArray = tokens.ToArray();
			string[] predicateArray = (string[]) predicates.ToArray(typeof(string));
			for (int currentToken = 0; currentToken < tokenArray.Length; currentToken++)
			{
				events.Add(new SharpEntropy.TrainingEvent(predicateArray[currentToken], mPosContextGenerator.GetContext(currentToken, tokenArray, predicateArray, null)));
			}
		}
		
//		[STAThread]
//		public static void  Main(System.String[] args)
//		{
//			if (args.Length == 0)
//			{
//				System.Console.Error.WriteLine("Usage ParserEventStream -[tag|chunk|build|check] head_rules < parses");
//				System.Environment.Exit(1);
//			}
//			EventTypeEnum etype = null;
//			int ai = 0;
//			if (args[ai].Equals("-build"))
//			{
//				etype = EventTypeEnum.Build;
//			}
//			else if (args[ai].Equals("-check"))
//			{
//				etype = EventTypeEnum.Check;
//			}
//			else if (args[ai].Equals("-chunk"))
//			{
//				etype = EventTypeEnum.Chunk;
//			}
//			else if (args[ai].Equals("-tag"))
//			{
//				etype = EventTypeEnum.Tag;
//			}
//			else
//			{
//				System.Console.Error.WriteLine("Invalid option " + args[ai]);
//				System.Environment.Exit(1);
//			}
//			ai++;
//			EnglishHeadRules rules = new EnglishHeadRules(args[ai++]);
//			MaxEnt.EventStream es = new ParserEventStream(new MaxEnt.PlainTextByLineDataStream(new System.IO.StreamReader(System.Console.In)), rules, etype);
//			while (es.hasNext())
//			{
//				System.Console.Out.WriteLine(es.nextEvent());
//			}
//		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -