⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sentencedetectioneventreader.cs

📁 英语句子自然语言处理统计分析例子 Statistical parsing of English sentences Shows how to generate parse trees for
💻 CS
字号:
//Copyright (C) 2005 Richard J. Northedge
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

//This file is based on the SDEventStream.java source file found in the
//original java implementation of OpenNLP.  That source file contains the following header:

// Copyright (C) 2002 Jason Baldridge and Gann Bierner
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

using System;

namespace OpenNLP.Tools.SentenceDetect
{
	/// <summary>
	/// An implementation of ITrainingEventReader which assumes that it is receiving
	/// its data as one (valid) sentence per token.  The default DataReader
	/// to use with this class is PlainTextByLineDataReader, but you can
	/// provide other types of ITrainingDataReaders if you wish to receive data from
	/// sources other than plain text files; however, be sure that each
	/// token your ITrainingDataReader returns is a valid sentence.
	/// </summary>
	/// <remarks>
	/// Events are produced lazily: each sentence read from the data reader is scanned for
	/// candidate end-of-sentence positions, and one event is queued per candidate.  The
	/// queue is a singly linked list running from <code>mHead</code> to <code>mTail</code>.
	/// </remarks>
	public class SentenceDetectionEventReader : SharpEntropy.ITrainingEventReader
	{
		private readonly SharpEntropy.ITrainingDataReader mDataReader;

		// The sentence following the one currently being processed (one token of look-ahead),
		// or null once the data reader is exhausted.
		private string mNext;

		// Queue of pending events.  Invariant: mTail is the last node whenever mHead is
		// non-null, and both are null when the queue is empty.
		private SentenceDetectionEvent mHead, mTail;

		private readonly SharpEntropy.IContextGenerator mContextGenerator;

		// Scratch buffer reused between calls to AddNewEvents; always left empty on exit.
		private readonly System.Text.StringBuilder mBuffer = new System.Text.StringBuilder();

		private readonly IEndOfSentenceScanner mScanner;
		
		/// <summary>
		/// Creates a new <code>SentenceDetectionEventReader</code> instance.  A
		/// DefaultEndOfSentenceScanner is used to locate sentence endings.
		/// </summary>
		/// <param name="dataReader">a <code>ITrainingDataReader</code> value
		/// </param>
		/// <exception cref="System.ArgumentNullException">
		/// <code>dataReader</code> is null.
		/// </exception>
		public SentenceDetectionEventReader(SharpEntropy.ITrainingDataReader dataReader) : this(dataReader, new DefaultEndOfSentenceScanner(), new SentenceDetectionContextGenerator(DefaultEndOfSentenceScanner.GetEndOfSentenceCharacters()))
		{
		}
		
		/// <summary>
		/// Class constructor which uses the EndOfSentenceScanner to locate
		/// sentence endings.  Note that the context generator is still built from
		/// the end-of-sentence characters of DefaultEndOfSentenceScanner, not from
		/// the supplied scanner.
		/// </summary>
		/// <param name="dataReader">the source of training sentences
		/// </param>
		/// <param name="scanner">the scanner used to find candidate sentence endings
		/// </param>
		/// <exception cref="System.ArgumentNullException">
		/// <code>dataReader</code> or <code>scanner</code> is null.
		/// </exception>
		public SentenceDetectionEventReader(SharpEntropy.ITrainingDataReader dataReader, IEndOfSentenceScanner scanner) : this(dataReader, scanner, new SentenceDetectionContextGenerator(DefaultEndOfSentenceScanner.GetEndOfSentenceCharacters()))
		{
		}
		
		/// <summary>
		/// Class constructor taking an explicit scanner and context generator.  The first
		/// token (and, if available, one token of look-ahead) is read from
		/// <code>dataReader</code> immediately so that events are ready to be consumed.
		/// </summary>
		/// <param name="dataReader">the source of training sentences
		/// </param>
		/// <param name="scanner">the scanner used to find candidate sentence endings
		/// </param>
		/// <param name="contextGenerator">the generator used to build the context of each event
		/// </param>
		/// <exception cref="System.ArgumentNullException">
		/// Any of the arguments is null.
		/// </exception>
		public SentenceDetectionEventReader(SharpEntropy.ITrainingDataReader dataReader, IEndOfSentenceScanner scanner, SharpEntropy.IContextGenerator contextGenerator)
		{
			// Fail fast with a descriptive exception rather than a NullReferenceException later on.
			if (dataReader == null)
			{
				throw new ArgumentNullException("dataReader");
			}
			if (scanner == null)
			{
				throw new ArgumentNullException("scanner");
			}
			if (contextGenerator == null)
			{
				throw new ArgumentNullException("contextGenerator");
			}

			mDataReader = dataReader;
			mScanner = scanner;
			mContextGenerator = contextGenerator;

			if (mDataReader.HasNext())
			{
				string current = (string) mDataReader.NextToken();
				if (mDataReader.HasNext())
				{
					mNext = (string) mDataReader.NextToken();
				}
				AddNewEvents(current);
			}
		}
		
		/// <summary>
		/// Removes and returns the next queued training event, reading further
		/// sentences from the data reader if the queue is currently empty.
		/// </summary>
		/// <returns>
		/// the next training event
		/// </returns>
		/// <exception cref="System.InvalidOperationException">
		/// There are no more events to read.
		/// </exception>
		public virtual SharpEntropy.TrainingEvent ReadNextEvent()
		{
			// HasNext refills the queue, so callers that skip the HasNext check still
			// get the next event instead of a NullReferenceException.
			if (null == mHead && !HasNext())
			{
				throw new InvalidOperationException("There are no more training events to read.");
			}

			SentenceDetectionEvent topEvent = mHead;
			mHead = topEvent.NextEvent;
			if (null == mHead)
			{
				mTail = null;
			}
			return topEvent;
		}
		
		/// <summary>
		/// Scans one sentence for candidate end-of-sentence positions and appends an
		/// event for each candidate to the queue.  The candidate at the last character
		/// of the trimmed sentence is given the outcome "T"; every other candidate is
		/// given the outcome "F".
		/// </summary>
		/// <param name="token">the sentence to generate events for
		/// </param>
		private void AddNewEvents(string token)
		{
			System.Text.StringBuilder buffer = mBuffer;
			try
			{
				string sentence = token.Trim();
				buffer.Append(sentence);
				int sentenceEndPosition = buffer.Length - 1;

				// Add the first word of the following sentence so that the context of the
				// true sentence ending includes what comes after it.  A sentence that is
				// empty after trimming has no ending and so gets no look-ahead (and no events).
				if (mNext != null && sentence.Length > 0)
				{
					int positionAfterFirstWordInNext = mNext.IndexOf(' ');
					if (positionAfterFirstWordInNext != -1)
					{
						// This should maybe be changed so that it usually adds a space
						// before the next sentence, but sometimes leaves no space.
						buffer.Append(' ');
						buffer.Append(mNext, 0, positionAfterFirstWordInNext);
					}
				}

				foreach (int candidate in mScanner.GetPositions(buffer))
				{
					Util.Pair pair = new Util.Pair(buffer, candidate);
					string type = (candidate == sentenceEndPosition) ? "T" : "F";
					SentenceDetectionEvent sentenceEvent = new SentenceDetectionEvent(type, mContextGenerator.GetContext(pair));

					// Append to the end of the queue.
					if (null == mHead)
					{
						mHead = sentenceEvent;
					}
					else
					{
						mTail.NextEvent = sentenceEvent;
					}
					mTail = sentenceEvent;
				}
			}
			finally
			{
				// Always reset the shared buffer, even if the scanner or context generator
				// throws, so that a later call does not see stale text.
				buffer.Length = 0;
			}
		}
		
		/// <summary>
		/// Tests whether there are any events remaining.  If the queue is empty, further
		/// sentences are read from the data reader until one of them produces an event
		/// or the data is exhausted.
		/// </summary>
		/// <returns>
		/// true if a subsequent call to ReadNextEvent will return an event; false otherwise
		/// </returns>
		public virtual bool HasNext()
		{
			while (null == mHead && mNext != null)
			{
				string current = mNext;
				if (mDataReader.HasNext())
				{
					mNext = (string) mDataReader.NextToken();
				}
				else
				{
					mNext = null;
				}
				AddNewEvents(current);
			}
			return (null != mHead);
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -