⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 buildcontextgenerator.cs

📁 英语句子自然语言处理统计分析例子 Statistical parsing of English sentences Shows how to generate parse trees for
💻 CS
字号:
//Copyright (C) 2005 Richard J. Northedge
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

//This file is based on the BuildContextGenerator.java source file found in the
//original java implementation of OpenNLP.  That source file contains the following header:

//Copyright (C) 2003 Thomas Morton
// 
//This library is free software; you can redistribute it and/or
//modify it under the terms of the GNU Lesser General Public
//License as published by the Free Software Foundation; either
//version 2.1 of the License, or (at your option) any later version.
// 
//This library is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//GNU Lesser General Public License for more details.
// 
//You should have received a copy of the GNU Lesser General Public
//License along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

using System;
using System.Collections;
using System.Text;

namespace OpenNLP.Tools.Parser
{
	/// <summary>
	/// Generates the predictive contexts (lists of feature strings) used for deciding how
	/// constituents should be combined together.
	/// </summary>
	public class BuildContextGenerator : SharpEntropy.IContextGenerator
	{
		/// <summary>
		/// Placeholder written into a feature for every field of a neighbouring constituent
		/// that does not exist (i.e. the position lies outside the constituent array).
		/// </summary>
		private const string mEndOfSentence = "eos";

		/// <summary>
		/// Largest number of features a single call can produce:
		/// 1 default + 10 single-constituent + 8 pair + 15 triple + at most 1 punctuation feature.
		/// Used only to presize the feature list.
		/// </summary>
		private const int MaximumFeatureCount = 35;

		/// <summary> 
		/// Creates a new context generator for making decisions about combining constituents together.
		/// </summary>
		public BuildContextGenerator()
		{
		}

		/// <summary>
		/// Untyped overload that unpacks its argument and forwards to
		/// <see cref="GetContext(Parse[], int)"/>.
		/// </summary>
		/// <param name="input">
		/// An object array whose first element is the <c>Parse[]</c> of constituents and whose
		/// second element is the boxed <c>int</c> index of the constituent being considered.
		/// </param>
		/// <returns>
		/// The context for building constituents at the specified index.
		/// </returns>
		public virtual string[] GetContext(object input)
		{
			object[] parameters = (object[]) input;
			return GetContext((Parse[]) parameters[0], (int) parameters[1]);
		}

		/// <summary>
		/// Builds the full feature for the constituent at the given relative offset.
		/// The result has the form <c>offset=type|head</c>; for negative offsets the label is
		/// included as well: <c>offset=label|type|head</c>. A missing constituent yields
		/// <c>offset=eos|eos|eos</c>.
		/// </summary>
		/// <param name="inputParse">
		/// The constituent at that offset, or null if there is none.
		/// </param>
		/// <param name="index">
		/// Offset of the constituent relative to the one being considered (-2 to 2).
		/// </param>
		private string MakeConstituent(Parse inputParse, int index)
		{
			StringBuilder feature = new StringBuilder(20);
			// Format the offset with the invariant culture: the current culture's negative sign
			// is not guaranteed to be "-", and the feature text must be identical on every machine.
			feature.Append(index.ToString(System.Globalization.CultureInfo.InvariantCulture)).Append("=");
			if (inputParse != null)
			{
				if (index < 0)
				{
					feature.Append(inputParse.Label).Append("|");
				}
				feature.Append(inputParse.Type).Append("|").Append(inputParse.Head.ToString());
			}
			else
			{
				feature.Append(mEndOfSentence).Append("|").Append(mEndOfSentence).Append("|").Append(mEndOfSentence);
			}
			return feature.ToString();
		}

		/// <summary>
		/// Builds the back-off feature for the constituent at the given relative offset, i.e. the
		/// same feature as <see cref="MakeConstituent"/> but without the head.
		/// The result has the form <c>offset*=type</c>; for negative offsets the label is
		/// included as well: <c>offset*=label|type</c>. A missing constituent yields
		/// <c>offset*=eos|eos</c>.
		/// </summary>
		/// <param name="inputParse">
		/// The constituent at that offset, or null if there is none.
		/// </param>
		/// <param name="index">
		/// Offset of the constituent relative to the one being considered (-2 to 2).
		/// </param>
		private string MakeConstituentBackOff(Parse inputParse, int index)
		{
			StringBuilder feature = new StringBuilder(20);
			// Invariant formatting for the same reason as in MakeConstituent.
			feature.Append(index.ToString(System.Globalization.CultureInfo.InvariantCulture)).Append("*=");
			if (inputParse != null)
			{
				if (index < 0)
				{
					feature.Append(inputParse.Label).Append("|");
				}
				feature.Append(inputParse.Type);
			}
			else
			{
				feature.Append(mEndOfSentence).Append("|").Append(mEndOfSentence);
			}
			return feature.ToString();
		}

		/// <summary>
		/// Tests whether the label of the specified constituent begins with
		/// <c>MaximumEntropyParser.StartPrefix</c>.
		/// </summary>
		/// <param name="parse">
		/// The constituent to test; its label must not be null.
		/// </param>
		private static bool HasStartLabel(Parse parse)
		{
			// Labels are machine identifiers, not natural-language text, so compare ordinally
			// rather than with the current culture's linguistic rules.
			return parse.Label.StartsWith(MaximumEntropyParser.StartPrefix, StringComparison.Ordinal);
		}

		/// <summary>
		/// Scans backwards from the constituent before <paramref name="index"/> and adds
		/// <paramref name="featureName"/> if a constituent whose text equals
		/// <paramref name="matchingToken"/> is found. The scan stops at the first match, or at the
		/// first constituent whose label starts with the start prefix, whichever comes first
		/// (the text is tested before the label).
		/// </summary>
		private static void AddMatchFeature(ArrayList features, Parse[] constituents, int index, string matchingToken, string featureName)
		{
			for (int parseIndex = index - 1; parseIndex >= 0; parseIndex--)
			{
				Parse testParse = constituents[parseIndex];
				if (testParse.ToString() == matchingToken)
				{
					features.Add(featureName);
					return;
				}
				if (HasStartLabel(testParse))
				{
					return;
				}
			}
		}

		/// <summary>
		/// Scans backwards from the constituent before <paramref name="index"/> for the nearest
		/// constituent whose label starts with the start prefix, and adds the "endofsentence"
		/// feature only if that constituent is the first one in the array.
		/// </summary>
		private static void AddEndOfSentenceFeature(ArrayList features, Parse[] constituents, int index)
		{
			for (int parseIndex = index - 1; parseIndex >= 0; parseIndex--)
			{
				if (HasStartLabel(constituents[parseIndex]))
				{
					if (parseIndex == 0)
					{
						features.Add("endofsentence");
					}
					return;
				}
			}
		}

		/// <summary>
		/// Returns the predictive context used to determine how the constituent at the specified
		/// index should be combined with other constituents.
		/// </summary>
		/// <remarks>
		/// The context consists of, in order: the "default" feature; the full and back-off features
		/// of the constituents at offsets -2 to 2; pair features for offsets (-1,0) and (0,1);
		/// triple features for offsets (-2,-1,0), (-1,0,1) and (0,1,2); and at most one
		/// punctuation feature ("bracketsmatch", "quotesmatch", "iscomma" or "endofsentence").
		/// </remarks>
		/// <param name="constituents">
		/// The constituents which have yet to be combined into new constituents.
		/// </param>
		/// <param name="index">
		/// The index of the constituent which is being considered. It must be a valid index into
		/// <paramref name="constituents"/>.
		/// </param>
		/// <returns>
		/// The context for building constituents at the specified index.
		/// </returns>
		public virtual string[] GetContext(Parse[] constituents, int index)
		{
			ArrayList features = new ArrayList(MaximumFeatureCount);
			int constituentCount = constituents.Length;

			// default
			features.Add("default");

			// Constituents at offsets -2 .. 2; positions outside the array stay null.
			Parse previousPreviousParse = null;
			Parse previousParse = null;
			Parse currentParse = null;
			Parse nextParse = null;
			Parse nextNextParse = null;

			if (index - 2 >= 0)
			{
				previousPreviousParse = constituents[index - 2];
			}
			if (index - 1 >= 0)
			{
				previousParse = constituents[index - 1];
			}
			currentParse = constituents[index];
			if (index + 1 < constituentCount)
			{
				nextParse = constituents[index + 1];
			}
			if (index + 2 < constituentCount)
			{
				nextNextParse = constituents[index + 2];
			}

			// Full features: cons(-2), cons(-1), cons(0), cons(1), cons(2)
			string previousPreviousConstituent = MakeConstituent(previousPreviousParse, -2);
			string previousConstituent = MakeConstituent(previousParse, -1);
			string currentConstituent = MakeConstituent(currentParse, 0);
			string nextConstituent = MakeConstituent(nextParse, 1);
			string nextNextConstituent = MakeConstituent(nextNextParse, 2);

			// Back-off features (no head) for the same five positions
			string previousPreviousConstituentBackOff = MakeConstituentBackOff(previousPreviousParse, -2);
			string previousConstituentBackOff = MakeConstituentBackOff(previousParse, -1);
			string currentConstituentBackOff = MakeConstituentBackOff(currentParse, 0);
			string nextConstituentBackOff = MakeConstituentBackOff(nextParse, 1);
			string nextNextConstituentBackOff = MakeConstituentBackOff(nextNextParse, 2);

			// cons(-2), cons(-1), cons(0), cons(1), cons(2)
			features.Add(previousPreviousConstituent);
			features.Add(previousPreviousConstituentBackOff);
			features.Add(previousConstituent);
			features.Add(previousConstituentBackOff);
			features.Add(currentConstituent);
			features.Add(currentConstituentBackOff);
			features.Add(nextConstituent);
			features.Add(nextConstituentBackOff);
			features.Add(nextNextConstituent);
			features.Add(nextNextConstituentBackOff);

			// cons(-1,0), cons(0,1)
			features.Add(previousConstituent + "," + currentConstituent);
			features.Add(previousConstituentBackOff + "," + currentConstituent);
			features.Add(previousConstituent + "," + currentConstituentBackOff);
			features.Add(previousConstituentBackOff + "," + currentConstituentBackOff);

			features.Add(currentConstituent + "," + nextConstituent);
			features.Add(currentConstituentBackOff + "," + nextConstituent);
			features.Add(currentConstituent + "," + nextConstituentBackOff);
			features.Add(currentConstituentBackOff + "," + nextConstituentBackOff);

			// cons3(-2,-1,0), cons3(-1,0,1), cons3(0,1,2)
			features.Add(previousPreviousConstituent + "," + previousConstituent + "," + currentConstituent);
			features.Add(previousPreviousConstituentBackOff + "," + previousConstituent + "," + currentConstituent);
			features.Add(previousPreviousConstituent + "," + previousConstituentBackOff + "," + currentConstituent);
			features.Add(previousPreviousConstituentBackOff + "," + previousConstituentBackOff + "," + currentConstituent);
			features.Add(previousPreviousConstituentBackOff + "," + previousConstituentBackOff + "," + currentConstituentBackOff);

			features.Add(previousConstituent + "," + currentConstituent + "," + nextConstituent);
			features.Add(previousConstituentBackOff + "," + currentConstituent + "," + nextConstituent);
			features.Add(previousConstituent + "," + currentConstituent + "," + nextConstituentBackOff);
			features.Add(previousConstituentBackOff + "," + currentConstituent + "," + nextConstituentBackOff);
			features.Add(previousConstituentBackOff + "," + currentConstituentBackOff + "," + nextConstituentBackOff);

			features.Add(currentConstituent + "," + nextConstituent + "," + nextNextConstituent);
			features.Add(currentConstituent + "," + nextConstituentBackOff + "," + nextNextConstituent);
			features.Add(currentConstituent + "," + nextConstituent + "," + nextNextConstituentBackOff);
			features.Add(currentConstituent + "," + nextConstituentBackOff + "," + nextNextConstituentBackOff);
			features.Add(currentConstituentBackOff + "," + nextConstituentBackOff + "," + nextNextConstituentBackOff);

			// punct: the current constituent's text selects at most one backward scan
			string currentParseWord = currentParse.ToString();
			switch (currentParseWord)
			{
				case "-RRB-":
					AddMatchFeature(features, constituents, index, "-LRB-", "bracketsmatch");
					break;
				case "-RCB-":
					AddMatchFeature(features, constituents, index, "-LCB-", "bracketsmatch");
					break;
				case "''":
					AddMatchFeature(features, constituents, index, "``", "quotesmatch");
					break;
				case "'":
					AddMatchFeature(features, constituents, index, "`", "quotesmatch");
					break;
				case ",":
					AddMatchFeature(features, constituents, index, ",", "iscomma");
					break;
				case ".":
					// Only a full stop that is the very last constituent is considered.
					if (index == constituentCount - 1)
					{
						AddEndOfSentenceFeature(features, constituents, index);
					}
					break;
			}

			return (string[]) features.ToArray(typeof(string));
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -