📄 defaultchunkercontextgenerator.cs

📁 英语句子自然语言处理统计分析例子 Statistical parsing of English sentences Shows how to generate parse trees for
💻 CS
字号:
//Copyright (C) 2005 Richard J. Northedge
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

//This file is based on the Chunker.java source file found in the
//original java implementation of OpenNLP.  That source file contains the following header:

//Copyright (C) 2003 Thomas Morton
// 
//This library is free software; you can redistribute it and/or
//modify it under the terms of the GNU Lesser General Public
//License as published by the Free Software Foundation; either
//version 2.1 of the License, or (at your option) any later version.
// 
//This library is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//GNU Lesser General Public License for more details.
// 
//You should have received a copy of the GNU Lesser General Public
//License along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

using System;
using System.Collections;

namespace OpenNLP.Tools.Chunker
{
	/// <summary>
	/// Default context generator for the chunker.  The feature set is based on the chunking model 
	/// described in Fei Sha and Fernando Pereira. Shallow parsing with conditional random fields. 
	/// In Proceedings of HLT-NAACL 2003. Association for Computational Linguistics, 2003.
	/// </summary>
	/// <author> 
	/// Tom Morton
	/// </author>
	public class DefaultChunkerContextGenerator : IChunkerContextGenerator
	{
		/// <summary>
		/// Initializes a new instance of the default chunker context generator.
		/// </summary>
		public DefaultChunkerContextGenerator()
		{
		}

		/// <summary>
		/// Builds the chunking contexts for one token from a packed argument array.
		/// </summary>
		/// <param name="input">
		/// An object array laid out as follows:
		/// [0]: boxed integer, the index of the token for which the context is built.
		/// [1]: object array, the tokens of the sentence (their ToString() values are the token text).
		/// [2]: a Util.Sequence whose Outcomes hold the previous decisions.
		/// [3]: string array, the POS tags for the tokens.
		/// </param>
		/// <returns>
		/// An array of predictive contexts on which a model bases its decisions.
		/// </returns>
		public virtual string[] GetContext(object input)
		{
			object[] packed = (object[]) input;

			// The outcomes are fetched first and converted last, mirroring the order
			// in which the individual elements of the packed array are consumed.
			ArrayList outcomes = ((Util.Sequence) packed[2]).Outcomes;
			int position = (int) packed[0];
			object[] sentence = (object[]) packed[1];
			string[] posTags = (string[]) packed[3];
			string[] decisions = (string[]) outcomes.ToArray(typeof(string));

			return GetContext(position, sentence, posTags, decisions);
		}

		/// <summary>
		/// Builds the chunking contexts for one token, taking the POS tags from the additional context.
		/// </summary>
		/// <param name="index">
		/// The index of the token in the specified tokens array for which the context should be constructed.
		/// </param>
		/// <param name="sequence">
		/// The tokens of the sentence.  The <code>ToString</code> methods of these objects should return the token text.
		/// </param>
		/// <param name="priorDecisions">
		/// The previous decisions made in the tagging of this sequence.  Only indices less than index will be examined.
		/// </param>
		/// <param name="additionalContext">
		/// Object array of additional context information.  Its first element is cast to a string array
		/// and used as the POS tags for the specified tokens.
		/// </param>
		/// <returns>
		/// An array of predictive contexts on which a model bases its decisions.
		/// </returns>
		public virtual string[] GetContext(int index, object[] sequence, string[] priorDecisions, object[] additionalContext)
		{
			string[] posTags = (string[]) additionalContext[0];
			return GetContext(index, sequence, posTags, priorDecisions);
		}

		/// <summary>
		/// Builds the chunking contexts for one token from its five-token window of words and POS tags
		/// and from the two preceding decisions.
		/// </summary>
		/// <param name="tokenIndex">
		/// The index of the token in the specified tokens array for which the context should be constructed.
		/// </param>
		/// <param name="tokens">
		/// The tokens of the sentence.  The <code>ToString</code> methods of these objects should return the token text.
		/// </param>
		/// <param name="tags">
		/// The POS tags for the specified tokens.
		/// </param>
		/// <param name="predicates">
		/// The previous decisions made in the tagging of this sequence.  Only indices less than tokenIndex will be examined.
		/// </param>
		/// <returns>
		/// An array of predictive contexts on which a model bases its decisions.
		/// </returns>
		public virtual string[] GetContext(int tokenIndex, object[] tokens, string[] tags, string[] predicates)
		{
			// Two positions back: word, tag and decision, or the "bos" marker when
			// the window reaches before the start of the sentence.
			// NOTE(review): the "p_2" prefix has no '=' separator, unlike every other prefix
			// here.  It is kept exactly as-is because these strings are the predicates a model
			// bases its decisions on; confirm against the models in use before changing it.
			bool hasTwoBefore = tokenIndex >= 2;
			string wordMinus2 = hasTwoBefore ? "w_2=" + tokens[tokenIndex - 2] : "w_2=bos";
			string tagMinus2 = hasTwoBefore ? "t_2=" + tags[tokenIndex - 2] : "t_2=bos";
			string decisionMinus2 = hasTwoBefore ? "p_2" + predicates[tokenIndex - 2] : "p_2=bos";

			// One position back.
			bool hasOneBefore = tokenIndex >= 1;
			string wordMinus1 = hasOneBefore ? "w_1=" + tokens[tokenIndex - 1] : "w_1=bos";
			string tagMinus1 = hasOneBefore ? "t_1=" + tags[tokenIndex - 1] : "t_1=bos";
			string decisionMinus1 = hasOneBefore ? "p_1=" + predicates[tokenIndex - 1] : "p_1=bos";

			// The current token.
			string wordAt = "w0=" + tokens[tokenIndex];
			string tagAt = "t0=" + tags[tokenIndex];

			// One position ahead, or the "eos" marker past the end of the sentence.
			bool hasOneAfter = tokenIndex + 1 < tokens.Length;
			string wordPlus1 = hasOneAfter ? "w1=" + tokens[tokenIndex + 1] : "w1=eos";
			string tagPlus1 = hasOneAfter ? "t1=" + tags[tokenIndex + 1] : "t1=eos";

			// Two positions ahead.
			bool hasTwoAfter = tokenIndex + 2 < tokens.Length;
			string wordPlus2 = hasTwoAfter ? "w2=" + tokens[tokenIndex + 2] : "w2=eos";
			string tagPlus2 = hasTwoAfter ? "t2=" + tags[tokenIndex + 2] : "t2=eos";

			return new string[]
			{
				// word features
				wordMinus2,
				wordMinus1,
				wordAt,
				wordPlus1,
				wordPlus2,
				wordMinus1 + wordAt,
				wordAt + wordPlus1,

				// tag features
				tagMinus2,
				tagMinus1,
				tagAt,
				tagPlus1,
				tagPlus2,
				tagMinus2 + tagMinus1,
				tagMinus1 + tagAt,
				tagAt + tagPlus1,
				tagPlus1 + tagPlus2,
				tagMinus2 + tagMinus1 + tagAt,
				tagMinus1 + tagAt + tagPlus1,
				tagAt + tagPlus1 + tagPlus2,

				// previous-decision features
				decisionMinus2,
				decisionMinus1,
				decisionMinus2 + decisionMinus1,

				// previous decision combined with tags
				decisionMinus1 + tagMinus2,
				decisionMinus1 + tagMinus1,
				decisionMinus1 + tagAt,
				decisionMinus1 + tagPlus1,
				decisionMinus1 + tagPlus2,
				decisionMinus1 + tagMinus2 + tagMinus1,
				decisionMinus1 + tagMinus1 + tagAt,
				decisionMinus1 + tagAt + tagPlus1,
				decisionMinus1 + tagPlus1 + tagPlus2,
				decisionMinus1 + tagMinus2 + tagMinus1 + tagAt,
				decisionMinus1 + tagMinus1 + tagAt + tagPlus1,
				decisionMinus1 + tagAt + tagPlus1 + tagPlus2,

				// previous decision combined with words
				decisionMinus1 + wordMinus2,
				decisionMinus1 + wordMinus1,
				decisionMinus1 + wordAt,
				decisionMinus1 + wordPlus1,
				decisionMinus1 + wordPlus2,
				decisionMinus1 + wordMinus1 + wordAt,
				decisionMinus1 + wordAt + wordPlus1
			};
		}
	}
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -