⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parse.cs

📁 英语句子自然语言处理统计分析例子 Statistical parsing of English sentences Shows how to generate parse trees for
💻 CS
📖 第 1 页 / 共 2 页
字号:
		
		/// <summary>
		/// Determines whether the specified object is equal to this parse.  Two parses are
		/// considered equal when they have exactly the same run-time type and the same probability.
		/// </summary>
		/// <param name="o">
		/// The object to compare with this parse; may be null.
		/// </param>
		/// <returns>
		/// True if the object is a parse of the same run-time type with an equal probability; otherwise false.
		/// </returns>
		public override bool Equals (Object o)
		{
			// A null reference or an object of another run-time type can never be equal.
			if (o == null || this.GetType() != o.GetType())
			{
				return false;
			}

			Parse other = (Parse)o;
			return (this.Probability == other.Probability);
		}

		/// <summary>
		/// Returns a hash code for this parse, computed from its probability only.
		/// </summary>
		/// <returns>
		/// The hash code of the stored probability value.
		/// </returns>
		public override int GetHashCode ()
		{
			return this.mProbability.GetHashCode();
		}

		#endregion

		/// <summary>
		/// Returns the log probability associated with the pos-tag sequence assigned to this parse.
		/// A node whose only child is a token node contributes the natural log of its own probability;
		/// any other node contributes the sum of the values returned by its children.
		/// </summary>
		/// <returns>
		/// The summed log probability of the tag nodes below (or at) this parse.
		/// </returns>
		/// <exception cref="ParseException">
		/// Thrown when a node without any children is reached.
		/// </exception>
		public virtual double GetTagSequenceProbability()
		{
			int childCount = mParts.Count;
			if (childCount == 0)
			{
				// A childless node is neither a tag node nor an internal node.
				throw new ParseException("Parse.GetTagSequenceProbability(): Wrong base case!");
			}

			bool isTagNode = (childCount == 1)
				&& (((Parse)mParts[0]).Type == MaximumEntropyParser.TokenNode);
			if (isTagNode)
			{
				return System.Math.Log(mProbability);
			}

			double total = 0.0;
			foreach (Parse child in mParts)
			{
				total += child.GetTagSequenceProbability();
			}
			return total;
		}

		/// <summary>
		/// Inserts the specified constituent into this parse based on its text span.
		/// Children of this parse whose spans are contained in the constituent's span are moved
		/// underneath the constituent.  If a child's span contains the constituent's span, the
		/// insertion is delegated to that child.  Otherwise the constituent becomes a direct
		/// child of this parse at the position where scanning stopped.
		/// </summary>
		/// <param name="constituent">
		/// The constituent to be inserted.
		/// </param>
		/// <exception cref="ParseException">
		/// Thrown when the span of this parse does not contain the span of the constituent.
		/// </exception>
		public virtual void Insert(Parse constituent)
		{
			Util.Span newSpan = constituent.mSpan;
			if (!mSpan.Contains(newSpan))
			{
				throw new ParseException("Inserting constituent not contained in the sentence!");
			}

			int position = 0;
			while (position < mParts.Count)
			{
				Parse child = (Parse)mParts[position];
				Util.Span childSpan = child.mSpan;

				if (childSpan.Start > newSpan.End)
				{
					// This child starts after the constituent ends: insert before it.
					break;
				}

				if (newSpan.Contains(childSpan))
				{
					// The constituent covers this child, so the child is re-parented under it.
					// The position is not advanced because the next child has shifted into this slot.
					mParts.RemoveAt(position);
					constituent.mParts.Add(child);
					child.Parent = constituent;
					continue;
				}

				if (childSpan.Contains(newSpan))
				{
					// The constituent belongs somewhere inside this child.
					child.Insert(constituent);
					return;
				}

				position++;
			}

			mParts.Insert(position, constituent);
			constituent.Parent = this;
		}
		
		/// <summary>
		/// Builds a Penn Treebank-style bracketed representation of this parse.
		/// Every node other than a token node is wrapped as "(TYPE ...)"; text that lies between
		/// or after the children's spans is copied verbatim from the underlying text.
		/// </summary>
		/// <returns>
		/// The bracketed string for this parse and all of its sub-parses.
		/// </returns>
		public virtual string Show()
		{
			bool bracketed = (mType != MaximumEntropyParser.TokenNode);
			StringBuilder output = new StringBuilder();
			if (bracketed)
			{
				output.Append("(").Append(mType).Append(" ");
			}

			// Position in the text up to which output has already been produced.
			int cursor = mSpan.Start;
			foreach (Parse child in mParts)
			{
				Util.Span childSpan = child.mSpan;
				if (childSpan.Start > cursor)
				{
					// Copy the text lying between the previous child and this one.
					output.Append(mText.Substring(cursor, childSpan.Start - cursor));
				}
				output.Append(child.Show());
				cursor = childSpan.End;
			}

			// Copy whatever text remains after the last child (or the whole span for a leaf).
			output.Append(mText.Substring(cursor, mSpan.End - cursor));
			if (bracketed)
			{
				output.Append(")");
			}
			return output.ToString();
		}
	
		/// <summary>
		/// Recomputes the head of this parse and of every sub-parse, bottom-up, and stores the
		/// result in each node.  A node without children is its own head, and so is a node for
		/// which the rules return no head.
		/// </summary>
		/// <param name="rules">
		/// The head rules which determine how the head of the parse is computed.
		/// </param>
		public virtual void UpdateHeads(IHeadRules rules)
		{
			if (mParts == null || mParts.Count == 0)
			{
				mHead = this;
				return;
			}

			// Children first, so their heads are available when this node's head is chosen.
			int childCount = mParts.Count;
			for (int index = 0; index < childCount; index++)
			{
				((Parse)mParts[index]).UpdateHeads(rules);
			}

			Parse[] children = (Parse[])mParts.ToArray(typeof(Parse));
			mHead = rules.GetHead(children, mType);
			if (mHead == null)
			{
				mHead = this;
			}
		}
		
		/// <summary>
		/// Returns the parse nodes below this node which are pos tags.  The tree is walked
		/// depth-first starting from this node's children; a node that is a pos tag is collected
		/// and not expanded further, any other node is replaced by its children.
		/// </summary>
		/// <returns>
		/// The pos-tag nodes found below this node, in the order they were encountered.
		/// </returns>
		public virtual Parse[] GetTagNodes()
		{
			ArrayList tagNodes = new ArrayList();
			ArrayList pending = new ArrayList(mParts);
			while (pending.Count > 0)
			{
				Parse node = (Parse)pending[0];
				pending.RemoveAt(0);
				if (!node.IsPosTag)
				{
					// Expand in place so that the children are visited before the node's siblings.
					pending.InsertRange(0, node.GetChildren());
					continue;
				}
				tagNodes.Add(node);
			}
			return (Parse[])tagNodes.ToArray(typeof(Parse));
		}

		/// <summary>
		/// Returns the deepest shared parent of this node and the specified node. 
		/// If the nodes are identical then their parent is returned.  
		/// If one node is the parent of the other then the parent node is returned.
		/// </summary>
		/// <remarks>
		/// Nodes are compared by reference.  Equals on this class compares probabilities only,
		/// so an equality-based lookup would confuse distinct nodes that happen to share a
		/// probability value.
		/// </remarks>
		/// <param name="node">
		/// The node from which parents are compared to this node's parents.
		/// </param>
		/// <returns>
		/// the deepest shared parent of this node and the specified node, or null if the
		/// specified node is null or the two nodes have no ancestor in common.
		/// </returns>
		public virtual Parse GetCommonParent(Parse node)
		{
			if (this == node)
			{
				return this.Parent;
			}

			// Count the nodes on each path to the top of the tree.
			int thisDepth = 0;
			for (Parse ancestor = this; ancestor != null; ancestor = ancestor.Parent)
			{
				thisDepth++;
			}
			int nodeDepth = 0;
			for (Parse ancestor = node; ancestor != null; ancestor = ancestor.Parent)
			{
				nodeDepth++;
			}

			// Lift the deeper node until both are at the same level.
			Parse left = this;
			Parse right = node;
			while (thisDepth > nodeDepth)
			{
				left = left.Parent;
				thisDepth--;
			}
			while (nodeDepth > thisDepth)
			{
				right = right.Parent;
				nodeDepth--;
			}

			// Climb in lock-step; the first node reached from both sides is the answer.
			while (left != null && right != null)
			{
				if ((object)left == (object)right)
				{
					return left;
				}
				left = left.Parent;
				right = right.Parent;
			}
			return null;
		}
	
		/// <summary>
		/// Sets the Parent of every node below this parse, recursively, so that each child
		/// points back to the node whose part list contains it.
		/// </summary>
		protected internal void UpdateChildParents()
		{
			foreach (Parse child in mParts)
			{
				// Link the child to this node first, then fix up the child's own sub-tree.
				child.Parent = this;
				child.UpdateChildParents();
			}
		}

		#region static methods used to create a Parse from a Penn Treebank parse string

		/// <summary>
		/// The pattern used to find the base constituent label of a Penn Treebank labeled constituent.
		/// It matches, anchored at the start of the input, the longest run of characters that are
		/// not a space, '=' or '-'; an input beginning with one of those characters does not match.
		/// </summary>
		private static Regex mTypePattern = new Regex("^([^ =-]+)");

		/// <summary>
		/// The pattern used to identify tokens in Penn Treebank labeled constituents.
		/// It matches, anchored at the start of the input, a label (no spaces or parentheses),
		/// a single space, the token (capture group 1, no spaces or parentheses), optional
		/// whitespace and a closing parenthesis.
		/// </summary>
		private static Regex mTokenPattern = new Regex("^[^ ()]+ ([^ ()]+)\\s*\\)");		

		/// <summary>
		/// Constituent labels that begin with '-' and are returned verbatim.  The type pattern
		/// rejects any input starting with '-', so these labels have to be recognised explicitly.
		/// </summary>
		private static readonly string[] mLiteralTypes = new string[] {"-LCB-", "-RCB-", "-LRB-", "-RRB-", "-NONE-"};

		/// <summary>
		/// Extracts the constituent label from the text that follows an opening parenthesis
		/// in a Penn Treebank parse string.
		/// </summary>
		/// <param name="rest">
		/// The parse string starting immediately after the opening parenthesis.
		/// </param>
		/// <returns>
		/// One of the literal labels (-LCB-, -RCB-, -LRB-, -RRB-, -NONE-) when the text starts with it,
		/// otherwise the leading text matched by the type pattern, or null if nothing matches.
		/// </returns>
		private static string GetType(string rest)
		{
			foreach (string literalType in mLiteralTypes)
			{
				// Ordinal prefix test: labels are exact identifiers, not linguistic text.
				if (string.CompareOrdinal(rest, 0, literalType, 0, literalType.Length) == 0)
				{
					return literalType;
				}
			}

			Match typeMatch = mTypePattern.Match(rest);
			if (typeMatch.Success)
			{
				return typeMatch.Value;
			}
			return null;
		}
		
		/// <summary>
		/// Extracts the token from the text that follows an opening parenthesis in a Penn Treebank
		/// parse string, when that text has the form "LABEL token)".
		/// </summary>
		/// <param name="rest">
		/// The parse string starting immediately after the opening parenthesis.
		/// </param>
		/// <returns>
		/// The token text alone (without its label or the closing parenthesis), or null if the
		/// text does not start with a labeled token.
		/// </returns>
		private static string GetToken(string rest)
		{
			Match tokenMatch = mTokenPattern.Match(rest);
			if (!tokenMatch.Success)
			{
				return null;
			}
			// Group 1 holds just the token; the full match would also include the label,
			// the separating space and the closing parenthesis.
			return tokenMatch.Groups[1].Value;
		}
		
		/// <summary>
		/// Generates a Parse structure from the specified tree-bank style parse string.
		/// The string is scanned once: each opening parenthesis starts a constituent (and
		/// contributes its token, if any, to the sentence text), each closing parenthesis ends
		/// the most recently opened constituent.  The collected constituents are then inserted
		/// into a root parse that spans the whole reconstructed text.
		/// </summary>
		/// <param name="parse">
		/// A tree-bank style parse string.
		/// </param>
		/// <returns>
		/// a Parse structure for the specified tree-bank style parse string.
		/// </returns>
		/// <exception cref="ParseException">
		/// Thrown when no constituent label can be read after an opening parenthesis.
		/// </exception>
		public static Parse FromParseString(string parse)
		{
			StringBuilder sentence = new StringBuilder();
			int offset = 0;

			// Labels and start offsets of the constituents that are currently open.
			Stack openLabels = new Stack();
			Stack openStarts = new Stack();

			// Labels and spans of the finished constituents, in order of completion.
			ArrayList labels = new ArrayList();
			ArrayList spans = new ArrayList();

			for (int index = 0; index < parse.Length; index++)
			{
				switch (parse[index])
				{
					case '(':
					{
						string rest = parse.Substring(index + 1);
						string label = GetType(rest);
						if (label == null)
						{
							throw new ParseException("null type for: " + rest);
						}
						string token = GetToken(rest);
						openLabels.Push(label);
						openStarts.Push(offset);
						if (token != null && label != "-NONE-")
						{
							labels.Add(MaximumEntropyParser.TokenNode);
							spans.Add(new Util.Span(offset, offset + token.Length));
							// Tokens are joined by single spaces in the reconstructed text.
							sentence.Append(token).Append(" ");
							offset += token.Length + 1;
						}
						break;
					}
					case ')':
					{
						string label = (string)openLabels.Pop();
						object boxedStart = openStarts.Pop();
						if (label != "-NONE-")
						{
							// offset - 1 leaves out the space appended after the last token.
							labels.Add(label);
							spans.Add(new Util.Span((int)boxedStart, offset - 1));
						}
						break;
					}
				}
			}

			string text = sentence.ToString();
			Parse root = new Parse(text, new Util.Span(0, text.Length), MaximumEntropyParser.TopNode, 1);
			for (int item = 0; item < labels.Count; item++)
			{
				string label = (string)labels[item];
				if (label == MaximumEntropyParser.TopNode)
				{
					// The root itself already represents the top node.
					continue;
				}
				root.Insert(new Parse(text, (Util.Span)spans[item], label, 1));
			}
			return root;
		}

		#endregion		
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -