⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 similargenerator.cs

📁 是用c#编写的
💻 CS
字号:
/* Generate similar sentences of a given setence
 * Author : Dao Ngoc Thanh , thanh.dao@gmx.net 
 * (c) Dao Ngoc Thanh.
 * Methods Used: Simulated Annealing & BackTracking generator
 * 
 * Acknowledgements: To J. Martin for the reuse of his original function "FindSynonyms" .
 * 
 */

using System;
using System.Collections;
using Wnlib;
using WordsMatching;

namespace SimilarSentence
{
	/// <summary>
	/// This class inputs a sentence and produces a list of approximately similar sentences
    /// 
	/// </summary>
	public class SimilarGenerator
	{
		private string _originalSentence;
		const int CONTEXT_SIZE=6;//Local disambiguation within the context size 
		private string[][][][] _dictInfo ;//[words][relations][tokens]
		
		Tokeniser tokenize=new Tokeniser() ;

		ArrayList list=new ArrayList() ;
		MyWordInfo[] _myPos;
		int _numItems=0;
		int _numWord;
		private string bestSentence;
		int bestScore=0;
		private MyWordInfo[][] _alterWord;
		private int[][] dx;
		private int[] _selected;
		const int _max=50;

		private MyWordInfo[] _contextWords;		
		private Opt[] _relatedness=null;
		private int _overallScore=0;					
		private int[][][][] _scores;//[i][alter_i][j][alter_j]

		public SimilarGenerator(MyWordInfo[] pos, string originalSentence)
		{
			_myPos=pos;
			_originalSentence=originalSentence;
			MyInit();			
			Generate();
			list=k_best_sentence;
			
			//
			// TODO: Add constructor logic here
			//
		}
		

		public ArrayList GetResult
		{
			get
			{	
				list.Insert(0, bestSentence) ;
				return list;
			}
		}
		
		public MyWordInfo[] FindSynonyms(ref MyWordInfo pos, bool includeMorphs)
		{
			pos.Word = pos.Word.ToLower();
			Wnlib.Index index = Wnlib.Index.lookup( pos.Word, PartOfSpeech.of( pos.Pos  ) );
		
			if( index == null )
			{
				if( !includeMorphs )
					return null;

				Wnlib.MorphStr morphs = new Wnlib.MorphStr( pos.Word, Wnlib.PartOfSpeech.of( pos.Pos  ) );
				string morph = "";
				while( ( morph = morphs.next() ) != null )
				{
					index = Wnlib.Index.lookup( morph, Wnlib.PartOfSpeech.of( pos.Pos  ) );
					pos.Word=morph;
					if( index != null )
						break;
				}
			}

			
			if( index == null )
				return null;
			
			return LookupCandidates( index, pos );
		}

		public static int GetSynsetIndex(string word, PartsOfSpeech pos)
		{
			word=word.ToLower() ;
			//word=RemoveBadChars (word);
			Wnlib.Index index=Wnlib.Index.lookup( word, PartOfSpeech.of(pos) );
			
			if( index == null )
			{
				Wnlib.MorphStr morphs=new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of( pos  ) );
				string morph = "";
				while( ( morph = morphs.next() ) != null )
				{
					index = Wnlib.Index.lookup(morph, Wnlib.PartOfSpeech.of(pos) );
					if( index != null )
						break;
				}
			}			

			if (index == null) return -1;
			else 
				return 0;
		}

		class CompareLexeme : IComparer
		{
			public int Compare(object x, object y) //descent sorting
			{
				return ((MyWordInfo)y).Frequency - ((MyWordInfo)x).Frequency;
			}
		}

		private MyWordInfo[] LookupCandidates(Wnlib.Index index, MyWordInfo pos )
		{						
			if (pos.Sense < 0) pos.Sense=1;						
			SynSet synset=new Wnlib.SynSet( index.offs[pos.Sense - 1 ], index.pos , index.wd, null , pos.Sense - 1);					
						
			ArrayList lexemes=new ArrayList() ;
			ArrayList synIndex=new ArrayList() ;

			foreach (Lexeme obj in synset.words)
			{
				lexemes.Add(obj) ;
				synIndex.Add(index.offs[pos.Sense - 1 ]);
			}
			
			if (index.offs.Length > 1)
			{
				if (lexemes.Count <= 1)
				{
					for(int i=0; i < index.offs.Length; i++ )
					{				
						synset=new Wnlib.SynSet( index.offs[i], index.pos, index.wd, null, i );

						foreach (Lexeme obj in synset.words)
						{
							synIndex.Add(index.offs[i]);
							lexemes.Add(obj) ;
						}
					}
				}
				else
				{
					synset=new Wnlib.SynSet( index.offs[0], index.pos, index.wd, null, 0 );
					int count=0; //get top most frequency word senses
					foreach (Lexeme obj in synset.words)
					{
						lexemes.Add(obj) ;
						synIndex.Add(index.offs[0]);
						++count;
						if (count > 4) break;
					}

				}
			}
			
			ArrayList sortedSet=new ArrayList() ;
			Hashtable trace=new Hashtable() ;
			int hasSem=0;
			for (int i = 0; i < lexemes.Count; i++)
			{				
				Lexeme word=(Lexeme)lexemes[i];				
				word.word=word.word.ToLower() ;

				int senIndex=(int)synIndex[i];
				if (senIndex != -1  && word.wnsns > 0)
				{
					word.semcor=new Wnlib.SemCor(word, senIndex);
					lexemes[i]=word;					
					++hasSem;
				}

				if (!trace.ContainsKey(word.word) )					
				{					
					if ((word.semcor != null &&  word.semcor.semcor  > 0 ) || (hasSem < 4))
					{
						trace[word.word]=1;
						sortedSet.Add(word) ;
					}
				}
				//catch
				{}
			}
			
			Lexeme[] words=(Lexeme[])sortedSet.ToArray( typeof(Lexeme) );						

			ArrayList candidates=new ArrayList();

			for( int i=0; i < words.Length; i++ )
			{
				string word=words[i].word.Replace("_", " " );				
				if( word[0] <= 'Z' ) continue;

				MyWordInfo newpos=new MyWordInfo(word, pos.Pos) ;
				newpos.Sense=words[i].wnsns;
				if (words[i].semcor != null)
					newpos.Frequency=words[i].semcor.semcor;
				else
					newpos.Frequency=0;

				candidates.Add( newpos);								
			}

			if (!trace.ContainsKey (index.wd))
				candidates.Add(pos) ;

			if (candidates.Count > 1)
			{
				CompareLexeme comparer=new CompareLexeme();
				candidates.Sort(comparer);
			}
			

			return (MyWordInfo[])candidates.ToArray( typeof(MyWordInfo) );
		}

		
		private int GetNeighbour(MyWordInfo[] current, out MyWordInfo[] trial)
		{			
			trial=(MyWordInfo[])current.Clone() ;

			int wordIndex=random.Next(current.Length);
			if (_alterWord[wordIndex] != null )
			{
				int candIndex=random.Next(_alterWord[wordIndex].Length) ;
				
				_selected[wordIndex]=candIndex;
				_contextWords[wordIndex]=_alterWord[wordIndex][candIndex];
				
				if (!Read_WordSenseInfo (wordIndex))  return -1;

				trial[wordIndex]=_alterWord[wordIndex][candIndex];
				int overall=0;
				for (int i=0; i < trial.Length; i++)
				{
					overall += ComputeScore (i);
				}

				return overall;
			}

			return -1;
		}
		

		private int InitialSentence(out MyWordInfo[] current)
		{
			current=new MyWordInfo[_myPos.Length] ;
			int overall=0;
			for (int i=0; i < current.Length; i++)
				if (_alterWord[i] != null && _alterWord[i].Length > 0)
			{
				for (int j = 0; j < _alterWord[i].Length; j++)
				{
					_selected[i]=j;
					_contextWords[i]=_alterWord[i][j];	
				
					if (Read_WordSenseInfo (i))
					{
						int score=ComputeScore(i);				
						overall +=score;

						break;
					}

				}
			}

			return overall;
		}

		private bool Metropolis(float delta, float temperature)
		{
			double p=random.NextDouble() ;
			return (p <= Math.Exp( delta / temperature));
			//return (delta >= 0) && (p.NextDouble() <= Math.Exp(- delta / temperature));

		}

		private float GetCost(int num)
		{
			return (float)1.0f/(1 + num);
		}

		private Random random;
		private void IterativeGenerate(int trialNum, float descentFactor)
		{
			float temperature=1;
			float ANNEAL_SCHEDULE=0.9F;
			int MAX_TRIAL=100;
			int MAX_SUCCEED=10; // 1/10 of N_TRIAL

			MyWordInfo[] current;
			int current_cost=InitialSentence(out current);
			Add_Sentence(Convert.ToInt32(current_cost) );

			random=new Random() ;
			for (int i=0; i < 100; i++)
			{								
				int nsuccess=0;
				for (int j=0; j < MAX_TRIAL; j++)
				{
					MyWordInfo[] trial;
					int new_cost=GetNeighbour (current,out trial);

					if ( new_cost != -1)
					{						
						///float delta=current_cost - new_cost;
						float delta=new_cost - current_cost;
						//float delta=GetCost(trial_cost) - GetCost(current_cost);

						if (delta > 0 || Metropolis (delta, temperature)) //accept trial higher than current or with a probability
						{
							current_cost=new_cost;
							current=trial;
							Add_Sentence(new_cost) ;
							++nsuccess;
							
						}						
					}
				}
				
				temperature *= ANNEAL_SCHEDULE;
			}
		}

		private void Generate()
		{				
			_alterWord=new MyWordInfo[_myPos.Length][] ;
			
			_numWord=_myPos.Length;
			dx=new int[_numWord][] ;
			_selected=new int[_numWord] ;

			for(int i=0; i <_myPos.Length; i++)				
			{		
				_selected[i]=-1;
				MyWordInfo pos=_myPos[i];				
				if (pos.Pos != PartsOfSpeech.Unknown && pos.Sense != -1)
				{
					_alterWord[i]=FindSynonyms(ref pos , true );
					
					if (_alterWord[i] != null)
					{
						foreach(MyWordInfo poss in _alterWord[i])						
							poss.Pos=pos.Pos;
						
						dx[i]=new int[_alterWord[i].Length] ;
						_dictInfo[i]=new string[_alterWord[i].Length][][] ;

					}				
				}
			}
			
			_scores=new int[_myPos.Length][][][] ;
			for(int i=0; i <_myPos.Length; i++)
				if (_alterWord[i] != null)
			{				
				_scores[i]=new int[_alterWord[i].Length][][] ;
				for(int a_i=0; a_i < _alterWord[i].Length; a_i++)					
				{
					_scores[i][a_i]=new int[_myPos.Length][] ;
					
					for(int j=0; j <_myPos.Length; j++)
						if (_alterWord[j] != null)	
					{
						_scores[i][a_i][j]=new int[_alterWord[j].Length] ;
					}
				}
			}

			_numItems=0 ;
			list.Clear() ;
			bestScore=0;
			bestSentence=string.Empty;
			IterativeGenerate(100, 0.9F);
			//TryAll (0);
			
			list.Insert(0, bestSentence) ;

		}
		
		private ArrayList k_best_sentence=new ArrayList() ;
		private ArrayList k_best_score=new ArrayList() ;

		private void Add_Sentence(int score)
		{
			++_numItems ;
			MyWordInfo[] pos=new MyWordInfo[_myPos.Length] ;
			string newsen="";
			string[] words=new string[_numWord] ;
			for (int i=0; i <_numWord ; i++)
			{									
				string word=string.Empty ;

				if (_selected[i] == -1 )
				{					
					word=_myPos[i].Word;
				}
				else
				{						
					word=_alterWord[i][_selected[i]].Word;										
				}
				words[i]=word;
				pos[i]=new MyWordInfo(word, _myPos[i].Pos) ;
			}

			newsen=string.Format(_originalSentence, words) ;
			
			if (score > bestScore)
			{
				bestSentence=newsen + " " + score;
				bestScore=score;
			}

			if (k_best_sentence.Count < 500)
			{
				newsen=newsen + " " + score;
				if (!k_best_sentence.Contains(newsen) )
				{
					k_best_sentence.Add(newsen);
					k_best_score.Add(score);
				}
			}
			else
			{
				int min=10000000;
				int rem=-1;
				for (int j=0; j < k_best_sentence.Count ; j++)
				{
					if ((int)k_best_score[j] < min)
					{
						min=(int)k_best_score[j];
						rem=j;
					}
				}
				newsen=newsen + " " + score;
				if (!k_best_sentence.Contains(newsen) && rem != -1)				
				{
					k_best_sentence.RemoveAt(rem) ;
					k_best_score.RemoveAt(rem) ;

					k_best_sentence.Add(newsen);
					k_best_score.Add(score);					
				}

			}							
			
		}

		private void BackTracking(int index)
		{
			//if (_numItems > _max) return;
			
			if (_numItems > 1000) return;
			if (index == _numWord)
			{

				Add_Sentence(_overallScore);

				return;
			}

			if (_alterWord[index] != null && _alterWord[index].Length > 0)
			{
				for (int j = 0; j < _alterWord[index].Length; j++)
					if (dx[index][j] == 0)
					{
						dx[index][j]=1;
						_selected[index]=j;
						_contextWords[index]=_alterWord[index][j];
						int delta=0;
						if (Read_WordSenseInfo (index)) delta=ComputeScore(index);
						_overallScore +=delta;

						BackTracking(index + 1);

						_selected[index]=-1;
						_overallScore -=delta;
						_contextWords[index]=null;						
						dx[index][j]=0;
					}
			}
			else
			{
				_selected[index] = -1;
				BackTracking(index + 1);
			}
		}

		private int ComputeScore(int currentIndex)
		{
			int total=0;
			{
				if (_dictInfo[currentIndex] == null) return 0;
				int med=CONTEXT_SIZE/2;
				
				for (int i=currentIndex - med; i < currentIndex; i++)
					if ( i >= 0 && _dictInfo[i] != null && _selected[i] >= 0)
					{
						if (_scores[i][_selected[i]][currentIndex][_selected[currentIndex]] > 0)
							total += _scores[i][_selected[i]][currentIndex][_selected[currentIndex]];
						else
						{
							int score=ScoringSensePair (_dictInfo[i][_selected[i]], _dictInfo[currentIndex][_selected[currentIndex]]);
							total += score;
							if (score > 0 )
							{
								_scores[i][_selected[i]][currentIndex][_selected[currentIndex]]=score;
								_scores[currentIndex][_selected[currentIndex]][i][_selected[i]]=score;
								
								for (int a_i=0; a_i < _alterWord[i].Length; a_i++ )
									if (_alterWord[i][a_i].SynsetIndex == _alterWord[i][_selected[i]].SynsetIndex)
								{
									for (int a_j=0; a_j < _alterWord[currentIndex].Length; a_j++ )
										if (_alterWord[currentIndex][a_j].SynsetIndex == _alterWord[currentIndex][_selected[currentIndex]].SynsetIndex)
									{
										_scores[i][a_i][currentIndex][a_j]=score;
										_scores[currentIndex][a_j][i][a_i]=score;
									}
								}
							}
						}
					}

			}

			return total;
		}		

		private void MyInit()
		{
			tokenize.UseStemming=true;
			_contextWords=new MyWordInfo[_myPos.Length] ;
			_dictInfo=new string[_contextWords.Length][][][];			

			for (int i=0; i < _myPos.Length; i++)				
			{			
				++_myPos[i].Sense ;					
			}								
		}

		private bool Read_WordSenseInfo(int wordIndex)
		{
			if (_dictInfo[wordIndex][_selected[wordIndex]] != null) return true;

            _relatedness = Relatedness.GetRelatedness(_contextWords[wordIndex].Pos);

			//if (stop) 						
			{
                string[][] tmp = Relatedness.GetRelatednessGlosses(_contextWords[wordIndex].Word, _contextWords[wordIndex].Sense, _relatedness);						
				_dictInfo[wordIndex][_selected[wordIndex]]=tmp;

				for (int i=0; i < _dictInfo[wordIndex].Length ; i++)				
					if (_alterWord[wordIndex][i].SynsetIndex == _contextWords[wordIndex].SynsetIndex)
				{
					_dictInfo[wordIndex][i]=tmp;
				}
			}

			if (_dictInfo[wordIndex][_selected[wordIndex]] != null && _dictInfo[wordIndex][_selected[wordIndex]].Length > 0 ) 
				return true;
			else 
				return false;

		}

		private int GetOverlap(string[] a,string[] b)
		{
			//IOverlapCounter overlap=new SimpleOverlapCounter() ;
			IOverlapCounter overlap=new ExtOverlapCounter() ;
			return overlap.GetScore(a, b) ;
		}
			
		private int ScoringSensePair(string[][] sense1, string[][] sense2)
		{
			int score=0;
			
			try
			{
				int m=sense1.Length , n=sense2.Length ;
				for(int i=0; i < m; i++)
				{
					for(int j=0; j < n; j++)
					{
						score +=GetOverlap(sense1[i], sense2[j]) ;
					}
				}
			}catch (Exception e)
			{
				int uu=0;
			}

			return score;
		}


		
		private string RemoveBadChars(string s)
		{
			string[] badChars=new string[]{"=>", "==","=","->",">","+",";",",","_","-","."} ;
			foreach(string ch in badChars)			
				s=s.Replace(ch, " ") ;

			return s;
		}
		


	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -