⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 textmatching.cs

📁 In the previous article, we presented an approach for capturing similarity between words that was co
💻 CS
字号:
/*
 * tf/idf implementation 
 * Author: Thanh Dao, thanh.dao@gmx.net or thanh.ngoc.dao@gmail.com
 */

using System;
using System.Collections;
using System.Collections.Generic;

namespace ServiceRanking
{
	/// <summary>
	/// String similarity helpers: Levenshtein edit distance and n-gram overlap scores.
	/// Every similarity method returns a value between 0 (nothing in common) and 1 (identical).
	/// All comparisons are ordinal and case-sensitive.
	/// </summary>
	public class TextMatching
	{
		/// <summary>Largest ratio of inserted characters to the shorter string's length for the affix shortcut.</summary>
		private const float AffixMaxInsertRatio = 0.65F;

		/// <summary>Fixed score returned when one string is the other plus a limited number of inserted characters.</summary>
		private const float AffixSimilarity = 0.9F;

		/// <summary>
		/// Kept so that existing callers which instantiate the class keep compiling;
		/// the class holds no state and all of its members are static.
		/// </summary>
		public TextMatching()
		{
		}

		/// <summary>
		/// Computes the Levenshtein distance between two strings: the minimum number of
		/// single-character insertions, deletions and substitutions (each costing 1)
		/// needed to turn <paramref name="s"/> into <paramref name="t"/>.
		/// </summary>
		/// <param name="s">First string; must not be null.</param>
		/// <param name="t">Second string; must not be null.</param>
		/// <returns>The edit distance; 0 only when both strings are identical.</returns>
		/// <exception cref="ArgumentNullException">Either argument is null.</exception>
		public static int ComputeEditDistance(string s, string t)
		{
			if (s == null)
			{
				throw new ArgumentNullException("s");
			}
			if (t == null)
			{
				throw new ArgumentNullException("t");
			}

			int n = s.Length;
			int m = t.Length;

			// Against an empty string the distance is simply the other string's length,
			// so answer before allocating the matrix.
			if (n == 0)
			{
				return m;
			}
			if (m == 0)
			{
				return n;
			}

			// distance[i, j] = edit distance between the first i chars of s and the first j chars of t.
			int[,] distance = new int[n + 1, m + 1];
			for (int i = 0; i <= n; i++)
			{
				distance[i, 0] = i;
			}
			for (int j = 0; j <= m; j++)
			{
				distance[0, j] = j;
			}

			for (int i = 1; i <= n; i++)
			{
				for (int j = 1; j <= m; j++)
				{
					// Direct char comparison: same ordinal result as comparing
					// one-character substrings, without the allocations.
					int cost = s[i - 1] == t[j - 1] ? 0 : 1;

					int deletion = distance[i - 1, j] + 1;
					int insertion = distance[i, j - 1] + 1;
					int substitution = distance[i - 1, j - 1] + cost;

					// Nested Math.Min stands in for the external MathLib.Min3 helper
					// (assumed to be a plain three-way minimum), so this class is self-contained.
					distance[i, j] = Math.Min(Math.Min(deletion, insertion), substitution);
				}
			}

			return distance[n, m];
		}

		/// <summary>
		/// Scores how similar two strings are, based on their edit distance.
		/// </summary>
		/// <remarks>
		/// Identical strings score 1. When the longer string can be obtained from the shorter one
		/// purely by inserting characters (prefix, suffix, acronym or abbreviation style) and the
		/// number of inserted characters is at most 65% of the shorter string's length, the score
		/// is a fixed 0.9. Otherwise the score is 1 - distance / longerLength, rounded to two decimals.
		/// </remarks>
		/// <param name="string1">First string.</param>
		/// <param name="string2">Second string.</param>
		/// <returns>A similarity value; 0 when either string is null or empty.</returns>
		public static float GetEditDistanceSimilarity(string string1, string string2)
		{
			if (string.IsNullOrEmpty(string1) || string.IsNullOrEmpty(string2))
			{
				return 0.0F;
			}

			int distance = ComputeEditDistance(string1, string2);

			// A distance of 0 means the strings are identical. This must be answered before the
			// affix shortcut below, which would otherwise also match (0 + len == len) and
			// under-score an exact match as 0.9.
			if (distance == 0)
			{
				return 1.0F;
			}

			int maxLen = Math.Max(string1.Length, string2.Length);
			int minLen = Math.Min(string1.Length, string2.Length);

			// distance == length difference means only insertions were needed.
			bool onlyInsertions = distance + minLen == maxLen;
			if (onlyInsertions && (float)distance / minLen <= AffixMaxInsertRatio)
			{
				return AffixSimilarity;
			}

			// maxLen is at least 1 here because both strings are non-empty.
			return (float)Math.Round(1.0F - (float)distance / maxLen, 2);
		}

		/// <summary>
		/// Splits a string into its distinct character n-grams, in order of first appearance.
		/// </summary>
		/// <remarks>
		/// Besides the full-length grams, shorter grams anchored at the start and at the end of
		/// the string are included. For a string of at least <paramref name="gramlength"/> characters
		/// these are every proper prefix and suffix shorter than the gram length. For a shorter
		/// string they are every prefix (including the whole string) plus the last character only.
		/// </remarks>
		/// <param name="str">The string to split.</param>
		/// <param name="gramlength">Length of a full gram; must be at least 1.</param>
		/// <returns>The distinct grams, or null when <paramref name="str"/> is null or empty.</returns>
		/// <exception cref="ArgumentOutOfRangeException">
		/// <paramref name="gramlength"/> is less than 1 and <paramref name="str"/> is non-empty.
		/// </exception>
		internal static string[] GenerateNGrams(string str, int gramlength)
		{
			// Null (not an empty array) is the established result for "no input"; callers rely on it.
			if (string.IsNullOrEmpty(str))
			{
				return null;
			}

			// A zero length would produce a single empty gram and a negative one would fail deep
			// inside Substring; reject both up front with an error that names the parameter.
			if (gramlength < 1)
			{
				throw new ArgumentOutOfRangeException("gramlength", gramlength, "The gram length must be at least 1.");
			}

			List<string> grams = new List<string>();
			HashSet<string> seen = new HashSet<string>(StringComparer.Ordinal);
			int length = str.Length;

			if (length < gramlength)
			{
				// Every prefix, up to and including the whole string.
				for (int end = 1; end <= length; end++)
				{
					AddDistinct(grams, seen, str.Substring(0, end));
				}

				// Only the final character is added as an end-anchored gram in this case.
				AddDistinct(grams, seen, str.Substring(length - 1, 1));
			}
			else
			{
				// Start-anchored grams shorter than a full gram.
				for (int end = 1; end < gramlength; end++)
				{
					AddDistinct(grams, seen, str.Substring(0, end));
				}

				// Full-length grams from a sliding window.
				int firstShortSuffix = length - gramlength + 1;
				for (int start = 0; start < firstShortSuffix; start++)
				{
					AddDistinct(grams, seen, str.Substring(start, gramlength));
				}

				// End-anchored grams shorter than a full gram.
				for (int start = firstShortSuffix; start < length; start++)
				{
					AddDistinct(grams, seen, str.Substring(start));
				}
			}

			return grams.ToArray();
		}

		/// <summary>
		/// Scores two strings by the overlap of their n-gram sets:
		/// 2 * shared / (grams in str1 + grams in str2), using the grams from
		/// <see cref="GenerateNGrams"/>.
		/// </summary>
		/// <param name="str1">First string.</param>
		/// <param name="str2">Second string.</param>
		/// <param name="gramlength">Length of a full gram; must be at least 1.</param>
		/// <returns>A similarity value; 0 when either string is null or empty.</returns>
		/// <exception cref="ArgumentOutOfRangeException">
		/// <paramref name="gramlength"/> is less than 1 and both strings are non-empty.
		/// </exception>
		public static float GetNGramSimilarity(string str1, string str2, int gramlength)
		{
			if (string.IsNullOrEmpty(str1) || string.IsNullOrEmpty(str2))
			{
				return 0.0F;
			}

			string[] grams1 = GenerateNGrams(str1, gramlength);
			string[] grams2 = GenerateNGrams(str2, gramlength);

			// Both gram lists are already duplicate-free, so counting the grams of the first
			// string that occur in the second gives the size of the intersection.
			HashSet<string> lookup = new HashSet<string>(grams2, StringComparer.Ordinal);
			int shared = 0;
			foreach (string gram in grams1)
			{
				if (lookup.Contains(gram))
				{
					shared++;
				}
			}

			return (2.0F * shared) / (grams1.Length + grams2.Length);
		}

		/// <summary>N-gram similarity of two strings using grams of length 2.</summary>
		/// <param name="str1">First string.</param>
		/// <param name="str2">Second string.</param>
		/// <returns>A similarity value; 0 when either string is null or empty.</returns>
		public static float GetBigramSimilarity(string str1, string str2)
		{
			return GetNGramSimilarity(str1, str2, 2);
		}

		/// <summary>N-gram similarity of two strings using grams of length 3.</summary>
		/// <param name="str1">First string.</param>
		/// <param name="str2">Second string.</param>
		/// <returns>A similarity value; 0 when either string is null or empty.</returns>
		public static float GetTrigramSimilarity(string str1, string str2)
		{
			return GetNGramSimilarity(str1, str2, 3);
		}

		/// <summary>N-gram similarity of two strings using grams of length 4.</summary>
		/// <param name="str1">First string.</param>
		/// <param name="str2">Second string.</param>
		/// <returns>A similarity value; 0 when either string is null or empty.</returns>
		public static float GetQGramSimilarity(string str1, string str2)
		{
			return GetNGramSimilarity(str1, str2, 4);
		}

		/// <summary>Appends a gram to the ordered list unless it has been recorded already.</summary>
		private static void AddDistinct(List<string> grams, HashSet<string> seen, string gram)
		{
			if (seen.Add(gram))
			{
				grams.Add(gram);
			}
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -