⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bd.cpp

📁 文本相似度比较```很好的`
💻 CPP
字号:

/************************************************************
*	File	:	CheatKiller.cpp
*	Purpose	:	To implement the Levenshtein Distance
*	Author	:	Jason He
*	Created	:	2004.3.9 18:00
*	Comment	:	NULL
*************************************************************/



#include <windows.h>
#include <iostream>
#include <string>
#include <fstream>
#include <assert.h>

using namespace std;

//Convert ascii to unicode to support MBCS
wstring Convert2Unicode(string MutiByte)
{
	wchar_t *buf;
	wstring b;

	int ret = ::MultiByteToWideChar(CP_THREAD_ACP, MB_PRECOMPOSED, MutiByte.c_str(), MutiByte.length(), NULL, NULL);

	if( ret < 0 )
	{
		return b;
	}
	
	buf = new wchar_t[ret + 1];
	ret = MultiByteToWideChar(CP_THREAD_ACP, MB_PRECOMPOSED, MutiByte.c_str(), MutiByte.length(), buf, ret);

	if (ret < 0)
	{
		return b;
	}

	buf[ret] = 0;
	b = buf;

	return b;

}


class LDistance
{
public:

	// constructor
	LDistance(wstring &a, wstring &b) : m_strFirst(a), m_strSecond(b)
	{
		this->Alloc();
		this->m_dist = -1;
	}

	// Destructor
	virtual ~LDistance()
	{
		this->Free();
	}

	// get the LDistance
	int GetDistance()
	{
		int cost;

		if (this->m_dist !=  -1)
		{
			return this->m_dist;
		}

		int n = static_cast<int>(m_strFirst.length());
		int m = static_cast<int>(m_strSecond.length());

		for(int i = 0; i <= n; i++)
			m_ppMatrix[i][0] = i;

		for(int j = 0; j <= m; j++)
			m_ppMatrix[0][j] = j;

		for(int i = 1; i <= n; i++)
		{
			wchar_t s_i = m_strFirst[i - 1];
			for(int j = 1; j <= m; j++)
			{
				wchar_t t_j = m_strSecond[j - 1];
				
				if(s_i == t_j)
				{
					cost = 0;
				}
				else
				{
					cost = 1;
				}
				m_ppMatrix[i][j] = Minimum(m_ppMatrix[i - 1][j] + 1, m_ppMatrix[i][j - 1] + 1, m_ppMatrix[i - 1][j - 1] + cost);
			}

		}
		this->m_dist= m_ppMatrix[n][m];
		return m_dist;
	}

	// Get the relative Levenshtein Distance
	double GetRelativeDistance()
	{
		double ret;

		int n = static_cast<int>(m_strFirst.length());
		int m = static_cast<int>(m_strSecond.length());
		int nMax = m > n ? m : n;
		int dist = this->m_dist == -1 ? this->GetDistance() : this->m_dist;
		
		ret = static_cast<double>(dist) / nMax;
		return ret;
	}

private:
	// Get the minimum number of three integers.
	inline int Minimum(int a, int b, int c)
	{
		int mi = a;
		if(b < mi)
			mi = b;
		if(c < mi)
			mi = c;
		return mi;
	}

	// Allocate memory for m_ppMatrix
	void Alloc()
	{
		int size = 0;
		int n = static_cast<int>(m_strFirst.length());
		int m = static_cast<int>(m_strSecond.length());

		m_ppMatrix = new int*[n + 1];

		size += n + 1;

		for(int k = 0; k < n + 1; k++)
		{
			m_ppMatrix[k] = new int[m + 1];
			size += m + 1;
		}

		cout << "Memory Usage is " << size / 256 / 1024 << "MB" << endl;
	}

	// Free memory for m_ppMatrix
	void Free()
	{
		int n = static_cast<int>(m_strFirst.length());

		// free memory
		for (int l = 0; l < n + 1; l++)
		{
			delete [] m_ppMatrix[l];
		}
		delete[] m_ppMatrix;
	}

private:
	int ** m_ppMatrix;		// the calculatrion matrix
	wstring m_strFirst;		// the first string
	wstring m_strSecond;		// the second string
	int m_dist;
};



// Entry Point
int main()
{
	fstream a("c:\\a.txt");
	fstream b("c:\\b.txt");

	string aa, bb;
	string buf;

	while(a >> buf)
	{
		aa += buf;
	}

	buf.clear();
	while(b >> buf)
	{
		bb += buf;
	}


	wstring c = Convert2Unicode(aa);
	wstring d = Convert2Unicode(bb);

	DWORD dwBegin, dwEnd;



	LDistance* ld = new LDistance(c, d);



	cout << c.length() << endl;
	cout << d.length() << endl;

	dwBegin = GetTickCount();
	cout << "The Levenshtein Distance is " << ld->GetDistance() << endl;
	dwEnd = GetTickCount();
	cout << "The relative Levenshtein Distance is " << ld->GetRelativeDistance() << endl;

	delete ld;

	cout << "Time Cost is " << dwEnd - dwBegin << " MilliSecond" << endl;


	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -