⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 qms.cpp

📁 Quick Mutli Search算法
💻 CPP
字号:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#ifndef MAX
#define MAX(a, b)  (((a) > (b)) ? (a) : (b))
#endif

#ifndef MIN
#define MIN(a, b)  (((a) < (b)) ? (a) : (b))
#endif

class CQuickMutilSearch
{
public:
	CQuickMutilSearch()
	{
		m_iPatternCount = 0;
		m_bBuild = false;
		m_iMax = 0;
		m_iMin = 0;
		m_Matrix = NULL;
		m_bCase = true;
		m_bReverse = false;

		m_pow[0] = 1;
		for (int i = 1; i < QMS_INTSIZE; ++i)
			m_pow[i] = m_pow[i-1] * 2;
	}

	virtual ~CQuickMutilSearch()
	{
		Release();
	}

public:
	bool AddPattern(char* str, int len)
	{
		if (m_iPatternCount >= QMS_INTSIZE || m_bBuild)
			return false;

		m_aryStr [m_iPatternCount] = str;
		m_aryLen [m_iPatternCount] = len;
		++ m_iPatternCount;

		return true;
	}

	char* Search(char* szText, int iLen, bool bCase = true, bool bReverse = false)
	{
		if (szText == NULL || iLen <= 0 || m_iPatternCount <= 0)
			return NULL;

		if (m_bCase != bCase)
		{
			m_bCase = bCase;
			m_bBuild = false;
		}

		if (m_bReverse != bReverse)
		{
			m_bReverse = bReverse;
			m_bBuild = false;
		}

		if (!m_bBuild)
		{
			Build();
		}

		int i, j, k, p;

		if (m_bReverse)
		{
			j = iLen - m_iMin;
			while (j >= 0) 
			{
				k = 0xFFFFFFFF;
				for (i = 0; i < m_iMax && j+i < iLen; ++i)
				{
					k = k & m_Matrix[i * QMS_CHARSET + szText[j+i]];
					if (k == 0)
						break;
				}

				if (k != 0)
				{
					p = ReverseFindPower(k);
					if (i >= m_aryLen[p])
						return szText + j;	// matched
				}

				if (j-1 > 0)
					j -= m_Buffer[szText[j-1]];
				else
					return NULL;
			}

			return NULL;
		}
		else
		{
			j = m_iMin - m_iMax;
			while (j <= iLen - m_iMax) 
			{
				k = 0xFFFFFFFF;
				for (i = m_iMax-1; i >= 0 && j+i >= 0; --i)
				{
					k = k & m_Matrix[i * QMS_CHARSET + szText[j+i]];
					if (k == 0)
						break;
				}

				if (k != 0)
				{
					p = ReverseFindPower(k);
					if (m_iMax-i-1 >= m_aryLen[p])
						return szText + j + m_iMax - m_aryLen[p];	// matched
				}

				j += m_Buffer[szText[j + m_iMax]];
			}
		
			return NULL;
		}
	}

private:
	void Build()
	{
		m_bBuild = true;

		int i, j, k;
		m_iMax = m_iMin = m_aryLen[0];
		for (i = 0; i < m_iPatternCount; ++i)
		{
			m_iMax = MAX(m_iMax, m_aryLen[i]);
			m_iMin = MIN(m_iMin, m_aryLen[i]);
		}

		Release();
		m_Matrix = new int [QMS_CHARSET * m_iMax];

		// m_Matrix
		memset(m_Matrix, 0, sizeof(int) * QMS_CHARSET * m_iMax);

		// m_Buffer
		for (i = 0; i < QMS_CHARSET; ++i)
			m_Buffer[i] = m_iMin + 1;

		for (k = 0; k < m_iPatternCount; ++k)
		{
			if (!m_bReverse)
			{
				for (i = m_aryLen[k] - m_iMin; i < m_aryLen[k]; ++i)
					m_Buffer[m_aryStr[k][i]] = MIN(m_aryLen[k] - i, m_Buffer[m_aryStr[k][i]]);

				for (i = 0; i < m_iMax - m_aryLen[k]; ++i)
					for (j=0; j < QMS_CHARSET; ++j)
						m_Matrix[i * QMS_CHARSET + j] |= m_pow[k];

				for (i = 0; i < m_aryLen[k]; ++ i)
				{
					m_Matrix[(m_iMax - m_aryLen[k] + i) * QMS_CHARSET + m_aryStr[k][i]] |= m_pow[k];

					if (!m_bCase)
					{
						if (m_aryStr[k][i] >= 'a' && m_aryStr[k][i] <= 'z')
							m_Matrix[(m_iMax - m_aryLen[k] + i) * QMS_CHARSET + m_aryStr[k][i] - 0x20] |= m_pow[k];
						else if (m_aryStr[k][i] >= 'A' && m_aryStr[k][i] <= 'Z')
							m_Matrix[(m_iMax - m_aryLen[k] + i) * QMS_CHARSET + m_aryStr[k][i] + 0x20] |= m_pow[k];
					}
				}
			}
			else
			{
				for (i = 0; i < m_iMin; ++i)
					m_Buffer[m_aryStr[k][i]] = MIN(i+1, m_Buffer[m_aryStr[k][i]]);

				for (i = m_aryLen[k]; i < m_iMax; ++i)
					for (j=0; j < QMS_CHARSET; ++j)
						m_Matrix[i * QMS_CHARSET + j] |= m_pow[k];

				for (i = 0; i < m_aryLen[k]; ++ i)
				{
					m_Matrix[i * QMS_CHARSET + m_aryStr[k][i]] |= m_pow[k];

					if (!m_bCase)
					{
						if (m_aryStr[k][i] >= 'a' && m_aryStr[k][i] <= 'z')
							m_Matrix[i * QMS_CHARSET + m_aryStr[k][i] - 0x20] |= m_pow[k];
						else if (m_aryStr[k][i] >= 'A' && m_aryStr[k][i] <= 'Z')
							m_Matrix[i * QMS_CHARSET + m_aryStr[k][i] + 0x20] |= m_pow[k];
					}
				}
			}
		}
	}

	void Release()
	{
		if (m_Matrix)
		{
			delete m_Matrix;
			m_Matrix = NULL;
		}
	}

	int ReverseFindPower(int Value)
	{
		for (int i=0; i<QMS_INTSIZE; ++i)
		{
			if ((m_pow[i] & Value) != 0)
				return i;
		}
		return 0;
	}

private:
	enum { QMS_INTSIZE = sizeof(int)*8, QMS_CHARSET = 256 };

	bool	m_bBuild;	// 是否调用了Build

	int		m_Buffer[QMS_CHARSET];
	int*	m_Matrix;

	int		m_iPatternCount;
	char*	m_aryStr [QMS_INTSIZE];
	int		m_aryLen [QMS_INTSIZE];

	int		m_iMax;
	int		m_iMin;
	int		m_pow[QMS_INTSIZE];

	bool	m_bCase;
	bool	m_bReverse;
};

void main()
{
	//          0123456789012345678
	char t[] = "Forforelseelse";
	char x[] = "for";
	char y[] = "else";
	char z[] = "cab";

	CQuickMutilSearch qms;
	qms.AddPattern(x, strlen(x));
	qms.AddPattern(y, strlen(y));

	{
		int iLen = strlen(t);
		char* p = t;
		char* pe = p + iLen;
		char* pc;
		do
		{
			pc = qms.Search(t, iLen, false, true);
			if (!pc)
				break;

			printf("%d ", pc - t);
			iLen = pc - t;
		}
		while (true);
	}
	printf("\n");
	{
		int iLen = strlen(t);
		char* p = t;
		char* pe = p + iLen;
		char* pc;
		do
		{
			pc = qms.Search(p, iLen, false);
			if (!pc)
				break;

			printf("%d ", pc - t);
			p = pc + 1;
			iLen = pe - p;
		}
		while (true);
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -