⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 km.c

📁 多关键字匹配的源码
💻 C
字号:
/************************************************************************/
/*                                                                      */
/*   km.h                                                               */
/*                                                                      */
/*   Definitions and functions used in km.c                             */
/*                                                                      */
/*   Author: Shi Zhi-wei                                                */
/*   Date: 04.05.05                                                     */
/*                                                                      */
/*   Copyright (c) 2005  Shi Zhi-wei - All rights reserved              */
/*                                                                      */
/*   This software is available for non-commercial use only. It must    */
/*   not be modified and distributed without prior permission of the    */
/*   author. The author is not responsible for implications from the    */
/*   use of this software.                                              */
/*                                                                      */
/************************************************************************/
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

#include "km.h"

/*
 * Shared matcher state.
 * GenerateRules() fills the keyword and rule tables; DoQuery(), VerifyRules()
 * and FindWord() update the per-message bookkeeping (iVerified, iFailedRules,
 * iTestRule).
 *
 * Bit k of a rule mask stands for keyword k; bit r of iFailedRules and
 * iFailCondition stands for rule r.
 *
 * NOTE(review): iVerified has 50 entries but is indexed by keyword number,
 * while the keyword tables (pszKeyWords, iRuleId) have 64 rows -- confirm the
 * intended keyword limit.
 */
char pszKeyWords[64][50]; /* keyword strings, one NUL-terminated word per row */
int iNumOfKeyWords; /* number of keywords currently stored in pszKeyWords */
int iRuleId[64]; /* iRuleId[k]: index of the rule that keyword k belongs to */
__int64 iRules[50];/* iRules[r]: mask of the required (non-excluded) keywords of rule r */
int iNumOfRules;/* number of rules currently stored in iRules */
__int64 iFailCondition;/* mask with one bit per rule; verification fails once iFailedRules equals it */
int iVerified[50];/* iVerified[k] == 1 once keyword k has been matched in the current message */
int iExceptionInRules[50];/* iExceptionInRules[r] != 0 when rule r contains an excluded ('-') keyword */
__int64 iFailedRules;/* bit r is set once rule r has failed for the current message */
__int64 iTestRule[50];/* iTestRule[r]: mask of the keywords of rule r found so far */

/*
 * Read a query file and extract the text enclosed in <QUERY> ... </QUERY>.
 *
 * pszFileName  - path of the file to read; NULL or an empty string is rejected.
 * pszQueryData - output buffer that receives the NUL-terminated query text.
 *                Only the first MAXBUFSIZE-1 bytes of the file are examined,
 *                so a buffer of MAXBUFSIZE bytes is always large enough.
 *
 * Returns 0 on success, and -1 when an argument is invalid, the file cannot
 * be opened or read, or either tag is missing from the data that was read.
 * The file is closed on every path.
 */
int ReadQuery( char *pszFileName, char* pszQueryData)
{
	static const char szOpenTag[] = "<QUERY>";
	static const char szCloseTag[] = "</QUERY>";
	FILE* stream;
	size_t nRead;
	size_t nLen;
	int iReadFailed;
	char *pBegin;
	char *pEnd;
	char buf[MAXBUFSIZE];

	if (pszFileName == NULL || pszFileName[0] == '\0') {
		printf("No file\n");
		return -1;
	}
	if (pszQueryData == NULL) {
		return -1;
	}

	stream = fopen (pszFileName, "r");
	if (stream == NULL) {
		printf("Can not open file %s\n", pszFileName);
		return -1;
	}

	nRead = fread( buf, sizeof( char ), MAXBUFSIZE-1, stream );
	buf[nRead] = '\0';

	/* Sample the error flag before closing; the stream is not needed afterwards. */
	iReadFailed = ferror( stream );
	fclose( stream );
	if ( iReadFailed ) {
		printf( "Read Data error\n" );
		return -1;
	}

	pBegin = strstr( buf, szOpenTag );
	if ( pBegin == NULL ) return -1;
	pBegin += sizeof szOpenTag - 1;	/* step over the opening tag */

	pEnd = strstr( pBegin, szCloseTag );
	if ( pEnd == NULL ) return -1;

	/* Copy exactly the text between the tags and terminate it. */
	nLen = (size_t)(pEnd - pBegin);
	memcpy( pszQueryData, pBegin, nLen );
	pszQueryData[nLen] = '\0';

	return 0;
}

/*generate rules using query message*/
int GenerateRules( char* pszInMessage )
{
	__int64 iTempRule;
	char* p = pszInMessage;
	int iAnti = 0;
	int iLength;
	if (pszInMessage == NULL || pszInMessage[0] == 0) 
	{
		printf("Error in message!\n");
		return -1;
	}
	iNumOfRules = 0;
	iTempRule = 0;
	iExceptionInRules[iNumOfRules] = 0;

	while (*p == ' ' || *p == '+' || *p == '|') p++;/*omit space, +, | */

	/*begin process */
	while (*p != 0){

		iLength = 0;/*length of keyword*/

		if ( *p == '-'){/*for elimination*/
			iAnti = 1;
			p++;
			continue;
		}
		while (*p == ' ' || *p == '+' || *p == '|') p++;/*omit space, +, | */

		/*get a word*/
		while (*p != ' ' && *p != '+' && *p != '|' && *p != 0) {
			pszKeyWords[iNumOfKeyWords][iLength] = *p;
			iLength++;
			if (iLength > 48)
			{
				pszKeyWords[iNumOfKeyWords][iLength] = '\0';
				printf("too long word: %s\n", pszKeyWords[iNumOfKeyWords]); 
				return -1;
			}
			p++;
		}
		pszKeyWords[iNumOfKeyWords][iLength] = '\0';
		iRuleId[iNumOfKeyWords] = iNumOfRules;
		
		/*renew rule*/
		if ( !iAnti )
			iTempRule += 1 << iNumOfKeyWords;
		else {
			iAnti = 0;
			iExceptionInRules[iNumOfRules] = 1;
		}

		while ( *p == ' ' || *p == '+') p++;/* omit space and +*/
		if (*p == '|') {/*end rule*/
			iRules[iNumOfRules++] = iTempRule;
			iTempRule = 0;
			p++;
			while (*p == ' ' || *p == '+' || *p == '|') p++;/* omit space, +, |*/
		}
		else if (*p == 0) {
			iRules[iNumOfRules++] = iTempRule;
		}
		
		iNumOfKeyWords ++;
	}
	//end process

	iFailCondition = ( 1<<(iNumOfRules) ) - 1;
	return 0;
}

/*
 * Look up a word among the keywords that have not been matched yet.
 *
 * pszWord - text to look up. If its first byte is 7-bit ASCII the whole
 *           string must equal a keyword. Otherwise (first byte has the high
 *           bit set) a keyword matches when it is a prefix of pszWord, which
 *           lets the caller pass a pointer into the middle of a message.
 * piPos   - receives the index of the matching keyword.
 *
 * On a match the keyword is marked in iVerified, so it is reported at most
 * once until iVerified is cleared again.
 *
 * Returns the rule index of the matching keyword, -1 when nothing matches,
 * or -2 when an argument is NULL or pszWord is empty.
 */
int FindWord(char* pszWord, int* piPos)
{
	int i;
	int iIsAscii;
	int iMatch;
	size_t nKeyLen;

	if ( pszWord == NULL || piPos == NULL || pszWord[0] == 0 ) return -2;

	/* Test the byte as unsigned so the result does not depend on whether
	   plain char is signed on this platform. */
	iIsAscii = ( (unsigned char)pszWord[0] < 0x80 );

	for ( i = 0; i < iNumOfKeyWords; i++ ) {
		if ( iVerified[i] == 1 ) continue; /*this word has already been verified*/

		if ( iIsAscii ) {
			iMatch = ( strcmp(pszWord, pszKeyWords[i]) == 0 );
		}
		else {
			/* A zero-length prefix would match any text, so skip empty keywords. */
			nKeyLen = strlen(pszKeyWords[i]);
			iMatch = ( nKeyLen > 0 && strncmp(pszWord, pszKeyWords[i], nKeyLen) == 0 );
		}

		if ( iMatch ) {/*find it*/
			*piPos = i;
			iVerified[i] = 1;
			return iRuleId[i];
		}
	}
	return -1;/*not found*/
}

int VerifyRules(char* pszWord)
{
	int iRId, iPos;
	iRId = FindWord(pszWord, &iPos);
	if ( iRId < 0 ) return 0;//not keywords
	
	else if ( (iRules[iRId]&(1<<iPos)) == 0 ) {//indisposition appearance, Rule[iRId] failed
		iFailedRules += (1 << iRId);
		if (iFailedRules == iFailCondition) //Total verifying failed
			return -1; 
	}
	
	else {//new keyword found
		iTestRule[iRId] += 1 << iPos;
		if ( !iExceptionInRules[iRId] ) {
			if ((iTestRule[iRId]^iRules[iRId]) == 0) //Rule[iRId] verifying succeeded
				return iRId + 1;
		}
	}
	
	return 0;//verifying does not end
}

int DoQuery( char* pszMessage)
{
	int hr;
	char *p, *p1, p2[2];
	int iPos = 0;
	char szWord[50];
	char* pszStopWord = "~!@#$%^&*()_+=-|\\:;?/<>,.\'\" \t\n";
	int i;
	
	if ( pszMessage == NULL || pszMessage[0] == 0 ) return -1;
	iFailedRules = 0;
	for ( i = 0; i < iNumOfKeyWords; i++ ) {
		iVerified[i] = 0;
	}
	for ( i = 0; i< iNumOfRules; i++ ) {
		iTestRule[i] = 0;
	}
	
	p = pszMessage;
	
	while ( *p != 0 ) {
		while ( *p > 0 ) {// UNK
			//TODO: Add code deal with english word
			p2[0] = *p;
			p2[1] = 0;
			p1 = strstr(pszStopWord, p2);
			if (p1 == NULL) {//not stop word
				szWord[iPos++] = *p;
			}
			else if (iPos > 0) {//a word had been detected
				szWord[iPos] = 0;
				hr = VerifyRules(szWord);
				if (hr != 0) return hr;
				iPos = 0;
			}
			p++;
		}
		if ( iPos > 0 ) {//still a word left
			szWord[iPos] = 0;
			hr = VerifyRules(szWord);
			if (hr != 0) return hr;
			iPos = 0;
		}
		if ( *p < 0 ) {//read a chinese word
			szWord[0] = *p;
			if ( *(p+1) == 0 ) {
				return -2;		// half chinese word
			}
			else {
				szWord[1] = *(p+1);
				szWord[2] = 0;
			}
			hr = VerifyRules(p);
			if ( hr != 0 ) return hr;
			
			p += 2;
		}
	}
	for ( i = 0; i < iNumOfKeyWords; i++ ){
		if ( iExceptionInRules[i] && (iFailedRules&(1<<i)) == 0 ) {//no exception found
			if ( iTestRule[i] == iRules[i] ) return i+1;//all keywords found
		}
	}
	return -1;//query failed
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -