/* File: km.c (listing captured from a web code viewer; viewer label: "Font size:") */
/************************************************************************/
/*                                                                      */
/* km.h                                                                 */
/*                                                                      */
/* Definitions and functions used in km.c                               */
/*                                                                      */
/* Author: Shi Zhi-wei                                                  */
/* Date: 04.05.05                                                       */
/*                                                                      */
/* Copyright (c) 2005 Shi Zhi-wei - All rights reserved                 */
/*                                                                      */
/* This software is available for non-commercial use only. It must      */
/* not be modified and distributed without prior permission of the      */
/* author. The author is not responsible for implications from the      */
/* use of this software.                                                */
/*                                                                      */
/************************************************************************/
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "km.h"
/*
 * Shared state of the keyword matcher.
 *
 * Keywords are numbered in the order GenerateRules() stores them; keyword k
 * is represented by bit k in the rule masks.  Rules are numbered the same
 * way, and rule r is represented by bit r in iFailedRules/iFailCondition.
 */
char pszKeyWords[64][50]; /* keyword table: 64 slots of 50 bytes (terminator included) */
int iNumOfKeyWords; /* number of slots of pszKeyWords in use */
int iRuleId[64]; /* iRuleId[k]: index of the rule keyword k belongs to */
__int64 iRules[50]; /* iRules[r]: mask of the required (non-exception) keywords of rule r */
int iNumOfRules; /* number of entries of iRules in use */
__int64 iFailCondition; /* value iFailedRules has once every rule has failed */
/* iVerified is indexed by keyword, not by rule, so it needs as many entries
   as pszKeyWords (it used to have only 50). */
int iVerified[64]; /* iVerified[k] == 1 once keyword k was found in the current message */
int iExceptionInRules[50]; /* non-zero when rule r contains an exception ('-') keyword */
__int64 iFailedRules; /* bit r is set once rule r has failed */
__int64 iTestRule[50]; /* iTestRule[r]: required keywords of rule r found so far */
/*
 * Read a query file and extract the text between <QUERY> and </QUERY>.
 *
 * pszFileName   path of the file to read; only its first MAXBUFSIZE-1 bytes
 *               are examined.
 * pszQueryData  receives the extracted text as a NUL-terminated string.  The
 *               function cannot check its size; a buffer of MAXBUFSIZE bytes
 *               is always large enough because no more than MAXBUFSIZE-1
 *               bytes are ever read.
 *
 * Returns 0 on success and -1 when an argument is missing, the file cannot
 * be opened or read, or one of the two tags is not found.
 */
int ReadQuery( char *pszFileName, char* pszQueryData)
{
    static const char szOpenTag[] = "<QUERY>";
    static const char szCloseTag[] = "</QUERY>";
    FILE* stream;
    size_t nRead;
    size_t nLen;
    int iReadFailed;
    char buf[MAXBUFSIZE];
    char* pStart;
    char* pEnd;

    if (pszFileName == NULL || pszFileName[0] == '\0') {
        printf("No file\n");
        return -1;
    }
    if (pszQueryData == NULL) {
        return -1;
    }

    stream = fopen (pszFileName, "r");
    if (stream == NULL) {
        printf("Can not open file %s\n", pszFileName);
        return -1;
    }

    nRead = fread( buf, sizeof( char ), MAXBUFSIZE-1, stream );
    /* Remember the error state, then release the stream right away so that
       none of the return paths below can leak it. */
    iReadFailed = ferror( stream );
    fclose( stream );
    buf[nRead] = '\0';

    if ( iReadFailed ) {
        printf( "Read Data error\n" );
        return -1;
    }

    pStart = strstr( buf, szOpenTag );
    if ( pStart == NULL ) return -1;
    pStart += sizeof( szOpenTag ) - 1; /* step over the opening tag */

    pEnd = strstr( pStart, szCloseTag );
    if ( pEnd == NULL ) return -1;

    /* strstr() guarantees there is no NUL in [pStart, pEnd), so exactly nLen
       characters are copied and the terminator is added by hand. */
    nLen = (size_t)( pEnd - pStart );
    strncpy( pszQueryData, pStart, nLen );
    pszQueryData[nLen] = '\0';
    return 0;
}
/*generate rules using query message*/
int GenerateRules( char* pszInMessage )
{
__int64 iTempRule;
char* p = pszInMessage;
int iAnti = 0;
int iLength;
if (pszInMessage == NULL || pszInMessage[0] == 0)
{
printf("Error in message!\n");
return -1;
}
iNumOfRules = 0;
iTempRule = 0;
iExceptionInRules[iNumOfRules] = 0;
while (*p == ' ' || *p == '+' || *p == '|') p++;/*omit space, +, | */
/*begin process */
while (*p != 0){
iLength = 0;/*length of keyword*/
if ( *p == '-'){/*for elimination*/
iAnti = 1;
p++;
continue;
}
while (*p == ' ' || *p == '+' || *p == '|') p++;/*omit space, +, | */
/*get a word*/
while (*p != ' ' && *p != '+' && *p != '|' && *p != 0) {
pszKeyWords[iNumOfKeyWords][iLength] = *p;
iLength++;
if (iLength > 48)
{
pszKeyWords[iNumOfKeyWords][iLength] = '\0';
printf("too long word: %s\n", pszKeyWords[iNumOfKeyWords]);
return -1;
}
p++;
}
pszKeyWords[iNumOfKeyWords][iLength] = '\0';
iRuleId[iNumOfKeyWords] = iNumOfRules;
/*renew rule*/
if ( !iAnti )
iTempRule += 1 << iNumOfKeyWords;
else {
iAnti = 0;
iExceptionInRules[iNumOfRules] = 1;
}
while ( *p == ' ' || *p == '+') p++;/* omit space and +*/
if (*p == '|') {/*end rule*/
iRules[iNumOfRules++] = iTempRule;
iTempRule = 0;
p++;
while (*p == ' ' || *p == '+' || *p == '|') p++;/* omit space, +, |*/
}
else if (*p == 0) {
iRules[iNumOfRules++] = iTempRule;
}
iNumOfKeyWords ++;
}
//end process
iFailCondition = ( 1<<(iNumOfRules) ) - 1;
return 0;
}
/*
 * Look a word up among the keywords that have not been matched yet.
 *
 * pszWord  word to look for.  If its first byte is plain ASCII the word must
 *          equal a keyword exactly.  Otherwise (first byte has the high bit
 *          set) a keyword matches when pszWord starts with it, so the caller
 *          may pass the remaining message text.
 * piPos    receives the index of the matching keyword.
 *
 * On a match the keyword is marked in iVerified[] so it is reported only
 * once, and the index of its rule (iRuleId[]) is returned.
 * Returns -1 if no keyword matches and -2 for a NULL or empty argument.
 */
int FindWord(char* pszWord, int* piPos)
{
    int i;
    int iIsAscii;
    size_t nKeyLen;

    if ( pszWord == NULL || piPos == NULL || pszWord[0] == 0 ) return -2;

    /* Test the byte as unsigned so the result does not depend on whether
       plain char is signed on this platform. */
    iIsAscii = ( (unsigned char)pszWord[0] < 0x80 );

    for ( i = 0; i < iNumOfKeyWords; i++ ) {
        if ( iVerified[i] == 1 ) continue; /* this keyword has already been verified */

        /* An empty keyword can never equal a non-empty word, and as a prefix
           it would match every word, so it is never a valid hit. */
        if ( pszKeyWords[i][0] == '\0' ) continue;

        if ( iIsAscii ) {
            if ( strcmp(pszWord, pszKeyWords[i]) != 0 ) continue;
        }
        else {
            nKeyLen = strlen(pszKeyWords[i]);
            if ( strncmp(pszWord, pszKeyWords[i], nKeyLen) != 0 ) continue;
        }

        *piPos = i;
        iVerified[i] = 1;
        return iRuleId[i];
    }
    return -1; /* not found */
}
int VerifyRules(char* pszWord)
{
int iRId, iPos;
iRId = FindWord(pszWord, &iPos);
if ( iRId < 0 ) return 0;//not keywords
else if ( (iRules[iRId]&(1<<iPos)) == 0 ) {//indisposition appearance, Rule[iRId] failed
iFailedRules += (1 << iRId);
if (iFailedRules == iFailCondition) //Total verifying failed
return -1;
}
else {//new keyword found
iTestRule[iRId] += 1 << iPos;
if ( !iExceptionInRules[iRId] ) {
if ((iTestRule[iRId]^iRules[iRId]) == 0) //Rule[iRId] verifying succeeded
return iRId + 1;
}
}
return 0;//verifying does not end
}
int DoQuery( char* pszMessage)
{
int hr;
char *p, *p1, p2[2];
int iPos = 0;
char szWord[50];
char* pszStopWord = "~!@#$%^&*()_+=-|\\:;?/<>,.\'\" \t\n";
int i;
if ( pszMessage == NULL || pszMessage[0] == 0 ) return -1;
iFailedRules = 0;
for ( i = 0; i < iNumOfKeyWords; i++ ) {
iVerified[i] = 0;
}
for ( i = 0; i< iNumOfRules; i++ ) {
iTestRule[i] = 0;
}
p = pszMessage;
while ( *p != 0 ) {
while ( *p > 0 ) {// UNK
//TODO: Add code deal with english word
p2[0] = *p;
p2[1] = 0;
p1 = strstr(pszStopWord, p2);
if (p1 == NULL) {//not stop word
szWord[iPos++] = *p;
}
else if (iPos > 0) {//a word had been detected
szWord[iPos] = 0;
hr = VerifyRules(szWord);
if (hr != 0) return hr;
iPos = 0;
}
p++;
}
if ( iPos > 0 ) {//still a word left
szWord[iPos] = 0;
hr = VerifyRules(szWord);
if (hr != 0) return hr;
iPos = 0;
}
if ( *p < 0 ) {//read a chinese word
szWord[0] = *p;
if ( *(p+1) == 0 ) {
return -2; // half chinese word
}
else {
szWord[1] = *(p+1);
szWord[2] = 0;
}
hr = VerifyRules(p);
if ( hr != 0 ) return hr;
p += 2;
}
}
for ( i = 0; i < iNumOfKeyWords; i++ ){
if ( iExceptionInRules[i] && (iFailedRules&(1<<i)) == 0 ) {//no exception found
if ( iTestRule[i] == iRules[i] ) return i+1;//all keywords found
}
}
return -1;//query failed
}
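/*
 * Code viewer keyboard shortcuts (residue of the web page this listing was
 * captured from; not part of the program):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Switch theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */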