📄 kmpex.cpp
字号:
//-----------------------------------------------------------
// KMP算法是字符串模式匹配算法,
// 解决DBCS字符集的问题。
//------------------------------------------------------------
#include <stdio.h>
void KMP_NEXT(const char *pszPattern,int nSize,int *pNextRecv)
{
pNextRecv[0]=-1;
for(int i=1;i<nSize;i++ )
{
int j=pNextRecv[i-1];
while(pszPattern[i]!=pszPattern[j+1]&& j>=0 ) j=pNextRecv[j] ; //递推计算
if(pszPattern[i]==pszPattern[j+1])
pNextRecv[i]=j+1;
else pNextRecv[i]=0; //
}
}
//-------------------------------------------------------
// 经过多字节字符集过滤处理后的KMP算法
// 代码支持GB18030标准。
//-------------------------------------------------------
int KMP_DBCS(const char *pszText,int nPos,const char *pszPattern,int nPatternSize,int *pNext)
{
int nRet=-1;
int bTmpNext=pNext==NULL;
if(bTmpNext)
{
pNext=new int[nPatternSize];
KMP_NEXT(pszPattern,nPatternSize,pNext);
}
int nLength=strlen(pszText);
int i=nPos,j=0;
while(i<nLength && nRet==-1)
{ //计算当前字符长度
int nCharSize=0;
if(pszText[i]>0)
nCharSize=1;
else if((unsigned char)pszText[i+1]<0x40)
nCharSize=4;
else nCharSize=2;
int bMatch=0;
if(j+nCharSize<=nPatternSize)
bMatch=memcmp(pszText+i,pszPattern+j,nCharSize)==0;
if(bMatch)
{
i+=nCharSize;
j+=nCharSize;
if(j==nPatternSize) nRet=i-nPatternSize;
}else
{
int nNext=pNext[j];
if(nNext==-1)
{
i+=nCharSize;
j=0;
}else
{
j=nNext;
}
}
}
if(bTmpNext)
delete []pNext;
return nRet;
}
//-------------------------------------------------------
// 普通KMP算法
//-------------------------------------------------------
int KMP(const char *pszText,int nPos,const char *pszPattern,int nPatternSize,int *pNext)
{
int nRet=-1;
int bTmpNext=pNext==NULL;
if(bTmpNext)
{
pNext=new int[nPatternSize];
KMP_NEXT(pszPattern,nPatternSize,pNext);
}
int nLength=strlen(pszText);
int i=nPos,j=0;
while(i<nLength && nRet==-1)
{
if(pszText[i]==pszPattern[j])
{
i++;
j++;
if(j==nPatternSize) nRet=i-nPatternSize;
}else
{
j=pNext[j];
if(j==-1) i++,j=0;
}
}
if(bTmpNext)
delete []pNext;
return nRet;
}
//使用示例:
int main(int argc, char *argv[])
{
char * s="aba癬ghc_ghabcacabaabc_ghbaab__ghca_gc_g_gb_ghcacab";
char * r=" 0 0 0 0 0";
char * t="_gh";
cout<<s<<endl<<r<<endl;
int ps=strlen(t);
int *pnext=new int[ps];
KMP_NEXT(t,ps,pnext);
cout<<"KMP_DBCS output:"<<endl;
int nRet=KMP_DBCS(s,0,t,ps,pnext);
int nCount=0;
while(nRet!=-1)
{
cout<<"pos="<<nRet<<endl;
nCount++;
nRet=KMP_DBCS(s,nRet+ps,t,ps,pnext);
}
cout<<"count="<<nCount<<endl;
cout<<"KMP output"<<endl;
nRet=KMP(s,0,t,ps,pnext);
nCount=0;
while(nRet!=-1)
{
cout<<"pos="<<nRet<<endl;
nCount++;
nRet=KMP(s,nRet+ps,t,ps,pnext);
}
cout<<"count="<<nCount<<endl;
delete []pnext; system("PAUSE");
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -