// File: segtool.cpp
// (Web code-viewer residue: "font size" control label; not part of the program.)
// SegTool.cpp: implementation of the CSegTool class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "SegTool.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Default constructor. The body is empty: no explicit initialisation is
// performed here.
CSegTool::CSegTool()
{
}
// Destructor: explicitly empties the loaded word list.
CSegTool::~CSegTool()
{
    this->m_WordList.clear();
}
// Loads the word dictionary from a text file, one entry per line.
//
// psWordDict : path of the dictionary file; NULL is rejected.
// Returns    : true when the file was opened and read, false otherwise.
//
// Each line is appended to m_WordList in file order after its line ending has
// been removed. Both "\n" and "\r\n" endings are stripped, so a dictionary
// saved with Windows line endings still yields clean entries when the C
// runtime does not translate them. A blank line is stored as an empty entry.
// Lines longer than the 255-character buffer are split by fgets() into
// several entries.
// NOTE(review): the lookup routines in this file binary-search m_WordList, so
// the dictionary is presumably expected to be sorted in strcmp() order;
// confirm against the data files.
bool CSegTool::InitSegTool(char *psWordDict){
    if( psWordDict == NULL ) return false;

    FILE *fpWordFile = fopen(psWordDict,"r");
    if( fpWordFile == NULL ) return false;

    char szWord[256];
    while( fgets(szWord,sizeof(szWord),fpWordFile) != NULL ){
        size_t nLen = strlen(szWord);
        if( nLen == 0 ) continue;

        // Drop the line terminator: '\n' first, then a '\r' left over from CRLF.
        if( szWord[nLen - 1] == '\n' ) szWord[--nLen] = 0;
        if( nLen > 0 && szWord[nLen - 1] == '\r' ) szWord[--nLen] = 0;

        m_WordList.push_back(szWord);
    }
    fclose(fpWordFile);
    return true;
}
// Segments a whole text file and writes the result to another file.
//
// psInputFile  : path of the text to segment (opened with mode "r").
// psOutputFile : path of the file to create or overwrite (mode "w").
// Returns      : true (1) on success, false (0) when a path is NULL or a
//                file cannot be opened.
//
// The input is read line by line. Each chunk is segmented with a reserve of
// 40 bytes: SegSentence() leaves the trailing bytes unsegmented and reports
// where they start, and that tail is moved to the front of the buffer before
// the next line is appended. After the last line the remaining tail is
// segmented with a reserve of 0 so nothing is left over.
int CSegTool::SegFile( char *psInputFile,char *psOutputFile ){
    char szLine[1024];
    // Carried-over tail (at most 40 bytes) plus one line of at most 1023 bytes.
    char szInput[1096];
    // Every input byte can become a one-byte word followed by '/', so the
    // segmented text may be twice as long as the input.
    char szOutput[2 * sizeof(szInput)];
    char *psRes = NULL;

    if( psInputFile == NULL || psOutputFile == NULL ) return false;

    FILE *fpInput = fopen(psInputFile,"r");
    if( fpInput == NULL ){
        return false;
    }
    FILE *fpOutput = fopen(psOutputFile,"w");
    if( fpOutput == NULL ){
        fclose(fpInput);    // do not leak the already opened input file
        return false;
    }

    *szInput = 0;
    while( fgets(szLine,sizeof(szLine),fpInput) != NULL ){
        if( psRes != NULL ){
            // psRes points into szInput itself, so the regions overlap:
            // memmove() is required, strcpy() would be undefined behaviour.
            memmove(szInput,psRes,strlen(psRes) + 1);
        }
        strcat(szInput,szLine);
        SegSentence( 40 , szInput, szOutput, psRes );
        fprintf(fpOutput, "%s" , szOutput);
    }

    // Flush whatever is still pending, this time without keeping a reserve.
    if( psRes != NULL ){
        memmove(szInput,psRes,strlen(psRes) + 1);
    }
    SegSentence( 0 , szInput, szOutput, psRes );
    fprintf(fpOutput, "%s" , szOutput);

    fclose(fpInput );
    fclose(fpOutput );
    return true;
}
// Segments the front of a NUL-terminated string, leaving a reserve at its end.
//
// nResNum     : number of trailing bytes to hold back. Words are taken only
//               while the consumed length plus nResNum does not exceed the
//               input length. A negative value, or one not smaller than the
//               input length, leaves the whole input unsegmented.
// psInputSen  : text to segment.
// psOutputSen : receives the words, each followed by '/'. The caller must
//               provide room for up to twice the input length plus one byte.
// psRes       : set to the first unsegmented byte inside psInputSen.
// Returns     : the number of bytes left unsegmented.
//
// If either buffer is NULL nothing is segmented: psRes is set to psInputSen
// and the full input length (0 for a NULL input) is returned.
int CSegTool::SegSentence( int nResNum ,char *psInputSen , char *psOutputSen ,char *&psRes){
    if( psInputSen == NULL || psOutputSen == NULL ){
        psRes = psInputSen;
        return psInputSen != NULL ? static_cast<int>(strlen(psInputSen)) : 0;
    }

    *psOutputSen = 0;

    // The input is not modified below, so its length is measured only once.
    const size_t nInputLen = strlen(psInputSen);
    if( nResNum < 0 || static_cast<size_t>(nResNum) >= nInputLen ) {
        psRes = psInputSen;
        return static_cast<int>(nInputLen);
    }

    const size_t nReserve = static_cast<size_t>(nResNum);
    size_t nSegNum = 0;
    char szWord[256];
    char *psOut = psOutputSen;      // running append position in the output

    while( nSegNum + nReserve <= nInputLen ){
        if( !GetOneWord(psInputSen + nSegNum ,szWord) ) break;

        const size_t nWordLen = strlen(szWord);
        if( nWordLen == 0 ) break;  // no progress possible; avoid looping forever

        memcpy(psOut,szWord,nWordLen);
        psOut += nWordLen;
        *psOut++ = '/';
        *psOut = 0;
        nSegNum += nWordLen;
    }

    psRes = psInputSen + nSegNum;
    return static_cast<int>(nInputLen - nSegNum);
}
// Extracts one word from the start of psInput using longest dictionary match.
//
// psInput  : NUL-terminated text; the word is taken from its beginning.
// psOutput : receives the word. Words are at most 255 bytes, so a 256-byte
//            buffer is sufficient.
// Returns  : false when a pointer is NULL or psInput is empty, true otherwise.
//
// A byte with the high bit (0x80) set is treated as the first byte of a
// two-byte character, any other byte as a one-byte character.
// NOTE(review): presumably a double-byte encoding such as GBK; confirm.
//
// The candidate starts as the first character and grows by one character per
// round. Each round asks SearchWordHeadRange() which dictionary entries begin
// with the candidate:
//   -1 : none do, so stop;
//    1 : some do, but the first of them is not the candidate itself;
//    2 : the first of them equals the candidate, which is remembered as the
//        best word so far. If it is the only entry left the search stops,
//        otherwise that entry is excluded from the next, longer lookup.
// The last remembered word (or the first character when nothing matched) is
// returned.
bool CSegTool::GetOneWord(char *psInput ,char *psOutput){
    if( psInput == NULL || psOutput == NULL ) return false;
    if( *psInput == 0 ) return false;

    char szSearchWord[256];         // current candidate (prefix of psInput)
    char szWordSeg[256];            // longest exact match found so far
    const size_t nInputLen = strlen(psInput);
    int nHead = 0;
    int nTail = static_cast<int>(m_WordList.size());
    int nPos = (*psInput & 0x80) ? 2 : 1;

    memcpy(szSearchWord,psInput,nPos);
    szSearchWord[nPos] = 0;
    strcpy(szWordSeg,szSearchWord);

    while( static_cast<size_t>(nPos) <= nInputLen ){
        // nHead/nTail are passed both as the range to search and as the
        // variables that receive the narrowed range.
        const int nFlag = SearchWordHeadRange(szSearchWord,nHead ,nTail ,nHead,nTail);
        if( nFlag == -1 ) break;
        if( nFlag == 2 ){
            strcpy(szWordSeg,szSearchWord);
            if( nHead == nTail ) break;
            nHead++;
        }

        // Extend the candidate by one character.
        nPos += (psInput[nPos] & 0x80) ? 2 : 1;
        // A longer candidate would not fit into szSearchWord (and cannot match
        // a dictionary line, which is read through a 256-byte buffer).
        if( nPos >= static_cast<int>(sizeof(szSearchWord)) ) break;
        memcpy(szSearchWord,psInput,nPos);
        szSearchWord[nPos] = 0;
    }

    strcpy(psOutput,szWordSeg);
    return true;
}
// Copies the dictionary entry at nIndex into psWord.
//
// nIndex : zero-based position in m_WordList; valid values are 0 .. size-1.
// psWord : destination buffer; the caller must make it large enough for the
//          entry plus the terminating NUL.
// Returns: true when the entry was copied, false for a NULL buffer or an
//          index outside the list (including nIndex == size).
//
// NOTE(review): c_str() assumes the element type of m_WordList is
// std::string (declared in SegTool.h); confirm. It guarantees a
// NUL-terminated pointer, which begin() does not.
bool CSegTool::GetWord( int nIndex, char *psWord ){
    if( psWord == NULL ) return false;
    if( nIndex < 0 || static_cast<size_t>(nIndex) >= m_WordList.size() ) return false;
    strcpy(psWord,m_WordList[nIndex].c_str());
    return true;
}
// Looks up an exact dictionary entry.
//
// psKey  : NUL-terminated word to find.
// Returns: the index reported by BSearch(), or -1 when the key is NULL, the
//          dictionary is empty, or the key sorts after the last entry.
//
// BSearch() is handed the element count as its upper bound. The two early
// exits ensure the lookup never has a reason to probe an index at or beyond
// that bound.
// NOTE(review): c_str() assumes the element type of m_WordList is
// std::string (declared in SegTool.h); confirm.
int CSegTool::SearchOneWord( char *psKey ){ // return index or -1 if not
    if( psKey == NULL || m_WordList.empty() ) return -1;
    // In a sorted list a key greater than the last entry cannot be present.
    if( strcmp(m_WordList.back().c_str(), psKey) < 0 ) return -1;
    return BSearch(psKey,0,static_cast<int>(m_WordList.size()));
}
// Narrows [nHead, nTail] to the dictionary entries that begin with psKey.
//
// psKey        : NUL-terminated prefix to look for.
// nHead, nTail : range to search. They are taken by value, so the caller may
//                pass the same variables for nReHead/nReTail.
// nReHead      : receives the index from SearchWordHeadTop(), or -1 when
//                nothing matches.
// nReTail      : receives the index from SearchWordHeadDown(). If that lookup
//                reports -1 although a head was found, the incoming nTail is
//                kept so the caller never continues with a negative bound.
// Returns      : -1 when no entry begins with psKey,
//                 2 when the entry at nReHead equals psKey exactly,
//                 1 when entries begin with psKey but the one at nReHead is
//                   not an exact match.
//
// NOTE(review): c_str() assumes the element type of m_WordList is
// std::string (declared in SegTool.h); confirm.
int CSegTool::SearchWordHeadRange( char *psKey ,int nHead,int nTail,int &nReHead, int &nReTail){
    const int nTop = SearchWordHeadTop(psKey,nHead,nTail);
    nReHead = nTop;
    if( nTop == -1 ) return -1;

    const int nDown = SearchWordHeadDown(psKey,nHead,nTail);
    nReTail = ( nDown == -1 ) ? nTail : nDown;

    return ( strcmp(m_WordList[nTop].c_str(),psKey) == 0 ) ? 2 : 1;
}
// Finds the first dictionary entry in [nHead, nTail] that begins with psKey.
//
// psKey        : NUL-terminated prefix.
// nHead, nTail : inclusive index range. The range is clamped to the valid
//                indices of m_WordList, so passing the element count as
//                nTail (as GetOneWord does) is accepted.
// Returns      : the smallest index in the range whose entry starts with
//                psKey, or -1 when there is none, the range is empty, or
//                psKey is NULL.
//
// Requires m_WordList to be sorted in strcmp() order: the entries sharing a
// prefix are then contiguous, and the first of them is the first entry whose
// leading strlen(psKey) bytes do not compare below psKey.
// NOTE(review): c_str() assumes the element type of m_WordList is
// std::string (declared in SegTool.h); confirm.
int CSegTool::SearchWordHeadTop( char *psKey ,int nHead, int nTail){
    if( psKey == NULL ) return -1;

    const int nLast = static_cast<int>(m_WordList.size()) - 1;
    if( nHead < 0 ) nHead = 0;
    if( nTail > nLast ) nTail = nLast;
    if( nHead > nTail ) return -1;

    const size_t nKeyLen = strlen(psKey);

    // Binary search over the half-open range [nLow, nHigh) for the first
    // entry that is not ordered before the prefix.
    int nLow = nHead;
    int nHigh = nTail + 1;
    while( nLow < nHigh ){
        const int nMiddle = nLow + (nHigh - nLow) / 2;
        if( strncmp(m_WordList[nMiddle].c_str(), psKey, nKeyLen) < 0 ){
            nLow = nMiddle + 1;
        }else{
            nHigh = nMiddle;
        }
    }

    if( nLow <= nTail && strncmp(m_WordList[nLow].c_str(), psKey, nKeyLen) == 0 ){
        return nLow;
    }
    return -1;
}
// Finds the last dictionary entry in [nHead, nTail] that begins with psKey.
//
// psKey        : NUL-terminated prefix.
// nHead, nTail : inclusive index range. The range is clamped to the valid
//                indices of m_WordList, so passing the element count as
//                nTail (as GetOneWord does) is accepted.
// Returns      : the largest index in the range whose entry starts with
//                psKey, or -1 when there is none, the range is empty, or
//                psKey is NULL.
//
// Requires m_WordList to be sorted in strcmp() order: the entries sharing a
// prefix are then contiguous, and the last of them sits just before the first
// entry whose leading strlen(psKey) bytes compare above psKey.
// NOTE(review): c_str() assumes the element type of m_WordList is
// std::string (declared in SegTool.h); confirm.
int CSegTool::SearchWordHeadDown( char *psKey ,int nHead, int nTail){
    if( psKey == NULL ) return -1;

    const int nLast = static_cast<int>(m_WordList.size()) - 1;
    if( nHead < 0 ) nHead = 0;
    if( nTail > nLast ) nTail = nLast;
    if( nHead > nTail ) return -1;

    const size_t nKeyLen = strlen(psKey);

    // Binary search over the half-open range [nLow, nHigh) for the first
    // entry that is ordered after every entry carrying the prefix.
    int nLow = nHead;
    int nHigh = nTail + 1;
    while( nLow < nHigh ){
        const int nMiddle = nLow + (nHigh - nLow) / 2;
        if( strncmp(m_WordList[nMiddle].c_str(), psKey, nKeyLen) <= 0 ){
            nLow = nMiddle + 1;
        }else{
            nHigh = nMiddle;
        }
    }

    // nLow is now one past the last candidate; verify that it really matches.
    if( nLow > nHead && strncmp(m_WordList[nLow - 1].c_str(), psKey, nKeyLen) == 0 ){
        return nLow - 1;
    }
    return -1;
}
// Binary search for an exact dictionary entry.
//
// psKey        : NUL-terminated word to find.
// nHead, nTail : inclusive index range. The range is clamped to the valid
//                indices of m_WordList, so passing the element count as
//                nTail (as SearchOneWord does) is accepted.
// Returns      : an index whose entry equals psKey, or -1 when there is none,
//                the range is empty, or psKey is NULL.
//
// Requires m_WordList to be sorted in strcmp() order. Every index of the
// range is reachable, including both end points, and no index outside the
// list is ever read.
// NOTE(review): c_str() assumes the element type of m_WordList is
// std::string (declared in SegTool.h); confirm.
int CSegTool::BSearch( char *psKey ,int nHead , int nTail ){
    if( psKey == NULL ) return -1;

    const int nLast = static_cast<int>(m_WordList.size()) - 1;
    if( nHead < 0 ) nHead = 0;
    if( nTail > nLast ) nTail = nLast;

    while( nHead <= nTail ){
        const int nMiddle = nHead + (nTail - nHead) / 2;
        const int nCmp = strcmp(m_WordList[nMiddle].c_str(), psKey);
        if( nCmp == 0 ){
            return nMiddle;
        }else if( nCmp > 0 ){
            nTail = nMiddle - 1;    // key sorts before the middle entry
        }else{
            nHead = nMiddle + 1;    // key sorts after the middle entry
        }
    }
    return -1;
}
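// --- Residue from the web code viewer (not part of the program) ---
// Keyboard shortcuts:
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -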