// senlink.cpp
#include "stdafx.h"
#include "stdio.h"
#include "process.h"
#include "SenLink.h"
// Creates a node that is not linked to anything: both neighbour pointers are
// NULL, the file offset and length are zero, and the entry text is empty.
CSenNode::CSenNode()
{
    pLast = pNext = NULL;
    lSeek = lLen = 0;
    Sen.Empty();
}
// Destructor. The body is intentionally empty: nothing is released explicitly here.
CSenNode::~CSenNode()
{
}
// Creates an idle sorter: no entries indexed, no errors counted, no file open,
// no result buffer allocated and an empty list.
//
// The file handles, the result buffer pointer and the list end pointers are
// given defined values so that code which inspects them before SortMain() has
// run never reads an indeterminate pointer.
CSenLink::CSenLink()
{
    nSenNum = 0;
    nContainError = 0;
    fpIn = NULL;
    fpOut = NULL;
    NewSeekBuf = NULL;
    pHead = NULL;
    pEnd = NULL;
}
// Releases the result buffer and closes the input file.
CSenLink::~CSenLink()
{
    // NewSeekBuf is allocated with new long[...] in InitSeekBuf(), so it must
    // be released with the array form of delete. nSenNum is non-zero only
    // after that allocation has been made.
    if(nSenNum != 0)
        delete [] NewSeekBuf;

    // fopen() leaves fpIn NULL when the input file could not be opened;
    // passing NULL to fclose() is not allowed.
    if(fpIn != NULL)
        fclose(fpIn);
}
// Builds the seek index (SeekBuf) for the input dictionary file.
//
// Opens the file named by the member sDicIn in binary mode and records, for
// every entry (a run of bytes terminated by CR LF, or by end of file for the
// last one):
//   SeekBuf[2*n]     -- start offset of entry n
//   SeekBuf[2*n + 1] -- length of entry n in bytes, terminator included
// On success nSenNum holds the number of entries and NewSeekBuf is allocated
// with room for 2*nSenNum values.
//
// sDicOut: not used by this function (kept for interface compatibility).
// Returns TRUE on success; FALSE (after showing a message box) when the file
// cannot be opened or read, or when it holds more entries than SeekBuf can
// index.
BOOL CSenLink::InitSeekBuf(char *sDicOut)
{
    int ch;          // int, not char: fgetc() returns EOF outside the char range
    int chPrev = 0;  // previous byte, used to recognise the CR LF pair

    // A non-zero nSenNum means an earlier call succeeded and still owns a
    // result buffer and an open input file; release both before starting over
    // so that repeated sorting does not leak them.
    if(nSenNum != 0){
        delete [] NewSeekBuf;
        NewSeekBuf = NULL;
        fclose(fpIn);
        fpIn = NULL;
        nSenNum = 0;
    }

    fpIn = fopen(sDicIn,"rb");
    if( !fpIn ){
        strMsg.Format("Cann't open %s!",sDicIn);
        AfxMessageBox(strMsg);
        return FALSE;
    }

    SeekBufPos = 0;
    SeekBuf[SeekBufPos++] = ftell(fpIn);   // start offset of the first entry

    // Reading until fgetc() reports EOF also ends the loop on a read error;
    // a loop driven by feof() alone would never terminate in that case.
    while( (ch = fgetc(fpIn)) != EOF ){
        // Comparing against the previous byte (instead of consuming a
        // look-ahead byte after each CR) keeps "\r\r\n" recognised as a
        // line end.
        if( ch == 0x0a && chPrev == 0x0d ){
            // Three more slots are needed: this entry's length, the next
            // entry's start and, after the loop, the last entry's length.
            // Check before writing so SeekBuf is never indexed at MAXBUFNUM
            // or beyond.
            // NOTE(review): assumes SeekBuf has at least MAXBUFNUM elements -- confirm in SenLink.h.
            if(SeekBufPos + 2 >= MAXBUFNUM){
                long lNum = MAXBUFNUM;
                strMsg.Format("您的词典的词条数已经超过%d,建索引失败!",lNum/2);
                AfxMessageBox(strMsg);
                return FALSE;
            }
            // length of the entry that just ended
            SeekBuf[SeekBufPos] = ftell(fpIn) - SeekBuf[SeekBufPos - 1];
            SeekBufPos++;
            // start offset of the next entry
            SeekBuf[SeekBufPos] = ftell(fpIn);
            SeekBufPos++;
        }
        chPrev = ch;
    }
    if( ferror(fpIn) ){
        strMsg.Format("Can't read %s!",sDicIn);
        AfxMessageBox(strMsg);
        return FALSE;
    }

    // length of the last entry (zero when the file ends with CR LF)
    SeekBuf[SeekBufPos] = ftell(fpIn) - SeekBuf[SeekBufPos - 1];
    // number of entries
    nSenNum = (SeekBufPos+1) / 2;
    SeekBufPos = 0;
    NewSeekBuf = new long[nSenNum*2];
    NewSeekBufPos = 0;
    return TRUE;
}
// Builds the initial sorted list from the first entries of the index.
//
// Entry 0 becomes a one-node ring (its own predecessor and successor); the
// following entries, up to LINKNUM nodes or nSenNum entries in total, are
// read into SenNodeBuf[] and inserted in order.
//
// Returns FALSE when every entry already fits into the list (the number of
// entries is less than or equal to the list length), TRUE when entries remain
// to be fed through the list.
BOOL CSenLink::InitLink()
{
    CSenNode *pNewSenNode;
    int i;   // declared outside the loop: its final value is tested below,
             // which standard C++ does not allow for a for-init variable

    pNewSenNode = &SenNodeBuf[0];
    GetNewSen(pNewSenNode,0);
    pHead = pNewSenNode;
    pEnd = pNewSenNode;
    pNewSenNode->pLast = pNewSenNode;
    pNewSenNode->pNext = pNewSenNode;

    for(i=1; i < LINKNUM && i< nSenNum; i++)
    {
        pNewSenNode = &SenNodeBuf[i];
        GetNewSen(pNewSenNode,i);
        InsertNewSen(pNewSenNode);
    }

    if(i >= nSenNum)   // all entries are already in the list
        return FALSE;
    return TRUE;
}
// Loads entry number nBufNo of the index into pNewSenNode.
//
// The node is reset (empty text, no neighbours), its lSeek/lLen are taken
// from SeekBuf[2*nBufNo] and SeekBuf[2*nBufNo + 1], and the entry text is
// read from fpIn into pNewSenNode->Sen. Only the text that fits the local
// buffer is kept; entries longer than that are truncated in the node.
//
// Returns TRUE when text was read, FALSE when positioning or reading failed
// (the node's text is then left empty).
BOOL CSenLink::GetNewSen(CSenNode *pNewSenNode,int nBufNo)
{
    char SenBuf[300];
    long lCount;

    pNewSenNode->Sen.Empty();
    pNewSenNode->pLast = NULL;
    pNewSenNode->pNext = NULL;
    pNewSenNode->lSeek = SeekBuf[2*nBufNo];
    pNewSenNode->lLen = SeekBuf[2*nBufNo + 1];

    if(fseek(fpIn,pNewSenNode->lSeek,SEEK_SET) != 0)
        return FALSE;

    // fgets() stores at most (count - 1) characters, so ask for lLen + 1 to
    // get the whole entry, and never more than the buffer can hold.
    lCount = pNewSenNode->lLen + 1;
    if(lCount > (long)sizeof(SenBuf))
        lCount = (long)sizeof(SenBuf);

    if(fgets(SenBuf,(int)lCount,fpIn) == NULL)
        return FALSE;
    pNewSenNode->Sen = SenBuf;
    return TRUE;
}
// Inserts pNewSenNode into the circular list at its ordered position.
//
// When bOrderMethor is TRUE the list runs from large to small and the node is
// placed in front of the first node it is >= to; otherwise the list runs from
// small to large and "<=" is used. A node that belongs in front of the head
// becomes the new head.
//
// Returns TRUE when a position was found during the scan. Returns FALSE when
// the scan wrapped around to the head without a match; the node is then
// linked in front of the head (i.e. at the tail of the ring) and becomes pEnd.
BOOL CSenLink::InsertNewSen(CSenNode *pNewSenNode)
{
    const BOOL bLargeFirst = (bOrderMethor == TRUE);

    // The scan starts at the head that is current on entry, even if the head
    // pointer is moved to the new node just below.
    CSenNode *pCursor = pHead;

    const BOOL bLeadsHead = bLargeFirst ? (pNewSenNode->Sen >= pHead->Sen)
                                        : (pNewSenNode->Sen <= pHead->Sen);
    if(bLeadsHead)
        pHead = pNewSenNode;

    for(;;)
    {
        const BOOL bGoesBefore = bLargeFirst ? (pNewSenNode->Sen >= pCursor->Sen)
                                             : (pNewSenNode->Sen <= pCursor->Sen);
        if(bGoesBefore)
        {
            AddNodeToLink(pNewSenNode,pCursor);   // link in front of pCursor
            return TRUE;
        }

        pCursor = pCursor->pNext;

        // Back at the head: the new node sorts after every node in the list.
        if(pCursor == pHead)
        {
            AddNodeToLink(pNewSenNode,pCursor);   // in front of head == at the tail
            pEnd = pNewSenNode;
            return FALSE;
        }
    }
}
// Links pNew into the ring immediately in front of pTmp, i.e. between pTmp's
// current predecessor and pTmp itself.
void CSenLink::AddNodeToLink(CSenNode *pNew, CSenNode *pTmp)
{
    CSenNode *const pBefore = pTmp->pLast;   // node that precedes pTmp right now

    // neighbours first ...
    pBefore->pNext = pNew;
    pTmp->pLast    = pNew;
    // ... then the new node's own links
    pNew->pNext    = pTmp;
    pNew->pLast    = pBefore;
}
// Unlinks the tail node (pEnd) from the ring and returns it.
// The node's predecessor becomes the new pEnd. The returned node is wiped:
// empty text, zero offset and length, no neighbours.
CSenNode *CSenLink::FreeNode()
{
    CSenNode *const pDetached = pEnd;
    CSenNode *const pBefore   = pDetached->pLast;
    CSenNode *const pAfter    = pDetached->pNext;

    // close the ring around the removed node
    pEnd = pBefore;
    pBefore->pNext = pAfter;
    pAfter->pLast  = pBefore;

    // hand the node back in a clean state
    pDetached->pNext = NULL;
    pDetached->pLast = NULL;
    pDetached->lLen  = 0;
    pDetached->lSeek = 0;
    pDetached->Sen.Empty();
    return pDetached;
}
// Appends pNode's (offset, length) pair to NewSeekBuf at NewSeekBufPos and
// advances NewSeekBufPos by two.
void CSenLink::SaveEndNode(CSenNode *pNode)
{
    long *const pSlot = &NewSeekBuf[NewSeekBufPos];

    pSlot[0] = pNode->lSeek;
    pSlot[1] = pNode->lLen;
    NewSeekBufPos += 2;
}
void CSenLink::SaveLinkToNewBuf()
{
CSenNode *pTmp;
SaveEndNode(pEnd);
for(pTmp = pEnd->pLast; pTmp !=pEnd; pTmp = pTmp->pLast)
SaveEndNode(pTmp);
}
// Prepares for another sorting pass: copies the 2*nSenNum values of
// NewSeekBuf into SeekBuf and rewinds NewSeekBufPos to the start.
void CSenLink::ChangeBuf()
{
    int nSlot = 0;

    while(nSlot < 2*nSenNum)
    {
        SeekBuf[nSlot] = NewSeekBuf[nSlot];
        ++nSlot;
    }
    NewSeekBufPos = 0;
}
// Writes the entries to the output file in the order recorded in NewSeekBuf.
//
// NewSeekBuf holds (offset, length) pairs. When bSaveMethor is TRUE the pairs
// are used front to back; otherwise they are used back to front, which
// reverses the recorded order. Each entry is copied byte for byte from fpIn.
//
// sDicOut: path of the file to create (opened in binary write mode).
// Returns TRUE when all nSenNum entries were written and the file was closed
// successfully; FALSE (after showing a message box) on any open, seek, read
// or write failure. The output file is closed on every path.
BOOL CSenLink::SaveOrderResult(char *sDicOut)
{
    char SenBuf[500];
    fpOut = fopen(sDicOut,"wb");
    if( !fpOut ){
        strMsg.Format("无法创建临时文件,建索引失败!");
        AfxMessageBox(strMsg);
        return FALSE;
    }

    long lLen,lSeek;
    int SenNo,i;

    if(bSaveMethor == TRUE)
        i = 0;
    else
        i = 2*nSenNum - 1;

    for(SenNo = 0; SenNo < nSenNum; SenNo ++)
    {
        if(bSaveMethor == TRUE){
            lSeek = NewSeekBuf[i++];
            lLen = NewSeekBuf[i++];
        }
        else{
            // walking backwards, the length is met before the offset
            lLen = NewSeekBuf[i--];
            lSeek = NewSeekBuf[i--];
        }

        if( fseek(fpIn,lSeek,SEEK_SET) != 0 ){
            strMsg.Format("读临时文件时发生错误,建索引失败!");
            AfxMessageBox(strMsg);
            fclose(fpOut);
            fpOut = NULL;
            return FALSE;
        }

        // Copy in pieces no larger than the local buffer so that an entry
        // longer than the buffer cannot overrun it.
        while( lLen > 0 )
        {
            const size_t nChunk = (lLen < (long)sizeof(SenBuf)) ? (size_t)lLen
                                                                : sizeof(SenBuf);
            if( fread(SenBuf,sizeof(char),nChunk,fpIn) != nChunk ){
                strMsg.Format("读临时文件时发生错误,建索引失败!");
                AfxMessageBox(strMsg);
                fclose(fpOut);
                fpOut = NULL;
                return FALSE;
            }
            if( fwrite(SenBuf,sizeof(char),nChunk,fpOut) != nChunk ){
                strMsg.Format("写临时文件时发生错误,磁盘已满?");
                AfxMessageBox(strMsg);
                fclose(fpOut);
                fpOut = NULL;
                return FALSE;
            }
            lLen -= (long)nChunk;
        }
    }

    // Buffered data is flushed by fclose(); a failure here is a write failure.
    const int nCloseResult = fclose(fpOut);
    fpOut = NULL;
    if( nCloseResult != 0 ){
        strMsg.Format("写临时文件时发生错误,磁盘已满?");
        AfxMessageBox(strMsg);
        return FALSE;
    }
    return TRUE;
}
// Sorts the entries of file sIn and writes the result to file sOut.
//
// sIn     : path of the input file; copied into the member sDicIn.
// sOut    : path of the output file; copied into the member sDicOut.
// bMethor : initial value of bOrderMethor, i.e. which comparison
//           InsertNewSen() uses to order the list.
// Returns TRUE on success, FALSE when indexing the input or writing the
// output failed.
//
// How a pass works: the first LINKNUM entries are loaded into the sorted list
// (InitLink). Every further entry is inserted into the list and the list's
// tail node is then written to NewSeekBuf and recycled for the next entry.
// An entry for which InsertNewSen() returns FALSE (it sorted after every node
// in the list) is counted in nContainError. Passes are repeated, feeding the
// result of one pass into the next, until a pass counts no such entry.
//
// If the first pass counts many such entries the input is taken to be mostly
// in the opposite order: the comparison is flipped and the final result is
// written back to front (bSaveMethor = FALSE) instead.
BOOL CSenLink::SortMain(char *sIn,char *sOut,BOOL bMethor)
{
    int nBufNo;
    BOOL bReversed = FALSE;   // TRUE once the sort direction has been flipped

    // NOTE(review): unbounded copies -- the capacity of sDicIn/sDicOut is not
    // visible here; confirm the callers cannot pass longer paths.
    strcpy(sDicIn,sIn);
    strcpy(sDicOut,sOut);
    bOrderMethor = bMethor;
    bSaveMethor = TRUE;       // write the result in recorded order

    if( !InitSeekBuf(sDicOut) )
        return FALSE;

    do{
        if (InitLink() == FALSE)   // build the initial list
        {
            // All entries fit into the list: it already is the full result.
            SaveLinkToNewBuf();
            break;
        }
        nContainError = 0;
        NewSeekBufPos = 0;
        pNewSenNode = &SenNodeBuf[LINKNUM];   // spare node beyond the list's LINKNUM nodes
        for(nBufNo = LINKNUM; nBufNo < nSenNum; nBufNo++)
        {
            GetNewSen(pNewSenNode,nBufNo);
            if( !InsertNewSen(pNewSenNode) )
                nContainError ++;
            SaveEndNode(pEnd);           // record the tail node
            pNewSenNode = FreeNode();    // and reuse it for the next entry
        }

        // Keep the result of this pass when
        //  - the pass was clean (nContainError == 0): the loop ends right
        //    after this, so NewSeekBuf must hold the complete result. Without
        //    this test a clean pass with (nSenNum - LINKNUM) / 2 == 0 would
        //    leave NewSeekBuf only partly filled;
        //  - the input looks mostly ordered in the current direction; or
        //  - the direction has already been flipped once. Flipping again
        //    would discard the pass and, on input that defeats both
        //    directions, repeat forever without making progress.
        if( bReversed || nContainError == 0
            || nContainError < (int) (nSenNum - LINKNUM) / 2 )
        {
            SaveLinkToNewBuf();   // append what is left in the list
            ChangeBuf();          // NewSeekBuf --> SeekBuf for the next pass
        }
        // The input looks mostly ordered the other way round.
        else{
            bOrderMethor = !bOrderMethor;
            bSaveMethor = FALSE;
            bReversed = TRUE;
        }
    }while( nContainError );

    if( !SaveOrderResult(sDicOut) )
        return FALSE;
    return TRUE;
}
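// End of senlink.cpp.
// (The text that followed here was the keyboard-shortcut help panel of the web
// code viewer this file was copied from -- copy, search, full screen, theme and
// font-size shortcuts -- and is not part of the source.)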