⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 senlink.cpp

📁 计算机英汉机器翻译系统中的英语词性标注方法实现
💻 CPP
字号:
#include "stdafx.h"
#include "stdio.h"
#include "process.h"
#include "SenLink.h"

// Builds a detached node: no neighbours, zero file offset and length,
// and an empty sentence string.
CSenNode::CSenNode()
{
	pLast = pNext = NULL;	// not linked into any list yet
	lSeek = lLen = 0;	// no file region associated yet
	Sen.Empty();
}

// Destructor with an empty body: the node performs no explicit cleanup here.
CSenNode::~CSenNode()
{
}

// Creates a sorter that owns no resources yet.
//
// The file handles and the output index pointer start as NULL so the
// destructor can tell whether InitSeekBuf() ever acquired them.
CSenLink::CSenLink()
{
	nSenNum = 0;
	nContainError = 0;
	fpIn = NULL;
	fpOut = NULL;
	NewSeekBuf = NULL;
}

// Releases the output index and closes the input file if they were acquired.
CSenLink::~CSenLink()
{
	// NewSeekBuf comes from "new long[...]", so it must be released with
	// the array form; delete[] on a NULL pointer is a no-op.
	delete [] NewSeekBuf;
	NewSeekBuf = NULL;

	// fpIn is NULL when InitSeekBuf() was never called or fopen() failed;
	// fclose() must not be called in that case.
	if(fpIn != NULL){
		fclose(fpIn);
		fpIn = NULL;
	}
}

// Opens the input dictionary (member sDicIn) and builds the entry index.
//
// Index layout:
//   SeekBuf[2*n]     -- file offset at which entry n starts
//   SeekBuf[2*n + 1] -- length of entry n in bytes, CR LF terminator included
//
// Entries are delimited by CR LF pairs. A file that ends with CR LF therefore
// yields a final entry of length zero. On success nSenNum holds the entry
// count and NewSeekBuf is allocated with room for 2*nSenNum values.
//
// The sDicOut parameter is not used by this function.
// Returns TRUE on success, FALSE if the file cannot be opened or the index
// would not fit.
BOOL CSenLink::InitSeekBuf(char *sDicOut)
{
	fpIn = fopen(sDicIn,"rb");
	if( !fpIn ){
		strMsg.Format("Cann't open %s!",sDicIn);
		AfxMessageBox(strMsg);
		return FALSE;
	}

	SeekBufPos = 0;
	SeekBuf[SeekBufPos++] = ftell(fpIn);	// start of the first entry

	// fgetc() returns an int so EOF can be told apart from every byte value.
	// Looping on EOF (rather than feof()) also ends the scan on a read error,
	// where feof() would stay false forever.
	int ch;
	while( (ch = fgetc(fpIn)) != EOF ){
		if( ch != 0x0d )
			continue;
		ch = fgetc(fpIn);
		if( ch == EOF )
			break;
		if( ch != 0x0a )
			continue;

		// Check capacity BEFORE storing the two values for this boundary.
		// Assumes SeekBuf holds MAXBUFNUM elements, as the limit check
		// implies -- confirm against the declaration in SenLink.h.
		if( SeekBufPos + 2 >= MAXBUFNUM ){
			long lNum = MAXBUFNUM;
			// "%d" expects an int, so convert explicitly.
			strMsg.Format("您的词典的词条数已经超过%d,建索引失败!",(int)(lNum/2));
			AfxMessageBox(strMsg);
			return FALSE;
		}

		long lNext = ftell(fpIn);
		// length of the entry that just ended
		SeekBuf[SeekBufPos] = lNext - SeekBuf[SeekBufPos - 1];
		SeekBufPos++;
		// start of the next entry
		SeekBuf[SeekBufPos] = lNext;
		SeekBufPos++;
	}
	// length of the last entry
	SeekBuf[SeekBufPos] = ftell(fpIn) - SeekBuf[SeekBufPos - 1];

	// number of entries
	nSenNum = (SeekBufPos+1) / 2;
	SeekBufPos = 0;
	NewSeekBuf = new long[nSenNum*2];
	NewSeekBufPos = 0;

	return TRUE;
}
// Builds the initial circular, doubly linked list from the first entries.
//
// Entry 0 becomes a one-node ring (head and end at once); entries 1 up to
// LINKNUM-1 (or nSenNum-1, whichever is smaller) are then inserted in order
// through InsertNewSen().
//
// Returns FALSE when nSenNum <= LINKNUM, i.e. every entry already fits in the
// list; returns TRUE when entries remain beyond the first LINKNUM.
BOOL CSenLink::InitLink()
{
	CSenNode *pNewSenNode;
	int i;		// declared here because it is read after the loop ends

	pNewSenNode = &SenNodeBuf[0];
	GetNewSen(pNewSenNode,0);
	pHead = pNewSenNode;
	pEnd = pNewSenNode;
	pNewSenNode->pLast = pNewSenNode;
	pNewSenNode->pNext = pNewSenNode;
	for(i=1; i < LINKNUM && i< nSenNum; i++)
	{
		pNewSenNode = &SenNodeBuf[i];
		GetNewSen(pNewSenNode,i);
		InsertNewSen(pNewSenNode);
	}
	if(i >= nSenNum)	// entry count is less than or equal to the list length
		return FALSE;
	return TRUE;
}

// Loads entry number nBufNo into pNewSenNode.
//
// The node's links are cleared, its offset and length are copied from SeekBuf,
// and its text is read from fpIn with fgets() into Sen. fgets() stores at most
// count-1 characters and stops after a line feed.
//
// Returns TRUE when text was read. Returns FALSE (leaving Sen empty) when the
// entry has no bytes, the seek fails, or nothing could be read.
BOOL CSenLink::GetNewSen(CSenNode *pNewSenNode,int nBufNo)
{
	char SenBuf[300];

	pNewSenNode->Sen.Empty();
	pNewSenNode->pLast = NULL;
	pNewSenNode->pNext = NULL;
	pNewSenNode->lSeek = SeekBuf[2*nBufNo];
	pNewSenNode->lLen  = SeekBuf[2*nBufNo + 1];

	if( fseek(fpIn,pNewSenNode->lSeek,SEEK_SET) != 0 )
		return FALSE;

	// Never let fgets() write past SenBuf: entries longer than the buffer
	// are compared by their leading bytes only.
	long lCount = pNewSenNode->lLen;
	if( lCount > (long)sizeof(SenBuf) )
		lCount = (long)sizeof(SenBuf);

	// A zero-length entry has nothing to read; a count <= 0 must not be
	// handed to fgets().
	if( lCount <= 0 )
		return FALSE;

	if( fgets(SenBuf,(int)lCount,fpIn) == NULL )
		return FALSE;

	pNewSenNode->Sen = SenBuf;
	return TRUE;
}

// Inserts pNewSenNode into the circular list at its ordered position.
//
// With bOrderMethor == TRUE the list runs from largest to smallest and the
// node goes in front of the first node it is >= to; otherwise the list runs
// from smallest to largest and the node goes in front of the first node it
// is <= to. If the node qualifies against the current head it becomes the
// new head.
//
// Returns TRUE when the node was placed in front of some existing node.
// Returns FALSE when the scan wrapped around without a match; the node is
// then linked in just before the head and becomes the new pEnd.
BOOL CSenLink::InsertNewSen(CSenNode *pNewSenNode)
{
	const BOOL bDescending = (bOrderMethor == TRUE);
	CSenNode *pScan = pHead;	// scanning starts at the head as it was on entry

	BOOL bBeforeHead;
	if(bDescending)
		bBeforeHead = (pNewSenNode->Sen >= pHead->Sen);
	else
		bBeforeHead = (pNewSenNode->Sen <= pHead->Sen);
	if(bBeforeHead)
		pHead = pNewSenNode;

	for(;;)
	{
		BOOL bGoesBefore;
		if(bDescending)
			bGoesBefore = (pNewSenNode->Sen >= pScan->Sen);
		else
			bGoesBefore = (pNewSenNode->Sen <= pScan->Sen);

		if(bGoesBefore)
		{
			AddNodeToLink(pNewSenNode,pScan);	// link the new node in front of pScan
			return TRUE;
		}

		pScan = pScan->pNext;

		// Back at the head: the new node belongs after every existing node.
		if(pScan == pHead)
		{
			AddNodeToLink(pNewSenNode,pScan);	// in front of the head == at the tail
			pEnd = pNewSenNode;
			return FALSE;
		}
	}
}

// Links pNew into the ring immediately in front of pTmp, i.e. between
// pTmp's current predecessor and pTmp itself.
void CSenLink::AddNodeToLink(CSenNode *pNew, CSenNode *pTmp)
{
	CSenNode *pPrev = pTmp->pLast;	// predecessor captured before any relinking

	// Point both neighbours at the new node ...
	pPrev->pNext = pNew;
	pTmp->pLast = pNew;

	// ... then point the new node back at them.
	pNew->pNext = pTmp;
	pNew->pLast = pPrev;
}

// Detaches the current end node from the ring and returns it.
//
// pEnd moves to the detached node's predecessor, the two neighbours are
// joined to each other, and the returned node is reset (empty text, zero
// offset/length, NULL links).
CSenNode *CSenLink::FreeNode()
{
	CSenNode *pFreed = pEnd;
	CSenNode *pBefore = pFreed->pLast;
	CSenNode *pAfter = pFreed->pNext;

	pEnd = pBefore;

	// Bridge the gap left by the removed node.
	pBefore->pNext = pAfter;
	pAfter->pLast = pBefore;

	// Hand the node back in a blank state.
	pFreed->Sen.Empty();
	pFreed->lSeek = 0;
	pFreed->lLen = 0;
	pFreed->pLast = NULL;
	pFreed->pNext = NULL;

	return pFreed;
}

// Appends pNode's (offset, length) pair to NewSeekBuf and advances
// NewSeekBufPos by two.
void CSenLink::SaveEndNode(CSenNode *pNode)
{
	NewSeekBuf[NewSeekBufPos] = pNode->lSeek;
	NewSeekBuf[NewSeekBufPos + 1] = pNode->lLen;
	NewSeekBufPos += 2;
}

void CSenLink::SaveLinkToNewBuf()
{
	CSenNode *pTmp;

	SaveEndNode(pEnd);
	for(pTmp = pEnd->pLast; pTmp !=pEnd; pTmp = pTmp->pLast)
		SaveEndNode(pTmp);
}
// Prepares for another sorting pass: copies the 2*nSenNum values of
// NewSeekBuf into SeekBuf and rewinds NewSeekBufPos to zero.
void CSenLink::ChangeBuf()
{
	const int nTotal = 2*nSenNum;
	int nIdx = 0;

	while(nIdx < nTotal)
	{
		SeekBuf[nIdx] = NewSeekBuf[nIdx];
		++nIdx;
	}
	NewSeekBufPos = 0;
}

// Writes the entries to sDicOut in the order recorded in NewSeekBuf.
//
// With bSaveMethor == TRUE the (offset, length) pairs are consumed from the
// front of NewSeekBuf; otherwise they are consumed from the back, which
// reverses the recorded order. Each entry's bytes are copied from fpIn.
//
// Entries are copied in chunks no larger than the local buffer, so an entry
// longer than the buffer cannot overrun it. The output file is closed on
// every path once it has been opened.
//
// Returns TRUE on success. On failure a message box is shown and FALSE is
// returned.
BOOL CSenLink::SaveOrderResult(char *sDicOut)
{
	char SenBuf[500];
	const char *pszError = NULL;	// message of the first failure, if any

	fpOut = fopen(sDicOut,"wb");
	if( !fpOut ){
		strMsg.Format("无法创建临时文件,建索引失败!");
		AfxMessageBox(strMsg);
		return FALSE;
	}

	const BOOL bForward = (bSaveMethor == TRUE);
	int i = bForward ? 0 : 2*nSenNum - 1;

	for(int SenNo = 0; SenNo < nSenNum && pszError == NULL; SenNo++)
	{
		long lSeek,lLen;
		if(bForward){
			lSeek = NewSeekBuf[i++];
			lLen = NewSeekBuf[i++];
		}
		else{
			// walking backwards, the length is met before the offset
			lLen = NewSeekBuf[i--];
			lSeek = NewSeekBuf[i--];
		}

		if( fseek(fpIn,lSeek,SEEK_SET) != 0 ){
			pszError = "读临时文件时发生错误,建索引失败!";
			break;
		}

		// Copy the entry piecewise so lLen may exceed sizeof(SenBuf).
		while( lLen > 0 ){
			size_t nChunk = sizeof(SenBuf);
			if( lLen < (long)nChunk )
				nChunk = (size_t)lLen;

			if( fread(SenBuf,sizeof(char),nChunk,fpIn) != nChunk ){
				pszError = "读临时文件时发生错误,建索引失败!";
				break;
			}
			if( fwrite(SenBuf,sizeof(char),nChunk,fpOut) != nChunk ){
				pszError = "写临时文件时发生错误,磁盘已满?";
				break;
			}
			lLen -= (long)nChunk;
		}
	}

	// Buffered data is flushed by fclose(); a failure there is a write error.
	if( fclose(fpOut) != 0 && pszError == NULL )
		pszError = "写临时文件时发生错误,磁盘已满?";
	fpOut = NULL;

	if( pszError != NULL ){
		strMsg.Format("%s",pszError);
		AfxMessageBox(strMsg);
		return FALSE;
	}

	return TRUE;
}

// Sorts the entries of file sIn and writes the result to file sOut.
//
// bMethor selects the initial ordering used by InsertNewSen().
//
// Each pass keeps a window of LINKNUM ordered nodes: every further entry is
// inserted into the window and the window's end node is then recorded and
// recycled. An insertion that lands at the tail counts as one "error".
// Passes repeat until one completes with no errors. When at least half of
// the insertions in a pass are errors, the input is taken to be in roughly
// reverse order: the ordering is flipped and the result is written from the
// back of the index instead.
//
// Returns TRUE on success, FALSE if indexing or writing the result fails.
BOOL CSenLink::SortMain(char *sIn,char *sOut,BOOL bMethor)
{
	int nBufNo;

	// NOTE(review): no length check here; the capacity of sDicIn/sDicOut is
	// declared outside this file -- confirm callers pass short enough paths.
	strcpy(sDicIn,sIn);
	strcpy(sDicOut,sOut);
	bOrderMethor = bMethor;
	bSaveMethor = TRUE;		// write the result in recorded order

	if( !InitSeekBuf(sDicOut) )
		return FALSE;

	do{
		if (InitLink() == FALSE)		   // build the initial window
		{
			SaveLinkToNewBuf();			   // everything fits in the window: sorting is done
			break;
		}

		nContainError = 0;
		NewSeekBufPos = 0;
		pNewSenNode = &SenNodeBuf[LINKNUM];
		for(nBufNo = LINKNUM; nBufNo < nSenNum; nBufNo++)
		{
			GetNewSen(pNewSenNode,nBufNo);
			if( !InsertNewSen(pNewSenNode) )
				nContainError ++;
			SaveEndNode(pEnd);		  // record the end node
			pNewSenNode = FreeNode();
		}

		// An error-free pass must always be saved. Without the explicit
		// zero test, nSenNum == LINKNUM + 1 makes the threshold 0, the
		// comparison "0 < 0" fails, and the loop would exit with the window
		// never written to NewSeekBuf.
		//
		// Otherwise: the input is mostly in the requested order.
		if( nContainError == 0 ||
			nContainError < (int) (nSenNum - LINKNUM) / 2 )
		{
			SaveLinkToNewBuf();	// record the window in NewSeekBuf
			ChangeBuf();		// NewSeekBuf --> SeekBuf
		}
		// The input is mostly in the opposite order.
		// NOTE(review): nothing is saved here, so input that produces many
		// errors in both directions appears able to flip forever -- confirm.
		else{
			bOrderMethor = !bOrderMethor;
			bSaveMethor = FALSE;
		}
	}while( nContainError );
	if( !SaveOrderResult(sDicOut) )
		return FALSE;

	return TRUE;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -