⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lex.cpp

📁 lex语法分析
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// lex.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include "lex.h"

#include <stdlib.h>
#include <fstream>
#include <string>
#include <vector>

#include <string.h>
#include "LexBTree.h"
#include "LibLexic.h"
#include "LexConst.h"

// Buffer-size constants.
// MAX_CHARS_LINE is not referenced in the visible part of this file;
// presumably a per-line buffer size used elsewhere -- TODO confirm.
#define MAX_CHARS_LINE 10240
// Size of the file-name buffer that pre_exec() fills via ReadString.
#define LEN_FILENAME  64
// Size of the buffer pre_exec() uses for the numeric text read from the
// "startln.log" and "numlines.log" files.
#define LEN_DIGIT     8


// Debug builds only: redefine `new` as DEBUG_NEW and keep this file's name
// in THIS_FILE (presumably consumed by the MFC debug allocator for leak
// reports -- confirm against the MFC headers).
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

/////////////////////////////////////////////////////////////////////////////
// The one and only application object

CWinApp theApp;

using namespace std;

// Forward declarations for the free functions of this file. Several of the
// definitions are outside the visible part of the file; notes on those are
// hedged accordingly.

// Reads file names from "liblist" and passes each one to exec().
void pre_exec(void);
// Handles one input file named by cstr_fname (definition not visible here).
void exec(char * cstr_fname);
// Report writers. ShowToFile and ShowToFileEx both write "out.txt" from
// "lexdict.dat"; ShowToFile1 is defined outside the visible part.
void ShowToFile(void);
void ShowToFileEx(void);
void ShowToFile1(void);
// Per-cell helpers; callers use the results as a word frequency and a tag
// count respectively (definitions not visible here).
int SumWordFreq(CLexBTree::SCell& cell);
int SumCellTagNum(CLexBTree::SCell& cell);
// Text-formatting helpers: each builds cstrDest from a source string plus
// one more field. `dist` is presumably a column/width -- TODO confirm
// against the definitions.
void CstrFormat(char* cstrSrc,int nSrc,char* cstrDest,int dist);
void CstrFormatEx(char* cstrSrc,int nSrc,char* cstrDest,int dist);
void CstrFormatChar(char* cstrSrc1,char* cstrSrc2,char* cstrDest,int dist);
void CstrFormatCharEx(char* cstrSrc1,char* cstrSrc2,char* cstrDest,int dist);
bool HaveTagUnknown(CLexBTree::SCell& cell);

// Not called in the visible part of this file; purpose not verifiable here.
void TitleFormat(char* cstrShow);
void TitleHyphenFormat(char* cstrShow);
int AllWordNumFormat(char* cstrShow);
int RecWordNumFormat(char* cstrShow);
int AllSubRecNumFormat(char* cstrShow);
// Writers for "lattice.txt", "bigsum.txt"/"bigsumex.txt" and
// "eachtagsum.txt" respectively.
void WriteLattice();
void WriteBigSum();
void WriteEachTagInDictSum();

// Capacity of the g_aryWordOfBigSum table.
#define NUM_ARYWORDOFBIGSUM  4096
// One entry of the table filled by SaveBigSumWordInfo(). That function
// inserts entries so that `sum` is in descending order; a slot whose sum is
// 0 is treated as unused.
struct SBigSumInfo{
  int  sum;         // value passed as `sum` (ShowToFileEx passes SumWordFreq's result)
  int  page_naddr;  // page index within "lexdict.dat" where the word was found
  int  cell_slot;   // index of the word's cell inside that page
  char word[32];    // NUL-terminated copy of the word
}g_aryWordOfBigSum[NUM_ARYWORDOFBIGSUM];
// Allocated and deleted inside pre_exec().
CLexBTree *pBT;
// Source of m_aryTagLattice / m_aryRecWordTagNum read by WriteLattice() and
// WriteEachTagInDictSum().
CLibLexic liblexic;
void _tmain(void)
{
	for(int i=0; i<NUM_ARYWORDOFBIGSUM; i++)
	{
		g_aryWordOfBigSum[i].page_naddr = -1;
		g_aryWordOfBigSum[i].cell_slot = -1;
		g_aryWordOfBigSum[i].sum = 0;
		g_aryWordOfBigSum[i].word[0] = '\0';
	}
	pre_exec();
//	ShowToFile();
	ShowToFile1();
	ShowToFileEx();
	WriteLattice();
	WriteBigSum();
	WriteEachTagInDictSum();
}


void pre_exec(void)
{
	int nStartLine;
	int nNumOfLines;
	char cstr_filename[LEN_FILENAME];
	char cstr_digit[LEN_DIGIT];
	CStdioFile mfc_libfile("liblist",CFile::modeRead);

	CStdioFile mfc_logfile("startln.log",CFile::modeRead);
	mfc_logfile.ReadString(cstr_digit,LEN_DIGIT);
	nStartLine = atoi(cstr_digit);
	mfc_logfile.Close();

	CStdioFile mfc_numlinefile("numlines.log",CFile::modeRead);
	mfc_numlinefile.ReadString(cstr_digit,LEN_DIGIT);
	nNumOfLines = atoi(cstr_digit);
	mfc_numlinefile.Close();

	for(int k=1;
		(k<nStartLine)  ;
		k++) {
		if(!mfc_libfile.ReadString(cstr_filename,LEN_FILENAME)) 
			break;
	}
	cout<<"Let's begin  ..."<<endl;
	try {	
		pBT = new CLexBTree();
		for(int i=0; 
		(i<nNumOfLines) && mfc_libfile.ReadString(cstr_filename,LEN_FILENAME);
		i++){
			if(cstr_filename[strlen(cstr_filename)-1]=='\n') {
				cstr_filename[strlen(cstr_filename)-1] = 0;
				//ReadString会把回车符也算在字符串内
			}
			exec(cstr_filename);
			cout<<"Handled No."<<nStartLine+i<<" file -- "<<cstr_filename<<endl;
		}
		pBT->FlushToDisk();
		delete pBT;
	}
	catch(CNodeError){
			cout<<"Node Error"<<endl;
			exit(1);
	}
	catch(CKeyValueError) {
			cout<<"KeyValue.error"<<endl;
			exit(1);
	}
	catch(CTagNumError) {
			cout<<"Tag num error"<<endl;
			exit(1);
	}
	catch (CFileException) {
			cout<<"Open File Error!"<<endl;
			exit(1);
	}


	mfc_libfile.Close();
}
// Dumps every word stored in "lexdict.dat" to "out.txt", one line per word,
// each line produced by CstrFormat from the word and its SumWordFreq value.
//
// The dictionary file is read as an array of CLexBTree::SPage records that
// are chained through next_page_naddr, starting at page 0; a link of -1
// ends the chain.
void ShowToFile(void)
{
	char cstrShow[120];
	CLexBTree::SPage page;
	int naddr,i;

	CFile filRead("lexdict.dat",CFile::modeRead);
	CStdioFile filWrite("out.txt",CFile::modeCreate|CFile::modeWrite);

	for(naddr=0; naddr!=-1; )
	{
		filRead.Seek(naddr* sizeof(CLexBTree::SPage),CFile::begin);
		// A short read (empty/truncated file or a bad link) would leave
		// `page` uninitialised or stale, which could loop forever or walk
		// garbage cells -- stop instead.
		if(filRead.Read((void*) &page,sizeof(CLexBTree::SPage))
			!= sizeof(CLexBTree::SPage))
			break;
		for(i=0; i<page.used_cell_slot_sum; i++) {
			CstrFormat(page.cell_ary[i].word,SumWordFreq(page.cell_ary[i]),
				cstrShow,35);
			filWrite.WriteString(cstrShow);
			filWrite.WriteString("\n");
		}
		naddr = page.next_page_naddr;		
	}
	filRead.Close();
	filWrite.Close();
}
// Records a word in g_aryWordOfBigSum, which is kept sorted by descending
// `sum` and holds at most NUM_ARYWORDOFBIGSUM entries.
//
//   page_naddr, cell_slot - where the word lives in the dictionary file
//   sum                   - ranking value for the word
//   word                  - NUL-terminated word text
//
// The new entry goes in front of the first entry whose sum is strictly
// smaller (so among equal sums the earlier arrival stays first). When the
// table is full the last entry is dropped; if no entry is smaller, nothing
// is stored. Unused slots carry sum 0, so a sum <= 0 is never stored.
void SaveBigSumWordInfo(int page_naddr,int cell_slot,int sum,char word[32])
{
	int i,j;
	int len;

	// Count the occupied slots: they form a prefix of positive sums.
	for(len=0; len<NUM_ARYWORDOFBIGSUM; len++)
	{
		if(g_aryWordOfBigSum[len].sum<=0)
			break;
	}

	for(i=0; i<NUM_ARYWORDOFBIGSUM; i++)
	{
		if(sum > g_aryWordOfBigSum[i].sum) 
		{
			// Open a gap at i by shifting the tail one slot down. When the
			// table is full, start at the last slot so its entry falls off.
			j = (len<NUM_ARYWORDOFBIGSUM) ? len : NUM_ARYWORDOFBIGSUM-1;
			for(; j>i; j--)
			{
				g_aryWordOfBigSum[j] = g_aryWordOfBigSum[j-1];
			}
			g_aryWordOfBigSum[i].page_naddr = page_naddr;
			g_aryWordOfBigSum[i].cell_slot  = cell_slot;
			g_aryWordOfBigSum[i].sum = sum;
			// `word` decays to a plain pointer, so its real length is not
			// guaranteed: bound the copy to the field and always terminate.
			strncpy(g_aryWordOfBigSum[i].word,word,
				sizeof(g_aryWordOfBigSum[i].word)-1);
			g_aryWordOfBigSum[i].word[sizeof(g_aryWordOfBigSum[i].word)-1] = '\0';
			break;
		}
	}	
}
void WriteBigSum()
{
	char cstr[128] ;
	char cstrEx[128];
	char cstrTemp[128];

	CStdioFile filBigSum("bigsum.txt",CFile::modeCreate|CFile::modeWrite);
	CStdioFile filBigSumEx("bigsumex.txt",CFile::modeCreate|CFile::modeWrite);
	for(int i=0; i<NUM_ARYWORDOFBIGSUM; i++)
	{
		strcpy(cstr,"");
		CstrFormat(cstr, g_aryWordOfBigSum[i].page_naddr,cstrTemp,5);
		strcpy(cstr,cstrTemp);
		CstrFormat(cstr,g_aryWordOfBigSum[i].cell_slot,cstrTemp,15);
		strcpy(cstr,cstrTemp);
		CstrFormatChar(cstr,g_aryWordOfBigSum[i].word,cstrTemp,25);;
		strcpy(cstr,cstrTemp);
		CstrFormat(cstr,g_aryWordOfBigSum[i].sum ,cstrTemp,60);
		strcpy(cstr,cstrTemp);

		strcpy(cstrEx,"{");
		CstrFormatEx(cstrEx, g_aryWordOfBigSum[i].page_naddr,cstrTemp,10);
		strcpy(cstrEx,cstrTemp);
		CstrFormatCharEx(cstrEx, ",",cstrTemp,11);
		strcpy(cstrEx,cstrTemp);
		CstrFormatEx(cstrEx,g_aryWordOfBigSum[i].cell_slot,cstrTemp,20);
		strcpy(cstrEx,cstrTemp);
		CstrFormatCharEx(cstrEx, "},",cstrTemp,24);
		strcpy(cstrEx,cstrTemp);
		
		filBigSum.WriteString(cstr);
		filBigSum.WriteString("\n");	
		filBigSumEx.WriteString(cstrEx);
		filBigSumEx.WriteString("\n");	
	}
	filBigSum.Close();
	filBigSumEx.Close();
}
void WriteEachTagInDictSum()
{
	int i;
	char cstr[1024];
	char cstrTemp[1024];
	CStdioFile filEachTagSum("eachtagsum.txt",CFile::modeCreate|CFile::modeWrite);
	strcpy(cstr,"{");
	for(i=0; i<61; i++)
	{	
		CstrFormatEx(cstr,liblexic.m_aryRecWordTagNum[i],cstrTemp,(i+1)*10);
		strcpy(cstr,cstrTemp);
		CstrFormatCharEx(cstr,",",cstrTemp,(i+1)*10+1);
		strcpy(cstr,cstrTemp);
	}
	CstrFormatEx(cstr,liblexic.m_aryRecWordTagNum[i],cstrTemp,(i+1)*10);
	strcpy(cstr,cstrTemp);
	CstrFormatCharEx(cstr,"},",cstrTemp,(i+1)*10+4);
	strcpy(cstr,cstrTemp);		
	filEachTagSum.WriteString(cstr);
	filEachTagSum.WriteString("\n");
	filEachTagSum.Close();	
}
void WriteLattice()
{
	int i,j;
	char cstr[1024];
	char cstrTemp[1024];
	CStdioFile filLattice("lattice.txt",CFile::modeCreate|CFile::modeWrite);
	for(i=0; i<62; i++)
	{
		strcpy(cstr,"{");
		for(j=0; j<61; j++)
		{
			CstrFormatEx(cstr,liblexic.m_aryTagLattice[i][j],cstrTemp,(j+1)*10);
			strcpy(cstr,cstrTemp);
			CstrFormatCharEx(cstr,",",cstrTemp,(j+1)*10+1);
			strcpy(cstr,cstrTemp);
		}
		CstrFormatEx(cstr,liblexic.m_aryTagLattice[i][j],cstrTemp,(j+1)*10);
		strcpy(cstr,cstrTemp);
		CstrFormatCharEx(cstr,"}",cstrTemp,(j+1)*10+4);
		strcpy(cstr,cstrTemp);
		filLattice.WriteString(cstr);
		filLattice.WriteString("\n");
	}
	filLattice.Close();
	
}
void ShowToFileEx(void)
{
	char cstrShow[1024];
	CLexBTree::SPage page;
	int naddr,i;
	int nTagNum,nMaxTagNum;
	int nRecAllWordNum;
	int nFreq;
	bool bHaveUnkownTag;

	CFile filRead("lexdict.dat",CFile::modeRead);
	CStdioFile filWrite("out.txt",CFile::modeCreate|CFile::modeWrite);
	char word[32];
	nMaxTagNum = 0;
	nRecAllWordNum = 0;
	bHaveUnkownTag = false;
	for(naddr=0; naddr!=-1; )
	{
		filRead.Seek(naddr* sizeof(CLexBTree::SPage),CFile::begin);
		filRead.Read((void*) &page,sizeof(CLexBTree::SPage));
		for(i=0; i<page.used_cell_slot_sum; i++) {
			nFreq = SumWordFreq(page.cell_ary[i]);
			strcpy(word,page.cell_ary[i].word);
			SaveBigSumWordInfo(naddr,i,nFreq,word);
			CstrFormat(page.cell_ary[i].word,nFreq,	cstrShow,35);
			filWrite.WriteString(cstrShow);
			filWrite.WriteString("\n");
			nRecAllWordNum += nFreq;
			nTagNum = SumCellTagNum(page.cell_ary[i]);
			if(nTagNum > nMaxTagNum) nMaxTagNum = nTagNum;
			if(!bHaveUnkownTag) 
				bHaveUnkownTag = HaveTagUnknown(page.cell_ary[i]);
		}
		naddr = page.next_page_naddr;		
	}

	filWrite.WriteString("\n");
	CstrFormat("Max record tag num :",nMaxTagNum,cstrShow,15);
	filWrite.WriteString(cstrShow);
	filWrite.WriteString("\n");

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -