📄 lex.cpp
字号:
// lex.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "lex.h"
#include <stdlib.h>
#include <fstream>
#include <string>
#include <vector>
#include <string.h>
#include "LexBTree.h"
#include "LibLexic.h"
#include "LexConst.h"
#define MAX_CHARS_LINE 10240
#define LEN_FILENAME 64
#define LEN_DIGIT 8
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
/////////////////////////////////////////////////////////////////////////////
// The one and only application object
CWinApp theApp;
using namespace std;
// ---- Forward declarations -------------------------------------------------
// Batch driver: pre_exec() hands each listed file name to exec().
void pre_exec(void);
void exec(char * cstr_fname);
// Report writers called from _tmain (the ShowToFile() call there is commented out).
void ShowToFile(void);
void ShowToFileEx(void);
void ShowToFile1(void);
// Per-cell helpers applied to the cells of a CLexBTree::SPage.
// NOTE(review): their definitions are not in this part of the file; judging by
// the call sites they return a word's total frequency and its tag count.
int SumWordFreq(CLexBTree::SCell& cell);
int SumCellTagNum(CLexBTree::SCell& cell);
// Text-formatting helpers: combine a source string with a number (or a second
// string) into cstrDest; `dist` is passed as an increasing value by callers.
// NOTE(review): presumably a column/width position - confirm at the definitions.
void CstrFormat(char* cstrSrc,int nSrc,char* cstrDest,int dist);
void CstrFormatEx(char* cstrSrc,int nSrc,char* cstrDest,int dist);
void CstrFormatChar(char* cstrSrc1,char* cstrSrc2,char* cstrDest,int dist);
void CstrFormatCharEx(char* cstrSrc1,char* cstrSrc2,char* cstrDest,int dist);
// Used by ShowToFileEx() to note whether any cell reports an unknown tag.
bool HaveTagUnknown(CLexBTree::SCell& cell);
// Summary-line helpers; neither their definitions nor their callers are
// visible in this part of the file.
void TitleFormat(char* cstrShow);
void TitleHyphenFormat(char* cstrShow);
int AllWordNumFormat(char* cstrShow);
int RecWordNumFormat(char* cstrShow);
int AllSubRecNumFormat(char* cstrShow);
// Text dumps of the collected statistics (lattice.txt, bigsum.txt /
// bigsumex.txt, eachtagsum.txt).
void WriteLattice();
void WriteBigSum();
void WriteEachTagInDictSum();
// Number of slots in the table of highest-sum words.
#define NUM_ARYWORDOFBIGSUM 4096

// One entry of the highest-sum word table.
struct SBigSumInfo
{
    int  sum;         // value the entries are ranked by (largest first)
    int  page_naddr;  // page index inside lexdict.dat where the word was found
    int  cell_slot;   // index of the word's cell within that page
    char word[32];    // NUL-terminated copy of the word
};

// Table filled by SaveBigSumWordInfo() and written out by WriteBigSum().
SBigSumInfo g_aryWordOfBigSum[NUM_ARYWORDOFBIGSUM];
// B-tree instance; allocated, flushed to disk and deleted inside pre_exec().
CLexBTree *pBT;
// Holds m_aryTagLattice and m_aryRecWordTagNum, which WriteLattice() and
// WriteEachTagInDictSum() dump to text files.
CLibLexic liblexic;
void _tmain(void)
{
for(int i=0; i<NUM_ARYWORDOFBIGSUM; i++)
{
g_aryWordOfBigSum[i].page_naddr = -1;
g_aryWordOfBigSum[i].cell_slot = -1;
g_aryWordOfBigSum[i].sum = 0;
g_aryWordOfBigSum[i].word[0] = '\0';
}
pre_exec();
// ShowToFile();
ShowToFile1();
ShowToFileEx();
WriteLattice();
WriteBigSum();
WriteEachTagInDictSum();
}
void pre_exec(void)
{
int nStartLine;
int nNumOfLines;
char cstr_filename[LEN_FILENAME];
char cstr_digit[LEN_DIGIT];
CStdioFile mfc_libfile("liblist",CFile::modeRead);
CStdioFile mfc_logfile("startln.log",CFile::modeRead);
mfc_logfile.ReadString(cstr_digit,LEN_DIGIT);
nStartLine = atoi(cstr_digit);
mfc_logfile.Close();
CStdioFile mfc_numlinefile("numlines.log",CFile::modeRead);
mfc_numlinefile.ReadString(cstr_digit,LEN_DIGIT);
nNumOfLines = atoi(cstr_digit);
mfc_numlinefile.Close();
for(int k=1;
(k<nStartLine) ;
k++) {
if(!mfc_libfile.ReadString(cstr_filename,LEN_FILENAME))
break;
}
cout<<"Let's begin ..."<<endl;
try {
pBT = new CLexBTree();
for(int i=0;
(i<nNumOfLines) && mfc_libfile.ReadString(cstr_filename,LEN_FILENAME);
i++){
if(cstr_filename[strlen(cstr_filename)-1]=='\n') {
cstr_filename[strlen(cstr_filename)-1] = 0;
//ReadString会把回车符也算在字符串内
}
exec(cstr_filename);
cout<<"Handled No."<<nStartLine+i<<" file -- "<<cstr_filename<<endl;
}
pBT->FlushToDisk();
delete pBT;
}
catch(CNodeError){
cout<<"Node Error"<<endl;
exit(1);
}
catch(CKeyValueError) {
cout<<"KeyValue.error"<<endl;
exit(1);
}
catch(CTagNumError) {
cout<<"Tag num error"<<endl;
exit(1);
}
catch (CFileException) {
cout<<"Open File Error!"<<endl;
exit(1);
}
mfc_libfile.Close();
}
// Dumps every word stored in "lexdict.dat" to "out.txt", one line per word.
//
// The pages of the data file are visited starting with page 0 and following
// each page's next_page_naddr link until it is -1. For every used cell the
// line is produced by CstrFormat() from the cell's word, its SumWordFreq()
// result and the value 35.
void ShowToFile(void)
{
    char cstrShow[120];
    CLexBTree::SPage page;
    int naddr,i;
    CFile filRead("lexdict.dat",CFile::modeRead);
    CStdioFile filWrite("out.txt",CFile::modeCreate|CFile::modeWrite);

    naddr = 0;
    while(naddr != -1)
    {
        filRead.Seek(naddr* sizeof(CLexBTree::SPage),CFile::begin);
        // A short read (empty or truncated file, bad link) would leave `page`
        // partly uninitialised; stop instead of walking garbage.
        if(filRead.Read((void*) &page,sizeof(CLexBTree::SPage))
           != sizeof(CLexBTree::SPage))
            break;

        for(i=0; i<page.used_cell_slot_sum; i++) {
            CstrFormat(page.cell_ary[i].word,SumWordFreq(page.cell_ary[i]),
                       cstrShow,35);
            filWrite.WriteString(cstrShow);
            filWrite.WriteString("\n");
        }
        naddr = page.next_page_naddr;
    }

    filRead.Close();
    filWrite.Close();
}
void SaveBigSumWordInfo(int page_naddr,int cell_slot,int sum,char word[32])
{
// struct tagBigSumInfo{
// int sum;
// int page_naddr;
// int cell_slot;
// char word[48];
// }g_aryWordOfBigSum[NUM_ARYWORDOFBIGSUM];
int i,j;
int len;
for(len=0,i=0; i<NUM_ARYWORDOFBIGSUM; i++)
{
if(g_aryWordOfBigSum[i].sum>0)
len ++;
else {
break;
}
}
for(i=0; i<NUM_ARYWORDOFBIGSUM; i++)
{
if(sum > g_aryWordOfBigSum[i].sum)
{
for(j=len; j>i; j--)
{
if(j<NUM_ARYWORDOFBIGSUM) {
g_aryWordOfBigSum[j].page_naddr = g_aryWordOfBigSum[j-1].page_naddr;
g_aryWordOfBigSum[j].cell_slot = g_aryWordOfBigSum[j-1].cell_slot;
g_aryWordOfBigSum[j].sum = g_aryWordOfBigSum[j-1].sum;
strcpy(g_aryWordOfBigSum[j].word,g_aryWordOfBigSum[j-1].word);
}
}
g_aryWordOfBigSum[i].page_naddr = page_naddr;
g_aryWordOfBigSum[i].cell_slot = cell_slot;
g_aryWordOfBigSum[i].sum = sum;
strcpy(g_aryWordOfBigSum[i].word,word);
break;
}
}
}
void WriteBigSum()
{
char cstr[128] ;
char cstrEx[128];
char cstrTemp[128];
CStdioFile filBigSum("bigsum.txt",CFile::modeCreate|CFile::modeWrite);
CStdioFile filBigSumEx("bigsumex.txt",CFile::modeCreate|CFile::modeWrite);
for(int i=0; i<NUM_ARYWORDOFBIGSUM; i++)
{
strcpy(cstr,"");
CstrFormat(cstr, g_aryWordOfBigSum[i].page_naddr,cstrTemp,5);
strcpy(cstr,cstrTemp);
CstrFormat(cstr,g_aryWordOfBigSum[i].cell_slot,cstrTemp,15);
strcpy(cstr,cstrTemp);
CstrFormatChar(cstr,g_aryWordOfBigSum[i].word,cstrTemp,25);;
strcpy(cstr,cstrTemp);
CstrFormat(cstr,g_aryWordOfBigSum[i].sum ,cstrTemp,60);
strcpy(cstr,cstrTemp);
strcpy(cstrEx,"{");
CstrFormatEx(cstrEx, g_aryWordOfBigSum[i].page_naddr,cstrTemp,10);
strcpy(cstrEx,cstrTemp);
CstrFormatCharEx(cstrEx, ",",cstrTemp,11);
strcpy(cstrEx,cstrTemp);
CstrFormatEx(cstrEx,g_aryWordOfBigSum[i].cell_slot,cstrTemp,20);
strcpy(cstrEx,cstrTemp);
CstrFormatCharEx(cstrEx, "},",cstrTemp,24);
strcpy(cstrEx,cstrTemp);
filBigSum.WriteString(cstr);
filBigSum.WriteString("\n");
filBigSumEx.WriteString(cstrEx);
filBigSumEx.WriteString("\n");
}
filBigSum.Close();
filBigSumEx.Close();
}
// Writes liblexic.m_aryRecWordTagNum[0..61] to "eachtagsum.txt" as a single
// line that starts with "{" and ends with "},".
//
// Each value is appended with CstrFormatEx() at dist (index+1)*10. Every
// value except the last is followed by "," at dist (index+1)*10+1; the last
// one is followed by "}," at dist (index+1)*10+4.
void WriteEachTagInDictSum()
{
    const int nLastTag = 61;   // index of the final value, closed with "},"
    char szLine[1024];         // line under construction
    char szScratch[1024];      // output buffer of each formatting step
    int nTag;
    int nDist;

    CStdioFile filEachTagSum("eachtagsum.txt",CFile::modeCreate|CFile::modeWrite);

    strcpy(szLine,"{");

    // All values but the last: number, then a comma separator.
    nTag = 0;
    while(nTag < nLastTag)
    {
        nDist = (nTag+1)*10;
        CstrFormatEx(szLine,liblexic.m_aryRecWordTagNum[nTag],szScratch,nDist);
        strcpy(szLine,szScratch);
        CstrFormatCharEx(szLine,",",szScratch,nDist+1);
        strcpy(szLine,szScratch);
        nTag++;
    }

    // Last value: number, then the closing brace.
    nDist = (nLastTag+1)*10;
    CstrFormatEx(szLine,liblexic.m_aryRecWordTagNum[nLastTag],szScratch,nDist);
    strcpy(szLine,szScratch);
    CstrFormatCharEx(szLine,"},",szScratch,nDist+4);
    strcpy(szLine,szScratch);

    filEachTagSum.WriteString(szLine);
    filEachTagSum.WriteString("\n");
    filEachTagSum.Close();
}
// Writes rows 0..61 of liblexic.m_aryTagLattice to "lattice.txt", one text
// line per row, each holding columns 0..61 wrapped in "{" ... "}".
//
// Each value is appended with CstrFormatEx() at dist (column+1)*10. Every
// value except the last is followed by "," at dist (column+1)*10+1; the last
// one is followed by "}" at dist (column+1)*10+4.
void WriteLattice()
{
    const int nRows = 62;
    const int nLastCol = 61;   // index of the final column, closed with "}"
    char szLine[1024];         // line under construction
    char szScratch[1024];      // output buffer of each formatting step
    int nRow;
    int nCol;
    int nDist;

    CStdioFile filLattice("lattice.txt",CFile::modeCreate|CFile::modeWrite);

    for(nRow=0; nRow<nRows; nRow++)
    {
        strcpy(szLine,"{");

        // All columns but the last: number, then a comma separator.
        for(nCol=0; nCol<nLastCol; nCol++)
        {
            nDist = (nCol+1)*10;
            CstrFormatEx(szLine,liblexic.m_aryTagLattice[nRow][nCol],szScratch,nDist);
            strcpy(szLine,szScratch);
            CstrFormatCharEx(szLine,",",szScratch,nDist+1);
            strcpy(szLine,szScratch);
        }

        // Last column: number, then the closing brace.
        nDist = (nLastCol+1)*10;
        CstrFormatEx(szLine,liblexic.m_aryTagLattice[nRow][nLastCol],szScratch,nDist);
        strcpy(szLine,szScratch);
        CstrFormatCharEx(szLine,"}",szScratch,nDist+4);
        strcpy(szLine,szScratch);

        filLattice.WriteString(szLine);
        filLattice.WriteString("\n");
    }

    filLattice.Close();
}
void ShowToFileEx(void)
{
char cstrShow[1024];
CLexBTree::SPage page;
int naddr,i;
int nTagNum,nMaxTagNum;
int nRecAllWordNum;
int nFreq;
bool bHaveUnkownTag;
CFile filRead("lexdict.dat",CFile::modeRead);
CStdioFile filWrite("out.txt",CFile::modeCreate|CFile::modeWrite);
char word[32];
nMaxTagNum = 0;
nRecAllWordNum = 0;
bHaveUnkownTag = false;
for(naddr=0; naddr!=-1; )
{
filRead.Seek(naddr* sizeof(CLexBTree::SPage),CFile::begin);
filRead.Read((void*) &page,sizeof(CLexBTree::SPage));
for(i=0; i<page.used_cell_slot_sum; i++) {
nFreq = SumWordFreq(page.cell_ary[i]);
strcpy(word,page.cell_ary[i].word);
SaveBigSumWordInfo(naddr,i,nFreq,word);
CstrFormat(page.cell_ary[i].word,nFreq, cstrShow,35);
filWrite.WriteString(cstrShow);
filWrite.WriteString("\n");
nRecAllWordNum += nFreq;
nTagNum = SumCellTagNum(page.cell_ary[i]);
if(nTagNum > nMaxTagNum) nMaxTagNum = nTagNum;
if(!bHaveUnkownTag)
bHaveUnkownTag = HaveTagUnknown(page.cell_ary[i]);
}
naddr = page.next_page_naddr;
}
filWrite.WriteString("\n");
CstrFormat("Max record tag num :",nMaxTagNum,cstrShow,15);
filWrite.WriteString(cstrShow);
filWrite.WriteString("\n");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -