📄 diction.cpp
字号:
// NOTE(review): the constants below follow the exact layout that
// CDictIndex::FillMap (later in this file) writes to its output define file:
// "<slot>_BEGIN", "<slot>", one "<slot>_<value>" per entry, then "<slot>_END".
// They look like generator output rather than hand-written code, and they
// appear before the #include block that would normally declare WORD —
// TODO confirm whether they belong in this translation unit at all.

// Entries of slot "Value" (the beginning of this slot is not part of this
// excerpt). Codes are consecutive; Value_END repeats the last code used.
const WORD Value_9 = 423;
const WORD Value_10 = 424;
const WORD Value_11 = 425;
const WORD Value_12 = 426;
const WORD Value_13 = 427;
const WORD Value_14 = 428;
const WORD Value_15 = 429;
const WORD Value_16 = 430;
const WORD Value_17 = 431;
const WORD Value_18 = 432;
const WORD Value_19 = 433;
const WORD Value_20 = 434;
const WORD Value_30 = 435;
const WORD Value_40 = 436;
const WORD Value_50 = 437;
const WORD Value_60 = 438;
const WORD Value_70 = 439;
const WORD Value_80 = 440;
const WORD Value_90 = 441;
const WORD Value_100 = 442;
const WORD Value_1000 = 443;
const WORD Value_1000000 = 444;
const WORD Value_1000000000 = 445;
const WORD Value_END = 445;
// The following are the definitions of slot "Per".
// Per_BEGIN and Per share the slot's own code; Per_END repeats the last
// entry code (Per_3).
const WORD Per_BEGIN = 446;
const WORD Per = 446;
const WORD Per_First = 447;
const WORD Per_Second = 448;
const WORD Per_Third = 449;
const WORD Per_1 = 450;
const WORD Per_2 = 451;
const WORD Per_3 = 452;
const WORD Per_END = 452;
#include "stdafx.h"
//#include "Generate.h"
//#include "UserDDlg.h"//del by xuned
#include "SenLink.h"
#include "direct.h"
#include "Huffman.h"
#include "DictMent.h"
#include "Diction.h"
//#include "RuleCode.h"
#include <string.h>
#include <memory.h>
#include <stdio.h>
#include <ctype.h>
#include <math.h>
//qlp add 5.27
#include "PosTag.h"
#include "ChildFrm.h"
#include "DICTMENT.H"
#include "Bplus.h"
// Lexicon search object; defined in another translation unit.
extern DictSearch g_objLexSearch;
// The following two functions are defined in WordRes.cpp.
extern void DictWordInit(DictNode* pDictNode);
extern void LxhFreeDictNode(DictNode* pDictNode);
//yys 98.5.26 Bgn
// Path of the build log and its stream. CDictIndex::BuildIndexInit opens
// fpLog on sLog with mode "w"; nothing in the visible part of this file
// closes it.
char *sLog = "DictRes\\Build.log";
FILE *fpLog;
//yys 98.5.26 End
// NOTE(review): no use of _OutDict is visible in this part of the file;
// presumably a conditional-compilation switch — TODO confirm.
#define _OutDict
// NOTE(review): presumably the index descriptor for the dictionary
// (see Bplus.h) — TODO confirm.
IX_DESC g_EcDict;
// Forward declaration; the definition is not in the visible part of the file.
void CreateDictInd(LPSTR lpInput,LPSTR lpInd);
// Line reader defined later in this file.
extern LPSTR myfgets(LPSTR pszString,int n,FILE *fp);
// Constructs a dictionary on top of an ISAM data file plus one word index.
//   dfname  data file name, forwarded to the IsamMgr base together with the
//           fixed record length DIC_DTA_ITEM_LEN
//   ifname  index file name; a private copy is kept in idxfname
//   isMode  when equal to moClosed the dictionary is closed again right
//           after construction
Dictionary::Dictionary(char *dfname, char *ifname, int isMode)
    : IsamMgr(dfname, DIC_DTA_ITEM_LEN)
{
    // Duplicate the index file name (terminator included).
    const size_t nNameBytes = strlen(ifname) + 1;
    idxfname = new char[nNameBytes];
    memcpy(idxfname, ifname, nNameBytes);

    // Single index, keyed on the word.
    ifile[0] = new IndexMgr(idxfname, DIC_WORD_LEN, 1);
    idxname[0] = "Word";
    buffer = new char[DIC_WORD_LEN];
    noidxs = 1;

    // NOTE(review): created_idx() presumably reports that the index file had
    // to be created from scratch, in which case it is rebuilt — TODO confirm.
    if ( ifile[0]->created_idx() )
        rebuild_idxs();

    if ( moClosed == isMode )
        close();
}
// Loads the record fields from a raw data block: the first DIC_OFFSET_LEN
// bytes of `block` are copied into m_pszOffset. No other field is read.
void Dictionary::read_data(void *block)
{
    const char *pszSrc = (const char *)block;
    memcpy((void *)m_pszOffset, pszSrc, DIC_OFFSET_LEN);
}
// Stores the record fields into a raw data block: DIC_OFFSET_LEN bytes of
// m_pszOffset are copied to the start of `block`. No other field is written.
void Dictionary::write_data(void *block)
{
    char *pszDst = (char *)block;
    memcpy(pszDst, (void *)m_pszOffset, DIC_OFFSET_LEN);
}
// Prepares the key for index `idxno` in the shared key buffer.
//   idxno == 0  the key is the word: the buffer is zeroed, m_pszWord is run
//               through prep(), and debug builds assert that the prepared key
//               equals m_pszWord over DIC_WORD_LEN bytes
//   otherwise   the key is the record number `recno`, prepared as 4 bytes
void Dictionary::fill_buffer(int idxno, long recno)
{
    char *pszKey = buffer;

    if ( idxno == 0 ) {
        memset(buffer, 0, DIC_WORD_LEN);
        prep(m_pszWord, pszKey, DIC_WORD_LEN);
        ASSERT( memcmp(m_pszWord, pszKey, DIC_WORD_LEN) == 0 );
    } else {
        prep(recno, pszKey, 4);
    }
}
// Resets the record fields by loading them from an all-zero data block of
// DIC_DTA_ITEM_LEN bytes.
void Dictionary::clear_buf(void)
{
    char *block = new char[DIC_DTA_ITEM_LEN];
    memset(block, 0, DIC_DTA_ITEM_LEN);
    read_data(block);
    // The block was allocated with new[], so it must be released with
    // delete[]; scalar delete on an array is undefined behaviour.
    delete[] block;
}
// Returns the length of the key word held in m_pszWord: the index of the
// first '\0' byte, or DIC_WORD_LEN when the field holds no terminator.
int Dictionary::GetKeyWordLen()
{
    // The counter is declared outside the loop because it is the result;
    // using a for-init variable after the loop only compiled under the
    // pre-standard VC6 scoping rule.
    int nLen = 0;
    while ( nLen < DIC_WORD_LEN && m_pszWord[nLen] != '\0' )
        ++nLen;
    return nLen;
}
// Unpacks the 6-byte index entry produced by CompressIndexOffsetInfo.
//   pszSouOffset  packed entry: bytes 0..3 hold the offset and bytes 4..5
//                 the length, least significant byte first
//   lOffset       receives the 32-bit offset
//   nLen          receives the 16-bit length (high word is zero)
void DecompressIndexOffsetInfo(LPSTR pszSouOffset,long &lOffset,int &nLen)
{
    const int nDecodedLen =
        MAKELONG(MAKEWORD((BYTE)pszSouOffset[4],(BYTE)pszSouOffset[5]),0);
    const long lDecodedOffset =
        MAKELONG(MAKEWORD(pszSouOffset[0],pszSouOffset[1]),
                 MAKEWORD(pszSouOffset[2],pszSouOffset[3]));

    nLen = nDecodedLen;
    lOffset = lDecodedOffset;
}
void CompressIndexOffsetInfo(long lOffset,int nLen,LPSTR pszTarOffset)
// nKeywordOrgLen 英文关键字的压缩前长度
{
ASSERT( lOffset < 0xffffff );
ASSERT( nLen < 0xffff );
pszTarOffset[0] = LOBYTE(LOWORD(lOffset));
pszTarOffset[1] = HIBYTE(LOWORD(lOffset));
pszTarOffset[2] = LOBYTE(HIWORD(lOffset));
pszTarOffset[3] = HIBYTE(HIWORD(lOffset));
pszTarOffset[4] = LOBYTE(LOWORD(nLen));
pszTarOffset[5] = HIBYTE(LOWORD(nLen));
#ifdef _DEBUG
long lResOffset;
int nResLen;
DecompressIndexOffsetInfo(pszTarOffset,lResOffset,nResLen);
ASSERT( lResOffset == lOffset );
ASSERT( nLen == nResLen );
#endif
}
// Writes nLen bytes of pszIndexData at the current position of fpIndexDat.
// Returns the file position (as reported by ftell) at which the data starts.
// The result of fwrite is not checked.
long WriteIndexData(FILE *fpIndexDat,LPCTSTR pszIndexData,int nLen)
{
    const long lStartPos = ftell(fpIndexDat);

    fwrite(pszIndexData,sizeof(char),nLen,fpIndexDat);

    return lStartPos;
}
// Reads nLen bytes into pszIndexData from absolute position lOffset of
// fpIndexDat. Neither the seek nor the read result is checked, and the
// buffer is not NUL-terminated by this function.
void ReadIndexData(FILE *fpIndexDat,LPSTR pszIndexData,long lOffset,int nLen)
{
    // Position on the record first, then pull the bytes in one read.
    fseek(fpIndexDat, lOffset, SEEK_SET);
    fread(pszIndexData, sizeof(char), nLen, fpIndexDat);
}
// Default constructor. The body is empty: no member is assigned here, so
// fields such as m_nSlotNameNum and m_pszWordInfoBuff only receive a value
// once BuildIndexInit() has run.
CDictIndex::CDictIndex()
{
}
// Destructor. The body is empty: resources acquired by BuildIndexInit() are
// released by BuildIndexExitInit(), not here.
CDictIndex::~CDictIndex()
{
}
// Reads one line from fp into pszString (at most n-1 characters) and strips
// the line terminator.
//   pszString  destination buffer of at least n bytes
//   n          size of the destination buffer
//   fp         input stream (the callers in this file open it in binary
//              mode, so CR LF pairs arrive unconverted)
// Returns pszString in every case. When nothing could be read (end of file
// or read error) the buffer is set to the empty string; callers tell the two
// apart with feof()/ferror().
LPSTR myfgets(LPSTR pszString,int n,FILE *fp)
{
    if ( fgets(pszString,n,fp) == NULL ) {
        // fgets leaves the buffer untouched (EOF) or indeterminate (error);
        // never scan it in that state.
        if ( n > 0 )
            pszString[0] = '\0';
        return pszString;
    }

    // Cut at the first CR or LF so both "\r\n" and bare "\n" endings are
    // removed.
    pszString[strcspn(pszString,"\r\n")] = '\0';
    return pszString;
}
// Reads the entries of the current section of the define file, assigns each
// one the next code (m_nDefineValue) and records it in three places: the
// name -> ObWord map, the generated define file and the code table file.
//   mapName      map that receives one heap-allocated ObWord per entry,
//                keyed by the entry text exactly as read
//   fpInDefine   input define file, positioned just after the header line of
//                the section named by m_szSegmentName
//   fpOutDefine  output define file ("const WORD ..." lines)
//   fpCodeTable  output code table file ("name code" lines)
//   bWrtSegName  TRUE: entries are written as "<section>=<entry>" to the code
//                table and "<section>_<entry>" to the define file;
//                FALSE: the bare entry name is used (BuildIndexInit passes
//                FALSE only for the no-value-slot section)
// Reading stops at end of file, on a read error, or at the next "[name]"
// header line; in the last case "<section>_END" is written and
// m_szSegmentName is updated to the new section name.
// Always returns TRUE.
BOOL CDictIndex::FillMap(CMapStringToOb &mapName,
                         FILE *fpInDefine,
                         FILE *fpOutDefine,
                         FILE *fpCodeTable,
                         BOOL bWrtSegName)
{
    CString strOutDefine;
    CString strCodeTable;

    // Section prologue: comment, <section>_BEGIN and the section's own code.
    strOutDefine.Format("\r\n\r\n//Followings are defines of slot %s\r\n",
                        m_szSegmentName);
    fputs(strOutDefine,fpOutDefine);
    strOutDefine.Format("const WORD %s_BEGIN = %d;\r\n",
                        m_szSegmentName,m_nDefineValue);
    fputs(strOutDefine,fpOutDefine);
    strCodeTable.Format("%s %d\r\n",
                        m_szSegmentName,m_nDefineValue);
    fputs(strCodeTable,fpCodeTable);
    strOutDefine.Format("const WORD %s = %d;\r\n",
                        m_szSegmentName,m_nDefineValue);
    fputs(strOutDefine,fpOutDefine);

    // Entries of the "AddQualfr" section get a code and a code-table line
    // but no constant in the define file.
    const BOOL bIsQualfr =
        ( strcmp(m_szSegmentName,"AddQualfr") == 0 ) ? TRUE : FALSE;

    m_nDefineValue ++;

    const int MAX_DEFINE_LINE_LEN = 100;
    char szLine[MAX_DEFINE_LINE_LEN];
    szLine[0] = '\0';   // defined contents even if the very first read fails
    CString strLine;

    for (;;) {
        myfgets(szLine,MAX_DEFINE_LINE_LEN,fpInDefine);
        // Stop on a read error as well as on EOF; otherwise a failing
        // stream would make this loop spin forever.
        if ( feof(fpInDefine) || ferror(fpInDefine) )
            break;

        if ( szLine[0] == '[' ) {
            // Header of the next section: close the current one.
            LPSTR pszClose = strchr(szLine,']');
            if ( pszClose != NULL )
                *pszClose = '\0';
            else
                ASSERT(FALSE);  // malformed header; the rest of the line is
                                // used as the name instead of writing
                                // through a NULL pointer
            strOutDefine.Format("const WORD %s_END = %d;\r\n",
                                m_szSegmentName,m_nDefineValue-1);
            fputs(strOutDefine,fpOutDefine);
            // NOTE(review): unbounded copy; assumes m_szSegmentName can hold
            // a full input line — TODO confirm its size.
            strcpy(m_szSegmentName,szLine+1);
            break;
        }

        // The map key and the code table use the text exactly as read.
        strLine = szLine;
        if ( bWrtSegName )
            strCodeTable.Format("%s=%s %d\r\n",
                                m_szSegmentName,szLine,m_nDefineValue);
        else
            strCodeTable.Format("%s %d\r\n",
                                szLine,m_nDefineValue);
        fputs(strCodeTable,fpCodeTable);

        // '/' is not valid in an identifier: replace it with '_' before the
        // name goes into the define file. Testing the terminator first keeps
        // an empty line from being scanned past its end.
        for ( char *pLine = szLine; *pLine != '\0'; pLine ++ ) {
            if ( *pLine == '/' )
                *pLine = '_';
        }

        if ( !bWrtSegName ) {
            strOutDefine.Format("const WORD %s = %d;\r\n",
                                szLine,m_nDefineValue);
            fputs(strOutDefine,fpOutDefine);
        } else if ( bIsQualfr == FALSE ) {
            strOutDefine.Format("const WORD %s_%s = %d;\r\n",
                                m_szSegmentName,szLine,m_nDefineValue);
            fputs(strOutDefine,fpOutDefine);
        }

        // The map owns the ObWord; it is released by FreeMap().
        ObWord *pObject = new ObWord;
        pObject->GiveVolue(m_nDefineValue);
        m_nDefineValue ++;
        mapName.SetAt( strLine, pObject );
    }
    return TRUE;
}
// Reads the entries of the current section of the define file and numbers
// them from 0 in file order.
//   mapName      map that receives one heap-allocated ObWord per entry,
//                holding the entry's zero-based index
//   arrayName    receives the entry names in file order
//   nNameNum     set to the number of entries read
//   fpInDefine   input define file, positioned just after the section header
//   fpOutDefine  not used by this function
// The index of the entry named "AddQualfr" is remembered in m_nQualfrCode.
// Reading stops at end of file, on a read error, or at the next "[name]"
// header line, whose name is stored in m_szSegmentName.
// Always returns TRUE.
BOOL CDictIndex::FillMapWithArray(CMapStringToOb &mapName,
                                  CStringArray &arrayName,
                                  int &nNameNum,FILE *fpInDefine,
                                  FILE *fpOutDefine)
{
    char szLine[MAX_DEFINE_LINE_LEN];
    szLine[0] = '\0';   // defined contents even if the very first read fails
    nNameNum = 0;
    CString strLine;

    for (;;) {
        myfgets(szLine,MAX_DEFINE_LINE_LEN,fpInDefine);
        // Stop on a read error as well as on EOF; otherwise a failing
        // stream would make this loop spin forever.
        if ( feof(fpInDefine) || ferror(fpInDefine) )
            break;

        if ( szLine[0] == '[' ) {
            // Header of the next section.
            LPSTR pszClose = strchr(szLine,']');
            if ( pszClose != NULL )
                *pszClose = '\0';
            else
                ASSERT(FALSE);  // malformed header; the rest of the line is
                                // used as the name instead of writing
                                // through a NULL pointer
            // NOTE(review): unbounded copy; assumes m_szSegmentName can hold
            // a full input line — TODO confirm its size.
            strcpy(m_szSegmentName,szLine+1);
            break;
        }

        nNameNum ++;
        strLine = szLine;
        arrayName.Add(strLine);

        // The map owns the ObWord; its value is the entry's array index.
        ObWord *pObject = new ObWord;
        pObject->GiveVolue(nNameNum-1);
        mapName.SetAt( strLine, pObject );

        if ( strcmp(strLine,"AddQualfr") == 0 )
            m_nQualfrCode = nNameNum-1;
    }
    return TRUE;
}
// Prepares an index build: opens the build log, converts the define file
// into the generated define header and the code table, fills the lookup
// maps, and allocates the word-info buffer.
//   pszInDefineName   original define file (DEFINE.TXT)
//   pszOutDefineName  generated define file (DictDef.h)
//   pszCodeTable      code table file (name / code pairs)
// Returns TRUE on success. Returns FALSE, after showing a message box, when
// a file cannot be opened, when the first line of the define file is not a
// "[section]" header, or when the buffer cannot be allocated.
// The three define/code-table files are closed on every path. The global log
// stream fpLog is left open once it has been opened, as before.
BOOL CDictIndex::BuildIndexInit(LPSTR pszInDefineName,
                                LPSTR pszOutDefineName,
                                LPSTR pszCodeTable)
{
    //yys 5.26
    fpLog = fopen(sLog , "w");
    if( !fpLog ){
        AfxMessageBox("Cann't Creat file!",MB_OK);
        return FALSE;
    }

    FILE *fpInDefine = fopen(pszInDefineName,"rb");
    if ( fpInDefine == NULL ) {
        CString strMsg;
        strMsg.Format(" 无法打开文件 %s !",pszInDefineName);
        AfxMessageBox(strMsg);
        return FALSE;
    }

    FILE *fpOutDefine = fopen(pszOutDefineName,"wb");
    if ( fpOutDefine == NULL ) {
        CString strMsg;
        strMsg.Format("无法创建文件 %s !",pszOutDefineName);
        AfxMessageBox(strMsg);
        fclose(fpInDefine);
        return FALSE;
    }

    FILE *fpCodeTable = fopen(pszCodeTable,"wb");
    // Test the handle that was just opened (the old code re-tested
    // fpOutDefine here, so a failed open went unnoticed).
    if ( fpCodeTable == NULL ) {
        CString strMsg;
        strMsg.Format("无法创建文件 %s !",pszCodeTable);
        AfxMessageBox(strMsg);
        fclose(fpOutDefine);
        fclose(fpInDefine);
        return FALSE;
    }

    // Include guard of the generated header; the matching #endif is written
    // after all sections.
    fputs("#ifndef _DICTDEF_H",fpOutDefine);
    fputc('\n',fpOutDefine);
    fputs("#define _DICTDEF_H",fpOutDefine);

    m_nDefineValue = 1;

    // The first line is the header of the first section: take its name and
    // skip it.
    char szLine[MAX_DEFINE_LINE_LEN];
    szLine[0] = '\0';   // defined contents even if the file is empty
    myfgets(szLine,MAX_DEFINE_LINE_LEN,fpInDefine);
    LPSTR pszTep = strchr(szLine,']');
    if ( pszTep == NULL ) {
        // Without this return a release build would write through NULL.
        ASSERT(FALSE);
        CString strMsg;
        strMsg.Format("Invalid define file %s: the first line must be a [section] header.",
                      pszInDefineName);
        AfxMessageBox(strMsg);
        fclose(fpCodeTable);
        fclose(fpOutDefine);
        fclose(fpInDefine);
        return FALSE;
    }
    *pszTep = '\0';
    strcpy(m_szSegmentName,szLine+1);

    // The sections must appear in the define file in exactly this order.
    FillMap(m_mapCate,fpInDefine,fpOutDefine,fpCodeTable,TRUE);
    FillMap(m_mapHead,fpInDefine,fpOutDefine,fpCodeTable,TRUE);
    FillMapWithArray(m_mapSlotName,m_arraySlotName,
                     m_nSlotNameNum,fpInDefine,fpOutDefine);
    //FillMap(m_mapSlotName,fpInDefine);
    FillMap(m_mapNoValueSlot,fpInDefine,fpOutDefine,fpCodeTable,FALSE);
    FillMap(m_mapAmbig,fpInDefine,fpOutDefine,fpCodeTable,TRUE);
    // One value section per slot name.
    // NOTE(review): assumes m_mapSlotValue has at least m_nSlotNameNum
    // elements — TODO confirm against the class declaration.
    for ( int Loop=0;Loop<m_nSlotNameNum;Loop++ ) {
        FillMap(m_mapSlotValue[Loop],fpInDefine,fpOutDefine,fpCodeTable,TRUE);
    }

    fputs("#endif",fpOutDefine);
    fclose(fpInDefine);
    fclose(fpOutDefine);
    fclose(fpCodeTable);

    // Allocate the word-info buffer. GlobalLock returns NULL when either the
    // allocation or the lock fails.
    m_nWordInfoBuffSize = MAX_RECORD_LEN;
    m_pszWordInfoBuff = (LPSTR)GlobalLock(GlobalAlloc(GMEM_MOVEABLE|GMEM_SHARE,
                                                      m_nWordInfoBuffSize));
    if ( m_pszWordInfoBuff == NULL ) {
        AfxMessageBox("Out of memory: cannot allocate the word-info buffer!");
        return FALSE;
    }
    return TRUE;
}
// Deletes every ObWord stored in mapName, then empties the map.
void CDictIndex::FreeMap(CMapStringToOb &mapName)
{
    CString strKey;

    for ( POSITION pos = mapName.GetStartPosition(); pos != NULL; ) {
        ObWord *pValue;
        // GetNextAssoc advances pos and sets it to NULL after the last pair.
        mapName.GetNextAssoc( pos, strKey, ( CObject*& )pValue );
        delete pValue;
    }

    mapName.RemoveAll();
}
void CDictIndex::BuildIndexExitInit()
// 释放保留字数组占用的空间
{
FreeMap(m_mapCate);
FreeMap(m_mapHead);
FreeMap(m_mapAmbig);
FreeMap(m_mapNoValueSlot);
m_arraySlotName.RemoveAll();
for ( int Loop=0;Loop<m_nSlotNameNum;Loop++ ) {
FreeMap(m_mapSlotValue[Loop]);
}
GlobalUnlock(GlobalHandle(m_pszWordInfoBuff));
GlobalFree(GlobalHandle(m_pszWordInfoBuff));
}
void DecodeWordRule(LPSTR pszCodedRule,LPSTR pszOrgRule)
// 对编码后的规则进行解码
// pszCodedRule 编码后的规则
// pszOrgRule 解码后的规则
// 注:
// 编码后的规则的格式:
// 规则个数(1 byte),规则左部的长度(1 byte),规则左部,
// 规则右部的长度(1 byte),规则右部,...
{
LPSTR pszCodePtr = pszCodedRule;
LPSTR pszOrgPtr = pszOrgRule;
UCHAR ucRuleNum = *pszCodePtr;
pszCodePtr ++;
UCHAR ucLen;
for ( char Loop = 0;Loop<ucRuleNum;Loop++ ) {
strcpy(pszOrgPtr,"@");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -