📄 scsortview.cpp
字号:
{
for (bLow = 0x40; bLow <= 0xA0; ++bLow)
{
if (bLow == 0x7F)
continue;
gbkFile.Write(&bHigh, sizeof(bHigh));
gbkFile.Write(&bLow, sizeof(bLow));
gbkFile.Write(&g_usEnt, sizeof(g_usEnt));
}
}
// (3)GBK/2: B0A1~F7FE:GB 2312汉字区,收录汉字6763个,5个空位是D7FA~D7FE。
for (bHigh = 0xB0; bHigh <= 0xF7; ++bHigh)
{
for (bLow = 0xA1; bLow <= 0xFE; ++bLow)
{
if (bHigh == 0xD7 && (bLow >= 0xFA && bLow <= 0xFE))
continue;
gbkFile.Write(&bHigh, sizeof(bHigh));
gbkFile.Write(&bLow, sizeof(bLow));
gbkFile.Write(&g_usEnt, sizeof(g_usEnt));
}
}
gbkFile.Close();
CString strGBKFile = g_strResultDir + "\\所有汉字21003.txt";
CString strHelp;
strHelp.Format("请严格执行下列操作之后才单击确定关闭本对话框,否则将产生文件异常!\n\n\n"\
"(1)打开MS Excel,从【数据】->【导入外部数据】->【导入数据】\n\n"\
"(2)在【选择数据源】对话框中选择“%s”,然后单击【确定】\n\n"\
"(3)在【文本导入向导-3 步骤之1】对话框中将【文件原始格式】设为:“936:简体中文(GB2312)”\n\n"\
"(4)单击完成,选中Excel中第一列导入的汉字按【升序排序】。然后单击【文件】->【另存为】菜单\n\n"\
"(5)在【另存为】对话框中:\n"\
" (a)设置【文件名(N)】:所有汉字21003.csv\n"\
" (b)设置【保存类型】:“CSV(逗号分隔)(*.csv)”\n"\
" 确保保存路径为:%s\n\n",
strGBKFile,
g_strResultDir);
MessageBox(NULL, strHelp, "帮助!", MB_ICONINFORMATION);
}
void printAllGBK()
{
BYTE bHigh = 0;
BYTE bLow = 0;
CFile gbkFile(g_strResultDir + "\\所有GBK.txt", CFile::modeCreate | CFile::modeReadWrite);
// (1)GBK/3: 8140~A0FE:GB 13000.1扩充汉字区,收录CJK汉字6080个。
for (bHigh = 0x81; bHigh <= 0xFE; ++bHigh)
{
for (bLow = 0x40; bLow <= 0xFE; ++bLow)
{
if (bLow == 0x7F)
continue;
gbkFile.Write(&bHigh, sizeof(bHigh));
gbkFile.Write(&bLow, sizeof(bLow));
gbkFile.Write(&g_usEnt, sizeof(g_usEnt));
}
}
gbkFile.Close();
}
void temp()
{
// TODO: Add your command handler code here
CString strDYZDB;
CFileDialog dyzDBSelectDlg(TRUE, NULL, NULL, OFN_HIDEREADONLY, "GBK码升序排序的多音字字库(SortDYZ.txt)|*.txt||", NULL);
if (dyzDBSelectDlg.DoModal() == IDOK)
{
strDYZDB = dyzDBSelectDlg.GetPathName();
}
if (_access(strDYZDB, 0x0) != 0)
{
AfxMessageBox("GBK码升序排序的多音字字库不存在或已经被损害!");
return;
}
USHORT usOff = 0;
BYTE bHigh = 0;
BYTE bLow = 0;
char chPY;
CString strDYMapping[(0xFE-0x81+1)*(0xFE-0x40+1) + 1];
for (bHigh = 0x81; bHigh <= 0xFE; ++bHigh)
{
for (bLow = 0x40; bLow <= 0xFE; ++bLow)
{
usOff = bLow;
usOff <<= 0x8;
usOff |= bHigh;
if (GetSingleHZJP((char*)&usOff, chPY))
{
usOff = (bHigh-0x81)*191 + (bLow-0x40);
strDYMapping[usOff].Format("\"%c\",", chPY);
}
else
{
usOff = (bHigh-0x81)*191 + (bLow-0x40);
strDYMapping[usOff].Format("NULL,");
}
}
}
try
{
CStdioFile fileDYZ(strDYZDB, CStdioFile::modeRead);
CString strDYZ;
while (fileDYZ.ReadString(strDYZ))
{
strDYZ.TrimLeft();
strDYZ.TrimRight();
strDYZ.TrimLeft(_T(' '));
strDYZ.TrimRight(_T(' '));
if (strDYZ.IsEmpty())
continue;
CString strGBK;
strDYZ = strDYZ.Mid( strDYZ.Find(_T(' '))+1 );
strGBK = strDYZ.Mid( strDYZ.ReverseFind(_T(' '))+1 );
strDYZ = strDYZ.Left( strDYZ.Find(_T(' ')) );
USHORT usWord = atoi(strGBK);
bLow = usWord & 0xFF;
bHigh = (usWord & 0xFF00) >> 0x8;
usOff = (bHigh - 0x81)*191 + (bLow - 0x40);
strDYMapping[usOff].Format("\"%s\",", strDYZ.GetBuffer(0));
}// while
// (1)Header
CFile fileDYZArray;
fileDYZArray.Open(g_strResultDir + "\\多音字映射表2.txt", CStdioFile::modeCreate | CStdioFile::modeReadWrite);
fileDYZArray.Write("static char* g_pszDYZIndex[] = {", strlen("static char* g_pszDYZIndex[] = {"));
fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
for (bHigh = 0x81; bHigh <= 0xFE; ++bHigh)
{
for (bLow = 0x40; bLow <= 0xFE; ++bLow)
{
usOff = (bHigh-0x81) * (0xFE-0x40+1) + (bLow-0x40);
fileDYZArray.Write(strDYMapping[usOff].GetBuffer(0), strDYMapping[usOff].GetLength());
if ((usOff+1) % 0x10 == 0)
{
fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
}
}
}
// (3)
fileDYZArray.Write("};", strlen("};"));
fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
fileDYZArray.Close();
fileDYZ.Close();
}
catch (CFileException*) {
AfxMessageBox("CFileException() at CSCSortView::OnDyzSort()!");
}
AfxMessageBox("多音字解析完成,见\\result\\多音字映射表2.txt!");
}
void CSCSortView::OnExtractDYZ()
{
// TODO: Add your command handler code here
CString strOrgDYZFile, strFileName;
CFileDialog orgDYZFileDlg(TRUE, NULL, NULL, OFN_HIDEREADONLY, "原始多音字字库(*.txt)|*.txt||", NULL);
if (orgDYZFileDlg.DoModal() == IDOK)
{
strOrgDYZFile = orgDYZFileDlg.GetPathName();
strFileName = orgDYZFileDlg.GetFileName();
}
if (_access(strOrgDYZFile, 0x0) != 0)
{
AfxMessageBox("文件不存在或已经被损害!");
return;
}
int iTestCnt = 0;
int i = 0;
short shWordRange = (0xFE - 0x81 + 1) * (0xFE - 0x40 + 1);
ST_HZUnit* pUnit = new ST_HZUnit[shWordRange];
try
{
CStdioFile fileOrgDYZ(strOrgDYZFile, CStdioFile::modeRead);
CString strDYZ;
while (fileOrgDYZ.ReadString(strDYZ))
{
strDYZ.TrimLeft();
strDYZ.TrimRight();
if (strDYZ.IsEmpty())
continue;
LPSTR ppStr[5];
int iCnt = fast_trim(strDYZ.GetBuffer(0), ',', ppStr, 10);
for(i = 0; i < iCnt; i++)
{
CString strTmp1(ppStr[i]);
CString strTmp2;
int iPos1 = strTmp1.Find('(');
int iPos2 = strTmp1.Find(')');
while(iPos1 >= 2 && iPos2 >= 4)
{
// 取()中的声母
char pszIdx[2] = {0,};
pszIdx[0] = strTmp1.GetAt(iPos1 + 1);
if((pszIdx[0] >= 'A' && pszIdx[0] <= 'Z') || (pszIdx[0] >= 'a' && pszIdx[0] <= 'z'))
{
char pszHZ[3] = {0,};
memcpy(pszHZ, strTmp1.Mid(iPos1 - 2, 2), 2);
char chPY;
// 如果是中文字
if(GetSingleHZJP(pszHZ, chPY))
{
if(pszIdx[0] >= 'A' && pszIdx[0] <= 'Z')
pszIdx[0] += 32;
WORD nOffset = WORD(((BYTE)pszHZ[0] - 0x81) * 191 + (BYTE)pszHZ[1] - 0x40);
// 该汉字对应的拼音声母表中是否有该字母
CString strTmp(pUnit[nOffset].pszPY);
if(strTmp.Find(pszIdx[0]) < 0)
{
pUnit[nOffset].usCode = (BYTE)pszHZ[0] * 256 + (BYTE)pszHZ[1];
strcat(pUnit[nOffset].pszPY, pszIdx);
strcpy(pUnit[nOffset].pszHZ, pszHZ);
}
}
}
strTmp1 = strTmp1.Mid(iPos2 + 1);
iPos1 = strTmp1.Find('(');
iPos2 = strTmp1.Find(')');
}
}
iTestCnt++;
}// while
CString strTrgDYZFile = strOrgDYZFile.Left(strOrgDYZFile.Find(strFileName));
strTrgDYZFile += "specialDYZ.txt";
CStdioFile outputFile(strTrgDYZFile, CStdioFile::modeCreate | CStdioFile::modeWrite);
for(i = 0; i < shWordRange; i++)
{
CString strTmp;
if(strlen(pUnit[i].pszPY) > 1)
{
strTmp.Format("%s,\t%d\t,%s\n", pUnit[i].pszHZ, pUnit[i].usCode, pUnit[i].pszPY);
outputFile.Write(strTmp.GetBuffer(0), strTmp.GetLength());
}
}
delete[] pUnit;
pUnit = NULL;
fileOrgDYZ.Close();
outputFile.Close();
AfxMessageBox("Finished!");
}
catch (CFileException*) {
AfxMessageBox("CFileException() at CSCSortView::OnExtractDYZ()!");
}
}
void CSCSortView::OnNewDyzSort()
{
// TODO: Add your command handler code here
// temp();
// return;
CString strDYZDB;
CFileDialog dyzDBSelectDlg(TRUE, NULL, NULL, OFN_HIDEREADONLY, "specialDYZ.txt)|*.txt||", NULL);
if (dyzDBSelectDlg.DoModal() == IDOK)
{
strDYZDB = dyzDBSelectDlg.GetPathName();
}
if (_access(strDYZDB, 0x0) != 0)
{
AfxMessageBox("特殊多音字字库不存在或已经被损害!");
return;
}
USHORT usOff = 0;
BYTE bHigh = 0;
BYTE bLow = 0;
CString strDYMapping[(0xFE-0x81+1)*(0xFE-0x40+1) + 1];
for (usOff = 0; usOff <= (0xFE-0x81+1)*(0xFE-0x40+1); ++usOff)
{
strDYMapping[usOff].Format("NULL,");
}
try
{
CString strAllDYZ;
CFileDialog AllDYZSelectDlg(TRUE, NULL, NULL, OFN_HIDEREADONLY, "SortDyz.txt)|*.txt||", NULL);
if (AllDYZSelectDlg.DoModal() == IDOK)
{
strAllDYZ = AllDYZSelectDlg.GetPathName();
}
if (_access(strAllDYZ, 0x0) != 0)
{
AfxMessageBox("原始多音字字库不存在或已经被损害!");
return;
}
CStdioFile fileAllDYZ(strAllDYZ, CStdioFile::modeRead);
CString strTmp;
std::vector<USHORT> vContainer;
while (fileAllDYZ.ReadString(strTmp))
{
CString strGBK = strTmp.Mid( strTmp.ReverseFind(_T(' '))+1 );
USHORT usCode = atoi(strGBK);
vContainer.push_back(usCode);
}
fileAllDYZ.Close();
CStdioFile fileDYZ(strDYZDB, CStdioFile::modeRead);
CString strDYZ;
while (fileDYZ.ReadString(strDYZ))
{
strDYZ.TrimLeft();
strDYZ.TrimRight();
if (strDYZ.IsEmpty())
continue;
CString strGBK = strDYZ.Left(strDYZ.Find(','));
strDYZ = strDYZ.Mid( strDYZ.Find(',') + 1);
strGBK = strDYZ.Left( strDYZ.Find(',') );
strDYZ = strDYZ.Mid( strDYZ.Find(',') + 1);
USHORT usCode = atoi(strGBK);
// match code
int iStart = 0;
int iEnd = vContainer.size() - 1;
int iMid = 0;
bool bFind = false;
while(iStart <= iEnd)
{
iMid = (iStart + iEnd) / 2;
if(vContainer[iMid] < usCode)
iStart = iMid + 1;
else if(vContainer[iMid] > usCode)
iEnd = iMid - 1;
else
{
bFind = true;
break;
}
}
if(bFind)
{
usOff = ((usCode >> 8) - 0x81) * 191 + (usCode & 0xff) - 0x40;
strDYMapping[usOff].Format("\"%s\",", strDYZ.GetBuffer(0));
}
}// while
vContainer.clear();
// (1)Header
CFile fileDYZArray;
fileDYZArray.Open(g_strResultDir + "\\多音字映射表.txt", CStdioFile::modeCreate | CStdioFile::modeReadWrite);
fileDYZArray.Write("static char* g_pszDYZIndex[] = {", strlen("static char* g_pszDYZIndex[] = {"));
fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
int iLineCnt = 0;
for (bHigh = 0x81; bHigh <= 0xFE; ++bHigh)
{
for (bLow = 0x40; bLow <= 0xFE; ++bLow)
{
usOff = (bHigh-0x81) * (0xFE-0x40+1) + (bLow-0x40);
if(usOff == 17691)
TRACE0("Find");
fileDYZArray.Write(strDYMapping[usOff].GetBuffer(0), strDYMapping[usOff].GetLength());
if ((usOff+1) % 0x10 == 0)
{
fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
iLineCnt++;
}
}
}
// (3)
fileDYZArray.Write("};", strlen("};"));
fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
fileDYZArray.Close();
fileDYZ.Close();
}
catch (CFileException*) {
AfxMessageBox("CFileException() at CSCSortView::OnDyzSort()!");
}
AfxMessageBox("多音字解析完成,见\\result\\多音字映射表.txt!");
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -