📄 scsortview.cpp
字号:
// SCSortView.cpp : implementation of the CSCSortView class
//
#include "stdafx.h"
#include "SCSort.h"
#include "SCSortDoc.h"
#include "SCSortView.h"
#include "cnpy.h"
#include "GBK.h"
#include <io.h>
#include <direct.h>
#include <vector>
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
// Directory that receives/holds the generated table files. globalInit() sets it to
// "<current directory>\result".
CString g_strResultDir;
// Line terminator appended to output records by writing these two raw bytes.
// Stored little-endian, 0x0A0D becomes the byte sequence 0D 0A (CR LF)
// (assumes a little-endian target such as x86 -- confirm before porting).
USHORT g_usEnt = 0x0A0D;
/*
#define Fast_Trim(pBuf, szTrim, ppStr, iCnt) \
char* s = pBuf; \
ppStr[iCnt] = s; \
while(*s) { \
if (*s == szTrim) \
{ \
*s = 0; \
iCnt++; \
s ++; \
ppStr[iCnt] = s; \
} \
else \
if (*s == '\r') \
{ \
*s = 0; \
break; \
} \
else \
if (*s == '\n') \
{ \
*s = 0; \
break; \
} \
else \
{ \
s++; \
} \
} \
iCnt++;
*/
// Splits a text line, in place, into delimiter-separated fields.
//
// Each delimiter found is overwritten with '\0' and the start of every field is
// stored in res[0], res[1], ...  Scanning ends at the first '\r' or '\n' (which
// is overwritten with '\0'), at the terminating '\0', or as soon as res is full.
// When res fills up, the last stored field keeps the untouched remainder of the
// line (further delimiters and any line ending included).
//
// Parameters:
//   buffer   - writable, NUL-terminated line; modified in place.
//   trim_ch  - field delimiter character.
//   res      - receives pointers into buffer (char** is the very same type as
//              the Win32 LPSTR*); must have room for _max_no entries.
//   _max_no  - capacity of res.
//
// Returns the number of entries stored in res; never more than _max_no, and 0
// when buffer/res is NULL or _max_no is not positive.
inline int fast_trim(char* buffer, char trim_ch, char** res, int _max_no)
{
    // Without this guard a capacity of 0 would still write res[0].
    if (buffer == NULL || res == NULL || _max_no <= 0)
        return 0;

    int count = 0;
    char* s = buffer;
    res[count++] = s;

    while (*s)
    {
        if (*s == trim_ch)
        {
            // Capacity 1: there is no slot for a second field, so the delimiter
            // stays in place instead of writing past the end of res.
            if (count >= _max_no)
                break;

            *s++ = 0;
            res[count++] = s;

            // Last slot just filled: hand back the remainder unmodified.
            if (count >= _max_no)
                break;
        }
        else if (*s == '\r' || *s == '\n')
        {
            // End of the logical line: cut the line ending off the current field.
            *s = 0;
            break;
        }
        else
        {
            ++s;
        }
    }
    return count;
}
/////////////////////////////////////////////////////////////////////////////
// CSCSortView
// MFC dynamic-creation support for CSCSortView (derived from CView).
IMPLEMENT_DYNCREATE(CSCSortView, CView)
// Message map: binds the four table-building command IDs to handlers of this
// view and routes the standard print commands to the CView implementations.
BEGIN_MESSAGE_MAP(CSCSortView, CView)
//{{AFX_MSG_MAP(CSCSortView)
ON_COMMAND(ID_BUILD_SC_PYSORT, OnBuildScPysort)
ON_COMMAND(ID_DYZ_SORT, OnDyzSort)
ON_COMMAND(ID_EXTRACT_DYZ, OnExtractDYZ)
ON_COMMAND(ID_NEW_DYZ_SORT, OnNewDyzSort)
//}}AFX_MSG_MAP
// Standard printing commands
ON_COMMAND(ID_FILE_PRINT, CView::OnFilePrint)
ON_COMMAND(ID_FILE_PRINT_DIRECT, CView::OnFilePrint)
ON_COMMAND(ID_FILE_PRINT_PREVIEW, CView::OnFilePrintPreview)
END_MESSAGE_MAP()
/////////////////////////////////////////////////////////////////////////////
// CSCSortView construction/destruction
// Constructs the view and calls globalInit(), which sets g_strResultDir and
// creates the result directory when it is missing.
CSCSortView::CSCSortView()
{
// TODO: add construction code here
globalInit();
}
// Destructor; the view releases nothing of its own here.
CSCSortView::~CSCSortView()
{
}
// Leaves cs unmodified and returns the result of CView::PreCreateWindow(cs).
BOOL CSCSortView::PreCreateWindow(CREATESTRUCT& cs)
{
// TODO: Modify the Window class or styles here by modifying
// the CREATESTRUCT cs
return CView::PreCreateWindow(cs);
}
/////////////////////////////////////////////////////////////////////////////
// CSCSortView drawing
// Draw handler: fetches the document and runs ASSERT_VALID on it; nothing is
// drawn and pDC is not used.
void CSCSortView::OnDraw(CDC* pDC)
{
CSCSortDoc* pDoc = GetDocument();
ASSERT_VALID(pDoc);
// TODO: add draw code for native data here
}
/////////////////////////////////////////////////////////////////////////////
// CSCSortView printing
// Print preparation: returns whatever DoPreparePrinting(pInfo) returns.
BOOL CSCSortView::OnPreparePrinting(CPrintInfo* pInfo)
{
// default preparation
return DoPreparePrinting(pInfo);
}
// Begin-printing hook; intentionally empty (both parameters are unused).
void CSCSortView::OnBeginPrinting(CDC* /*pDC*/, CPrintInfo* /*pInfo*/)
{
// TODO: add extra initialization before printing
}
// End-printing hook; intentionally empty (both parameters are unused).
void CSCSortView::OnEndPrinting(CDC* /*pDC*/, CPrintInfo* /*pInfo*/)
{
// TODO: add cleanup after printing
}
/////////////////////////////////////////////////////////////////////////////
// CSCSortView diagnostics
#ifdef _DEBUG
// Debug-only validity check; delegates to CView::AssertValid().
void CSCSortView::AssertValid() const
{
CView::AssertValid();
}
// Debug-only diagnostic dump; delegates to CView::Dump(dc).
void CSCSortView::Dump(CDumpContext& dc) const
{
CView::Dump(dc);
}
// Debug-build accessor: asserts that m_pDocument is a CSCSortDoc and returns it
// cast to that type.
CSCSortDoc* CSCSortView::GetDocument() // non-debug version is inline
{
ASSERT(m_pDocument->IsKindOf(RUNTIME_CLASS(CSCSortDoc)));
return (CSCSortDoc*)m_pDocument;
}
#endif //_DEBUG
/////////////////////////////////////////////////////////////////////////////
// CSCSortView message handlers
void CSCSortView::OnBuildScPysort()
{
// TODO: Add your command handler code here
// (1)将汉字按GBK码输出
printAllHZByGBK();
// printAllGBK();
// (2)将上述汉字在Excel中按拼音排序
try
{
BYTE bHigh = 0;
BYTE bLow = 0;
short shWordRange = (0xFE - 0x81 + 1) * (0xFE - 0x40 + 1);
short *pshGBKPYID = new short[shWordRange];
ASSERT(pshGBKPYID != NULL);
memset(pshGBKPYID, 0xFF, shWordRange * sizeof(short));
CFile gbkFile(g_strResultDir + "\\所有汉字21003.csv", CFile::modeRead);
short shSCWordNum = (short)(gbkFile.GetLength()/0x4);
for (short shPYID = 0x1; shPYID <= shSCWordNum; ++shPYID)
{
// 高位在前,低位在后
short shGBKCode = 0;
gbkFile.Read(&shGBKCode, sizeof(shGBKCode));
gbkFile.Seek(sizeof(g_usEnt), CFile::current);
bHigh = shGBKCode & 0xFF;
bLow = (shGBKCode & 0xFF00) >> 0x8;
ASSERT(bHigh >= 0x81 && bHigh <= 0xFE);
ASSERT(bLow >= 0x40 && bLow <= 0xFE && bLow != 0x7F);
short shGBKOffset = (bHigh - 0x81) * (0xFE - 0x40 + 1) + (bLow - 0x40);
pshGBKPYID[shGBKOffset] = shPYID;
}
// 输出汉字拼音映射表
CFile filePYMapping;
filePYMapping.Open(g_strResultDir + "\\拼音映射表.txt", CFile::modeCreate | CFile::modeReadWrite);
// (1)
char *pszTemp = "static int g_nHZStatByPinYin[CODE_COUNT] = ";
filePYMapping.Write(pszTemp, strlen(pszTemp));
filePYMapping.Write(&g_usEnt, sizeof(g_usEnt));
pszTemp = "{197,1079,2412,3396,3572,4185,5023,6075,7690,8250,9815,10630,11106,11143,11761,"\
"12795,13121,14434,15330,15958,17278,19227,21003};";
filePYMapping.Write(pszTemp, strlen(pszTemp));
filePYMapping.Write(&g_usEnt, sizeof(g_usEnt));
filePYMapping.Write("static WORD g_dwHZPYIDBySortAsc[] = ", strlen("static WORD g_dwHZPYIDBySortAsc[] = {"));
filePYMapping.Write(&g_usEnt, sizeof(g_usEnt));
// (2)
CString str;
for (short shCount = 0; shCount < shWordRange; ++shCount)
{
str.Format("0x%4hX,", pshGBKPYID[shCount]);
str.Replace(' ', '0');
filePYMapping.Write(str.GetBuffer(0), str.GetLength());
if ((shCount + 1) % 0x10 == 0)
filePYMapping.Write(&g_usEnt, sizeof(g_usEnt));
}
// (3)
if (shWordRange % 0x10 != 0)
{
filePYMapping.Write(&g_usEnt, sizeof(g_usEnt));
}
filePYMapping.Write("};", strlen("};"));
filePYMapping.Write(&g_usEnt, sizeof(g_usEnt));
filePYMapping.Close();
delete []pshGBKPYID;
pshGBKPYID = NULL;
gbkFile.Close();
str.Format("汉字拼音映射表解析完毕!将生成的汉字拼音映射表文件“%s”的全部内容覆盖"\
"Chinese工程下的GBK.cpp文件中的相应内容。",
g_strResultDir + "拼音映射表.txt");
AfxMessageBox(str);
}
catch (CFileException*) {
AfxMessageBox("CFileException in CSCSortView::OnBuildScPysort()!");
}
}
void CSCSortView::OnDyzSort()
{
temp();
// return;
// TODO: Add your command handler code here
CString strDYZDB;
CFileDialog dyzDBSelectDlg(TRUE, NULL, NULL, OFN_HIDEREADONLY, "GBK码升序排序的多音字字库(SortDYZ.txt)|*.txt||", NULL);
if (dyzDBSelectDlg.DoModal() == IDOK)
{
strDYZDB = dyzDBSelectDlg.GetPathName();
}
if (_access(strDYZDB, 0x0) != 0)
{
AfxMessageBox("GBK码升序排序的多音字字库不存在或已经被损害!");
return;
}
USHORT usOff = 0;
BYTE bHigh = 0;
BYTE bLow = 0;
CString strDYMapping[(0xFE-0x81+1)*(0xFE-0x40+1) + 1];
for (usOff = 0; usOff <= (0xFE-0x81+1)*(0xFE-0x40+1); ++usOff)
{
strDYMapping[usOff].Format("NULL,");
}
try
{
CStdioFile fileDYZ(strDYZDB, CStdioFile::modeRead);
CString strDYZ;
while (fileDYZ.ReadString(strDYZ))
{
strDYZ.TrimLeft();
strDYZ.TrimRight();
strDYZ.TrimLeft(_T(' '));
strDYZ.TrimRight(_T(' '));
if (strDYZ.IsEmpty())
continue;
CString strGBK;
strDYZ = strDYZ.Mid( strDYZ.Find(_T(' '))+1 );
strGBK = strDYZ.Mid( strDYZ.ReverseFind(_T(' '))+1 );
strDYZ = strDYZ.Left( strDYZ.Find(_T(' ')) );
USHORT usWord = atoi(strGBK);
// for test
if(usWord == 56759)
TRACE0("Find!");
bLow = usWord & 0xFF;
bHigh = (usWord & 0xFF00) >> 0x8;
usOff = (bHigh - 0x81)*191 + (bLow - 0x40);
strDYMapping[usOff].Format("\"%s\",", strDYZ.GetBuffer(0));
}// while
// (1)Header
CFile fileDYZArray;
fileDYZArray.Open(g_strResultDir + "\\多音字映射表.txt", CStdioFile::modeCreate | CStdioFile::modeReadWrite);
fileDYZArray.Write("static char* g_pszDYZIndex[] = {", strlen("static char* g_pszDYZIndex[] = {"));
fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
for (bHigh = 0x81; bHigh <= 0xFE; ++bHigh)
{
for (bLow = 0x40; bLow <= 0xFE; ++bLow)
{
usOff = (bHigh-0x81) * (0xFE-0x40+1) + (bLow-0x40);
fileDYZArray.Write(strDYMapping[usOff].GetBuffer(0), strDYMapping[usOff].GetLength());
if ((usOff+1) % 0x10 == 0)
{
fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
}
}
}
// (3)
fileDYZArray.Write("};", strlen("};"));
fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
fileDYZArray.Close();
fileDYZ.Close();
}
catch (CFileException*) {
AfxMessageBox("CFileException() at CSCSortView::OnDyzSort()!");
}
AfxMessageBox("多音字解析完成,见\\result\\多音字映射表.txt!");
}
void globalInit()
{
char pszCD[MAX_PATH + 1] = "\0";
GetCurrentDirectory(MAX_PATH, pszCD);
g_strResultDir.Format("%s", pszCD);
g_strResultDir += "\\result";
if (_access(g_strResultDir, 0x0) != 0)
mkdir(g_strResultDir);
}
void printAllHZByGBK()
{
BYTE bHigh = 0;
BYTE bLow = 0;
CFile gbkFile(g_strResultDir + "\\所有汉字21003.txt", CFile::modeCreate | CFile::modeReadWrite);
// (1)GBK/3: 8140~A0FE:GB 13000.1扩充汉字区,收录CJK汉字6080个。
for (bHigh = 0x81; bHigh <= 0xA0; ++bHigh)
{
for (bLow = 0x40; bLow <= 0xFE; ++bLow)
{
if (bLow == 0x7F)
continue;
gbkFile.Write(&bHigh, sizeof(bHigh));
gbkFile.Write(&bLow, sizeof(bLow));
gbkFile.Write(&g_usEnt, sizeof(g_usEnt));
}
}
// (2)GBK/4: AA40~FEA0:GB 13000.1扩充汉字区,收录CJK和增补汉字8160个。
for (bHigh = 0xAA; bHigh <= 0xFE; ++bHigh)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -