⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scsortview.cpp

📁 中文编码转换
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// SCSortView.cpp : implementation of the CSCSortView class
//

#include "stdafx.h"
#include "SCSort.h"

#include "SCSortDoc.h"
#include "SCSortView.h"
#include "cnpy.h"
#include "GBK.h"
#include <io.h>
#include <direct.h>
#include <vector>

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

CString g_strResultDir;
USHORT  g_usEnt = 0x0A0D;

/*
#define Fast_Trim(pBuf, szTrim, ppStr, iCnt) \
	char* s = pBuf; \
	ppStr[iCnt] = s; \
	while(*s) { \
		if (*s == szTrim) \
		{ \
			*s = 0; \
			iCnt++; \
			s ++; \
			ppStr[iCnt] = s; \
		} \
		else \
		if (*s == '\r') \
		{ \
			*s = 0; \
			break; \
		} \
		else \
		if (*s == '\n') \
		{ \
			*s = 0; \
			break; \
		} \
		else \
		{ \
			s++; \
		} \
	} \
	iCnt++;
*/

// Splits a line in place into fields.
//
// Every occurrence of `trim_ch` in `buffer` is overwritten with '\0' and the
// start of each field is stored in `res`. Scanning stops at the first '\r' or
// '\n' (which is also overwritten with '\0') or at the end of the string.
//
//   buffer   - writable, NUL-terminated text; modified in place.
//   trim_ch  - field separator.
//   res      - receives the field start pointers; must have room for
//              `_max_no` entries. (`char**` is the same type as `LPSTR*`.)
//   _max_no  - capacity of `res`.
//
// Returns the number of entries written to `res` (never more than `_max_no`).
// When the capacity is reached the function returns immediately, so the last
// field then holds the untouched remainder of the line (further separators
// and any trailing CR/LF included).
// Returns 0 without touching anything if a pointer is null or `_max_no` <= 0.
inline int fast_trim(char* buffer, char trim_ch, char** res, int _max_no)
{
	if (!buffer || !res || _max_no <= 0)
		return 0;

	const int last = _max_no - 1;	// highest index that may be written in res
	int nn = 0;
	char* s = buffer;

	res[nn] = s;

	while (*s)
	{
		if (*s == trim_ch)
		{
			// No slot left for another field (only reachable when
			// _max_no == 1): stop before writing past the end of res.
			if (nn >= last)
				return nn + 1;

			*s = 0;
			++nn;
			++s;
			res[nn] = s;
			if (nn >= last)
				return nn + 1;
		}
		else if (*s == '\r' || *s == '\n')
		{
			// End of line: cut the terminator off the current field.
			*s = 0;
			break;
		}
		else
		{
			++s;
		}
	}
	return nn + 1;
}


/////////////////////////////////////////////////////////////////////////////
// CSCSortView

// Enables MFC dynamic creation of CSCSortView (declared base class: CView).
IMPLEMENT_DYNCREATE(CSCSortView, CView)

// Message map: binds command IDs to the handlers of this view.
// The entries between the AFX_MSG_MAP markers are the application commands;
// OnExtractDYZ and OnNewDyzSort are not defined in this part of the file.
BEGIN_MESSAGE_MAP(CSCSortView, CView)
	//{{AFX_MSG_MAP(CSCSortView)
	ON_COMMAND(ID_BUILD_SC_PYSORT, OnBuildScPysort)
	ON_COMMAND(ID_DYZ_SORT, OnDyzSort)
	ON_COMMAND(ID_EXTRACT_DYZ, OnExtractDYZ)
	ON_COMMAND(ID_NEW_DYZ_SORT, OnNewDyzSort)
	//}}AFX_MSG_MAP
	// Standard printing commands (both print IDs go to CView::OnFilePrint)
	ON_COMMAND(ID_FILE_PRINT, CView::OnFilePrint)
	ON_COMMAND(ID_FILE_PRINT_DIRECT, CView::OnFilePrint)
	ON_COMMAND(ID_FILE_PRINT_PREVIEW, CView::OnFilePrintPreview)
END_MESSAGE_MAP()

/////////////////////////////////////////////////////////////////////////////
// CSCSortView construction/destruction

// Constructs the view and runs the one-time global setup.
CSCSortView::CSCSortView()
{
	// TODO: add construction code here
	// globalInit() fills g_strResultDir and creates that directory if needed.
	globalInit();
}

// Destructor; the view releases nothing explicitly here.
CSCSortView::~CSCSortView()
{
}

// Called before the window is created. No window class or style in `cs` is
// customised here; the decision is delegated entirely to CView.
// Returns whatever CView::PreCreateWindow returns.
BOOL CSCSortView::PreCreateWindow(CREATESTRUCT& cs)
{
	// TODO: adjust the window class or styles through `cs` if ever needed.
	const BOOL bBaseResult = CView::PreCreateWindow(cs);
	return bBaseResult;
}

/////////////////////////////////////////////////////////////////////////////
// CSCSortView drawing

// Draw handler. Nothing is rendered: the function only fetches the document
// and validates it with ASSERT_VALID. `pDC` is not used.
void CSCSortView::OnDraw(CDC* pDC)
{
	CSCSortDoc* const pDocument = GetDocument();
	ASSERT_VALID(pDocument);
	// TODO: draw the document's data once there is something to show.
}

/////////////////////////////////////////////////////////////////////////////
// CSCSortView printing

// Print preparation: no custom setup, the framework default is used.
// Returns the result of DoPreparePrinting(pInfo).
BOOL CSCSortView::OnPreparePrinting(CPrintInfo* pInfo)
{
	const BOOL bPrepared = DoPreparePrinting(pInfo);
	return bPrepared;
}

// Print-start hook. Currently a no-op; both parameters are left unnamed
// because they are not used.
void CSCSortView::OnBeginPrinting(CDC* /*pDC*/, CPrintInfo* /*pInfo*/)
{
	// TODO: add extra initialization before printing
}

// Print-end hook. Currently a no-op; both parameters are left unnamed
// because they are not used.
void CSCSortView::OnEndPrinting(CDC* /*pDC*/, CPrintInfo* /*pInfo*/)
{
	// TODO: add cleanup after printing
}

/////////////////////////////////////////////////////////////////////////////
// CSCSortView diagnostics

#ifdef _DEBUG
// Debug-only validity check; adds nothing beyond the CView check.
void CSCSortView::AssertValid() const
{
	CView::AssertValid();
}

// Debug-only dump; forwards to CView::Dump without adding view-specific state.
void CSCSortView::Dump(CDumpContext& dc) const
{
	CView::Dump(dc);
}

// Debug build of GetDocument (the non-debug version is inline elsewhere).
// Asserts that the attached document really is a CSCSortDoc, then returns it
// as that type.
CSCSortDoc* CSCSortView::GetDocument()
{
	CDocument* const pAttached = m_pDocument;
	ASSERT(pAttached->IsKindOf(RUNTIME_CLASS(CSCSortDoc)));
	return static_cast<CSCSortDoc*>(pAttached);
}
#endif //_DEBUG

/////////////////////////////////////////////////////////////////////////////
// CSCSortView message handlers


void CSCSortView::OnBuildScPysort()
{
	// TODO: Add your command handler code here
	// (1)将汉字按GBK码输出
	printAllHZByGBK();
//	printAllGBK();

	// (2)将上述汉字在Excel中按拼音排序
	try
	{
		BYTE  bHigh = 0;
		BYTE  bLow  = 0;
		short shWordRange = (0xFE - 0x81 + 1) * (0xFE - 0x40 + 1);
		short *pshGBKPYID = new short[shWordRange];
		ASSERT(pshGBKPYID != NULL);
		memset(pshGBKPYID, 0xFF, shWordRange * sizeof(short));
		
		CFile gbkFile(g_strResultDir + "\\所有汉字21003.csv", CFile::modeRead);
		short shSCWordNum = (short)(gbkFile.GetLength()/0x4);
		for (short shPYID = 0x1; shPYID <= shSCWordNum; ++shPYID)
		{
			// 高位在前,低位在后
			short shGBKCode = 0;
			gbkFile.Read(&shGBKCode, sizeof(shGBKCode));
			gbkFile.Seek(sizeof(g_usEnt), CFile::current);
			bHigh = shGBKCode & 0xFF;
			bLow  = (shGBKCode & 0xFF00) >> 0x8;
			ASSERT(bHigh >= 0x81 && bHigh <= 0xFE);
			ASSERT(bLow  >= 0x40 && bLow  <= 0xFE && bLow != 0x7F);
			
			short shGBKOffset = (bHigh - 0x81) * (0xFE - 0x40 + 1) + (bLow - 0x40);
			pshGBKPYID[shGBKOffset] = shPYID;
		}

		// 输出汉字拼音映射表
		CFile filePYMapping;
		filePYMapping.Open(g_strResultDir + "\\拼音映射表.txt", CFile::modeCreate | CFile::modeReadWrite);
		// (1)
		char *pszTemp = "static int g_nHZStatByPinYin[CODE_COUNT] = ";
		filePYMapping.Write(pszTemp, strlen(pszTemp));
		filePYMapping.Write(&g_usEnt, sizeof(g_usEnt));
		pszTemp = "{197,1079,2412,3396,3572,4185,5023,6075,7690,8250,9815,10630,11106,11143,11761,"\
				  "12795,13121,14434,15330,15958,17278,19227,21003};";
		filePYMapping.Write(pszTemp, strlen(pszTemp));
		filePYMapping.Write(&g_usEnt, sizeof(g_usEnt));
		
		filePYMapping.Write("static WORD g_dwHZPYIDBySortAsc[] = ", strlen("static WORD g_dwHZPYIDBySortAsc[] = {"));
		filePYMapping.Write(&g_usEnt, sizeof(g_usEnt));
		// (2)
		CString str;
		for (short shCount = 0; shCount < shWordRange; ++shCount)
		{
			str.Format("0x%4hX,", pshGBKPYID[shCount]);
			str.Replace(' ', '0');
			filePYMapping.Write(str.GetBuffer(0), str.GetLength());
			if ((shCount + 1) % 0x10 == 0)
				filePYMapping.Write(&g_usEnt, sizeof(g_usEnt));
		}
		// (3)
		if (shWordRange % 0x10 != 0)
		{
			filePYMapping.Write(&g_usEnt, sizeof(g_usEnt));
		}
		filePYMapping.Write("};", strlen("};"));
		filePYMapping.Write(&g_usEnt, sizeof(g_usEnt));
		filePYMapping.Close();

		delete []pshGBKPYID;
		pshGBKPYID = NULL;
		gbkFile.Close();

		str.Format("汉字拼音映射表解析完毕!将生成的汉字拼音映射表文件“%s”的全部内容覆盖"\
			"Chinese工程下的GBK.cpp文件中的相应内容。",
			g_strResultDir + "拼音映射表.txt");
		AfxMessageBox(str);
	}
	catch (CFileException*) {
		AfxMessageBox("CFileException in CSCSortView::OnBuildScPysort()!");
	}
}

void CSCSortView::OnDyzSort() 
{
	temp();
//	return;

	// TODO: Add your command handler code here
	CString strDYZDB;
	CFileDialog dyzDBSelectDlg(TRUE, NULL, NULL, OFN_HIDEREADONLY, "GBK码升序排序的多音字字库(SortDYZ.txt)|*.txt||", NULL);
	if (dyzDBSelectDlg.DoModal() == IDOK)
	{
		strDYZDB = dyzDBSelectDlg.GetPathName();
	}
	if (_access(strDYZDB, 0x0) != 0)
	{
		AfxMessageBox("GBK码升序排序的多音字字库不存在或已经被损害!");
		return;
	}

	USHORT usOff = 0;
	BYTE bHigh = 0;
	BYTE bLow = 0;
	CString strDYMapping[(0xFE-0x81+1)*(0xFE-0x40+1) + 1];
	for (usOff = 0; usOff <= (0xFE-0x81+1)*(0xFE-0x40+1); ++usOff)
	{
		strDYMapping[usOff].Format("NULL,");
	}

	try
	{
		CStdioFile fileDYZ(strDYZDB, CStdioFile::modeRead);
		CString strDYZ;
		while (fileDYZ.ReadString(strDYZ))
		{
			strDYZ.TrimLeft();
			strDYZ.TrimRight();
			strDYZ.TrimLeft(_T('	'));
			strDYZ.TrimRight(_T('	'));
			if (strDYZ.IsEmpty())
				continue;
			
			CString strGBK;
			strDYZ = strDYZ.Mid( strDYZ.Find(_T('	'))+1 );
			strGBK = strDYZ.Mid( strDYZ.ReverseFind(_T('	'))+1 );
			strDYZ = strDYZ.Left( strDYZ.Find(_T('	')) );

			USHORT usWord = atoi(strGBK);
			// for test
			if(usWord == 56759)
				TRACE0("Find!");
			bLow  = usWord & 0xFF;
			bHigh = (usWord & 0xFF00) >> 0x8;
			usOff = (bHigh - 0x81)*191 + (bLow - 0x40);
			strDYMapping[usOff].Format("\"%s\",", strDYZ.GetBuffer(0));
		}// while

		// (1)Header
		CFile fileDYZArray;
		fileDYZArray.Open(g_strResultDir + "\\多音字映射表.txt", CStdioFile::modeCreate | CStdioFile::modeReadWrite);
		fileDYZArray.Write("static char* g_pszDYZIndex[] = {", strlen("static char* g_pszDYZIndex[] = {"));
		fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));

		for (bHigh = 0x81; bHigh <= 0xFE; ++bHigh)
		{
			for (bLow = 0x40; bLow <= 0xFE; ++bLow)
			{
				usOff = (bHigh-0x81) * (0xFE-0x40+1) + (bLow-0x40);
				fileDYZArray.Write(strDYMapping[usOff].GetBuffer(0), strDYMapping[usOff].GetLength());
				if ((usOff+1) % 0x10 == 0)
				{
					fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
				}
			}
		}
		// (3)
		fileDYZArray.Write("};", strlen("};"));
		fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
		fileDYZArray.Close();
		fileDYZ.Close();
	}
	catch (CFileException*) {
		AfxMessageBox("CFileException() at CSCSortView::OnDyzSort()!");
	}

	AfxMessageBox("多音字解析完成,见\\result\\多音字映射表.txt!");
}


void globalInit()
{
	char pszCD[MAX_PATH + 1] = "\0";
	GetCurrentDirectory(MAX_PATH, pszCD);
	g_strResultDir.Format("%s", pszCD);
	g_strResultDir += "\\result";
	if (_access(g_strResultDir, 0x0) != 0)
		mkdir(g_strResultDir);
}


void printAllHZByGBK()
{
	BYTE  bHigh = 0;
	BYTE  bLow  = 0;
	CFile gbkFile(g_strResultDir + "\\所有汉字21003.txt", CFile::modeCreate | CFile::modeReadWrite);
	
	// (1)GBK/3: 8140~A0FE:GB 13000.1扩充汉字区,收录CJK汉字6080个。
	for (bHigh = 0x81; bHigh <= 0xA0; ++bHigh)
	{
		for (bLow = 0x40; bLow <= 0xFE; ++bLow)
		{
			if (bLow == 0x7F)
				continue;

			gbkFile.Write(&bHigh, sizeof(bHigh));
			gbkFile.Write(&bLow, sizeof(bLow));
			gbkFile.Write(&g_usEnt, sizeof(g_usEnt));
		}
	}

	// (2)GBK/4: AA40~FEA0:GB 13000.1扩充汉字区,收录CJK和增补汉字8160个。
	for (bHigh = 0xAA; bHigh <= 0xFE; ++bHigh)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -