⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scsortview.cpp

📁 中文编码转换
💻 CPP
📖 第 1 页 / 共 2 页
字号:
	{
		for (bLow = 0x40; bLow <= 0xA0; ++bLow)
		{
			if (bLow == 0x7F)
				continue;

			gbkFile.Write(&bHigh, sizeof(bHigh));
			gbkFile.Write(&bLow, sizeof(bLow));
			gbkFile.Write(&g_usEnt, sizeof(g_usEnt));
		}
	}

	// (3)GBK/2: B0A1~F7FE:GB 2312汉字区,收录汉字6763个,5个空位是D7FA~D7FE。
	for (bHigh = 0xB0; bHigh <= 0xF7; ++bHigh)
	{
		for (bLow = 0xA1; bLow <= 0xFE; ++bLow)
		{
			if (bHigh == 0xD7 && (bLow >= 0xFA && bLow <= 0xFE))
				continue;

			gbkFile.Write(&bHigh, sizeof(bHigh));
			gbkFile.Write(&bLow, sizeof(bLow));
			gbkFile.Write(&g_usEnt, sizeof(g_usEnt));
		}
	}
	
	gbkFile.Close();


	CString strGBKFile = g_strResultDir + "\\所有汉字21003.txt";
	CString strHelp;
	strHelp.Format("请严格执行下列操作之后才单击确定关闭本对话框,否则将产生文件异常!\n\n\n"\
		"(1)打开MS Excel,从【数据】->【导入外部数据】->【导入数据】\n\n"\
		"(2)在【选择数据源】对话框中选择“%s”,然后单击【确定】\n\n"\
		"(3)在【文本导入向导-3 步骤之1】对话框中将【文件原始格式】设为:“936:简体中文(GB2312)”\n\n"\
		"(4)单击完成,选中Excel中第一列导入的汉字按【升序排序】。然后单击【文件】->【另存为】菜单\n\n"\
		"(5)在【另存为】对话框中:\n"\
		"	(a)设置【文件名(N)】:所有汉字21003.csv\n"\
		"	(b)设置【保存类型】:“CSV(逗号分隔)(*.csv)”\n"\
		"	确保保存路径为:%s\n\n", 
		strGBKFile,
		g_strResultDir);
	MessageBox(NULL, strHelp, "帮助!", MB_ICONINFORMATION);
}

void printAllGBK()
{
	BYTE  bHigh = 0;
	BYTE  bLow  = 0;
	CFile gbkFile(g_strResultDir + "\\所有GBK.txt", CFile::modeCreate | CFile::modeReadWrite);
	
	// (1)GBK/3: 8140~A0FE:GB 13000.1扩充汉字区,收录CJK汉字6080个。
	for (bHigh = 0x81; bHigh <= 0xFE; ++bHigh)
	{
		for (bLow = 0x40; bLow <= 0xFE; ++bLow)
		{
			if (bLow == 0x7F)
				continue;

			gbkFile.Write(&bHigh, sizeof(bHigh));
			gbkFile.Write(&bLow, sizeof(bLow));
			gbkFile.Write(&g_usEnt, sizeof(g_usEnt));
		}
	}
	gbkFile.Close();
}

void temp()
{
	// TODO: Add your command handler code here
	CString strDYZDB;
	CFileDialog dyzDBSelectDlg(TRUE, NULL, NULL, OFN_HIDEREADONLY, "GBK码升序排序的多音字字库(SortDYZ.txt)|*.txt||", NULL);
	if (dyzDBSelectDlg.DoModal() == IDOK)
	{
		strDYZDB = dyzDBSelectDlg.GetPathName();
	}
	if (_access(strDYZDB, 0x0) != 0)
	{
		AfxMessageBox("GBK码升序排序的多音字字库不存在或已经被损害!");
		return;
	}

	USHORT usOff = 0;
	BYTE bHigh = 0;
	BYTE bLow = 0;
	char chPY;
	CString strDYMapping[(0xFE-0x81+1)*(0xFE-0x40+1) + 1];
	for (bHigh = 0x81; bHigh <= 0xFE; ++bHigh)
	{
		for (bLow = 0x40; bLow <= 0xFE; ++bLow)
		{
			usOff = bLow;
			usOff <<= 0x8;
			usOff |= bHigh;
			if (GetSingleHZJP((char*)&usOff, chPY))
			{
				usOff = (bHigh-0x81)*191 + (bLow-0x40);
				strDYMapping[usOff].Format("\"%c\",", chPY);
			}
			else
			{
				usOff = (bHigh-0x81)*191 + (bLow-0x40);
				strDYMapping[usOff].Format("NULL,");
			}
		}
	}

	try
	{
		CStdioFile fileDYZ(strDYZDB, CStdioFile::modeRead);
		CString strDYZ;
		while (fileDYZ.ReadString(strDYZ))
		{
			strDYZ.TrimLeft();
			strDYZ.TrimRight();
			strDYZ.TrimLeft(_T('	'));
			strDYZ.TrimRight(_T('	'));
			if (strDYZ.IsEmpty())
				continue;
			
			CString strGBK;
			strDYZ = strDYZ.Mid( strDYZ.Find(_T('	'))+1 );
			strGBK = strDYZ.Mid( strDYZ.ReverseFind(_T('	'))+1 );
			strDYZ = strDYZ.Left( strDYZ.Find(_T('	')) );
			USHORT usWord = atoi(strGBK);
			bLow  = usWord & 0xFF;
			bHigh = (usWord & 0xFF00) >> 0x8;
			usOff = (bHigh - 0x81)*191 + (bLow - 0x40);
			strDYMapping[usOff].Format("\"%s\",", strDYZ.GetBuffer(0));
		}// while

		// (1)Header
		CFile fileDYZArray;
		fileDYZArray.Open(g_strResultDir + "\\多音字映射表2.txt", CStdioFile::modeCreate | CStdioFile::modeReadWrite);
		fileDYZArray.Write("static char* g_pszDYZIndex[] = {", strlen("static char* g_pszDYZIndex[] = {"));
		fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));

		for (bHigh = 0x81; bHigh <= 0xFE; ++bHigh)
		{
			for (bLow = 0x40; bLow <= 0xFE; ++bLow)
			{
				usOff = (bHigh-0x81) * (0xFE-0x40+1) + (bLow-0x40);
				fileDYZArray.Write(strDYMapping[usOff].GetBuffer(0), strDYMapping[usOff].GetLength());
				if ((usOff+1) % 0x10 == 0)
				{
					fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
				}
			}
		}
		// (3)
		fileDYZArray.Write("};", strlen("};"));
		fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
		fileDYZArray.Close();
		fileDYZ.Close();
	}
	catch (CFileException*) {
		AfxMessageBox("CFileException() at CSCSortView::OnDyzSort()!");
	}

	AfxMessageBox("多音字解析完成,见\\result\\多音字映射表2.txt!");
}

void CSCSortView::OnExtractDYZ() 
{
	// TODO: Add your command handler code here
	CString strOrgDYZFile, strFileName;
	CFileDialog orgDYZFileDlg(TRUE, NULL, NULL, OFN_HIDEREADONLY, "原始多音字字库(*.txt)|*.txt||", NULL);
	if (orgDYZFileDlg.DoModal() == IDOK)
	{
		strOrgDYZFile = orgDYZFileDlg.GetPathName();
		strFileName = orgDYZFileDlg.GetFileName();
	}
	if (_access(strOrgDYZFile, 0x0) != 0)
	{
		AfxMessageBox("文件不存在或已经被损害!");
		return;
	}

	int iTestCnt = 0;
	int i = 0;
	short shWordRange = (0xFE - 0x81 + 1) * (0xFE - 0x40 + 1);
	ST_HZUnit* pUnit = new ST_HZUnit[shWordRange];

	try
	{
		CStdioFile fileOrgDYZ(strOrgDYZFile, CStdioFile::modeRead);
		CString strDYZ;
		while (fileOrgDYZ.ReadString(strDYZ))
		{
			strDYZ.TrimLeft();
			strDYZ.TrimRight();
			if (strDYZ.IsEmpty())
				continue;
			
			LPSTR ppStr[5];
			int iCnt = fast_trim(strDYZ.GetBuffer(0), ',', ppStr, 10);
			for(i = 0; i < iCnt; i++)
			{
				CString strTmp1(ppStr[i]);				
				CString strTmp2;
				int iPos1 = strTmp1.Find('(');
				int iPos2 = strTmp1.Find(')');			
				while(iPos1 >= 2 && iPos2 >= 4)
				{
					// 取()中的声母
					char pszIdx[2] = {0,};
					pszIdx[0] = strTmp1.GetAt(iPos1 + 1);
					
					if((pszIdx[0] >= 'A' && pszIdx[0] <= 'Z') || (pszIdx[0] >= 'a' && pszIdx[0] <= 'z'))
					{
						char pszHZ[3] = {0,};
						memcpy(pszHZ, strTmp1.Mid(iPos1 - 2, 2), 2);
						char chPY;
						// 如果是中文字
						if(GetSingleHZJP(pszHZ, chPY))
						{
							if(pszIdx[0] >= 'A' && pszIdx[0] <= 'Z')
								pszIdx[0] += 32;
							WORD nOffset = WORD(((BYTE)pszHZ[0] - 0x81) * 191 + (BYTE)pszHZ[1] - 0x40);
							// 该汉字对应的拼音声母表中是否有该字母
							CString strTmp(pUnit[nOffset].pszPY);
							if(strTmp.Find(pszIdx[0]) < 0)
							{
								pUnit[nOffset].usCode = (BYTE)pszHZ[0] * 256 + (BYTE)pszHZ[1];
								strcat(pUnit[nOffset].pszPY, pszIdx);
								strcpy(pUnit[nOffset].pszHZ, pszHZ);
							}
						}						
					}

					strTmp1 = strTmp1.Mid(iPos2 + 1);
					iPos1 = strTmp1.Find('(');
					iPos2 = strTmp1.Find(')');	
				}
			}
			iTestCnt++; 			
		}// while
		
		CString strTrgDYZFile = strOrgDYZFile.Left(strOrgDYZFile.Find(strFileName));
		strTrgDYZFile += "specialDYZ.txt";
		CStdioFile outputFile(strTrgDYZFile, CStdioFile::modeCreate | CStdioFile::modeWrite);
		for(i = 0; i < shWordRange; i++)
		{
			CString strTmp;
			if(strlen(pUnit[i].pszPY) > 1)
			{
				strTmp.Format("%s,\t%d\t,%s\n", pUnit[i].pszHZ, pUnit[i].usCode, pUnit[i].pszPY);
				outputFile.Write(strTmp.GetBuffer(0), strTmp.GetLength());
			}
		}
		delete[] pUnit;
		pUnit = NULL;
		
		fileOrgDYZ.Close();
		outputFile.Close();
		AfxMessageBox("Finished!");
	}
	catch (CFileException*) {
		AfxMessageBox("CFileException() at CSCSortView::OnExtractDYZ()!");
	}

	
}

void CSCSortView::OnNewDyzSort() 
{
	// TODO: Add your command handler code here
//	temp();
//	return;

	CString strDYZDB;
	CFileDialog dyzDBSelectDlg(TRUE, NULL, NULL, OFN_HIDEREADONLY, "specialDYZ.txt)|*.txt||", NULL);
	if (dyzDBSelectDlg.DoModal() == IDOK)
	{
		strDYZDB = dyzDBSelectDlg.GetPathName();
	}
	if (_access(strDYZDB, 0x0) != 0)
	{
		AfxMessageBox("特殊多音字字库不存在或已经被损害!");
		return;
	}

	USHORT usOff = 0;
	BYTE bHigh = 0;
	BYTE bLow = 0;
	CString strDYMapping[(0xFE-0x81+1)*(0xFE-0x40+1) + 1];
	for (usOff = 0; usOff <= (0xFE-0x81+1)*(0xFE-0x40+1); ++usOff)
	{
		strDYMapping[usOff].Format("NULL,");
	}

	try
	{
		CString strAllDYZ;
		CFileDialog AllDYZSelectDlg(TRUE, NULL, NULL, OFN_HIDEREADONLY, "SortDyz.txt)|*.txt||", NULL);
		if (AllDYZSelectDlg.DoModal() == IDOK)
		{
			strAllDYZ = AllDYZSelectDlg.GetPathName();
		}
		if (_access(strAllDYZ, 0x0) != 0)
		{
			AfxMessageBox("原始多音字字库不存在或已经被损害!");
			return;
		}
		CStdioFile fileAllDYZ(strAllDYZ, CStdioFile::modeRead);
		CString strTmp;
		std::vector<USHORT> vContainer;
		while (fileAllDYZ.ReadString(strTmp))
		{			
			CString strGBK = strTmp.Mid( strTmp.ReverseFind(_T('	'))+1 );
			USHORT usCode = atoi(strGBK);
			vContainer.push_back(usCode);
		}
		fileAllDYZ.Close();

		CStdioFile fileDYZ(strDYZDB, CStdioFile::modeRead);
		CString strDYZ;
		while (fileDYZ.ReadString(strDYZ))
		{
			strDYZ.TrimLeft();
			strDYZ.TrimRight();
			if (strDYZ.IsEmpty())
				continue;
			
			CString strGBK = strDYZ.Left(strDYZ.Find(','));
			strDYZ = strDYZ.Mid( strDYZ.Find(',') + 1);
			strGBK = strDYZ.Left( strDYZ.Find(',') );
			strDYZ = strDYZ.Mid( strDYZ.Find(',') + 1);
			USHORT usCode = atoi(strGBK);

			// match code
			int iStart = 0;
			int iEnd = vContainer.size() - 1;
			int iMid = 0;
			bool bFind = false;
			while(iStart <= iEnd)
			{
				iMid = (iStart + iEnd) / 2;
				if(vContainer[iMid] < usCode)
					iStart = iMid + 1;
				else if(vContainer[iMid] > usCode)
					iEnd = iMid - 1;
				else
				{
					bFind = true;
					break;
				}
			}
			if(bFind)
			{
				usOff = ((usCode >> 8) - 0x81) * 191 + (usCode & 0xff) - 0x40;
				strDYMapping[usOff].Format("\"%s\",", strDYZ.GetBuffer(0));
			}
		}// while
		vContainer.clear();

		// (1)Header
		CFile fileDYZArray;
		fileDYZArray.Open(g_strResultDir + "\\多音字映射表.txt", CStdioFile::modeCreate | CStdioFile::modeReadWrite);
		fileDYZArray.Write("static char* g_pszDYZIndex[] = {", strlen("static char* g_pszDYZIndex[] = {"));
		fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));

		int iLineCnt = 0;
		for (bHigh = 0x81; bHigh <= 0xFE; ++bHigh)
		{
			for (bLow = 0x40; bLow <= 0xFE; ++bLow)
			{
				usOff = (bHigh-0x81) * (0xFE-0x40+1) + (bLow-0x40);
				if(usOff == 17691)
					TRACE0("Find");
				fileDYZArray.Write(strDYMapping[usOff].GetBuffer(0), strDYMapping[usOff].GetLength());
				if ((usOff+1) % 0x10 == 0)
				{
					fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
					iLineCnt++;
				}
			}
		}
		// (3)
		fileDYZArray.Write("};", strlen("};"));
		fileDYZArray.Write(&g_usEnt, sizeof(g_usEnt));
		fileDYZArray.Close();
		fileDYZ.Close();
	}
	catch (CFileException*) {
		AfxMessageBox("CFileException() at CSCSortView::OnDyzSort()!");
	}

	AfxMessageBox("多音字解析完成,见\\result\\多音字映射表.txt!");
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -