⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cnpy.cpp

📁 中文编码转换
💻 CPP
字号:
// CNPY.cpp : Defines the entry point for the DLL application.
#include "../StdAfx.h"
#include "cnpy.h"
#include "GBK.h"
#include "../dyz.h"


// Looks up the jianpin (pinyin-initial) code of the single double-byte
// character that starts at pszHZ.
//   pszHZ    - pointer to the character's bytes; handed unchanged to
//              GetGBKJianPinCode.
//   chPY     - receives whatever GetGBKJianPinCode stores for the character.
//   bLower   - forwarded to GetGBKJianPinCode (presumably selects lower-case
//              output; confirm against GBK.h).
//   nCodeSet - accepted for interface compatibility only; no code-set value
//              changes the lookup, every call takes the GBK path.
// Returns the result of GetGBKJianPinCode.
BOOL __stdcall GetSingleHZJP(const char *pszHZ, char &chPY, BOOL bLower, int nCodeSet)
{
	(void)nCodeSet;	// no per-code-set handling exists yet

	const BOOL bFound = GetGBKJianPinCode(pszHZ, chPY, bLower);
	return bFound;
}



// Converts a mixed double-byte/ASCII string into its jianpin (pinyin-initial)
// abbreviation.
//   pszSrc   - source bytes (need not be NUL-terminated).
//   nSrcLen  - number of bytes to read from pszSrc.
//   pszDist  - output buffer; its first nDistLen bytes are zeroed on entry.
//   nDistLen - in: capacity of pszDist in bytes; out: number of bytes written.
//   bLower   - when TRUE, ASCII 'A'..'Z' are lower-cased; also forwarded to
//              GetSingleHZJP.
//   nCodeSet - forwarded to GetSingleHZJP.
// Returns -1 on invalid arguments, otherwise the number of bytes written.
//
// Each double-byte character contributes one byte when GetSingleHZJP succeeds
// and nothing when it fails; every single-byte character is copied through.
// Output is truncated once pszDist is full, and a lead byte with no trail
// byte inside nSrcLen is ignored, so neither buffer is accessed out of bounds.
// The result is NUL-terminated only when fewer than nDistLen bytes are written.
int __stdcall GetMulHZJP(LPCSTR pszSrc, int nSrcLen, LPSTR pszDist, int &nDistLen, BOOL bLower, int nCodeSet)
{
	if (pszSrc == NULL || pszDist == NULL || nDistLen < 0)
		return -1;

	const int nCapacity = nDistLen;
	memset(pszDist, 0, nCapacity);

	int nJPLen = 0;
	int nSrcOff = 0;
	while (nSrcOff < nSrcLen && nJPLen < nCapacity)
	{
		const char chSrc = pszSrc[nSrcOff];
		if ((chSrc & 0x80) != 0)
		{
			// High bit set: lead byte of a double-byte character. The bit test
			// works whether plain char is signed or unsigned.
			if (nSrcOff + 1 >= nSrcLen)
				break;	// truncated character at the end of the input

			if (GetSingleHZJP(pszSrc + nSrcOff, pszDist[nJPLen], bLower, nCodeSet))
				++nJPLen;
			else
				pszDist[nJPLen] = '\0';	// discard anything a failed lookup may have stored
			nSrcOff += 2;
		}
		else
		{
			const bool bUpper = (chSrc >= 'A' && chSrc <= 'Z');
			pszDist[nJPLen] = (bLower && bUpper) ? static_cast<char>(chSrc + 0x20) : chSrc;
			++nJPLen;
			++nSrcOff;
		}
	}

	nDistLen = nJPLen;
	return nJPLen;
}


// Replacement letter for each decimal digit, indexed by digit value. Each
// entry is the initial of the digit's Mandarin reading.
static char szNumber2Char[10] = {
	'l',	// 0 - ling
	'y',	// 1 - yi
	'e',	// 2 - er
	's',	// 3 - san
	's',	// 4 - si
	'w',	// 5 - wu
	'l',	// 6 - liu
	'q',	// 7 - qi
	'b',	// 8 - ba
	'j'	// 9 - jiu
};

// Builds every jianpin (pinyin-initial) spelling of a string, expanding each
// polyphonic character into all of its readings.
//   pszSrc   - source bytes (need not be NUL-terminated).
//   nSrcLen  - number of bytes to read from pszSrc.
//   nDistLen - out: number of strings in the returned array.
//   bLower   - when TRUE, ASCII 'A'..'Z' are lower-cased; also forwarded to
//              GetSingleHZJP.
//   nCodeSet - forwarded to GetSingleHZJP.
// Returns a new[]-allocated array of nDistLen new[]-allocated NUL-terminated
// strings; MyDelete in this file releases it. Returns NULL with nDistLen == 0
// when pszSrc is NULL, nSrcLen is negative, or the number of combinations does
// not fit in an int.
//
// Only double-byte characters that GetSingleHZJP resolves, plus ASCII letters
// and digits, are kept; digits are replaced through szNumber2Char. Results are
// ordered with the first polyphonic character varying slowest, e.g. two
// characters that can each read 's' or 'x' yield "ss", "sx", "xs", "xx".
PPCHAR __stdcall GetFullHZJPByDYZ(LPCSTR pszSrc, int nSrcLen, int &nDistLen, 
							  BOOL bLower /* = TRUE */, int nCodeSet /* = DEFAULT_CN_CODESET*/)
{
	nDistLen = 0;
	if (pszSrc == NULL || nSrcLen < 0)
		return NULL;

	CDYChain dyChain;	// polyphonic characters in order of appearance; frees its nodes on scope exit
	char *pszBase = new char[nSrcLen + 1];
	memset(pszBase, 0x0, nSrcLen + 1);

	// Pass 1: build the base spelling and remember which positions are polyphonic.
	int nWordID = 0;
	for (int nSrcOff = 0; nSrcOff < nSrcLen; )
	{
		const char ch = pszSrc[nSrcOff];
		if ((ch & 0x80) != 0)
		{// (1) double-byte character
			if (nSrcOff + 1 >= nSrcLen)
				break;	// truncated character at the end of the input

			if (GetSingleHZJP(pszSrc + nSrcOff, pszBase[nWordID], bLower, nCodeSet))
			{
				char *pszDY = GetMulPYCode(pszSrc + nSrcOff);
				if (pszDY != NULL)
				{// position nWordID is polyphonic; the node copies its reading list
					dyChain.AddTail(new CDYNode(nWordID, pszDY));
				}
				++nWordID;
			}
			nSrcOff += 2;
		}
		else
		{// (2) single-byte character: keep only letters and digits
			if (ch >= '0' && ch <= '9')
				pszBase[nWordID++] = szNumber2Char[ch - '0'];
			else if (ch >= 'A' && ch <= 'Z')
				pszBase[nWordID++] = bLower ? static_cast<char>(ch + 0x20) : ch;
			else if (ch >= 'a' && ch <= 'z')
				pszBase[nWordID++] = ch;

			nSrcOff += 1;
		}
	}
	// Terminate explicitly in case a failed trailing lookup left a byte behind.
	pszBase[nWordID] = '\0';

	// Pass 2: the number of spellings is the product of the reading counts.
	const int nMaxCombos = static_cast<int>(static_cast<unsigned int>(-1) >> 1);
	int nTotal = 1;
	for (CDYNode *pNode = dyChain.GetHead(); pNode != NULL; pNode = pNode->GetNext())
	{
		const int nReadings = static_cast<int>(strlen(pNode->GetDY()));
		if (nReadings > 0 && nTotal > nMaxCombos / nReadings)
		{
			// The product would overflow int; refuse rather than mis-size the result.
			delete []pszBase;
			return NULL;
		}
		nTotal *= nReadings;
	}

	// Pass 3: materialise each spelling. The combination number is decoded as
	// a mixed-radix value whose most significant digit belongs to the first
	// polyphonic character. nTotal > 0 implies every reading count is > 0, so
	// the divisions below are safe.
	const size_t nBaseLen = strlen(pszBase);
	PPCHAR ppDYRet = new char*[nTotal];
	for (int nCombo = 0; nCombo < nTotal; ++nCombo)
	{
		char *pszCombo = new char[nBaseLen + 1];
		memcpy(pszCombo, pszBase, nBaseLen + 1);

		int nStride = nTotal;
		for (CDYNode *pNode = dyChain.GetHead(); pNode != NULL; pNode = pNode->GetNext())
		{
			const char *pszDY = pNode->GetDY();
			const int nReadings = static_cast<int>(strlen(pszDY));
			nStride /= nReadings;
			pszCombo[pNode->GetIndex()] = pszDY[(nCombo / nStride) % nReadings];
		}
		ppDYRet[nCombo] = pszCombo;
	}

	delete []pszBase;
	nDistLen = nTotal;
	return ppDYRet;
}

/////////////////////////////
// Purpose: builds an index from the readings of every pair of neighbouring
//          characters.
// Example: for a three-character string X1X2X3 whose characters have n1, n2
//          and n3 readings, n1 * n2 + n2 * n3 two-letter codes are produced.
//          A character that is a digit 0~9 or a letter a~z / A~Z counts as
//          having exactly one reading (itself, lower-cased when bLower is set).
//
//   pszSrc   - source bytes (need not be NUL-terminated).
//   nSrcLen  - number of bytes to read from pszSrc.
//   nDistLen - out: number of strings in the returned array.
//   bLower   - when TRUE, ASCII 'A'..'Z' are lower-cased; also forwarded to
//              GetSingleHZJP.
//   nCodeSet - forwarded to GetSingleHZJP.
// Returns a new[]-allocated array of nDistLen new[]-allocated 2-character
// NUL-terminated strings; MyDelete in this file releases it. Returns NULL with
// nDistLen == 0 when pszSrc is NULL or nSrcLen is negative. With fewer than
// two usable characters the array is empty but non-NULL.
//
// Characters that are neither resolvable double-byte characters nor ASCII
// letters/digits are skipped, so the characters on either side of them are
// treated as neighbours. Unlike GetFullHZJPByDYZ, digits are kept as digits.
PPCHAR __stdcall GetAdjoiningJPByDYZ(LPCSTR pszSrc, int nSrcLen, int &nDistLen, 
							   BOOL bLower, int nCodeSet)
{
	nDistLen = 0;
	if (pszSrc == NULL || nSrcLen < 0)
		return NULL;

	CDYChain dyChain;	// one node per usable character; frees its nodes on scope exit
	char *pszDist = new char[nSrcLen + 1];
	memset(pszDist, 0x0, nSrcLen + 1);

	// Pass 1: collect the reading list of every usable character.
	int nWordID = 0;
	for (int nSrcOff = 0; nSrcOff < nSrcLen; )
	{
		if ((pszSrc[nSrcOff] & 0x80) != 0)
		{// (1) double-byte character
			if (nSrcOff + 1 >= nSrcLen)
				break;	// truncated character at the end of the input

			if (GetSingleHZJP(pszSrc + nSrcOff, pszDist[nWordID], bLower, nCodeSet))
			{
				char *pszDY = GetMulPYCode(pszSrc + nSrcOff);
				if (pszDY != NULL)
				{// polyphonic: the node copies the full reading list
					dyChain.AddTail(new CDYNode(nWordID, pszDY));
				}
				else
				{// single reading: wrap the one initial in its own C string
					char szSingle[2] = { pszDist[nWordID], '\0' };
					dyChain.AddTail(new CDYNode(nWordID, szSingle));
				}
				++nWordID;
			}
			nSrcOff += 2;
		}
		else
		{// (2) single-byte character
			char ch = pszSrc[nSrcOff];
			if (bLower && ch >= 'A' && ch <= 'Z')
				ch = static_cast<char>(ch + 0x20);
			pszDist[nWordID] = ch;

			// Drop stray or odd characters: only digits and letters take part.
			// Upper-case letters are accepted too, so bLower == FALSE no longer
			// silently discards them.
			const bool bUsable = (ch >= '0' && ch <= '9') ||
								 (ch >= 'a' && ch <= 'z') ||
								 (ch >= 'A' && ch <= 'Z');
			if (bUsable)
			{
				char szSingle[2] = { ch, '\0' };
				dyChain.AddTail(new CDYNode(nWordID, szSingle));
			}
			++nWordID;
			nSrcOff += 1;
		}
	}

	// Pass 2: count the pairs, i.e. the sum over neighbours of len(prev) * len(cur).
	int nPairs = 0;
	const char *pszPrev = NULL;
	CDYNode *pNode = NULL;
	for (pNode = dyChain.GetHead(); pNode != NULL; pNode = pNode->GetNext())
	{
		const char *pszCur = pNode->GetDY();
		if (pszPrev != NULL)
			nPairs += static_cast<int>(strlen(pszPrev) * strlen(pszCur));
		pszPrev = pszCur;
	}

	// Pass 3: emit each pair in the same order it was counted.
	PPCHAR ppDYRet = new char*[nPairs];
	int nOut = 0;
	pszPrev = NULL;
	for (pNode = dyChain.GetHead(); pNode != NULL; pNode = pNode->GetNext())
	{
		const char *pszCur = pNode->GetDY();
		if (pszPrev != NULL)
		{
			for (const char *pFirst = pszPrev; *pFirst != '\0'; ++pFirst)
			{
				for (const char *pSecond = pszCur; *pSecond != '\0'; ++pSecond)
				{
					char *pszPair = new char[2 + 1];
					pszPair[0] = *pFirst;
					pszPair[1] = *pSecond;
					pszPair[2] = '\0';
					ppDYRet[nOut++] = pszPair;
				}
			}
		}
		pszPrev = pszCur;
	}

	delete []pszDist;
	nDistLen = nPairs;
	return ppDYRet;
}


// Converts full-width (quanjiao) characters in a GBK/GB2312 byte string to
// their half-width (banjiao) ASCII equivalents.
//   pszSrc   - source bytes (need not be NUL-terminated).
//   nSrcLen  - number of bytes to read from pszSrc.
//   pszDist  - output buffer; its first nDistLen bytes are zeroed on entry.
//   nDistLen - in: capacity of pszDist in bytes; out: number of bytes written.
// Returns -1 on invalid arguments, otherwise the number of bytes written
// (previously the untouched capacity was returned; this now matches
// GetMulHZJP).
//
// Conversion rules:
//   0xA3 0xA1..0xFE  -> one byte, trail - 0x80 (full-width ASCII row)
//   0xA1 0xA1        -> ' ' (full-width space)
//   other lead 0x81..0xFE with a trail byte -> both bytes copied unchanged
//   anything else    -> one byte copied unchanged
// Output is truncated once pszDist is full, and the trail byte is only read
// when it lies inside nSrcLen.
int __stdcall QJ2BJ(LPCSTR pszSrc, int nSrcLen, LPSTR pszDist, int &nDistLen)
{
	if (pszSrc == NULL || pszDist == NULL || nDistLen < 0)
		return -1;

	const BYTE *pbSrc = reinterpret_cast<const BYTE*>(pszSrc);
	const int nCapacity = nDistLen;
	memset(pszDist, 0, nCapacity);

	int nDistIndex = 0;
	for (int nSrcOff = 0; nSrcOff < nSrcLen && nDistIndex < nCapacity; )
	{
		const BYTE bLead = pbSrc[nSrcOff];
		const bool bHasTrail = (nSrcOff + 1 < nSrcLen);
		const BYTE bTrail = bHasTrail ? pbSrc[nSrcOff + 1] : 0;

		if (bHasTrail && bLead == 0xA3 && bTrail >= 0xA1 && bTrail <= 0xFE)
		{// full-width ASCII; trails below 0xA1 under lead 0xA3 are other characters
			pszDist[nDistIndex] = static_cast<char>(bTrail - 0x80);
			nSrcOff += 2;
			nDistIndex += 1;
		}
		else if (bHasTrail && bLead == 0xA1 && bTrail == 0xA1)
		{// full-width space
			pszDist[nDistIndex] = ' ';
			nSrcOff += 2;
			nDistIndex += 1;
		}
		else if (bHasTrail && bLead >= 0x81 && bLead <= 0xFE)
		{// any other double-byte character is passed through intact
			if (nDistIndex + 2 > nCapacity)
				break;	// never emit half a character
			memcpy(pszDist + nDistIndex, pbSrc + nSrcOff, 0x2);
			nSrcOff += 2;
			nDistIndex += 2;
		}
		else
		{// single-byte character, or a lead byte with nothing after it
			pszDist[nDistIndex] = static_cast<char>(bLead);
			nDistIndex += 1;
			nSrcOff += 1;
		}
	}

	nDistLen = nDistIndex;
	return nDistIndex;
}


// Re-encodes a NUL-terminated string in place from code page
// CODEPAGE_CHINESE_TRADITIONAL (950) to CODEPAGE_CHINESE_SIMPLIFIED (936),
// going through UTF-16. Characters with no target encoding become "?".
//   pszSrc - in/out buffer; the result never exceeds the original length.
// NULL and empty input are ignored. If either conversion step fails, or the
// result would not fit in the original length, pszSrc is left unchanged.
void __stdcall BIG5ToGBK(char *pszSrc)   
{
	if (pszSrc == NULL || *pszSrc == '\0')
		return;

	const int nStrLen = static_cast<int>(strlen(pszSrc));
	wchar_t *pws    = new wchar_t[nStrLen + 1];	// one wchar_t per input byte is always enough
	char    *pszOut = new char[nStrLen + 1];	// staging buffer so a failure cannot corrupt pszSrc
	BOOL    bValue  = false;

	const int nWide = MultiByteToWideChar(CODEPAGE_CHINESE_TRADITIONAL/*950*/, 0, pszSrc, nStrLen, pws, nStrLen + 1);
	if (nWide > 0)
	{
		// Capping the output at nStrLen keeps the terminator inside the caller's buffer.
		const int nBytes = WideCharToMultiByte(CODEPAGE_CHINESE_SIMPLIFIED/*936*/, 0, pws, nWide, pszOut, nStrLen, "?", &bValue);
		if (nBytes > 0)
		{
			memcpy(pszSrc, pszOut, nBytes);
			pszSrc[nBytes] = 0;
		}
	}

	delete []pszOut;
	delete []pws;
}


// Re-encodes a NUL-terminated string in place from code page
// CODEPAGE_CHINESE_SIMPLIFIED (936) to CODEPAGE_CHINESE_TRADITIONAL (950),
// going through UTF-16. Characters with no target encoding become "?".
//   pszSrc - in/out buffer; the result never exceeds the original length.
// NULL and empty input are ignored. If either conversion step fails, or the
// result would not fit in the original length, pszSrc is left unchanged.
void __stdcall GBKToBIG5(char *pszSrc)   
{
	if (pszSrc == NULL || *pszSrc == '\0')
		return;

	const int nStrLen = static_cast<int>(strlen(pszSrc));
	wchar_t *pws    = new wchar_t[nStrLen + 1];	// one wchar_t per input byte is always enough
	char    *pszOut = new char[nStrLen + 1];	// staging buffer so a failure cannot corrupt pszSrc
	BOOL    bValue  = false;

	// The number of UTF-16 units is normally smaller than the byte count, so
	// the second step must convert exactly what the first step produced.
	const int nWide = MultiByteToWideChar(CODEPAGE_CHINESE_SIMPLIFIED/*936*/, 0, pszSrc, nStrLen, pws, nStrLen + 1);
	if (nWide > 0)
	{
		const int nBytes = WideCharToMultiByte(CODEPAGE_CHINESE_TRADITIONAL/*950*/, 0, pws, nWide, pszOut, nStrLen, "?", &bValue);
		if (nBytes > 0)
		{
			memcpy(pszSrc, pszOut, nBytes);
			pszSrc[nBytes] = 0;	// terminate at the converted length, not the original one
		}
	}

	delete []pszOut;
	delete []pws;
}

 
// Maps a NUL-terminated string in place with LCMapString and
// LCMAP_TRADITIONAL_CHINESE, using a Chinese (Simplified) locale with
// SORT_CHINESE_PRC.
//   pszSrc - in/out buffer; the result never exceeds the original length.
// NULL and empty input are ignored. If the mapping fails, or its result would
// not fit in the original length, pszSrc is left unchanged.
void __stdcall GB2312ToGBK(char *pszSrc)
{
	if (pszSrc == NULL || *pszSrc == '\0')
		return;

	const int   nStrLen = static_cast<int>(strlen(pszSrc));
	const DWORD dwLCID  = MAKELCID( MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC );

	// First call only measures the mapped length.
	const int nNeeded = LCMapString(dwLCID, LCMAP_TRADITIONAL_CHINESE, pszSrc, nStrLen, NULL, 0);
	if (nNeeded <= 0 || nNeeded > nStrLen)
		return;

	char *pszBuf = new char[nNeeded + 1];
	// The source length is nStrLen here, not the measured output length.
	const int nMapped = LCMapString(dwLCID, LCMAP_TRADITIONAL_CHINESE, pszSrc, nStrLen, pszBuf, nNeeded + 1);
	if (nMapped > 0 && nMapped <= nStrLen)
	{
		memcpy(pszSrc, pszBuf, nMapped);
		pszSrc[nMapped] = '\0';	// the explicit source length means no terminator was mapped
	}

	delete []pszBuf;
}


// Maps a NUL-terminated string in place with LCMapString and
// LCMAP_SIMPLIFIED_CHINESE, using a Chinese (Simplified) locale.
//   pszSrc - in/out buffer; the result never exceeds the original length.
// NULL and empty input are ignored. If the mapping fails, or its result would
// not fit in the original length, pszSrc is left unchanged.
// NOTE(review): the sort id is SORT_CHINESE_BIG5 whereas GB2312ToGBK uses
// SORT_CHINESE_PRC; kept as-is - confirm this is intended.
void __stdcall GBKToGB2312(char *pszSrc)
{
	if (pszSrc == NULL || *pszSrc == '\0')
		return;

	const int   nStrLen = static_cast<int>(strlen(pszSrc));
	const DWORD dwLCID  = MAKELCID( MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_BIG5 );

	// First call only measures the mapped length.
	const int nNeeded = LCMapString(dwLCID, LCMAP_SIMPLIFIED_CHINESE, pszSrc, nStrLen, NULL, 0);
	if (nNeeded <= 0 || nNeeded > nStrLen)
		return;

	char *pszBuf = new char[nNeeded + 1];
	// The source length is nStrLen here, not the measured output length.
	const int nMapped = LCMapString(dwLCID, LCMAP_SIMPLIFIED_CHINESE, pszSrc, nStrLen, pszBuf, nNeeded + 1);
	if (nMapped > 0 && nMapped <= nStrLen)
	{
		memcpy(pszSrc, pszBuf, nMapped);
		pszSrc[nMapped] = '\0';	// the explicit source length means no terminator was mapped
	}

	delete []pszBuf;
}


// Releases a string array together with each of its strings.
//   ppDYRet  - array to free; set to NULL afterwards. A NULL array is ignored.
//   nDistLen - number of entries in ppDYRet.
// Every entry and the array itself are released with delete[].
void __stdcall MyDelete(PPCHAR &ppDYRet, const int &nDistLen)
{
	if (ppDYRet != NULL)
	{
		for (int nRow = 0; nRow < nDistLen; nRow++)
			delete [](ppDYRet[nRow]);	// delete[] on a NULL entry is a no-op

		delete []ppDYRet;
		ppDYRet = NULL;
	}
}




/************************************ CDYNode ************************************/
// Creates an unlinked node with index 0 and no reading list.
CDYNode::CDYNode()
	: m_unIndex(0), m_pszDY(NULL), m_pNext(NULL)
{
}

// Creates an unlinked node for the character at position unIndex.
//   unIndex - value stored as the node's index.
//   pszDY   - NUL-terminated reading list to copy, or NULL for none.
// The node owns a private copy of pszDY. m_pszDY is always initialised, so
// destroying a node built with pszDY == NULL no longer frees a garbage pointer.
CDYNode::CDYNode(const UINT &unIndex, char *pszDY /* = NULL */)
	: m_unIndex(unIndex), m_pszDY(NULL), m_pNext(NULL)
{
	if (pszDY != NULL)
	{
		m_pszDY = new char[strlen(pszDY)+1];
		// Kept for toolchains whose operator new reports failure by returning NULL.
		if (m_pszDY != NULL)
			strcpy(m_pszDY, pszDY);
	}
}

// Frees the node's copy of the reading list and clears its pointers. The node
// that m_pNext referred to is not deleted here.
CDYNode::~CDYNode()
{
	delete []m_pszDY;	// no-op when the node holds no reading list
	m_pszDY = NULL;
	m_pNext = NULL;
}

// Get
// Returns the index stored in this node.
UINT CDYNode::GetIndex() const
{
	return this->m_unIndex;
}

char* CDYNode::GetDY() const
{
	return m_pszDY;
}

// Returns the node linked after this one, or NULL if there is none.
CDYNode* CDYNode::GetNext() const
{
	return this->m_pNext;
}

// Set
void CDYNode::SetIndex(const UINT &unIndex)
{
	m_unIndex = unIndex;
}

// Replaces the node's reading list with a private copy of pszDY.
//   pszDY - NUL-terminated string to copy; NULL leaves the node unchanged.
// The copy is made before the old buffer is released, so pszDY may point into
// the node's current buffer (e.g. node.SetDY(node.GetDY())).
void CDYNode::SetDY(char *pszDY)
{
	if (pszDY == NULL)
		return;

	char *pszCopy = new char[strlen(pszDY)+1];
	// Kept for toolchains whose operator new reports failure by returning NULL.
	if (pszCopy == NULL)
		return;
	strcpy(pszCopy, pszDY);

	delete []m_pszDY;
	m_pszDY = pszCopy;
}

// Links pNext after this node; pass NULL to end the list here.
void CDYNode::SetNext(CDYNode *pNext)
{
	this->m_pNext = pNext;
}

// Copies the index and reading list of pSrc into this node. The link to the
// next node is not copied.
//   pSrc - node to copy from; NULL or this node itself leaves the node unchanged.
// A source without a reading list leaves this node without one as well. The
// copy is made before the old buffer is released.
void CDYNode::Clone(CDYNode *pSrc)
{
	if (pSrc == NULL || pSrc == this)
		return;

	const char *pszSrcDY = pSrc->GetDY();
	char *pszCopy = NULL;
	if (pszSrcDY != NULL)
	{
		pszCopy = new char[strlen(pszSrcDY) + 1];
		strcpy(pszCopy, pszSrcDY);
	}

	delete []m_pszDY;
	m_pszDY = pszCopy;
	m_unIndex = pSrc->GetIndex();
}

// Creates an empty chain.
CDYChain::CDYChain()
	: m_pHead(NULL), m_pTail(NULL), m_unCount(0)
{
}

// Deletes every node reachable from the head, then resets the chain to empty.
CDYChain::~CDYChain()
{
	CDYNode *pCurrent = m_pHead;
	while (pCurrent != NULL)
	{
		// Fetch the successor first: the node is gone after delete.
		CDYNode *pFollowing = pCurrent->GetNext();
		delete pCurrent;
		pCurrent = pFollowing;
	}

	m_unCount = 0;
	m_pTail = NULL;
	m_pHead = NULL;
}

// Get
// Returns the first node of the chain, or NULL when the chain is empty.
CDYNode* CDYChain::GetHead() const
{
	return this->m_pHead;
}

// Returns the last node of the chain, or NULL when the chain is empty.
CDYNode* CDYChain::GetTail() const
{
	return this->m_pTail;
}

// Returns the number of nodes added to the chain.
UINT CDYChain::GetCount() const
{
	return this->m_unCount;
}

// Insert
// Appends pInsNode to the end of the chain. The chain's destructor deletes
// every linked node, so the node must have been allocated with new.
//   pInsNode - node to append; NULL is ignored. Its next pointer is reset.
void CDYChain::AddTail(CDYNode *pInsNode)
{
	if (pInsNode != NULL)
	{
		pInsNode->SetNext(NULL);

		// Hook the node behind the current tail, if there is one...
		if (m_pTail != NULL)
			m_pTail->SetNext(pInsNode);
		// ...and make it the head as well when the chain was empty.
		if (m_pHead == NULL)
			m_pHead = pInsNode;

		m_pTail = pInsNode;
		m_unCount++;
	}
}
/************************************ ~CDYChain ************************************/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -