📄 cnpy.cpp
字号:
// CNPY.cpp : Defines the entry point for the DLL application.
#include "../StdAfx.h"
#include "cnpy.h"
#include "GBK.h"
#include "../dyz.h"
/**
 * Looks up the abbreviated-pinyin (JianPin) letter for the character at pszHZ.
 *
 * @param pszHZ    Pointer to the character bytes; forwarded unchanged to GetGBKJianPinCode.
 * @param chPY     Output slot handed to GetGBKJianPinCode.
 * @param bLower   Forwarded unchanged to GetGBKJianPinCode.
 * @param nCodeSet Accepted for interface compatibility only: every value takes the
 *                 same GBK lookup path.
 * @return The result of GetGBKJianPinCode.
 */
BOOL __stdcall GetSingleHZJP(const char *pszHZ, char &chPY, BOOL bLower, int nCodeSet)
{
    // No code set selects a different table, so the selector is deliberately ignored.
    (void)nCodeSet;

    const BOOL bFound = GetGBKJianPinCode(pszHZ, chPY, bLower);
    return bFound;
}
/**
 * Converts a mixed double-byte/ASCII string into its abbreviated-pinyin (JianPin) string.
 *
 * Bytes with the high bit set (negative as signed char) are treated as the lead byte of
 * a two-byte character and translated through GetSingleHZJP; characters for which the
 * lookup fails are skipped. All other bytes are copied through, upper-case ASCII letters
 * being lowered when bLower is set.
 *
 * @param pszSrc   Source bytes (need not be NUL-terminated; nSrcLen bytes are read).
 * @param nSrcLen  Number of source bytes.
 * @param pszDist  Destination buffer.
 * @param nDistLen In: capacity of pszDist in bytes. Out: number of bytes produced.
 * @param bLower   Non-zero to produce lower-case letters.
 * @param nCodeSet Forwarded to GetSingleHZJP.
 * @return Number of bytes produced, or -1 on invalid arguments.
 *
 * Output is never written beyond the caller-supplied capacity; conversion stops when
 * the buffer is full. The buffer is zero-filled first, so the result is NUL-terminated
 * whenever it is shorter than the capacity.
 */
int __stdcall GetMulHZJP(LPCSTR pszSrc, int nSrcLen, LPSTR pszDist, int &nDistLen, BOOL bLower, int nCodeSet)
{
    if (pszSrc == NULL || pszDist == NULL || nDistLen < 0)
        return -1;

    const int nCapacity = nDistLen;
    memset(pszDist, 0, nCapacity);

    int nJPLen = 0;
    int nSrcOff = 0;
    while (nSrcOff < nSrcLen && nJPLen < nCapacity)
    {
        if (pszSrc[nSrcOff] < 0)
        {
            // A lead byte with no trail byte inside the source range cannot be decoded;
            // stop instead of reading past the end of the input.
            if (nSrcOff + 1 >= nSrcLen)
                break;

            if (GetSingleHZJP(&(pszSrc[nSrcOff]), pszDist[nJPLen], bLower, nCodeSet))
                ++nJPLen;
            else
                pszDist[nJPLen] = 0;    // discard anything a failed lookup may have stored
            nSrcOff += 2;
        }
        else
        {
            char chOut = pszSrc[nSrcOff];
            if (bLower && chOut >= 'A' && chOut <= 'Z')
                chOut += 0x20;
            pszDist[nJPLen++] = chOut;
            ++nSrcOff;
        }
    }

    nDistLen = nJPLen;
    return nDistLen;
}
// Letter substituted for each ASCII digit '0'..'9' (indexed by digit value) when
// GetFullHZJPByDYZ builds its output.
// NOTE(review): the letters look like the pinyin initials of the Mandarin digit names
// (ling, yi, er, san, si, wu, liu, qi, ba, jiu) - confirm.
static char szNumber2Char[10] = {'l', 'y', 'e', 's', 's', 'w', 'l', 'q', 'b', 'j'};
/**
 * Builds every abbreviated-pinyin (JianPin) string for pszSrc, expanding each
 * polyphonic character into all of the readings reported by GetMulPYCode.
 *
 * Parsing rules:
 *  - a byte with the high bit set starts a two-byte character; it contributes one
 *    letter if GetSingleHZJP succeeds and is skipped otherwise;
 *  - ASCII digits are replaced through szNumber2Char;
 *  - ASCII letters are kept (upper-case lowered when bLower is set);
 *  - every other byte is dropped.
 *
 * If k characters are polyphonic with r1..rk readings, r1*...*rk strings are returned,
 * ordered with the first polyphonic character as the most significant "digit"
 * (two characters that each read 's' or 'x' give "ss", "sx", "xs", "xx").
 *
 * @param pszSrc   Source bytes; nSrcLen bytes are read.
 * @param nSrcLen  Number of source bytes.
 * @param nDistLen Out: number of strings in the returned array (0 on failure).
 * @param bLower   Non-zero to lower-case ASCII letters; also forwarded to GetSingleHZJP.
 * @param nCodeSet Forwarded to GetSingleHZJP.
 * @return Array of nDistLen NUL-terminated strings allocated with new[] (release with
 *         MyDelete), or NULL when pszSrc is NULL, nSrcLen is negative, or the number
 *         of combinations does not fit in an int.
 */
PPCHAR __stdcall GetFullHZJPByDYZ(LPCSTR pszSrc, int nSrcLen, int &nDistLen,
    BOOL bLower /* = TRUE */, int nCodeSet /* = DEFAULT_CN_CODESET*/)
{
    nDistLen = 0;
    if (pszSrc == NULL || nSrcLen < 0)
        return NULL;

    CDYChain *pDYChain = new CDYChain;
    char *pszDist = new char[nSrcLen + 1];
    memset(pszDist, 0x0, nSrcLen + 1);

    // Pass 1: build the base string and record which positions are polyphonic.
    int nWordID = 0;
    for (int nSrcOff = 0; nSrcOff < nSrcLen; )
    {
        if (pszSrc[nSrcOff] < 0)
        {
            // Double-byte character. A lead byte without a trail byte inside the
            // source range cannot be decoded; stop rather than read past the input.
            if (nSrcOff + 1 >= nSrcLen)
                break;

            if (GetSingleHZJP(&(pszSrc[nSrcOff]), pszDist[nWordID], bLower, nCodeSet))
            {
                char *pszDY = GetMulPYCode(&(pszSrc[nSrcOff]));
                if (pszDY != NULL)
                {
                    // Polyphonic: remember its position and readings (the node copies them).
                    pDYChain->AddTail(new CDYNode(nWordID, pszDY));
                }
                ++nWordID;
            }
            nSrcOff += 2;
        }
        else
        {
            // ASCII: keep digits (mapped to letters) and letters, drop everything else.
            const char chCur = pszSrc[nSrcOff];
            if (chCur >= '0' && chCur <= '9')
                pszDist[nWordID++] = szNumber2Char[chCur - '0'];
            else if (chCur >= 'A' && chCur <= 'Z')
                pszDist[nWordID++] = bLower ? (char)(chCur + 0x20) : chCur;
            else if (chCur >= 'a' && chCur <= 'z')
                pszDist[nWordID++] = chCur;
            nSrcOff += 1;
        }
    }
    // A failed lookup may have stored a byte in the slot after the last accepted letter.
    pszDist[nWordID] = 0;

    // Pass 2: collect the polyphonic nodes and count the combinations, guarding the
    // product against int overflow.
    const int kMaxCombos = 0x7FFFFFFF;
    const int nDYZNum = (int)pDYChain->GetCount();
    CDYNode **ppDYZ = new CDYNode*[nDYZNum];
    int *pnReadings = new int[nDYZNum];
    int nTotal = 1;
    bool bTooMany = false;
    int nDYZID = 0;
    for (CDYNode *pNode = pDYChain->GetHead(); pNode != NULL; pNode = pNode->GetNext())
    {
        const int nCount = (int)strlen(pNode->GetDY());
        ppDYZ[nDYZID] = pNode;
        pnReadings[nDYZID] = nCount;
        ++nDYZID;
        if (nCount > 0 && nTotal > kMaxCombos / nCount)
        {
            bTooMany = true;
            break;
        }
        nTotal *= nCount;
    }

    // Pass 3: emit one string per combination. The combination number is decomposed as
    // a mixed-radix number, least significant digit = last polyphonic character.
    PPCHAR ppDYRet = NULL;
    if (!bTooMany)
    {
        ppDYRet = new char*[nTotal];
        for (int nCombo = 0; nCombo < nTotal; ++nCombo)
        {
            // nWordID + 1 bytes always cover every recorded position index.
            ppDYRet[nCombo] = new char[nWordID + 1];
            memcpy(ppDYRet[nCombo], pszDist, nWordID + 1);

            int nRest = nCombo;
            for (int nPos = nDYZNum - 1; nPos >= 0; --nPos)
            {
                // pnReadings[nPos] is never 0 here: a zero count makes nTotal 0.
                const char *pszDY = ppDYZ[nPos]->GetDY();
                ppDYRet[nCombo][ppDYZ[nPos]->GetIndex()] = pszDY[nRest % pnReadings[nPos]];
                nRest /= pnReadings[nPos];
            }
        }
        nDistLen = nTotal;
    }

    delete []pnReadings;
    delete []ppDYZ;     // the nodes themselves are owned by the chain
    delete []pszDist;
    delete pDYChain;
    return ppDYRet;
}
/////////////////////////////
// Purpose: builds the index keys formed by the readings of every pair of adjacent
//          characters.
// Example: for a three-character string X1X2X3 whose characters have n1, n2 and n3
//          readings, n1 * n2 + n2 * n3 two-letter keys are produced.
//          A digit 0~9 or a letter a~z / A~Z counts as a character with one reading.
//
// Parsing rules:
//  - a byte with the high bit set starts a two-byte character; it takes part if
//    GetSingleHZJP succeeds (with all readings from GetMulPYCode, or the single
//    JianPin letter when GetMulPYCode returns NULL) and is skipped otherwise;
//  - ASCII digits and letters take part as themselves (upper-case lowered when
//    bLower is set);
//  - any other byte is ignored.
//
// Parameters:
//  pszSrc   - source bytes; nSrcLen bytes are read.
//  nSrcLen  - number of source bytes.
//  nDistLen - out: number of keys in the returned array (0 on failure).
//  bLower   - non-zero to lower-case ASCII letters; also forwarded to GetSingleHZJP.
//  nCodeSet - forwarded to GetSingleHZJP.
// Returns: array of nDistLen NUL-terminated two-letter strings allocated with new[]
//          (release with MyDelete), or NULL when pszSrc is NULL or nSrcLen is negative.
PPCHAR __stdcall GetAdjoiningJPByDYZ(LPCSTR pszSrc, int nSrcLen, int &nDistLen,
    BOOL bLower, int nCodeSet)
{
    nDistLen = 0;
    if (pszSrc == NULL || nSrcLen < 0)
        return NULL;

    CDYChain *pDYChain = new CDYChain;

    // Pass 1: one chain node per participating character, holding its readings.
    int nWordID = 0;
    for (int nSrcOff = 0; nSrcOff < nSrcLen; )
    {
        if (pszSrc[nSrcOff] < 0)
        {
            // Double-byte character. A lead byte without a trail byte inside the
            // source range cannot be decoded; stop rather than read past the input.
            if (nSrcOff + 1 >= nSrcLen)
                break;

            char chJP = 0;
            if (GetSingleHZJP(&(pszSrc[nSrcOff]), chJP, bLower, nCodeSet))
            {
                char *pszDY = GetMulPYCode(&(pszSrc[nSrcOff]));
                if (pszDY != NULL)
                {
                    // Polyphonic: the node copies every reading.
                    pDYChain->AddTail(new CDYNode(nWordID, pszDY));
                }
                else
                {
                    char szSingle[2] = { chJP, 0 };
                    pDYChain->AddTail(new CDYNode(nWordID, szSingle));
                }
                ++nWordID;
            }
            nSrcOff += 2;
        }
        else
        {
            char chCur = pszSrc[nSrcOff];
            if (bLower && chCur >= 'A' && chCur <= 'Z')
                chCur += 0x20;

            // Keep digits and letters only; this filters out stray/garbage bytes.
            // Upper-case letters are accepted too, so they survive when bLower is FALSE.
            const bool bAlnum = (chCur >= '0' && chCur <= '9') ||
                                (chCur >= 'a' && chCur <= 'z') ||
                                (chCur >= 'A' && chCur <= 'Z');
            if (bAlnum)
            {
                char szSingle[2] = { chCur, 0 };
                pDYChain->AddTail(new CDYNode(nWordID, szSingle));
            }
            ++nWordID;
            nSrcOff += 1;
        }
    }

    // Pass 2: count the keys - for each adjacent pair, readings(prev) * readings(cur).
    int nPairs = 0;
    int nPrevCount = 0;
    bool bHavePrev = false;
    CDYNode *pNode = NULL;
    for (pNode = pDYChain->GetHead(); pNode != NULL; pNode = pNode->GetNext())
    {
        const int nCurCount = (int)strlen(pNode->GetDY());
        if (bHavePrev)
            nPairs += nPrevCount * nCurCount;
        nPrevCount = nCurCount;
        bHavePrev = true;
    }

    // Pass 3: emit the keys in chain order, previous-character reading first.
    PPCHAR ppDYRet = new char*[nPairs];
    int nOut = 0;
    const char *pszPrev = NULL;
    for (pNode = pDYChain->GetHead(); pNode != NULL; pNode = pNode->GetNext())
    {
        const char *pszCur = pNode->GetDY();
        if (pszPrev != NULL)
        {
            for (const char *pchA = pszPrev; *pchA != 0; ++pchA)
            {
                for (const char *pchB = pszCur; *pchB != 0; ++pchB)
                {
                    char *pszKey = new char[2 + 1];
                    pszKey[0] = *pchA;
                    pszKey[1] = *pchB;
                    pszKey[2] = 0;
                    ppDYRet[nOut++] = pszKey;
                }
            }
        }
        pszPrev = pszCur;
    }
    nDistLen = nPairs;

    delete pDYChain;
    return ppDYRet;
}
/**
 * Converts full-width (QuanJiao) characters in a double-byte string to their
 * half-width (BanJiao) ASCII equivalents.
 *
 *  - lead byte 0xA3 with trail byte 0xA1..0xFE -> single byte (trail - 0x80);
 *  - 0xA1 0xA1 (full-width space)              -> ' ';
 *  - any other pair starting with 0x81..0xFE   -> copied unchanged as two bytes;
 *  - everything else                           -> copied as a single byte.
 *
 * @param pszSrc   Source bytes; nSrcLen bytes are read.
 * @param nSrcLen  Number of source bytes.
 * @param pszDist  Destination buffer.
 * @param nDistLen In: capacity of pszDist in bytes. Out: number of bytes produced.
 * @return Number of bytes produced, or -1 on invalid arguments.
 *
 * Conversion stops when the next character would not fit in the destination. The
 * buffer is zero-filled first, so the result is NUL-terminated whenever it is shorter
 * than the capacity.
 */
int __stdcall QJ2BJ(LPCSTR pszSrc, int nSrcLen, LPSTR pszDist, int &nDistLen)
{
    if (pszSrc == NULL || pszDist == NULL || nDistLen < 0)
        return -1;

    const BYTE *pbSrc = (const BYTE*)pszSrc;
    const int nCapacity = nDistLen;
    memset(pszDist, 0, nCapacity);

    int nDistIndex = 0;
    int nSrcOff = 0;
    while (nSrcOff < nSrcLen && nDistIndex < nCapacity)
    {
        const BYTE bLead = pbSrc[nSrcOff];
        const bool bIsLead = (bLead >= 0x81 && bLead <= 0xFE);
        const bool bHasTrail = (nSrcOff + 1 < nSrcLen);

        if (bIsLead && bHasTrail)
        {
            const BYTE bTrail = pbSrc[nSrcOff + 1];
            if (bLead == 0xA3 && bTrail >= 0xA1 && bTrail <= 0xFE)
            {
                // Full-width ASCII: the trail byte is the ASCII code plus 0x80.
                pszDist[nDistIndex++] = (char)(bTrail - 0x80);
            }
            else if (bLead == 0xA1 && bTrail == 0xA1)
            {
                // Full-width space.
                pszDist[nDistIndex++] = ' ';
            }
            else
            {
                // Ordinary double-byte character: keep both bytes together.
                if (nDistIndex + 2 > nCapacity)
                    break;
                pszDist[nDistIndex++] = (char)bLead;
                pszDist[nDistIndex++] = (char)bTrail;
            }
            nSrcOff += 2;
        }
        else
        {
            // Single-byte character, or a lead byte with nothing after it.
            pszDist[nDistIndex++] = (char)bLead;
            nSrcOff += 1;
        }
    }

    nDistLen = nDistIndex;
    return nDistLen;
}
/**
 * Converts a NUL-terminated string in place from code page
 * CODEPAGE_CHINESE_TRADITIONAL (950) to CODEPAGE_CHINESE_SIMPLIFIED (936).
 *
 * @param pszSrc String to convert; overwritten with the result. NULL and empty
 *               strings are ignored.
 *
 * Characters that cannot be represented are replaced with '?'. The result is limited
 * to the original byte length, because that is the only buffer size known here. If
 * either conversion step fails, the string is left unchanged.
 */
void __stdcall BIG5ToGBK(char *pszSrc)
{
    if (pszSrc == NULL || pszSrc[0] == '\0')
        return;

    const int nStrLen = (int)strlen(pszSrc);
    wchar_t *pws = new wchar_t[nStrLen + 1];
    char *pszOut = new char[nStrLen + 1];

    // Step 1: source code page -> UTF-16. One byte yields at most one wide char.
    const int nWide = MultiByteToWideChar(CODEPAGE_CHINESE_TRADITIONAL/*950*/, 0, pszSrc, nStrLen, pws, nStrLen + 1);

    // Step 2: UTF-16 -> target code page, into a scratch buffer so a failed
    // conversion cannot leave pszSrc half overwritten.
    int nBytes = 0;
    if (nWide > 0)
    {
        BOOL bValue = false;
        nBytes = WideCharToMultiByte(CODEPAGE_CHINESE_SIMPLIFIED/*936*/, 0, pws, nWide, pszOut, nStrLen, "?", &bValue);
    }

    if (nBytes > 0)
    {
        memcpy(pszSrc, pszOut, nBytes);
        pszSrc[nBytes] = 0;     // nBytes <= nStrLen, so this stays inside the original string
    }

    delete []pszOut;
    delete []pws;
}
/**
 * Converts a NUL-terminated string in place from code page
 * CODEPAGE_CHINESE_SIMPLIFIED (936) to CODEPAGE_CHINESE_TRADITIONAL (950).
 *
 * @param pszSrc String to convert; overwritten with the result. NULL and empty
 *               strings are ignored.
 *
 * Characters that cannot be represented are replaced with '?'. The result is limited
 * to the original byte length, because that is the only buffer size known here. If
 * either conversion step fails, the string is left unchanged.
 */
void __stdcall GBKToBIG5(char *pszSrc)
{
    if (pszSrc == NULL || pszSrc[0] == '\0')
        return;

    const int nStrLen = (int)strlen(pszSrc);
    wchar_t *pws = new wchar_t[nStrLen + 1];
    char *pszOut = new char[nStrLen + 1];

    // Step 1: source code page -> UTF-16. The returned count, not the byte length,
    // is the number of valid wide characters.
    const int nWide = MultiByteToWideChar(CODEPAGE_CHINESE_SIMPLIFIED/*936*/, 0, pszSrc, nStrLen, pws, nStrLen + 1);

    // Step 2: UTF-16 -> target code page, into a scratch buffer so a failed
    // conversion cannot leave pszSrc half overwritten.
    int nBytes = 0;
    if (nWide > 0)
    {
        BOOL bValue = false;
        nBytes = WideCharToMultiByte(CODEPAGE_CHINESE_TRADITIONAL/*950*/, 0, pws, nWide, pszOut, nStrLen, "?", &bValue);
    }

    if (nBytes > 0)
    {
        memcpy(pszSrc, pszOut, nBytes);
        pszSrc[nBytes] = 0;     // terminate at the converted length, not the old one
    }

    delete []pszOut;
    delete []pws;
}
/**
 * Maps a NUL-terminated string in place with LCMapString using
 * LCMAP_TRADITIONAL_CHINESE under a Simplified-Chinese (PRC sort) locale.
 *
 * @param pszSrc String to map; overwritten with the result. NULL and empty strings
 *               are ignored.
 *
 * The string is left unchanged if the mapping fails or if the mapped text would be
 * longer than the original, since the original length is the only buffer size known
 * here.
 */
void __stdcall GB2312ToGBK(char *pszSrc)
{
    if (pszSrc == NULL || pszSrc[0] == '\0')
        return;

    const int nStrLen = (int)strlen(pszSrc);
    DWORD dwLCID = MAKELCID( MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC );

    // First call only measures the output.
    const int nNeeded = LCMapString(dwLCID, LCMAP_TRADITIONAL_CHINESE, pszSrc, nStrLen, NULL, 0);
    if (nNeeded <= 0 || nNeeded > nStrLen)
        return;

    char *pszBuf = new char[nNeeded + 1];
    // The source length is nStrLen for both calls; nNeeded is the destination size.
    const int nMapped = LCMapString(dwLCID, LCMAP_TRADITIONAL_CHINESE, pszSrc, nStrLen, pszBuf, nNeeded);
    if (nMapped > 0)
    {
        memcpy(pszSrc, pszBuf, nMapped);
        pszSrc[nMapped] = 0;    // nMapped <= nStrLen, so this stays inside the original string
    }
    delete []pszBuf;
}
/**
 * Maps a NUL-terminated string in place with LCMapString using
 * LCMAP_SIMPLIFIED_CHINESE under a Simplified-Chinese language ID combined with
 * SORT_CHINESE_BIG5.
 *
 * @param pszSrc String to map; overwritten with the result. NULL and empty strings
 *               are ignored.
 *
 * The string is left unchanged if the mapping fails or if the mapped text would be
 * longer than the original, since the original length is the only buffer size known
 * here.
 */
void __stdcall GBKToGB2312(char *pszSrc)
{
    if (pszSrc == NULL || pszSrc[0] == '\0')
        return;

    const int nStrLen = (int)strlen(pszSrc);
    DWORD dwLCID = MAKELCID( MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_BIG5 );

    // First call only measures the output.
    const int nNeeded = LCMapString(dwLCID, LCMAP_SIMPLIFIED_CHINESE, pszSrc, nStrLen, NULL, 0);
    if (nNeeded <= 0 || nNeeded > nStrLen)
        return;

    char *pszBuf = new char[nNeeded + 1];
    // The source length is nStrLen for both calls; nNeeded is the destination size.
    const int nMapped = LCMapString(dwLCID, LCMAP_SIMPLIFIED_CHINESE, pszSrc, nStrLen, pszBuf, nNeeded);
    if (nMapped > 0)
    {
        memcpy(pszSrc, pszBuf, nMapped);
        pszSrc[nMapped] = 0;    // nMapped <= nStrLen, so this stays inside the original string
    }
    delete []pszBuf;
}
/**
 * Frees an array of strings: each of the first nDistLen rows is released with
 * delete[], then the array itself, and the caller's pointer is reset to NULL.
 *
 * @param ppDYRet  Array to free; a NULL array is ignored.
 * @param nDistLen Number of rows in the array.
 */
void __stdcall MyDelete(PPCHAR &ppDYRet, const int &nDistLen)
{
    if (ppDYRet != NULL)
    {
        int nRow = 0;
        while (nRow < nDistLen)
        {
            // delete[] on a null pointer is a no-op, so empty rows need no special case.
            delete [] ppDYRet[nRow];
            ++nRow;
        }
        delete [] ppDYRet;
        ppDYRet = NULL;
    }
}
/************************************ CDYNode ************************************/
// Creates an empty node: index 0, no readings string, no successor.
CDYNode::CDYNode()
    : m_unIndex(0), m_pszDY(NULL), m_pNext(NULL)
{
}
// Creates a node for position unIndex that owns a private copy of pszDY.
// When pszDY is NULL the node holds no string; m_pszDY is set to NULL explicitly so
// the destructor never deletes an uninitialised pointer.
CDYNode::CDYNode(const UINT &unIndex, char *pszDY /* = NULL */)
{
    m_unIndex = unIndex;
    m_pszDY = NULL;
    m_pNext = NULL;

    if (pszDY != NULL)
    {
        m_pszDY = new char[strlen(pszDY)+1];
        // Kept for compilers whose operator new returns NULL instead of throwing.
        if (m_pszDY != NULL)
            strcpy(m_pszDY, pszDY);
    }
}
// Releases the owned readings string. The successor node is not deleted here.
CDYNode::~CDYNode()
{
    delete [] m_pszDY;      // no-op when the node holds no string
    m_pszDY = NULL;
    m_pNext = NULL;
}
// Get
// Returns the index stored in this node.
UINT CDYNode::GetIndex() const
{
    return this->m_unIndex;
}
char* CDYNode::GetDY() const
{
return m_pszDY;
}
// Returns the successor pointer stored in this node.
CDYNode* CDYNode::GetNext() const
{
    return this->m_pNext;
}
// Set
void CDYNode::SetIndex(const UINT &unIndex)
{
m_unIndex = unIndex;
}
// Replaces the node's readings string with a private copy of pszDY.
// A NULL argument leaves the node unchanged.
void CDYNode::SetDY(char *pszDY)
{
    if (pszDY == NULL)
        return;

    // Copy first, free afterwards: pszDY may point into the string this node already
    // owns (e.g. SetDY(GetDY())), which must stay valid until it has been copied.
    char *pszCopy = new char[strlen(pszDY)+1];
    if (pszCopy == NULL)    // compilers whose operator new returns NULL on failure
        return;
    strcpy(pszCopy, pszDY);

    delete []m_pszDY;
    m_pszDY = pszCopy;
}
// Stores pNext as this node's successor pointer.
void CDYNode::SetNext(CDYNode *pNext)
{
    this->m_pNext = pNext;
}
// Copies the index and the readings string of pSrc into this node.
// The successor pointer is not touched. A NULL source or a self-clone is a no-op;
// a source without a string leaves this node without one as well.
void CDYNode::Clone(CDYNode *pSrc)
{
    if (pSrc == NULL || pSrc == this)
        return;

    // Build the copy before releasing the old string so this node is never left
    // pointing at freed memory.
    char *pszCopy = NULL;
    const char *pszSrcDY = pSrc->GetDY();
    if (pszSrcDY != NULL)
    {
        pszCopy = new char[strlen(pszSrcDY) + 1];
        strcpy(pszCopy, pszSrcDY);
    }

    delete []m_pszDY;
    m_pszDY = pszCopy;
    m_unIndex = pSrc->GetIndex();
}
// Creates an empty chain: no head, no tail, count 0.
CDYChain::CDYChain()
    : m_pHead(NULL), m_pTail(NULL), m_unCount(0)
{
}
// Deletes every node reachable from the head and resets the chain to empty.
CDYChain::~CDYChain()
{
    CDYNode *pCur = m_pHead;
    while (pCur != NULL)
    {
        // Fetch the successor before the current node is destroyed.
        CDYNode *pFollowing = pCur->GetNext();
        delete pCur;
        pCur = pFollowing;
    }

    m_pHead = NULL;
    m_pTail = NULL;
    m_unCount = 0;
}
// Get
// Returns the first node of the chain, or NULL when the chain is empty.
CDYNode* CDYChain::GetHead() const
{
    return this->m_pHead;
}
// Returns the last node of the chain, or NULL when the chain is empty.
CDYNode* CDYChain::GetTail() const
{
    return this->m_pTail;
}
// Returns the number of nodes appended to the chain.
UINT CDYChain::GetCount() const
{
    return this->m_unCount;
}
// Insert
// Appends pInsNode to the end of the chain; the destructor later deletes it.
// A NULL node is ignored.
void CDYChain::AddTail(CDYNode *pInsNode)
{
    if (pInsNode != NULL)
    {
        // The new node becomes the last one, so it must not carry a stale successor.
        pInsNode->SetNext(NULL);

        if (NULL == m_pHead)
        {
            m_pHead = pInsNode;
        }
        if (NULL != m_pTail)
        {
            m_pTail->SetNext(pInsNode);
        }

        m_pTail = pInsNode;
        m_unCount += 1;
    }
}
/************************************ ~CDYChain ************************************/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -