📄 pattree.c

📁 Symbian平台数字键盘手机输入法源码
💻 C
📖 第 1 页 / 共 5 页
字号:
/*
 * One word node of an HW sentence: several fixed-capacity index arrays
 * together with the number of entries in use.
 * NOTE(review): "HW" presumably stands for handwriting, and the per-field
 * meanings below are inferred from the names only - confirm against the code
 * that fills this structure.
 */
typedef struct tagHW_WORD_NODE_T {
	/* Presumably lexicon indices of single-character candidates; capacity PATTREE_CAND_NUM. */
	unsigned short nIndex_SingleCharWord[PATTREE_CAND_NUM];
	/* Presumably lexicon indices of multi-character predictions; capacity PATTREE_MAX_PREDICTION_NUM. */
	unsigned short nIndex_MultiCharWord[PATTREE_MAX_PREDICTION_NUM];

	/* Five parallel arrays, all with capacity PATTREE_MAX_INDEXINLEXICON_NUM. */
	unsigned char byStart[PATTREE_MAX_INDEXINLEXICON_NUM];
	unsigned short nIndex[PATTREE_MAX_INDEXINLEXICON_NUM];
	unsigned short nMinIndexInLexicon[PATTREE_MAX_INDEXINLEXICON_NUM];
	unsigned short nMaxIndexInLexicon[PATTREE_MAX_INDEXINLEXICON_NUM];
	long nHWLogProb[PATTREE_MAX_INDEXINLEXICON_NUM];

	/* Entry counters - presumably the number of used slots in the arrays of the matching name. */
	unsigned char byNum_SingleCharWord;
	unsigned char byNum_MultiCharWord;
	unsigned char byNum_IndexInLexicon;

}	HW_WORD_NODE_T;

/*
 * One entry of the word working list (see HW_SENTENCE_T::aWordList).
 * NOTE(review): field meanings are inferred from their names - confirm.
 */
typedef struct tagWORD_LIST_T
{
	long			nLogProb;		/* presumably the log-probability score of the word */
	unsigned short	nIndex;			/* presumably the word's lexicon index */
	unsigned char	byStart;		/* presumably the start position of the word */
	unsigned char	reserved[1];	/* unused byte; presumably keeps the entry size even - confirm */
} WORD_LIST_T;

/*
 * Working state of one HW sentence.
 * The members between the two ruler lines must keep the same layout as the
 * beginning of SENTENCE_T; do not reorder, insert or remove members there.
 * NOTE(review): the purpose of the remaining members is inferred from their
 * names and the original comments - confirm against the users of this type.
 */
typedef struct tagHW_SENTENCE_T
{
	//////////////////////////////////
	OUTBUFFER OutBuffer;

	/* One node per input position; with PATTREE_POSBIGRAM there is one node per position and POS type. */
#ifdef PATTREE_POSBIGRAM
	SENTENCE_NODE_T Nodes[PATTREE_MAX_HW_INPUT_LEN][PATTREE_MAX_POSTYPE];
#else
	SENTENCE_NODE_T Nodes[PATTREE_MAX_HW_INPUT_LEN];
#endif
	// everything above this line must stay layout-compatible with SENTENCE_T
	//////////////////////////////////

	/* One HW word node per input position. */
	HW_WORD_NODE_T	Words[PATTREE_MAX_HW_INPUT_LEN];

	/* Parallel arrays, both with capacity PATTREE_MAX_HW_CAND_LEN. */
	unsigned short	Candidates[PATTREE_MAX_HW_CAND_LEN];
	unsigned short	Distances[PATTREE_MAX_HW_CAND_LEN];

	/* Prediction strings and, presumably, the number of valid entries in pwszPredict. */
	wchar_t *pwszPredict[PATTREE_MAX_PREDICTION_NUM];
	int		nPredictNum;

	/* The two working buffers share storage, so only one of them holds valid data at a time. */
	union {
		unsigned char	abTraceBuffer[PATTREE_MAX_TRACE_LEN];			// working buffer for the HWE
		WORD_LIST_T		aWordList[PATTREE_MAX_INDEXINLEXICON_NUM];		// working list of all words of each node, used when adding a trace to the sentence and when filling predictions
	};
	unsigned char abHWBuffer[PATTREE_MAX_HW_BUFFER_LEN];
	
	//unsigned char byCharNum;

}	HW_SENTENCE_T;

#endif
/***************************************************************HW SENTENCE END***********************************************************************************/

#pragma pack( pop, BEFORE_PATTREE_T )
/*********************************************************************************************/
							  
/*********************************************************************************************/
/* Static dictionary: basic accessor macros and helper functions */
/*
 * Bit fields of the cCode member of the entry that pInd points to:
 *   bit 7 (0x80)     - tested by IND_IS_NONNULL
 *   bit 6 (0x40)     - tested by IND_IS_CONFLICTED
 *   bits 0-5 (0x3F)  - the code value itself, extracted by IND_GET_REALCODE
 * The two test macros yield the masked bit (0 or the mask), not 0/1.
 */
#define IND_IS_NONNULL(pInd)		( (pInd)->cCode & 0x80 )
#define IND_IS_CONFLICTED(pInd)		( (pInd)->cCode & 0x40 )
#define IND_GET_REALCODE(pInd)		( (pInd)->cCode & 0x3F )

/*
 * Accessors into the read-only dictionary blob (abData) and into a lexicon entry.
 * Every macro parameter is parenthesized so that arguments such as `&dic` or
 * `base + i` expand correctly.
 *
 * StaticDic_GetPinyinList(pDic, n)          - char* to the n-th pinyin string; its byte offset is read
 *                                             from the long[] table at nOffset_anPinyinListOffset.
 * StaticDic_GetCharStrokeCode(pDic)         - unsigned short* to the table at nOffset_awCharStrokeCode.
 * StaticDic_GetCharPinyinCode(pDic)         - unsigned short* to the table at nOffset_awCharPinyinCode.
 * StaticLexiconEntry_GetOwnPinyinCode(pLex) - unsigned short* located cPinyinCodeOffset bytes after
 *                                             the start of the entry itself.
 */
#define StaticDic_GetPinyinList( pDic, n )					( (char*)((pDic)->abData + ((long*)((pDic)->abData + (pDic)->ReadOnlyHead.nOffset_anPinyinListOffset))[(n)]) )
#define StaticDic_GetCharStrokeCode( pDic )					( (unsigned short*)((pDic)->abData + (pDic)->ReadOnlyHead.nOffset_awCharStrokeCode) )
#define StaticDic_GetCharPinyinCode( pDic )					( (unsigned short*)((pDic)->abData + (pDic)->ReadOnlyHead.nOffset_awCharPinyinCode) )
#define StaticLexiconEntry_GetOwnPinyinCode(pLexicon)		( (unsigned short*)(((char*)(pLexicon)) + (pLexicon)->cPinyinCodeOffset) )

/*
 * Returns the n-th pinyin code of a lexicon entry.
 *
 * When the entry stores no pinyin codes of its own (cPinyinNum == 0), the code
 * is taken from the dictionary-wide per-character table, indexed by the n-th
 * character of the word minus ReadOnlyHead.wCharCodeOffset. Otherwise the
 * entry's own code array is indexed with n directly.
 * No range check is made on n.
 */
static unsigned short StaticLexiconEntry_GetPinyinCode( STATIC_LEXICON_ENTRY_T* pLexicon, STATIC_DIC_T *pDic, int n )
{
	if ( pLexicon->cPinyinNum == 0 ) {
		/* No private codes: fall back to the shared per-character table. */
		return StaticDic_GetCharPinyinCode(pDic)[pLexicon->wszWord[n] - pDic->ReadOnlyHead.wCharCodeOffset];
	}

	return StaticLexiconEntry_GetOwnPinyinCode(pLexicon)[n];
}

/*
 * Number of 4-bit digits used by a packed 16-bit stroke code x: the position
 * (1..4) of the highest non-zero nibble. A value whose upper three nibbles
 * are all zero - including 0 itself - yields 1.
 */
#define StaticDic_GetCharStrokeNum(x)		\
	( ((x) & 0xF000) ? 4 :					\
	  ((x) & 0x0F00) ? 3 :					\
	  ((x) & 0x00F0) ? 2 : 1 )

/*
 * Accessors into the read-only dictionary blob (pDic->abData).
 * Offsets come either from pDic->ReadOnlyHead or, for the mode-dependent
 * tables, from the sentence (pSentence->nDicOffset_*).
 *
 * The *_GetInd* and StaticDic_GetLexicon macros perform a double lookup: the
 * first offset locates a long[] table of byte offsets, and entry [step] / [n]
 * of that table locates the object inside abData.
 *
 * All macro parameters are parenthesized. StaticDic_GetLexiconOffset now uses
 * its own pDic parameter; it previously ignored the argument and referenced
 * whatever identifier `pSDic` happened to be in scope at the call site.
 */
#define StaticDic_GetIndNum_Pinyin( pDic )					( (unsigned short*)((pDic)->abData + (pDic)->ReadOnlyHead.nOffset_awIndNum_Pinyin) )
#define StaticDic_GetIndNum( pDic, pSentence )				( (unsigned short*)((pDic)->abData + (pSentence)->nDicOffset_awIndNum) )
#define StaticDic_GetInd_Pinyin( pDic, step )				( (IND_T*)((pDic)->abData + ((long*)((pDic)->abData + (pDic)->ReadOnlyHead.nOffset_anIndOffset_Pinyin))[(step)]) )
#define StaticDic_GetInd( pDic, pSentence, step )			( (IND_T*)((pDic)->abData + ((long*)((pDic)->abData + (pSentence)->nDicOffset_anIndOffset))[(step)]) )
#define StaticDic_GetLexicon( pDic, n )						( (STATIC_LEXICON_ENTRY_T*)((pDic)->abData + ((long*)((pDic)->abData + (pDic)->ReadOnlyHead.nOffset_anlexiconsOffset))[(n)]) )
#define StaticDic_GetLexiconOffset( pDic )					( (long*)((pDic)->abData + (pDic)->ReadOnlyHead.nOffset_anlexiconsOffset) )

/*
 * Accessors for tables that belong to the static lexicon but are stored in
 * the user data blob (pDic->abData_User).
 *
 * StaticDic_GetLexiconLogprob(pDic, n) is an lvalue: element n of the
 * TYPE_PROB array at UserHead.nOffset_aLexiconLogprob. The other macros yield
 * a pointer to the start of their table; the sentence-based ones take the
 * offset from pSentence->nDicOffset_*.
 *
 * All macro parameters are parenthesized so that non-trivial argument
 * expressions expand correctly.
 */
#define StaticDic_GetLexiconLogprob( pDic, n )				( ((TYPE_PROB*)((pDic)->abData_User + (pDic)->UserHead.nOffset_aLexiconLogprob))[(n)] )
#define StaticDic_GetWordIndex_Pinyin( pDic )				( (unsigned short*)((pDic)->abData_User + (pDic)->UserHead.nOffset_awWordIndex_Pinyin) )
#define StaticDic_GetWordIndex( pDic, pSentence )			( (unsigned short*)((pDic)->abData_User + (pSentence)->nDicOffset_awWordIndex) )
#define StaticDic_GetMaxSeriesLogProb( pDic, pSentence )	( (TYPE_PROB*)((pDic)->abData_User + (pSentence)->nDicOffset_aMaxSeriesLogProb) )

/*
 * Accessors for the user dictionary tables inside pDic->abData_User.
 *
 * StaticDic_GetByOffset_User(pDic, nOffset) - address nOffset bytes into abData_User.
 * StaticDic_GetLexicon_User(pDic, n)        - n is a GLOBAL lexicon index; the macro subtracts
 *                                             ReadOnlyHead.nLexiconNum before indexing the user
 *                                             offset table.
 * StaticDic_GetLexiconLogprob_User(pDic, n) - n is already relative to the user table (nothing is
 *                                             subtracted); the result is an lvalue.
 * The remaining macros return a pointer to the start of the table whose offset is stored in
 * pSentence->nDicOffset_*_User.
 *
 * All macro parameters are parenthesized so that arguments such as `base + i` or `&dic` keep
 * their intended grouping.
 */
#define StaticDic_GetByOffset_User( pDic, nOffset )			( (pDic)->abData_User + (nOffset) )
#define StaticDic_GetIndNum_User( pDic, pSentence )			( (unsigned short*)((pDic)->abData_User + (pSentence)->nDicOffset_awIndNum_User) )
#define StaticDic_GetWordIndex_User( pDic, pSentence )		( (unsigned short*)((pDic)->abData_User + (pSentence)->nDicOffset_awWordIndex_User) )
#define StaticDic_GetMaxSeriesLogProb_User(pDic, pSentence)	( (TYPE_PROB*)((pDic)->abData_User + (pSentence)->nDicOffset_aMaxSeriesLogProb_User) )
#define StaticDic_GetInd_User( pDic, pSentence, step )		( (IND_T*)((pDic)->abData_User + ((long*)((pDic)->abData_User + (pSentence)->nDicOffset_anIndOffset_User))[(step)]) )
#define StaticDic_GetLexicon_User( pDic, n )				( (STATIC_LEXICON_ENTRY_T*)((pDic)->abData_User + ((long*)((pDic)->abData_User + (pDic)->UserHead.nOffset_anlexiconsOffset_User))[(n) - (pDic)->ReadOnlyHead.nLexiconNum]) )
#define StaticDic_GetLexiconLogprob_User( pDic, n )			( ((TYPE_PROB*)((pDic)->abData_User + (pDic)->UserHead.nOffset_aLexiconLogprob_User))[(n)] )

/*
 * Mode-specific (Pinyin / Stroke) accessors for the user dictionary tables in
 * pDic->abData_User; every offset comes from pDic->UserHead.
 *
 * StaticDic_GetWordIndex_PinyinIndex_User points
 * nUSER_DIC_MAX_ENTRY * 2 * sizeof(unsigned short) bytes past the start of the
 * Pinyin word-index table.
 * StaticDic_GetLexicon_User_AbsoluteIndex indexes the user lexicon offset
 * table with n as given (unlike StaticDic_GetLexicon_User, nothing is
 * subtracted from n).
 * The *_GetInd_* macros perform a double lookup through a long[] offset table.
 *
 * All macro parameters are parenthesized so that non-trivial argument
 * expressions expand correctly.
 */
#define StaticDic_GetWordIndex_Pinyin_User( pDic )			( (unsigned short*)((pDic)->abData_User + (pDic)->UserHead.nOffset_awWordIndex_Pinyin_User) )
#define StaticDic_GetWordIndex_PinyinIndex_User( pDic )		( (unsigned char*)((pDic)->abData_User + (pDic)->UserHead.nOffset_awWordIndex_Pinyin_User + (pDic)->UserHead.nUSER_DIC_MAX_ENTRY * 2 * sizeof(unsigned short) ) )
#define StaticDic_GetWordIndex_Stroke_User( pDic )			( (unsigned short*)((pDic)->abData_User + (pDic)->UserHead.nOffset_awWordIndex_Stroke_User) )
#define StaticDic_GetMaxSeriesLogProb_Pinyin_User( pDic )	( (TYPE_PROB*)((pDic)->abData_User + (pDic)->UserHead.nOffset_aMaxSeriesLogProb_Pinyin_User) )
#define StaticDic_GetMaxSeriesLogProb_Stroke_User( pDic )	( (TYPE_PROB*)((pDic)->abData_User + (pDic)->UserHead.nOffset_aMaxSeriesLogProb_Stroke_User) )
#define StaticDic_GetLexicon_User_AbsoluteIndex( pDic, n )	( (STATIC_LEXICON_ENTRY_T*)((pDic)->abData_User + ((long*)((pDic)->abData_User + (pDic)->UserHead.nOffset_anlexiconsOffset_User))[(n)]) )
#define StaticDic_GetLexiconOffset_User( pDic )				( (long*)((pDic)->abData_User + (pDic)->UserHead.nOffset_anlexiconsOffset_User) )
#define StaticDic_GetIndNum_Pinyin_User( pDic )				( (unsigned short*)((pDic)->abData_User + (pDic)->UserHead.nOffset_awIndNum_Pinyin_User) )
#define StaticDic_GetIndNum_Stroke_User( pDic )				( (unsigned short*)((pDic)->abData_User + (pDic)->UserHead.nOffset_awIndNum_Stroke_User) )
#define StaticDic_GetInd_Pinyin_User( pDic, step )			( (IND_T*)((pDic)->abData_User + ((long*)((pDic)->abData_User + (pDic)->UserHead.nOffset_anIndOffset_Pinyin_User))[(step)]) )
#define StaticDic_GetInd_Stroke_User( pDic, step )			( (IND_T*)((pDic)->abData_User + ((long*)((pDic)->abData_User + (pDic)->UserHead.nOffset_anIndOffset_Stroke_User))[(step)]) )

//#define StaticDic_GetLexiconByWordID( pDic, pSentence, ID )	StaticDic_GetLexicon( pDic, StaticDic_GetWordIndex( pDic, pSentence )[ID] )
/*
 * Maps a word ID to its lexicon entry.
 *
 * IDs below pSentence->nWordIndexNum are looked up in the static word-index
 * table; all others are looked up in the user word-index table after
 * subtracting nWordIndexNum. The lexicon index found there is then resolved
 * with StaticDic_GetLexicon / StaticDic_GetLexicon_User respectively.
 */
static STATIC_LEXICON_ENTRY_T* StaticDic_GetLexiconByWordID( STATIC_DIC_T *pDic, SENTENCE_T *pSentence, unsigned short ID )
{
	unsigned short wLexiconIndex;

	if ( ID < (unsigned short)pSentence->nWordIndexNum ) {
		wLexiconIndex = StaticDic_GetWordIndex( pDic, pSentence )[ID];
		return StaticDic_GetLexicon( pDic, wLexiconIndex );
	}

	/* User word: the ID is relative to the end of the static word-index table. */
	wLexiconIndex = StaticDic_GetWordIndex_User( pDic, pSentence )[ID - pSentence->nWordIndexNum];
	return StaticDic_GetLexicon_User( pDic, wLexiconIndex );
}

/*
 * Returns a pointer to the word-index slot that holds the lexicon index of
 * word ID.
 *
 * IDs below pSentence->nWordIndexNum address the static word-index table;
 * the rest address the user word-index table at (ID - nWordIndexNum).
 * The slot is returned by address, so the caller can read or overwrite it.
 */
static unsigned short * StaticDic_GetLexiconIndexByWordID( STATIC_DIC_T *pDic, SENTENCE_T *pSentence, unsigned short ID )
{
	if ( ID < (unsigned short)pSentence->nWordIndexNum ) {
		return StaticDic_GetWordIndex( pDic, pSentence ) + ID;
	}

	return StaticDic_GetWordIndex_User( pDic, pSentence ) + (ID - pSentence->nWordIndexNum);
}

/*
 * Returns a pointer to the max-series log-probability slot selected by
 * pInd->wWordIDFirst.
 *
 * When wWordIDFirst is below pSentence->nWordIndexNum the static table is
 * used; otherwise the user table is used, indexed by
 * (wWordIDFirst - nWordIndexNum).
 */
static TYPE_PROB * StaticDic_GetMaxSeriesLogProbByIND( STATIC_DIC_T *pDic, SENTENCE_T *pSentence, IND_T *pInd )
{
	if ( pInd->wWordIDFirst < (unsigned short)pSentence->nWordIndexNum ) {
		return StaticDic_GetMaxSeriesLogProb( pDic, pSentence ) + pInd->wWordIDFirst;
	}

	return StaticDic_GetMaxSeriesLogProb_User( pDic, pSentence ) + (pInd->wWordIDFirst - pSentence->nWordIndexNum);
}

/*
 * Resolves a global lexicon index to its entry.
 *
 * Indices below ReadOnlyHead.nLexiconNum refer to the static lexicon; the
 * rest refer to the user lexicon. StaticDic_GetLexicon_User receives the
 * global index because it subtracts nLexiconNum itself.
 */
static STATIC_LEXICON_ENTRY_T* StaticDic_GetLexiconByIndex( STATIC_DIC_T *pDic, unsigned short nIndex )
{
	if ( nIndex < (unsigned short)pDic->ReadOnlyHead.nLexiconNum ) {
		return StaticDic_GetLexicon(pDic, nIndex);
	}

	return StaticDic_GetLexicon_User(pDic, nIndex);
}

/*
 * Returns a pointer to the log-probability slot of a global lexicon index.
 *
 * Indices below ReadOnlyHead.nLexiconNum address the static log-probability
 * table directly. Larger indices address the user table, which is indexed
 * relative to nLexiconNum, so the subtraction is done here.
 */
static TYPE_PROB* StaticDic_GetLexiconLogprobByIndex( STATIC_DIC_T *pDic, unsigned short nIndex )
{
	if ( nIndex < (unsigned short)pDic->ReadOnlyHead.nLexiconNum ) {
		return &StaticDic_GetLexiconLogprob(pDic, nIndex);
	}

	return &StaticDic_GetLexiconLogprob_User(pDic, nIndex - pDic->ReadOnlyHead.nLexiconNum);
}

/*
 * Returns a pointer to the log-probability slot of the word identified by
 * wWordID: the word ID is first translated to a lexicon index, which is then
 * resolved by StaticDic_GetLexiconLogprobByIndex.
 */
static TYPE_PROB* StaticDic_GetLexiconLogprobByWordID( STATIC_DIC_T *pDic, SENTENCE_T *pSentence, unsigned short wWordID )
{
	unsigned short wLexiconIndex = *StaticDic_GetLexiconIndexByWordID(pDic, pSentence, wWordID);

	return StaticDic_GetLexiconLogprobByIndex(pDic, wLexiconIndex);
}

/*
 * Returns the IND table for the given step, taken from the user dictionary
 * when nIsUserDic is non-zero and from the static dictionary when it is 0.
 */
static IND_T* StaticDic_GetInd_Spec( const STATIC_DIC_T *pDic, SENTENCE_T *pSentence, int step, int nIsUserDic )
{
	if ( nIsUserDic != 0 ) {
		return StaticDic_GetInd_User( pDic, pSentence, step );
	}

	return StaticDic_GetInd( pDic, pSentence, step );
}

/*
 * Returns the IND-count table, taken from the user dictionary when
 * nIsUserDic is non-zero and from the static dictionary when it is 0.
 */
static unsigned short* StaticDic_GetIndNum_Spec( const STATIC_DIC_T *pDic, SENTENCE_T *pSentence, int nIsUserDic )
{
	if ( nIsUserDic != 0 ) {
		return StaticDic_GetIndNum_User( pDic, pSentence );
	}

	return StaticDic_GetIndNum( pDic, pSentence );
}

/*
 * Accessors for the POS probability tables and the English IND tables inside
 * the read-only blob (pDic->abData).
 *
 * The EN accessors reuse the *_Pinyin header offsets (nOffset_awIndNum_Pinyin
 * and nOffset_anIndOffset_Pinyin); they differ from the Pinyin accessors only
 * in the EN_IND_T result type.
 *
 * All macro parameters are parenthesized so that non-trivial argument
 * expressions expand correctly.
 */
#define StaticDic_GetPOSBigram( pDic )					( (TYPE_PROB*)((pDic)->abData + (pDic)->ReadOnlyHead.nOffset_aPOSBigram) )
#define StaticDic_GetPOSUnigram( pDic )					( (TYPE_PROB*)((pDic)->abData + (pDic)->ReadOnlyHead.nOffset_aPOSUnigram) )

#define StaticDic_GetENIndNum( pDic )					( (unsigned short*)((pDic)->abData + (pDic)->ReadOnlyHead.nOffset_awIndNum_Pinyin) )
#define StaticDic_GetENInd( pDic, step )				( (EN_IND_T*)((pDic)->abData + ((long*)((pDic)->abData + (pDic)->ReadOnlyHead.nOffset_anIndOffset_Pinyin))[(step)]) )

/*********************************************************************************************/


/*********************************************************************************************/
/* Basic helper functions */

/*
 * Duplicates a NUL-terminated string into freshly malloc'ed storage.
 *
 * str: string to copy; may be NULL.
 * Returns the copy, which the caller owns and must release with free().
 * Returns NULL when str is NULL or when the allocation fails.
 */
static char* salloc( char* str )
{
	size_t nSize;
	char *pStr;

	if ( str == NULL )
		return NULL;

	/* Keep the length in size_t; it includes the terminating NUL. */
	nSize = strlen(str) + 1;
	pStr = malloc(nSize);
	if ( pStr != NULL )
		memcpy(pStr, str, nSize);

	return pStr;
}

/*
 * Builds the version/feature word expected of a static dictionary for the
 * given language.
 *
 * For PATTREE_DIC_CHS the result combines PATTREE_BIN_VERSION,
 * PATTREE_DIC_STATIC, PATTREE_DIC_TYPE_PROB_CHAR and - when compiled with
 * PATTREE_POSBIGRAM - PATTREE_DIC_POSBIGRAM. For any other language it is
 * PATTREE_BIN_EN_VERSION | PATTREE_DIC_STATIC. PATTREE_DIC_UNICODE is added
 * in both cases when _UNICODE is defined.
 */
static long GetStaticDicVersion( short nLanguage )
{
	long lVersion;

	if ( nLanguage != PATTREE_DIC_CHS ) {
		lVersion = PATTREE_BIN_EN_VERSION | PATTREE_DIC_STATIC;
	}
	else {
		lVersion = PATTREE_BIN_VERSION | PATTREE_DIC_STATIC;
#ifdef PATTREE_POSBIGRAM
		lVersion |= PATTREE_DIC_POSBIGRAM;
#endif
		lVersion |= PATTREE_DIC_TYPE_PROB_CHAR;
	}

#ifdef _UNICODE
	lVersion |= PATTREE_DIC_UNICODE;
#endif

	return lVersion;
}

/*
 * Returns the kernel version/feature word: PATTREE_KERNEL_VERSION combined
 * with PATTREE_DIC_STATIC and PATTREE_DIC_TYPE_PROB_CHAR, plus
 * PATTREE_DIC_POSBIGRAM when compiled with PATTREE_POSBIGRAM and
 * PATTREE_DIC_UNICODE when compiled with _UNICODE.
 */
long NEXTAP_GetKernelVersion(void)
{
	long lVersion = PATTREE_KERNEL_VERSION | PATTREE_DIC_STATIC;

#ifdef PATTREE_POSBIGRAM
	lVersion |= PATTREE_DIC_POSBIGRAM;
#endif

	lVersion |= PATTREE_DIC_TYPE_PROB_CHAR;

#ifdef _UNICODE
	lVersion |= PATTREE_DIC_UNICODE;
#endif

	return lVersion;
}

/* End of basic helper functions */
/*********************************************************************************************/

/*
 * Keypad digit <-> internal code conversions.
 *
 * DIGI2CODE(c, code, mode)  - dispatches to PINYINDIGI2CODE when mode equals
 *                             PATTREE_CHS_PINYIN, otherwise to STROKEDIGI2CODE.
 * PINYINDIGI2CODE(c, code)  - '2'..'9' -> 0..7, anything else -> -1.
 * STROKEDIGI2CODE(c, code)  - '1'..'5' -> 0..4, anything else -> -1.
 * STROKECODE2DIGI(code, c)  - inverse of STROKEDIGI2CODE: c = code + '1'.
 *
 * The three *2CODE macros expand to a brace block (not an expression); `code`
 * must be an assignable lvalue. Every argument is parenthesized, so
 * expressions such as `flag ? a : b` are tested and converted as one value.
 * `c` is still evaluated more than once, so it must not have side effects.
 */
#define DIGI2CODE(c,code,mode)				\
{											\
	if ( (mode) == PATTREE_CHS_PINYIN )		\
		PINYINDIGI2CODE(c,code)				\
	else									\
		STROKEDIGI2CODE(c,code)				\
}
#define PINYINDIGI2CODE(c,code)				\
{											\
	if ( '2' <= (c) && '9' >= (c) )			\
		(code) = (c) - '2';					\
	else									\
		(code) = -1;						\
}
#define STROKEDIGI2CODE(c,code)				\
{											\
	if ( '1' <= (c) && '5' >= (c) )			\
		(code) = (c) - '1';					\
	else									\
		(code) = -1;						\
}
#define STROKECODE2DIGI(code,c)				( (c) = (code) + '1' )

/*
 * Keypad letter tables.
 * DIGI2ALPHABET[i]    - letters printed on key '2' + i; unused slots are 0.
 * DIGI2ALPHABETNUM[i] - number of letters on key '2' + i.
 * ALPHABET2DIGI[k]    - key digit for letter 'a' + k.
 */
static const char DIGI2ALPHABET[][4] = {
	{ 'a', 'b', 'c', 0   },	/* key 2 */
	{ 'd', 'e', 'f', 0   },	/* key 3 */
	{ 'g', 'h', 'i', 0   },	/* key 4 */
	{ 'j', 'k', 'l', 0   },	/* key 5 */
	{ 'm', 'n', 'o', 0   },	/* key 6 */
	{ 'p', 'q', 'r', 's' },	/* key 7 */
	{ 't', 'u', 'v', 0   },	/* key 8 */
	{ 'w', 'x', 'y', 'z' }	/* key 9 */
};
static const int DIGI2ALPHABETNUM[] = {
	3, 3, 3, 3,		/* keys 2-5 */
	3, 4, 3, 4		/* keys 6-9 */
};
static const char ALPHABET2DIGI[26] = {
	'2', '2', '2',			/* a b c   */
	'3', '3', '3',			/* d e f   */
	'4', '4', '4',			/* g h i   */
	'5', '5', '5',			/* j k l   */
	'6', '6', '6',			/* m n o   */
	'7', '7', '7', '7',		/* p q r s */
	'8', '8', '8',			/* t u v   */
	'9', '9', '9', '9'		/* w x y z */
};

//////////////////////////////////////////////////////////////////
// create dic 

/*
 * Converts the pinyin reading of a lexicon entry into its keypad digit string.
 *
 * pLexicon     - entry whose word (wszWord) is converted.
 * pDic         - dictionary providing the pinyin string list.
 * pszDigi      - output buffer; receives one digit per pinyin letter plus a
 *                terminating NUL. No size is passed, so the caller must
 *                provide a buffer large enough for the longest reading.
 * bPinyinIndex - selects which reading of the word is used; the codes of
 *                reading k start at position k * (word length).
 *
 * Returns the number of digits written (excluding the NUL).
 *
 * A letter followed by ':' is treated as one two-character unit
 * (presumably the "u:" spelling of u-umlaut - confirm against the dictionary
 * data). After 'j', 'q' or 'x' the letter itself is mapped; otherwise the
 * unit is mapped as 'v'.
 *
 * Fixes relative to the previous version:
 *  - the preceding character is only inspected when one exists inside the
 *    current syllable, instead of reading pszCur[-1] before the string start;
 *  - the third test now checks the preceding character against 'x' (it used
 *    to test the current character, unlike the 'j' and 'q' tests).
 *
 * NOTE(review): letters are assumed to be in 'a'..'z'; anything else would
 * index ALPHABET2DIGI out of range, as before.
 */
static int StaticDic_Pinyin2Digi( STATIC_LEXICON_ENTRY_T *pLexicon, STATIC_DIC_T *pDic, char *pszDigi, unsigned char bPinyinIndex )
{
	int i, nWordLen, nLen;
	int nPos;
	char *pszStart;
	char *pszCur;
	char chPrev;

	pszDigi[0] = 0;

	nWordLen = ch_strlen(pLexicon->wszWord);
	nLen = 0;
	nPos = bPinyinIndex * nWordLen;
	for ( i = 0; i < nWordLen; i ++ ) {
		pszStart = StaticDic_GetPinyinList( pDic, StaticLexiconEntry_GetPinyinCode( pLexicon, pDic, nPos+i ) );
		pszCur = pszStart;
		while ( *pszCur ) {
			if ( pszCur[1] == ':' ) {
				/* Only look back when there is a character before this one. */
				chPrev = ( pszCur > pszStart ) ? pszCur[-1] : 0;
				if ( chPrev == 'j' || chPrev == 'q' || chPrev == 'x' )
					pszDigi[nLen++] = ALPHABET2DIGI[pszCur[0] - 'a'];
				else
					pszDigi[nLen++] = ALPHABET2DIGI['v' - 'a'];
				pszCur += 2;	/* skip the letter and the ':' */
			}
			else {
				pszDigi[nLen++] = ALPHABET2DIGI[pszCur[0] - 'a'];
				pszCur ++;
			}
		}
	}
	pszDigi[nLen] = 0;

	return nLen;
}

/*
 * Converts the stroke codes of a lexicon entry's word into a digit string.
 *
 * Each character's 16-bit code (looked up in the per-character stroke table
 * by character value minus ReadOnlyHead.wCharCodeOffset) packs up to four
 * 4-bit values, lowest nibble first; a zero nibble ends the character.
 * Every non-zero nibble v is written as the character '0' + v.
 *
 * pszDigi receives the digits and a terminating NUL; no size is passed, so
 * the caller must provide room for up to four digits per character plus one.
 * Returns the number of digits written (excluding the NUL).
 */
static int StaticDic_LexiconStrokeCode2Digi( STATIC_LEXICON_ENTRY_T *pLexicon, STATIC_DIC_T *pDic, char *pszDigi )
{
	unsigned short *pwStrokeTable;
	unsigned short wPacked;
	int nChars;
	int nOut;
	int iChar, iNibble;

	pszDigi[0] = 0;

	pwStrokeTable = StaticDic_GetCharStrokeCode(pDic);
	nChars = ch_strlen(pLexicon->wszWord);

	nOut = 0;
	for ( iChar = 0; iChar < nChars; iChar ++ ) {
		wPacked = pwStrokeTable[pLexicon->wszWord[iChar] - pDic->ReadOnlyHead.wCharCodeOffset];

		/* Consume nibbles from the low end until four are read or a zero nibble is met. */
		for ( iNibble = 0; iNibble < 4 && (wPacked & 0x000F) != 0; iNibble ++ ) {
			pszDigi[nOut++] = (wPacked & 0x000F) + '0';
			wPacked >>= 4;
		}
	}
	pszDigi[nOut] = 0;

	return nOut;
}

void PATTREE_FreeStaticDic( void* pDicBuffer )
{
	STATIC_DIC_T *pDic;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -