📄 jpcntx.cpp
字号:
if (mDone) return; //The buffer we got is byte oriented, and a character may span in more than one //buffers. In case the last one or two byte in last buffer is not complete, we //record how many byte needed to complete that character and skip these bytes here. //We can choose to record those bytes as well and analyse the character once it //is complete, but since a character will not make much difference, by simply skipping //this character will simply our logic and improve performance. for (i = mNeedToSkipCharNum; i < aLen; ) { order = GetOrder(aBuf+i, &charLen); i+= charLen; if (i > aLen){ mNeedToSkipCharNum = i - aLen; mLastCharOrder = -1; } else { if (order != -1 && mLastCharOrder != -1) { mTotalRel ++; if (mTotalRel > MAX_REL_THRESHOLD) { mDone = PR_TRUE; break; } mRelSample[jp2CharContext[mLastCharOrder][order]]++; } mLastCharOrder = order; } } return;}void JapaneseContextAnalysis::Reset(void){ mTotalRel = 0; for (PRUint32 i = 0; i < NUM_OF_CATEGORY; i++) mRelSample[i] = 0; mNeedToSkipCharNum = 0; mLastCharOrder = -1; mDone = PR_FALSE;}#define DONT_KNOW (float)-1float JapaneseContextAnalysis::GetConfidence(){ //This is just one way to calculate confidence. It works well for me. if (mTotalRel > MINIMUM_DATA_THRESHOLD) return ((float)(mTotalRel - mRelSample[0]))/mTotalRel; else return (float)DONT_KNOW;}PRInt32 SJISContextAnalysis::GetOrder(const char* str, PRUint32 *charLen){ //find out current char's byte length if ((unsigned char)*str >= (unsigned char)0x81 && (unsigned char)*str <= (unsigned char)0x9f || (unsigned char)*str >= (unsigned char)0xe0 && (unsigned char)*str <= (unsigned char)0xfc ) *charLen = 2; else *charLen = 1; //return its order if it is hiragana if (*str == '\202' && (unsigned char)*(str+1) >= (unsigned char)0x9f && (unsigned char)*(str+1) <= (unsigned char)0xf1) return (unsigned char)*(str+1) - (unsigned char)0x9f; return -1;}PRInt32 EUCJPContextAnalysis::GetOrder(const char* str, PRUint32 *charLen){ //find out current char's byte length if ((unsigned char)*str == (unsigned char)0x8e || (unsigned char)*str >= (unsigned char)0xa1 && (unsigned char)*str <= (unsigned char)0xfe) *charLen = 2; else if ((unsigned char)*str == (unsigned char)0x8f) *charLen = 3; else *charLen = 1; //return its order if it is hiragana if ((unsigned char)*str == (unsigned char)0xa4 && (unsigned char)*(str+1) >= (unsigned char)0xa1 && (unsigned char)*(str+1) <= (unsigned char)0xf3) return (unsigned char)*(str+1) - (unsigned char)0xa1; return -1;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -