📄 jpcntx.cpp

📁 判断一串字符是属于什么字符集的程序

💻 CPP

📖 第 1 页 / 共 2 页

字号:

上一页 12

    if (mDone)    return;  //The buffer we got is byte oriented, and a character may span in more than one  //buffers. In case the last one or two byte in last buffer is not complete, we   //record how many byte needed to complete that character and skip these bytes here.  //We can choose to record those bytes as well and analyse the character once it   //is complete, but since a character will not make much difference, by simply skipping  //this character will simply our logic and improve performance.  for (i = mNeedToSkipCharNum; i < aLen; )  {    order = GetOrder(aBuf+i, &charLen);    i+= charLen;    if (i > aLen){      mNeedToSkipCharNum = i - aLen;      mLastCharOrder = -1;    }    else     {      if (order != -1 && mLastCharOrder != -1)      {        mTotalRel ++;        if (mTotalRel > MAX_REL_THRESHOLD)        {          mDone = PR_TRUE;          break;        }        mRelSample[jp2CharContext[mLastCharOrder][order]]++;      }      mLastCharOrder = order;    }  }    return;}void JapaneseContextAnalysis::Reset(void){  mTotalRel = 0;  for (PRUint32 i = 0; i < NUM_OF_CATEGORY; i++)    mRelSample[i] = 0;  mNeedToSkipCharNum = 0;  mLastCharOrder = -1;  mDone = PR_FALSE;}#define DONT_KNOW (float)-1float  JapaneseContextAnalysis::GetConfidence(){  //This is just one way to calculate confidence. It works well for me.  if (mTotalRel > MINIMUM_DATA_THRESHOLD)    return ((float)(mTotalRel - mRelSample[0]))/mTotalRel;  else     return (float)DONT_KNOW;}PRInt32 SJISContextAnalysis::GetOrder(const char* str, PRUint32 *charLen){  //find out current char's byte length  if ((unsigned char)*str >= (unsigned char)0x81 && (unsigned char)*str <= (unsigned char)0x9f ||       (unsigned char)*str >= (unsigned char)0xe0 && (unsigned char)*str <= (unsigned char)0xfc )      *charLen = 2;  else       *charLen = 1;  //return its order if it is hiragana  if (*str == '\202' &&         (unsigned char)*(str+1) >= (unsigned char)0x9f &&         (unsigned char)*(str+1) <= (unsigned char)0xf1)    return (unsigned char)*(str+1) - (unsigned char)0x9f;  return -1;}PRInt32 EUCJPContextAnalysis::GetOrder(const char* str, PRUint32 *charLen){  //find out current char's byte length  if ((unsigned char)*str == (unsigned char)0x8e ||      (unsigned char)*str >= (unsigned char)0xa1 &&       (unsigned char)*str <= (unsigned char)0xfe)      *charLen = 2;  else if ((unsigned char)*str == (unsigned char)0x8f)    *charLen = 3;  else    *charLen = 1;  //return its order if it is hiragana  if ((unsigned char)*str == (unsigned char)0xa4 &&      (unsigned char)*(str+1) >= (unsigned char)0xa1 &&       (unsigned char)*(str+1) <= (unsigned char)0xf3)     return (unsigned char)*(str+1) - (unsigned char)0xa1;  return -1;}

上一页 12

⌨️ 快捷键说明

复制代码 Ctrl + C

搜索代码 Ctrl + F

全屏模式 F11

切换主题 Ctrl + Shift + D

显示快捷键 ?

增大字号 Ctrl + =

减小字号 Ctrl + -