⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 jpcntx.cpp

📁 判断一串字符是属于什么字符集的程序
💻 CPP
📖 第 1 页 / 共 2 页
字号:
    if (mDone)    return;  //The buffer we got is byte oriented, and a character may span in more than one  //buffers. In case the last one or two byte in last buffer is not complete, we   //record how many byte needed to complete that character and skip these bytes here.  //We can choose to record those bytes as well and analyse the character once it   //is complete, but since a character will not make much difference, by simply skipping  //this character will simply our logic and improve performance.  for (i = mNeedToSkipCharNum; i < aLen; )  {    order = GetOrder(aBuf+i, &charLen);    i+= charLen;    if (i > aLen){      mNeedToSkipCharNum = i - aLen;      mLastCharOrder = -1;    }    else     {      if (order != -1 && mLastCharOrder != -1)      {        mTotalRel ++;        if (mTotalRel > MAX_REL_THRESHOLD)        {          mDone = PR_TRUE;          break;        }        mRelSample[jp2CharContext[mLastCharOrder][order]]++;      }      mLastCharOrder = order;    }  }    return;}void JapaneseContextAnalysis::Reset(void){  mTotalRel = 0;  for (PRUint32 i = 0; i < NUM_OF_CATEGORY; i++)    mRelSample[i] = 0;  mNeedToSkipCharNum = 0;  mLastCharOrder = -1;  mDone = PR_FALSE;}#define DONT_KNOW (float)-1float  JapaneseContextAnalysis::GetConfidence(){  //This is just one way to calculate confidence. It works well for me.  if (mTotalRel > MINIMUM_DATA_THRESHOLD)    return ((float)(mTotalRel - mRelSample[0]))/mTotalRel;  else     return (float)DONT_KNOW;}PRInt32 SJISContextAnalysis::GetOrder(const char* str, PRUint32 *charLen){  //find out current char's byte length  if ((unsigned char)*str >= (unsigned char)0x81 && (unsigned char)*str <= (unsigned char)0x9f ||       (unsigned char)*str >= (unsigned char)0xe0 && (unsigned char)*str <= (unsigned char)0xfc )      *charLen = 2;  else       *charLen = 1;  //return its order if it is hiragana  if (*str == '\202' &&         (unsigned char)*(str+1) >= (unsigned char)0x9f &&         (unsigned char)*(str+1) <= (unsigned char)0xf1)    return (unsigned char)*(str+1) - (unsigned char)0x9f;  return -1;}PRInt32 EUCJPContextAnalysis::GetOrder(const char* str, PRUint32 *charLen){  //find out current char's byte length  if ((unsigned char)*str == (unsigned char)0x8e ||      (unsigned char)*str >= (unsigned char)0xa1 &&       (unsigned char)*str <= (unsigned char)0xfe)      *charLen = 2;  else if ((unsigned char)*str == (unsigned char)0x8f)    *charLen = 3;  else    *charLen = 1;  //return its order if it is hiragana  if ((unsigned char)*str == (unsigned char)0xa4 &&      (unsigned char)*(str+1) >= (unsigned char)0xa1 &&       (unsigned char)*(str+1) <= (unsigned char)0xf3)     return (unsigned char)*(str+1) - (unsigned char)0xa1;  return -1;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -