gb2312prober.cpp

来自「判断一串字符是属于什么字符集的程序」· C++ 代码 · 共 85 行

CPP
85
字号
/*	libcharguess	-	Guess the encoding/charset of a string    Copyright (C) 2003  Stephane Corbe <noubi@users.sourceforge.net>	Based on Mozilla sources    This library is free software; you can redistribute it and/or    modify it under the terms of the GNU Lesser General Public    License as published by the Free Software Foundation; either    version 2.1 of the License, or (at your option) any later version.    This library is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    Lesser General Public License for more details.    You should have received a copy of the GNU Lesser General Public    License along with this library; if not, write to the Free Software    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/// for S-JIS encoding, obeserve characteristic:// 1, kana character (or hankaku?) often have hight frequency of appereance// 2, kana character often exist in group// 3, certain combination of kana is never used in japanese language#include "GB2312Prober.h"void  nsGB18030Prober::Reset(void){  mCodingSM->Reset();   mState = eDetecting;  mDistributionAnalyser.Reset();  //mContextAnalyser.Reset();}nsProbingState nsGB18030Prober::HandleData(const char* aBuf, PRUint32 aLen){  nsSMState codingState;  for (PRUint32 i = 0; i < aLen; i++)  {    codingState = mCodingSM->NextState(aBuf[i]);    if (codingState == eError)    {      mState = eNotMe;      break;    }    if (codingState == eItsMe)    {      mState = eFoundIt;      break;    }    if (codingState == eStart)    {      PRUint32 charLen = mCodingSM->GetCurrentCharLen();      if (i == 0)      {        mLastChar[1] = aBuf[0];        mDistributionAnalyser.HandleOneChar(mLastChar, charLen);      }      else        mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);    }  }  mLastChar[0] = aBuf[aLen-1];  if (mState == eDetecting)    if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)      mState = eFoundIt;//    else//      mDistributionAnalyser.HandleData(aBuf, aLen);  return mState;}float nsGB18030Prober::GetConfidence(void){  float distribCf = mDistributionAnalyser.GetConfidence();  return (float)distribCf;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?