utf8prober.cpp

来自「判断一串字符是属于什么字符集的程序」· C++ 代码 · 共 76 行

CPP
76
字号
/*	libcharguess	-	Guess the encoding/charset of a string    Copyright (C) 2003  Stephane Corbe <noubi@users.sourceforge.net>	Based on Mozilla sources    This library is free software; you can redistribute it and/or    modify it under the terms of the GNU Lesser General Public    License as published by the Free Software Foundation; either    version 2.1 of the License, or (at your option) any later version.    This library is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    Lesser General Public License for more details.    You should have received a copy of the GNU Lesser General Public    License along with this library; if not, write to the Free Software    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/#include "UTF8Prober.h"void  nsUTF8Prober::Reset(void){  mCodingSM->Reset();   mNumOfMBChar = 0;  mState = eDetecting;}nsProbingState nsUTF8Prober::HandleData(const char* aBuf, PRUint32 aLen){  nsSMState codingState;  for (PRUint32 i = 0; i < aLen; i++)  {    codingState = mCodingSM->NextState(aBuf[i]);    if (codingState == eError)    {      mState = eNotMe;      break;    }    if (codingState == eItsMe)    {      mState = eFoundIt;      break;    }    if (codingState == eStart)    {      if (mCodingSM->GetCurrentCharLen() >= 2)        mNumOfMBChar++;    }  }  if (mState == eDetecting)    if (GetConfidence() > SHORTCUT_THRESHOLD)      mState = eFoundIt;  return mState;}#define ONE_CHAR_PROB   (float)0.50float nsUTF8Prober::GetConfidence(void){  float unlike = (float)0.99;  if (mNumOfMBChar < 6)  {    for (PRUint32 i = 0; i < mNumOfMBChar; i++)      unlike *= ONE_CHAR_PROB;    return (float)1.0 - unlike;  }  else    return (float)0.99;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?