⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sbcharsetprober.cpp

📁 判断一串字符是属于什么字符集的程序
💻 CPP
字号:
/*	libcharguess	-	Guess the encoding/charset of a string    Copyright (C) 2003  Stephane Corbe <noubi@users.sourceforge.net>	Based on Mozilla sources    This library is free software; you can redistribute it and/or    modify it under the terms of the GNU Lesser General Public    License as published by the Free Software Foundation; either    version 2.1 of the License, or (at your option) any later version.    This library is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    Lesser General Public License for more details.    You should have received a copy of the GNU Lesser General Public    License along with this library; if not, write to the Free Software    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/#include <stdio.h>#include "SBCharsetProber.h"nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, PRUint32 aLen){  unsigned char order;  for (PRUint32 i = 0; i < aLen; i++)  {    order = mModel->charToOrderMap[(unsigned char)aBuf[i]];    if (order < SYMBOL_CAT_ORDER)      mTotalChar++;    if (order < SAMPLE_SIZE)    {        mFreqChar++;      if (mLastOrder < SAMPLE_SIZE)      {        mTotalSeqs++;        ++(mSeqCounters[mModel->precedenceMatrix[mLastOrder*SAMPLE_SIZE+order]]);      }    }    mLastOrder = order;  }  if (mState == eDetecting)    if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)    {      float cf = GetConfidence();      if (cf > POSITIVE_SHORTCUT_THRESHOLD)        mState = eFoundIt;      else if (cf < NEGATIVE_SHORTCUT_THRESHOLD)        mState = eNotMe;    }  return mState;}void  nsSingleByteCharSetProber::Reset(void){  mState = eDetecting;  mLastOrder = 255;  for (PRUint32 i = 0; i < NUMBER_OF_SEQ_CAT; i++)    mSeqCounters[i] = 0;  mTotalSeqs = 0;  mTotalChar = 0;  mFreqChar = 0;}//#define NEGATIVE_APPROACH 1float nsSingleByteCharSetProber::GetConfidence(void){#ifdef NEGATIVE_APPROACH  if (mTotalSeqs > 0)    if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT]*10 )      return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT]*10))/mTotalSeqs * mFreqChar / mTotalChar;  return (float)0.01;#else  //POSITIVE_APPROACH  float r;  if (mTotalSeqs > 0) {    r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;    r = r*mFreqChar/mTotalChar;    if (r >= (float)1.00)      r = (float)0.99;    return r;  }  return (float)0.01;#endif}#ifdef DEBUG_chardetvoid nsSingleByteCharSetProber::DumpStatus(){  printf("[%s] prober has confidence %f\r\n", GetCharSetName(), GetConfidence());}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -