⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 escsm.cpp

📁 判断一串字符是属于什么字符集的程序
💻 CPP
字号:
/*	libcharguess	-	Guess the encoding/charset of a string    Copyright (C) 2003  Stephane Corbe <noubi@users.sourceforge.net>	Based on Mozilla sources    This library is free software; you can redistribute it and/or    modify it under the terms of the GNU Lesser General Public    License as published by the Free Software Foundation; either    version 2.1 of the License, or (at your option) any later version.    This library is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    Lesser General Public License for more details.    You should have received a copy of the GNU Lesser General Public    License along with this library; if not, write to the Free Software    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/#include "codingStateMachine.h"static PRUint32 HZ_cls[ 256 / 8 ] = {PCK4BITS(1,0,0,0,0,0,0,0),  // 00 - 07 PCK4BITS(0,0,0,0,0,0,0,0),  // 08 - 0f PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 PCK4BITS(0,0,0,1,0,0,0,0),  // 18 - 1f PCK4BITS(0,0,0,0,0,0,0,0),  // 20 - 27 PCK4BITS(0,0,0,0,0,0,0,0),  // 28 - 2f PCK4BITS(0,0,0,0,0,0,0,0),  // 30 - 37 PCK4BITS(0,0,0,0,0,0,0,0),  // 38 - 3f PCK4BITS(0,0,0,0,0,0,0,0),  // 40 - 47 PCK4BITS(0,0,0,0,0,0,0,0),  // 48 - 4f PCK4BITS(0,0,0,0,0,0,0,0),  // 50 - 57 PCK4BITS(0,0,0,0,0,0,0,0),  // 58 - 5f PCK4BITS(0,0,0,0,0,0,0,0),  // 60 - 67 PCK4BITS(0,0,0,0,0,0,0,0),  // 68 - 6f PCK4BITS(0,0,0,0,0,0,0,0),  // 70 - 77 PCK4BITS(0,0,0,4,0,5,2,0),  // 78 - 7f PCK4BITS(1,1,1,1,1,1,1,1),  // 80 - 87 PCK4BITS(1,1,1,1,1,1,1,1),  // 88 - 8f PCK4BITS(1,1,1,1,1,1,1,1),  // 90 - 97 PCK4BITS(1,1,1,1,1,1,1,1),  // 98 - 9f PCK4BITS(1,1,1,1,1,1,1,1),  // a0 - a7 PCK4BITS(1,1,1,1,1,1,1,1),  // a8 - af PCK4BITS(1,1,1,1,1,1,1,1),  // b0 - b7 PCK4BITS(1,1,1,1,1,1,1,1),  // b8 - bf PCK4BITS(1,1,1,1,1,1,1,1),  // c0 - c7 PCK4BITS(1,1,1,1,1,1,1,1),  // c8 - cf PCK4BITS(1,1,1,1,1,1,1,1),  // d0 - d7 PCK4BITS(1,1,1,1,1,1,1,1),  // d8 - df PCK4BITS(1,1,1,1,1,1,1,1),  // e0 - e7 PCK4BITS(1,1,1,1,1,1,1,1),  // e8 - ef PCK4BITS(1,1,1,1,1,1,1,1),  // f0 - f7 PCK4BITS(1,1,1,1,1,1,1,1)   // f8 - ff };static PRUint32 HZ_st [ 6] = {PCK4BITS(eStart,eError,     3,eStart,eStart,eStart,eError,eError),//00-07 PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError),//10-17 PCK4BITS(     5,eError,     6,eError,     5,     5,     4,eError),//18-1f PCK4BITS(     4,eError,     4,     4,     4,eError,     4,eError),//20-27 PCK4BITS(     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f };static const PRUint32 HZCharLenTable[] = {0, 0, 0, 0, 0, 0};SMModel HZSMModel = {  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls },   6,  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st },  HZCharLenTable,  "HZ-GB-2312",};static PRUint32 ISO2022CN_cls [ 256 / 8 ] = {PCK4BITS(2,0,0,0,0,0,0,0),  // 00 - 07 PCK4BITS(0,0,0,0,0,0,0,0),  // 08 - 0f PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 PCK4BITS(0,0,0,1,0,0,0,0),  // 18 - 1f PCK4BITS(0,0,0,0,0,0,0,0),  // 20 - 27 PCK4BITS(0,3,0,0,0,0,0,0),  // 28 - 2f PCK4BITS(0,0,0,0,0,0,0,0),  // 30 - 37 PCK4BITS(0,0,0,0,0,0,0,0),  // 38 - 3f PCK4BITS(0,0,0,4,0,0,0,0),  // 40 - 47 PCK4BITS(0,0,0,0,0,0,0,0),  // 48 - 4f PCK4BITS(0,0,0,0,0,0,0,0),  // 50 - 57 PCK4BITS(0,0,0,0,0,0,0,0),  // 58 - 5f PCK4BITS(0,0,0,0,0,0,0,0),  // 60 - 67 PCK4BITS(0,0,0,0,0,0,0,0),  // 68 - 6f PCK4BITS(0,0,0,0,0,0,0,0),  // 70 - 77 PCK4BITS(0,0,0,0,0,0,0,0),  // 78 - 7f PCK4BITS(2,2,2,2,2,2,2,2),  // 80 - 87 PCK4BITS(2,2,2,2,2,2,2,2),  // 88 - 8f PCK4BITS(2,2,2,2,2,2,2,2),  // 90 - 97 PCK4BITS(2,2,2,2,2,2,2,2),  // 98 - 9f PCK4BITS(2,2,2,2,2,2,2,2),  // a0 - a7 PCK4BITS(2,2,2,2,2,2,2,2),  // a8 - af PCK4BITS(2,2,2,2,2,2,2,2),  // b0 - b7 PCK4BITS(2,2,2,2,2,2,2,2),  // b8 - bf PCK4BITS(2,2,2,2,2,2,2,2),  // c0 - c7 PCK4BITS(2,2,2,2,2,2,2,2),  // c8 - cf PCK4BITS(2,2,2,2,2,2,2,2),  // d0 - d7 PCK4BITS(2,2,2,2,2,2,2,2),  // d8 - df PCK4BITS(2,2,2,2,2,2,2,2),  // e0 - e7 PCK4BITS(2,2,2,2,2,2,2,2),  // e8 - ef PCK4BITS(2,2,2,2,2,2,2,2),  // f0 - f7 PCK4BITS(2,2,2,2,2,2,2,2)   // f8 - ff };static PRUint32 ISO2022CN_st [ 8] = {PCK4BITS(eStart,     3,eError,eStart,eStart,eStart,eStart,eStart),//00-07 PCK4BITS(eStart,eError,eError,eError,eError,eError,eError,eError),//08-0f PCK4BITS(eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//10-17 PCK4BITS(eItsMe,eItsMe,eItsMe,eError,eError,eError,     4,eError),//18-1f PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//20-27 PCK4BITS(     5,     6,eError,eError,eError,eError,eError,eError),//28-2f PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//30-37 PCK4BITS(eError,eError,eError,eError,eError,eItsMe,eError,eStart) //38-3f };static const PRUint32 ISO2022CNCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};SMModel ISO2022CNSMModel = {  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls },  9,  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st },  ISO2022CNCharLenTable,  "ISO-2022-CN",};static PRUint32 ISO2022JP_cls [ 256 / 8 ] = {PCK4BITS(2,0,0,0,0,0,0,0),  // 00 - 07 PCK4BITS(0,0,0,0,0,0,2,2),  // 08 - 0f PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 PCK4BITS(0,0,0,1,0,0,0,0),  // 18 - 1f PCK4BITS(0,0,0,0,7,0,0,0),  // 20 - 27 PCK4BITS(3,0,0,0,0,0,0,0),  // 28 - 2f PCK4BITS(0,0,0,0,0,0,0,0),  // 30 - 37 PCK4BITS(0,0,0,0,0,0,0,0),  // 38 - 3f PCK4BITS(6,0,4,0,0,0,0,0),  // 40 - 47 PCK4BITS(0,0,5,0,0,0,0,0),  // 48 - 4f PCK4BITS(0,0,0,0,0,0,0,0),  // 50 - 57 PCK4BITS(0,0,0,0,0,0,0,0),  // 58 - 5f PCK4BITS(0,0,0,0,0,0,0,0),  // 60 - 67 PCK4BITS(0,0,0,0,0,0,0,0),  // 68 - 6f PCK4BITS(0,0,0,0,0,0,0,0),  // 70 - 77 PCK4BITS(0,0,0,0,0,0,0,0),  // 78 - 7f PCK4BITS(2,2,2,2,2,2,2,2),  // 80 - 87 PCK4BITS(2,2,2,2,2,2,2,2),  // 88 - 8f PCK4BITS(2,2,2,2,2,2,2,2),  // 90 - 97 PCK4BITS(2,2,2,2,2,2,2,2),  // 98 - 9f PCK4BITS(2,2,2,2,2,2,2,2),  // a0 - a7 PCK4BITS(2,2,2,2,2,2,2,2),  // a8 - af PCK4BITS(2,2,2,2,2,2,2,2),  // b0 - b7 PCK4BITS(2,2,2,2,2,2,2,2),  // b8 - bf PCK4BITS(2,2,2,2,2,2,2,2),  // c0 - c7 PCK4BITS(2,2,2,2,2,2,2,2),  // c8 - cf PCK4BITS(2,2,2,2,2,2,2,2),  // d0 - d7 PCK4BITS(2,2,2,2,2,2,2,2),  // d8 - df PCK4BITS(2,2,2,2,2,2,2,2),  // e0 - e7 PCK4BITS(2,2,2,2,2,2,2,2),  // e8 - ef PCK4BITS(2,2,2,2,2,2,2,2),  // f0 - f7 PCK4BITS(2,2,2,2,2,2,2,2)   // f8 - ff };static PRUint32 ISO2022JP_st [ 6] = {PCK4BITS(eStart,     3,eError,eStart,eStart,eStart,eStart,eStart),//00-07 PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//08-0f PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//10-17 PCK4BITS(eError,eError,eError,     5,eError,eError,eError,     4),//18-1f PCK4BITS(eError,eError,eError,eError,eItsMe,eError,eItsMe,eError),//20-27 PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eError,eError) //28-2f };static const PRUint32 ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0};SMModel ISO2022JPSMModel = {  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls },  8,  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st },  ISO2022JPCharLenTable,  "ISO-2022-JP",};static PRUint32 ISO2022KR_cls [ 256 / 8 ] = {PCK4BITS(2,0,0,0,0,0,0,0),  // 00 - 07 PCK4BITS(0,0,0,0,0,0,0,0),  // 08 - 0f PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 PCK4BITS(0,0,0,1,0,0,0,0),  // 18 - 1f PCK4BITS(0,0,0,0,3,0,0,0),  // 20 - 27 PCK4BITS(0,4,0,0,0,0,0,0),  // 28 - 2f PCK4BITS(0,0,0,0,0,0,0,0),  // 30 - 37 PCK4BITS(0,0,0,0,0,0,0,0),  // 38 - 3f PCK4BITS(0,0,0,5,0,0,0,0),  // 40 - 47 PCK4BITS(0,0,0,0,0,0,0,0),  // 48 - 4f PCK4BITS(0,0,0,0,0,0,0,0),  // 50 - 57 PCK4BITS(0,0,0,0,0,0,0,0),  // 58 - 5f PCK4BITS(0,0,0,0,0,0,0,0),  // 60 - 67 PCK4BITS(0,0,0,0,0,0,0,0),  // 68 - 6f PCK4BITS(0,0,0,0,0,0,0,0),  // 70 - 77 PCK4BITS(0,0,0,0,0,0,0,0),  // 78 - 7f PCK4BITS(2,2,2,2,2,2,2,2),  // 80 - 87 PCK4BITS(2,2,2,2,2,2,2,2),  // 88 - 8f PCK4BITS(2,2,2,2,2,2,2,2),  // 90 - 97 PCK4BITS(2,2,2,2,2,2,2,2),  // 98 - 9f PCK4BITS(2,2,2,2,2,2,2,2),  // a0 - a7 PCK4BITS(2,2,2,2,2,2,2,2),  // a8 - af PCK4BITS(2,2,2,2,2,2,2,2),  // b0 - b7 PCK4BITS(2,2,2,2,2,2,2,2),  // b8 - bf PCK4BITS(2,2,2,2,2,2,2,2),  // c0 - c7 PCK4BITS(2,2,2,2,2,2,2,2),  // c8 - cf PCK4BITS(2,2,2,2,2,2,2,2),  // d0 - d7 PCK4BITS(2,2,2,2,2,2,2,2),  // d8 - df PCK4BITS(2,2,2,2,2,2,2,2),  // e0 - e7 PCK4BITS(2,2,2,2,2,2,2,2),  // e8 - ef PCK4BITS(2,2,2,2,2,2,2,2),  // f0 - f7 PCK4BITS(2,2,2,2,2,2,2,2)   // f8 - ff };static PRUint32 ISO2022KR_st [ 5] = {PCK4BITS(eStart,     3,eError,eStart,eStart,eStart,eError,eError),//00-07 PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f PCK4BITS(eItsMe,eItsMe,eError,eError,eError,     4,eError,eError),//10-17 PCK4BITS(eError,eError,eError,eError,     5,eError,eError,eError),//18-1f PCK4BITS(eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart) //20-27 };static const PRUint32 ISO2022KRCharLenTable[] = {0, 0, 0, 0, 0, 0};SMModel ISO2022KRSMModel = {  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls },   6,  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st },  ISO2022KRCharLenTable,  "ISO-2022-KR",};

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -