⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 utf8fix.c

📁 OggPlay for Symbian 是symbian上的一个媒体播放程序的源码。它支持ogg,wav等等多媒体格式。
💻 C
📖 第 1 页 / 共 2 页
字号:
/*
AutoConvert, a Chinese HZ/GB/Big5 encodings auto-converter
Copyright (C) 1999  于广辉  Yu Guanghui <ygh@debian.org>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA  02111-1307, USA.
*/
/* 
* author: Yu Guanghui <ygh@debian.org>
*	  Network Center
* 	  Dalian Univ. of Tech.	
*	  1999.5
*/

/*
 *Author: Ha Shao <hashao@china.com>
 *Date:	2000.08.30
 */

/* Guess the text encoding from character frequencies.
 * Two hash tables generated by GNU gperf are used for the lookups.
 */

// Platform settings
#include <OggOs.h>

// This file is for non PLUGIN_SYSTEM only
#if !defined(PLUGIN_SYSTEM)

//#include "zhstatis.h"
#include "Utf8Fix.h"

/////////////////////////////////////////////////
// Locals
//

// not used, #define GBTOPPER        3.500849        /* Top GB frequency. */
// not used, #define B5TOPPER        3.803567        /* Top Big5 frequency. */
// not used, #define MAX_MODULE      16
// not used, #define MAX_BUFFER      8192

/*
 * One row of a gperf keyword table: a key string paired with a number.
 * In the GB table the keys are single two-byte characters and the number
 * is the character's frequency figure (presumably a percentage -- confirm
 * against the data the table was generated from).
 */
struct charHz
{
	char   *name;     /* key string the lookup matches against */
	double  percent;  /* frequency figure attached to the key */
};

/////////////////////////////////////////////////
// Local Function Prototypes
//

/* NOTE(review): b5Hash is defined outside this excerpt; presumably the Big5
 * counterpart of gbHash -- confirm against its definition. */
static unsigned int         b5Hash ( register const char *str, register unsigned int len );
/* gperf hash for the GB table: adds the association values of the first
 * and the last byte of str (len is the key length in bytes). */
static unsigned int         gbHash ( register const char *str, register unsigned int len );
/* NOTE(review): inBig5 is defined outside this excerpt; presumably the Big5
 * counterpart of inGB -- confirm against its definition. */
static const struct charHz* inBig5( register const char *str, register unsigned int len );
/* gperf lookup over the table of the 400 most frequently used GB characters. */
static const struct charHz* inGB( register const char *str, register unsigned int len );
/* Fallback used by j_code() when the marker-character counts tie and no HZ
 * escape pair is found; its result is returned to the caller unchanged. */
static int                  j_code3( const char * buff, int count );

/* ANSI-C code produced by gperf version 2.7.1 (19981006 egcs) */
/* Command-line: gperf -L ANSI-C -I -t -H gbHash -N inGB -C -E -o -n -c -D gbpercent.txt  */
/****************************************************
 400 Most frequently used GB chars. 
 ****************************************************/
/* maximum key range = 991, duplicates = 5 */

/////////////////////////////////////////////////
// Global Functions
//

/*
 * EXPORTED decorates the public entry points of this file. It expands to
 * nothing when OGGPLAYPLUGIN is defined and to EXPORT_C otherwise.
 */
#ifdef OGGPLAYPLUGIN
#define EXPORTED 
#else
#define EXPORTED EXPORT_C
#endif

/*
 * Guess the encoding of a whole string.
 *
 * The length of buff is measured with _ogg_strlen() (presumably a
 * strlen-equivalent, so buff must be NUL-terminated -- confirm) and the
 * decision is delegated to j_code(), whose result is returned unchanged.
 */
EXPORTED int jcode(const char* buff)
{
	const int byte_count = _ogg_strlen(buff);

	return j_code(buff, byte_count);
}

/*
 * Guess which Chinese encoding a buffer uses.
 *
 * The first `count` bytes of `buff` are scanned for two very common
 * characters ("I/me" and the possessive particle) in both encodings:
 * the byte pairs B5 C4 and CE D2 count towards GB, the pairs AA BA and
 * A7 DA count towards Big5.
 *
 * Parameters:
 *   buff   text to inspect. NOTE(review): the HZ check goes through
 *          _ogg_strstr(), which is not given `count` -- presumably it is
 *          strstr-like and needs buff to be NUL-terminated; confirm.
 *   count  number of bytes of buff to scan; a value <= 0 scans nothing.
 *
 * Returns:
 *   GB_CODE   when more GB markers than Big5 markers were found,
 *   BIG5_CODE when more Big5 markers than GB markers were found,
 *   on a tie: HZ_CODE if both "~{" and "~}" occur in buff, otherwise
 *   whatever j_code3(buff, count) returns.
 */
int j_code(const char* buff, int count)
{
	const unsigned char *bytes = (const unsigned char *)buff;
	int c_gb = 0;
	int c_big5 = 0;
	int i;

	char HZ_START[]="~{";
	char HZ_END[]="~}";

	for (i = 0; i < count; i++) {
		if (!(bytes[i] & 0x80))
			continue;	/* 7-bit byte: cannot start a two-byte character */

		/* Only look at the trail byte when it is still inside the
		 * count bytes the caller handed in; a lead byte in the very
		 * last position has no trail byte to compare against.
		 */
		if (i + 1 < count) {
			const unsigned char lead = bytes[i];
			const unsigned char trail = bytes[i + 1];

			if ((lead == 0xB5 && trail == 0xC4) || (lead == 0xCE && trail == 0xD2)) {
				c_gb++;
			} else if ((lead == 0xAA && trail == 0xBA) || (lead == 0xA7 && trail == 0xDA)) {
				c_big5++;
			}
		}

		/* Every byte with the high bit set is treated as a lead byte,
		 * so step over its trail byte as well.
		 */
		i++;
	}

	if (c_gb > c_big5) {
		return GB_CODE;
	}
	if (c_big5 > c_gb) {
		return BIG5_CODE;
	}

	/* Tie (usually because neither marker character occurred at all):
	 * look for the HZ escape pair first.
	 */
	if (_ogg_strstr(buff, HZ_START) != NULL && _ogg_strstr(buff, HZ_END) != NULL) {
		return HZ_CODE;
	}

	/* Still undecided, so fall back to the 400-character frequency
	 * tables.
	 */
	return j_code3(buff, count);
}

/////////////////////////////////////////////////
// Local Functions
//

/*
 * gperf hash for the GB frequency table.
 *
 * The hash is the sum of the association values of the first and the last
 * byte of the key. Every byte value below 160 has the association value
 * 991, so only the values for bytes 160..255 are kept in a table. Since
 * 991 is larger than the biggest valid hash (990), any key containing a
 * byte mapped to 991 hashes outside the valid range.
 *
 * str  key bytes
 * len  key length in bytes; must be at least 1, because the last byte is
 *      read at index len - 1
 */
#ifdef __GNUC__
__inline
#endif
static unsigned int
gbHash (register const char *str, register unsigned int len)
{
  enum
    {
      HIGH_TABLE_BASE = 160,  /* byte value stored at high_values[0] */
      LOW_BYTE_VALUE = 991    /* association value of every byte below the base */
    };

  /* Association values for byte values 160..255, in order. */
  static const unsigned short high_values[] =
    {
      991,  91, 425, 320, 148,  70, 510,  85, 203, 160,
        6,  18, 480, 223, 113, 345,  28, 120, 320, 390,
       75,  20, 275, 380, 230,  50, 433,  15, 145,   5,
      455, 460, 163, 180, 338,  40,   8, 508, 510,  45,
      325,  85,  30, 140, 105, 305,   3, 225, 100, 403,
      113, 480,  25, 205,   0, 250, 393, 371,  63, 260,
      268, 991,  10, 470, 405, 268, 348,  76, 285, 131,
      408, 398, 991, 125, 330, 126, 388, 503,  51, 148,
      350,  46,   1, 166,  15,  16, 485, 106, 458,  41,
      103, 221,  21,  61, 500, 991
    };

  const unsigned char first_byte = (unsigned char)str[0];
  const unsigned char last_byte = (unsigned char)str[len - 1];
  unsigned int hash;

  if (last_byte < HIGH_TABLE_BASE)
    hash = LOW_BYTE_VALUE;
  else
    hash = high_values[last_byte - HIGH_TABLE_BASE];

  if (first_byte < HIGH_TABLE_BASE)
    hash += LOW_BYTE_VALUE;
  else
    hash += high_values[first_byte - HIGH_TABLE_BASE];

  return hash;
}

#ifdef __GNUC__
__inline
#endif
static const struct charHz *
inGB (register const char *str, register unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 400,
      MIN_WORD_LENGTH = 2,
      MAX_WORD_LENGTH = 2,
      MIN_HASH_VALUE = 0,
      MAX_HASH_VALUE = 990
    };

  static const struct charHz wordlist[] =
    {
      {"种",	0.151748},
      {"治",	0.062792},
      {"知",	0.176508},
      {"为",	0.442767},
      {"文",	0.214763},
      {"无",	0.213959},
      {"只",	0.229438},
      {"或",	0.085280},
      {"位",	0.128747},
      {"交",	0.073778},
      {"华",	0.094296},
      {"将",	0.168043},
      {"到",	0.514231},
      {"近",	0.063487},
      {"的",	3.500849},
      {"手",	0.211674},
      {"元",	0.071841},
      {"问",	0.140707},
      {"式",	0.057680},
      {"但",	0.167690},
      {"怎",	0.072121},
      {"话",	0.146944},
      {"术",	0.070839},
      {"务",	0.081974},
      {"实",	0.156769},
      {"今",	0.083052},
      {"那",	0.382771},
      {"没",	0.244614},
      {"基",	0.057849},
      {"毛",	0.075090},
      {"得",	0.396983},
      {"命",	0.093383},
      {"众",	0.058726},
      {"还",	0.233198},
      {"活",	0.092784},
      {"公",	0.188655},
      {"倒",	0.054738},
      {"内",	0.111181},
      {"前",	0.200232},
      {"是",	1.345411},
      {"打",	0.129442},
      {"次",	0.121678},
      {"未",	0.054546},
      {"使",	0.118288},
      {"第",	0.123170},
      {"你",	0.436374},
      {"们",	0.495768},
      {"民",	0.174189},
      {"在",	0.964866},
      {"山",	0.103285},
      {"数",	0.091243},
      {"传",	0.063780},
      {"点",	0.152685},
      {"中",	0.717652},
      {"甚",	0.058755},
      {"色",	0.074190},
      {"叫",	0.089328},
      {"主",	0.204736},
      {"心",	0.273937},
      {"西",	0.117537},
      {"教",	0.117812},
      {"过",	0.327524},
      {"之",	0.327879},
      {"因",	0.127394},
      {"么",	0.276714},
      {"我",	1.014949},
      {"机",	0.137512},
      {"要",	0.370943},
      {"直",	0.075193},
      {"条",	0.064573},
      {"太",	0.103034},
      {"性",	0.095783},
      {"声",	0.151640},
      {"一",	1.703619},
      {"市",	0.101242},
      {"神",	0.091301},
      {"业",	0.134539},
      {"五",	0.103181},
      {"被",	0.126292},
      {"深",	0.060073},
      {"以",	0.349432},
      {"当",	0.222406},
      {"爱",	0.073591},
      {"十",	0.211422},
      {"员",	0.097189},
      {"明",	0.152480},
      {"四",	0.123866},
      {"时",	0.412130},
      {"物",	0.078221},
      {"国",	0.624527},
      {"半",	0.061338},
      {"书",	0.098381},
      {"钱",	0.055097},
      {"说",	0.508134},
      {"它",	0.075883},
      {"极",	0.055787},
      {"运",	0.061514},
      {"听",	0.115919},
      {"八",	0.075698},
      {"情",	0.143753},
      {"请",	0.073937},
      {"及",	0.083070},
      {"写",	0.056349},
      {"清",	0.078525},
      {"代",	0.104545},
      {"至",	0.079151},
      {"林",	0.074843},
      {"所",	0.197660},
      {"道",	0.421135},
      {"两",	0.176988},
      {"联",	0.064201},
      {"生",	0.343557},
      {"记",	0.077728},
      {"小",	0.293018},
      {"世",	0.114808},
      {"留",	0.063376},
      {"便",	0.128778},
      {"工",	0.149985},
      {"行",	0.203868},
      {"她",	0.305242},
      {"美",	0.152936},
      {"建",	0.074596},
      {"战",	0.076361},
      {"求",	0.059639},
      {"李",	0.057653},
      {"此",	0.158364},
      {"算",	0.073128},
      {"花",	0.065122},
      {"即",	0.059687},
      {"六",	0.070030},
      {"达",	0.061845},
      {"现",	0.169370},
      {"向",	0.143764},
      {"江",	0.058127},
      {"指",	0.068890},
      {"令",	0.055467},
      {"南",	0.074475},
      {"收",	0.055267},
      {"体",	0.100845},
      {"来",	0.642527},
      {"北",	0.113407},
      {"大",	0.670268},
      {"义",	0.072306},
      {"系",	0.085791},
      {"原",	0.089937},
      {"字",	0.074014},
      {"更",	0.087664},
      {"水",	0.102945},
      {"展",	0.063943},
      {"转",	0.059482},
      {"家",	0.324818},
      {"些",	0.166013},
      {"名",	0.142495},
      {"正",	0.143041},
      {"周",	0.062711},
      {"该",	0.063875},
      {"果",	0.084231},
      {"结",	0.070334},
      {"自",	0.340950},
      {"能",	0.253537},
      {"白",	0.089789},
      {"资",	0.066471},
      {"会",	0.342850},
      {"思",	0.068379},
      {"流",	0.067758},
      {"武",	0.061514},
      {"技",	0.058356},
      {"东",	0.121345},
      {"亲",	0.084066},
      {"受",	0.084736},
      {"对",	0.311761},
      {"远",	0.066691},
      {"最",	0.134213},
      {"死",	0.089264},
      {"识",	0.056331},
      {"万",	0.101912},
      {"台",	0.078054},
      {"上",	0.636581},
      {"里",	0.300629},
      {"轻",	0.058334},
      {"干",	0.076271},
      {"儿",	0.146378},
      {"管",	0.067145},
      {"了",	1.283668},
      {"王",	0.084392},
      {"报",	0.094190},
      {"步",	0.060622},
      {"任",	0.079391},
      {"感",	0.088464},
      {"跟",	0.065369},
      {"不",	1.307755},
      {"各",	0.080068},
      {"年",	0.402590},
      {"然",	0.251950},
      {"住",	0.097438},
      {"全",	0.151277},
      {"等",	0.136630},
      {"往",	0.064102},
      {"安",	0.090869},
      {"什",	0.130740},
      {"想",	0.217789},
      {"解",	0.081322},
      {"金",	0.088088},
      {"作",	0.224425},
      {"德",	0.055379},
      {"化",	0.107695},
      {"他",	0.826706},
      {"月",	0.152597},
      {"让",	0.073580},
      {"见",	0.196038},
      {"事",	0.229908},
      {"共",	0.077287},
      {"提",	0.088749},
      {"几",	0.135949},
      {"革",	0.060397},
      {"分",	0.168552},
      {"夫",	0.067138},
      {"利",	0.084762},
      {"方",	0.202303},
      {"三",	0.185462},
      {"老",	0.182608},
      {"持",	0.053845},

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -