📄 charset.c
字号:
/***************************************************************************** * charset.c: Determine a canonical name for the current locale's character * encoding. ***************************************************************************** * Copyright (C) 2003-2004 VideoLAN * $Id: charset.c 10101 2005-03-02 16:47:31Z robux4 $ * * Author: Derk-Jan Hartman <thedj at users.sf.net> * * vlc_current_charset() an adaption of mp_locale_charset(): * * Copyright (C) 2001-2003 The Mape Project * Written by Karel Zak <zakkr@zf.jcu.cz>. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. *****************************************************************************/#include <stdlib.h>#include <stdio.h>#include <vlc/vlc.h>#if !defined WIN32# if HAVE_LANGINFO_CODESET# include <langinfo.h># else# if HAVE_SETLOCALE# include <locale.h># endif# endif#elif defined WIN32# include <windows.h>#endif#include "charset.h"typedef struct VLCCharsetAlias{ char *psz_alias, *psz_name;} VLCCharsetAlias;/* * The libcharset load all from external text file, but it's strange and * slow solution, we rather use array(s) compiled into source. In the * "good" libc this is not needful -- for example in linux. * * Please, put to this funtion exotic aliases only. The libc 'iconv' knows * a lot of basic aliases (check it first by iconv -l). * */static const char* vlc_encoding_from_language( const char *l ){ /* check for language (and perhaps country) codes */ if (strstr(l, "zh_TW")) return "Big5"; if (strstr(l, "zh_HK")) return "Big5HKSCS"; /* no MIME charset */ if (strstr(l, "zh")) return "GB2312"; if (strstr(l, "th")) return "TIS-620"; if (strstr(l, "ja")) return "EUC-JP"; if (strstr(l, "ko")) return "EUC-KR"; if (strstr(l, "ru")) return "KOI8-R"; if (strstr(l, "uk")) return "KOI8-U"; if (strstr(l, "pl") || strstr(l, "hr") || strstr(l, "hu") || strstr(l, "cs") || strstr(l, "sk") || strstr(l, "sl")) return "ISO-8859-2"; if (strstr(l, "eo") || strstr(l, "mt")) return "ISO-8859-3"; if (strstr(l, "lt") || strstr(l, "la")) return "ISO-8859-4"; if (strstr(l, "bg") || strstr(l, "be") || strstr(l, "mk") || strstr(l, "uk")) return "ISO-8859-5"; if (strstr(l, "ar")) return "ISO-8859-6"; if (strstr(l, "el")) return "ISO-8859-7"; if (strstr(l, "he") || strstr(l, "iw")) return "ISO-8859-8"; if (strstr(l, "tr")) return "ISO-8859-9"; if (strstr(l, "th")) return "ISO-8859-11"; if (strstr(l, "lv")) return "ISO-8859-13"; if (strstr(l, "cy")) return "ISO-8859-14"; if (strstr(l, "et")) return "ISO-8859-15"; /* all latin1 could be iso15 as well */ if (strstr(l, "ro")) return "ISO-8859-2"; /* or ISO-8859-16 */ if (strstr(l, "am") || strstr(l, "vi")) return "UTF-8"; /* We don't know. This ain't working go to default. */ return "ISO-8859-1";}static const char* vlc_charset_aliases( const char *psz_name ){ VLCCharsetAlias *a;#if defined WIN32 VLCCharsetAlias aliases[] = { { "CP936", "GBK" }, { "CP1361", "JOHAB" }, { "CP20127", "ASCII" }, { "CP20866", "KOI8-R" }, { "CP21866", "KOI8-RU" }, { "CP28591", "ISO-8859-1" }, { "CP28592", "ISO-8859-2" }, { "CP28593", "ISO-8859-3" }, { "CP28594", "ISO-8859-4" }, { "CP28595", "ISO-8859-5" }, { "CP28596", "ISO-8859-6" }, { "CP28597", "ISO-8859-7" }, { "CP28598", "ISO-8859-8" }, { "CP28599", "ISO-8859-9" }, { "CP28605", "ISO-8859-15" }, { NULL, NULL } };#elif SYS_AIX VLCCharsetAlias aliases[] = { { "IBM-850", "CP850" }, { "IBM-856", "CP856" }, { "IBM-921", "ISO-8859-13" }, { "IBM-922", "CP922" }, { "IBM-932", "CP932" }, { "IBM-943", "CP943" }, { "IBM-1046", "CP1046" }, { "IBM-1124", "CP1124" }, { "IBM-1129", "CP1129" }, { "IBM-1252", "CP1252" }, { "IBM-EUCCN", "GB2312" }, { "IBM-EUCJP", "EUC-JP" }, { "IBM-EUCKR", "EUC-KR" }, { "IBM-EUCTW", "EUC-TW" }, { NULL, NULL } };#elif SYS_HPUX VLCCharsetAlias aliases[] = { { "ROMAN8", "HP-ROMAN8" }, { "ARABIC8", "HP-ARABIC8" }, { "GREEK8", "HP-GREEK8" }, { "HEBREW8", "HP-HEBREW8" }, { "TURKISH8", "HP-TURKISH8" }, { "KANA8", "HP-KANA8" }, { "HP15CN", "GB2312" }, { NULL, NULL } };#elif SYS_IRIX VLCCharsetAlias aliases[] = { { "EUCCN", "GB2312" }, { NULL, NULL } };#elif SYS_OSF VLCCharsetAlias aliases[] = { { "KSC5601", "CP949" }, { "SDECKANJI", "EUC-JP" }, { "TACTIS", "TIS-620" }, { NULL, NULL } };#elif SYS_SOLARIS VLCCharsetAlias aliases[] = { { "646", "ASCII" }, { "CNS11643", "EUC-TW" }, { "5601", "EUC-KR" }, { "JOHAP92", "JOHAB" }, { "PCK", "SHIFT_JIS" }, { "2533", "TIS-620" }, { NULL, NULL } };#elif SYS_BSD VLCCharsetAlias aliases[] = { { "646", " ASCII" }, { "EUCCN", "GB2312" }, { NULL, NULL } };#else VLCCharsetAlias aliases[] = {{NULL, NULL}};#endif if( aliases ) { for (a = aliases; a->psz_alias; a++) if (strcasecmp (a->psz_alias, psz_name) == 0) return a->psz_name; } /* we return original name beacuse iconv() probably will know * something better about name if we don't know it :-) */ return psz_name;}/* Returns charset from "language_COUNTRY.charset@modifier" string */static char *vlc_encoding_from_locale( char *psz_locale ){ char *psz_dot = strchr( psz_locale, '.' ); if( psz_dot != NULL ) { const char *psz_modifier; static char buf[2 + 10 + 1]; psz_dot++; /* Look for the possible @... trailer and remove it, if any. */ psz_modifier = strchr( psz_dot, '@' ); if( psz_modifier == NULL ) return psz_dot; if( 0 < ( psz_modifier - psz_dot ) < sizeof( buf )) { memcpy( buf, psz_dot, psz_modifier - psz_dot ); buf[ psz_modifier - psz_dot ] = '\0'; return buf; } } /* try language mapping */ return (char *)vlc_encoding_from_language( psz_locale );}vlc_bool_t vlc_current_charset( char **psz_charset ){ const char *psz_codeset;#if !(defined WIN32 || defined OS2)# if HAVE_LANGINFO_CODESET /* Most systems support nl_langinfo( CODESET ) nowadays. */ psz_codeset = nl_langinfo( CODESET );# else /* On old systems which lack it, use setlocale or getenv. */ const char *psz_locale = NULL; /* But most old systems don't have a complete set of locales. Some * (like SunOS 4 or DJGPP) have only the C locale. Therefore we don't * use setlocale here; it would return "C" when it doesn't support the * locale name the user has set. Darwin's setlocale is broken. */# if HAVE_SETLOCALE && !SYS_DARWIN psz_locale = setlocale( LC_ALL, NULL );# endif if( psz_locale == NULL || psz_locale[0] == '\0' ) { psz_locale = getenv( "LC_ALL" ); if( psz_locale == NULL || psz_locale[0] == '\0' ) { psz_locale = getenv( "LC_CTYPE" ); if( psz_locale == NULL || psz_locale[0] == '\0') psz_locale = getenv( "LANG" ); } } /* On some old systems, one used to set locale = "iso8859_1". On others, * you set it to "language_COUNTRY.charset". Darwin only has LANG :( */ psz_codeset = vlc_encoding_from_locale( (char *)psz_locale );# endif /* HAVE_LANGINFO_CODESET */#elif defined WIN32 static char buf[2 + 10 + 1]; /* Woe32 has a function returning the locale's codepage as a number. */ sprintf( buf, "CP%u", GetACP() ); psz_codeset = buf;#elif defined OS2 const char *psz_locale; static char buf[2 + 10 + 1]; ULONG cp[3]; ULONG cplen; /* Allow user to override the codeset, as set in the operating system, * with standard language environment variables. */ psz_locale = getenv( "LC_ALL" ); if( psz_locale == NULL || psz_locale[0] == '\0' ) { psz+locale = getenv( "LC_CTYPE" ); if( psz_locale == NULL || locale[0] == '\0' ) locale = getenv( "LANG" ); } if( psz_locale != NULL && psz_locale[0] != '\0' ) psz_codeset = vlc_encoding_from_locale( psz_locale ); else { /* OS/2 has a function returning the locale's codepage as a number. */ if( DosQueryCp( sizeof( cp ), cp, &cplen ) ) psz_codeset = ""; else { sprintf( buf, "CP%u", cp[0] ); psz_codeset = buf; } }#endif if( psz_codeset == NULL ) /* The canonical name cannot be determined. */ psz_codeset = ""; else psz_codeset = vlc_charset_aliases( psz_codeset ); /* Don't return an empty string. GNU libc and GNU libiconv interpret * the empty string as denoting "the locale's character encoding", * thus GNU libiconv would call this function a second time. */ if( psz_codeset[0] == '\0' ) { /* Last possibility is 'CHARSET' enviroment variable */ if( !( psz_codeset = getenv( "CHARSET" ) ) ) psz_codeset = "ISO-8859-1"; } if( psz_charset ) *psz_charset = strdup((char *)psz_codeset); if( !strcasecmp(psz_codeset, "UTF8") || !strcasecmp(psz_codeset, "UTF-8") ) return VLC_TRUE; return VLC_FALSE;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -