kcharsets.cpp

来自「konqueror3 embedded版本, KDE环境下的当家浏览器的嵌入式版」· C++ 代码 · 共 665 行 · 第 1/2 页

CPP
665
字号
/* This file is part of the KDE libraries
    Copyright (C) 1999 Lars Knoll (knoll@kde.org)

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
*/
#include "kcharsets.h"

#include "kqiodevicegzip_p.h"
#include "kentities.c"

#include <kapplication.h>
#include <kglobal.h>
#include <klocale.h>
#include <kconfig.h>

#include <qfontinfo.h>
#include <qstrlist.h>
#include <qfontdatabase.h>
#include <kdebug.h>

#include <qtextcodec.h>
#include <qmap.h>
#include <qcstring.h>
#include <qdir.h>
#include <qregexp.h>

#include <assert.h>

// Untranslated names of the language groups. The position of a name in this
// array is the number stored in the "data" field of language_for_encoding
// (see the numbered legend above that table).
static const char * const language_names[] = {
    I18N_NOOP( "Other" ),
    I18N_NOOP( "Arabic" ),
    I18N_NOOP( "Baltic" ),
    I18N_NOOP( "Central European" ),
    I18N_NOOP( "Chinese Simplified" ),
    I18N_NOOP( "Chinese Traditional" ),
    I18N_NOOP( "Cyrillic" ),
    I18N_NOOP( "Greek" ),
    I18N_NOOP( "Hebrew" ),
    I18N_NOOP( "Japanese" ),
    I18N_NOOP( "Korean" ),
    I18N_NOOP( "Thai" ),
    I18N_NOOP( "Turkish" ),
    I18N_NOOP( "Western European" ),
    I18N_NOOP( "Tamil" ),
    I18N_NOOP( "Unicode" ),
    I18N_NOOP( "Northern Saami" ),
    I18N_NOOP( "Vietnamese" ),
    I18N_NOOP( "South-Eastern Europe" )
};

// This list gives the charsets that can be used to display a file given in a certain encoding.
// The list should be in order of preference
// The array is terminated by a null pointer.
static const char* const charsets_for_encoding[] = {
    "koi8-r",
    "koi8-u",
    "iso 8859-1",
    "iso 8859-2",
    "iso 8859-3",
    "iso 8859-4",
    "iso 8859-5",
    "iso 8859-6",
    "iso 8859-7",
    "iso 8859-8",
    "iso 8859-8-i",
    "iso 8859-9",
    "iso 8859-11",
    "iso 8859-13",
    "iso 8859-14",
    "iso 8859-15",
    "iso 8859-16",
    "utf8",
    "utf16",
    "iso-10646-ucs-2",
    "cp 1250",
    "cp 1251",
    "cp 1252",
    "cp 1253",
    "cp 1254",
    "cp 1255",
    "cp 1256",
    "cp 1257",
    "cp 1258",
    "ibm850",
    "ibm852",
    "ibm866",
    "tis620",
    "eucjp",
    "sjis",
    "jis7",
    "big5",
    "big5-hkscs",
    "gbk",
    "gb18030",
    "gb2312",
    "euckr",
    "tscii",
//    "pt 154",
    "winsami2",
    "cp 874",
    0 }; // extra 0 for end

// Legend for the "data" field of language_for_encoding
// (same order as language_names):
// 0 other
// 1 Arabic
// 2 Baltic
// 3 Central European
// 4 Chinese Simplified
// 5 Chinese Traditional
// 6 Cyrillic
// 7 Greek
// 8 Hebrew
// 9 Japanese
// 10 Korean
// 11 Thai
// 12 Turkish
// 13 Western European
// 14 Tamil
// 15 Unicode
// 16 Northern Saami
// 17 Vietnamese
// 18 South-Eastern Europe

// Maps an encoding name (index) to the number of its language group (data).
// The table is terminated by an entry whose index is 0.
// ### FIXME KDE4: the name of the encodings should mostly be uppercase
static struct LanguageForEncoding
    {
    const char* index;
    int data;
    } const language_for_encoding[] = {
    { "iso 8859-1", 13 },
    { "iso 8859-15", 13 },
    { "iso 8859-14", 13 },
    { "cp 1252", 13 },
    { "ibm850", 13 },
    { "iso 8859-2", 3 },
    { "iso 8859-3", 3 },
    { "iso 8859-4", 2 },
    { "iso 8859-13", 2 },
    { "iso 8859-16", 18 },
    { "cp 1250", 3 },
    { "cp 1254", 12 },
    { "cp 1257", 2 },
    { "ibm852", 3 },
    { "koi8-r", 6 },
    { "iso 8859-5", 6 },
    { "cp 1251", 6 },
    { "koi8-u", 6 },
//    { "pt 154", 6 },
    { "ibm866", 6 },
    { "big5", 5 },
    { "big5-hkscs", 5 },
    { "gb18030", 4 },
    { "gbk", 4 },
    { "gb2312", 4 },
    { "euckr", 10 },
    { "sjis", 9 },
    { "jis7", 9 },
    { "eucjp", 9 },
    { "iso 8859-7", 7 },
    { "cp 1253", 7 },
    { "iso 8859-6", 1 },
    { "cp 1256", 1 },
    { "iso 8859-8", 8 },
    { "iso 8859-8-i", 8 },
    { "cp 1255", 8 },
    { "iso 8859-9", 12 },
    { "tis620", 11 },
    { "iso 8859-11", 11 },
    { "cp 874", 11 },
    { "cp 1258", 17 },
    { "tscii", 14 },
    { "utf8", 15 },
    { "utf16", 15 },
    { "utf7", 15 }, // ### FIXME: UTF-7 is not in Qt
    { "ucs2", 15 },
    { "iso-10646-ucs-2", 15 },
    { "winsami2", 16},
    { 0, 0 } };

// defines some different names for codecs that are built into Qt.
// index is the alternative name, data is the codec name it maps to.
// The table is terminated by a { 0, 0 } entry.
static struct Builtin
    {
    const char* index;
    const char* data;
    } const builtin[] = {
    { "iso-ir-111", "koi8-r" },
    { "koi8-ru", "koi8-u" }, // ### Qt 3.3 maps it to koi8-r
    { "koi unified", "koi8-r" }, // ### FIXME: Qt 3.3 seems to map this to EUC-KR, so this mapping is too late :-(
    // Using ISO-8859-1 for ASCII is an approximation at write
    { "us-ascii", "iso 8859-1" },
    { "usascii", "iso 8859-1" },
    { "ascii", "iso 8859-1" },
    { "x-utf-8", "utf-8" },
    { "x-utf-7", "utf-7" }, // ### FIXME: UTF-7 is not in Qt
    { "unicode-1-1-utf-7", "utf-7" }, // ### FIXME: UTF-7 is not in Qt
    { "utf-16", "iso-10646-ucs-2" },
    { "utf16", "iso-10646-ucs-2" },
    { "ucs2", "iso-10646-ucs-2" },
    { "iso10646-1", "iso-10646-ucs-2" },
    { "gb18030.2000-1", "gb18030" },
    { "gb18030.2000-0", "gb18030" },
    { "gbk-0", "gbk" },
    { "gb2312.1980-0", "gbk" },
    { "gb_2312-80", "gbk" },
/* this one is not official, but MS is using it :/ */
    { "x-euc-kr", "euckr" },
    { "jisx0201.1976-0", "eucjp" },
    { "jisx0208.1983-0", "eucjp" },
    { "jisx0208.1990-0", "eucjp" },
    { "jisx0208.1997-0", "eucjp" },
    { "jisx0212.1990-0", "eucjp" },
    { "jisx0213.2000-1", "eucjp" },
    { "jisx0213.2000-2", "eucjp" },
    { "windows850", "ibm850" },
    { "windows866", "ibm866" },
    { "windows1251", "cp 1251" },
    { "windows1252", "cp 1252" },
    { "windows1253", "cp 1253" },
    { "windows1254", "cp 1254" },
    { "windows1255", "cp 1255" },
    { "windows1256", "cp 1256" },
    { "windows1257", "cp 1257" },
    { "windows1258", "cp 1258" },
    { "windows-850", "ibm850" },
    { "windows-866", "ibm866" },
    { "x-windows-850", "ibm850" },
    { "x-windows-866", "ibm866" },
    { "x-windows-1250", "cp 1250" },
    { "x-windows-1251", "cp 1251" },
    { "x-windows-1252", "cp 1252" },
    { "x-windows-1253", "cp 1253" },
    { "x-windows-1254", "cp 1254" },
    { "x-windows-1255", "cp 1255" },
    { "x-windows-1256", "cp 1256" },
    { "x-windows-1257", "cp 1257" },
    { "x-windows-1258", "cp 1258" },
    { "cp819", "iso 8859-1" },
    { "cp850", "ibm850" },
    { "cp866", "ibm866" },
    { "cp-819", "iso 8859-1" },
    { "cp-850", "ibm850" },
    { "cp-866", "ibm866" },
    { "cp-1250", "cp 1250" },
    { "cp-1251", "cp 1251" },
    { "cp-1252", "cp 1252" },
    { "cp-1253", "cp 1253" },
    { "cp-1254", "cp 1254" },
    { "cp-1255", "cp 1255" },
    { "cp-1256", "cp 1256" },
    { "cp-1257", "cp 1257" },
    { "cp-1258", "cp 1258" },
    { "cp-10000", "apple roman" },
    { "x-cp-850", "ibm850" },
    { "x-cp-866", "ibm866" },
    { "x-cp-1250", "cp 1250" },
    { "x-cp-1251", "cp 1251" },
    { "x-cp-1252", "cp 1252" },
    { "x-cp-1253", "cp 1253" },
    { "x-cp-1254", "cp 1254" },
    { "x-cp-1255", "cp 1255" },
    { "x-cp-1256", "cp 1256" },
    { "x-cp-1257", "cp 1257" },
    { "x-cp-1258", "cp 1258" },
    { "x-cp-10000", "apple roman" },
    { "ibm819", "iso 8859-1" },
    { "thai-tis620", "iso 8859-11" },
    { "windows-874", "cp 874" },
    { "windows874", "cp 874" },
    { "x-windows-874", "cp 874" },
    { "x-cp-874", "cp 874" },
    { "ibm 874", "cp 874" },
    { "ibm874", "cp 874" }, // Qt4 name
    { "x-ibm874", "cp 874" },
    { "ksc5601.1987-0", "euckr" },
    { "x-winsami2", "winsami2" },
    { "x-mac-roman", "apple roman" },
    { "macintosh", "apple roman" },
    { "mac", "apple roman" },
    { "csiso2022jp", "jis7" }, // See bug #77243
    { "big5-eten", "big5-hkscs" },
    { "cp950", "big5-hkscs" },
    { 0, 0 }};

// some different names for the encodings defined in the charmaps files.
// even though the charmap file names are all uppercase, the names are all lowercase here.
// index is the alias, data is the name it maps to; terminated by { 0, 0 }.
static struct Aliases
    {
    const char* index;
    const char* data;
    } const aliases[] = {
    { "cp852", "ibm852" },
    { "cp-852", "ibm852" },
    { "x-cp-852", "ibm852" },
    { "windows852", "ibm852" },
    { "windows-852", "ibm852" },
    { "x-windows-852", "ibm852" },
    { 0, 0 }};

// some last resort hints in case the charmap file couldn't be found. This gives at least a partial conversion
// and helps making things readable.
// the name used as input here is already converted to the more canonical name as defined in the aliases array.
// index is the requested encoding, data is the fallback; terminated by { 0, 0 }.
static struct ConversionHints
    {
    const char* index;
    const char* data;
    } const conversion_hints[] = {
    { "cp1250", "iso-8859-2" },
    { "koi8-r", "iso-8859-5" },
    { "koi8-u", "koi8-r" },
    // KDE had always "CP 1251" as best fallback to PT 154. Now that Qt does not offer this encoding anymore, it is our fallback.
    { "pt 154", "cp 1251" },
    { "paratype-154", "cp 1251" },
    { "pt-154", "cp 1251" },
    { 0, 0 }};

// search an array of items index/data, index is const char*, data is T, find first matching index
// and return data, or return 0
template< typename T, typename Data >
static Data kcharsets_array_search( const T* start, const char* entry )
{
    for( const T* pos = start;
         pos->index != 0;
         ++pos )

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?