⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 swiutfconversions.c

📁 sloedgy open sip stack source code
💻 C
📖 第 1 页 / 共 2 页
字号:
/* SWIutfconversions, Unicode conversions */

 /****************License************************************************
  *
  * Copyright 2000-2003.  ScanSoft, Inc.    
  *
  * Use of this software is subject to notices and obligations set forth 
  * in the SpeechWorks Public License - Software Version 1.2 which is 
  * included with this software. 
  *
  * ScanSoft is a registered trademark of ScanSoft, Inc., and OpenSpeech, 
  * SpeechWorks and the SpeechWorks logo are registered trademarks or 
  * trademarks of SpeechWorks International, Inc. in the United States 
  * and other countries.
  *
  ***********************************************************************/
 

 /* -----1=0-------2=0-------3=0-------4=0-------5=0-------6=0-------7=0-------8
  */

#include <vxibuildopts.h>
#if P_VXI
 
 #include <stdio.h>
 #include <string.h>
 #include <vxi/SWIutfconversions.h>
 
 #if 0
 #include "Encoding.h"
 #include <cstring>
 #include <cwchar>
 #include <vector>
 #include <algorithm>
 
 // Set to true by InitializeDecoder() once the decoder registry has been
 // populated and sorted.  DecodeString() returns -1 while this is false.
 bool initialized = false;
 
 
 // ---------------------------------------------------------------------------
 // Define a registry of decoder functions.
 // ---------------------------------------------------------------------------
 
 // Signature shared by every decoder: reads a NUL-terminated byte string and
 // writes the decoded wide characters into the output string.  The bool
 // result reports whether decoding succeeded.
 typedef bool (*DECODERFUNCTION)(const char *,
                                 std::basic_string<wchar_t> &);

 // One registry row: an encoding name paired with the function that decodes
 // it.  The name pointer is stored as-is (not copied), so the pointed-to
 // string must outlive the entry.
 class EncoderEntry {
 public:
   const char * name;
   DECODERFUNCTION function;

   EncoderEntry(const char * encodingName, DECODERFUNCTION decoder)
     : name(encodingName), function(decoder)
   { }

   EncoderEntry(const EncoderEntry & other)
     : name(other.name), function(other.function)
   { }

   EncoderEntry & operator=(const EncoderEntry & other)
   {
     if (this == &other)
       return *this;
     name = other.name;
     function = other.function;
     return *this;
   }
 };

 // Orders entries by byte-wise comparison of their names; the decoder
 // function does not take part in the ordering.
 bool operator<(const EncoderEntry & lhs, const EncoderEntry & rhs)
 {
   const int order = strcmp(lhs.name, rhs.name);
   return order < 0;
 }
 
 // ---------------------------------------------------------------------------
 
 // Table of (encoding name, decoder function) entries.  InitializeDecoder()
 // fills it and sorts it by name; DecodeString() then searches it with
 // std::lower_bound.
 typedef std::vector<EncoderEntry> DECODER_REGISTRY;
 DECODER_REGISTRY decoderRegistry;
 
 // Populates decoderRegistry with every supported encoding alias, sorts it by
 // name and sets the 'initialized' flag.  Calling it again once 'initialized'
 // is true does nothing.
 void InitializeDecoder()
 {
   // One row per accepted alias.  All names are lowercase because
   // DecodeString() lowercases the requested name before searching.
   static const struct {
     const char * alias;
     bool (*decode)(const char *, std::basic_string<wchar_t> &);
   } aliasTable[] = {
     { "utf-8",       DecodeUTF8 },
     { "utf8",        DecodeUTF8 },

     { "us-ascii",    DecodeASCII },
     { "us_ascii",    DecodeASCII },
     { "usascii",     DecodeASCII },
     { "ascii",       DecodeASCII },

     { "iso8859-1",   DecodeISO8859_1 },
     { "iso-8859-1",  DecodeISO8859_1 },
     { "iso_8859-1",  DecodeISO8859_1 },
     { "latin1",      DecodeISO8859_1 },
     { "latin-1",     DecodeISO8859_1 },
     { "latin_1",     DecodeISO8859_1 },
     { "ibm-819",     DecodeISO8859_1 },
     { "ibm819",      DecodeISO8859_1 },

     { "iso8859-2",   DecodeISO8859_2 },
     { "iso-8859-2",  DecodeISO8859_2 },
     { "iso_8859-2",  DecodeISO8859_2 },
     { "latin2",      DecodeISO8859_2 },
     { "latin-2",     DecodeISO8859_2 },
     { "latin_2",     DecodeISO8859_2 },

     { "iso8859-3",   DecodeISO8859_3 },
     { "iso-8859-3",  DecodeISO8859_3 },
     { "iso_8859-3",  DecodeISO8859_3 },
     { "latin3",      DecodeISO8859_3 },
     { "latin-3",     DecodeISO8859_3 },
     { "latin_3",     DecodeISO8859_3 },

     { "iso8859-4",   DecodeISO8859_4 },
     { "iso-8859-4",  DecodeISO8859_4 },
     { "iso_8859-4",  DecodeISO8859_4 },
     { "latin4",      DecodeISO8859_4 },
     { "latin-4",     DecodeISO8859_4 },
     { "latin_4",     DecodeISO8859_4 },

     { "iso8859-15",  DecodeISO8859_15 },
     { "iso-8859-15", DecodeISO8859_15 },
     { "iso_8859-15", DecodeISO8859_15 },
     { "latin9",      DecodeISO8859_15 },
     { "latin-9",     DecodeISO8859_15 },
     { "latin_9",     DecodeISO8859_15 }
   };

   if (!initialized) {
     const unsigned int rowCount = sizeof(aliasTable) / sizeof(aliasTable[0]);
     for (unsigned int row = 0; row < rowCount; ++row) {
       decoderRegistry.push_back(
         EncoderEntry(aliasTable[row].alias, aliasTable[row].decode));
     }

     // DecodeString() binary-searches the registry, so it must be sorted.
     std::sort(decoderRegistry.begin(), decoderRegistry.end());

     initialized = true;
   }
 }
 
 // Runs InitializeDecoder() unless the registry is already initialized.
 // Always returns true; the result exists only so the call can be used as
 // the initializer of do_initialization below.
 static bool DoInitialization()
 {
   if (!initialized)
     InitializeDecoder();
   return true;
 }
 // Initializing this global triggers the registry setup.
 bool do_initialization = DoInitialization();
 
 
 int DecodeString(const char * encodingName,
                  const char * inputString,
                  std::basic_string<wchar_t> & outputString)
 {
   if (!initialized || encodingName == NULL || inputString == NULL)
     return -1;
 
   // (1) Convert string to lowercase.
   std::basic_string<char> encoding(encodingName);
   for (unsigned int i = 0; i < encoding.length(); ++i)
     if (encoding[i] < 0x5B && encoding[i] > 0x40)
       encoding[i] += 0x20;
 
   DECODER_REGISTRY::iterator j 
     = std::lower_bound(decoderRegistry.begin(), decoderRegistry.end(),
                        EncoderEntry(encoding.c_str(), NULL));
 
   if (j == decoderRegistry.end() || encoding != (*j).name) return -1;
 
   if ((*j).function(inputString, outputString)) return 0;
   return 1;
 }
 
 
 // ---------------------------------------------------------------------------
 // Now we define the 'simple' decoder functions
 // ---------------------------------------------------------------------------
 
 // Decodes a NUL-terminated 7-bit ASCII string into 'out'.
 // Returns false at the first byte outside 0x00..0x7F; in that case 'out'
 // holds the characters decoded before the offending byte.
 bool DecodeASCII(const char * in, std::basic_string<wchar_t> & out)
 {
   out.erase();

   for (const char * cursor = in; *cursor != '\0'; ++cursor) {
     const char ch = *cursor;
     // Both tests are needed: a high byte is negative where char is signed
     // and greater than 0x7f where char is unsigned.
     if (ch < 0 || ch > 0x7f)
       return false;
     out += wchar_t(ch);
   }

   return true;
 }
 
 
 // Decodes a NUL-terminated ISO-8859-1 (Latin-1) string into 'out'.
 // Each input byte maps to the Unicode code point with the same value
 // (0x00..0xFF).  Always returns true.
 bool DecodeISO8859_1(const char * in, std::basic_string<wchar_t> & out)
 {
   out.erase();

   while (*in != '\0') {
     // Convert through unsigned char first: where plain char is signed, a
     // byte >= 0x80 is negative and would be sign-extended into the wrong
     // wide character.
     out += wchar_t(static_cast<unsigned char>(*in));
     ++in;
   }
   return true;
 }
 
 
 // Decodes a NUL-terminated ISO-8859-2 (Latin-2) string into 'out'.
 // Bytes listed in the table map to the given Unicode code points; every
 // other byte maps to the code point with the same value.  Always returns
 // true.
 bool DecodeISO8859_2(const char * in, std::basic_string<wchar_t> & out)
 {
   out.erase();
   wchar_t w;

   while (*in != '\0') {
     // Work on the byte as unsigned: where plain char is signed, bytes
     // >= 0x80 are negative, so they would never match the 0xA1..0xFF case
     // labels and would be sign-extended in the default branch.
     const unsigned char byte = static_cast<unsigned char>(*in);
     switch (byte) {
     case 0xA1:  w = 0x0104;  break; // LATIN CAPITAL LETTER A WITH OGONEK
     case 0xA2:  w = 0x02D8;  break; // BREVE
     case 0xA3:  w = 0x0141;  break; // LATIN CAPITAL LETTER L WITH STROKE
     case 0xA5:  w = 0x013D;  break; // LATIN CAPITAL LETTER L WITH CARON
     case 0xA6:  w = 0x015A;  break; // LATIN CAPITAL LETTER S WITH ACUTE
     case 0xA9:  w = 0x0160;  break; // LATIN CAPITAL LETTER S WITH CARON
     case 0xAA:  w = 0x015E;  break; // LATIN CAPITAL LETTER S WITH CEDILLA
     case 0xAB:  w = 0x0164;  break; // LATIN CAPITAL LETTER T WITH CARON
     case 0xAC:  w = 0x0179;  break; // LATIN CAPITAL LETTER Z WITH ACUTE
     case 0xAE:  w = 0x017D;  break; // LATIN CAPITAL LETTER Z WITH CARON
     case 0xAF:  w = 0x017B;  break; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
     case 0xB1:  w = 0x0105;  break; // LATIN SMALL LETTER A WITH OGONEK
     case 0xB2:  w = 0x02DB;  break; // OGONEK
     case 0xB3:  w = 0x0142;  break; // LATIN SMALL LETTER L WITH STROKE
     case 0xB5:  w = 0x013E;  break; // LATIN SMALL LETTER L WITH CARON
     case 0xB6:  w = 0x015B;  break; // LATIN SMALL LETTER S WITH ACUTE
     case 0xB7:  w = 0x02C7;  break; // CARON
     case 0xB9:  w = 0x0161;  break; // LATIN SMALL LETTER S WITH CARON
     case 0xBA:  w = 0x015F;  break; // LATIN SMALL LETTER S WITH CEDILLA
     case 0xBB:  w = 0x0165;  break; // LATIN SMALL LETTER T WITH CARON
     case 0xBC:  w = 0x017A;  break; // LATIN SMALL LETTER Z WITH ACUTE
     case 0xBD:  w = 0x02DD;  break; // DOUBLE ACUTE ACCENT
     case 0xBE:  w = 0x017E;  break; // LATIN SMALL LETTER Z WITH CARON
     case 0xBF:  w = 0x017C;  break; // LATIN SMALL LETTER Z WITH DOT ABOVE
     case 0xC0:  w = 0x0154;  break; // LATIN CAPITAL LETTER R WITH ACUTE
     case 0xC3:  w = 0x0102;  break; // LATIN CAPITAL LETTER A WITH BREVE
     case 0xC5:  w = 0x0139;  break; // LATIN CAPITAL LETTER L WITH ACUTE
     case 0xC6:  w = 0x0106;  break; // LATIN CAPITAL LETTER C WITH ACUTE
     case 0xC8:  w = 0x010C;  break; // LATIN CAPITAL LETTER C WITH CARON
     case 0xCA:  w = 0x0118;  break; // LATIN CAPITAL LETTER E WITH OGONEK
     case 0xCC:  w = 0x011A;  break; // LATIN CAPITAL LETTER E WITH CARON
     case 0xCF:  w = 0x010E;  break; // LATIN CAPITAL LETTER D WITH CARON
     case 0xD0:  w = 0x0110;  break; // LATIN CAPITAL LETTER D WITH STROKE
     case 0xD1:  w = 0x0143;  break; // LATIN CAPITAL LETTER N WITH ACUTE
     case 0xD2:  w = 0x0147;  break; // LATIN CAPITAL LETTER N WITH CARON
     case 0xD5:  w = 0x0150;  break; // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
     case 0xD8:  w = 0x0158;  break; // LATIN CAPITAL LETTER R WITH CARON
     case 0xD9:  w = 0x016E;  break; // LATIN CAPITAL LETTER U WITH RING ABOVE
     case 0xDB:  w = 0x0170;  break; // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
     case 0xDE:  w = 0x0162;  break; // LATIN CAPITAL LETTER T WITH CEDILLA
     case 0xE0:  w = 0x0155;  break; // LATIN SMALL LETTER R WITH ACUTE
     case 0xE3:  w = 0x0103;  break; // LATIN SMALL LETTER A WITH BREVE
     case 0xE5:  w = 0x013A;  break; // LATIN SMALL LETTER L WITH ACUTE
     case 0xE6:  w = 0x0107;  break; // LATIN SMALL LETTER C WITH ACUTE
     case 0xE8:  w = 0x010D;  break; // LATIN SMALL LETTER C WITH CARON
     case 0xEA:  w = 0x0119;  break; // LATIN SMALL LETTER E WITH OGONEK
     case 0xEC:  w = 0x011B;  break; // LATIN SMALL LETTER E WITH CARON
     case 0xEF:  w = 0x010F;  break; // LATIN SMALL LETTER D WITH CARON
     case 0xF0:  w = 0x0111;  break; // LATIN SMALL LETTER D WITH STROKE
     case 0xF1:  w = 0x0144;  break; // LATIN SMALL LETTER N WITH ACUTE
     case 0xF2:  w = 0x0148;  break; // LATIN SMALL LETTER N WITH CARON
     case 0xF5:  w = 0x0151;  break; // LATIN SMALL LETTER O WITH DOUBLE ACUTE
     case 0xF8:  w = 0x0159;  break; // LATIN SMALL LETTER R WITH CARON
     case 0xF9:  w = 0x016F;  break; // LATIN SMALL LETTER U WITH RING ABOVE
     case 0xFB:  w = 0x0171;  break; // LATIN SMALL LETTER U WITH DOUBLE ACUTE
     case 0xFE:  w = 0x0163;  break; // LATIN SMALL LETTER T WITH CEDILLA
     case 0xFF:  w = 0x02D9;  break; // DOT ABOVE
     default:
       // Unlisted bytes map to the code point with the same value.
       w = wchar_t(byte);
       break;
     }
     out += w;
     ++in;
   }

   return true;
 }
 
 
 // Decodes a NUL-terminated ISO-8859-3 (Latin-3) string into 'out'.
 // Bytes listed in the table map to the given Unicode code points; every
 // other byte maps to the code point with the same value.  Always returns
 // true.
 bool DecodeISO8859_3(const char * in, std::basic_string<wchar_t> & out)
 {
   out.erase();
   wchar_t w;

   while (*in != '\0') {
     // Work on the byte as unsigned: where plain char is signed, bytes
     // >= 0x80 are negative, so they would never match the 0xA1..0xFF case
     // labels and would be sign-extended in the default branch.
     const unsigned char byte = static_cast<unsigned char>(*in);
     switch (byte) {
     case 0xA1:  w = 0x0126;  break; // LATIN CAPITAL LETTER H WITH STROKE
     case 0xA2:  w = 0x02D8;  break; // BREVE
     case 0xA6:  w = 0x0124;  break; // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
     case 0xA9:  w = 0x0130;  break; // LATIN CAPITAL LETTER I WITH DOT ABOVE
     case 0xAA:  w = 0x015E;  break; // LATIN CAPITAL LETTER S WITH CEDILLA
     case 0xAB:  w = 0x011E;  break; // LATIN CAPITAL LETTER G WITH BREVE
     case 0xAC:  w = 0x0134;  break; // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
     case 0xAF:  w = 0x017B;  break; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
     case 0xB1:  w = 0x0127;  break; // LATIN SMALL LETTER H WITH STROKE
     case 0xB6:  w = 0x0125;  break; // LATIN SMALL LETTER H WITH CIRCUMFLEX
     case 0xB9:  w = 0x0131;  break; // LATIN SMALL LETTER DOTLESS I
     case 0xBA:  w = 0x015F;  break; // LATIN SMALL LETTER S WITH CEDILLA
     case 0xBB:  w = 0x011F;  break; // LATIN SMALL LETTER G WITH BREVE
     case 0xBC:  w = 0x0135;  break; // LATIN SMALL LETTER J WITH CIRCUMFLEX
     case 0xBF:  w = 0x017C;  break; // LATIN SMALL LETTER Z WITH DOT ABOVE
     case 0xC5:  w = 0x010A;  break; // LATIN CAPITAL LETTER C WITH DOT ABOVE
     case 0xC6:  w = 0x0108;  break; // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
     case 0xD5:  w = 0x0120;  break; // LATIN CAPITAL LETTER G WITH DOT ABOVE
     case 0xD8:  w = 0x011C;  break; // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
     case 0xDD:  w = 0x016C;  break; // LATIN CAPITAL LETTER U WITH BREVE
     case 0xDE:  w = 0x015C;  break; // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
     case 0xE5:  w = 0x010B;  break; // LATIN SMALL LETTER C WITH DOT ABOVE
     case 0xE6:  w = 0x0109;  break; // LATIN SMALL LETTER C WITH CIRCUMFLEX
     case 0xF5:  w = 0x0121;  break; // LATIN SMALL LETTER G WITH DOT ABOVE
     case 0xF8:  w = 0x011D;  break; // LATIN SMALL LETTER G WITH CIRCUMFLEX
     case 0xFD:  w = 0x016D;  break; // LATIN SMALL LETTER U WITH BREVE
     case 0xFE:  w = 0x015D;  break; // LATIN SMALL LETTER S WITH CIRCUMFLEX
     case 0xFF:  w = 0x02D9;  break; // DOT ABOVE
     default:
       // Unlisted bytes map to the code point with the same value.
       w = wchar_t(byte);
       break;
     }
     out += w;
     ++in;
   }

   return true;
 }
 
 
 bool DecodeISO8859_4(const char * in, std::basic_string<wchar_t> & out)
 {
   out.erase();
   wchar_t w;
 
   while (*in != '\0') {
     switch (*in) {
     case 0xA1:  w = 0x0104;  break; // LATIN CAPITAL LETTER A WITH OGONEK
     case 0xA2:  w = 0x0138;  break; // LATIN SMALL LETTER KRA
     case 0xA3:  w = 0x0156;  break; // LATIN CAPITAL LETTER R WITH CEDILLA
     case 0xA5:  w = 0x0128;  break; // LATIN CAPITAL LETTER I WITH TILDE
     case 0xA6:  w = 0x013B;  break; // LATIN CAPITAL LETTER L WITH CEDILLA
     case 0xA9:  w = 0x0160;  break; // LATIN CAPITAL LETTER S WITH CARON
     case 0xAA:  w = 0x0112;  break; // LATIN CAPITAL LETTER E WITH MACRON
     case 0xAB:  w = 0x0122;  break; // LATIN CAPITAL LETTER G WITH CEDILLA
     case 0xAC:  w = 0x0166;  break; // LATIN CAPITAL LETTER T WITH STROKE
     case 0xAE:  w = 0x017D;  break; // LATIN CAPITAL LETTER Z WITH CARON
     case 0xB1:  w = 0x0105;  break; // LATIN SMALL LETTER A WITH OGONEK
     case 0xB2:  w = 0x02DB;  break; // OGONEK
     case 0xB3:  w = 0x0157;  break; // LATIN SMALL LETTER R WITH CEDILLA
     case 0xB5:  w = 0x0129;  break; // LATIN SMALL LETTER I WITH TILDE
     case 0xB6:  w = 0x013C;  break; // LATIN SMALL LETTER L WITH CEDILLA
     case 0xB7:  w = 0x02C7;  break; // CARON
     case 0xB9:  w = 0x0161;  break; // LATIN SMALL LETTER S WITH CARON
     case 0xBA:  w = 0x0113;  break; // LATIN SMALL LETTER E WITH MACRON
     case 0xBB:  w = 0x0123;  break; // LATIN SMALL LETTER G WITH CEDILLA
     case 0xBC:  w = 0x0167;  break; // LATIN SMALL LETTER T WITH STROKE
     case 0xBD:  w = 0x014A;  break; // LATIN CAPITAL LETTER ENG
     case 0xBE:  w = 0x017E;  break; // LATIN SMALL LETTER Z WITH CARON
     case 0xBF:  w = 0x014B;  break; // LATIN SMALL LETTER ENG
     case 0xC0:  w = 0x0100;  break; // LATIN CAPITAL LETTER A WITH MACRON
     case 0xC7:  w = 0x012E;  break; // LATIN CAPITAL LETTER I WITH OGONEK
     case 0xC8:  w = 0x010C;  break; // LATIN CAPITAL LETTER C WITH CARON
     case 0xCA:  w = 0x0118;  break; // LATIN CAPITAL LETTER E WITH OGONEK
     case 0xCC:  w = 0x0116;  break; // LATIN CAPITAL LETTER E WITH DOT ABOVE
     case 0xCF:  w = 0x012A;  break; // LATIN CAPITAL LETTER I WITH MACRON
     case 0xD0:  w = 0x0110;  break; // LATIN CAPITAL LETTER D WITH STROKE
     case 0xD1:  w = 0x0145;  break; // LATIN CAPITAL LETTER N WITH CEDILLA
     case 0xD2:  w = 0x014C;  break; // LATIN CAPITAL LETTER O WITH MACRON

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -